From dfae53a4a95ddeb4e264d723b089bc73125f4155 Mon Sep 17 00:00:00 2001
From: Ruth Petrie
Date: Wed, 11 Mar 2020 12:08:37 +0000
Subject: [PATCH] modify for new variable

---
 scripts/batch_up_copy_from_archive.py         | 25 +++++++++++
 scripts/copy_from_archive_batch_up.py         | 25 +++++++++++
 scripts/copy_from_archive_lotus_submit.sh     | 10 +++++
 .../copy_from_archive_setup_batch_lotus.sh    | 15 +++++++
 scripts/copy_from_archive_to_gws.py           | 44 +++++++++++++++++++
 scripts/esgf_search.py                        |  2 +-
 scripts/run_quality_control.py                |  2 +-
 scripts/utils.py                              |  6 ++-
 8 files changed, 125 insertions(+), 4 deletions(-)
 create mode 100644 scripts/batch_up_copy_from_archive.py
 create mode 100644 scripts/copy_from_archive_batch_up.py
 create mode 100755 scripts/copy_from_archive_lotus_submit.sh
 create mode 100755 scripts/copy_from_archive_setup_batch_lotus.sh
 create mode 100644 scripts/copy_from_archive_to_gws.py

diff --git a/scripts/batch_up_copy_from_archive.py b/scripts/batch_up_copy_from_archive.py
new file mode 100644
index 0000000..315e5e2
--- /dev/null
+++ b/scripts/batch_up_copy_from_archive.py
@@ -0,0 +1,25 @@
+#!/usr/bin/python2.7
+
+import os
+from sys import argv
+from subprocess import call
+
+# FAILED_DATAFILES = '/group_workspaces/jasmin2/cp4cds1/qc/qc-app-dev/qcapp/logfiles/fixable_datafiles.log'
+# DATASETS_TO_FIX_DIR = '/group_workspaces/jasmin2/cp4cds1/qc/qc-app-dev/qcapp/status_logs/fix_2019-01-29/to_fix'
+
+# DATASET_IDS_TO_FIX = '/group_workspaces/jasmin2/cp4cds1/qc/qc-app-dev/qcapp/status_logs/fix_2019-01-29/dataset_ids_to_fix_2019-02-01_r2.log'
+start = int(argv[1])
+PER_BATCH = 50
+srt_idx = start * PER_BATCH
+end_idx = srt_idx + PER_BATCH
+
+filename = "/group_workspaces/jasmin2/cp4cds1/qc/meridional_wind/c3s-expts.txt"
+with open(filename) as r:
+    dataset_paths = [line.strip() for line in r]
+
+for ds in dataset_paths[srt_idx: end_idx]:
+
+    run_cmd = ["python", "/group_workspaces/jasmin2/cp4cds1/qc/qc-app-dev/qcapp/scripts/copy_from_archive_to_gws.py", ds.strip()]
+    run = call(run_cmd)
+    if run != 0:
+        print("ERROR RUNNING %s" % run_cmd)
\ No newline at end of file
diff --git a/scripts/copy_from_archive_batch_up.py b/scripts/copy_from_archive_batch_up.py
new file mode 100644
index 0000000..315e5e2
--- /dev/null
+++ b/scripts/copy_from_archive_batch_up.py
@@ -0,0 +1,25 @@
+#!/usr/bin/python2.7
+
+import os
+from sys import argv
+from subprocess import call
+
+# FAILED_DATAFILES = '/group_workspaces/jasmin2/cp4cds1/qc/qc-app-dev/qcapp/logfiles/fixable_datafiles.log'
+# DATASETS_TO_FIX_DIR = '/group_workspaces/jasmin2/cp4cds1/qc/qc-app-dev/qcapp/status_logs/fix_2019-01-29/to_fix'
+
+# DATASET_IDS_TO_FIX = '/group_workspaces/jasmin2/cp4cds1/qc/qc-app-dev/qcapp/status_logs/fix_2019-01-29/dataset_ids_to_fix_2019-02-01_r2.log'
+start = int(argv[1])
+PER_BATCH = 50
+srt_idx = start * PER_BATCH
+end_idx = srt_idx + PER_BATCH
+
+filename = "/group_workspaces/jasmin2/cp4cds1/qc/meridional_wind/c3s-expts.txt"
+with open(filename) as r:
+    dataset_paths = [line.strip() for line in r]
+
+for ds in dataset_paths[srt_idx: end_idx]:
+
+    run_cmd = ["python", "/group_workspaces/jasmin2/cp4cds1/qc/qc-app-dev/qcapp/scripts/copy_from_archive_to_gws.py", ds.strip()]
+    run = call(run_cmd)
+    if run != 0:
+        print("ERROR RUNNING %s" % run_cmd)
\ No newline at end of file
diff --git a/scripts/copy_from_archive_lotus_submit.sh b/scripts/copy_from_archive_lotus_submit.sh
new file mode 100755
index 0000000..7f552c1
--- /dev/null
+++ b/scripts/copy_from_archive_lotus_submit.sh
@@ -0,0 +1,10 @@
+#!/bin/bash
+# This is the starter program and calls the batchup_cp4cds_movefiles.py
+
+odir=/group_workspaces/jasmin2/cp4cds1/qc/qc-app-dev/lotus-logs/va-2020-03-010_r2/
+
+mkdir -p $odir
+
+for batch in $(seq 0 113); do
+    bsub -o $odir/%J.out -W 24:00 ./copy_from_archive_setup_batch_lotus.sh $batch
+done
\ No newline at end of file
diff --git a/scripts/copy_from_archive_setup_batch_lotus.sh b/scripts/copy_from_archive_setup_batch_lotus.sh
new file mode 100755
index 0000000..c6840bb
--- /dev/null
+++ b/scripts/copy_from_archive_setup_batch_lotus.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+
+batch_no=$1
+
+## For each job on the host ensure the correct virtual environment is activated
+source /group_workspaces/jasmin2/cp4cds1/qc/qc-app2/venv2/bin/activate
+cd /group_workspaces/jasmin2/cp4cds1/qc/qc-app-dev/qcapp/
+
+## For each job on the host ensure the Django settings are exported
+export DJANGO_SETTINGS_MODULE=qcproj.settings
+export PYTHONPATH=$PWD:$PWD/scripts:$PWD/lotus_submit
+
+
+## Call the python program with the arguments variable, table, frequency and any command line arguments
+python /group_workspaces/jasmin2/cp4cds1/qc/qc-app-dev/qcapp/scripts/copy_from_archive_batch_up.py $batch_no
\ No newline at end of file
diff --git a/scripts/copy_from_archive_to_gws.py b/scripts/copy_from_archive_to_gws.py
new file mode 100644
index 0000000..bf04e3a
--- /dev/null
+++ b/scripts/copy_from_archive_to_gws.py
@@ -0,0 +1,44 @@
+
+from setup_django import *
+import os
+import sys
+import shutil
+import time
+
+GWSDIR = "/group_workspaces/jasmin2/cp4cds1/data/cmip5_raw/output1/"
+ARCHIVE = "/badc/cmip5/data/cmip5/output1/"
+
+def main(ds_path):
+
+    version = os.readlink('/'.join(ds_path.split("/")[:-1]))
+    parts = ds_path.split('/')
+    parts[-2] = version
+    ds_version_src_path = '/'.join(parts)
+
+
+    ds_version_dst_path = ds_version_src_path.replace(ARCHIVE, GWSDIR)
+    dst_parts = ds_version_dst_path.split('/')
+    dst_parts[-2] = dst_parts[-1]
+    dst_parts[-1] = "files"
+    dst_parts.append(version.strip('v'))
+    ds_version_dst_path = '/'.join(dst_parts) + "/"
+    print(ds_version_dst_path)
+
+
+    # print(ds_version_src_path, ds_version_dst_path)
+    for f in os.listdir(ds_version_src_path):
+        fname = os.path.join(ds_version_src_path, f)
+        # print(fname)
+        # print(ds_version_dst_path)
+        if not os.path.exists(ds_version_dst_path):
+            os.makedirs(ds_version_dst_path)
+        try:
+            shutil.copy(fname, ds_version_dst_path)
+        except(IOError):
+            time.sleep(4)
+            shutil.copy(fname, ds_version_dst_path)
+
+if __name__ == "__main__":
+
+    ds_path = sys.argv[1]
+    main(ds_path)
\ No newline at end of file
diff --git a/scripts/esgf_search.py b/scripts/esgf_search.py
index 1f6e279..6fcb0a3 100644
--- a/scripts/esgf_search.py
+++ b/scripts/esgf_search.py
@@ -25,7 +25,7 @@ def find_all_local_datafiles(variable, frequency, table, experiment):
 
     url = datafile_search_template.format(cedaindex, project, variable, frequency, table, experiment, distrib, latest)
 
-    json_logdir, json_file = _define_local_json_cache_names(variable, frequency, table, experiment)
+    json_logdir, json_file = define_local_json_cache_names(variable, frequency, table, experiment)
 
     if not os.path.exists(json_file):
         resp = requests.get(url, verify=False)
diff --git a/scripts/run_quality_control.py b/scripts/run_quality_control.py
index 9e59481..3a992ac 100644
--- a/scripts/run_quality_control.py
+++ b/scripts/run_quality_control.py
@@ -296,7 +296,7 @@ def run_qc(variable, frequency, table):
 
     for experiment in ALLEXPTS:
 
-        datasets = Dataset.objects.filter(variable=variable, frequency=frequency, cmor_table=table, experiment=experiment)
+        datasets = Dataset.objects.filter(variable=variable, frequency=frequency, cmor_table=table, experiment=experiment, model='HadCM3')
 
         for ds in datasets:
             datafiles = ds.datafile_set.all()
diff --git a/scripts/utils.py b/scripts/utils.py
index 1e046cb..fd9fe32 100644
--- a/scripts/utils.py
+++ b/scripts/utils.py
@@ -315,8 +315,10 @@ def check_log_exists(file, qcdir, ext):
 
 
 
-def get_and_make_logdir(datafile, force_version):
-    inst, model, expt, freq, realm, table, ensemble, var, ver, ncfile = datafile.gws_path.split('/')[8:]
+def get_and_make_logdir(datafile, force_version=None):
+    print(datafile.gws_path)
+    adaf
+    inst, model, expt, freq, realm, table, ensemble, var, ver, ncfile = datafile.gws_path.split('/')[7:]
     if force_version:
         v_version = 'v{}'.format(force_version)
     else: