Skip to content

Commit

Permalink
adding 4th gen POD files
Browse files Browse the repository at this point in the history
  • Loading branch information
Nishchitha Etige committed May 21, 2024
1 parent 56c3c8e commit a6cc18b
Show file tree
Hide file tree
Showing 7 changed files with 489 additions and 3 deletions.
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
activity_id,branch_method,branch_time_in_child,branch_time_in_parent,experiment,experiment_id,frequency,grid,grid_label,institution_id,nominal_resolution,parent_activity_id,parent_experiment_id,parent_source_id,parent_time_units,parent_variant_label,product,realm,source_id,source_type,sub_experiment,sub_experiment_id,table_id,variable_id,variant_label,member_id,standard_name,long_name,units,vertical_levels,init_year,start_time,end_time,time_range,path,version
CMIP,standard,,,,synthetic,day,,gr,,,CMIP,,,days since 1980-01-01,r1i1p1f1,,atmos,,,none,none,day,tas,r1i1p1f1,r1i1p1f1,air_temperature,Near-Surface Air Temperature,K,1,,1980-01-01,1984-12-31,1980-01-01-1984-12-31,/Users/jess/mdtf/inputdata/mdtf_test_data/CMIP_Synthetic_r1i1p1f1_gr1_19800101-19841231/day/CMIP_Synthetic_r1i1p1f1_gr1_19800101-19841231.tas.day.nc,none
CMIP,standard,,,,synthetic,day,,gr,,,CMIP,,,days since 1985-01-01,r1i1p1f1,,atmos,,,none,none,day,tas,r1i1p1f1,r1i1p1f1,air_temperature,Near-Surface Air Temperature,K,1,,1985-01-01,1989-12-31,1985-01-01-1989-12-31,/Users/jess/mdtf/inputdata/mdtf_test_data/CMIP_Synthetic_r1i1p1f1_gr1_19850101-19891231/day/CMIP_Synthetic_r1i1p1f1_gr1_19850101-19891231.tas.day.nc,none
CMIP,standard,,,,synthetic,day,,gr,,,CMIP,,,days since 1980-01-01,r1i1p1f1,,atmos,,,none,none,day,tas,r1i1p1f1,r1i1p1f1,air_temperature,Near-Surface Air Temperature,K,1,,1980-01-01,1984-12-31,1980-01-01-1984-12-31,/glade/work/netige/mdtf_Apr24_2/mdtf/MDTF-diagnostics/mdtf_test_data/CMIP_Synthetic_r1i1p1f1_gr1_19800101-19841231/day/CMIP_Synthetic_r1i1p1f1_gr1_19800101-19841231.tas.day.nc,none
CMIP,standard,,,,synthetic,day,,gr,,,CMIP,,,days since 1985-01-01,r1i1p1f1,,atmos,,,none,none,day,tas,r1i1p1f1,r1i1p1f1,air_temperature,Near-Surface Air Temperature,K,1,,1985-01-01,1989-12-31,1985-01-01-1989-12-31,/glade/work/netige/mdtf_Apr24_2/mdtf/MDTF-diagnostics/mdtf_test_data/CMIP_Synthetic_r1i1p1f1_gr1_19850101-19891231/day/CMIP_Synthetic_r1i1p1f1_gr1_19850101-19891231.tas.day.nc,none
Original file line number Diff line number Diff line change
Expand Up @@ -187,5 +187,5 @@
"description": null,
"title": null,
"last_updated": "2023-06-01",
"catalog_file": "file:/Users/jess/mdtf/MDTF-diagnostics/diagnostics/example_multicase/esm_catalog_CMIP_synthetic_r1i1p1f1_gr1.csv"
"catalog_file": "file:/glade/work/netige/mdtf_Apr24_2/mdtf/MDTF-diagnostics/diagnostics/example_multicase/esm_catalog_CMIP_synthetic_r1i1p1f1_gr1.csv"
}
52 changes: 52 additions & 0 deletions diagnostics/trial_4thgen/settings.jsonc
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
// Example MDTF diagnostic settings file for multiple cases
//
// This example builds upon the single case (i.e. `example`) example POD
// and illustrates how to design and implement a POD that uses multiple
// model source datasets. These can be the same experiment with different
// models, two different experiments from the same model, or two different
// time periods within the same simulation.
//
// Comments are restricted to those relevant to the multi-case example.
// Please see the single case example POD and the documentation for more
// details.
//

// Basic POD Settings
{
  "settings" : {
    "description" : "A trial 4th generation POD to read and process data from Data Catalogs",
    // Script the framework invokes to run this POD
    "driver" : "trial_4thgen.py",
    "long_name" : "4th Generation Trial POD",
    // Naming convention used to translate variable names for this POD
    "convention": "cmip",
    "runtime_requirements": {
      // NOTE(review): the driver script also imports intake, yaml, and numpy --
      // confirm whether those should be listed here as well
      "python3": ["matplotlib", "xarray", "netCDF4"]
    }
  },

  // Variable Coordinates
  "dimensions": {
    "lat": {
      "standard_name": "latitude",
      "units": "degrees_north",
      "axis": "Y"
    },
    "lon": {
      "standard_name": "longitude",
      "units": "degrees_east",
      "axis": "X"
    },
    "time": {"standard_name": "time"}
  },

  // Variables
  // NOTE(review): this requests monthly ocean 'zos', but the driver
  // (trial_4thgen.py) currently reads the 'tas' variable and searches the
  // catalog with frequency="day" -- confirm which is intended
  "varlist" : {
    "zos": {
      "frequency" : "mon",
      "realm": "ocean",
      "dimensions": ["time", "lat", "lon"],
      "modifier": "",
      "standard_name" : "sea_surface_height_above_geoid",
      "units": "m"
    }
  }
}
29 changes: 29 additions & 0 deletions diagnostics/trial_4thgen/trial_4thgen.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
<title>MDTF example-multicase diagnostic</title>
<img src="../mdtf_diag_banner.png">
<h3>Multi-Case Example Diagnostic: zonal-average near-surface temperature anomaly</h3>
<p>
This POD illustrates how multiple cases (experiments) can be analyzed together.
The MDTF-diagnostics framework initializes and processes each case, writes the environment variables for the cases
to a yaml file (case_info.yml), and exports an ESM Intake catalog with information about the post-processed
data for each case to the working directory (WORK_DIR). The example_multicase POD driver script reads
environment information from case_info.yml into a dictionary that it references to read data from the
post-processed files in the data catalog.
</p>
<p>
The example_multicase POD reads near-surface air temperature (TAS) from netcdf output files for multiple cases.
The POD time averages the TAS data and calculates the anomaly relative to the global mean.
The anomalies are zonally-averaged and the results from all cases are shown on a single plot.
</p>
<TABLE>
<TR>
<TH align=left style="color:navy">Time averages
<TH align=left>Model Results
<TR>
<TH align=left>Zonal-mean near-surface temperature anomalies (K)
<TH align=center><A href=model/example_multicase_plot.png>plot</A>
</TABLE>
<!--
If the POD generates one figure per case, use the following format in your python script file paths:
<TH align=center><A href=model/example_model_plot_{{CASENAME}}.png>{{CASENAME}}</A>
The framework output manager will automatically append a link to each file
-->
165 changes: 165 additions & 0 deletions diagnostics/trial_4thgen/trial_4thgen.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
# MDTF Example Diagnostic POD for Multiple Cases / Experiments
# ================================================================================
#
# This file is part of the Multicase Example Diagnostic POD of the MDTF code
# package (see mdtf/MDTF-diagnostics/LICENSE.txt)
#
# Example Diagnostic POD
#
# Last update: Feb-2022
#
# This example builds upon the single case `example` POD
# and illustrates how to design and implement a POD that uses multiple
# model source datasets. These can be the same experiment with different
# models, two different experiments from the same model, or two different
# time periods within the same simulation.
#
# Version & Contact info
#
# - Version/revision information: version 1.1 (Oct-2022)
# - Model Development Task Force Framework Team
#
# Open source copyright agreement
#
# The MDTF framework is distributed under the LGPLv3 license (see LICENSE.txt).
#
# Functionality
#
# Metadata associated with the different cases are passed from the
# framework to the POD via a yaml file (case_info.yml) that the POD reads into a dictionary.
# The POD iterates over the case entries in the dictionary and opens the input datasets.
# The `tas` variable is extracted for each case and the time average is taken over the dataset.
# Anomalies are calculated relative to the global mean and then zonally-averaged. The resulting plot
# contains one line for each case.
#
# Required programming language and libraries
#
# * Python >= 3.10
# * xarray
# * matplotlib
# * intake
# * yaml
# * sys
# * os
# * numpy
#
# Required model output variables
#
# * tas - Surface (2-m) air temperature (CF: air_temperature)
#
# References
#
# Maloney, E. D, and Co-authors, 2019: Process-oriented evaluation of climate
# and weather forecasting models. BAMS, 100(9), 1665-1686,
# doi:10.1175/BAMS-D-18-0042.1.


# Import modules used in the POD
import os
import matplotlib

matplotlib.use("Agg") # non-X windows backend

import matplotlib.pyplot as plt
import numpy as np
import intake
import sys
import yaml

# Part 1: Read in the model data
# ------------------------------
# The framework communicates with the POD through environment variables:
#   WORK_DIR      -- the POD's working directory
#   case_env_file -- path to a yaml file describing each case
work_dir = os.environ["WORK_DIR"]

# Receive a dictionary of case information from the framework
print("reading case_info")
case_env_file = os.environ["case_env_file"]
assert os.path.isfile(case_env_file), f"case environment file not found: {case_env_file}"
with open(case_env_file, 'r') as stream:
    try:
        case_info = yaml.safe_load(stream)
    except yaml.YAMLError as exc:
        # Fail fast: swallowing the error here would only defer the failure
        # to a confusing NameError on case_info a few lines down.
        print(exc)
        raise

cat_def_file = case_info['CATALOG_FILE']
case_list = case_info['CASE_LIST']
# all cases share variable names and dimension coords, so just read them
# from the first case entry
first_case = next(iter(case_list.values()))
tas_var = first_case['tas_var']
time_coord = first_case['time_coord']
lat_coord = first_case['lat_coord']  # NOTE(review): currently unused below -- confirm
lon_coord = first_case['lon_coord']
# open the csv file using information provided by the catalog definition file
cat = intake.open_esm_datastore(cat_def_file)
# filter catalog by desired variable and output frequency
# NOTE(review): frequency is hard-coded to "day" here, while settings.jsonc
# declares a monthly ocean variable (zos) -- confirm which is intended
tas_subset = cat.search(variable_id=tas_var, frequency="day")
# convert tas_subset catalog to an xarray dataset dict
tas_dict = tas_subset.to_dataset_dict(
    xarray_open_kwargs={"decode_times": True, "use_cftime": True}
)

# Part 2: Do some calculations (time and zonal means)
# ---------------------------------------------------
# For each case: time-average tas, form anomalies about the spatial mean,
# then zonally average. Results are collected per case for plotting.

tas_arrays = {}

# Loop over cases
for case_name, ds in tas_dict.items():
    # select the tas variable for this case
    da = ds[tas_var]

    # take the time mean
    da = da.mean(dim=ds[time_coord].name)

    # this block shuffles the data to make this single case look more
    # interesting. ** DELETE THIS ** once we test with real data
    da.load()
    shuffled = da.to_masked_array().flatten()
    np.random.shuffle(shuffled)
    da.values = shuffled.reshape(da.shape)

    # convert to anomalies relative to the spatial mean
    da = da - da.mean()

    # take the zonal mean
    da = da.mean(dim=ds[lon_coord].name)

    tas_arrays[case_name] = da


# Part 3: Make a plot that contains results from each case
# --------------------------------------------------------
# One zonal-mean anomaly line per case, all on a single axis.

# set up the figure
fig = plt.figure(figsize=(12, 4))
ax = plt.subplot(1, 1, 1)

# draw one labeled line per case
for case_name, zonal_anomaly in tas_arrays.items():
    zonal_anomaly.plot(ax=ax, label=case_name)

# add legend and title
plt.legend()
plt.title("Zonal Mean Surface Air Temperature Anomaly")

# save the plot in the right location (framework-created PS subdirectory)
out_dir = f"{work_dir}/model/PS"
assert os.path.isdir(out_dir), f'Assertion error: {out_dir} not found'
plt.savefig(f"{out_dir}/example_multicase_plot.eps", bbox_inches="tight")


# Part 4: Close the catalog files and
# release variable dict reference for garbage collection
# ------------------------------------------------------
cat.close()
# drop the reference to the opened datasets so they can be collected
tas_dict = None
# Part 5: Confirm POD executed successfully
# ----------------------------------------
# The framework scans POD logs for this message to confirm success.
print("Last log message by example_multicase POD: finished successfully!")
sys.exit(0)
120 changes: 120 additions & 0 deletions example_multicase.jsonc
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
// This is a template for configuring MDTF to run PODs that analyze multi-run/ensemble data
//
// Copy this file, rename it, and customize the settings as needed
// Pass your file to the framework using the -f/--input-file flag.
// Any other explicit command line options will override what's listed here.
//
// All text to the right of an unquoted "//" is a comment and ignored, as well
// as blank lines (JSONC quasi-standard.)
//
// Remove your test config file, or any changes you make to this template if you do not rename it,
// from your remote repository before you submit a PR for review.
// To generate CMIP synthetic data in the example dataset, run the following:
// > mamba env create --force -q -f ./src/conda/_env_synthetic_data.yml
// > conda activate _MDTF_synthetic_data
// > pip install mdtf-test-data
// > cd <root directory>/mdtf
// > mkdir mdtf_test_data && cd mdtf_test_data
// > mdtf_synthetic.py -c CMIP --startyear 1980 --nyears 5
// > mdtf_synthetic.py -c CMIP --startyear 1985 --nyears 5
// Note that MODEL_DATA_ROOT assumes that mdtf_test_data is one directory above MDTF-diagnostics
// in this sample config file
{
// Run each ensemble on the example POD.
// Add other PODs that work on ensemble datasets to the pod_list as needed
"pod_list" : [
//"example"
"example_multicase"
],
// Each case corresponds to a different simulation/output dataset
// startdate, enddate: either YYYY-MM-DD, YYYYMMDD:HHMMSS, or YYYY-MM-DD:HHMMSS
"case_list":
{
"CMIP_Synthetic_r1i1p1f1_gr1_19800101-19841231":
{
"model": "test",
"convention": "CMIP",
"startdate": "19800101120000",
"enddate": "19841231000000"
}
,
"CMIP_Synthetic_r1i1p1f1_gr1_19850101-19891231":
{
"model": "test",
"convention": "CMIP",
"startdate": "19850101",
"enddate": "19891231"
}
},
// PATHS ---------------------------------------------------------------------
// Location of supporting data downloaded when the framework was installed.
// If a relative path is given, it's resolved relative to the MDTF-diagnostics
// code directory. Environment variables (eg, $HOME) can be referenced with a
// "$" and will be expanded to their current values when the framework runs.
// Full or relative path to model data ESM-intake catalog header file

"DATA_CATALOG": "./diagnostics/example_multicase/esm_catalog_CMIP_synthetic_r1i1p1f1_gr1.json",

// Parent directory containing observational data used by individual PODs.
"OBS_DATA_ROOT": "../inputdata/obs_data",

// Working directory.
"WORK_DIR": "../wkdir",

// Directory to write output. The results of each run of the framework will be
// put in a subdirectory of this directory. Defaults to WORK_DIR if blank.
"OUTPUT_DIR": "../wkdir",

// Location of the Anaconda/miniconda or micromamba installation to use for managing
// dependencies (path returned by running `conda info --base` or `micromamba info`.)
//"conda_root": "/glade/u/apps/opt/conda",
"conda_root": "/glade/u/home/netige/miniconda3",

// Directory containing the framework-specific conda environments. This should
// be equal to the "--env_dir" flag passed to conda_env_setup.sh. If left
// blank, the framework will look for its environments in conda_root/envs
//"conda_env_root": "/glade/u/home/netige/miniconda3/envs",
"conda_env_root": "/glade/work/netige/mdtf_Apr24_2/mdtf",


// Location of the micromamba executable. Required if using micromamba
"micromamba_exe": "",

// SETTINGS ------------------------------------------------------------------
// Any command-line option recognized by the mdtf script (type `mdtf --help`)
// can be set here, in the form "flag name": "desired setting".

// Settings affecting what output is generated:
// Set to true to run the preprocessor; default true:
"run_pp": true,
// Set to true to perform data translation; default false:
"translate_data": true,
// Set to true to have PODs save postscript figures in addition to bitmaps.
"save_ps": false,

// Set to true for files > 4 GB
"large_file": false,

// If true, leave pp data in OUTPUT_DIR after preprocessing; if false, delete pp data after PODs
// run to completion
"save_pp_data": true,

// Set to true to save HTML and bitmap plots in a .tar file.
"make_variab_tar": false,

// Generate html output for multiple figures per case
"make_multicase_figure_html": false,

// Set to true to overwrite results in OUTPUT_DIR; otherwise results saved
// under a unique name.
"overwrite": false,

// List with custom preprocessing script(s) to run on data
// Place these scripts in the user_scripts directory of your copy of the MDTF-diagnostics repository
"user_pp_scripts" : [],

// Settings used in debugging:

// Log verbosity level.
"verbose": 1
}
Loading

0 comments on commit a6cc18b

Please sign in to comment.