Skip to content

Commit

Permalink
adding 4th gen POD files
Browse files Browse the repository at this point in the history
  • Loading branch information
Nishchitha Etige committed May 21, 2024
1 parent 56c3c8e commit a6cc18b
Show file tree
Hide file tree
Showing 7 changed files with 489 additions and 3 deletions.
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
activity_id,branch_method,branch_time_in_child,branch_time_in_parent,experiment,experiment_id,frequency,grid,grid_label,institution_id,nominal_resolution,parent_activity_id,parent_experiment_id,parent_source_id,parent_time_units,parent_variant_label,product,realm,source_id,source_type,sub_experiment,sub_experiment_id,table_id,variable_id,variant_label,member_id,standard_name,long_name,units,vertical_levels,init_year,start_time,end_time,time_range,path,version
CMIP,standard,,,,synthetic,day,,gr,,,CMIP,,,days since 1980-01-01,r1i1p1f1,,atmos,,,none,none,day,tas,r1i1p1f1,r1i1p1f1,air_temperature,Near-Surface Air Temperature,K,1,,1980-01-01,1984-12-31,1980-01-01-1984-12-31,/Users/jess/mdtf/inputdata/mdtf_test_data/CMIP_Synthetic_r1i1p1f1_gr1_19800101-19841231/day/CMIP_Synthetic_r1i1p1f1_gr1_19800101-19841231.tas.day.nc,none
CMIP,standard,,,,synthetic,day,,gr,,,CMIP,,,days since 1985-01-01,r1i1p1f1,,atmos,,,none,none,day,tas,r1i1p1f1,r1i1p1f1,air_temperature,Near-Surface Air Temperature,K,1,,1985-01-01,1989-12-31,1985-01-01-1989-12-31,/Users/jess/mdtf/inputdata/mdtf_test_data/CMIP_Synthetic_r1i1p1f1_gr1_19850101-19891231/day/CMIP_Synthetic_r1i1p1f1_gr1_19850101-19891231.tas.day.nc,none
CMIP,standard,,,,synthetic,day,,gr,,,CMIP,,,days since 1980-01-01,r1i1p1f1,,atmos,,,none,none,day,tas,r1i1p1f1,r1i1p1f1,air_temperature,Near-Surface Air Temperature,K,1,,1980-01-01,1984-12-31,1980-01-01-1984-12-31,/glade/work/netige/mdtf_Apr24_2/mdtf/MDTF-diagnostics/mdtf_test_data/CMIP_Synthetic_r1i1p1f1_gr1_19800101-19841231/day/CMIP_Synthetic_r1i1p1f1_gr1_19800101-19841231.tas.day.nc,none
CMIP,standard,,,,synthetic,day,,gr,,,CMIP,,,days since 1985-01-01,r1i1p1f1,,atmos,,,none,none,day,tas,r1i1p1f1,r1i1p1f1,air_temperature,Near-Surface Air Temperature,K,1,,1985-01-01,1989-12-31,1985-01-01-1989-12-31,/glade/work/netige/mdtf_Apr24_2/mdtf/MDTF-diagnostics/mdtf_test_data/CMIP_Synthetic_r1i1p1f1_gr1_19850101-19891231/day/CMIP_Synthetic_r1i1p1f1_gr1_19850101-19891231.tas.day.nc,none
Original file line number Diff line number Diff line change
Expand Up @@ -187,5 +187,5 @@
"description": null,
"title": null,
"last_updated": "2023-06-01",
"catalog_file": "file:/Users/jess/mdtf/MDTF-diagnostics/diagnostics/example_multicase/esm_catalog_CMIP_synthetic_r1i1p1f1_gr1.csv"
"catalog_file": "file:/glade/work/netige/mdtf_Apr24_2/mdtf/MDTF-diagnostics/diagnostics/example_multicase/esm_catalog_CMIP_synthetic_r1i1p1f1_gr1.csv"
}
52 changes: 52 additions & 0 deletions diagnostics/trial_4thgen/settings.jsonc
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
// Example MDTF diagnostic settings file for multiple cases
//
// This example builds upon the single case (i.e. `example`) example POD
// and illustrates how to design and implement a POD that uses multiple
// model source datasets. These can be the same experiment with different
// models, two different experiments from the same model, or two different
// time periods within the same simulation.
//
// Comments are restricted to those relevant to the multi-case example.
// Please see the single case example POD and the documentation for more
// details.
//

// Basic POD Settings
{
  "settings" : {
    "description" : "A trial 4th generation POD to read and process data from Data Catalogs",
    // Script the framework invokes to run this POD
    "driver" : "trial_4thgen.py",
    "long_name" : "4th Generation Trial POD",
    // Naming convention used to translate variable names for this POD
    "convention": "cmip",
    "runtime_requirements": {
      // NOTE(review): the driver script also imports intake, yaml, and numpy --
      // confirm whether those should be listed here as well
      "python3": ["matplotlib", "xarray", "netCDF4"]
    }
  },

  // Variable Coordinates
  "dimensions": {
    "lat": {
      "standard_name": "latitude",
      "units": "degrees_north",
      "axis": "Y"
    },
    "lon": {
      "standard_name": "longitude",
      "units": "degrees_east",
      "axis": "X"
    },
    "time": {"standard_name": "time"}
  },

  // Variables
  // NOTE(review): this requests monthly ocean 'zos', but the driver
  // (trial_4thgen.py) currently reads the 'tas' variable and searches the
  // catalog with frequency="day" -- confirm which is intended
  "varlist" : {
    "zos": {
      "frequency" : "mon",
      "realm": "ocean",
      "dimensions": ["time", "lat", "lon"],
      "modifier": "",
      "standard_name" : "sea_surface_height_above_geoid",
      "units": "m"
    }
  }
}
29 changes: 29 additions & 0 deletions diagnostics/trial_4thgen/trial_4thgen.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
<title>MDTF example-multicase diagnostic</title>
<img src="../mdtf_diag_banner.png">
<h3>Multi-Case Example Diagnostic: zonal-average near-surface temperature anomaly</h3>
<p>
This POD illustrates how multiple cases (experiments) can be analyzed together.
The MDTF-diagnostics framework initializes and processes each case, writes the environment variables for the cases
to a yaml file (case_info.yml), and exports an ESM Intake catalog with information about the post-processed
data for each case to the working directory (WORK_DIR). The example_multicase POD driver script reads
environment information from case_info.yml into a dictionary that it references to read data from the
post-processed files in the data catalog.
</p>
<p>
The example_multicase POD reads near-surface air temperature (TAS) from netcdf output files for multiple cases.
The POD time averages the TAS data and calculates the anomaly relative to the global mean.
The anomalies are zonally-averaged and the results from all cases are shown on a single plot.
</p>
<TABLE>
<TR>
<TH align=left style="color:navy">Time averages
<TH align=left>Model Results
<TR>
<TH align=left>Zonal-mean near-surface temperature anomalies (K)
<TH align=center><A href=model/example_multicase_plot.png>plot</A>
</TABLE>
<!--
If the POD generates one figure per case, use the following format in your python script file paths:
<TH align=center><A href=model/example_model_plot_{{CASENAME}}.png>{{CASENAME}}</A>
The framework output manager will automatically append a link to each file
-->
165 changes: 165 additions & 0 deletions diagnostics/trial_4thgen/trial_4thgen.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
# MDTF Example Diagnostic POD for Multiple Cases / Experiments
# ================================================================================
#
# This file is part of the Multicase Example Diagnostic POD of the MDTF code
# package (see mdtf/MDTF-diagnostics/LICENSE.txt)
#
# Example Diagnostic POD
#
# Last update: Feb-2022
#
# This example builds upon the single case `example` POD
# and illustrates how to design and implement a POD that uses multiple
# model source datasets. These can be the same experiment with different
# models, two different experiments from the same model, or two different
# time periods within the same simulation.
#
# Version & Contact info
#
# - Version/revision information: version 1.1 (Oct-2022)
# - Model Development Task Force Framework Team
#
# Open source copyright agreement
#
# The MDTF framework is distributed under the LGPLv3 license (see LICENSE.txt).
#
# Functionality
#
# Metadata associated with the different cases are passed from the
# framework to the POD via a yaml file (case_info.yml) that the POD reads into a dictionary.
# The POD iterates over the case entries in the dictionary and opens the input datasets.
# The `tas` variable is extracted for each case and the time average is taken over the dataset.
# Anomalies are calculated relative to the global mean and then zonally-averaged. The resulting plot
# contains one line for each case.
#
# Required programming language and libraries
#
# * Python >= 3.10
# * xarray
# * matplotlib
# * intake
# * yaml
# * sys
# * os
# * numpy
#
# Required model output variables
#
# * tas - Surface (2-m) air temperature (CF: air_temperature)
#
# References
#
# Maloney, E. D, and Co-authors, 2019: Process-oriented evaluation of climate
# and weather forecasting models. BAMS, 100(9), 1665-1686,
# doi:10.1175/BAMS-D-18-0042.1.


# Import modules used in the POD
import os
import matplotlib

matplotlib.use("Agg") # non-X windows backend

import matplotlib.pyplot as plt
import numpy as np
import intake
import sys
import yaml

# Part 1: Read in the model data
# ------------------------------
# The framework communicates with the POD through environment variables:
#   WORK_DIR      -- the POD's working directory
#   case_env_file -- path to a yaml file describing each case
work_dir = os.environ["WORK_DIR"]

# Receive a dictionary of case information from the framework
print("reading case_info")
case_env_file = os.environ["case_env_file"]
assert os.path.isfile(case_env_file), f"case environment file not found: {case_env_file}"
with open(case_env_file, 'r') as stream:
    try:
        case_info = yaml.safe_load(stream)
    except yaml.YAMLError as exc:
        # Fail fast: swallowing the error here would only defer the failure
        # to a confusing NameError on case_info a few lines down.
        print(exc)
        raise

cat_def_file = case_info['CATALOG_FILE']
case_list = case_info['CASE_LIST']
# all cases share variable names and dimension coords, so just read them
# from the first case entry
first_case = next(iter(case_list.values()))
tas_var = first_case['tas_var']
time_coord = first_case['time_coord']
lat_coord = first_case['lat_coord']  # NOTE(review): currently unused below -- confirm
lon_coord = first_case['lon_coord']
# open the csv file using information provided by the catalog definition file
cat = intake.open_esm_datastore(cat_def_file)
# filter catalog by desired variable and output frequency
# NOTE(review): frequency is hard-coded to "day" here, while settings.jsonc
# declares a monthly ocean variable (zos) -- confirm which is intended
tas_subset = cat.search(variable_id=tas_var, frequency="day")
# convert tas_subset catalog to an xarray dataset dict
tas_dict = tas_subset.to_dataset_dict(
    xarray_open_kwargs={"decode_times": True, "use_cftime": True}
)

# Part 2: Do some calculations (time and zonal means)
# ---------------------------------------------------
# For each case: time-average tas, form anomalies about the spatial mean,
# then zonally average. Results are collected per case for plotting.

tas_arrays = {}

# Loop over cases
for case_name, ds in tas_dict.items():
    # select the tas variable for this case
    da = ds[tas_var]

    # take the time mean
    da = da.mean(dim=ds[time_coord].name)

    # this block shuffles the data to make this single case look more
    # interesting. ** DELETE THIS ** once we test with real data
    da.load()
    shuffled = da.to_masked_array().flatten()
    np.random.shuffle(shuffled)
    da.values = shuffled.reshape(da.shape)

    # convert to anomalies relative to the spatial mean
    da = da - da.mean()

    # take the zonal mean
    da = da.mean(dim=ds[lon_coord].name)

    tas_arrays[case_name] = da


# Part 3: Make a plot that contains results from each case
# --------------------------------------------------------
# One zonal-mean anomaly line per case, all on a single axis.

# set up the figure
fig = plt.figure(figsize=(12, 4))
ax = plt.subplot(1, 1, 1)

# draw one labeled line per case
for case_name, zonal_anomaly in tas_arrays.items():
    zonal_anomaly.plot(ax=ax, label=case_name)

# add legend and title
plt.legend()
plt.title("Zonal Mean Surface Air Temperature Anomaly")

# save the plot in the right location (framework-created PS subdirectory)
out_dir = f"{work_dir}/model/PS"
assert os.path.isdir(out_dir), f'Assertion error: {out_dir} not found'
plt.savefig(f"{out_dir}/example_multicase_plot.eps", bbox_inches="tight")


# Part 4: Close the catalog files and
# release variable dict reference for garbage collection
# ------------------------------------------------------
cat.close()
# drop the reference to the opened datasets so they can be collected
tas_dict = None
# Part 5: Confirm POD executed successfully
# ----------------------------------------
# The framework scans POD logs for this message to confirm success.
print("Last log message by example_multicase POD: finished successfully!")
sys.exit(0)
120 changes: 120 additions & 0 deletions example_multicase.jsonc
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
// This is a template for configuring MDTF to run PODs that analyze multi-run/ensemble data
//
// Copy this file, rename it, and customize the settings as needed
// Pass your file to the framework using the -f/--input-file flag.
// Any other explicit command line options will override what's listed here.
//
// All text to the right of an unquoted "//" is a comment and ignored, as well
// as blank lines (JSONC quasi-standard.)
//
// Remove your test config file, or any changes you make to this template if you do not rename it,
// from your remote repository before you submit a PR for review.
// To generate CMIP synthetic data in the example dataset, run the following:
// > mamba env create --force -q -f ./src/conda/_env_synthetic_data.yml
// > conda activate _MDTF_synthetic_data
// > pip install mdtf-test-data
// > cd <root directory>/mdtf
// > mkdir mdtf_test_data && cd mdtf_test_data
// > mdtf_synthetic.py -c CMIP --startyear 1980 --nyears 5
// > mdtf_synthetic.py -c CMIP --startyear 1985 --nyears 5
// Note that MODEL_DATA_ROOT assumes that mdtf_test_data is one directory above MDTF-diagnostics
// in this sample config file
{
// Run each ensemble on the example POD.
// Add other PODs that work on ensemble datasets to the pod_list as needed
"pod_list" : [
//"example"
"example_multicase"
],
// Each case corresponds to a different simulation/output dataset
// startdate, enddate: either YYYY-MM-DD, YYYYMMDD:HHMMSS, or YYYY-MM-DD:HHMMSS
"case_list":
{
"CMIP_Synthetic_r1i1p1f1_gr1_19800101-19841231":
{
"model": "test",
"convention": "CMIP",
"startdate": "19800101120000",
"enddate": "19841231000000"
}
,
"CMIP_Synthetic_r1i1p1f1_gr1_19850101-19891231":
{
"model": "test",
"convention": "CMIP",
"startdate": "19850101",
"enddate": "19891231"
}
},
// PATHS ---------------------------------------------------------------------
// Location of supporting data downloaded when the framework was installed.
// If a relative path is given, it's resolved relative to the MDTF-diagnostics
// code directory. Environment variables (eg, $HOME) can be referenced with a
// "$" and will be expanded to their current values when the framework runs.
// Full or relative path to model data ESM-intake catalog header file

"DATA_CATALOG": "./diagnostics/example_multicase/esm_catalog_CMIP_synthetic_r1i1p1f1_gr1.json",

// Parent directory containing observational data used by individual PODs.
"OBS_DATA_ROOT": "../inputdata/obs_data",

// Working directory.
"WORK_DIR": "../wkdir",

// Directory to write output. The results of each run of the framework will be
// put in a subdirectory of this directory. Defaults to WORK_DIR if blank.
"OUTPUT_DIR": "../wkdir",

// Location of the Anaconda/miniconda or micromamba installation to use for managing
// dependencies (path returned by running `conda info --base` or `micromamba info`.)
//"conda_root": "/glade/u/apps/opt/conda",
"conda_root": "/glade/u/home/netige/miniconda3",

// Directory containing the framework-specific conda environments. This should
// be equal to the "--env_dir" flag passed to conda_env_setup.sh. If left
// blank, the framework will look for its environments in conda_root/envs
//"conda_env_root": "/glade/u/home/netige/miniconda3/envs",
"conda_env_root": "/glade/work/netige/mdtf_Apr24_2/mdtf",


// Location of the micromamba executable. Required if using micromamba
"micromamba_exe": "",

// SETTINGS ------------------------------------------------------------------
// Any command-line option recognized by the mdtf script (type `mdtf --help`)
// can be set here, in the form "flag name": "desired setting".

// Settings affecting what output is generated:
// Set to true to run the preprocessor; default true:
"run_pp": true,
// Set to true to perform data translation; default false:
"translate_data": true,
// Set to true to have PODs save postscript figures in addition to bitmaps.
"save_ps": false,

// Set to true for files > 4 GB
"large_file": false,

// If true, leave pp data in OUTPUT_DIR after preprocessing; if false, delete pp data after PODs
// run to completion
"save_pp_data": true,

// Set to true to save HTML and bitmap plots in a .tar file.
"make_variab_tar": false,

// Generate html output for multiple figures per case
"make_multicase_figure_html": false,

// Set to true to overwrite results in OUTPUT_DIR; otherwise results saved
// under a unique name.
"overwrite": false,

// List with custom preprocessing script(s) to run on data
// Place these scripts in the user_scripts directory of your copy of the MDTF-diagnostics repository
"user_pp_scripts" : [],

// Settings used in debugging:

// Log verbosity level.
"verbose": 1
}
Loading

0 comments on commit a6cc18b

Please sign in to comment.