diff --git a/doc/sphinx/source/input.rst b/doc/sphinx/source/input.rst index 374af182ef..1a06a6ddae 100644 --- a/doc/sphinx/source/input.rst +++ b/doc/sphinx/source/input.rst @@ -377,6 +377,8 @@ A list of the datasets for which a CMORizers is available is provided in the fol +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ | HWSD | cSoil (Lmon), areacella (fx), sftlf (fx) | 3 | Python | +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ +| IAP | thetao, tos (Omon) | 2 | Python | ++------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ | ISCCP-FH | alb, prw, ps, rlds, rlus, rlut, rlutcs, rsds, rsdt, rsus, rsut, rsutcs, tas, ts (Amon) | 2 | NCL | +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ | JMA-TRANSCOM | nbp (Lmon), fgco2 (Omon) | 3 | Python | diff --git a/esmvaltool/cmorizers/data/cmor_config/IAP.yml b/esmvaltool/cmorizers/data/cmor_config/IAP.yml new file mode 100644 index 0000000000..6d3c79c9ef --- /dev/null +++ b/esmvaltool/cmorizers/data/cmor_config/IAP.yml @@ -0,0 +1,23 @@ +--- +# Common global attributes for Cmorizer output +attributes: + dataset_id: IAP + version: 'v4.2' + tier: 2 + modeling_realm: reanaly + project_id: OBS6 + source: 'http://www.ocean.iap.ac.cn/' + reference: 'iap' + comment: '' + +# Variables to cmorize (here use only filename prefix) +variables: + thetao: + mip: Omon + name: temperature + raw_var: temp + srf_var: tos + +custom: + create_areacello: false + reference_year: 2000 diff --git a/esmvaltool/cmorizers/data/datasets.yml b/esmvaltool/cmorizers/data/datasets.yml index abd5a80fde..5ef52acf11 100644 
--- a/esmvaltool/cmorizers/data/datasets.yml +++ b/esmvaltool/cmorizers/data/datasets.yml @@ -778,6 +778,22 @@ datasets: HWSD_SOIL_CLM_RES.nc4 A registration is required + IAP: + tier: 2 + source: http://www.ocean.iap.ac.cn/pages/dataService/dataService.html + last_access: 2025-01-20 + info: | + Download the following files: + Temperature_IAPv4.2_gridded_data_1940_1949.zip + Temperature_IAPv4.2_gridded_data_1950_1959.zip + Temperature_IAPv4.2_gridded_data_1960_1969.zip + Temperature_IAPv4.2_gridded_data_1970_1979.zip + Temperature_IAPv4.2_gridded_data_1980_1989.zip + Temperature_IAPv4.2_gridded_data_1990_1999.zip + Temperature_IAPv4.2_gridded_data_2000_2009.zip + Temperature_IAPv4.2_gridded_data_2010_2019.zip + Temperature_IAPv4.2_gridded_data_2020_2023.zip + ISCCP-FH: tier: 2 source: https://isccp.giss.nasa.gov/pub/flux-fh/tar-nc4_MPF/ diff --git a/esmvaltool/cmorizers/data/downloaders/datasets/iap.py b/esmvaltool/cmorizers/data/downloaders/datasets/iap.py new file mode 100644 index 0000000000..d23cf30ca2 --- /dev/null +++ b/esmvaltool/cmorizers/data/downloaders/datasets/iap.py @@ -0,0 +1,55 @@ +# pylint: disable=too-many-arguments +# pylint: disable=R0917 +# pylint: disable=too-many-locals +"""Script to download IAP datasets.""" +import logging +from datetime import datetime +from dateutil import relativedelta + +from esmvaltool.cmorizers.data.downloaders.wget import WGetDownloader + +logger = logging.getLogger(__name__) + + +def download_dataset(config, dataset, dataset_info, start_date, end_date, + overwrite): + """Download dataset. 
+ + Parameters + ---------- + config : dict + ESMValTool's user configuration + dataset : str + Name of the dataset + dataset_info : dict + Dataset information from the datasets.yml file + start_date : datetime + Start of the interval to download + end_date : datetime + End of the interval to download + overwrite : bool + Overwrite already downloaded files + """ + if start_date is None: + start_date = datetime(year=1940, month=1, day=1) + if end_date is None: + end_date = datetime(year=2024, month=12, day=31) + + loop_date = start_date + + downloader = WGetDownloader( + config=config, + dataset=dataset, + dataset_info=dataset_info, + overwrite=overwrite, + ) + + while loop_date <= end_date: + logger.info("Downloading data for %s", loop_date) + downloader.download_file( + "http://www.ocean.iap.ac.cn/ftp/cheng/" + "IAPv4.2_IAP_Temperature_gridded_1month_netcdf/Monthly/" + f"IAPv4_Temp_monthly_1_6000m_year_{loop_date.year}" + f"_month_{loop_date.month:02d}.nc", + wget_options=[]) + loop_date += relativedelta.relativedelta(months=1) diff --git a/esmvaltool/cmorizers/data/formatters/datasets/iap.py b/esmvaltool/cmorizers/data/formatters/datasets/iap.py new file mode 100644 index 0000000000..a47c46a092 --- /dev/null +++ b/esmvaltool/cmorizers/data/formatters/datasets/iap.py @@ -0,0 +1,193 @@ +# pylint: disable=unused-argument +# pylint: disable=too-many-arguments +# pylint: disable=too-many-function-args +# pylint: disable=R0917 +# pylint: disable=E1121 +# flake8: noqa +"""ESMValTool CMORizer for IAP data. + +Tier + Tier 2: other freely-available dataset. 
+ +Source + IAPv4.2: "http://www.ocean.iap.ac.cn/ftp/cheng/" + "IAPv4.2_IAP_Temperature_gridded_1month_netcdf/Monthly/" + +Last access: 20250220 + +Download and processing instructions + All handled by the script (download only if local data are missing) + + Alternatively, download and unzip the following files: + Temperature_IAPv4.2_gridded_data_1940_1949.zip + Temperature_IAPv4.2_gridded_data_1950_1959.zip + Temperature_IAPv4.2_gridded_data_1960_1969.zip + Temperature_IAPv4.2_gridded_data_1970_1979.zip + Temperature_IAPv4.2_gridded_data_1980_1989.zip + Temperature_IAPv4.2_gridded_data_1990_1999.zip + Temperature_IAPv4.2_gridded_data_2000_2009.zip + Temperature_IAPv4.2_gridded_data_2010_2019.zip + Temperature_IAPv4.2_gridded_data_2020_2023.zip +""" + +import logging +import os +import warnings +from warnings import catch_warnings +from datetime import datetime +from dateutil import relativedelta + +import iris +import cf_units +import numpy as np + +from esmvaltool.cmorizers.data.utilities import ( + fix_coords, + fix_var_metadata, + save_variable, + set_global_atts, ) + +logger = logging.getLogger(__name__) + +try: + iris.FUTURE.date_microseconds = True + iris.FUTURE.save_split_attrs = True +except AttributeError as e: + # Handle cases where FUTURE or the attributes don't exist + logger.warning("AttributeError: %s", e) +except (TypeError, ValueError) as e: + # Handle specific errors if these might occur + logger.warning("TypeError or ValueError: %s", e) +except Exception as e: + # Fallback for unknown issues; Exception keeps Ctrl-C/SystemExit working + logger.warning("An unexpected error occurred: %s", e) + +def collect_files(in_dir, cfg, start_date, end_date): + """Create the list of input file paths to be processed.""" + file_list = [] + + if start_date is None: + start_date = datetime(year=1940, month=1, day=1) + if end_date is None: + end_date = datetime(year=2024, month=12, day=31) + + loop_date = start_date + + while loop_date <= end_date: + fname = ( + 
f"IAPv4_Temp_monthly_1_6000m_year_{loop_date.year}" + f"_month_{loop_date.month:02d}.nc" + ) + in_file = os.path.join(in_dir, fname) + file_list.append(in_file) + loop_date += relativedelta.relativedelta(months=1) + + return file_list + + +def process_data(cube, reference_year): + """ Process raw data. Convert to Kelvin and add time dimension. + Concatenate the cubes and return the new cube. + """ + # Convert temperature from Celsius to Kelvin and add time dimension + temperature_data = np.expand_dims(cube.data, axis=0) + temperature_data = np.moveaxis( + temperature_data, (0, 1, 2, 3), (0, 2, 3, 1) + ) # Reorder axes + + # Create time coordinate + start_date = datetime( + int(cube.attributes["StartYear"]), + int(cube.attributes["StartMonth"]), + int(cube.attributes["StartDay"]), + ) + reference_date = datetime(2000, 1, 1) + time_points = [(start_date - reference_date).days] + + time_coord = iris.coords.DimCoord( + time_points, + standard_name="time", + units=( + f"days since {reference_date.year}-" + f"{reference_date.month}-{reference_date.day}" + ), + ) + + # Remove old date attributes + for key in ["StartDay", "StartMonth", "StartYear", + "EndDay", "EndMonth", "EndYear"]: + del cube.attributes[key] + + # Get existing coordinates and rename 'standard depth' to 'depth' + latitude_coord = cube.coord("latitude") + longitude_coord = cube.coord("longitude") + depth_coord = cube.coord("standard depth") + depth_coord.rename("depth") + + # Create and return the new cube + return iris.cube.Cube( + temperature_data, + var_name="Temperature", + dim_coords_and_dims=[ + (time_coord, 0), + (depth_coord, 1), + (latitude_coord, 2), + (longitude_coord, 3), + ], + attributes=cube.attributes, + ) + + +def extract_variable(in_files, out_dir, attrs, raw_info, cmor_table): + """Extract variables and create OBS dataset.""" + var = raw_info["var"] + var_info = cmor_table.get_variable(raw_info["mip"], var) + rawvar = raw_info["raw_var"] + with catch_warnings(): + 
warnings.simplefilter("ignore") # Ignore all warnings + cubes = iris.load(in_files, rawvar) + reference_year = raw_info["reference_year"] + cubes = iris.cube.CubeList( + [process_data(cube, reference_year) for cube in cubes] + ) + + iris.util.equalise_attributes(cubes) + cube = cubes.concatenate_cube() + fix_var_metadata(cube, var_info) + fix_coords(cube) + set_global_atts(cube, attrs) + save_variable(cube, var, out_dir, attrs, unlimited_dimensions=["time"]) + + # derive ocean surface + if "srf_var" in raw_info: + var_info = cmor_table.get_variable( + raw_info["mip"], raw_info["srf_var"]) + logger.info("Extract surface OBS for %s", raw_info["srf_var"]) + level_constraint = iris.Constraint(cube.var_name, depth=1) + cube_os = cube.extract(level_constraint) + fix_var_metadata(cube_os, var_info) + save_variable( + cube_os, raw_info["srf_var"], out_dir, attrs, + unlimited_dimensions=["time"] + ) + + +def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): + """Cmorization func call.""" + cmor_table = cfg["cmor_table"] + glob_attrs = cfg["attributes"] + + # run the cmorization + for var, vals in cfg["variables"].items(): + in_files = collect_files(in_dir, cfg, start_date, end_date) + logger.info("CMORizing var %s from input set %s", var, vals["name"]) + raw_info = cfg["variables"][var] + raw_info.update( + { + "var": var, + "reference_year": cfg["custom"]["reference_year"], + } + ) + glob_attrs["mip"] = vals["mip"] + extract_variable(in_files, out_dir, glob_attrs, raw_info, cmor_table) diff --git a/esmvaltool/recipes/examples/recipe_check_obs.yml b/esmvaltool/recipes/examples/recipe_check_obs.yml index d69583fb01..2e3c7b1728 100644 --- a/esmvaltool/recipes/examples/recipe_check_obs.yml +++ b/esmvaltool/recipes/examples/recipe_check_obs.yml @@ -534,6 +534,16 @@ diagnostics: type: sat, version: 1, start_year: 1991, end_year: 2002} scripts: null + IAP: + description: IAP check + variables: + thetao: + tos: + additional_datasets: + - {dataset: IAP, project: 
OBS6, mip: Omon, tier: 2, + type: reanaly, version: "v4.2", start_year: 1990, end_year: 1991} + scripts: null + ISCCP-FH: description: ISCCP-FH check variables: diff --git a/esmvaltool/references/iap.bibtex b/esmvaltool/references/iap.bibtex new file mode 100644 index 0000000000..a8a0b6be29 --- /dev/null +++ b/esmvaltool/references/iap.bibtex @@ -0,0 +1,11 @@ +@article{cheng2024, + doi = {10.5194/essd-16-3517-2024}, + author = {Cheng, L. and Pan, Y. and Tan, Z. and Zheng, H. and Zhu, Y. and Wei, W. and Du, J. and Yuan, H. and Li, G. and Ye, H. and Gouretski, V. and Li, Y. and Trenberth, K. E. and Abraham, J. and Jin, Y. and Reseghetti, F. and Lin, X. and Zhang, B. and Chen, G. and Mann, M. E. and Zhu, J.}, + title = {IAPv4 ocean temperature and ocean heat content gridded dataset}, + journal = {Earth System Science Data}, + volume = {16}, + year = {2024}, + number = {8}, + pages = {3517--3546}, + url = {https://essd.copernicus.org/articles/16/3517/2024/} +}