diff --git a/doc/sphinx/source/input.rst b/doc/sphinx/source/input.rst index f9bcfafc3e..4c0cc3e508 100644 --- a/doc/sphinx/source/input.rst +++ b/doc/sphinx/source/input.rst @@ -333,6 +333,8 @@ A list of the datasets for which a CMORizers is available is provided in the fol +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ | ESACCI-OZONE | toz, tozStderr, tro3prof, tro3profStderr (Amon) | 2 | NCL | +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ +| ESACCI-SEAICE | siconc (SIday, SImon) | 2 | Python | ++------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ | ESACCI-SEA-SURFACE-SALINITY | sos (Omon) | 2 | Python | +------------------------------+------------------------------------------------------------------------------------------------------+------+-----------------+ | ESACCI-SOILMOISTURE | sm (Eday, Lmon), smStderr (Eday) | 2 | Python | diff --git a/esmvaltool/cmorizers/data/cmor_config/ESACCI-SEAICE.yml b/esmvaltool/cmorizers/data/cmor_config/ESACCI-SEAICE.yml new file mode 100644 index 0000000000..29de49519c --- /dev/null +++ b/esmvaltool/cmorizers/data/cmor_config/ESACCI-SEAICE.yml @@ -0,0 +1,23 @@ +--- +# Common global attributes for Cmorizer output +attributes: + dataset_id: ESACCI-SEAICE + version: L4-SICONC-RE-SSMI-12.5kmEASE2-fv3.0 + tier: 2 + modeling_realm: sat + project_id: OBS6 + source: 'ftp://anon-ftp.ceda.ac.uk/neodc/esacci/sea_ice_' + reference: 'esacci-seaice' + comment: '' + +# Variables to cmorize (here use only filename prefix) +variables: +# daily and monthly frequency + siconc: + short_name: siconc + mip1: SIday + mip2: SImon + raw: ice_conc + frequency1: day + frequency2: mon + file: 
ESACCI-SEAICE-L4-SICONC-RE_SSMI_12.5kmEASE2-{region}-{year}*-fv3.0.nc
diff --git a/esmvaltool/cmorizers/data/datasets.yml b/esmvaltool/cmorizers/data/datasets.yml
index 4c7c168009..966ac04891 100644
--- a/esmvaltool/cmorizers/data/datasets.yml
+++ b/esmvaltool/cmorizers/data/datasets.yml
@@ -529,6 +529,15 @@ datasets:
         limb_profiles/l3/merged/merged_monthly_zonal_mean/v0002
       Put all files under a single directory (no subdirectories with years).
 
+  ESACCI-SEAICE:
+    tier: 2
+    source: ftp://anon-ftp.ceda.ac.uk/neodc/esacci/sea_ice/data/
+    last_access: 2024-11-07
+    info: |
+      Download the data from:
+        sea_ice_concentration/L4/ssmi_ssmis/12.5km/v3.0/*/
+      Put the files of each region in a subdirectory 'NH' or 'SH' (no subdirectories with years / months).
+
   ESACCI-SOILMOISTURE:
     tier: 2
     source: ftp://anon-ftp.ceda.ac.uk/neodc/esacci/soil_moisture/data/
diff --git a/esmvaltool/cmorizers/data/downloaders/datasets/esacci_seaice.py b/esmvaltool/cmorizers/data/downloaders/datasets/esacci_seaice.py
new file mode 100644
index 0000000000..12b95eecdd
--- /dev/null
+++ b/esmvaltool/cmorizers/data/downloaders/datasets/esacci_seaice.py
@@ -0,0 +1,59 @@
+"""Script to download ESACCI-SEAICE."""
+from datetime import datetime
+
+from dateutil import relativedelta
+
+from esmvaltool.cmorizers.data.downloaders.ftp import CCIDownloader
+
+
+def download_dataset(config, dataset, dataset_info, start_date, end_date,
+                     overwrite):
+    """Download dataset.
+
+    The monthly folders of both regions (NH, SH) are downloaded for every
+    calendar month touched by the interval from start_date to end_date.
+
+    Parameters
+    ----------
+    config : dict
+        ESMValTool's user configuration
+    dataset : str
+        Name of the dataset
+    dataset_info : dict
+        Dataset information from the datasets.yml file
+    start_date : datetime
+        Start of the interval to download (default: 1991-01-01)
+    end_date : datetime
+        End of the interval to download (default: 2020-12-31)
+    overwrite : bool
+        Overwrite already downloaded files
+    """
+    if start_date is None:
+        start_date = datetime(1991, 1, 1)
+    if end_date is None:
+        end_date = datetime(2020, 12, 31)
+
+    downloader = CCIDownloader(
+        config=config,
+        dataset=dataset,
+        dataset_info=dataset_info,
+        overwrite=overwrite,
+    )
+    downloader.ftp_name = 'sea_ice'
+    downloader.connect()
+
+    regions = ('NH', 'SH')
+    basepath = 'sea_ice_concentration/L4/ssmi_ssmis/12.5km/v3.0'
+
+    # Step through whole calendar months. Stepping from start_date itself
+    # would skip the last month whenever end_date falls on an earlier day
+    # of the month than start_date.
+    loop_date = datetime(start_date.year, start_date.month, 1)
+    last_month = (end_date.year, end_date.month)
+    while (loop_date.year, loop_date.month) <= last_month:
+        for region in regions:
+            path = (f'{basepath}/{region}/{loop_date.year}/'
+                    f'{loop_date.month:02d}')
+            downloader.set_cwd(path)
+            downloader.download_folder('.', sub_folder=region)
+        loop_date += relativedelta.relativedelta(months=1)
diff --git a/esmvaltool/cmorizers/data/formatters/datasets/esacci_seaice.py b/esmvaltool/cmorizers/data/formatters/datasets/esacci_seaice.py
new file mode 100644
index 0000000000..2d688b77cd
--- /dev/null
+++ b/esmvaltool/cmorizers/data/formatters/datasets/esacci_seaice.py
@@ -0,0 +1,319 @@
+"""ESMValTool CMORizer for ESACCI-SEAICE data.
+
+Tier
+    Tier 2: other freely-available dataset.
+
+Source
+    ftp://anon-ftp.ceda.ac.uk/neodc/esacci/sea_ice/data
+
+Last access
+    20241107
+
+Download and processing instructions
+    Download the data from:
+      sea_ice_concentration/L4/ssmi_ssmis/12.5km/v3.0/{region}/{year}/{month}
+    Put the files of each region (NH, SH) in a subdirectory named after
+    the region (no subdirectories with years or months).
+""" +import glob +import logging +import os +from copy import deepcopy +from datetime import datetime +from dateutil import relativedelta + +import cf_units +import iris +import numpy as np +from dask import array as da +from esmvalcore.cmor._fixes.common import OceanFixGrid +from esmvalcore.cmor.table import CMOR_TABLES +from esmvalcore.preprocessor import monthly_statistics +from iris.coords import AuxCoord +from esmvaltool.cmorizers.data import utilities as utils + +from ...utilities import save_variable + +logger = logging.getLogger(__name__) + + +def _create_nan_cube(cube, year, month, day, is_daily): + """Create cube containing only nan from existing cube.""" + nan_cube = cube.copy() + nan_cube.data = da.ma.masked_greater(cube.core_data(), -1e20) + + # Read dataset time unit and calendar from file + dataset_time_unit = str(nan_cube.coord('time').units) + dataset_time_calender = nan_cube.coord('time').units.calendar + # Convert datetime + if is_daily: + hrs = 12 + else: + hrs = 0 + newtime = datetime(year=year, month=month, day=day, + hour=hrs, minute=0, second=0, microsecond=0) + newtime_num = cf_units.date2num(newtime, dataset_time_unit, + dataset_time_calender) + nan_cube.coord('time').points = float(newtime_num) + + # remove existing time bounds and create new bounds + coord = nan_cube.coord('time') + if is_daily: + bnd1 = newtime + relativedelta.relativedelta(hours=-12) + bnd2 = bnd1 + relativedelta.relativedelta(days=1) + else: + bnd1 = newtime + relativedelta.relativedelta(days=-day + 1) + bnd2 = bnd1 + relativedelta.relativedelta(months=1) + coord.bounds = [cf_units.date2num(bnd1, dataset_time_unit, + dataset_time_calender), + cf_units.date2num(bnd2, dataset_time_unit, + dataset_time_calender)] + + return nan_cube + + +def _create_areacello(cfg, cube, glob_attrs, out_dir): + var_info = cfg['cmor_table'].get_variable('Ofx', 'areacello') + glob_attrs['mip'] = 'Ofx' + lat_coord = cube.coord('latitude') + + arcube = 
iris.cube.Cube(np.zeros(lat_coord.shape, np.float32), + standard_name=var_info.standard_name, + long_name=var_info.long_name, + var_name=var_info.short_name, + units='m2', + # time is index 0, add cell index dim + dim_coords_and_dims=[(cube.coords()[1], 0), + (cube.coords()[2], 1)]) + + # each grid cell is 12.5 km x 12.5 km + arcube.data = arcube.core_data() + 12500 * 12500 + + arcube.add_aux_coord(lat_coord, (0, 1)) + arcube.add_aux_coord(cube.coord('longitude'), (0, 1)) + utils.fix_var_metadata(arcube, var_info) + utils.set_global_atts(arcube, glob_attrs) + utils.save_variable(arcube, var_info.short_name, out_dir, glob_attrs, + zlib=True) + + +def _fix_coordinates(cube, definition): + """Fix coordinates.""" + axis2def = {'T': 'time', 'X': 'longitude', 'Y': 'latitude'} + axes = ['T', 'X', 'Y'] + + for axis in axes: + coord_def = definition.coordinates.get(axis2def[axis]) + if coord_def: + coord = cube.coord(axis=axis) + if axis == 'T': + coord.convert_units('days since 1850-1-1 00:00:00.0') + coord.points = coord.core_points().astype('float64') + if len(coord.points) > 1: + if coord.bounds is not None: + coord.bounds = None + coord.guess_bounds() + coord.standard_name = coord_def.standard_name + coord.var_name = coord_def.out_name + coord.long_name = coord_def.long_name + + return cube + + +def _extract_variable(in_files, var, cfg, out_dir, is_daily, year0, region): + logger.info("CMORizing variable '%s' from input files '%s'", + var['short_name'], ', '.join(in_files)) + attributes = deepcopy(cfg['attributes']) + attributes['mip'] = var['mip1'] + attributes['raw'] = var['raw'] + cmor_table = CMOR_TABLES[attributes['project_id']] + definition = cmor_table.get_variable(var['mip1'], var['short_name']) + + # load all input files (1 year) into 1 cube + # --> drop attributes that differ among input files + cube_list = iris.load(in_files, var['raw']) + + # remove ancillary variables + for cube in cube_list: + for ancillary_variable in cube.ancillary_variables(): + 
cube.remove_ancillary_variable(ancillary_variable.standard_name) + + # (global) attributes to remove + drop_attrs = ['tracking_id', 'id', 'time_coverage_start', + 'time_coverage_end', 'date_created', + 'inputfilelist', 'history', 'valid_min', 'valid_max'] + + new_list = iris.cube.CubeList() + + for cube in cube_list: + for attr in drop_attrs: + if attr in cube.attributes.keys(): + cube.attributes.pop(attr) + + new_list.append(cube) + + # make sure there is one cube for every day (daily data) or + # every month (monthly data) of the year + # (print debug info about missing days/months and add cube with + # nan to fill gaps + + full_list = iris.cube.CubeList() + time_list = [] + + for cube in new_list: + loncoord = cube.coord('longitude') + latcoord = cube.coord('latitude') + loncoord.points = np.round(loncoord.core_points(), 3) + latcoord.points = np.round(latcoord.core_points(), 3) + + # create list of available days/months ('time_list') + + for cube in new_list: + timecoord = cube.coord('time') + cubetime = timecoord.units.num2date(timecoord.points) + ctnew = cubetime[0].replace(hour=0, minute=0, second=0, microsecond=0) + time_list.append(ctnew) + + # create cube list for every day/month of the year by adding + # cubes containing only nan to fill possible gaps + + if is_daily: + loop_date = datetime(year0, 1, 1) + while loop_date <= datetime(year0, 12, 31): + date_available = False + for idx, cubetime in enumerate(time_list): + if loop_date == cubetime: + date_available = True + break + if date_available: + full_list.append(new_list[idx]) + else: + logger.debug("No data available for %d/%d/%d", loop_date.month, + loop_date.day, loop_date.year) + nan_cube = _create_nan_cube(new_list[0], loop_date.year, + loop_date.month, loop_date.day, + is_daily) + full_list.append(nan_cube) + loop_date += relativedelta.relativedelta(days=1) + else: + loop_date = datetime(year0, 1, 15) + while loop_date <= datetime(year0, 12, 31): + date_available = False + for idx, cubetime in 
enumerate(time_list): + if loop_date == cubetime: + date_available = True + break + if date_available: + full_list.append(new_list[idx]) + else: + logger.debug("No data available for %d/%d", loop_date.month, + loop_date.year) + nan_cube = _create_nan_cube(new_list[0], loop_date.year, + loop_date.month, loop_date.day, + is_daily) + full_list.append(nan_cube) + loop_date += relativedelta.relativedelta(months=1) + + iris.util.unify_time_units(full_list) + cube = full_list.concatenate_cube() + cube.coord('time').points = cube.coord('time').core_points().astype( + 'float64') + + # Set correct names + cube.var_name = definition.short_name + cube.standard_name = definition.standard_name + cube.long_name = definition.long_name + + # Fix units + cube.units = definition.units + + # Fix ocean-type grid (2-dim lat + lon) + fixcube = OceanFixGrid(definition) + cube = fixcube.fix_metadata(cubes=[cube])[0] + + # Fix coordinates + cube = _fix_coordinates(cube, definition) + cube.coord('latitude').attributes = None + cube.coord('longitude').attributes = None + + # add aux coord 'typesi' + area_type = AuxCoord([1.0], standard_name='area_type', var_name='type', + long_name='Sea Ice area type') + cube.add_aux_coord(area_type) + + # add attribute cell_measures +# siconc:cell_measures = "area: areacello" +# cube.attributes.update({"cell_meaures": "area: areacello"}) + cube.attributes.locals['cell_measures'] = 'area: areacello' + + # Fix data type + cube.data = cube.core_data().astype('float32') + + # save daily results + logger.debug("Saving cube\n%s", cube) + logger.debug("Setting time dimension to UNLIMITED while saving!") + version = attributes['version'] + attributes['version'] = f'{version}-{region}' + save_variable(cube, cube.var_name, + out_dir, attributes, + unlimited_dimensions=['time']) + + # calculate monthly means + cube = monthly_statistics(cube, operator='mean') + # Remove monthly statistics aux coordinates + cube.remove_coord(cube.coord('month_number')) + 
cube.remove_coord(cube.coord('year')) + # save monthly results + logger.debug("Saving cube\n%s", cube) + logger.debug("Setting time dimension to UNLIMITED while saving!") + version = attributes['version'] + attributes['mip'] = var['mip2'] + definition = cmor_table.get_variable(var['mip2'], var['short_name']) + save_variable(cube, cube.var_name, + out_dir, attributes, + unlimited_dimensions=['time']) + + # create and save areacello + # (code adadapted from formatter 'nsidc_g02202_sh.py') + _create_areacello(cfg, cube, attributes, out_dir) + + logger.info("Finished CMORizing %s", ', '.join(in_files)) + + +def cmorization(in_dir, out_dir, cfg, cfg_user, start_date, end_date): + """Cmorize ESACCI-AEROSOL dataset.""" + glob_attrs = cfg['attributes'] + + logger.info("Starting cmorization for tier%s OBS files: %s", + glob_attrs['tier'], glob_attrs['dataset_id']) + logger.info("Input data from: %s", in_dir) + logger.info("Output will be written to: %s", out_dir) + logger.info('CMORizing ESACCI-SEAICE version %s', glob_attrs['version']) + + if start_date is None: + start_date = datetime(1991, 1, 1) + if end_date is None: + end_date = datetime(2020, 12, 31) + + regions = ('NH', 'SH') + + for region in regions: + for short_name, var in cfg['variables'].items(): + if 'short_name' not in var: + var['short_name'] = short_name + loop_date = start_date + daily = True + while loop_date <= end_date: + filepattern = os.path.join( + in_dir, region, + var['file'].format(year=loop_date.year, region=region) + ) + in_files = glob.glob(filepattern) + if not in_files: + logger.info('%d: no data not found for ' + 'variable %s', loop_date.year, short_name) + else: + _extract_variable(in_files, var, cfg, out_dir, daily, + loop_date.year, region) + + loop_date += relativedelta.relativedelta(years=1) diff --git a/esmvaltool/recipes/examples/recipe_check_obs.yml b/esmvaltool/recipes/examples/recipe_check_obs.yml index 880aef831a..b5cab184e8 100644 --- 
a/esmvaltool/recipes/examples/recipe_check_obs.yml +++ b/esmvaltool/recipes/examples/recipe_check_obs.yml @@ -292,6 +292,26 @@ diagnostics: type: sat, version: L3, start_year: 2007, end_year: 2008} scripts: null + ESACCI-SEAICE: + description: ESACCI-SEAICE check + variables: + areacello: + mip: Ofx + siconc_daily: + short_name: siconc + mip: SIday + frequency: day + siconc_monthly: + short_name: siconc + mip: SImon + frequency: mon + additional_datasets: + - {dataset: ESACCI-SEAICE, project: OBS6, tier: 2, + type: sat, version: L4-SICONC-RE-SSMI-12.5kmEASE2-fv3.0-SH, start_year: 1991, end_year: 2020} + - {dataset: ESACCI-SEAICE, project: OBS6, tier: 2, + type: sat, version: L4-SICONC-RE-SSMI-12.5kmEASE2-fv3.0-NH, start_year: 1991, end_year: 2020} + scripts: null + ESACCI-SEA-SURFACE-SALINITY: description: ESACCI-SEA-SURFACE-SALINITY check variables: diff --git a/esmvaltool/references/esacci-seaice.bibtex b/esmvaltool/references/esacci-seaice.bibtex new file mode 100644 index 0000000000..6676de506c --- /dev/null +++ b/esmvaltool/references/esacci-seaice.bibtex @@ -0,0 +1,8 @@ +@article{esacci-seaice, + doi = {10.5285/eade27004395466aaa006135e1b2ad1a}, + url = {https://doi.org/10.5285/eade27004395466aaa006135e1b2ad1a}, + year = 2016, + publisher = {Centre for Environmental Data Analysis}, + author = {ESA Sea Ice CCI project team and Sandven, S.}, + title = {ESA Sea Ice Climate Change Initiative (Sea Ice CCI) Dataset Collection.}, +}