From 39d1e5d857f2065902bbb9f502590fb6e22bc23a Mon Sep 17 00:00:00 2001 From: gsketefian <31046882+gsketefian@users.noreply.github.com> Date: Mon, 18 Nov 2024 08:55:29 -0700 Subject: [PATCH] [develop] Make `get_obs` tasks day-dependent in workflow; other improvements and bug fixes (#1137) This PR fixes multiple bugs in the verification (vx) and other parts of the SRW App, the main one being that the get_obs tasks as well as some of the vx pre-processing tasks currently do not work for an experiment with multiple cycles if those cycles overlap in time: * Changes related to get_obs tasks * Changes related to vx pre-processing tasks (PcpCombine_obs and Pb2nc_obs) * Small, self-contained bug fixes and improvements * New WE2E tests added --------- Co-authored-by: Michael Kavulich Co-authored-by: Gillian Petro <96886803+gspetro-NOAA@users.noreply.github.com> --- doc/Makefile | 2 +- .../ush/eval_metplus_timestr_tmpl.rst | 7 + doc/TechDocs/ush/get_obs.rst | 7 + doc/TechDocs/ush/modules.rst | 5 +- .../ush/set_cycle_and_obs_timeinfo.rst | 7 + doc/TechDocs/ush/set_cycle_dates.rst | 7 - doc/TechDocs/ush/set_leadhrs.rst | 7 + .../BuildingRunningTesting/RunSRW.rst | 631 +++++++++--- .../CustomizingTheWorkflow/ConfigWorkflow.rst | 432 ++++++-- doc/UsersGuide/Reference/Glossary.rst | 15 +- doc/conf.py | 10 +- jobs/JREGIONAL_GET_VERIF_OBS | 1 - modulefiles/tasks/derecho/get_obs.local.lua | 1 + modulefiles/tasks/gaea/get_obs.local.lua | 1 + modulefiles/tasks/hera/get_obs.local.lua | 3 +- modulefiles/tasks/hercules/get_obs.local.lua | 1 + modulefiles/tasks/jet/get_obs.local.lua | 3 +- modulefiles/tasks/noaacloud/get_obs.local.lua | 1 + modulefiles/tasks/orion/get_obs.local.lua | 1 + parm/data_locations.yml | 11 +- parm/metplus/EnsembleStat.conf | 43 +- parm/metplus/GenEnsProd.conf | 6 +- parm/metplus/GridStat_ensmean.conf | 19 +- parm/metplus/GridStat_ensprob.conf | 11 +- parm/metplus/GridStat_or_PointStat.conf | 21 +- parm/metplus/Pb2nc_obs.conf | 2 +- parm/metplus/PcpCombine.conf | 37 +- parm/metplus/PointStat_ensmean.conf | 10 +- parm/metplus/PointStat_ensprob.conf | 8 +- .../vx_config_det.obs_gdas.model_aiml.yaml | 54 + .../vx_config_det.obs_gdas.model_gfs.yaml | 54 + .../{ => vx_configs}/vx_config_det.yaml | 5 +- .../{ => vx_configs}/vx_config_ens.yaml | 5 +- parm/wflow/default_workflow.yaml | 16 +- parm/wflow/verify_det.yaml | 92 +- parm/wflow/verify_ens.yaml | 191 ++-- parm/wflow/verify_pre.yaml | 136 ++- scripts/exregional_check_post_output.sh | 46 +- scripts/exregional_get_verif_obs.sh | 547 +--------- ...onal_run_met_genensprod_or_ensemblestat.sh | 107 +- ...gional_run_met_gridstat_or_pointstat_vx.sh | 93 +- ...un_met_gridstat_or_pointstat_vx_ensmean.sh | 81 +- ...un_met_gridstat_or_pointstat_vx_ensprob.sh | 80 +- scripts/exregional_run_met_pb2nc_obs.sh | 129 ++- scripts/exregional_run_met_pcpcombine.sh | 120 ++- tests/WE2E/machine_suites/comprehensive | 12 + .../WE2E/machine_suites/coverage.hera.gnu.com | 6 + .../machine_suites/coverage.hera.intel.nco | 6 + tests/WE2E/run_WE2E_tests.py | 7 +- ...g.custom_ESGgrid_Great_Lakes_snow_8km.yaml | 5 +- ...fig.MET_ensemble_verification_only_vx.yaml | 6 +- ...nsemble_verification_only_vx_time_lag.yaml | 14 +- ...g.MET_ensemble_verification_winter_wx.yaml | 6 +- ...cst_custom-vx-config_aiml-fourcastnet.yaml | 63 ++ ...-fcst_custom-vx-config_aiml-graphcast.yaml | 63 ++ ...st_custom-vx-config_aiml-panguweather.yaml | 63 ++ ...vx-det_long-fcst_custom-vx-config_gfs.yaml | 66 ++ ...vx-det_long-fcst_winter-wx_SRW-staged.yaml | 62 ++ 
...x-det_multicyc_fcst-overlap_ncep-hrrr.yaml | 61 ++ ...-det_multicyc_first-obs-00z_ncep-hrrr.yaml | 61 ++ ...x-det_multicyc_last-obs-00z_ncep-hrrr.yaml | 61 ++ ...lticyc_long-fcst-no-overlap_nssl-mpas.yaml | 63 ++ ..._multicyc_long-fcst-overlap_nssl-mpas.yaml | 62 ++ ....vx-det_multicyc_no-00z-obs_nssl-mpas.yaml | 63 ++ ...et_multicyc_no-fcst-overlap_ncep-hrrr.yaml | 63 ++ tests/test_python/test_set_cycle_dates.py | 39 +- ush/bash_utils/eval_METplus_timestr_tmpl.sh | 281 ----- ush/config.community.yaml | 6 - ush/config_defaults.yaml | 473 ++++++--- ush/eval_metplus_timestr_tmpl.py | 63 ++ ush/get_crontab_contents.py | 2 +- ush/get_obs.py | 961 ++++++++++++++++++ ush/launch_FV3LAM_wflow.sh | 4 +- ush/machine/hera.yaml | 4 +- ush/mrms_pull_topofhour.py | 77 +- ush/retrieve_data.py | 1 + ush/set_cycle_and_obs_timeinfo.py | 847 +++++++++++++++ ush/set_cycle_dates.py | 33 - ush/set_leadhrs.py | 94 ++ ush/set_vx_fhr_list.sh | 295 ------ ush/set_vx_params.sh | 94 +- ush/setup.py | 391 +++++-- ush/source_util_funcs.sh | 10 - ush/valid_param_vals.yaml | 4 +- 84 files changed, 5326 insertions(+), 2232 deletions(-) create mode 100644 doc/TechDocs/ush/eval_metplus_timestr_tmpl.rst create mode 100644 doc/TechDocs/ush/get_obs.rst create mode 100644 doc/TechDocs/ush/set_cycle_and_obs_timeinfo.rst delete mode 100644 doc/TechDocs/ush/set_cycle_dates.rst create mode 100644 doc/TechDocs/ush/set_leadhrs.rst create mode 100644 modulefiles/tasks/derecho/get_obs.local.lua create mode 100644 modulefiles/tasks/gaea/get_obs.local.lua create mode 100644 modulefiles/tasks/hercules/get_obs.local.lua create mode 100644 modulefiles/tasks/noaacloud/get_obs.local.lua create mode 100644 modulefiles/tasks/orion/get_obs.local.lua create mode 100644 parm/metplus/vx_configs/vx_config_det.obs_gdas.model_aiml.yaml create mode 100644 parm/metplus/vx_configs/vx_config_det.obs_gdas.model_gfs.yaml rename parm/metplus/{ => vx_configs}/vx_config_det.yaml (98%) rename parm/metplus/{ => vx_configs}/vx_config_ens.yaml (95%) create mode 100644 tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-fourcastnet.yaml create mode 100644 tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-graphcast.yaml create mode 100644 tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-panguweather.yaml create mode 100644 tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_gfs.yaml create mode 100644 tests/WE2E/test_configs/verification/config.vx-det_long-fcst_winter-wx_SRW-staged.yaml create mode 100644 tests/WE2E/test_configs/verification/config.vx-det_multicyc_fcst-overlap_ncep-hrrr.yaml create mode 100644 tests/WE2E/test_configs/verification/config.vx-det_multicyc_first-obs-00z_ncep-hrrr.yaml create mode 100644 tests/WE2E/test_configs/verification/config.vx-det_multicyc_last-obs-00z_ncep-hrrr.yaml create mode 100644 tests/WE2E/test_configs/verification/config.vx-det_multicyc_long-fcst-no-overlap_nssl-mpas.yaml create mode 100644 tests/WE2E/test_configs/verification/config.vx-det_multicyc_long-fcst-overlap_nssl-mpas.yaml create mode 100644 tests/WE2E/test_configs/verification/config.vx-det_multicyc_no-00z-obs_nssl-mpas.yaml create mode 100644 tests/WE2E/test_configs/verification/config.vx-det_multicyc_no-fcst-overlap_ncep-hrrr.yaml delete mode 100644 ush/bash_utils/eval_METplus_timestr_tmpl.sh create mode 100644 ush/eval_metplus_timestr_tmpl.py create mode 100644 ush/get_obs.py create mode 100644 ush/set_cycle_and_obs_timeinfo.py delete 
mode 100644 ush/set_cycle_dates.py create mode 100644 ush/set_leadhrs.py delete mode 100644 ush/set_vx_fhr_list.sh diff --git a/doc/Makefile b/doc/Makefile index 9663ba3996..a4fac61e1a 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -24,4 +24,4 @@ linkcheck: # "make mode" option. $(O) is meant as a shortcut for $(SPHINXOPTS). %: Makefile - @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) -w $(BUILDDIR)/warnings.log \ No newline at end of file + @$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) -w $(BUILDDIR)/warnings.log diff --git a/doc/TechDocs/ush/eval_metplus_timestr_tmpl.rst b/doc/TechDocs/ush/eval_metplus_timestr_tmpl.rst new file mode 100644 index 0000000000..7ccc52bb64 --- /dev/null +++ b/doc/TechDocs/ush/eval_metplus_timestr_tmpl.rst @@ -0,0 +1,7 @@ +eval\_metplus\_timestr\_tmpl module +=================================== + +.. automodule:: eval_metplus_timestr_tmpl + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/TechDocs/ush/get_obs.rst b/doc/TechDocs/ush/get_obs.rst new file mode 100644 index 0000000000..6b4e2ac936 --- /dev/null +++ b/doc/TechDocs/ush/get_obs.rst @@ -0,0 +1,7 @@ +get\_obs module +=============== + +.. automodule:: get_obs + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/TechDocs/ush/modules.rst b/doc/TechDocs/ush/modules.rst index 2070e75ad0..6ac0346624 100644 --- a/doc/TechDocs/ush/modules.rst +++ b/doc/TechDocs/ush/modules.rst @@ -12,18 +12,21 @@ ush create_diag_table_file create_model_configure_file create_ufs_configure_file + eval_metplus_timestr_tmpl generate_FV3LAM_wflow get_crontab_contents + get_obs link_fix mrms_pull_topofhour python_utils retrieve_data run_srw_tests - set_cycle_dates + set_cycle_and_obs_timeinfo set_fv3nml_ens_stoch_seeds set_fv3nml_sfc_climo_filenames set_gridparams_ESGgrid set_gridparams_GFDLgrid + set_leadhrs set_predef_grid_params setup update_input_nml diff --git a/doc/TechDocs/ush/set_cycle_and_obs_timeinfo.rst b/doc/TechDocs/ush/set_cycle_and_obs_timeinfo.rst new file mode 100644 index 0000000000..13ec7b9b73 --- /dev/null +++ b/doc/TechDocs/ush/set_cycle_and_obs_timeinfo.rst @@ -0,0 +1,7 @@ +set\_cycle\_and\_obs\_timeinfo module +===================================== + +.. automodule:: set_cycle_and_obs_timeinfo + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/TechDocs/ush/set_cycle_dates.rst b/doc/TechDocs/ush/set_cycle_dates.rst deleted file mode 100644 index 1af14392fd..0000000000 --- a/doc/TechDocs/ush/set_cycle_dates.rst +++ /dev/null @@ -1,7 +0,0 @@ -set\_cycle\_dates module -======================== - -.. automodule:: set_cycle_dates - :members: - :undoc-members: - :show-inheritance: diff --git a/doc/TechDocs/ush/set_leadhrs.rst b/doc/TechDocs/ush/set_leadhrs.rst new file mode 100644 index 0000000000..b0172264d3 --- /dev/null +++ b/doc/TechDocs/ush/set_leadhrs.rst @@ -0,0 +1,7 @@ +set\_leadhrs module +=================== + +.. automodule:: set_leadhrs + :members: + :undoc-members: + :show-inheritance: diff --git a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst index bea4ab59aa..b66b399652 100644 --- a/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst +++ b/doc/UsersGuide/BuildingRunningTesting/RunSRW.rst @@ -198,15 +198,6 @@ The user must set the specifics of their experiment configuration in a ``config. 
* - ACCOUNT - "" - "an_account" - * - CCPA_OBS_DIR - - "{{ workflow.EXPTDIR }}/obs_data/ccpa/proc" - - "" - * - MRMS_OBS_DIR - - "{{ workflow.EXPTDIR }}/obs_data/mrms/proc" - - "" - * - NDAS_OBS_DIR - - "{{ workflow.EXPTDIR }}/obs_data/ndas/proc" - - "" * - USE_CRON_TO_RELAUNCH - false - false @@ -264,9 +255,6 @@ The user must set the specifics of their experiment configuration in a ``config. * - NUM_ENS_MEMBERS - 1 - 2 - * - VX_FCST_MODEL_NAME - - '{{ nco.NET_default }}.{{ task_run_post.POST_OUTPUT_DOMAIN_NAME }}' - - FV3_GFS_v16_CONUS_25km .. _GeneralConfig: @@ -629,15 +617,21 @@ The output files (in ``.png`` format) will be located in the ``postprd`` directo * To configure an experiment to run METplus verification tasks, see the :ref:`next section `. * Otherwise, skip to :numref:`Section %s ` to generate the workflow. + .. _VXConfig: Configure METplus Verification Suite (Optional) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -Users who want to use the METplus verification suite to evaluate their forecasts need to add additional information to their machine file (``ush/machine/.yaml``) or their ``config.yaml`` file. Other users may skip to the next step (:numref:`Section %s: Generate the SRW App Workflow `). +Users who want to use the METplus verification (VX) suite to evaluate their forecasts or to evaluate +a staged forecast (e.g. from another forecasting system) need to add additional information to their +machine file (``ush/machine/.yaml``) or their ``config.yaml`` file. Other users may skip +to the next step (:numref:`Section %s: Generate the SRW App Workflow `). + +To use METplus verification, MET and METplus modules need to be installed on the system. .. note:: - If METplus users update their METplus installation, they must update the module load statements in ``ufs-srweather-app/modulefiles/tasks//run_vx.local`` to correspond to their system's updated installation: + If users update their METplus installation, they must also update the module load statements in ``ufs-srweather-app/modulefiles/tasks//run_vx.local`` to correspond to their system's updated installation: .. code-block:: console @@ -645,15 +639,54 @@ Users who want to use the METplus verification suite to evaluate their forecasts module load met/ module load metplus/ -To use METplus verification, MET and METplus modules need to be installed. To turn on verification tasks in the workflow, include the ``parm/wflow/verify_*.yaml`` file(s) in the ``rocoto: tasks: taskgroups:`` section of ``config.yaml``. For example: -.. code-block:: console - - rocoto: - tasks: - taskgroups: '{{ ["parm/wflow/prep.yaml", "parm/wflow/coldstart.yaml", "parm/wflow/post.yaml", "parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' +Background +`````````````` +Whether generated by the SRW App or another forecasting system, a forecasting experiment consists +of one or more forecast periods known as cycles. If there is one forecast per cycle, the experiment +is referred to briefly as a deterministic forecast, and if there are multiple, it is referred to as +an ensemble forecast. Verification of a deterministic forecast is known (unsurprisingly) as +deterministic VX, while verification of an ensemble forecast as a whole is known as ensemble VX. +It is also possible to consider each member of an ensemble separately and verify each such member +deterministically. + +The SRW App allows users to include in the Rocoto XML that defines the workflow various tasks that +perform deterministic and/or ensemble VX. 
The forecast files to be verified may be generated as part +of the SRW experiment that is performing the verification, or they may be pre-generated files that +are staged somewhere on disk. In the latter case, the forecast files may have been generated from a +previous SRW experiment, or they may have been generated from another forecasting system altogether +(see :numref:`Section %s ` for the procedure to stage forecast files). In the SRW +App, the flag ``DO_ENSEMBLE`` in the ``global:`` section of ``config.yaml`` specifies whether the +(generated or staged) forecast files to be verified constitute a deterministic or an ensemble forecast. +Setting ``DO_ENSEMBLE: False`` (the default) causes the SRW App workflow to assume that the forecast +is deterministic, while setting ``DO_ENSEMBLE: True`` causes it to assume that the forecast is ensemble. +In the latter case, the number of ensemble members must be specified via the variable ``NUM_ENS_MEMBERS``, +also found in the ``global:`` section of ``config.yaml``. + +Both deterministic and ensemble VX require observation and forecast files as well as possible preprocessing +of those files. Thus, whenever deterministic or ensemble VX tasks are included in the workflow, preprocessing +(meta)tasks must also be included that check for the presence of the required obs and forecast files on disk, +retrieve obs files if necessary from a data store such as NOAA HPSS (see note below regarding forecast files), +and preprocess both types of files as needed. We refer to these collectively as the VX preprocessing tasks. -:numref:`Table %s ` indicates which functions each ``verify_*.yaml`` file configures. Users must add ``verify_pre.yaml`` anytime they want to run verification (VX); it runs preprocessing tasks that are necessary for both deterministic and ensemble VX. Then users can add ``verify_det.yaml`` for deterministic VX or ``verify_ens.yaml`` for ensemble VX (or both). Note that ensemble VX requires the user to be running an ensemble forecast or to stage ensemble forecast files in an appropriate location. +.. note:: + Currently, the SRW App workflow does not support the ability to retrieve forecast files from data stores; + these must either be generated by the forecast model in the SRW App or be manually staged by the user. + See :numref:`Section %s ` for details. + + +Adding VX Tasks to the Workflow +`````````````````````````````````` +To add verification tasks to the workflow, users must include the VX taskgroup files ``verify_pre.yaml``, +``verify_det.yaml``, and/or ``verify_ens.yaml`` (all located in the ``parm/wflow`` directory) in the ``rocoto: +tasks: taskgroups:`` section of ``config.yaml``. :numref:`Table %s ` specifies the set of workflow +VX (meta)tasks that each ``verify_*.yaml`` file defines. As implied above, users must add ``verify_pre.yaml`` +to ``rocoto: tasks: taskgroups:`` anytime they want to run deterministic and/or ensemble VX because this +contains VX preprocessing tasks that are required by both VX types. Then users can add ``verify_det.yaml`` +to run deterministic VX on either a deterministic forecast or on each member of an ensemble forecast, they +can add ``verify_ens.yaml`` to run ensemble VX on an ensemble forecast, or they can add both if they want to +run both deterministic and ensemble VX on an ensemble forecast (where the deterministic VX is performed on +each member of the ensemble). .. _VX-yamls: @@ -661,43 +694,267 @@ To use METplus verification, MET and METplus modules need to be installed. 
To t :widths: 20 50 :header-rows: 1 - * - File - - Description - * - verify_pre.yaml - - Contains (meta)tasks that are prerequisites for both deterministic and ensemble verification (vx) - * - verify_det.yaml - - Perform deterministic vx - * - verify_ens.yaml - - Perform ensemble vx (must set ``DO_ENSEMBLE: true`` in ``config.yaml``) - -The ``verify_*.yaml`` files include the definitions of several common verification tasks by default. Individual verification tasks appear in :numref:`Table %s `. The tasks in the ``verify_*.yaml`` files are independent of each other, so users may want to turn some off depending on the needs of their experiment. To turn off a task, simply include its entry from ``verify_*.yaml`` as an empty YAML entry in ``config.yaml``. For example, to turn off PointStat tasks: + * - Taskgroup File + - Taskgroup Description + * - ``verify_pre.yaml`` + - Defines (meta)tasks that run the VX preprocessing tasks that are prerequisites for both deterministic + and ensemble VX. + * - ``verify_det.yaml`` + - Defines (meta)tasks that perform deterministic VX on a single forecast or on each member of an ensemble + forecast (the latter requires ``DO_ENSEMBLE`` and ``NUM_ENS_MEMBERS`` in ``config.yaml`` to be set to + ``True`` and the number of ensemble members, respectively). + * - ``verify_ens.yaml`` + - Defines (meta)tasks that perform ensemble VX on an ensemble of forecasts as a whole (requires ``DO_ENSEMBLE`` + and ``NUM_ENS_MEMBERS`` in ``config.yaml`` to be set to ``True`` and the number of ensemble members, + respectively). + +For example, to enable deterministic VX, ``rocoto: tasks: taskgroups:`` may be set as follows: .. code-block:: console rocoto: tasks: - taskgroups: '{{ ["parm/wflow/prep.yaml", "parm/wflow/coldstart.yaml", "parm/wflow/post.yaml", "parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' - metatask_vx_ens_member: - metatask_PointStat_mem#mem#: + taskgroups: '{{ ["parm/wflow/prep.yaml", "parm/wflow/coldstart.yaml", "parm/wflow/post.yaml", + "parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' + +This setting can apply to either a deterministic or an ensemble forecast. In the latter case, it will +cause deterministic VX to be performed on each member of the ensemble (but not on the ensemble as a +whole). Note that with this setting, the UFS Weather Model will be run as part of the workflow to generate +forecast output because ``prep.yaml``, ``coldstart.yaml``, and ``post.yaml`` are also included in +``rocoto: tasks: taskgroups:``. Whether these forecasts are deterministic or ensemble depends on +whether ``DO_ENSEMBLE`` in ``config.yaml`` is set to ``False`` or ``True``, respectively (and, if +``True``, ``NUM_ENS_MEMBERS`` must be set to the number of ensemble members). Similarly, to enable +ensemble VX for an ensemble forecast as well as deterministic VX for each member of that ensemble, +``rocoto: tasks: taskgroups:`` may be set as follows: + +.. code-block:: console + + rocoto: + tasks: + taskgroups: '{{ ["parm/wflow/prep.yaml", "parm/wflow/coldstart.yaml", "parm/wflow/post.yaml", + "parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml", "parm/wflow/verify_ens.yaml"]|include }}' + +If deterministic VX of each ensemble member is not desired, ``verify_det.yaml`` must be left out of the +above.
Note that, as in the previous example, this setting of ``rocoto: tasks: taskgroups:`` will cause +the workflow to run the UFS Weather Model to generate forecast output because ``prep.yaml``, ``coldstart.yaml``, +and ``post.yaml`` are again included, but in this case, ``DO_ENSEMBLE`` **must be** set to ``True`` (and +``NUM_ENS_MEMBERS`` set appropriately) in ``config.yaml`` because inclusion of ``verify_ens.yaml`` requires +that the forecast be an ensemble one. + +If users want to manually stage the forecast files instead of generating them with the SRW's native weather +model (see :numref:`Section %s ` for the procedure), they must exclude ``prep.yaml``, +``coldstart.yaml``, and ``post.yaml`` from the examples above. Also, regardless of whether the forecast +files are generated by the SRW App or staged manually by the user, if the forecast to be verified is an +ensemble one, in the ``global:`` section of ``config.yaml`` users must set ``DO_ENSEMBLE`` to ``True`` +and ``NUM_ENS_MEMBERS`` to the number of ensemble members. This tells the workflow to look for multiple +forecasts for each cycle instead of just one (as well as the number of such forecasts). More information about configuring the ``rocoto:`` section can be found in :numref:`Section %s `. -If users have access to NOAA :term:`HPSS` but have not pre-staged the data, the default ``verify_pre.yaml`` taskgroup will activate the tasks, and the workflow will attempt to download the appropriate data from NOAA HPSS. In this case, the ``*_OBS_DIR`` paths must be set to the location where users want the downloaded data to reside. -Users who do not have access to NOAA HPSS and do not have the data on their system will need to download :term:`CCPA`, :term:`MRMS`, and :term:`NDAS` data manually from collections of publicly available data. +VX Taskgroup Organization and VX Field Groups +````````````````````````````````````````````````` +The VX (meta)tasks in the ``verify_*.yaml`` taskgroup files are described in detail in :numref:`Table %s +`. They are organized as follows. + +The (meta)tasks in ``verify_pre.yaml`` each +operate on a single observation (obs) type (except for ``metatask_check_post_output_all_mems``, which operates on the +forecast(s) and checks for the presence of all necessary forecast files), while the ones in ``verify_det.yaml`` +and ``verify_ens.yaml`` operate on one or more verification field groups. A verification field group +represents one or more meteorological fields that are operated on (e.g. verified) together in a single +call to one of the METplus tools (such as GridStat, PointStat, GenEnsProd, and EnsembleStat), and each +field group has associated with it an obs type against which those forecast fields are verified. The +set of valid VX field groups, the obs types they are associated with, and a brief description of the +fields they include are given in :numref:`Table %s `. -Users who have already staged the observation data needed for METplus (i.e., the :term:`CCPA`, :term:`MRMS`, and :term:`NDAS` data) on their system should set the path to this data in ``config.yaml``. .. _VXFieldGroupDescsTable:
.. list-table:: Valid Verification Field Groups and Descriptions + :widths: 20 20 60 + :header-rows: 1 + + * - VX Field Group + - Associated Obs Type + - Fields Included in Group + * - APCP + - CCPA + - Accumulated precipitation for the accumulation intervals specified in ``VX_APCP_ACCUMS_HRS`` + * - ASNOW + - NOHRSC + - Accumulated snowfall for the accumulation intervals specified in ``VX_ASNOW_ACCUMS_HRS`` + * - REFC + - MRMS + - Composite reflectivity + * - RETOP + - MRMS + - Echo top + * - SFC + - NDAS + - Various surface and near-surface fields (e.g. at the surface, 2 m, 10 m, etc.) + * - UPA + - NDAS + - Various upper-air fields (e.g. at 800 mb, 500 mb, etc.) + +The ``VX_FIELD_GROUPS`` list in the ``verification:`` section of ``config.yaml`` specifies the VX field +groups for which to run verification. In order to avoid unwanted computation, the Rocoto XML will include +only those (meta)tasks that operate on field groups or obs types associated with field groups in ``VX_FIELD_GROUPS``. +Thus, inclusion of a ``verify_*.yaml`` taskgroup file under the +``rocoto: tasks: taskgroups:`` section of ``config.yaml`` does not mean that all the (meta)tasks in that +file will be included in the workflow. For example, setting: .. code-block:: console - platform: - CCPA_OBS_DIR: /path/to/UFS_SRW_data/develop/obs_data/ccpa/proc - NOHRSC_OBS_DIR: /path/to/UFS_SRW_data/develop/obs_data/nohrsc/proc - MRMS_OBS_DIR: /path/to/UFS_SRW_data/develop/obs_data/mrms/proc - NDAS_OBS_DIR: /path/to/UFS_SRW_data/develop/obs_data/ndas/proc + VX_FIELD_GROUPS: [ 'APCP', 'REFC', 'RETOP', 'SFC', 'UPA' ] + +in ``config.yaml`` and including all three taskgroups ``verify_*.yaml`` in ``rocoto: tasks: taskgroups:`` +will add to the Rocoto XML the VX (meta)tasks for all valid field groups except those for accumulated +snowfall (``'ASNOW'``) and its associated obs type (:term:`NOHRSC`). In other words, all the (meta)tasks +in :numref:`Table %s ` will be included in the Rocoto XML except for those +associated with the :term:`NOHRSC` obs type and the ``'ASNOW'`` field group. Users might want to set +``VX_FIELD_GROUPS`` in this way, for example, because the forecast experiment they are verifying is for a +summer period for which ``ASNOW`` is not relevant. + + +Staging Observation Files +`````````````````````````````````` +The taskgroup in ``verify_pre.yaml`` defines a set of workflow tasks named ``get_obs_*``, where the ``*`` +represents any one of the supported obs types: :term:`CCPA`, :term:`NOHRSC`, :term:`MRMS`, and :term:`NDAS`. These ``get_obs_*`` tasks +will first check on disk for the existence of the obs files required for VX using the locations specified +by the variables ``*_OBS_DIR`` and ``OBS_*_FN_TEMPLATES[1,3,...]`` in the ``verification:`` section of +``config.yaml``. The ``*_OBS_DIR`` variables are the base directories in which the obs files are or should be +staged, and the ``OBS_*_FN_TEMPLATES[1,3,...]`` are the file name templates (with METplus time strings +used for templating; see example below). Thus, the templates for the full paths to the obs files are +given by: + +.. code-block:: console + + {*_OBS_DIR}/{OBS_*_FN_TEMPLATES[1,3,...]} + +The contents of the ``OBS_*_FN_TEMPLATES`` list come in pairs, where the first element +of each pair (with even-numbered indices ``[0,2,...]``) refers to the field group, +while the second element (with odd-numbered indices ``[1,3,...]``) refers to the +corresponding sets of files that the obs type contains.
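+For illustration, here is a minimal sketch of this pairing for the MRMS obs type, which provides two
+field groups; the file name templates simply mirror the fuller staging example given further below:
+
+.. code-block:: console
+
+   verification:
+     # Element [0] names a field group; element [1] is the METplus file name template for that group's obs files.
+     # Elements [2] and [3] repeat the pattern for the second field group provided by the same obs type.
+     OBS_MRMS_FN_TEMPLATES: [ 'REFC', '{valid?fmt=%Y%m%d}/MergedReflectivityQCComposite_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2',
+                              'RETOP', '{valid?fmt=%Y%m%d}/EchoTop_18_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2' ]
+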
Note that ``OBS_*_FN_TEMPLATES +[1,3,...]`` may include leading subdirectories and are +relative to the obs type's ``*_OBS_DIR``. + +If the obs files exist at the locations specified by ``{*_OBS_DIR}/{OBS_*_FN_TEMPLATES[1,3,...]}``, then the ``get_obs_*`` tasks will +succeed, and the workflow will move on to subsequent tasks. If one or more obs files do not exist, the +``get_obs_*`` tasks will attempt to retrieve the required files from a data store such as NOAA HPSS and +place them in the locations specified by ``{*_OBS_DIR}/{OBS_*_FN_TEMPLATES[1,3,...]}``. Assuming +that attempt is successful, the workflow will move on to subsequent tasks. Thus: + + * Users who have the obs files already available (staged) on their system only need to set ``*_OBS_DIR`` + and ``OBS_*_FN_TEMPLATES[1,3,...]`` in ``config.yaml`` to match those staging locations and file names. + + * Users who do not have the obs files available on their systems and do not have access to NOAA HPSS + need to download :term:`CCPA`, :term:`NOHRSC`, :term:`MRMS`, and/or :term:`NDAS` files manually + from collections of publicly available data. + Then, as above, they must set ``*_OBS_DIR`` and ``OBS_*_FN_TEMPLATES[1,3,...]`` to match those + staging locations and file names. + + * Users who have access to a data store that hosts the necessary files (e.g. NOAA HPSS) do not need to + manually stage the obs data because the ``get_obs_*`` tasks will retrieve the necessary obs and place + them in the locations specified by ``*_OBS_DIR`` and ``OBS_*_FN_TEMPLATES[1,3,...]``. By default, + the files will be placed under the experiment directory, but + users may change the values of these variables if they want the retrieved files to be placed elsewhere. + + +As an example, consider a case in which all four types of obs are needed for verification. Then ``*_OBS_DIR`` +and ``OBS_*_FN_TEMPLATES`` might be set as follows: + +.. code-block:: console + + verification: + + CCPA_OBS_DIR: /path/to/UFS_SRW_data/develop/obs_data/ccpa + NOHRSC_OBS_DIR: /path/to/UFS_SRW_data/develop/obs_data/nohrsc + MRMS_OBS_DIR: /path/to/UFS_SRW_data/develop/obs_data/mrms + NDAS_OBS_DIR: /path/to/UFS_SRW_data/develop/obs_data/ndas + + OBS_CCPA_FN_TEMPLATES: [ 'APCP', '{valid?fmt=%Y%m%d}/ccpa.t{valid?fmt=%H}z.01h.hrap.conus.gb2' ] + OBS_NOHRSC_FN_TEMPLATES: [ 'ASNOW', 'sfav2_CONUS_6h_{valid?fmt=%Y%m%d%H}_grid184.grb2' ] + OBS_MRMS_FN_TEMPLATES: [ 'REFC', '{valid?fmt=%Y%m%d}/MergedReflectivityQCComposite_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2', + 'RETOP', '{valid?fmt=%Y%m%d}/EchoTop_18_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2' ] + OBS_NDAS_FN_TEMPLATES: [ 'SFC_UPA', 'prepbufr.ndas.{valid?fmt=%Y%m%d%H}' ] + +Now further consider the CCPA obs type. If one of the days encompassed by the forecast(s) is 20240429, +then the ``get_obs_ccpa`` task associated with this day will check for the existence of the set of obs +files given by + +``/path/to/UFS_SRW_data/develop/obs_data/ccpa/20240429/ccpa.t{HH}z.01h.hrap.conus.gb2`` + +where ``{HH}`` takes on all hours of this day at which the verification requires CCPA obs. For example, +if performing (deterministic or ensemble) VX on 1-hour APCP for a 3-hour forecast that starts at 06z, +``{HH}`` will take on the values 07, 08, and 09. Then the files that ``get_obs_ccpa`` will look for +are: + +.. 
code-block:: console + + /path/to/UFS_SRW_data/develop/obs_data/ccpa/20240429/ccpa.t07z.01h.hrap.conus.gb2 + /path/to/UFS_SRW_data/develop/obs_data/ccpa/20240429/ccpa.t08z.01h.hrap.conus.gb2 + /path/to/UFS_SRW_data/develop/obs_data/ccpa/20240429/ccpa.t09z.01h.hrap.conus.gb2 + +If all these exist, ``get_obs_ccpa`` will simply confirm their existence and will not need to retrieve +any files. If not, it will try to retrieve the files from a data store such as NOAA HPSS and place them +at the above locations. + + +.. _VXStageFcstFiles: + +Staging Forecast Files +`````````````````````````````````` +As noted above, the SRW App currently does not support the ability to retrieve forecast files from +data stores. Thus, the forecast files must either be generated by the forecast model in the SRW App, +or they must be manually staged by the user. Note that manually staged forecast files do not have +to be ones generated by the SRW App; they can be outputs from another forecasting system. + +The locations of the forecast files are defined by the variables ``VX_FCST_INPUT_BASEDIR``, +``FCST_SUBDIR_TEMPLATE``, and ``FCST_FN_TEMPLATE`` in the ``verification:`` section of ``config.yaml``. +``VX_FCST_INPUT_BASEDIR`` is the base directory in which the files are located, ``FCST_SUBDIR_TEMPLATE`` +is a template specifying a set of subdirectories under ``VX_FCST_INPUT_BASEDIR``, and ``FCST_FN_TEMPLATE`` +is the file name template. As with the obs, the templating in ``FCST_SUBDIR_TEMPLATE`` and +``FCST_FN_TEMPLATE`` uses METplus time strings. Thus, the full template to the forecast files +is given by + +.. code-block:: console + + {VX_FCST_INPUT_BASEDIR}/{FCST_SUBDIR_TEMPLATE}/{FCST_FN_TEMPLATE} + +If the forecast files are manually staged, then these three variables must be set such that they +together point to the locations of the staged files. If they are generated by the SRW App, then +the user does not need to set these variables; they will by default be set to point to the forecast +files. + + +Summary +`````````````` +In summary, users must take the following steps to enable VX tasks in the SRW App workflow: + + #. Add the necessary VX taskgroup files ``verify_*.yaml`` to the ``rocoto: tasks: taskgroups:`` + section of ``config.yaml``. ``verify_pre.yaml`` must always be added; ``verify_det.yaml`` + must be added to enable deterministic VX (either of a deterministic forecast or of each + member of an ensemble forecast); and ``verify_ens.yaml`` must be added to enable ensemble + VX (of an ensemble forecast as a whole). + + #. If performing ensemble verification and/or deterministic verification of ensemble members + (i.e. if the forecast to be verified is an ensemble), in the ``global:`` section of ``config.yaml`` + set ``DO_ENSEMBLE`` to ``True`` and ``NUM_ENS_MEMBERS`` to the number of ensemble members. + + #. If manually staging the obs files (e.g. because users don't have access to NOAA HPSS), set + the variables ``*_OBS_DIR`` and ``OBS_*_FN_TEMPLATES[1,3,...]`` in the ``verification:`` section + of ``config.yaml`` to the locations of these files on disk (where the ``*`` in these variable + names can be any of the supported obs types). + + #. If manually staging the forecast files (as opposed to generating them by running the weather + model in the SRW App), set the forecast file paths to the locations of these files on disk + using the variables ``VX_FCST_INPUT_BASEDIR``, ``FCST_SUBDIR_TEMPLATE``, and ``FCST_FN_TEMPLATE`` + in the ``verification:`` section of ``config.yaml``. + + #. 
Specify the field groups to verify in the list ``VX_FIELD_GROUPS`` in the ``verification:`` + section of ``config.yaml``. Valid values for field groups are given in :numref:`Table %s `. + +After completing these steps, users can proceed to generate the experiment (see :numref:`Section %s `) -After adding the VX tasks to the ``rocoto:`` section and the data paths to the ``platform:`` section, users can proceed to generate the experiment, which will perform VX tasks in addition to the default workflow tasks. .. _GenerateWorkflow: @@ -802,87 +1059,223 @@ In addition to the baseline tasks described in :numref:`Table %s ` below. The column "taskgroup" indicates the taskgroup file that must be included in the user's ``config.yaml`` file under ``rocoto: tasks: taskgroups:`` (see :numref:`Section %s ` for more details). For each task, ``mem###`` refers to either ``mem000`` (if running a deterministic forecast) or a specific forecast member number (if running an ensemble forecast). "Metatasks" indicate task definitions that will become more than one workflow task based on different variables, number of hours, etc., as described in the Task Description column. See :numref:`Section %s ` for more details about metatasks. + +The METplus verification tasks and metatasks that are included by default in ``verify_*.yaml`` are described +in :numref:`Table %s `. The ``taskgroup`` entry after the name of each (meta)task indicates +the taskgroup file that must be included in the user's ``config.yaml`` file under ``rocoto: tasks: taskgroups:`` +in order for that (meta)task to be considered for inclusion in the workflow (see :numref:`Section %s ` +for details). As described in :numref:`Section %s `, metatasks define a set of tasks in the +workflow based on multiple values of one or more parameters such as the ensemble member index, the accumulation +interval (for cumulative fields such as accumulated precipitation), and the name of the verification field group +(see description of ``VX_FIELD_GROUPS`` in :numref:`Section %s `). .. _VXWorkflowTasksTable: -.. list-table:: Verification (VX) Workflow Tasks in the SRW App - :widths: 20 20 50 +.. list-table:: Default Verification (VX) Workflow Tasks and Metatasks in the SRW App + :widths: 5 95 :header-rows: 1 - * - Workflow Task - - ``taskgroup`` + * - Workflow (Meta)Task (``taskgroup``) - Task Description - * - :bolditalic:`task_get_obs_ccpa` - - ``verify_pre.yaml`` - - If user has staged :term:`CCPA` data for verification, checks to ensure that data exists in the specified location (``CCPA_OBS_DIR``). If data does not exist, attempts to retrieve that data from NOAA :term:`HPSS`. - * - :bolditalic:`task_get_obs_ndas` - - ``verify_pre.yaml`` - - If user has staged :term:`NDAS` data for verification, checks to ensure that data exists in the specified location (``NDAS_OBS_DIR``). If data does not exist, attempts to retrieve that data from NOAA HPSS. - * - :bolditalic:`task_get_obs_nohrsc` - - ``verify_pre.yaml`` - - Retrieves and organizes hourly :term:`NOHRSC` data from NOAA HPSS. Can only be run if ``verify_pre.yaml`` is included in a ``tasksgroups`` list *and* user has access to NOAA :term:`HPSS` data. ``ASNOW`` should also be added to the ``VX_FIELDS`` list. - * - :bolditalic:`task_get_obs_mrms` - - ``verify_pre.yaml`` - - If user has staged :term:`MRMS` data for verification, checks to ensure that data exists in the specified location (``MRMS_OBS_DIR``). If data does not exist, attempts to retrieve that data from NOAA HPSS. 
- * - :bolditalic:`task_run_MET_Pb2nc_obs` - - ``verify_pre.yaml`` - - Converts files from prepbufr to NetCDF format. - * - :bolditalic:`metatask_PcpCombine_obs` - - ``verify_pre.yaml`` - - Derives 3-hr, 6-hr, and 24-hr accumulated precipitation observations from the 1-hr observation files. In log files, tasks will be named like ``MET_PcpCombine_obs_APCP##h``, where ``##h`` is 03h, 06h, or 24h. - * - :bolditalic:`metatask_check_post_output_all_mems` - - ``verify_pre.yaml`` - - Ensures that required post-processing tasks have completed and that the output exists in the correct form and location for each forecast member. In log files, tasks will be named like ``check_post_output_mem###``. - * - :bolditalic:`metatask_PcpCombine_fcst_APCP_all_accums_all_mems` - - ``verify_pre.yaml`` - - Derives accumulated precipitation forecast for 3-hr, 6-hr, and 24-hr windows for all forecast members based on 1-hr precipitation forecast values. In log files, tasks will be named like ``MET_PcpCombine_fcst_APCP##h_mem###``, where ``##h`` is 03h, 06h, or 24h. - * - :bolditalic:`metatask_PcpCombine_fcst_ASNOW_all_accums_all_mems` - - ``verify_pre.yaml`` - - Derives accumulated snow forecast for 6-hr and 24-hr windows for all forecast members based on 1-hr precipitation forecast values. In log files, tasks will be named like ``MET_PcpCombine_fcst_ASNOW##h_mem###``, where ``##h`` is 06h or 24h. - * - :bolditalic:`metatask_GridStat_CCPA_all_accums_all_mems` - - ``verify_det.yaml`` - - Runs METplus grid-to-grid verification for 1-h, 3-h, 6-h, and 24-h (i.e., daily) accumulated precipitation. In log files, tasks will be named like ``run_MET_GridStat_vx_APCP##h_mem###``. - * - :bolditalic:`metatask_GridStat_NOHRSC_all_accums_all_mems` - - ``verify_det.yaml`` - - Runs METplus grid-to-grid verification for 6-h and 24-h (i.e., daily) accumulated snow. In log files, tasks will be named like ``run_MET_GridStat_vx_ASNOW##h_mem###``. - * - :bolditalic:`metatask_GridStat_MRMS_all_mems` - - ``verify_det.yaml`` - - Runs METplus grid-to-grid verification for composite reflectivity and :term:`echo top`. In log files, tasks will be named like ``run_MET_GridStat_vx_REFC_mem###`` or ``run_MET_GridStat_vx_RETOP_mem###``. - * - :bolditalic:`metatask_PointStat_NDAS_all_mems` - - ``verify_det.yaml`` - - Runs METplus grid-to-point verification for surface and upper-air variables. In log files, tasks will be named like ``run_MET_PointStat_vx_SFC_mem###`` or ``run_MET_PointStat_vx_UPA_mem###``. - * - :bolditalic:`metatask_GenEnsProd_EnsembleStat_CCPA` :raw-html:`

` - (formerly *VX_ENSGRID_##h*) - - ``verify_ens.yaml`` - - Runs METplus grid-to-grid ensemble verification for 1-h, 3-h, 6-h, and 24-h (i.e., daily) accumulated precipitation. In log files, tasks will be named like ``run_MET_EnsembleStat_vx_APCP##h`` or ``run_MET_GenEnsProd_vx_APCP##h``. Can only be run if ``DO_ENSEMBLE: true`` in ``config.yaml``. - * - :bolditalic:`metatask_GenEnsProd_EnsembleStat_NOHRSC` - - ``verify_ens.yaml`` - - Runs METplus grid-to-grid ensemble verification for 6-h and 24-h (i.e., daily) accumulated snow. In log files, tasks will be named like ``run_MET_EnsembleStat_vx_ASNOW##h`` or ``run_MET_GenEnsProd_vx_ASNOW##h``. Can only be run if ``DO_ENSEMBLE: true`` in ``config.yaml``. - * - :bolditalic:`metatask_GenEnsProd_EnsembleStat_MRMS` :raw-html:`

` - (formerly *VX_ENSGRID_[REFC|RETOP]*) - - ``verify_ens.yaml`` - - Runs METplus grid-to-grid ensemble verification for composite reflectivity and :term:`echo top`. In log files, tasks will be named like ``run_MET_GenEnsProd_vx_[REFC|RETOP]`` or ``run_MET_EnsembleStat_vx_[REFC|RETOP]``. Can only be run if ``DO_ENSEMBLE: true`` in ``config.yaml``. - * - :bolditalic:`metatask_GridStat_CCPA_ensmeanprob_all_accums` :raw-html:`

` - (formerly *VX_ENSGRID_MEAN_##h* and *VX_ENSGRID_PROB_##h*) - - ``verify_ens.yaml`` - - Runs METplus grid-to-grid verification for (1) ensemble mean 1-h, 3-h, 6-h, and 24h (i.e., daily) accumulated precipitation and (2) 1-h, 3-h, 6-h, and 24h (i.e., daily) accumulated precipitation probabilistic output. In log files, the ensemble mean subtask will be named like ``run_MET_GridStat_vx_ensmean_APCP##h`` and the ensemble probabilistic output subtask will be named like ``run_MET_GridStat_vx_ensprob_APCP##h``, where ``##h`` is 01h, 03h, 06h, or 24h. Can only be run if ``DO_ENSEMBLE: true`` in ``config.yaml``. - * - :bolditalic:`metatask_GridStat_NOHRSC_ensmeanprob_all_accums` - - ``verify_ens.yaml`` - - Runs METplus grid-to-grid verification for (1) ensemble mean 6-h and 24h (i.e., daily) accumulated snow and (2) 6-h and 24h (i.e., daily) accumulated snow probabilistic output. In log files, the ensemble mean subtask will be named like ``run_MET_GridStat_vx_ensmean_ASNOW##h`` and the ensemble probabilistic output subtask will be named like ``run_MET_GridStat_vx_ensprob_ASNOW##h``, where ``##h`` is 06h or 24h. Can only be run if ``DO_ENSEMBLE: true`` in ``config.yaml``. - * - :bolditalic:`metatask_GridStat_MRMS_ensprob` :raw-html:`

` - (formerly *VX_ENSGRID_PROB_[REFC|RETOP]*) - - ``verify_ens.yaml`` - - Runs METplus grid-to-grid verification for ensemble probabilities for composite reflectivity and :term:`echo top`. In log files, tasks will be named like ``run_MET_GridStat_vx_ensprob_[REFC|RETOP]``. Can only be run if ``DO_ENSEMBLE: true`` in ``config.yaml``. - * - :bolditalic:`metatask_GenEnsProd_EnsembleStat_NDAS` :raw-html:`

` - (formerly *VX_ENSPOINT*) - - ``verify_ens.yaml`` - - Runs METplus grid-to-point ensemble verification for surface and upper-air variables. In log files, tasks will be named like ``run_MET_GenEnsProd_vx_[SFC|UPA]`` or ``run_MET_EnsembleStat_vx_[SFC|UPA]``. Can only be run if ``DO_ENSEMBLE: true`` in ``config.yaml``. - * - :bolditalic:`metatask_PointStat_NDAS_ensmeanprob` :raw-html:`

` - (formerly *VX_ENSPOINT_[MEAN|PROB]*) - - ``verify_ens.yaml`` - - Runs METplus grid-to-point verification for (1) ensemble mean surface and upper-air variables and (2) ensemble probabilities for surface and upper-air variables. In log files, tasks will be named like ``run_MET_PointStat_vx_ensmean_[SFC|UPA]`` or ``run_MET_PointStat_vx_ensprob_[SFC|UPA]``. Can only be run if ``DO_ENSEMBLE: true`` in ``config.yaml``. + + * - :bolditalic:`task_get_obs_ccpa` (``verify_pre.yaml``) + - Checks for existence of staged :term:`CCPA` obs files at locations specified by ``CCPA_OBS_DIR`` + and ``OBS_CCPA_FN_TEMPLATES``. If any files do not exist, it attempts to retrieve all the files + from a data store (e.g. NOAA :term:`HPSS`) and place them in those locations. This task is included + in the workflow only if ``'APCP'`` is included in ``VX_FIELD_GROUPS``. + + * - :bolditalic:`task_get_obs_nohrsc` (``verify_pre.yaml``) + - Checks for existence of staged :term:`NOHRSC` obs files at locations specified by ``NOHRSC_OBS_DIR`` + and ``OBS_NOHRSC_FN_TEMPLATES``. If any files do not exist, it attempts to retrieve all the files + from a data store (e.g. NOAA :term:`HPSS`) and place them in those locations. This task is included + in the workflow only if ``'ASNOW'`` is included in ``VX_FIELD_GROUPS``. + + * - :bolditalic:`task_get_obs_mrms` (``verify_pre.yaml``) + - Checks for existence of staged :term:`MRMS` obs files at locations specified by ``MRMS_OBS_DIR`` + and ``OBS_MRMS_FN_TEMPLATES``. If any files do not exist, it attempts to retrieve all the files + from a data store (e.g. NOAA :term:`HPSS`) and place them in those locations. This task is included + in the workflow only if ``'REFC'`` and/or ``'RETOP'`` are included in ``VX_FIELD_GROUPS``. + + * - :bolditalic:`task_get_obs_ndas` (``verify_pre.yaml``) + - Checks for existence of staged :term:`NDAS` obs files at locations specified by ``NDAS_OBS_DIR`` + and ``OBS_NDAS_FN_TEMPLATES``. If any files do not exist, it attempts to retrieve all the files + from a data store (e.g. NOAA :term:`HPSS`) and place them in those locations. This task is included + in the workflow only if ``'SFC'`` and/or ``'UPA'`` are included in ``VX_FIELD_GROUPS``. + + * - :bolditalic:`task_run_MET_Pb2nc_obs_NDAS` (``verify_pre.yaml``) + - Converts NDAS obs prepbufr files to NetCDF format. + + * - :bolditalic:`metatask_PcpCombine_APCP_all_accums_obs_CCPA` (``verify_pre.yaml``) + - Set of tasks that generate NetCDF files containing observed APCP for the accumulation intervals + specified in ``VX_APCP_ACCUMS_HRS``. Files for accumulation intervals larger than the one + provided in the obs are obtained by adding APCP values over multiple obs accumulation intervals. + For example, if the obs contain 1-hour accumulations and 3-hr accumulation is specified in ``VX_APCP_ACCUMS_HRS``, + then groups of 3 successive 1-hour APCP values in the obs are added to obtain the 3-hour values. + In Rocoto, the tasks under this metatask are named ``run_MET_PcpCombine_APCP{accum_intvl}h_obs_CCPA``, + where ``{accum_intvl}`` is the accumulation interval in hours (e.g., ``01``, ``03``, ``06``, etc.) + for which the task is being run. This metatask is included in the workflow only if ``'APCP'`` is + included in ``VX_FIELD_GROUPS``. + + * - :bolditalic:`metatask_PcpCombine_ASNOW_all_accums_obs_NOHRSC` (``verify_pre.yaml``) + - Set of tasks that generate NetCDF files containing observed ASNOW for the accumulation intervals + specified in ``VX_ASNOW_ACCUMS_HRS``. 
Files for accumulation intervals larger than the one + provided in the obs are obtained by adding ASNOW values over multiple obs accumulation intervals. + For example, if the obs contain 6-hour accumulations and 24-hr accumulation is specified in ``VX_ASNOW_ACCUMS_HRS``, + then groups of 4 successive 6-hour ASNOW values in the obs are added to obtain the 24-hour values. + In Rocoto, the tasks under this metatask are named ``run_MET_PcpCombine_ASNOW{accum_intvl}h_obs_NOHRSC``, + where ``{accum_intvl}`` is the accumulation interval in hours (e.g., ``06``, ``24``, etc.) for which + the task is being run. This metatask is included in the workflow only if ``'ASNOW'`` is included in + ``VX_FIELD_GROUPS``. + + * - :bolditalic:`metatask_check_post_output_all_mems` (``verify_pre.yaml``) + - Set of tasks that ensure that the post-processed forecast files required for verification exist in + the locations specified by ``VX_FCST_INPUT_BASEDIR``, ``FCST_SUBDIR_TEMPLATE``, and ``FCST_FN_TEMPLATE``. + In Rocoto, the tasks under this metatask are named ``check_post_output_mem{mem_indx}``, where ``{mem_indx}`` + is the index of the ensemble forecast member. This takes on the values ``001``, ``002``, ... for an + ensemble of forecasts or just ``000`` for a single deterministic forecast. This metatask is included + in the workflow if at least one other verification task or metatask is included. + + * - :bolditalic:`metatask_PcpCombine_APCP_all_accums_all_mems` (``verify_pre.yaml``) + - Set of tasks that generate NetCDF files containing forecast APCP for the accumulation intervals + specified in ``VX_APCP_ACCUMS_HRS``. Files for accumulation intervals larger than the one + provided in the forecasts are obtained by adding APCP values over multiple forecast accumulation + intervals. For example, if the forecasts contain 1-hour accumulations, but 3-hr accumulation is specified + in ``VX_APCP_ACCUMS_HRS``, then groups of 3 successive 1-hour APCP values in the forecasts are + added to obtain the 3-hour values. In Rocoto, the tasks under this metatask are named + ``run_MET_PcpCombine_APCP{accum_intvl}h_fcst_mem{mem_indx}``, where ``{accum_intvl}`` and + ``{mem_indx}`` are the accumulation interval (in hours, e.g., ``01``, ``03``, ``06``, etc.) and + the ensemble forecast member index (or just ``000`` for a single deterministic forecast) for + which the task is being run. This metatask is included in the workflow only if ``'APCP'`` is + included in ``VX_FIELD_GROUPS``. + + * - :bolditalic:`metatask_PcpCombine_ASNOW_all_accums_all_mems` (``verify_pre.yaml``) + - Set of tasks that generate NetCDF files containing forecast ASNOW for the accumulation intervals + specified in ``VX_ASNOW_ACCUMS_HRS``. Files for accumulation intervals larger than the one + provided in the forecasts are obtained by adding ASNOW values over multiple forecast accumulation + intervals. For example, if the forecasts contain 1-hour accumulations, but 6-hr accumulation is specified + in ``VX_ASNOW_ACCUMS_HRS``, then groups of 6 successive 1-hour ASNOW values in the forecasts are + added to obtain 6-hour values. In Rocoto, the tasks under this metatask are named + ``run_MET_PcpCombine_ASNOW{accum_intvl}h_fcst_mem{mem_indx}``, where ``{accum_intvl}`` and + ``{mem_indx}`` are the accumulation interval (in hours, e.g., ``06``, ``24``, etc.) and the ensemble + forecast member index (or just ``000`` for a single deterministic forecast) for which the task is + being run. 
This metatask is included in the workflow only if ``'ASNOW'`` is included in + ``VX_FIELD_GROUPS``. + + * - :bolditalic:`metatask_GridStat_APCP_all_accums_all_mems` (``verify_det.yaml``) + - Set of tasks that run grid-to-grid verification of accumulated precipitation (represented by the + verification field group ``APCP``) for the intervals specified in ``VX_APCP_ACCUMS_HRS``. In Rocoto, + the tasks under this metatask are named ``run_MET_GridStat_vx_APCP{accum_intvl}h_mem{mem_indx}``, + where ``{accum_intvl}`` and ``{mem_indx}`` are the accumulation interval in hours (e.g., ``01``, + ``03``, ``06``, etc.) and the ensemble forecast member index (or just ``000`` for a single deterministic + forecast) for which the task is being run. This metatask is included in the workflow only if ``'APCP'`` + is included in ``VX_FIELD_GROUPS``. + + * - :bolditalic:`metatask_GridStat_ASNOW_all_accums_all_mems` (``verify_det.yaml``) + - Set of tasks that run grid-to-grid verification of accumulated snowfall (represented by the verification + field group ``ASNOW``) for the intervals specified in ``VX_ASNOW_ACCUMS_HRS``. In Rocoto, the tasks under + this metatask are named ``run_MET_GridStat_vx_ASNOW{accum_intvl}h_mem{mem_indx}``, where ``{accum_intvl}`` + and ``{mem_indx}`` are the accumulation interval in hours (e.g., ``06``, ``24``, etc.) and the ensemble + forecast member index (or just ``000`` for a single deterministic forecast) for which the task is being + run. This metatask is included in the workflow only if ``'ASNOW'`` is included in ``VX_FIELD_GROUPS``. + + * - :bolditalic:`metatask_GridStat_REFC_RETOP_all_mems` (``verify_det.yaml``) + - Set of tasks that run grid-to-grid verification of :term:`composite reflectivity` (represented by + the verification field group ``REFC``) and :term:`echo top` (represented by the verification field + group ``RETOP``). In Rocoto, the tasks under this metatask are named ``run_MET_GridStat_vx_{field_group}_mem{mem_indx}``, + where ``field_group`` and ``{mem_indx}`` are the field group (in this case either ``REFC`` or ``RETOP``) + and the ensemble forecast member index (or just ``000`` for a single deterministic forecast) for which + the task is being run. The tasks for ``REFC`` are included in the workflow only if ``'REFC'`` is + included in ``VX_FIELD_GROUPS``, and the ones for ``RETOP`` are included only if ``'RETOP'`` is included + in ``VX_FIELD_GROUPS``. + + * - :bolditalic:`metatask_PointStat_SFC_UPA_all_mems` (``verify_det.yaml``) + - Set of tasks that run grid-to-point verification of surface fields (represented by the verification field + group ``SFC``) and upper-air fields (represented by the verification field group ``UPA``). In Rocoto, + the tasks under this metatask are named ``run_MET_PointStat_vx_{field_group}_mem{mem_indx}``, where + ``field_group`` and ``{mem_indx}`` are the field group (in this case either ``SFC`` or ``UPA``) and the + ensemble forecast member index (or just ``000`` for a single deterministic forecast) for which the task + is being run. The tasks for the surface fields are included in the workflow only if ``'SFC'`` is included + in ``VX_FIELD_GROUPS``, and the ones for the upper-air fields are included only if ``'UPA'`` is included + in ``VX_FIELD_GROUPS``. + + * - :bolditalic:`metatask_GenEnsProd_EnsembleStat_APCP_all_accums` (``verify_ens.yaml``) + - Set of tasks that run :term:`MET`'s ``GenEnsProd`` and ``EnsembleStat`` tools on APCP for the intervals + specified in ``VX_APCP_ACCUMS_HRS``. 
In Rocoto, the tasks under this metatask that run ``GenEnsProd`` + are named ``run_MET_GenEnsProd_vx_APCP{accum_intvl}h``, and the ones that run ``EnsembleStat`` are + named ``run_MET_EnsembleStat_vx_APCP{accum_intvl}h``, where ``{accum_intvl}`` is the accumulation + interval in hours (e.g., ``01``, ``03``, ``06``, etc.) for which the tasks are being run. This metatask + is included in the workflow only if ``DO_ENSEMBLE`` is set to ``True`` in ``config.yaml`` and ``'APCP'`` + is included in ``VX_FIELD_GROUPS``. + + * - :bolditalic:`metatask_GenEnsProd_EnsembleStat_ASNOW_all_accums` (``verify_ens.yaml``) + - Set of tasks that run :term:`MET`'s ``GenEnsProd`` and ``EnsembleStat`` tools on ASNOW for the intervals + specified in ``VX_ASNOW_ACCUMS_HRS``. In Rocoto, the tasks under this metatask that run ``GenEnsProd`` + are named ``run_MET_GenEnsProd_vx_ASNOW{accum_intvl}h`` and the ones that run ``EnsembleStat`` are + named ``run_MET_EnsembleStat_vx_ASNOW{accum_intvl}h``, where ``{accum_intvl}`` is the accumulation + interval in hours (e.g., ``06``, ``24``, etc.) for which the tasks are being run. This metatask will be + included in the workflow only if ``DO_ENSEMBLE`` is set to ``True`` in ``config.yaml`` and ``'ASNOW'`` + is included in ``VX_FIELD_GROUPS``. + + * - :bolditalic:`metatask_GenEnsProd_EnsembleStat_REFC_RETOP` (``verify_ens.yaml``) + - Set of tasks that run :term:`MET`'s ``GenEnsProd`` and ``EnsembleStat`` tools on REFC (:term:`composite + reflectivity`) and RETOP (:term:`echo top`). In Rocoto, the tasks under this metatask that run + ``GenEnsProd`` are named ``run_MET_GenEnsProd_vx_{field_group}``, and the ones that run ``EnsembleStat`` + are named ``run_MET_EnsembleStat_vx_{field_group}``, where ``{field_group}`` is the field group (in + this case either ``REFC`` or ``RETOP``) for which the tasks are being run. The tasks for ``REFC`` are + included in the workflow only if ``DO_ENSEMBLE`` is set to ``True`` in ``config.yaml`` and ``'REFC'`` + is included in ``VX_FIELD_GROUPS``, and the ones for ``RETOP`` are included only if ``DO_ENSEMBLE`` is + set to ``True`` in ``config.yaml`` and ``'RETOP'`` is included in ``VX_FIELD_GROUPS``. + + * - :bolditalic:`metatask_GenEnsProd_EnsembleStat_SFC_UPA` (``verify_ens.yaml``) + - Set of tasks that run :term:`MET`'s ``GenEnsProd`` and ``EnsembleStat`` tools on surface fields (represented + by the verification field group ``SFC``) and upper-air fields (represented by the verification field group + ``UPA``). In Rocoto, the tasks under this metatask that run ``GenEnsProd`` are named ``run_MET_GenEnsProd_vx_{field_group}``, + and the ones that run ``EnsembleStat`` are named ``run_MET_EnsembleStat_vx_{field_group}``, where ``{field_group}`` + is the field group (in this case either ``SFC`` or ``UPA``) for which the tasks are being run. The tasks for + ``SFC`` are included in the workflow only if ``DO_ENSEMBLE`` is set to ``True`` in ``config.yaml`` and ``'SFC'`` + is included in ``VX_FIELD_GROUPS``, and the ones for ``UPA`` are included only if ``DO_ENSEMBLE`` is set to + ``True`` in ``config.yaml`` and ``'UPA'`` is included in ``VX_FIELD_GROUPS``. + + * - :bolditalic:`metatask_GridStat_APCP_all_accums_ensmeanprob` (``verify_ens.yaml``) + - Set of tasks that run grid-to-grid verification of the ensemble mean of APCP and grid-to-grid probabilistic + verification of the ensemble of APCP forecasts as a whole. 
In Rocoto, the tasks under this metatask for + ensemble mean verification are named ``run_MET_GridStat_vx_APCP{accum_intvl}h_ensmean``, and the ones for + ensemble probabilistic verification are named ``run_MET_GridStat_vx_APCP{accum_intvl}h_ensprob``, where + ``{accum_intvl}`` is the accumulation interval in hours (e.g., ``01``, ``03``, ``06``, etc.) for which the + tasks are being run. This metatask is included in the workflow only if ``DO_ENSEMBLE`` is set to ``True`` + in ``config.yaml`` and ``'APCP'`` is included in ``VX_FIELD_GROUPS``. + + * - :bolditalic:`metatask_GridStat_ASNOW_all_accums_ensmeanprob` (``verify_ens.yaml``) + - Set of tasks that run grid-to-grid verification of the ensemble mean of ASNOW and grid-to-grid probabilistic + verification of the ensemble of ASNOW forecasts as a whole. In Rocoto, the tasks under this metatask for + ensemble mean verification are named ``run_MET_GridStat_vx_ASNOW{accum_intvl}h_ensmean``, and the ones for + ensemble probabilistic verification are named ``run_MET_GridStat_vx_ASNOW{accum_intvl}h_ensprob``, where + ``{accum_intvl}`` is the accumulation interval in hours (e.g., ``06``, ``24``, etc.) for which the + tasks are being run. These tasks will be included in the workflow only if ``DO_ENSEMBLE`` is set to ``True`` + in ``config.yaml`` and ``'ASNOW'`` is included in ``VX_FIELD_GROUPS``. + + * - :bolditalic:`metatask_GridStat_REFC_RETOP_ensprob` (``verify_ens.yaml``) + - Set of tasks that run grid-to-grid probabilistic verification of the ensemble of :term:`composite reflectivity` + (represented by the verification field group ``REFC``) and :term:`echo top` (represented by the field group + ``RETOP``). (Note that there is no grid-to-grid verification of the ensemble mean of these quantities.) + In Rocoto, the tasks under this metatask are named ``run_MET_GridStat_vx_{field_group}_ensprob``, where + ``{field_group}`` is the field group (in this case either ``REFC`` or ``RETOP``) for which the task is + being run. The task for ``REFC`` is included in the workflow only if ``DO_ENSEMBLE`` is set to ``True`` + in ``config.yaml`` and ``'REFC'`` is included in ``VX_FIELD_GROUPS``, and the one for ``RETOP`` is included + only if ``DO_ENSEMBLE`` is set to ``True`` in ``config.yaml`` and ``'RETOP'`` is included in ``VX_FIELD_GROUPS``. + + * - :bolditalic:`metatask_PointStat_SFC_UPA_ensmeanprob` (``verify_ens.yaml``) + - Set of tasks that run grid-to-point verification of the ensemble mean of surface fields (represented by the + verification field group ``SFC``) and upper-air fields (represented by the verification field group ``UPA``) + as well as grid-to-point probabilistic verification of the ensemble of the surface and upper-air field + forecasts as a whole. In Rocoto, the tasks under this metatask for ensemble mean verification are named + ``run_MET_PointStat_vx_{field_group}_ensmean``, and the ones for ensemble probabilistic verification are + named ``run_MET_PointStat_vx_{field_group}_ensprob``, where ``{field_group}`` is the field group (in this + case either ``SFC`` or ``UPA``) on which the task is being run. The tasks for ``SFC`` are included in the + workflow only if ``DO_ENSEMBLE`` is set to ``True`` in ``config.yaml`` and ``'SFC'`` is included in + ``VX_FIELD_GROUPS``, and the ones for ``UPA`` are included only if ``DO_ENSEMBLE`` is set to ``True`` in + ``config.yaml`` and ``'UPA'`` is included in ``VX_FIELD_GROUPS``. + .. _Run: @@ -1176,7 +1569,7 @@ Each task should finish with error code 0.
For example: End exregional_get_extrn_mdl_files.sh at Wed Nov 16 18:08:19 UTC 2022 with error code 0 (time elapsed: 00:00:01) -Check the batch script output file in your experiment directory for a “SUCCESS” message near the end of the file. +Check the batch script output file in your experiment directory for a "SUCCESS" message near the end of the file. .. _RegionalWflowTasks: diff --git a/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst b/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst index 8b9e7648a7..50d343f57e 100644 --- a/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst +++ b/doc/UsersGuide/CustomizingTheWorkflow/ConfigWorkflow.rst @@ -169,48 +169,6 @@ These settings define platform-specific run commands. Users should set run comma ``PRE_TASK_CMDS``: (Default: "") Pre-task commands such as ``ulimit`` needed by tasks. For example: ``'{ ulimit -s unlimited; ulimit -a; }'`` -METplus Parameters ----------------------- - -:ref:`METplus ` is a scientific verification framework that spans a wide range of temporal and spatial scales. Many of the METplus parameters are described below, but additional documentation for the METplus components is available on the `METplus website `__. - -.. _METParamNote: - -.. note:: - Where a date field is required: - * ``YYYY`` refers to the 4-digit valid year - * ``MM`` refers to the 2-digit valid month - * ``DD`` refers to the 2-digit valid day of the month - * ``HH`` refers to the 2-digit valid hour of the day - * ``mm`` refers to the 2-digit valid minutes of the hour - * ``SS`` refers to the two-digit valid seconds of the hour - -``CCPA_OBS_DIR``: (Default: ``"{{ workflow.EXPTDIR }}/obs_data/ccpa/proc"``) - User-specified location of the directory where :term:`CCPA` hourly precipitation files used by METplus are located (or, if retrieved by the workflow, where they will be placed). See comments in file ``scripts/exregional_get_verif_obs.sh`` for more details about files and directory structure, as well as important caveats about errors in the metadata and file names. - - .. attention:: - Do not set this to the same path as other ``*_OBS_DIR`` variables; otherwise unexpected results and data loss may occur. - -``NOHRSC_OBS_DIR``: (Default: ``"{{ workflow.EXPTDIR }}/obs_data/nohrsc/proc"``) - User-specified location of top-level directory where NOHRSC 6- and 24-hour snowfall accumulation files used by METplus are located (or, if retrieved by the workflow, where they will be placed). See comments in file scripts/exregional_get_verif_obs.sh for more details about files and directory structure - - .. attention:: - Do not set this to the same path as other ``*_OBS_DIR`` variables; otherwise unexpected results and data loss may occur. - - .. note:: - Due to limited availability of NOHRSC observation data on NOAA :term:`HPSS` and the likelihood that snowfall accumulation verification will not be desired outside of winter cases, this verification option is currently not present in the workflow by default. In order to use it, the verification environment variable ``VX_FIELDS`` should be updated to include ``ASNOW``. This will allow the related workflow tasks to be run. - -``MRMS_OBS_DIR``: (Default: ``"{{ workflow.EXPTDIR }}/obs_data/mrms/proc"``) - User-specified location of the directory where :term:`MRMS` composite reflectivity and echo top files used by METplus are located (or, if retrieved by the workflow, where they will be placed). 
See comments in the ``scripts/exregional_get_verif_obs.sh`` for more details about files and directory structure. - - .. attention:: - Do not set this to the same path as other ``*_OBS_DIR`` variables; otherwise unexpected results and data loss may occur. - -``NDAS_OBS_DIR``: (Default: ``"{{ workflow.EXPTDIR }}/obs_data/ndas/proc"``) - User-specified location of top-level directory where :term:`NDAS` prepbufr files used by METplus are located (or, if retrieved by the workflow, where they will be placed). See comments in file ``scripts/exregional_get_verif_obs.sh`` for more details about files and directory structure. - - .. attention:: - Do not set this to the same path as other ``*_OBS_DIR`` variables; otherwise unexpected results and data loss may occur. Other Platform-Specific Directories -------------------------------------- @@ -336,6 +294,9 @@ Directory Parameters ``EXPTDIR``: (Default: ``'{{ [workflow.EXPT_BASEDIR, workflow.EXPT_SUBDIR]|path_join }}'``) The full path to the experiment directory. By default, this value will point to ``"${EXPT_BASEDIR}/${EXPT_SUBDIR}"``, but the user can define it differently in the configuration file if desired. +``WFLOW_FLAG_FILES_DIR``: (Default: ``'{{ [workflow.EXPTDIR, "wflow_flag_files"]|path_join }}'``) + Directory in which flag files marking completion of various workflow tasks can be placed. + Pre-Processing File Separator Parameters -------------------------------------------- @@ -1634,95 +1595,372 @@ Pressure Tendency Diagnostic ``PRINT_DIFF_PGR``: (Default: false) Option to turn on/off the pressure tendency diagnostic. -Verification Parameters -========================== +Verification (VX) Parameters +================================= Non-default parameters for verification tasks are set in the ``verification:`` section of the ``config.yaml`` file. -General Verification Parameters ---------------------------------- +.. note:: + The verification tasks in the SRW App are based on the :ref:`METplus ` + verification software developed at the Developmental Testbed Center (:term:`DTC`). + :ref:`METplus ` is a scientific verification framework that spans a wide range of temporal and spatial scales. + Full documentation for METplus is available on the `METplus website `__. -``METPLUS_VERBOSITY_LEVEL``: (Default: ``2``) - Logging verbosity level used by METplus verification tools. Valid values: 0 to 5, with 0 quiet and 5 loud. +.. _METParamNote: -Templates for Observation Files +.. note:: + Where a date field is required: + * ``YYYY`` refers to the 4-digit valid year + * ``MM`` refers to the 2-digit valid month + * ``DD`` refers to the 2-digit valid day of the month + * ``HH`` refers to the 2-digit valid hour of the day + * ``mm`` refers to the 2-digit valid minutes of the hour + * ``SS`` refers to the two-digit valid seconds of the hour + +.. _GeneralVXParams: + +General VX Parameters --------------------------------- -This section includes template variables for :term:`CCPA`, :term:`MRMS`, :term:`NOHRSC`, and :term:`NDAS` observation files. +``VX_FIELD_GROUPS``: (Default: [ "APCP", "REFC", "RETOP", "SFC", "UPA" ]) + The groups of fields (some of which may consist of only a single field) on which + to run verification. 
+ + Since accumulated snowfall (``ASNOW``) is often not of interest in non-winter + cases and because observation files for ``ASNOW`` are not available on NOAA + HPSS for retrospective cases before March 2020, by default ``ASNOW`` is not + included in ``VX_FIELD_GROUPS``, but it may be added to this list in order to + include the verification tasks for ``ASNOW`` in the workflow. Valid values: + ``"APCP"`` | ``"ASNOW"`` | ``"REFC"`` | ``"RETOP"`` | ``"SFC"`` | ``"UPA"`` + +``VX_APCP_ACCUMS_HRS``: (Default: [ 1, 3, 6, 24 ]) + The accumulation intervals (in hours) to include in the verification of + accumulated precipitation (APCP). If ``VX_FIELD_GROUPS`` contains ``"APCP"``, + then ``VX_APCP_ACCUMS_HRS`` must contain at least one element. Otherwise, + ``VX_APCP_ACCUMS_HRS`` will be ignored. Valid values: ``1`` | ``3`` | ``6`` | ``24`` -``OBS_CCPA_APCP_FN_TEMPLATE``: (Default: ``'{valid?fmt=%Y%m%d}/ccpa.t{valid?fmt=%H}z.01h.hrap.conus.gb2'``) - File name template for CCPA accumulated precipitation (APCP) observations. This template is used by the workflow tasks that call the METplus *PcpCombine* tool on CCPA obs to find the input observation files containing 1-hour APCP and then generate NetCDF files containing either 1-hour or greater than 1-hour APCP. +``VX_ASNOW_ACCUMS_HRS``: (Default: [ 6, 24 ]) + The accumulation intervals (in hours) to include in the verification of + accumulated snowfall (ASNOW). If ``VX_FIELD_GROUPS`` contains ``"ASNOW"``, + then ``VX_ASNOW_ACCUMS_HRS`` must contain at least one element. Otherwise, + ``VX_ASNOW_ACCUMS_HRS`` will be ignored. Valid values: ``6`` | ``12`` | ``18`` | ``24`` + +``VX_CONFIG_[DET|ENS]_FN``: (Default: ``vx_configs/vx_config_[det|ens].yaml``) + Names of configuration files for deterministic and ensemble verification + that specify the field groups, field names, levels, and (if applicable) + thresholds for which to run verification. These are relative to the + directory ``METPLUS_CONF`` in which the METplus config templates are + located. They may include leading relative paths before the file + names, e.g. ``some_dir/another_dir/vx_config_det.yaml``. -``OBS_NOHRSC_ASNOW_FN_TEMPLATE``: (Default: ``'{valid?fmt=%Y%m%d}/sfav2_CONUS_${ACCUM_HH}h_{valid?fmt=%Y%m%d%H}_grid184.grb2'``) - File name template for NOHRSC snow observations. +``VX_OUTPUT_BASEDIR``: (Default: ``'{% if user.RUN_ENVIR == "nco" %}$COMOUT/metout{% else %}{{ workflow.EXPTDIR }}{% endif %}'``) + Template for base (i.e. top-level) directory in which METplus will place + its output. -``OBS_MRMS_REFC_FN_TEMPLATE``: (Default: ``'{valid?fmt=%Y%m%d}/MergedReflectivityQCComposite_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2'``) - File name template for :term:`MRMS` reflectivity observations. -``OBS_MRMS_RETOP_FN_TEMPLATE``: (Default: ``'{valid?fmt=%Y%m%d}/EchoTop_18_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2'``) - File name template for MRMS echo top observations. +METplus-Specific Parameters +----------------------------------- -``OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE``: (Default: ``'prepbufr.ndas.{valid?fmt=%Y%m%d%H}'``) - File name template for :term:`NDAS` surface and upper air observations. This template is used by the workflow tasks that call the METplus *Pb2nc* tool on NDAS obs to find the input observation files containing ADP surface (ADPSFC) or ADP upper air (ADPUPA) fields and then generate NetCDF versions of these files. +``METPLUS_VERBOSITY_LEVEL``: (Default: ``2``) + Logging verbosity level used by METplus verification tools.
Valid values: 0 to 5, with 0 quiet and 5 loudest. -``OBS_NDAS_SFCorUPA_FN_METPROC_TEMPLATE``: (Default: ``'${OBS_NDAS_SFCorUPA_FN_TEMPLATE}.nc'``) - File name template for NDAS surface and upper air observations after processing by MET's *pb2nc* tool (to change format to NetCDF). -``OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT``: (Default: ``'${OBS_CCPA_APCP_FN_TEMPLATE}_a${ACCUM_HH}h.nc'``) - Template used to specify the names of the output NetCDF observation files generated by the workflow verification tasks that call the METplus *PcpCombine* tool on CCPA observations. (These files will contain observations of accumulated precipitation [APCP], both for 1 hour and for > 1 hour accumulation periods, in NetCDF format.) +VX Parameters for Observations +------------------------------------- -``OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE_PB2NC_OUTPUT``: (Default: ``'${OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE}.nc'``) - Template used to specify the names of the output NetCDF observation files generated by the workflow verification tasks that call the METplus Pb2nc tool on NDAS observations. (These files will contain obs ADPSFC or ADPUPA fields in NetCDF format.) +.. note:: + The observation types that the SRW App can currently retrieve (if necessary) + and use in verification are: + * CCPA (Climatology-Calibrated Precipitation Analysis) + * NOHRSC (National Operational Hydrologic Remote Sensing Center) + * MRMS (Multi-Radar Multi-Sensor) + * NDAS (NAM Data Assimilation System) -VX Forecast Model Name ------------------------- + The script ``ush/get_obs.py`` contains further details on the files and + directory structure of each obs type. -``VX_FCST_MODEL_NAME``: (Default: ``'{{ nco.NET_default }}.{{ task_run_post.POST_OUTPUT_DOMAIN_NAME }}'``) - String that specifies a descriptive name for the model being verified. This is used in forming the names of the verification output files as well as in the contents of those files. +``[CCPA|NOHRSC|MRMS|NDAS]_OBS_AVAIL_INTVL_HRS``: (Defaults: [1|6|1|1]) + Time interval (in hours) at which the various types of obs are available + on NOAA's HPSS. -``VX_FIELDS``: (Default: [ "APCP", "REFC", "RETOP", "SFC", "UPA" ]) - The fields or groups of fields for which verification tasks will run. Because ``ASNOW`` is often not of interest in cases outside of winter, and because observation files are not located for retrospective cases on NOAA HPSS before March 2020, ``ASNOW`` is not included by default. ``"ASNOW"`` may be added to this list in order to include the related verification tasks in the workflow. Valid values: ``"APCP"`` | ``"REFC"`` | ``"RETOP"`` | ``"SFC"`` | ``"UPA"`` | ``"ASNOW"`` - -``VX_APCP_ACCUMS_HRS``: (Default: [ 1, 3, 6, 24 ]) - The accumulation periods (in hours) to consider for accumulated precipitation (APCP). If ``VX_FIELDS`` contains ``"APCP"``, then ``VX_APCP_ACCUMS_HRS`` must contain at least one element. If ``VX_FIELDS`` does not contain ``"APCP"``, ``VX_APCP_ACCUMS_HRS`` will be ignored. Valid values: ``1`` | ``3`` | ``6`` | ``24`` + Note that MRMS files are in fact available every few minutes, but here + we set the obs availability interval to 1 hour because currently that + is the shortest output interval for forecasts, i.e. the forecasts cannot + (yet) support sub-hourly output. -``VX_ASNOW_ACCUMS_HRS``: (Default: [ 6, 24 ]) - The accumulation periods (in hours) to consider for ``ASNOW`` (accumulated snowfall). If ``VX_FIELDS`` contains ``"ASNOW"``, then ``VX_ASNOW_ACCUMS_HRS`` must contain at least one element. 
If ``VX_FIELDS`` does not contain ``"ASNOW"``, ``VX_ASNOW_ACCUMS_HRS`` will be ignored. Valid values: ``6`` | ``24`` +``[CCPA|NOHRSC|MRMS|NDAS]_OBS_DIR``: (Default: ``"{{ workflow.EXPTDIR }}/obs_data/[ccpa|nohrsc|mrms|ndas]"``) + Base directory in which CCPA, NOHRSC, MRMS, or NDAS obs files needed by + the verification tasks are located. If the files do not exist, they + will be retrieved and placed under this directory. Note that: -Verification (VX) Directories ------------------------------- + * If the obs files need to be retrieved (e.g. from NOAA's HPSS), because + they are not already staged on disk, then the user must have write + permission to this directory. Otherwise, the ``get_obs`` workflow + tasks that attempt to create these files will fail. + + * CCPA obs contain errors in the metadata for a certain range of dates + that need to be corrected during obs retrieval. This is described + in more detail in the script ``ush/get_obs.py``. + +``OBS_[CCPA|NOHRSC|MRMS|NDAS]_FN_TEMPLATES``: + **Defaults:** + + ``OBS_CCPA_FN_TEMPLATES``: + .. code-block:: console + + [ 'APCP', + '{%- set obs_avail_intvl_hrs = "%02d" % CCPA_OBS_AVAIL_INTVL_HRS %} + {{- "{valid?fmt=%Y%m%d}/ccpa.t{valid?fmt=%H}z." ~ obs_avail_intvl_hrs ~ "h.hrap.conus.gb2" }}' ] + + ``OBS_NOHRSC_FN_TEMPLATES``: + .. code-block:: console + + [ 'ASNOW', + '{%- set obs_avail_intvl_hrs = "%d" % NOHRSC_OBS_AVAIL_INTVL_HRS %} + {{- "sfav2_CONUS_" ~ obs_avail_intvl_hrs ~ "h_{valid?fmt=%Y%m%d%H}_grid184.grb2" }}' ] + + ``OBS_MRMS_FN_TEMPLATES``: + .. code-block:: console + + [ 'REFC', '{valid?fmt=%Y%m%d}/MergedReflectivityQCComposite_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2', + 'RETOP', '{valid?fmt=%Y%m%d}/EchoTop_18_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2' ] + + ``OBS_NDAS_FN_TEMPLATES``: + .. code-block:: console + + [ 'SFCandUPA', 'prepbufr.ndas.{valid?fmt=%Y%m%d%H}' ] + + File name templates for various obs types. These are meant to be used + in METplus configuration files and thus contain METplus time formatting + strings. Each of these variables is a python list containing pairs of + values. The first element of each pair specifies the verification field + group(s) for which the file name template will be needed, and the second + element is the file name template itself, which may include a leading + relative directory. (Here, by "verification field group", we mean a + group of fields that is verified together in the workflow; see the + description of the variable ``VX_FIELD_GROUPS``.) For example, for CCPA + obs, the variable name is ``OBS_CCPA_FN_TEMPLATES``. From the default value + of this variable given above, we see that if ``CCPA_OBS_AVAIL_INTVL_HRS`` + is set to 1 (i.e. the CCPA obs are assumed to be available every hour) + and the valid time is 2024042903, then the obs file (including a relative + path) to look for and, if necessary, create is + + ``20240429/ccpa.t03z.01h.hrap.conus.gb2`` + + This file will be used in the verification of fields under the APCP + field group (which consist of accumulated precipitation for the + accumulation intervals specified in ``VX_APCP_ACCUMS_HRS``). + + Note that: + + * The file name templates are relative to the obs base directories given in + the variables + + ``[CCPA|NOHRSC|MRMS|NDAS]_OBS_DIR`` + + defined above. Thus, the template for the full path to the obs files + is given, e.g. for CCPA obs, by + + .. code-block:: console + + CCPA_OBS_DIR/OBS_CCPA_FN_TEMPLATES[1] + + where the ``[1]`` indicates the second element of the list ``OBS_CCPA_FN_TEMPLATES``. 
+ + * The file name templates may represent file names only, or they may + include leading relative directories. + + * The default values of these variables for the CCPA, NOHRSC, and NDAS + obs types contain only one pair of values (because these obs types + contain only one set of files that we use in the verification) while + the default value for the MRMS obs type contains two pairs of values, + one for the set of files that contains composite reflectivity data + and another for the set that contains echo top data. This is simply + because the MRMS obs type does not group all its fields together into + one set of files as does, for example, the NDAS obs type. + + * Each file name template must contain full information about the year, + month, day, and hour by including METplus time formatting strings for + this information. Some of this information (e.g. the year, month, + and day) may be in the relative directory portion of the template and + the rest (e.g. the hour) in the file name, or there may be no relative + directory portion and all of this information may be in the file name, + but all four pieces of timing information must be present somewhere in + each template as METplus time formatting strings. If not, obs files + created by the ``get_obs`` tasks for different days might overwrite each + other. + + * The workflow generation scripts create a ``get_obs`` task for each obs + type that is needed in the verification and for each day on which that + obs type is needed for at least some hours. That ``get_obs`` task first + checks whether all the necessary obs files for that day already exist + at the locations specified by the full path template(s) (which are + obtained by combining the base directories ``[CCPA|NOHRSC|MRMS|NDAS]_OBS_DIR`` + with the file name template(s)). If for a given day one or more of + these obs files do not exist on disk, the ``get_obs`` task will retrieve + "raw" versions of these files from a data store (e.g. NOAA's HPSS) + and will place them in a temporary "raw" directory. It will then + move or copy these raw files to the locations specified by the full + path template(s). + + * The raw obs files, i.e. the obs files as they are named and arranged + in the data stores and retrieved and placed in the raw directories, + may be arranged differently and/or have names that are different from + the ones specified in the file name templates. If so, they are renamed + while being moved or copied from the raw directories to the locations + specified by the full path template(s). (The lists of templates for + searching for and retrieving files from the data stores are different + from the METplus templates described here; the former are given in + the data retrieval configuration file at ``parm/data_locations.yml``.) + + * When the ex-scripts for the various vx tasks are converted from bash + to python scripts, these variables should be converted from python + lists to python dictionaries, where the first element of each pair + becomes the key and the second becomes the value. This currently + cannot be done due to limitations in the workflow on converting + python dictionaries to bash variables. + +``REMOVE_RAW_OBS_DIRS_[CCPA|NOHRSC|MRMS|NDAS]``: (Defaults: [True|True|True|True]) + Flag specifying whether to remove the "raw" observation directories + after retrieving the specified type of obs (CCPA, NOHRSC, MRMS, or + NDAS) from a data store (e.g. NOAA's HPSS).
The raw directories + are the ones in which the observation files are placed immediately + after pulling them from the data store but before performing any + processing on them such as renaming the files and/or reorganizing + their directory structure. + +``OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT``: + **Default:** -``VX_FCST_INPUT_BASEDIR``: (Default: ``'{% if user.RUN_ENVIR == "nco" %}$COMOUT/../..{% else %}{{ workflow.EXPTDIR }}{% endif %}'``) - Template for top-level directory containing forecast (but not obs) files that will be used as input into METplus for verification. + .. code-block:: console -``VX_OUTPUT_BASEDIR``: (Default: ``'{% if user.RUN_ENVIR == "nco" %}$COMOUT/metout{% else %}{{ workflow.EXPTDIR }}{% endif %}'``) - Template for top-level directory in which METplus will place its output. + {%- set obs_avail_intvl_hrs = "%02d" % CCPA_OBS_AVAIL_INTVL_HRS %} + {{- "ccpa.t{valid?fmt=%H}z." ~ obs_avail_intvl_hrs ~ "h.hrap.conus.gb2_a${ACCUM_HH}h.nc" }} -``VX_NDIGITS_ENSMEM_NAMES``: 3 - Number of digits in the ensemble member names. This is a configurable variable to allow users to change its value (e.g., to go from "mem004" to "mem04") when using staged forecast files that do not use the same number of digits as the SRW App. + METplus template for the names of the NetCDF files generated by the + workflow verification tasks that call METplus's PcpCombine tool on + CCPA observations. These files will contain observed accumulated + precipitation in NetCDF format for various accumulation intervals. -Verification (VX) File Name and Path Templates ------------------------------------------------ +``OBS_NOHRSC_ASNOW_FN_TEMPLATE_PCPCOMBINE_OUTPUT``: + **Default:** -This section contains file name and path templates used in the verification (VX) tasks. + .. code-block:: console -``FCST_SUBDIR_TEMPLATE``: (Default: ``'{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}/postprd{% endif %}'``) - A template for the subdirectory containing input forecast files for VX tasks. + {%- set obs_avail_intvl_hrs = "%d" % NOHRSC_OBS_AVAIL_INTVL_HRS %} + {{- "sfav2_CONUS_" ~ obs_avail_intvl_hrs ~ "h_{valid?fmt=%Y%m%d%H}_grid184.grb2_a${ACCUM_HH}h.nc" }} -``FCST_FN_TEMPLATE``: (Default: ``'${NET_default}.t{init?fmt=%H?shift=-${time_lag}}z{% if user.RUN_ENVIR == "nco" and global.DO_ENSEMBLE %}.${ensmem_name}{% endif %}.prslev.f{lead?fmt=%HHH?shift=${time_lag}}.${POST_OUTPUT_DOMAIN_NAME}.grib2'``) - A template for the forecast file names used as input to verification tasks. + METplus template for the names of the NetCDF files generated by the + workflow verification tasks that call METplus's PcpCombine tool on + NOHRSC observations. These files will contain observed accumulated + snowfall for various accumulation intervals. -``FCST_FN_METPROC_TEMPLATE``: (Default: ``'${NET_default}.t{init?fmt=%H}z{% if user.RUN_ENVIR == "nco" and global.DO_ENSEMBLE %}.${ensmem_name}{% endif %}.prslev.f{lead?fmt=%HHH}.${POST_OUTPUT_DOMAIN_NAME}_${VAR}_a${ACCUM_HH}h.nc'``) - A template for how to name the forecast files for accumulated precipitation (APCP) with greater than 1-hour accumulation (i.e., 3-, 6-, and 24-hour accumulations) after processing by ``PcpCombine``.
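+ To make the role of the METplus time-formatting placeholders in the file
+ name templates above more concrete, the following minimal Python sketch
+ (purely illustrative and not part of the SRW App; in the workflow the
+ substitution is performed by METplus itself, and the helper name
+ ``render_valid_time`` is hypothetical) reproduces the documented CCPA
+ example, in which the template resolves to
+ ``20240429/ccpa.t03z.01h.hrap.conus.gb2`` for a valid time of 2024042903:
+
+ .. code-block:: python
+
+    # Illustrative sketch only: resolve {valid?fmt=...} placeholders in a
+    # METplus-style file name template using Python's strftime.  The template
+    # below is the CCPA default after its Jinja portion has been rendered
+    # with CCPA_OBS_AVAIL_INTVL_HRS = 1.
+    import re
+    from datetime import datetime
+
+    def render_valid_time(template: str, valid: datetime) -> str:
+        """Replace each {valid?fmt=<fmt>} token with the strftime-formatted valid time."""
+        return re.sub(r"\{valid\?fmt=([^}]+)\}",
+                      lambda m: valid.strftime(m.group(1)),
+                      template)
+
+    template = "{valid?fmt=%Y%m%d}/ccpa.t{valid?fmt=%H}z.01h.hrap.conus.gb2"
+    print(render_valid_time(template, datetime(2024, 4, 29, 3)))
+    # -> 20240429/ccpa.t03z.01h.hrap.conus.gb2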
+``OBS_NDAS_SFCandUPA_FN_TEMPLATE_PB2NC_OUTPUT``: (Default: ``'${OBS_NDAS_FN_TEMPLATES[1]}.nc'``) + METplus template for the names of the NetCDF files generated by the + workflow verification tasks that call METplus's Pb2nc tool on the + prepbufr files in NDAS observations. These files will contain the + observed surface (SFC) and upper-air (UPA) fields in NetCDF format + (instead of NDAS's native prepbufr format). ``NUM_MISSING_OBS_FILES_MAX``: (Default: 2) - For verification tasks that need observational data, this specifies the maximum number of observation files that may be missing. If more than this number are missing, the verification task will error out. - Note that this is a crude way of checking that there are enough observations to conduct verification since this number should probably depend on the field being verified, the time interval between observations, the length of the forecast, etc. An alternative may be to specify the maximum allowed fraction of observation files that can be missing (i.e., the number missing divided by the number that are expected to exist). + For verification tasks that need observational data, this specifies + the maximum number of observation files that may be missing. If more + than this number are missing, the verification task will error out. + This is a crude way of checking that there are enough obs to conduct + verification (crude because this number should probably depend on the + field being verified, the time interval between observations, the + length of the forecast, etc.; an alternative may be to specify the + maximum allowed fraction of obs files that can be missing). + + +VX Parameters for Forecasts +---------------------------------- + +``VX_FCST_MODEL_NAME``: (Default: ``'{{ nco.NET_default }}.{{ task_run_post.POST_OUTPUT_DOMAIN_NAME }}'``) + String that specifies a descriptive name for the model being verified. + This is used in forming the names of the verification output files and + is also included in the contents of those files. + +``VX_FCST_OUTPUT_INTVL_HRS``: (Default: 1) + The forecast output interval (in hours) to assume for verification + purposes. + + .. note:: + If/when a variable is created in this configuration file that specifies + the forecast output interval for native SRW forecasts, it should be + used as the default value of this variable. + +``VX_FCST_INPUT_BASEDIR``: (Default: ``'{% if user.RUN_ENVIR == "nco" %}$COMOUT/../..{% else %}{{ workflow.EXPTDIR }}{% endif %}'``) + METplus template for the name of the base (i.e. top-level) directory + containing the forecast files to use as inputs to the verification + tasks. + +``FCST_SUBDIR_TEMPLATE``: + **Default:** + + .. code-block:: console + + {%- if user.RUN_ENVIR == "nco" %} + {{- "${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}" }} + {%- else %} + {{- "{init?fmt=%Y%m%d%H?shift=-${time_lag}}" }} + {%- if global.DO_ENSEMBLE %} + {{- "/${ensmem_name}" }} + {%- endif %} + {{- "/postprd" }} + {%- endif %} + + METplus template for the name of the subdirectory containing forecast + files to use as inputs to the verification tasks. + +``FCST_FN_TEMPLATE``: + **Default:** + + ..
code-block:: console + + {{- "${NET_default}.t{init?fmt=%H?shift=-${time_lag}}z" }} + {%- if user.RUN_ENVIR == "nco" and global.DO_ENSEMBLE %} + {{- ".${ensmem_name}" }} + {%- endif %} + {{- ".prslev.f{lead?fmt=%HHH?shift=${time_lag}}.${POST_OUTPUT_DOMAIN_NAME}.grib2" }} + + METplus template for the names of the forecast files to use as inputs + to the verification tasks. + +``FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT``: + **Default:** + + .. code-block:: console + + {{- "${NET_default}.t{init?fmt=%H}z" }} + {%- if user.RUN_ENVIR == "nco" and global.DO_ENSEMBLE %} + {{- ".${ensmem_name}" }} + {%- endif %} + {{- ".prslev.f{lead?fmt=%HHH}.${POST_OUTPUT_DOMAIN_NAME}_${VAR}_a${ACCUM_HH}h.nc" }} + + METplus template for the names of the NetCDF files generated by the + workflow verification tasks that call METplus's PcpCombine tool on + forecast output. These files will contain forecast accumulated + precipitation in NetCDF format for various accumulation intervals. + +``VX_NDIGITS_ENSMEM_NAMES``: (Default: 3) + Number of digits to assume/use in the forecast ensemble member identifier + string used in directory and file names and other instances in which the + ensemble member needs to be identified. For example, if this is set to + 3, the identifier for ensemble member 4 will be "mem004", while if it's + set to 2, the identifier will be "mem04". This is useful when verifying + staged forecast files from a forecasting model/system other than the + SRW that uses a different number of digits in the ensemble member + identifier string. ``NUM_MISSING_FCST_FILES_MAX``: (Default: 0) - For verification tasks that need forecast data, this specifies the maximum number of post-processed forecast files that may be missing. If more than this number are missing, the verification task will not be run. + For verification tasks that need forecast data, this specifies the + maximum number of post-processed forecast files that may be missing. + If more than this number are missing, the verification task will exit + with an error. + Coupled AQM Configuration Parameters ===================================== diff --git a/doc/UsersGuide/Reference/Glossary.rst b/doc/UsersGuide/Reference/Glossary.rst index 48fb970cab..5b60c5b40d 100644 --- a/doc/UsersGuide/Reference/Glossary.rst +++ b/doc/UsersGuide/Reference/Glossary.rst @@ -26,7 +26,7 @@ Glossary chgres_cube The preprocessing software used to create initial and boundary condition files to - “cold start” the forecast model. It is part of :term:`UFS_UTILS`. + "cold start" the forecast model. It is part of :term:`UFS_UTILS`. CIN Convective Inhibition. @@ -48,6 +48,9 @@ Glossary Component Repository A :term:`repository` that contains, at a minimum, source code for a single component. + composite reflectivity + `Composite reflectivity `__ is a display or mapping of the maximum radar reflectivity factor at any altitude as a function of position on the ground. + Container `Docker `__ describes a container as "a standard unit of software that packages up code and all its dependencies so the application runs quickly and reliably from one computing environment to another." @@ -70,6 +73,9 @@ Glossary data assimilation Data assimilation is the process of combining observations, model data, and error statistics to achieve the best estimate of the state of a system. One of the major sources of error in weather and climate forecasts is uncertainty related to the initial conditions that are used to generate future predictions.
Even the most precise instruments have a small range of unavoidable measurement error, which means that tiny measurement errors (e.g., related to atmospheric conditions and instrument location) can compound over time. These small differences result in very similar forecasts in the short term (i.e., minutes, hours), but they cause widely divergent forecasts in the long term. Errors in weather and climate forecasts can also arise because models are imperfect representations of reality. Data assimilation systems seek to mitigate these problems by combining the most timely observational data with a "first guess" of the atmospheric state (usually a previous forecast) and other sources of data to provide a "best guess" analysis of the atmospheric state to start a weather or climate simulation. When combined with an "ensemble" of model runs (many forecasts with slightly different conditions), data assimilation helps predict a range of possible atmospheric states, giving an overall measure of uncertainty in a given forecast. + DTC + The `Developmental Testbed Center `__ is a distributed facility where the NWP community can test and evaluate new models and techniques for use in research and operations. + dycore dynamical core Global atmospheric model based on fluid dynamics principles, including Euler's equations of motion. @@ -87,7 +93,7 @@ Glossary Extended Schmidt Gnomonic (ESG) grid. The ESG grid uses the map projection developed by Jim Purser of NOAA :term:`EMC` (:cite:t:`Purser_2020`). ESMF - `Earth System Modeling Framework `__. The ESMF defines itself as “a suite of software tools for developing high-performance, multi-component Earth science modeling applications.” + `Earth System Modeling Framework `__. The ESMF defines itself as "a suite of software tools for developing high-performance, multi-component Earth science modeling applications." ex-scripts Scripting layer (contained in ``ufs-srweather-app/scripts/``) that should be called by a :term:`J-job ` for each workflow componentto run a specific task or sub-task in the workflow. The different scripting layers are described in detail in the :nco:`NCO Implementation Standards document ` @@ -152,6 +158,9 @@ Glossary MERRA2 The `Modern-Era Retrospective analysis for Research and Applications, Version 2 `__ provides satellite observation data back to 1980. According to NASA, "It was introduced to replace the original MERRA dataset because of the advances made in the assimilation system that enable assimilation of modern hyperspectral radiance and microwave observations, along with GPS-Radio Occultation datasets. It also uses NASA's ozone profile observations that began in late 2004. Additional advances in both the GEOS model and the GSI assimilation system are included in MERRA-2. Spatial resolution remains about the same (about 50 km in the latitudinal direction) as in MERRA." + MET + The `Model Evaluation Tools `__ is a highly-configurable, state-of-the-art suite of verification tools developed at the :term:`DTC`. + MPI MPI stands for Message Passing Interface. An MPI is a standardized communication system used in parallel programming. It establishes portable and efficient syntax for the exchange of messages and data between multiple processors that are used by a single computer program. An MPI is required for high-performance computing (HPC) systems. @@ -215,7 +224,7 @@ Glossary The branch of physical geography dealing with mountains. 
Parameterizations - Simplified functions that approximate the effects of small-scale processes (e.g., microphysics, gravity wave drag) that cannot be explicitly resolved by a model grid’s representation of the earth. + Simplified functions that approximate the effects of small-scale processes (e.g., microphysics, gravity wave drag) that cannot be explicitly resolved by a model grid's representation of the earth. RAP `Rapid Refresh `__. The continental-scale NOAA hourly-updated assimilation/modeling system operational at :term:`NCEP`. RAP covers North America and is comprised primarily of a numerical forecast model and an analysis/assimilation system to initialize that model. RAP is complemented by the higher-resolution 3km High-Resolution Rapid Refresh (:term:`HRRR`) model. diff --git a/doc/conf.py b/doc/conf.py index 2b5bf7b4d4..e8f15567d8 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -253,7 +253,7 @@ def setup(app): # -- Options for autodoc extension --------------------------------------- autodoc_mock_imports = ["f90nml","cartopy","mpl_toolkits.basemap","fill_jinja_template", - "matplotlib","numpy","uwtools","mpl_toolkits", + "matplotlib","numpy","uwtools","mpl_toolkits","metplus", ] logger = logging.getLogger(__name__) @@ -310,3 +310,11 @@ def warn_undocumented_members(app, what, name, obj, options, lines): 'fire-ug': ('https://fire-behavior.readthedocs.io/en/latest/%s', '%s'), } +# Define environment variables that need to exist when running the top-level code in python +# files (outside of functions, classes, etc.). +# +# METPLUS_ROOT just needs to exist in the environment; its value is not important since for +# the purpose of building the documentation, METplus is loaded by including "metplus" in +# the autodoc_mock_imports list above, not via use of the METPLUS_ROOT environment variable. +os.environ["METPLUS_ROOT"] = "" + diff --git a/jobs/JREGIONAL_GET_VERIF_OBS b/jobs/JREGIONAL_GET_VERIF_OBS index 7c083e96c6..8190314fc5 100755 --- a/jobs/JREGIONAL_GET_VERIF_OBS +++ b/jobs/JREGIONAL_GET_VERIF_OBS @@ -69,7 +69,6 @@ This is the J-job script for the task that checks, pulls, and stages observation data for verification purposes. 
========================================================================" -# # #----------------------------------------------------------------------- # diff --git a/modulefiles/tasks/derecho/get_obs.local.lua b/modulefiles/tasks/derecho/get_obs.local.lua new file mode 100644 index 0000000000..c03abd8dfe --- /dev/null +++ b/modulefiles/tasks/derecho/get_obs.local.lua @@ -0,0 +1 @@ +load("run_vx.local") diff --git a/modulefiles/tasks/gaea/get_obs.local.lua b/modulefiles/tasks/gaea/get_obs.local.lua new file mode 100644 index 0000000000..c03abd8dfe --- /dev/null +++ b/modulefiles/tasks/gaea/get_obs.local.lua @@ -0,0 +1 @@ +load("run_vx.local") diff --git a/modulefiles/tasks/hera/get_obs.local.lua b/modulefiles/tasks/hera/get_obs.local.lua index dcca3116d8..e8d902abab 100644 --- a/modulefiles/tasks/hera/get_obs.local.lua +++ b/modulefiles/tasks/hera/get_obs.local.lua @@ -1,3 +1,2 @@ load("hpss") -unload("python") -load("python_srw") +load("run_vx.local") diff --git a/modulefiles/tasks/hercules/get_obs.local.lua b/modulefiles/tasks/hercules/get_obs.local.lua new file mode 100644 index 0000000000..c03abd8dfe --- /dev/null +++ b/modulefiles/tasks/hercules/get_obs.local.lua @@ -0,0 +1 @@ +load("run_vx.local") diff --git a/modulefiles/tasks/jet/get_obs.local.lua b/modulefiles/tasks/jet/get_obs.local.lua index dcca3116d8..e8d902abab 100644 --- a/modulefiles/tasks/jet/get_obs.local.lua +++ b/modulefiles/tasks/jet/get_obs.local.lua @@ -1,3 +1,2 @@ load("hpss") -unload("python") -load("python_srw") +load("run_vx.local") diff --git a/modulefiles/tasks/noaacloud/get_obs.local.lua b/modulefiles/tasks/noaacloud/get_obs.local.lua new file mode 100644 index 0000000000..c03abd8dfe --- /dev/null +++ b/modulefiles/tasks/noaacloud/get_obs.local.lua @@ -0,0 +1 @@ +load("run_vx.local") diff --git a/modulefiles/tasks/orion/get_obs.local.lua b/modulefiles/tasks/orion/get_obs.local.lua new file mode 100644 index 0000000000..c03abd8dfe --- /dev/null +++ b/modulefiles/tasks/orion/get_obs.local.lua @@ -0,0 +1 @@ +load("run_vx.local") diff --git a/parm/data_locations.yml b/parm/data_locations.yml index e65a796739..a706676f9b 100644 --- a/parm/data_locations.yml +++ b/parm/data_locations.yml @@ -319,10 +319,7 @@ CCPA_obs: - /NCEPPROD/hpssprod/runhistory/rh{yyyy}/{yyyy}{mm}/{yyyy}{mm}{dd} - /NCEPPROD/hpssprod/runhistory/rh{yyyy}/{yyyy}{mm}/{yyyy}{mm}{dd} archive_internal_dir: - - "./00" - - "./06" - - "./12" - - "./18" + - "./{hh}" archive_file_names: - "com2_ccpa_prod_ccpa.{yyyy}{mm}{dd}.tar" - "gpfs_dell1_nco_ops_com_ccpa_prod_ccpa.{yyyy}{mm}{dd}.tar" @@ -330,7 +327,7 @@ CCPA_obs: - "com_ccpa_v4.2_ccpa.{yyyy}{mm}{dd}.tar" file_names: obs: - - "ccpa.t{hh}z.01h.hrap.conus.gb2" + - "ccpa.t*z.01h.hrap.conus.gb2" MRMS_obs: hpss: @@ -365,11 +362,13 @@ NDAS_obs: - /NCEPPROD/hpssprod/runhistory/rh{yyyy}/{yyyy}{mm}/{yyyy}{mm}{dd} - /NCEPPROD/hpssprod/runhistory/rh{yyyy}/{yyyy}{mm}/{yyyy}{mm}{dd} - /NCEPPROD/hpssprod/runhistory/rh{yyyy}/{yyyy}{mm}/{yyyy}{mm}{dd} + - /NCEPPROD/hpssprod/runhistory/rh{yyyy}/{yyyy}{mm}/{yyyy}{mm}{dd} archive_file_names: - "com2_nam_prod_nam.{yyyy}{mm}{dd}{hh}.bufr.tar" - "gpfs_dell1_nco_ops_com_nam_prod_nam.{yyyy}{mm}{dd}{hh}.bufr.tar" - "com_nam_prod_nam.{yyyy}{mm}{dd}{hh}.bufr.tar" - "com_obsproc_v1.1_nam.{yyyy}{mm}{dd}{hh}.bufr.tar" + - "com_obsproc_v1.2_nam.{yyyy}{mm}{dd}{hh}.bufr.tar" file_names: obs: - "./nam.t{hh}z.prepbufr.tm*.nr" @@ -386,6 +385,6 @@ NOHRSC_obs: - "dcom_{yyyy}{mm}{dd}.tar" file_names: obs: - - "sfav2_CONUS_*h_{yyyy}{mm}{dd}{hh}_grid184.grb2" + - 
"sfav2_CONUS_6h_{yyyy}{mm}{dd}*_grid184.grb2" archive_internal_dir: - ./wgrbbul/nohrsc_snowfall/ diff --git a/parm/metplus/EnsembleStat.conf b/parm/metplus/EnsembleStat.conf index 2caeda1521..15ba1d9321 100644 --- a/parm/metplus/EnsembleStat.conf +++ b/parm/metplus/EnsembleStat.conf @@ -31,7 +31,7 @@ INIT_INCREMENT = 3600 # List of forecast leads to process for each run time (init or valid) # In hours if units are not specified # If unset, defaults to 0 (don't loop through forecast leads) -LEAD_SEQ = {{fhr_list}} +LEAD_SEQ = {{vx_leadhr_list}} # # Order of loops to process data - Options are times, processes # Not relevant if only one item is in the PROCESS_LIST @@ -87,7 +87,7 @@ OBS_FILE_WINDOW_BEGIN = -300 OBS_FILE_WINDOW_END = 300 OBS_{{METPLUS_TOOL_NAME}}_WINDOW_BEGIN = 0 OBS_{{METPLUS_TOOL_NAME}}_WINDOW_END = 0 -{%- elif input_field_group in ['ADPSFC', 'ADPUPA'] %} +{%- elif input_field_group in ['SFC', 'UPA'] %} OBS_WINDOW_BEGIN = -1799 OBS_WINDOW_END = 1800 OBS_{{METPLUS_TOOL_NAME}}_WINDOW_BEGIN = {OBS_WINDOW_BEGIN} @@ -105,7 +105,7 @@ OBS_{{METPLUS_TOOL_NAME}}_WINDOW_END = {OBS_WINDOW_END} # ens.vld_thresh value in the MET config file {{METPLUS_TOOL_NAME}}_ENS_VLD_THRESH = 1.0 -{%- if input_field_group in ['ADPSFC', 'ADPUPA'] %} +{%- if input_field_group in ['SFC', 'UPA'] %} {{METPLUS_TOOL_NAME}}_OBS_QUALITY_INC = 0, 1, 2, 3, 9, NA #{{METPLUS_TOOL_NAME}}_OBS_QUALITY_EXC = @@ -118,7 +118,7 @@ OBS_{{METPLUS_TOOL_NAME}}_WINDOW_END = {OBS_WINDOW_END} {{METPLUS_TOOL_NAME}}_MET_OBS_ERR_TABLE = {MET_BASE}/table_files/obs_error_table.txt {%- elif input_field_group in ['REFC', 'RETOP'] %} {{METPLUS_TOOL_NAME}}_MET_OBS_ERR_TABLE = -{%- elif input_field_group in ['ADPSFC', 'ADPUPA'] %} +{%- elif input_field_group in ['SFC', 'UPA'] %} {{METPLUS_TOOL_NAME}}_MET_OBS_ERR_TABLE = {MET_BASE}/table_files/obs_error_table.txt {%- endif %} @@ -135,7 +135,7 @@ OBS_{{METPLUS_TOOL_NAME}}_WINDOW_END = {OBS_WINDOW_END} {%- set comment_or_null = '' %} {%- set regrid_to_grid = 'FCST' %} {%- set regrid_method = 'BUDGET' %} -{%- elif input_field_group in ['ADPSFC', 'ADPUPA'] %} +{%- elif input_field_group in ['SFC', 'UPA'] %} {%- set comment_or_null = '#' %} {%- set regrid_to_grid = 'NONE' %} {%- set regrid_method = 'BILIN' %} @@ -160,8 +160,8 @@ OBS_{{METPLUS_TOOL_NAME}}_WINDOW_END = {OBS_WINDOW_END} {{METPLUS_TOOL_NAME}}_DUPLICATE_FLAG = NONE {{METPLUS_TOOL_NAME}}_SKIP_CONST = TRUE {{METPLUS_TOOL_NAME}}_OBS_ERROR_FLAG = FALSE -{%- elif input_field_group in ['ADPSFC', 'ADPUPA'] %} -{{METPLUS_TOOL_NAME}}_MESSAGE_TYPE = {{input_field_group}} +{%- elif input_field_group in ['SFC', 'UPA'] %} +{{METPLUS_TOOL_NAME}}_MESSAGE_TYPE = {{fieldname_in_met_filedir_names}} {{METPLUS_TOOL_NAME}}_DUPLICATE_FLAG = NONE {{METPLUS_TOOL_NAME}}_SKIP_CONST = FALSE {{METPLUS_TOOL_NAME}}_OBS_ERROR_FLAG = FALSE @@ -197,7 +197,7 @@ OBS_{{METPLUS_TOOL_NAME}}_WINDOW_END = {OBS_WINDOW_END} {%- set comment_or_null = '' %} {%- elif input_field_group in ['REFC', 'RETOP'] %} {%- set comment_or_null = '' %} -{%- elif input_field_group in ['ADPSFC', 'ADPUPA'] %} +{%- elif input_field_group in ['SFC', 'UPA'] %} {%- set comment_or_null = '#' %} {%- endif %} {{comment_or_null}}{{METPLUS_TOOL_NAME}}_CLIMO_CDF_BINS = 1 @@ -207,7 +207,7 @@ OBS_{{METPLUS_TOOL_NAME}}_WINDOW_END = {OBS_WINDOW_END} {{METPLUS_TOOL_NAME}}_MASK_GRID = {%- elif input_field_group in ['REFC', 'RETOP'] %} {{METPLUS_TOOL_NAME}}_MASK_GRID = FULL -{%- elif input_field_group in ['ADPSFC', 'ADPUPA'] %} +{%- elif input_field_group in ['SFC', 'UPA'] %} {{METPLUS_TOOL_NAME}}_MASK_GRID 
= {%- endif %} @@ -483,7 +483,7 @@ FCST_VAR{{ns.var_count}}_OPTIONS = convert(x) = x * 3.28084 * 0.001; ;; Convert {{opts_indent}}ens_phist_bin_size = 0.05; {%- endif %} - {%- elif input_field_group == 'ADPSFC' %} + {%- elif input_field_group == 'SFC' %} {%- if field_fcst == 'HGT' %} FCST_VAR{{ns.var_count}}_OPTIONS = GRIB_lvl_typ = 215; @@ -499,7 +499,7 @@ FCST_VAR{{ns.var_count}}_OPTIONS = interp = { type = [ { method = NEAREST; width FCST_VAR{{ns.var_count}}_OPTIONS = GRIB2_pdt = 0; ;; Derive instantaneous 10-m wind from U/V components, overriding max 10-m wind. {%- endif %} - {%- elif input_field_group == 'ADPUPA' %} + {%- elif input_field_group == 'UPA' %} {%- if field_fcst == 'CAPE' %} FCST_VAR{{ns.var_count}}_OPTIONS = cnt_thresh = [ >0 ]; @@ -516,15 +516,8 @@ PcpCombine tool. In that file, the field name consists of the observation field name here (field_obs) with the accumulation period appended to it (separated by an underscore), so we must do the same here to get an exact match. - -Note: -Turns out for ASNOW, PcpCombine is not run for obs, so we exclude that -from the "if" clause here (so it goes into the "else"). For workflow -behavior uniformity between APCP and ASNOW, consider running PcpCombine -for ASNOW observations as well (just as it's run for APCP observations). - {%- if (input_field_group in ['APCP', 'ASNOW']) %} #} - {%- if (input_field_group in ['APCP']) %} + {%- if (input_field_group in ['APCP', 'ASNOW']) %} OBS_VAR{{ns.var_count}}_NAME = {{field_obs}}_{{accum_hh}} {%- else %} OBS_VAR{{ns.var_count}}_NAME = {{field_obs}} @@ -615,7 +608,7 @@ OBS_VAR{{ns.var_count}}_OPTIONS = censor_thresh = lt-20; {{opts_indent}}ens_phist_bin_size = 0.05; {%- endif %} - {%- elif input_field_group == 'ADPSFC' %} + {%- elif input_field_group == 'SFC' %} {%- if field_obs in ['DPT', 'TMP', 'WIND'] %} OBS_VAR{{ns.var_count}}_OPTIONS = obs_error = { flag = TRUE; dist_type = NONE; dist_parm = []; inst_bias_scale = 1.0; inst_bias_offset = 0.0; min = NA; max = NA; } @@ -623,7 +616,7 @@ OBS_VAR{{ns.var_count}}_OPTIONS = obs_error = { flag = TRUE; dist_type = NONE; d OBS_VAR{{ns.var_count}}_OPTIONS = GRIB_lvl_typ = 215 {%- endif %} - {%- elif input_field_group == 'ADPUPA' %} + {%- elif input_field_group == 'UPA' %} {%- if field_obs in ['DPT', 'HGT', 'TMP', 'WIND'] %} OBS_VAR{{ns.var_count}}_OPTIONS = obs_error = { flag = TRUE; dist_type = NONE; dist_parm = []; inst_bias_scale = 1.0; inst_bias_offset = 0.0; min = NA; max = NA; } @@ -656,7 +649,7 @@ OUTPUT_BASE = {{output_base}} # # Point observation input directory for {{MetplusToolName}}. # -{%- if input_field_group in ['ADPSFC', 'ADPUPA'] %} +{%- if input_field_group in ['SFC', 'UPA'] %} OBS_{{METPLUS_TOOL_NAME}}_POINT_INPUT_DIR = {{obs_input_dir}} {%- else %} OBS_{{METPLUS_TOOL_NAME}}_POINT_INPUT_DIR = @@ -664,7 +657,7 @@ OBS_{{METPLUS_TOOL_NAME}}_POINT_INPUT_DIR = # # Grid observation input directory for {{MetplusToolName}}. # -{%- if input_field_group in ['ADPSFC', 'ADPUPA'] %} +{%- if input_field_group in ['SFC', 'UPA'] %} OBS_{{METPLUS_TOOL_NAME}}_GRID_INPUT_DIR = {%- else %} OBS_{{METPLUS_TOOL_NAME}}_GRID_INPUT_DIR = {{obs_input_dir}} @@ -698,7 +691,7 @@ STAGING_DIR = {{staging_dir}} # Template for point observation input to {{MetplusToolName}} relative to # OBS_{{METPLUS_TOOL_NAME}}_POINT_INPUT_DIR. 
# -{%- if input_field_group in ['ADPSFC', 'ADPUPA'] %} +{%- if input_field_group in ['SFC', 'UPA'] %} OBS_{{METPLUS_TOOL_NAME}}_POINT_INPUT_TEMPLATE = {{obs_input_fn_template}} {%- else %} OBS_{{METPLUS_TOOL_NAME}}_POINT_INPUT_TEMPLATE = @@ -707,7 +700,7 @@ OBS_{{METPLUS_TOOL_NAME}}_POINT_INPUT_TEMPLATE = # Template for gridded observation input to {{MetplusToolName}} relative to # OBS_{{METPLUS_TOOL_NAME}}_GRID_INPUT_DIR. # -{%- if input_field_group in ['ADPSFC', 'ADPUPA'] %} +{%- if input_field_group in ['SFC', 'UPA'] %} OBS_{{METPLUS_TOOL_NAME}}_GRID_INPUT_TEMPLATE = {%- else %} OBS_{{METPLUS_TOOL_NAME}}_GRID_INPUT_TEMPLATE = {{obs_input_fn_template}} diff --git a/parm/metplus/GenEnsProd.conf b/parm/metplus/GenEnsProd.conf index 6c47cedb0d..153eae196b 100644 --- a/parm/metplus/GenEnsProd.conf +++ b/parm/metplus/GenEnsProd.conf @@ -31,7 +31,7 @@ INIT_INCREMENT = 3600 # List of forecast leads to process for each run time (init or valid) # In hours if units are not specified # If unset, defaults to 0 (don't loop through forecast leads) -LEAD_SEQ = {{fhr_list}} +LEAD_SEQ = {{vx_leadhr_list}} # # Order of loops to process data - Options are times, processes # Not relevant if only one item is in the PROCESS_LIST @@ -328,7 +328,7 @@ Set forecast field options. ENS_VAR{{ns.var_count}}_OPTIONS = convert(x) = x * 3.28084 * 0.001; ;; Convert from meters to kilofeet. {%- endif %} - {%- elif input_field_group == 'ADPSFC' %} + {%- elif input_field_group == 'SFC' %} {%- if field_fcst == 'HGT' %} ENS_VAR{{ns.var_count}}_OPTIONS = GRIB_lvl_typ = 215; @@ -344,7 +344,7 @@ ENS_VAR{{ns.var_count}}_OPTIONS = interp = { type = [ { method = NEAREST; width ENS_VAR{{ns.var_count}}_OPTIONS = GRIB2_pdt = 0; ;; Derive instantaneous 10-m wind from U/V components, overriding max 10-m wind. {%- endif %} - {%- elif input_field_group == 'ADPUPA' %} + {%- elif input_field_group == 'UPA' %} {%- if field_fcst == 'CAPE' %} ENS_VAR{{ns.var_count}}_OPTIONS = cnt_thresh = [ >0 ]; diff --git a/parm/metplus/GridStat_ensmean.conf b/parm/metplus/GridStat_ensmean.conf index 6bbc20e3f8..7c3b3b7ad9 100644 --- a/parm/metplus/GridStat_ensmean.conf +++ b/parm/metplus/GridStat_ensmean.conf @@ -31,7 +31,7 @@ INIT_INCREMENT = 3600 # List of forecast leads to process for each run time (init or valid) # In hours if units are not specified # If unset, defaults to 0 (don't loop through forecast leads) -LEAD_SEQ = {{fhr_list}} +LEAD_SEQ = {{vx_leadhr_list}} # # Order of loops to process data - Options are times, processes # Not relevant if only one item is in the PROCESS_LIST @@ -174,8 +174,8 @@ following dictionary. 'ASNOW': [], 'REFC': [], 'RETOP': [], - 'ADPSFC': ['TCDC', 'VIS', 'HGT'], - 'ADPUPA': []} %} + 'SFC': ['TCDC', 'VIS', 'HGT'], + 'UPA': []} %} {%- set fields_fcst_to_exclude = fields_fcst_to_exclude_by_field_group[input_field_group] %} {#- @@ -383,7 +383,7 @@ Set forecast field options. {%- endif %} {%- set opts_indent = ' '*opts_indent_len %} - {%- if input_field_group == 'ADPUPA' %} + {%- if input_field_group == 'UPA' %} {%- if field_fcst == 'CAPE' %} FCST_VAR{{ns.var_count}}_OPTIONS = cnt_thresh = [ >0 ]; @@ -400,15 +400,8 @@ PcpCombine tool. In that file, the field name consists of the observation field name here (field_obs) with the accumulation period appended to it (separated by an underscore), so we must do the same here to get an exact match. - -Note: -Turns out for ASNOW, PcpCombine is not run for obs, so we exclude that -from the "if" clause here (so it goes into the "else"). 
For workflow -behavior uniformity between APCP and ASNOW, consider running PcpCombine -for ASNOW observations as well (just as it's run for APCP observations). - {%- if (input_field_group in ['APCP', 'ASNOW']) %} #} - {%- if (input_field_group in ['APCP']) %} + {%- if (input_field_group in ['APCP', 'ASNOW']) %} OBS_VAR{{ns.var_count}}_NAME = {{field_obs}}_{{accum_hh}} {%- else %} OBS_VAR{{ns.var_count}}_NAME = {{field_obs}} @@ -469,7 +462,7 @@ Set observation field options. OBS_VAR{{ns.var_count}}_OPTIONS = convert(x) = 100.0*x; {%- endif %} - {%- elif input_field_group == 'ADPUPA' %} + {%- elif input_field_group == 'UPA' %} {%- if field_obs == 'CAPE' %} OBS_VAR{{ns.var_count}}_OPTIONS = cnt_thresh = [ >0 ]; diff --git a/parm/metplus/GridStat_ensprob.conf b/parm/metplus/GridStat_ensprob.conf index a43b8ed340..abde89ef4b 100644 --- a/parm/metplus/GridStat_ensprob.conf +++ b/parm/metplus/GridStat_ensprob.conf @@ -31,7 +31,7 @@ INIT_INCREMENT = 3600 # List of forecast leads to process for each run time (init or valid) # In hours if units are not specified # If unset, defaults to 0 (don't loop through forecast leads) -LEAD_SEQ = {{fhr_list}} +LEAD_SEQ = {{vx_leadhr_list}} # # Order of loops to process data - Options are times, processes # Not relevant if only one item is in the PROCESS_LIST @@ -354,15 +354,8 @@ PcpCombine tool. In that file, the field name consists of the observation field name here (field_obs) with the accumulation period appended to it (separated by an underscore), so we must do the same here to get an exact match. - -Note: -Turns out for ASNOW, PcpCombine is not run for obs, so we exclude that -from the "if" clause here (so it goes into the "else"). For workflow -behavior uniformity between APCP and ASNOW, consider running PcpCombine -for ASNOW observations as well (just as it's run for APCP observations). - {%- if (input_field_group in ['APCP', 'ASNOW']) %} #} - {%- if (input_field_group in ['APCP']) %} + {%- if (input_field_group in ['APCP', 'ASNOW']) %} OBS_VAR{{ns.var_count}}_NAME = {{field_obs}}_{{accum_hh}} {%- else %} OBS_VAR{{ns.var_count}}_NAME = {{field_obs}} diff --git a/parm/metplus/GridStat_or_PointStat.conf b/parm/metplus/GridStat_or_PointStat.conf index 39d34eb24f..155b028291 100644 --- a/parm/metplus/GridStat_or_PointStat.conf +++ b/parm/metplus/GridStat_or_PointStat.conf @@ -31,7 +31,7 @@ INIT_INCREMENT = 3600 # List of forecast leads to process for each run time (init or valid) # In hours if units are not specified # If unset, defaults to 0 (don't loop through forecast leads) -LEAD_SEQ = {{fhr_list}} +LEAD_SEQ = {{vx_leadhr_list}} # # Order of loops to process data - Options are times, processes # Not relevant if only one item is in the PROCESS_LIST @@ -93,7 +93,7 @@ METPLUS_CONF = {{ '{' ~ METPLUS_TOOL_NAME ~ '_OUTPUT_DIR}' }}/metplus_final.{{me {%- if (METPLUS_TOOL_NAME == 'GRID_STAT') %} - {%- if (input_field_group == 'APCP') %} + {%- if input_field_group in ['APCP', 'ASNOW'] %} #{{METPLUS_TOOL_NAME}}_INTERP_FIELD = BOTH #{{METPLUS_TOOL_NAME}}_INTERP_VLD_THRESH = 1.0 @@ -500,7 +500,7 @@ FCST_VAR{{ns.var_count}}_OPTIONS = set_attr_lead = "{lead?fmt=%H%M%S}"; {{opts_indent}}cnt_logic = UNION; {%- endif %} - {%- elif (input_field_group == 'ADPSFC') %} + {%- elif (input_field_group == 'SFC') %} {%- if (field_fcst in ['WIND']) %} {{opts_indent}}GRIB2_pdt = 0; ;; Derive instantaneous 10-m wind from U/V components, overriding max 10-m wind. 
@@ -518,7 +518,7 @@ FCST_VAR{{ns.var_count}}_OPTIONS = set_attr_lead = "{lead?fmt=%H%M%S}"; {{opts_indent}}desc = "CEILING"; {%- endif %} - {%- elif (input_field_group == 'ADPUPA') %} + {%- elif (input_field_group == 'UPA') %} {%- if (field_fcst in ['HGT']) %} {%- if (levels_fcst[0] in ['L0']) %} @@ -539,15 +539,8 @@ PcpCombine tool. In that file, the field name consists of the observation field name here (field_obs) with the accumulation period appended to it (separated by an underscore), so we must do the same here to get an exact match. - -Note: -Turns out for ASNOW, PcpCombine is not run for obs, so we exclude that -from the "if" clause here (so it goes into the "else"). For workflow -behavior uniformity between APCP and ASNOW, consider running PcpCombine -for ASNOW observations as well (just as it's run for APCP observations). - {%- if (input_field_group in ['APCP', 'ASNOW']) %} #} - {%- if (input_field_group in ['APCP']) %} + {%- if (input_field_group in ['APCP', 'ASNOW']) %} OBS_VAR{{ns.var_count}}_NAME = {{field_obs}}_{{accum_hh}} {%- else %} OBS_VAR{{ns.var_count}}_NAME = {{field_obs}} @@ -606,7 +599,7 @@ OBS_VAR{{ns.var_count}}_OPTIONS = convert(x) = x * 3280.84 * 0.001; {{opts_indent}}cnt_logic = UNION; {%- endif %} - {%- elif (input_field_group == 'ADPSFC') %} + {%- elif (input_field_group == 'SFC') %} {%- if (field_obs in ['WIND']) %} OBS_VAR{{ns.var_count}}_OPTIONS = GRIB2_pdt = 0; ;; Derive instantaneous 10-m wind from U/V components, overriding max 10-m wind. @@ -619,7 +612,7 @@ OBS_VAR{{ns.var_count}}_OPTIONS = GRIB_lvl_typ = 215; {{opts_indent}}interp = { type = [ { method = NEAREST; width = 1; } ]; } {%- endif %} - {%- elif (input_field_group == 'ADPUPA') %} + {%- elif (input_field_group == 'UPA') %} {%- if (field_obs in ['CAPE', 'MLCAPE']) %} OBS_VAR{{ns.var_count}}_OPTIONS = cnt_thresh = [ >0 ]; diff --git a/parm/metplus/Pb2nc_obs.conf b/parm/metplus/Pb2nc_obs.conf index 729bf2ba06..24d469602f 100644 --- a/parm/metplus/Pb2nc_obs.conf +++ b/parm/metplus/Pb2nc_obs.conf @@ -31,7 +31,7 @@ INIT_INCREMENT = 3600 # List of forecast leads to process for each run time (init or valid) # In hours if units are not specified # If unset, defaults to 0 (don't loop through forecast leads) -LEAD_SEQ = {{fhr_list}} +LEAD_SEQ = {{leadhr_list}} # Order of loops to process data - Options are times, processes # Not relevant if only one item is in the PROCESS_LIST diff --git a/parm/metplus/PcpCombine.conf b/parm/metplus/PcpCombine.conf index 3cee69df1d..04562dc14b 100644 --- a/parm/metplus/PcpCombine.conf +++ b/parm/metplus/PcpCombine.conf @@ -35,7 +35,7 @@ INIT_INCREMENT = 3600 # List of forecast leads to process for each run time (init or valid) # In hours if units are not specified # If unset, defaults to 0 (don't loop through forecast leads) -LEAD_SEQ = {{fhr_list}} +LEAD_SEQ = {{vx_leadhr_list}} # Order of loops to process data - Options are times, processes # Not relevant if only one item is in the PROCESS_LIST @@ -79,17 +79,26 @@ FCST_PCP_COMBINE_RUN = False # {{FCST_OR_OBS}}_PCP_COMBINE_METHOD = ADD -{%- if (FCST_OR_OBS == 'FCST') and (input_field_group == 'ASNOW') %} +{%- if (FCST_OR_OBS == 'FCST') %} + {%- if (input_field_group == 'ASNOW') %} # # Specify name of variable for Snowfall Accumulation. -# NOTE: Currently TSNOWP is used which is a constant-density estimate of snowfall accumulation. -# In future RRFS development, a GSL product with variable-density snowfall accumulation -# is planned for UPP. 
When that is included and turned on in post, this variable may be changed -# to ASNOW. # -FCST_PCP_COMBINE_INPUT_NAMES = TSNOWP - -FCST_PCP_COMBINE_INPUT_LEVELS = A01 +# NOTE: +# For forecasts, currently TSNOWP is used which is a constant-density +# estimate of snowfall accumulation. In future RRFS development, a GSL +# product with variable-density snowfall accumulation is planned for UPP. +# When that is included and turned on in post, this variable may be +# changed to ASNOW. +# +{{FCST_OR_OBS}}_PCP_COMBINE_INPUT_NAMES = TSNOWP +{{FCST_OR_OBS}}_PCP_COMBINE_INPUT_LEVELS = A{{input_accum_hh}} + {%- endif %} +{%- elif (FCST_OR_OBS == 'OBS') %} + {%- if (input_field_group == 'ASNOW') %} +{{FCST_OR_OBS}}_PCP_COMBINE_INPUT_NAMES = ASNOW +{{FCST_OR_OBS}}_PCP_COMBINE_INPUT_LEVELS = A{{input_accum_hh}} + {%- endif %} {%- endif %} # # Specify how to name the array in the NetCDF file that PcpCombine @@ -98,7 +107,7 @@ FCST_PCP_COMBINE_INPUT_LEVELS = A01 # For accumulation variables (which is the only type of variable that we # run PcpCombine on), we add the accumulation period to the variable name # because this is how METplus normally sets names. This is because, -# epending on the settings in the METplus configuration file, it is +# depending on the settings in the METplus configuration file, it is # possible for a single NetCDF output file to contain output for multiple # accumulations, so even though the "level" attribute of each accumulation # variable in the output file will contain the level (e.g. "A1" or "A3"), @@ -110,18 +119,18 @@ FCST_PCP_COMBINE_INPUT_LEVELS = A01 # the output NetCDF file). # {%- if (input_field_group in ['APCP', 'ASNOW']) %} -{{FCST_OR_OBS}}_PCP_COMBINE_OUTPUT_NAME = {{fieldname_in_met_output}}_{{accum_hh}} +{{FCST_OR_OBS}}_PCP_COMBINE_OUTPUT_NAME = {{fieldname_in_met_output}}_{{output_accum_hh}} {%- else %} {{FCST_OR_OBS}}_PCP_COMBINE_OUTPUT_NAME = {{fieldname_in_met_output}} {%- endif %} # # Accumulation interval available in the input data. # -{{FCST_OR_OBS}}_PCP_COMBINE_INPUT_ACCUMS = 01 +{{FCST_OR_OBS}}_PCP_COMBINE_INPUT_ACCUMS = {{input_accum_hh}} # # Accumulation interval to generate in the output file. # -{{FCST_OR_OBS}}_PCP_COMBINE_OUTPUT_ACCUM = {{accum_hh}} +{{FCST_OR_OBS}}_PCP_COMBINE_OUTPUT_ACCUM = {{output_accum_hh}} # # If the output NetCDF file already exists, specify whether or not to # skip the call to PcpCombine. @@ -177,7 +186,7 @@ FCST_PCP_COMBINE_CONSTANT_INIT = True # # Name to identify observation data in output. # -OBTYPE = CCPA +OBTYPE = {{obtype}} {%- endif %} # # Specify file type of input data. diff --git a/parm/metplus/PointStat_ensmean.conf b/parm/metplus/PointStat_ensmean.conf index b16a481dbd..fc9ccec85b 100644 --- a/parm/metplus/PointStat_ensmean.conf +++ b/parm/metplus/PointStat_ensmean.conf @@ -31,7 +31,7 @@ INIT_INCREMENT = 3600 # List of forecast leads to process for each run time (init or valid) # In hours if units are not specified # If unset, defaults to 0 (don't loop through forecast leads) -LEAD_SEQ = {{fhr_list}} +LEAD_SEQ = {{vx_leadhr_list}} # # Order of loops to process data - Options are times, processes # Not relevant if only one item is in the PROCESS_LIST @@ -238,8 +238,8 @@ following dictionary. 'ASNOW': [], 'REFC': [], 'RETOP': [], - 'ADPSFC': ['TCDC', 'VIS', 'HGT'], - 'ADPUPA': []} %} + 'SFC': ['TCDC', 'VIS', 'HGT'], + 'UPA': []} %} {%- set fields_fcst_to_exclude = fields_fcst_to_exclude_by_field_group[input_field_group] %} {#- @@ -419,7 +419,7 @@ Set forecast field options. 
{%- endif %} {%- set opts_indent = ' '*opts_indent_len %} - {%- if input_field_group == 'ADPUPA' %} + {%- if input_field_group == 'UPA' %} {%- if field_fcst == 'CAPE' %} FCST_VAR{{ns.var_count}}_OPTIONS = cnt_thresh = [ >0 ]; @@ -481,7 +481,7 @@ Set observation field options. {%- set opts_indent_len = opts_indent_len - 1 %} {%- set opts_indent = ' '*opts_indent_len %} - {%- if input_field_group == 'ADPUPA' %} + {%- if input_field_group == 'UPA' %} {%- if field_obs == 'CAPE' %} OBS_VAR{{ns.var_count}}_OPTIONS = cnt_thresh = [ >0 ]; diff --git a/parm/metplus/PointStat_ensprob.conf b/parm/metplus/PointStat_ensprob.conf index 84b9f3954d..42ac254a4b 100644 --- a/parm/metplus/PointStat_ensprob.conf +++ b/parm/metplus/PointStat_ensprob.conf @@ -31,7 +31,7 @@ INIT_INCREMENT = 3600 # List of forecast leads to process for each run time (init or valid) # In hours if units are not specified # If unset, defaults to 0 (don't loop through forecast leads) -LEAD_SEQ = {{fhr_list}} +LEAD_SEQ = {{vx_leadhr_list}} # # Order of loops to process data - Options are times, processes # Not relevant if only one item is in the PROCESS_LIST @@ -355,7 +355,7 @@ Set forecast field options. {%- endif %} {%- set opts_indent = ' '*opts_indent_len %} - {%- if input_field_group == 'ADPSFC' %} + {%- if input_field_group == 'SFC' %} {%- if field_fcst == 'HGT' %} FCST_VAR{{ns.var_count}}_OPTIONS = desc = "CEILING"; @@ -400,7 +400,7 @@ Set observation field options. {%- set opts_indent_len = opts_indent_len - 1 %} {%- set opts_indent = ' '*opts_indent_len %} - {%- if input_field_group == 'ADPSFC' %} + {%- if input_field_group == 'SFC' %} {%- if field_obs == 'CEILING' %} OBS_VAR{{ns.var_count}}_OPTIONS = GRIB_lvl_typ = 215; @@ -409,7 +409,7 @@ OBS_VAR{{ns.var_count}}_OPTIONS = GRIB_lvl_typ = 215; OBS_VAR{{ns.var_count}}_OPTIONS = interp = { type = [ { method = NEAREST; width = 1; } ]; } {%- endif %} - {%- elif input_field_group == 'ADPUPA' %} + {%- elif input_field_group == 'UPA' %} {%- if field_obs == 'CAPE' %} OBS_VAR{{ns.var_count}}_OPTIONS = cnt_thresh = [ >0 ]; diff --git a/parm/metplus/vx_configs/vx_config_det.obs_gdas.model_aiml.yaml b/parm/metplus/vx_configs/vx_config_det.obs_gdas.model_aiml.yaml new file mode 100644 index 0000000000..81425cc1a1 --- /dev/null +++ b/parm/metplus/vx_configs/vx_config_det.obs_gdas.model_aiml.yaml @@ -0,0 +1,54 @@ +# +# This configuration file specifies the field groups, fields, levels, +# and thresholds to use for DETERMINISTIC verification. The format is +# as follows: +# +# FIELD_GROUP1: +# FIELD1: +# LEVEL1: list_of_thresholds +# LEVEL2: list_of_thresholds +# ... +# FIELD2: +# LEVEL1: list_of_thresholds +# LEVEL2: list_of_thresholds +# ... +# ... +# +# FIELD_GROUP2: +# FIELD1: +# LEVEL1: list_of_thresholds +# LEVEL2: list_of_thresholds +# ... +# FIELD2: +# LEVEL1: list_of_thresholds +# LEVEL2: list_of_thresholds +# ... +# ... +# +# ... +# +# If the threshold list for a given combination of field group, field, +# and level is set to the empty string ([]), then all values of that +# field will be included in the verification. +# +# Both the keys that represent field groups, fields, and levels and the +# strings in the list of thresholds may contain the separator string "%%" +# that separates the value of the quantity for the forecast from that for +# the observations. For example, if a field is set to +# +# RETOP%%EchoTop18 +# +# it means the name of the field in the forecast data is RETOP while its +# name in the observations is EchoTop18. 
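+#
+# For example, a simplified version of the WIND entry appearing below,
+#
+#   SFC:
+#     WIND:
+#       Z10: ['ge2.572', 'ge5.144']
+#
+# would verify 10-m wind speed only at the thresholds >= 2.572 and
+# >= 5.144 (MET threshold syntax, where "ge" means "greater than or
+# equal to") in the field's native units (presumably m/s here, i.e.
+# roughly 5 and 10 kt). An empty threshold list ([], as for TMP at Z2
+# below) means no thresholds are applied, so all values of the field
+# are included in the verification.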
+# +SFC: + TMP: + Z2: [] + UGRD: + Z10: ['ge2.572'] + VGRD: + Z10: ['ge2.572'] + WIND: + Z10: ['ge2.572', 'ge2.572&<5.144', 'ge5.144', 'ge10.288', 'ge15.433'] + PRES%%PRMSL: + Z0: [] diff --git a/parm/metplus/vx_configs/vx_config_det.obs_gdas.model_gfs.yaml b/parm/metplus/vx_configs/vx_config_det.obs_gdas.model_gfs.yaml new file mode 100644 index 0000000000..dde2dd3302 --- /dev/null +++ b/parm/metplus/vx_configs/vx_config_det.obs_gdas.model_gfs.yaml @@ -0,0 +1,54 @@ +# +# This configuration file specifies the field groups, fields, levels, +# and thresholds to use for DETERMINISTIC verification. The format is +# as follows: +# +# FIELD_GROUP1: +# FIELD1: +# LEVEL1: list_of_thresholds +# LEVEL2: list_of_thresholds +# ... +# FIELD2: +# LEVEL1: list_of_thresholds +# LEVEL2: list_of_thresholds +# ... +# ... +# +# FIELD_GROUP2: +# FIELD1: +# LEVEL1: list_of_thresholds +# LEVEL2: list_of_thresholds +# ... +# FIELD2: +# LEVEL1: list_of_thresholds +# LEVEL2: list_of_thresholds +# ... +# ... +# +# ... +# +# If the threshold list for a given combination of field group, field, +# and level is set to the empty string ([]), then all values of that +# field will be included in the verification. +# +# Both the keys that represent field groups, fields, and levels and the +# strings in the list of thresholds may contain the separator string "%%" +# that separates the value of the quantity for the forecast from that for +# the observations. For example, if a field is set to +# +# RETOP%%EchoTop18 +# +# it means the name of the field in the forecast data is RETOP while its +# name in the observations is EchoTop18. +# +SFC: + TMP: + Z2: [] + UGRD: + Z10: ['ge2.572'] + VGRD: + Z10: ['ge2.572'] + WIND: + Z10: ['ge2.572', 'ge2.572&<5.144', 'ge5.144', 'ge10.288', 'ge15.433'] + PRMSL: + Z0: [] diff --git a/parm/metplus/vx_config_det.yaml b/parm/metplus/vx_configs/vx_config_det.yaml similarity index 98% rename from parm/metplus/vx_config_det.yaml rename to parm/metplus/vx_configs/vx_config_det.yaml index 8ea3fd5e13..48b8aff97b 100644 --- a/parm/metplus/vx_config_det.yaml +++ b/parm/metplus/vx_configs/vx_config_det.yaml @@ -50,13 +50,14 @@ APCP: ASNOW: ASNOW: A6: ['gt0.0', 'ge2.54', 'ge5.08', 'ge10.16', 'ge20.32'] + A24: ['gt0.0', 'ge2.54', 'ge10.16', 'ge20.32', 'ge30.48'] REFC: REFC%%MergedReflectivityQCComposite: L0%%Z500: ['ge20', 'ge30', 'ge40', 'ge50'] RETOP: RETOP%%EchoTop18: L0%%Z500: ['ge20', 'ge30', 'ge40', 'ge50'] -ADPSFC: +SFC: TMP: Z2: [] DPT: @@ -89,7 +90,7 @@ ADPSFC: L0%%Z0: ['ge1.0%%ge164&&le166'] CICEP%%PRWE: L0%%Z0: ['ge1.0%%ge174&&le176'] -ADPUPA: +UPA: TMP: P1000: &adpupa_tmp_threshes [] diff --git a/parm/metplus/vx_config_ens.yaml b/parm/metplus/vx_configs/vx_config_ens.yaml similarity index 95% rename from parm/metplus/vx_config_ens.yaml rename to parm/metplus/vx_configs/vx_config_ens.yaml index 5f55254a4c..4eb1524648 100644 --- a/parm/metplus/vx_config_ens.yaml +++ b/parm/metplus/vx_configs/vx_config_ens.yaml @@ -14,13 +14,14 @@ APCP: ASNOW: ASNOW: A6: ['gt0.0', 'ge2.54', 'ge5.08', 'ge10.16', 'ge20.32'] + A24: ['gt0.0', 'ge2.54', 'ge10.16', 'ge20.32', 'ge30.48'] REFC: REFC%%MergedReflectivityQCComposite: L0%%Z500: ['ge20', 'ge30', 'ge40', 'ge50'] RETOP: RETOP%%EchoTop18: L0%%Z500: ['ge20', 'ge30', 'ge40', 'ge50'] -ADPSFC: +SFC: TMP: Z2: ['ge268', 'ge273', 'ge278', 'ge293', 'ge298', 'ge303'] DPT: @@ -33,7 +34,7 @@ ADPSFC: L0: ['lt1609', 'lt8045', 'ge8045'] HGT%%CEILING: L0: ['lt152', 'lt305', 'lt914'] -ADPUPA: +UPA: TMP: P850: ['ge288', 'ge293', 'ge298'] P700: ['ge273', 'ge278', 'ge283'] diff 
--git a/parm/wflow/default_workflow.yaml b/parm/wflow/default_workflow.yaml index e37fdae1ea..4ffb6f288a 100644 --- a/parm/wflow/default_workflow.yaml +++ b/parm/wflow/default_workflow.yaml @@ -4,7 +4,7 @@ rocoto: entities: ACCOUNT: '{{ user.ACCOUNT }}' - CCPA_OBS_DIR: '{{ platform.CCPA_OBS_DIR }}' + CCPA_OBS_DIR: '{{ verification.CCPA_OBS_DIR }}' COLDSTART: '{{ workflow.COLDSTART }}' COMINgfs: '{{ platform.get("COMINgfs") }}' GLOBAL_VAR_DEFNS_FP: '{{ workflow.GLOBAL_VAR_DEFNS_FP }}' @@ -14,10 +14,10 @@ rocoto: LOAD_MODULES_RUN_TASK: '{{ workflow.LOAD_MODULES_RUN_TASK_FP }} {{ user.MACHINE }}' LOGEXT: ".log" NET: '{{ nco.NET_default }}' - MRMS_OBS_DIR: '{{ platform.MRMS_OBS_DIR }}' + MRMS_OBS_DIR: '{{ verification.MRMS_OBS_DIR }}' NCORES_PER_NODE: '{{ platform.NCORES_PER_NODE }}' - NDAS_OBS_DIR: '{{ platform.NDAS_OBS_DIR }}' - NOHRSC_OBS_DIR: '{{ platform.NOHRSC_OBS_DIR }}' + NDAS_OBS_DIR: '{{ verification.NDAS_OBS_DIR }}' + NOHRSC_OBS_DIR: '{{ verification.NOHRSC_OBS_DIR }}' PARTITION_DEFAULT: '{{ platform.get("PARTITION_DEFAULT") }}' PARTITION_FCST: '{{ platform.get("PARTITION_FCST") }}' PARTITION_HPSS: '{{ platform.get("PARTITION_HPSS") }}' @@ -51,7 +51,13 @@ rocoto: forecast: - !startstopfreq ['{{workflow.DATE_FIRST_CYCL}}', '{{workflow.DATE_LAST_CYCL}}', '{{workflow.INCR_CYCL_FREQ}}'] cycled_from_second: - - !startstopfreq ['{%- if workflow.DATE_FIRST_CYCL != workflow.DATE_LAST_CYCL %}{{ [workflow.DATE_FIRST_CYCL[0:8], "{:02d}".format(workflow.INCR_CYCL_FREQ)]|join }}{%- else %}{{workflow.DATE_FIRST_CYCL}}{%- endif %}', '{{workflow.DATE_LAST_CYCL}}', '{{workflow.INCR_CYCL_FREQ}}'] + - !startstopfreq ['{%- if workflow.DATE_FIRST_CYCL != workflow.DATE_LAST_CYCL %} + {{- workflow.DATE_SECOND_CYCL }} + {%- else %} + {{- workflow.DATE_FIRST_CYCL }} + {%- endif %}', + '{{ workflow.DATE_LAST_CYCL }}', + '{{ workflow.INCR_CYCL_FREQ }}'] log: !cycstr '&LOGDIR;/FV3LAM_wflow.{% if user.RUN_ENVIR == "nco" %}{{ workflow.WORKFLOW_ID + "." 
}}{% endif %}log' tasks: taskgroups: '{{ ["parm/wflow/prep.yaml", "parm/wflow/coldstart.yaml", "parm/wflow/post.yaml", "parm/wflow/test.yaml"]|include }}' diff --git a/parm/wflow/verify_det.yaml b/parm/wflow/verify_det.yaml index a62adb4481..c090ea8b0c 100644 --- a/parm/wflow/verify_det.yaml +++ b/parm/wflow/verify_det.yaml @@ -1,6 +1,6 @@ default_task_verify_det: &default_task_verify_det account: '&ACCOUNT;' - attrs: + attrs: &default_attrs cycledefs: forecast maxtries: '1' envars: &default_vars @@ -21,26 +21,28 @@ default_task_verify_det: &default_task_verify_det queue: '&QUEUE_DEFAULT;' walltime: 00:30:00 -metatask_GridStat_CCPA_all_accums_all_mems: +metatask_GridStat_APCP_all_accums_all_mems: var: ACCUM_HH: '{% for ah in verification.VX_APCP_ACCUMS_HRS %}{% if workflow.FCST_LEN_HRS >= ah %}{{ "%02d " % ah }}{% endif %}{% endfor %}' - metatask_GridStat_CCPA_APCP#ACCUM_HH#h_all_mems: + metatask_GridStat_APCP#ACCUM_HH#h_all_mems: var: mem: '{% if global.DO_ENSEMBLE %}{% for m in range(1, global.NUM_ENS_MEMBERS+1) %}{{ "%03d "%m }}{%- endfor -%} {% else %}{{ "000"|string }}{% endif %}' task_run_MET_GridStat_vx_APCP#ACCUM_HH#h_mem#mem#: <<: *default_task_verify_det attrs: + <<: *default_attrs maxtries: '2' command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_GRIDSTAT_OR_POINTSTAT_VX"' envars: <<: *default_vars OBS_DIR: '&CCPA_OBS_DIR;' - VAR: APCP + FIELD_GROUP: 'APCP' ACCUM_HH: '#ACCUM_HH#' METPLUSTOOLNAME: 'GRIDSTAT' OBTYPE: 'CCPA' ENSMEM_INDX: "#mem#" SLASH_ENSMEM_SUBDIR_OR_NULL: '{% if global.DO_ENSEMBLE %}{{ "/mem#mem#" }}{% endif %}' + OBS_AVAIL_INTVL_HRS: '{{- verification.CCPA_OBS_AVAIL_INTVL_HRS }}' FCST_LEVEL: 'A#ACCUM_HH#' FCST_THRESH: 'all' walltime: 02:00:00 @@ -48,100 +50,134 @@ metatask_GridStat_CCPA_all_accums_all_mems: and: taskdep_pcpcombine_obs: attrs: - task: run_MET_PcpCombine_obs_APCP#ACCUM_HH#h + task: run_MET_PcpCombine_APCP#ACCUM_HH#h_obs_CCPA taskdep_pcpcombine_fcst: attrs: - task: run_MET_PcpCombine_fcst_APCP#ACCUM_HH#h_mem#mem# + task: run_MET_PcpCombine_APCP#ACCUM_HH#h_fcst_mem#mem# -metatask_GridStat_NOHRSC_all_accums_all_mems: +metatask_GridStat_ASNOW_all_accums_all_mems: var: ACCUM_HH: '{% for ah in verification.VX_ASNOW_ACCUMS_HRS %}{% if workflow.FCST_LEN_HRS >= ah %}{{ "%02d " % ah }}{% endif %}{% endfor %}' - metatask_GridStat_NOHRSC_ASNOW#ACCUM_HH#h_all_mems: + metatask_GridStat_ASNOW#ACCUM_HH#h_all_mems: var: mem: '{% if global.DO_ENSEMBLE %}{% for m in range(1, global.NUM_ENS_MEMBERS+1) %}{{ "%03d "%m }}{%- endfor -%} {% else %}{{ "000"|string }}{% endif %}' task_run_MET_GridStat_vx_ASNOW#ACCUM_HH#h_mem#mem#: <<: *default_task_verify_det attrs: + <<: *default_attrs maxtries: '2' command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_GRIDSTAT_OR_POINTSTAT_VX"' envars: <<: *default_vars OBS_DIR: '&NOHRSC_OBS_DIR;' - VAR: ASNOW + FIELD_GROUP: 'ASNOW' ACCUM_HH: '#ACCUM_HH#' METPLUSTOOLNAME: 'GRIDSTAT' OBTYPE: 'NOHRSC' ENSMEM_INDX: "#mem#" SLASH_ENSMEM_SUBDIR_OR_NULL: '{% if global.DO_ENSEMBLE %}{{ "/mem#mem#" }}{% endif %}' + OBS_AVAIL_INTVL_HRS: '{{- verification.NOHRSC_OBS_AVAIL_INTVL_HRS }}' FCST_LEVEL: 'A#ACCUM_HH#' FCST_THRESH: 'all' walltime: 02:00:00 dependency: and: - taskdep_get_obs_nohrsc: + taskdep_pcpcombine_obs: attrs: - task: get_obs_nohrsc + task: run_MET_PcpCombine_ASNOW#ACCUM_HH#h_obs_NOHRSC taskdep_pcpcombine_fcst: attrs: - task: run_MET_PcpCombine_fcst_ASNOW#ACCUM_HH#h_mem#mem# + task: run_MET_PcpCombine_ASNOW#ACCUM_HH#h_fcst_mem#mem# -metatask_GridStat_MRMS_all_mems: 
+metatask_GridStat_REFC_RETOP_all_mems: var: mem: '{% if global.DO_ENSEMBLE %}{% for m in range(1, global.NUM_ENS_MEMBERS+1) %}{{ "%03d "%m }}{%- endfor -%} {% else %}{{ "000"|string }}{% endif %}' - metatask_GridStat_MRMS_mem#mem#: + metatask_GridStat_REFC_RETOP_mem#mem#: var: - VAR: '{% for var in verification.VX_FIELDS %}{% if var in ["REFC", "RETOP"] %}{{ "%s " % var }}{% endif %}{% endfor %}' - task_run_MET_GridStat_vx_#VAR#_mem#mem#: + FIELD_GROUP: '{% for var in verification.VX_FIELD_GROUPS %}{% if var in ["REFC", "RETOP"] %}{{ "%s " % var }}{% endif %}{% endfor %}' + task_run_MET_GridStat_vx_#FIELD_GROUP#_mem#mem#: <<: *default_task_verify_det command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_GRIDSTAT_OR_POINTSTAT_VX"' envars: <<: *default_vars OBS_DIR: '&MRMS_OBS_DIR;' - VAR: '#VAR#' - ACCUM_HH: '01' + FIELD_GROUP: '#FIELD_GROUP#' METPLUSTOOLNAME: 'GRIDSTAT' OBTYPE: 'MRMS' ENSMEM_INDX: "#mem#" SLASH_ENSMEM_SUBDIR_OR_NULL: '{% if global.DO_ENSEMBLE %}{{ "/mem#mem#" }}{% endif %}' + OBS_AVAIL_INTVL_HRS: '{{- verification.MRMS_OBS_AVAIL_INTVL_HRS }}' FCST_LEVEL: 'L0' FCST_THRESH: 'all' walltime: 02:00:00 dependency: and: - taskdep_get_obs_mrms: + datadep_all_get_obs_mrms_complete: attrs: - task: get_obs_mrms + age: 00:00:00:30 + # Check that the flag files that indicate that the get_obs_mrms tasks + # are complete are all present before launching any GridStat task. + text: '{%- set num_obs_days = workflow.OBS_DAYS_ALL_CYCLES_INST|length %} + {%- set indent = " " %} + {%- set indent_p2 = indent + " " %} + {%- for n in range(0, num_obs_days) %} + {%- set yyyymmdd = workflow.OBS_DAYS_ALL_CYCLES_INST[n] %} + {%- if n == 0 %} + {{- workflow.WFLOW_FLAG_FILES_DIR ~ "/get_obs_mrms_" ~ yyyymmdd ~ "_complete.txt" }} + {%- else %} + {{- indent ~ "\n" }} + {{- indent ~ "\n" }} + {{- indent_p2 ~ workflow.WFLOW_FLAG_FILES_DIR ~ "/get_obs_mrms_" ~ yyyymmdd ~ "_complete.txt" }} + {%- endif %} + {%- if n != num_obs_days-1 %} {{- "\n" }} {%- endif %} + {%- endfor %}' datadep_post_files_exist: attrs: age: 00:00:00:30 text: !cycstr '{{ workflow.EXPTDIR }}/@Y@m@d@H/post_files_exist_mem#mem#.txt' -metatask_PointStat_NDAS_all_mems: +metatask_PointStat_SFC_UPA_all_mems: var: mem: '{% if global.DO_ENSEMBLE %}{% for m in range(1, global.NUM_ENS_MEMBERS+1) %}{{ "%03d "%m }}{%- endfor -%} {% else %}{{ "000"|string }}{% endif %}' - metatask_PointStat_NDAS_mem#mem#: + metatask_PointStat_SFC_UPA_mem#mem#: var: - VAR: '{% for var in verification.VX_FIELDS %}{% if var in ["ADPSFC", "ADPUPA"] %}{{ "%s " % var }}{% endif %}{% endfor %}' - task_run_MET_PointStat_vx_#VAR#_mem#mem#: + FIELD_GROUP: '{% for var in verification.VX_FIELD_GROUPS %}{% if var in ["SFC", "UPA"] %}{{ "%s " % var }}{% endif %}{% endfor %}' + task_run_MET_PointStat_vx_#FIELD_GROUP#_mem#mem#: <<: *default_task_verify_det command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_GRIDSTAT_OR_POINTSTAT_VX"' envars: <<: *default_vars OBS_DIR: '&NDAS_OBS_DIR;' - VAR: '#VAR#' + FIELD_GROUP: '#FIELD_GROUP#' METPLUSTOOLNAME: 'POINTSTAT' OBTYPE: 'NDAS' - ACCUM_HH: '01' ENSMEM_INDX: "#mem#" SLASH_ENSMEM_SUBDIR_OR_NULL: '{% if global.DO_ENSEMBLE %}{{ "/mem#mem#" }}{% endif %}' + OBS_AVAIL_INTVL_HRS: '{{- verification.NDAS_OBS_AVAIL_INTVL_HRS }}' FCST_LEVEL: 'all' FCST_THRESH: 'all' walltime: 01:00:00 dependency: and: - taskdep_pb2nc: + datadep_all_pb2nc_obs_ndas_complete: attrs: - task: run_MET_Pb2nc_obs + age: 00:00:00:30 + # Check that the flag files that indicate that the Pb2NC tasks are + # complete are all present 
before launching any PointStat task. + text: '{%- set num_obs_days = workflow.OBS_DAYS_ALL_CYCLES_INST|length %} + {%- set indent = " " %} + {%- set indent_p2 = indent + " " %} + {%- for n in range(0, num_obs_days) %} + {%- set yyyymmdd = workflow.OBS_DAYS_ALL_CYCLES_INST[n] %} + {%- if n == 0 %} + {{- workflow.WFLOW_FLAG_FILES_DIR ~ "/run_met_pb2nc_obs_ndas_" ~ yyyymmdd ~ "_complete.txt" }} + {%- else %} + {{- indent ~ "\n" }} + {{- indent ~ "\n" }} + {{- indent_p2 ~ workflow.WFLOW_FLAG_FILES_DIR ~ "/run_met_pb2nc_obs_ndas_" ~ yyyymmdd ~ "_complete.txt" }} + {%- endif %} + {%- if n != num_obs_days-1 %} {{- "\n" }} {%- endif %} + {%- endfor %}' datadep_post_files_exist: attrs: age: 00:00:00:30 diff --git a/parm/wflow/verify_ens.yaml b/parm/wflow/verify_ens.yaml index 71bc20b3b0..d3601c14d8 100644 --- a/parm/wflow/verify_ens.yaml +++ b/parm/wflow/verify_ens.yaml @@ -21,7 +21,7 @@ default_task_verify_ens: &default_task_verify_ens queue: '&QUEUE_DEFAULT;' walltime: 01:00:00 -metatask_GenEnsProd_EnsembleStat_CCPA: +metatask_GenEnsProd_EnsembleStat_APCP_all_accums: var: ACCUM_HH: '{% for ah in verification.VX_APCP_ACCUMS_HRS %}{% if workflow.FCST_LEN_HRS >= ah %}{{ "%02d " % ah }}{% endif %}{% endfor %}' task_run_MET_GenEnsProd_vx_APCP#ACCUM_HH#h: &task_GenEnsProd_CCPA @@ -31,22 +31,15 @@ metatask_GenEnsProd_EnsembleStat_CCPA: <<: *default_vars ACCUM_HH: '#ACCUM_HH#' OBS_DIR: '&CCPA_OBS_DIR;' - VAR: APCP + FIELD_GROUP: 'APCP' METPLUSTOOLNAME: 'GENENSPROD' OBTYPE: 'CCPA' FCST_LEVEL: 'A#ACCUM_HH#' FCST_THRESH: 'all' dependency: - and: - # The PcpCombine task for obs must be complete because this GenEnsProd - # task checks to see the forecast hours for which obs are available before - # processing the forecast for those hours. - taskdep_pcpcombine_obs: - attrs: - task: run_MET_PcpCombine_obs_APCP#ACCUM_HH#h - metataskdep_pcpcombine_fcst: - attrs: - metatask: PcpCombine_fcst_APCP#ACCUM_HH#h_all_mems + metataskdep_pcpcombine_fcst: + attrs: + metatask: PcpCombine_APCP#ACCUM_HH#h_all_mems task_run_MET_EnsembleStat_vx_APCP#ACCUM_HH#h: <<: *task_GenEnsProd_CCPA envars: @@ -54,11 +47,15 @@ metatask_GenEnsProd_EnsembleStat_CCPA: METPLUSTOOLNAME: 'ENSEMBLESTAT' FCST_THRESH: 'none' dependency: - taskdep_genensprod: - attrs: - task: run_MET_GenEnsProd_vx_APCP#ACCUM_HH#h + and: + taskdep_pcpcombine_obs_ccpa: &taskdep_pcpcombine_obs_ccpa + attrs: + task: run_MET_PcpCombine_APCP#ACCUM_HH#h_obs_CCPA + taskdep_genensprod: + attrs: + task: run_MET_GenEnsProd_vx_APCP#ACCUM_HH#h -metatask_GenEnsProd_EnsembleStat_NOHRSC: +metatask_GenEnsProd_EnsembleStat_ASNOW_all_accums: var: ACCUM_HH: '{% for ah in verification.VX_ASNOW_ACCUMS_HRS %}{% if workflow.FCST_LEN_HRS >= ah %}{{ "%02d " % ah }}{% endif %}{% endfor %}' task_run_MET_GenEnsProd_vx_ASNOW#ACCUM_HH#h: &task_GenEnsProd_NOHRSC @@ -68,19 +65,16 @@ metatask_GenEnsProd_EnsembleStat_NOHRSC: <<: *default_vars ACCUM_HH: '#ACCUM_HH#' OBS_DIR: '&NOHRSC_OBS_DIR;' - VAR: ASNOW + FIELD_GROUP: 'ASNOW' METPLUSTOOLNAME: 'GENENSPROD' OBTYPE: 'NOHRSC' FCST_LEVEL: 'A#ACCUM_HH#' FCST_THRESH: 'all' dependency: and: - # The PcpCombine task for obs must be complete because this GenEnsProd - # task checks to see the forecast hours for which obs are available before - # processing the forecast for those hours. 
metataskdep_pcpcombine_fcst: attrs: - metatask: PcpCombine_fcst_ASNOW#ACCUM_HH#h_all_mems + metatask: PcpCombine_ASNOW#ACCUM_HH#h_all_mems task_run_MET_EnsembleStat_vx_ASNOW#ACCUM_HH#h: <<: *task_GenEnsProd_NOHRSC envars: @@ -89,24 +83,24 @@ metatask_GenEnsProd_EnsembleStat_NOHRSC: FCST_THRESH: 'none' dependency: and: - taskdep: + taskdep_pcpcombine_obs_nohrsc: &taskdep_pcpcombine_obs_nohrsc attrs: - task: get_obs_nohrsc + task: run_MET_PcpCombine_ASNOW#ACCUM_HH#h_obs_NOHRSC taskdep_genensprod: attrs: task: run_MET_GenEnsProd_vx_ASNOW#ACCUM_HH#h -metatask_GenEnsProd_EnsembleStat_MRMS: +metatask_GenEnsProd_EnsembleStat_REFC_RETOP: var: - VAR: '{% for var in verification.VX_FIELDS %}{% if var in ["REFC", "RETOP"] %}{{ "%s " % var }}{% endif %}{% endfor %}' - task_run_MET_GenEnsProd_vx_#VAR#: &task_GenEnsProd_MRMS + FIELD_GROUP: '{% for var in verification.VX_FIELD_GROUPS %}{% if var in ["REFC", "RETOP"] %}{{ "%s " % var }}{% endif %}{% endfor %}' + task_run_MET_GenEnsProd_vx_#FIELD_GROUP#: &task_GenEnsProd_MRMS <<: *default_task_verify_ens command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_GENENSPROD_OR_ENSEMBLESTAT"' envars: &envars_GenEnsProd_MRMS <<: *default_vars ACCUM_HH: '01' OBS_DIR: '&MRMS_OBS_DIR;' - VAR: '#VAR#' + FIELD_GROUP: '#FIELD_GROUP#' METPLUSTOOLNAME: 'GENENSPROD' OBTYPE: 'MRMS' FCST_LEVEL: 'L0' @@ -116,7 +110,7 @@ metatask_GenEnsProd_EnsembleStat_MRMS: metataskdep_check_post_output: &check_post_output attrs: metatask: check_post_output_all_mems - task_run_MET_EnsembleStat_vx_#VAR#: + task_run_MET_EnsembleStat_vx_#FIELD_GROUP#: <<: *task_GenEnsProd_MRMS envars: <<: *envars_GenEnsProd_MRMS @@ -125,138 +119,179 @@ metatask_GenEnsProd_EnsembleStat_MRMS: FCST_THRESH: 'none' dependency: and: - taskdep_get_obs_mrms: + datadep_all_get_obs_mrms_complete: &all_get_obs_mrms_complete attrs: - task: get_obs_mrms + age: 00:00:00:30 + # Check that the flag files that indicate that the get_obs_mrms tasks + # are complete are all present before launching any EnsembleStat task. 
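+          # As an illustration, for a hypothetical obs day of 20230217 the
+          # template below generates a flag-file path of the form
+          #   {WFLOW_FLAG_FILES_DIR}/get_obs_mrms_20230217_complete.txt
+          # and it does so for every day in OBS_DAYS_ALL_CYCLES_INST, so this
+          # dependency is met only after all of the day-dependent get_obs_mrms
+          # tasks have written their completion flag files.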
+ text: '{%- set num_obs_days = workflow.OBS_DAYS_ALL_CYCLES_INST|length %} + {%- set indent = " " %} + {%- set indent_p2 = indent + " " %} + {%- for n in range(0, num_obs_days) %} + {%- set yyyymmdd = workflow.OBS_DAYS_ALL_CYCLES_INST[n] %} + {%- if n == 0 %} + {{- workflow.WFLOW_FLAG_FILES_DIR ~ "/get_obs_mrms_" ~ yyyymmdd ~ "_complete.txt" }} + {%- else %} + {{- indent ~ "\n" }} + {{- indent ~ "\n" }} + {{- indent_p2 ~ workflow.WFLOW_FLAG_FILES_DIR ~ "/get_obs_mrms_" ~ yyyymmdd ~ "_complete.txt" }} + {%- endif %} + {%- if n != num_obs_days-1 %} {{- "\n" }} {%- endif %} + {%- endfor %}' taskdep_genensprod: attrs: - task: run_MET_GenEnsProd_vx_#VAR# + task: run_MET_GenEnsProd_vx_#FIELD_GROUP# -metatask_GenEnsProd_EnsembleStat_NDAS: +metatask_GenEnsProd_EnsembleStat_SFC_UPA: var: - VAR: '{% for var in verification.VX_FIELDS %}{% if var in ["ADPSFC", "ADPUPA"] %}{{ "%s " % var }}{% endif %}{% endfor %}' - task_run_MET_GenEnsProd_vx_#VAR#: &task_GenEnsProd_NDAS + FIELD_GROUP: '{% for var in verification.VX_FIELD_GROUPS %}{% if var in ["SFC", "UPA"] %}{{ "%s " % var }}{% endif %}{% endfor %}' + task_run_MET_GenEnsProd_vx_#FIELD_GROUP#: &task_GenEnsProd_NDAS <<: *default_task_verify_ens command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_GENENSPROD_OR_ENSEMBLESTAT"' envars: &envars_GenEnsProd_NDAS <<: *default_vars OBS_DIR: '&NDAS_OBS_DIR;' - VAR: '#VAR#' + FIELD_GROUP: '#FIELD_GROUP#' METPLUSTOOLNAME: 'GENENSPROD' OBTYPE: 'NDAS' ACCUM_HH: '01' FCST_LEVEL: 'all' FCST_THRESH: 'all' - walltime: 02:30:00 + walltime: 04:15:00 dependency: - and: - # The Pb2nc task (which is run only for obs) must be complete because - # this GenEnsProd task checks to see the forecast hours for which obs - # are available before processing the forecast for those hours. - taskdep_pb2nc: - attrs: - task: run_MET_Pb2nc_obs - metataskdep_check_post_output: - <<: *check_post_output - task_run_MET_EnsembleStat_vx_#VAR#: + metataskdep_check_post_output: + <<: *check_post_output + task_run_MET_EnsembleStat_vx_#FIELD_GROUP#: <<: *task_GenEnsProd_NDAS envars: <<: *envars_GenEnsProd_NDAS METPLUSTOOLNAME: 'ENSEMBLESTAT' walltime: 01:00:00 dependency: - taskdep_genensprod: - attrs: - task: run_MET_GenEnsProd_vx_#VAR# + and: + datadep_all_pb2nc_obs_ndas_complete: &all_pb2nc_obs_ndas_complete + attrs: + age: 00:00:00:30 + # Check that the flag files that indicate that the Pb2NC tasks are + # complete are all present before launching any EnsembleStat task. 
+ text: '{%- set num_obs_days = workflow.OBS_DAYS_ALL_CYCLES_INST|length %} + {%- set indent = " " %} + {%- set indent_p2 = indent + " " %} + {%- for n in range(0, num_obs_days) %} + {%- set yyyymmdd = workflow.OBS_DAYS_ALL_CYCLES_INST[n] %} + {%- if n == 0 %} + {{- workflow.WFLOW_FLAG_FILES_DIR ~ "/run_met_pb2nc_obs_ndas_" ~ yyyymmdd ~ "_complete.txt" }} + {%- else %} + {{- indent ~ "\n" }} + {{- indent ~ "\n" }} + {{- indent_p2 ~ workflow.WFLOW_FLAG_FILES_DIR ~ "/run_met_pb2nc_obs_ndas_" ~ yyyymmdd ~ "_complete.txt" }} + {%- endif %} + {%- if n != num_obs_days-1 %} {{- "\n" }} {%- endif %} + {%- endfor %}' + taskdep_genensprod: + attrs: + task: run_MET_GenEnsProd_vx_#FIELD_GROUP# -metatask_GridStat_CCPA_ensmeanprob_all_accums: +metatask_GridStat_APCP_all_accums_ensmeanprob: var: stat: MEAN PROB statlc: mean prob - metatask_GridStat_CCPA_ens#statlc#_all_accums: + metatask_GridStat_APCP_all_accums_ens#statlc#: var: ACCUM_HH: '{% for ah in verification.VX_APCP_ACCUMS_HRS %}{% if workflow.FCST_LEN_HRS >= ah %}{{ "%02d " % ah }}{% endif %}{% endfor %}' - task_run_MET_GridStat_vx_ens#statlc#_APCP#ACCUM_HH#h: + task_run_MET_GridStat_vx_APCP#ACCUM_HH#h_ens#statlc#: <<: *default_task_verify_ens command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_GRIDSTAT_OR_POINTSTAT_VX_ENS#stat#"' envars: <<: *default_vars OBS_DIR: '&CCPA_OBS_DIR;' - VAR: APCP + FIELD_GROUP: 'APCP' METPLUSTOOLNAME: 'GRIDSTAT' OBTYPE: 'CCPA' ACCUM_HH: '#ACCUM_HH#' FCST_LEVEL: 'A#ACCUM_HH#' FCST_THRESH: 'all' dependency: - taskdep: - attrs: - task: run_MET_GenEnsProd_vx_APCP#ACCUM_HH#h + and: + taskdep_pcpcombine_obs_ccpa: + <<: *taskdep_pcpcombine_obs_ccpa + taskdep_genensprod: + attrs: + task: run_MET_GenEnsProd_vx_APCP#ACCUM_HH#h -metatask_GridStat_NOHRSC_ensmeanprob_all_accums: +metatask_GridStat_ASNOW_all_accums_ensmeanprob: var: stat: MEAN PROB statlc: mean prob - metatask_GridStat_NOHRSC_ens#statlc#_all_accums: + metatask_GridStat_ASNOW_all_accums_ens#statlc#: var: ACCUM_HH: '{% for ah in verification.VX_ASNOW_ACCUMS_HRS %}{% if workflow.FCST_LEN_HRS >= ah %}{{ "%02d " % ah }}{% endif %}{% endfor %}' - task_run_MET_GridStat_vx_ens#statlc#_ASNOW#ACCUM_HH#h: + task_run_MET_GridStat_vx_ASNOW#ACCUM_HH#h_ens#statlc#: <<: *default_task_verify_ens command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_GRIDSTAT_OR_POINTSTAT_VX_ENS#stat#"' envars: <<: *default_vars OBS_DIR: '&NOHRSC_OBS_DIR;' - VAR: ASNOW + FIELD_GROUP: 'ASNOW' METPLUSTOOLNAME: 'GRIDSTAT' OBTYPE: 'NOHRSC' ACCUM_HH: '#ACCUM_HH#' FCST_LEVEL: 'A#ACCUM_HH#' FCST_THRESH: 'all' dependency: - taskdep: - attrs: - task: run_MET_GenEnsProd_vx_ASNOW#ACCUM_HH#h + and: + taskdep_pcpcombine_obs_nohrsc: + <<: *taskdep_pcpcombine_obs_nohrsc + taskdep_genensprod: + attrs: + task: run_MET_GenEnsProd_vx_ASNOW#ACCUM_HH#h -metatask_GridStat_MRMS_ensprob: +metatask_GridStat_REFC_RETOP_ensprob: var: - VAR: '{% for var in verification.VX_FIELDS %}{% if var in ["REFC", "RETOP"] %}{{ "%s " % var }}{% endif %}{% endfor %}' - task_run_MET_GridStat_vx_ensprob_#VAR#: + FIELD_GROUP: '{% for var in verification.VX_FIELD_GROUPS %}{% if var in ["REFC", "RETOP"] %}{{ "%s " % var }}{% endif %}{% endfor %}' + task_run_MET_GridStat_vx_#FIELD_GROUP#_ensprob: <<: *default_task_verify_ens command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_GRIDSTAT_OR_POINTSTAT_VX_ENSPROB"' envars: <<: *default_vars ACCUM_HH: '01' OBS_DIR: '&MRMS_OBS_DIR;' - VAR: '#VAR#' + FIELD_GROUP: '#FIELD_GROUP#' METPLUSTOOLNAME: 'GRIDSTAT' OBTYPE: 'MRMS' FCST_LEVEL: 'L0' 
FCST_THRESH: 'all' dependency: - taskdep: - attrs: - task: run_MET_GenEnsProd_vx_#VAR# + and: + datadep_all_get_obs_mrms_complete: + <<: *all_get_obs_mrms_complete + taskdep_genensprod: + attrs: + task: run_MET_GenEnsProd_vx_#FIELD_GROUP# -metatask_PointStat_NDAS_ensmeanprob: +metatask_PointStat_SFC_UPA_ensmeanprob: var: stat: MEAN PROB statlc: mean prob - metatask_PointStat_NDAS_ens#statlc#: + metatask_PointStat_SFC_UPA_ens#statlc#: var: - VAR: '{% for var in verification.VX_FIELDS %}{% if var in ["ADPSFC", "ADPUPA"] %}{{ "%s " % var }}{% endif %}{% endfor %}' - task_run_MET_PointStat_vx_ens#statlc#_#VAR#: + FIELD_GROUP: '{% for var in verification.VX_FIELD_GROUPS %}{% if var in ["SFC", "UPA"] %}{{ "%s " % var }}{% endif %}{% endfor %}' + task_run_MET_PointStat_vx_#FIELD_GROUP#_ens#statlc#: <<: *default_task_verify_ens command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_GRIDSTAT_OR_POINTSTAT_VX_ENS#stat#"' envars: <<: *default_vars OBS_DIR: '&NDAS_OBS_DIR;' - VAR: '#VAR#' + FIELD_GROUP: '#FIELD_GROUP#' METPLUSTOOLNAME: 'POINTSTAT' OBTYPE: 'NDAS' ACCUM_HH: '01' FCST_LEVEL: 'all' FCST_THRESH: 'all' dependency: - taskdep: - attrs: - task: run_MET_GenEnsProd_vx_#VAR# + and: + datadep_all_pb2nc_obs_ndas_complete: + <<: *all_pb2nc_obs_ndas_complete + taskdep_genensprod: + attrs: + task: run_MET_GenEnsProd_vx_#FIELD_GROUP# diff --git a/parm/wflow/verify_pre.yaml b/parm/wflow/verify_pre.yaml index 0d4e1c2448..2b86772565 100644 --- a/parm/wflow/verify_pre.yaml +++ b/parm/wflow/verify_pre.yaml @@ -1,7 +1,7 @@ default_task_verify_pre: &default_task_verify_pre account: '&ACCOUNT;' attrs: - cycledefs: forecast + cycledefs: cycledefs_obs_days_inst maxtries: '1' envars: &default_vars GLOBAL_VAR_DEFNS_FP: '&GLOBAL_VAR_DEFNS_FP;' @@ -23,26 +23,27 @@ default_task_verify_pre: &default_task_verify_pre task_get_obs_ccpa: <<: *default_task_verify_pre + attrs: + cycledefs: cycledefs_obs_days_cumul + maxtries: '1' command: '&LOAD_MODULES_RUN_TASK; "get_obs" "&JOBSdir;/JREGIONAL_GET_VERIF_OBS"' envars: <<: *default_vars - ACCUM_HH: '01' - OBS_DIR: '&CCPA_OBS_DIR;' OBTYPE: 'CCPA' - FHR: '{% for h in range(0, workflow.FCST_LEN_HRS+1) %}{{ " %02d" % h }}{% endfor %}' native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' queue: "&QUEUE_HPSS;" - walltime: 00:45:00 + walltime: 02:00:00 task_get_obs_nohrsc: <<: *default_task_verify_pre + attrs: + cycledefs: cycledefs_obs_days_cumul + maxtries: '1' command: '&LOAD_MODULES_RUN_TASK; "get_obs" "&JOBSdir;/JREGIONAL_GET_VERIF_OBS"' envars: <<: *default_vars - OBS_DIR: '&NOHRSC_OBS_DIR;' OBTYPE: 'NOHRSC' - FHR: '{% for h in range(0, workflow.FCST_LEN_HRS+1) %}{{ " %02d" % h }}{% endfor %}' native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' queue: "&QUEUE_HPSS;" @@ -53,10 +54,7 @@ task_get_obs_mrms: command: '&LOAD_MODULES_RUN_TASK; "get_obs" "&JOBSdir;/JREGIONAL_GET_VERIF_OBS"' envars: <<: *default_vars - OBS_DIR: '&MRMS_OBS_DIR;' OBTYPE: 'MRMS' - VAR: 'REFC RETOP' - FHR: '{% for h in range(0, workflow.FCST_LEN_HRS+1) %}{{ " %02d" % h }}{% endfor %}' native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' 
partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' queue: "&QUEUE_HPSS;" @@ -64,26 +62,24 @@ task_get_obs_mrms: task_get_obs_ndas: <<: *default_task_verify_pre + command: '&LOAD_MODULES_RUN_TASK; "get_obs" "&JOBSdir;/JREGIONAL_GET_VERIF_OBS"' envars: <<: *default_vars - OBS_DIR: '&NDAS_OBS_DIR;' OBTYPE: 'NDAS' - FHR: '{% for h in range(0, workflow.FCST_LEN_HRS+1) %}{{ " %02d" % h }}{% endfor %}' - command: '&LOAD_MODULES_RUN_TASK; "get_obs" "&JOBSdir;/JREGIONAL_GET_VERIF_OBS"' queue: "&QUEUE_HPSS;" native: '{% if platform.get("SCHED_NATIVE_CMD_HPSS") %}{{ platform.SCHED_NATIVE_CMD_HPSS }}{% else %}{{ platform.SCHED_NATIVE_CMD}}{% endif %}' partition: '{% if platform.get("PARTITION_HPSS") %}&PARTITION_HPSS;{% else %}None{% endif %}' walltime: 02:00:00 -task_run_MET_Pb2nc_obs: +task_run_MET_Pb2nc_obs_NDAS: <<: *default_task_verify_pre attrs: - cycledefs: forecast + cycledefs: cycledefs_obs_days_inst maxtries: '2' command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_PB2NC_OBS"' envars: <<: *default_vars - VAR: ADPSFC + FIELD_GROUP: 'SFC' ACCUM_HH: '01' FCST_OR_OBS: OBS OBTYPE: NDAS @@ -102,10 +98,10 @@ task_run_MET_Pb2nc_obs: attrs: task: get_obs_ndas -metatask_PcpCombine_obs: +metatask_PcpCombine_APCP_all_accums_obs_CCPA: var: ACCUM_HH: '{% for ah in verification.VX_APCP_ACCUMS_HRS %}{% if workflow.FCST_LEN_HRS >= ah %}{{ "%02d " % ah }}{% endif %}{% endfor %}' - task_run_MET_PcpCombine_obs_APCP#ACCUM_HH#h: + task_run_MET_PcpCombine_APCP#ACCUM_HH#h_obs_CCPA: <<: *default_task_verify_pre attrs: cycledefs: forecast @@ -113,24 +109,78 @@ metatask_PcpCombine_obs: command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_PCPCOMBINE"' envars: <<: *default_vars - VAR: APCP + FIELD_GROUP: 'APCP' ACCUM_HH: '#ACCUM_HH#' FCST_OR_OBS: OBS OBTYPE: CCPA OBS_DIR: '&CCPA_OBS_DIR;' + OBS_AVAIL_INTVL_HRS: '{{- verification.CCPA_OBS_AVAIL_INTVL_HRS }}' METPLUSTOOLNAME: 'PCPCOMBINE' dependency: and: datadep: text: "&CCPA_OBS_DIR;" - or: - not: - taskvalid: - attrs: - task: get_obs_ccpa - taskdep: - attrs: - task: get_obs_ccpa + datadep_all_get_obs_ccpa_complete: + attrs: + age: 00:00:00:30 + # Check that the flag files that indicate that the get_obs_ccpa tasks + # are complete are all present before launching any PcpCombine task. 
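+        # The days looped over here come from OBS_DAYS_ALL_CYCLES_CUMUL
+        # (presumably the set of days needed for cumulative obs such as
+        # CCPA). For a hypothetical day 20230218, the rendered flag-file
+        # path would be
+        #   {WFLOW_FLAG_FILES_DIR}/get_obs_ccpa_20230218_complete.txt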
+ text: '{%- set num_obs_days = workflow.OBS_DAYS_ALL_CYCLES_CUMUL|length %} + {%- set indent = " " %} + {%- set indent_p2 = indent + " " %} + {%- for n in range(0, num_obs_days) %} + {%- set yyyymmdd = workflow.OBS_DAYS_ALL_CYCLES_CUMUL[n] %} + {%- if n == 0 %} + {{- workflow.WFLOW_FLAG_FILES_DIR ~ "/get_obs_ccpa_" ~ yyyymmdd ~ "_complete.txt" }} + {%- else %} + {{- indent ~ "\n" }} + {{- indent ~ "\n" }} + {{- indent_p2 ~ workflow.WFLOW_FLAG_FILES_DIR ~ "/get_obs_ccpa_" ~ yyyymmdd ~ "_complete.txt" }} + {%- endif %} + {%- if n != num_obs_days-1 %} {{- "\n" }} {%- endif %} + {%- endfor %}' + +metatask_PcpCombine_ASNOW_all_accums_obs_NOHRSC: + var: + ACCUM_HH: '{% for ah in verification.VX_ASNOW_ACCUMS_HRS %}{% if workflow.FCST_LEN_HRS >= ah %}{{ "%02d " % ah }}{% endif %}{% endfor %}' + task_run_MET_PcpCombine_ASNOW#ACCUM_HH#h_obs_NOHRSC: + <<: *default_task_verify_pre + attrs: + cycledefs: forecast + maxtries: '2' + command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_PCPCOMBINE"' + envars: + <<: *default_vars + FIELD_GROUP: 'ASNOW' + ACCUM_HH: '#ACCUM_HH#' + FCST_OR_OBS: OBS + OBTYPE: NOHRSC + OBS_DIR: '&NOHRSC_OBS_DIR;' + OBS_AVAIL_INTVL_HRS: '{{- verification.NOHRSC_OBS_AVAIL_INTVL_HRS }}' + METPLUSTOOLNAME: 'PCPCOMBINE' + dependency: + and: + datadep: + text: "&NOHRSC_OBS_DIR;" + datadep_all_get_obs_nohrsc_complete: + attrs: + age: 00:00:00:30 + # Check that the flag files that indicate that the get_obs_nohrsc tasks + # are complete are all present before launching any PcpCombine task. + text: '{%- set num_obs_days = workflow.OBS_DAYS_ALL_CYCLES_CUMUL|length %} + {%- set indent = " " %} + {%- set indent_p2 = indent + " " %} + {%- for n in range(0, num_obs_days) %} + {%- set yyyymmdd = workflow.OBS_DAYS_ALL_CYCLES_CUMUL[n] %} + {%- if n == 0 %} + {{- workflow.WFLOW_FLAG_FILES_DIR ~ "/get_obs_nohrsc_" ~ yyyymmdd ~ "_complete.txt" }} + {%- else %} + {{- indent ~ "\n" }} + {{- indent ~ "\n" }} + {{- indent_p2 ~ workflow.WFLOW_FLAG_FILES_DIR ~ "/get_obs_nohrsc_" ~ yyyymmdd ~ "_complete.txt" }} + {%- endif %} + {%- if n != num_obs_days-1 %} {{- "\n" }} {%- endif %} + {%- endfor %}' metatask_check_post_output_all_mems: var: @@ -143,8 +193,6 @@ metatask_check_post_output_all_mems: command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_CHECK_POST_OUTPUT"' envars: <<: *default_vars - VAR: APCP - ACCUM_HH: '01' ENSMEM_INDX: '#mem#' dependency: # This "or" checks that the necessary stand-alone post tasks or forecast @@ -183,7 +231,15 @@ metatask_check_post_output_all_mems: # metatask: run_post_mem#mem#_all_fhrs taskdep: attrs: - task: '{% for h in range(0, workflow.LONG_FCST_LEN+1) %}{% if h > 0 %}{{" \n"}}{% endif %}{%- endfor -%}' + task: '{%- for h in range(0, workflow.LONG_FCST_LEN+1) %} + {%- if h > 0 %} + {{- " \n" }} + {%- endif %} + {%- endfor %}' # This "and" is to check whether post is being run inline (i.e. as part of # the weather model), and if so, to ensure that the forecast task for the # current member has completed. 
@@ -210,13 +266,13 @@ metatask_check_post_output_all_mems: taskvalid: <<: *fcst_task_exists -metatask_PcpCombine_fcst_APCP_all_accums_all_mems: +metatask_PcpCombine_APCP_all_accums_all_mems: var: ACCUM_HH: '{% for ah in verification.VX_APCP_ACCUMS_HRS %}{% if workflow.FCST_LEN_HRS >= ah %}{{ "%02d " % ah }}{% endif %}{% endfor %}' - metatask_PcpCombine_fcst_APCP#ACCUM_HH#h_all_mems: + metatask_PcpCombine_APCP#ACCUM_HH#h_all_mems: var: mem: '{% if global.DO_ENSEMBLE %}{% for m in range(1, global.NUM_ENS_MEMBERS+1) %}{{ "%03d "%m }}{%- endfor -%} {% else %}{{ "000"|string }}{% endif %}' - task_run_MET_PcpCombine_fcst_APCP#ACCUM_HH#h_mem#mem#: + task_run_MET_PcpCombine_APCP#ACCUM_HH#h_fcst_mem#mem#: <<: *default_task_verify_pre attrs: cycledefs: forecast @@ -224,7 +280,7 @@ metatask_PcpCombine_fcst_APCP_all_accums_all_mems: command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_PCPCOMBINE"' envars: <<: *default_vars - VAR: APCP + FIELD_GROUP: 'APCP' ACCUM_HH: '#ACCUM_HH#' FCST_OR_OBS: FCST OBTYPE: CCPA @@ -236,15 +292,15 @@ metatask_PcpCombine_fcst_APCP_all_accums_all_mems: attrs: age: 00:00:00:30 text: !cycstr '{{ workflow.EXPTDIR }}/@Y@m@d@H/post_files_exist_mem#mem#.txt' - walltime: 00:10:00 + walltime: 00:30:00 -metatask_PcpCombine_fcst_ASNOW_all_accums_all_mems: +metatask_PcpCombine_ASNOW_all_accums_all_mems: var: ACCUM_HH: '{% for ah in verification.VX_ASNOW_ACCUMS_HRS %}{% if workflow.FCST_LEN_HRS >= ah %}{{ "%02d " % ah }}{% endif %}{% endfor %}' - metatask_PcpCombine_fcst_ASNOW#ACCUM_HH#h_all_mems: + metatask_PcpCombine_ASNOW#ACCUM_HH#h_all_mems: var: mem: '{% if global.DO_ENSEMBLE %}{% for m in range(1, global.NUM_ENS_MEMBERS+1) %}{{ "%03d "%m }}{%- endfor -%} {% else %}{{ "000"|string }}{% endif %}' - task_run_MET_PcpCombine_fcst_ASNOW#ACCUM_HH#h_mem#mem#: + task_run_MET_PcpCombine_ASNOW#ACCUM_HH#h_fcst_mem#mem#: <<: *default_task_verify_pre attrs: cycledefs: forecast @@ -252,7 +308,7 @@ metatask_PcpCombine_fcst_ASNOW_all_accums_all_mems: command: '&LOAD_MODULES_RUN_TASK; "run_vx" "&JOBSdir;/JREGIONAL_RUN_MET_PCPCOMBINE"' envars: <<: *default_vars - VAR: ASNOW + FIELD_GROUP: 'ASNOW' ACCUM_HH: '#ACCUM_HH#' FCST_OR_OBS: FCST OBTYPE: NOHRSC @@ -264,4 +320,4 @@ metatask_PcpCombine_fcst_ASNOW_all_accums_all_mems: attrs: age: 00:00:00:30 text: !cycstr '{{ workflow.EXPTDIR }}/@Y@m@d@H/post_files_exist_mem#mem#.txt' - walltime: 00:10:00 + walltime: 00:30:00 diff --git a/scripts/exregional_check_post_output.sh b/scripts/exregional_check_post_output.sh index 320311cc94..4d5836519c 100755 --- a/scripts/exregional_check_post_output.sh +++ b/scripts/exregional_check_post_output.sh @@ -11,7 +11,7 @@ # CDATE # ENSMEM_INDX # GLOBAL_VAR_DEFNS_FP -# VAR +# METPLUS_ROOT (used by ush/set_leadhrs.py) # # Experiment variables # @@ -52,14 +52,6 @@ done # #----------------------------------------------------------------------- # -# Source files defining auxiliary functions for verification. -# -#----------------------------------------------------------------------- -# -. $USHdir/set_vx_fhr_list.sh -# -#----------------------------------------------------------------------- -# # Save current shell options (in a global array). Then set new options # for this script/function. # @@ -104,38 +96,34 @@ user-staged. 
#----------------------------------------------------------------------- # i="0" -if [ $(boolify "${DO_ENSEMBLE}") = "TRUE" ]; then +if [[ $(boolify "${DO_ENSEMBLE}") == "TRUE" ]]; then i=$( bc -l <<< "${ENSMEM_INDX}-1" ) fi time_lag=$( bc -l <<< "${ENS_TIME_LAG_HRS[$i]}*${SECS_PER_HOUR}" ) # #----------------------------------------------------------------------- # -# Get the list of forecast hours for which there is a post-processed -# output file. Note that: -# -# 1) CDATE (in YYYYMMDDHH format) is already available via the call to -# the job_preamble.sh script in the j-job of this ex-script. -# 2) VAR is set to "APCP" and ACCUM_HH is set to "01" because we assume -# the output files are hourly, so these settings will result in the -# function set_vx_fhr_list checking for existence of hourly post output -# files. +# Check to ensure that all the expected post-processed forecast output +# files are present on disk. This is done by the set_leadhrs function +# below. Note that CDATE (in YYYYMMDDHH format) is already available via +# the call to the job_preamble.sh script in the j-job of this ex-script. # #----------------------------------------------------------------------- # ensmem_indx=$(printf "%0${VX_NDIGITS_ENSMEM_NAMES}d" $(( 10#${ENSMEM_INDX}))) ensmem_name="mem${ensmem_indx}" FCST_INPUT_FN_TEMPLATE=$( eval echo ${FCST_SUBDIR_TEMPLATE:+${FCST_SUBDIR_TEMPLATE}/}${FCST_FN_TEMPLATE} ) -set_vx_fhr_list \ - cdate="${CDATE}" \ - fcst_len_hrs="${FCST_LEN_HRS}" \ - field="$VAR" \ - accum_hh="${ACCUM_HH}" \ - base_dir="${VX_FCST_INPUT_BASEDIR}" \ - fn_template="${FCST_INPUT_FN_TEMPLATE}" \ - check_accum_contrib_files="FALSE" \ - num_missing_files_max="${NUM_MISSING_FCST_FILES_MAX}" \ - outvarname_fhr_list="FHR_LIST" + +FHR_LIST=$( python3 $USHdir/set_leadhrs.py \ + --date_init="${CDATE}" \ + --lhr_min="0" \ + --lhr_max="${FCST_LEN_HRS}" \ + --lhr_intvl="${VX_FCST_OUTPUT_INTVL_HRS}" \ + --base_dir="${VX_FCST_INPUT_BASEDIR}" \ + --fn_template="${FCST_INPUT_FN_TEMPLATE}" \ + --num_missing_files_max="${NUM_MISSING_FCST_FILES_MAX}" \ + --time_lag="${time_lag%.*}") || \ +print_err_msg_exit "Call to set_leadhrs.py failed with return code: $?" # #----------------------------------------------------------------------- # diff --git a/scripts/exregional_get_verif_obs.sh b/scripts/exregional_get_verif_obs.sh index 6ad6aaed0e..d457a6b5d8 100755 --- a/scripts/exregional_get_verif_obs.sh +++ b/scripts/exregional_get_verif_obs.sh @@ -31,7 +31,7 @@ #----------------------------------------------------------------------- # . $USHdir/source_util_funcs.sh -for sect in user nco ; do +for sect in user workflow nco ; do source_yaml ${GLOBAL_VAR_DEFNS_FP} ${sect} done # @@ -43,532 +43,43 @@ done #----------------------------------------------------------------------- # { save_shell_opts; . $USHdir/preamble.sh; } > /dev/null 2>&1 -set -x # #----------------------------------------------------------------------- # -# This script performs several important tasks for preparing data for -# verification tasks. Depending on the value of the environment variable -# OBTYPE=(CCPA|MRMS|NDAS|NOHRSC), the script will prepare that particular data -# set. -# -# If data is not available on disk (in the location specified by -# CCPA_OBS_DIR, MRMS_OBS_DIR, NDAS_OBS_DIR, or NOHRSC_OBS_DIR respectively), -# the script attempts to retrieve the data from HPSS using the retrieve_data.py -# script. Depending on the data set, there are a few strange quirks and/or -# bugs in the way data is organized; see in-line comments for details. 
-# -# -# CCPA (Climatology-Calibrated Precipitation Analysis) precipitation accumulation obs -# ---------- -# If data is available on disk, it must be in the following -# directory structure and file name conventions expected by verification -# tasks: -# -# {CCPA_OBS_DIR}/{YYYYMMDD}/ccpa.t{HH}z.01h.hrap.conus.gb2 -# -# If data is retrieved from HPSS, it will automatically staged by this -# this script. -# -# Notes about the data and how it's used for verification: -# -# 1. Accumulation is currently hardcoded to 01h. The verification will -# use MET/pcp-combine to sum 01h files into desired accumulations. -# -# 2. There is a problem with the valid time in the metadata for files -# valid from 19 - 00 UTC (or files under the '00' directory). This is -# accounted for in this script for data retrieved from HPSS, but if you -# have manually staged data on disk you should be sure this is accouned -# for. See in-line comments below for details. -# -# -# MRMS (Multi-Radar Multi-Sensor) radar observations -# ---------- -# If data is available on disk, it must be in the following -# directory structure and file name conventions expected by verification -# tasks: -# -# {MRMS_OBS_DIR}/{YYYYMMDD}/[PREFIX]{YYYYMMDD}-{HH}0000.grib2, -# -# Where [PREFIX] is MergedReflectivityQCComposite_00.50_ for reflectivity -# data and EchoTop_18_00.50_ for echo top data. If data is not available -# at the top of the hour, you should rename the file closest in time to -# your hour(s) of interest to the above naming format. A script -# "ush/mrms_pull_topofhour.py" is provided for this purpose. -# -# If data is retrieved from HPSS, it will automatically staged by this -# this script. -# -# -# NDAS (NAM Data Assimilation System) conventional observations -# ---------- -# If data is available on disk, it must be in the following -# directory structure and file name conventions expected by verification -# tasks: -# -# {NDAS_OBS_DIR}/{YYYYMMDD}/prepbufr.ndas.{YYYYMMDDHH} -# -# Note that data retrieved from HPSS and other sources may be in a -# different format: nam.t{hh}z.prepbufr.tm{prevhour}.nr, where hh is -# either 00, 06, 12, or 18, and prevhour is the number of hours prior to -# hh (00 through 05). If using custom staged data, you will have to -# rename the files accordingly. -# -# If data is retrieved from HPSS, it will automatically staged by this -# this script. +# Make sure the obs type is valid. Then call a python script to check +# for the presence of obs files on disk and get them if needed. # +#----------------------------------------------------------------------- # -# NOHRSC snow accumulation observations -# ---------- -# If data is available on disk, it must be in the following -# directory structure and file name conventions expected by verification -# tasks: +valid_obtypes=("CCPA" "MRMS" "NDAS" "NOHRSC") +if [[ ! 
${valid_obtypes[@]} =~ ${OBTYPE} ]]; then + print_err_msg_exit "\ +Invalid observation type (OBTYPE) specified for script: + OBTYPE = \"${OBTYPE}\" +Valid observation types are: + $(printf "\"%s\" " ${valid_obtypes[@]}) +" +fi + +cmd="\ +python3 -u ${USHdir}/get_obs.py \ +--var_defns_path "${GLOBAL_VAR_DEFNS_FP}" \ +--obtype ${OBTYPE} \ +--obs_day ${PDY}" +print_info_msg " +CALLING: ${cmd}" +${cmd} || print_err_msg_exit "Error calling get_obs.py" # -# {NOHRSC_OBS_DIR}/{YYYYMMDD}/sfav2_CONUS_{AA}h_{YYYYMMDD}{HH}_grid184.grb2 -# -# where AA is the 2-digit accumulation duration in hours: 06 or 24 +#----------------------------------------------------------------------- # -# METplus is configured to verify snowfall using 06- and 24-h accumulated -# snowfall from 6- and 12-hourly NOHRSC files, respectively. +# Create flag file that indicates completion of task. This is needed by +# the workflow. # -# If data is retrieved from HPSS, it will automatically staged by this -# this script. - #----------------------------------------------------------------------- -# Create and enter top-level obs directory (so temporary data from HPSS won't collide with other tasks) -mkdir -p ${OBS_DIR} -cd ${OBS_DIR} - -# Set log file for retrieving obs -logfile=retrieve_data.log - -# PDY and cyc are defined in rocoto XML...they are the yyyymmdd and hh for initial forecast hour respectively -iyyyy=$(echo ${PDY} | cut -c1-4) -imm=$(echo ${PDY} | cut -c5-6) -idd=$(echo ${PDY} | cut -c7-8) -ihh=${cyc} - -# Unix date utility needs dates in yyyy-mm-dd hh:mm:ss format -unix_init_DATE="${iyyyy}-${imm}-${idd} ${ihh}:00:00" - -# This awk expression gets the last item of the list $FHR -fcst_length=$(echo ${FHR} | awk '{ print $NF }') -# Make sure fcst_length isn't octal (leading zero) -fcst_length=$((10#${fcst_length})) - -current_fcst=0 -while [[ ${current_fcst} -le ${fcst_length} ]]; do - # Calculate valid date info using date utility - vdate=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours" +%Y%m%d%H) - unix_vdate=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours" "+%Y-%m-%d %H:00:00") - vyyyymmdd=$(echo ${vdate} | cut -c1-8) - vhh=$(echo ${vdate} | cut -c9-10) - - # Calculate valid date + 1 day; this is needed because some obs files - # are stored in the *next* day's 00h directory - vdate_p1=$($DATE_UTIL -d "${unix_init_DATE} ${current_fcst} hours 1 day" +%Y%m%d%H) - vyyyymmdd_p1=$(echo ${vdate_p1} | cut -c1-8) - - #remove leading zero again, this time keep original - vhh_noZero=$((10#${vhh})) - - # Retrieve CCPA observations - if [[ ${OBTYPE} == "CCPA" ]]; then - - #CCPA is accumulation observations, so none to retrieve for hour zero - if [[ ${current_fcst} -eq 0 ]]; then - current_fcst=$((${current_fcst} + 1)) - continue - fi - - # Staging location for raw CCPA data from HPSS - ccpa_raw=${OBS_DIR}/raw - - # Reorganized CCPA location - ccpa_proc=${OBS_DIR} - - # Accumulation is for accumulation of CCPA data to pull (hardcoded to 01h, see note above.) - accum=01 - - # Check if file exists on disk; if not, pull it. - ccpa_file="$ccpa_proc/${vyyyymmdd}/ccpa.t${vhh}z.${accum}h.hrap.conus.gb2" - if [[ -f "${ccpa_file}" ]]; then - echo "${OBTYPE} file exists on disk:" - echo "${ccpa_file}" - else - echo "${OBTYPE} file does not exist on disk:" - echo "${ccpa_file}" - echo "Will attempt to retrieve from remote locations" - - # Create necessary raw and prop directories - if [[ ! -d "$ccpa_raw/${vyyyymmdd}" ]]; then - mkdir -p $ccpa_raw/${vyyyymmdd} - fi - if [[ ! 
-d "$ccpa_raw/${vyyyymmdd_p1}" ]]; then - mkdir -p $ccpa_raw/${vyyyymmdd_p1} - fi - if [[ ! -d "$ccpa_proc/${vyyyymmdd}" ]]; then - mkdir -p $ccpa_proc/${vyyyymmdd} - fi - # Check if valid hour is 00 - if [[ ${vhh_noZero} -ge 19 && ${vhh_noZero} -le 23 ]]; then - # Pull CCPA data from HPSS - cmd=" - python3 -u ${USHdir}/retrieve_data.py \ - --debug \ - --file_set obs \ - --config ${PARMdir}/data_locations.yml \ - --cycle_date ${vyyyymmdd_p1}${vhh} \ - --data_stores hpss \ - --data_type CCPA_obs \ - --output_path $ccpa_raw/${vyyyymmdd_p1} \ - --summary_file ${logfile}" - - echo "CALLING: ${cmd}" - $cmd || print_err_msg_exit "\ - Could not retrieve CCPA data from HPSS - - The following command exited with a non-zero exit status: - ${cmd} -" - - else - # Pull CCPA data from HPSS - cmd=" - python3 -u ${USHdir}/retrieve_data.py \ - --debug \ - --file_set obs \ - --config ${PARMdir}/data_locations.yml \ - --cycle_date ${vyyyymmdd}${vhh} \ - --data_stores hpss \ - --data_type CCPA_obs \ - --output_path $ccpa_raw/${vyyyymmdd} \ - --summary_file ${logfile}" - - echo "CALLING: ${cmd}" - $cmd || print_err_msg_exit "\ - Could not retrieve CCPA data from HPSS - - The following command exited with a non-zero exit status: - ${cmd} -" - fi - - # One hour CCPA files have incorrect metadata in the files under the "00" directory from 20180718 to 20210504. - # After data is pulled, reorganize into correct valid yyyymmdd structure. - if [[ ${vhh_noZero} -ge 1 && ${vhh_noZero} -le 6 ]]; then - cp $ccpa_raw/${vyyyymmdd}/ccpa.t${vhh}z.${accum}h.hrap.conus.gb2 $ccpa_proc/${vyyyymmdd} - elif [[ ${vhh_noZero} -ge 7 && ${vhh_noZero} -le 12 ]]; then - cp $ccpa_raw/${vyyyymmdd}/ccpa.t${vhh}z.${accum}h.hrap.conus.gb2 $ccpa_proc/${vyyyymmdd} - elif [[ ${vhh_noZero} -ge 13 && ${vhh_noZero} -le 18 ]]; then - cp $ccpa_raw/${vyyyymmdd}/ccpa.t${vhh}z.${accum}h.hrap.conus.gb2 $ccpa_proc/${vyyyymmdd} - elif [[ ${vhh_noZero} -ge 19 && ${vhh_noZero} -le 23 ]]; then - if [[ ${vyyyymmdd} -ge 20180718 && ${vyyyymmdd} -le 20210504 ]]; then - wgrib2 $ccpa_raw/${vyyyymmdd_p1}/ccpa.t${vhh}z.${accum}h.hrap.conus.gb2 -set_date -24hr -grib $ccpa_proc/${vyyyymmdd}/ccpa.t${vhh}z.${accum}h.hrap.conus.gb2 -s - else - cp $ccpa_raw/${vyyyymmdd_p1}/ccpa.t${vhh}z.${accum}h.hrap.conus.gb2 $ccpa_proc/${vyyyymmdd} - fi - elif [[ ${vhh_noZero} -eq 0 ]]; then - # One hour CCPA files on HPSS have incorrect metadata in the files under the "00" directory from 20180718 to 20210504. - if [[ ${vyyyymmdd} -ge 20180718 && ${vyyyymmdd} -le 20210504 ]]; then - wgrib2 $ccpa_raw/${vyyyymmdd}/ccpa.t${vhh}z.${accum}h.hrap.conus.gb2 -set_date -24hr -grib $ccpa_proc/${vyyyymmdd}/ccpa.t${vhh}z.${accum}h.hrap.conus.gb2 -s - else - cp $ccpa_raw/${vyyyymmdd}/ccpa.t${vhh}z.${accum}h.hrap.conus.gb2 $ccpa_proc/${vyyyymmdd} - fi - fi - - fi - # Retrieve MRMS observations - elif [[ ${OBTYPE} == "MRMS" ]]; then - # Top-level MRMS directory - # raw MRMS data from HPSS - mrms_raw=${OBS_DIR}/raw - - # Reorganized MRMS location - mrms_proc=${OBS_DIR} - - # For each field (REFC and RETOP), check if file exists on disk; if not, pull it. - for field in ${VAR[@]}; do - if [ "${field}" = "REFC" ]; then - field_base_name="MergedReflectivityQCComposite" - level="_00.50_" - elif [ "${field}" = "RETOP" ]; then - field_base_name="EchoTop" - level="_18_00.50_" - else - echo "Invalid field: ${field}" - print_err_msg_exit "\ - Invalid field specified: ${field} - - Valid options are 'REFC', 'RETOP'. 
-" - fi - - mrms_file="$mrms_proc/${vyyyymmdd}/${field_base_name}${level}${vyyyymmdd}-${vhh}0000.grib2" - - if [[ -f "${mrms_file}" ]]; then - echo "${OBTYPE} file exists on disk for field ${field}:\n${mrms_file}" - else - echo "${OBTYPE} file does not exist on disk for field ${field}:\n${mrms_file}" - echo "Will attempt to retrieve from remote locations" - # Create directories if necessary - if [[ ! -d "$mrms_raw/${vyyyymmdd}" ]]; then - mkdir -p $mrms_raw/${vyyyymmdd} - fi - if [[ ! -d "$mrms_proc/${vyyyymmdd}" ]]; then - mkdir -p $mrms_proc/${vyyyymmdd} - fi - - - # Pull MRMS data from HPSS - cmd=" - python3 -u ${USHdir}/retrieve_data.py \ - --debug \ - --file_set obs \ - --config ${PARMdir}/data_locations.yml \ - --cycle_date ${vyyyymmdd}${vhh} \ - --data_stores hpss \ - --data_type MRMS_obs \ - --output_path $mrms_raw/${vyyyymmdd} \ - --summary_file ${logfile}" - - echo "CALLING: ${cmd}" - - $cmd || print_err_msg_exit "\ - Could not retrieve MRMS data from HPSS - - The following command exited with a non-zero exit status: - ${cmd} -" - - hour=0 - while [[ ${hour} -le 23 ]]; do - HH=$(printf "%02d" $hour) - echo "hour=${hour}" - python ${USHdir}/mrms_pull_topofhour.py --valid_time ${vyyyymmdd}${HH} --outdir ${mrms_proc} --source ${mrms_raw} --product ${field_base_name} - hour=$((${hour} + 1)) # hourly increment - done - - fi - done - - # Retrieve NDAS observations - elif [[ ${OBTYPE} == "NDAS" ]]; then - # raw NDAS data from HPSS - ndas_raw=${OBS_DIR}/raw - - # Reorganized NDAS location - ndas_proc=${OBS_DIR} - - # Check if file exists on disk - ndas_file="$ndas_proc/prepbufr.ndas.${vyyyymmdd}${vhh}" - if [[ -f "${ndas_file}" ]]; then - echo "${OBTYPE} file exists on disk:" - echo "${ndas_file}" - else - echo "${OBTYPE} file does not exist on disk:" - echo "${ndas_file}" - echo "Will attempt to retrieve from remote locations" - # NDAS data is available in 6-hourly combined tar files, each with 7 1-hour prepbufr files: - # nam.tHHz.prepbufr.tm00.nr, nam.tHHz.prepbufr.tm01.nr, ... , nam.tHHz.prepbufr.tm06.nr - # - # The "tm" here means "time minus", so nam.t12z.prepbufr.tm00.nr is valid for 12z, - # nam.t00z.prepbufr.tm03.nr is valid for 21z the previous day, etc. - # This means that every six hours we have to obs files valid for the same time: - # nam.tHHz.prepbufr.tm00.nr and nam.t[HH+6]z.prepbufr.tm06.nr - # We want to use the tm06 file because it contains more/better obs (confirmed with EMC: even - # though the earlier files are larger, this is because the time window is larger) - - # The current logic of this script will likely stage more files than you need, but will never - # pull more HPSS tarballs than necessary - - if [[ ${current_fcst} -eq 0 && ${current_fcst} -ne ${fcst_length} ]]; then - # If at forecast hour zero, skip to next hour. - current_fcst=$((${current_fcst} + 1)) - continue - fi - - if [[ ${vhh_noZero} -eq 0 || ${vhh_noZero} -eq 6 || ${vhh_noZero} -eq 12 || ${vhh_noZero} -eq 18 ]]; then - - if [[ ! 
-d "$ndas_raw/${vyyyymmdd}${vhh}" ]]; then - mkdir -p $ndas_raw/${vyyyymmdd}${vhh} - fi - - # Pull NDAS data from HPSS - cmd=" - python3 -u ${USHdir}/retrieve_data.py \ - --debug \ - --file_set obs \ - --config ${PARMdir}/data_locations.yml \ - --cycle_date ${vyyyymmdd}${vhh} \ - --data_stores hpss \ - --data_type NDAS_obs \ - --output_path $ndas_raw/${vyyyymmdd}${vhh} \ - --summary_file ${logfile}" - - echo "CALLING: ${cmd}" - - $cmd || print_err_msg_exit "\ - Could not retrieve NDAS data from HPSS - - The following command exited with a non-zero exit status: - ${cmd} -" - - if [[ ! -d "$ndas_proc" ]]; then - mkdir -p $ndas_proc - fi - - # copy files from the previous 6 hours ("tm" means "time minus") - # The tm06 files contain more/better observations than tm00 for the equivalent time - for tm in $(seq 1 6); do - vyyyymmddhh_tm=$($DATE_UTIL -d "${unix_vdate} ${tm} hours ago" +%Y%m%d%H) - tm2=$(echo $tm | awk '{printf "%02d\n", $0;}') - - cp $ndas_raw/${vyyyymmdd}${vhh}/nam.t${vhh}z.prepbufr.tm${tm2}.nr $ndas_proc/prepbufr.ndas.${vyyyymmddhh_tm} - done - - fi - - # If at last forecast hour, make sure we're getting the last observations - if [[ ${current_fcst} -eq ${fcst_length} ]]; then - echo "Retrieving NDAS obs for final forecast hour" - vhh_noZero=$((vhh_noZero + 6 - (vhh_noZero % 6))) - if [[ ${vhh_noZero} -eq 24 ]]; then - vyyyymmdd=${vyyyymmdd_p1} - vhh=00 - elif [[ ${vhh_noZero} -eq 6 ]]; then - vhh=06 - else - vhh=${vhh_noZero} - fi - - if [[ ! -d "$ndas_raw/${vyyyymmdd}${vhh}" ]]; then - mkdir -p $ndas_raw/${vyyyymmdd}${vhh} - fi - - # Pull NDAS data from HPSS - cmd=" - python3 -u ${USHdir}/retrieve_data.py \ - --debug \ - --file_set obs \ - --config ${PARMdir}/data_locations.yml \ - --cycle_date ${vyyyymmdd}${vhh} \ - --data_stores hpss \ - --data_type NDAS_obs \ - --output_path $ndas_raw/${vyyyymmdd}${vhh} \ - --summary_file ${logfile}" - - echo "CALLING: ${cmd}" - - $cmd || print_err_msg_exit "\ - Could not retrieve NDAS data from HPSS - - The following command exited with a non-zero exit status: - ${cmd} -" - - if [[ ! -d "$ndas_proc" ]]; then - mkdir -p $ndas_proc - fi - - for tm in $(seq 1 6); do - last_fhr=$((fcst_length + 6 - (vhh_noZero % 6))) - unix_fdate=$($DATE_UTIL -d "${unix_init_DATE} ${last_fhr} hours" "+%Y-%m-%d %H:00:00") - vyyyymmddhh_tm=$($DATE_UTIL -d "${unix_fdate} ${tm} hours ago" +%Y%m%d%H) - tm2=$(echo $tm | awk '{printf "%02d\n", $0;}') - - cp $ndas_raw/${vyyyymmdd}${vhh}/nam.t${vhh}z.prepbufr.tm${tm2}.nr $ndas_proc/prepbufr.ndas.${vyyyymmddhh_tm} - done - - fi - - fi - - # Retrieve NOHRSC observations - elif [[ ${OBTYPE} == "NOHRSC" ]]; then - - #NOHRSC is accumulation observations, so none to retrieve for hour zero - if [[ ${current_fcst} -eq 0 ]]; then - current_fcst=$((${current_fcst} + 1)) - continue - fi - - # Reorganized NOHRSC location (no need for raw data dir) - nohrsc_proc=${OBS_DIR} - - nohrsc06h_file="$nohrsc_proc/${vyyyymmdd}/sfav2_CONUS_06h_${vyyyymmdd}${vhh}_grid184.grb2" - nohrsc24h_file="$nohrsc_proc/${vyyyymmdd}/sfav2_CONUS_24h_${vyyyymmdd}${vhh}_grid184.grb2" - retrieve=0 - # If 24-hour files should be available (at 00z and 12z) then look for both files - # Otherwise just look for 6hr file - if (( ${current_fcst} % 12 == 0 )) && (( ${current_fcst} >= 24 )) ; then - if [[ ! -f "${nohrsc06h_file}" || ! 
-f "${nohrsc24h_file}" ]] ; then - retrieve=1 - echo "${OBTYPE} files do not exist on disk:" - echo "${nohrsc06h_file}" - echo "${nohrsc24h_file}" - echo "Will attempt to retrieve from remote locations" - else - echo "${OBTYPE} files exist on disk:" - echo "${nohrsc06h_file}" - echo "${nohrsc24h_file}" - fi - elif (( ${current_fcst} % 6 == 0 )) ; then - if [[ ! -f "${nohrsc06h_file}" ]]; then - retrieve=1 - echo "${OBTYPE} file does not exist on disk:" - echo "${nohrsc06h_file}" - echo "Will attempt to retrieve from remote locations" - else - echo "${OBTYPE} file exists on disk:" - echo "${nohrsc06h_file}" - fi - fi - if [ $retrieve == 1 ]; then - if [[ ! -d "$nohrsc_proc/${vyyyymmdd}" ]]; then - mkdir -p $nohrsc_proc/${vyyyymmdd} - fi - - # Pull NOHRSC data from HPSS; script will retrieve all files so only call once - cmd=" - python3 -u ${USHdir}/retrieve_data.py \ - --debug \ - --file_set obs \ - --config ${PARMdir}/data_locations.yml \ - --cycle_date ${vyyyymmdd}${vhh} \ - --data_stores hpss \ - --data_type NOHRSC_obs \ - --output_path $nohrsc_proc/${vyyyymmdd} \ - --summary_file ${logfile}" - - echo "CALLING: ${cmd}" - - $cmd || print_err_msg_exit "\ - Could not retrieve NOHRSC data from HPSS - - The following command exited with a non-zero exit status: - ${cmd} -" - # 6-hour forecast needs to be renamed - mv $nohrsc_proc/${vyyyymmdd}/sfav2_CONUS_6h_${vyyyymmdd}${vhh}_grid184.grb2 ${nohrsc06h_file} - fi - - else - print_err_msg_exit "\ - Invalid OBTYPE specified for script; valid options are CCPA, MRMS, NDAS, and NOHRSC - " - fi # Increment to next forecast hour - # Increment to next forecast hour - echo "Finished fcst hr=${current_fcst}" - current_fcst=$((${current_fcst} + 1)) - -done - - -# Clean up raw, unprocessed observation files -rm -rf ${OBS_DIR}/raw - +# +mkdir -p ${WFLOW_FLAG_FILES_DIR} +file_bn="get_obs_$(echo_lowercase ${OBTYPE})" +touch "${WFLOW_FLAG_FILES_DIR}/${file_bn}_${PDY}_complete.txt" # #----------------------------------------------------------------------- # diff --git a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh index 1c09dc09c6..a7ec52ad6a 100755 --- a/scripts/exregional_run_met_genensprod_or_ensemblestat.sh +++ b/scripts/exregional_run_met_genensprod_or_ensemblestat.sh @@ -22,7 +22,6 @@ done # . $USHdir/get_metplus_tool_name.sh . $USHdir/set_vx_params.sh -. 
$USHdir/set_vx_fhr_list.sh # #----------------------------------------------------------------------- # @@ -108,7 +107,7 @@ FIELDNAME_IN_MET_FILEDIR_NAMES="" set_vx_params \ obtype="${OBTYPE}" \ - field="$VAR" \ + field_group="${FIELD_GROUP}" \ accum_hh="${ACCUM_HH}" \ outvarname_grid_or_point="grid_or_point" \ outvarname_fieldname_in_obs_input="FIELDNAME_IN_OBS_INPUT" \ @@ -135,23 +134,23 @@ if [ "${grid_or_point}" = "grid" ]; then case "${FIELDNAME_IN_MET_FILEDIR_NAMES}" in "APCP"*) - OBS_INPUT_DIR="${vx_output_basedir}/metprd/PcpCombine_obs" + OBS_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}/obs/metprd/PcpCombine_obs" OBS_INPUT_FN_TEMPLATE="${OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" FCST_INPUT_DIR="${vx_output_basedir}" ;; "ASNOW"*) - OBS_INPUT_DIR="${OBS_DIR}" - OBS_INPUT_FN_TEMPLATE="${OBS_NOHRSC_ASNOW_FN_TEMPLATE}" + OBS_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}/obs/metprd/PcpCombine_obs" + OBS_INPUT_FN_TEMPLATE="${OBS_NOHRSC_ASNOW_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" FCST_INPUT_DIR="${vx_output_basedir}" ;; "REFC") OBS_INPUT_DIR="${OBS_DIR}" - OBS_INPUT_FN_TEMPLATE="${OBS_MRMS_REFC_FN_TEMPLATE}" + OBS_INPUT_FN_TEMPLATE="${OBS_MRMS_FN_TEMPLATES[1]}" FCST_INPUT_DIR="${vx_fcst_input_basedir}" ;; "RETOP") OBS_INPUT_DIR="${OBS_DIR}" - OBS_INPUT_FN_TEMPLATE="${OBS_MRMS_RETOP_FN_TEMPLATE}" + OBS_INPUT_FN_TEMPLATE="${OBS_MRMS_FN_TEMPLATES[3]}" FCST_INPUT_DIR="${vx_fcst_input_basedir}" ;; esac @@ -159,7 +158,7 @@ if [ "${grid_or_point}" = "grid" ]; then elif [ "${grid_or_point}" = "point" ]; then OBS_INPUT_DIR="${vx_output_basedir}/metprd/Pb2nc_obs" - OBS_INPUT_FN_TEMPLATE="${OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE_PB2NC_OUTPUT}" + OBS_INPUT_FN_TEMPLATE="${OBS_NDAS_SFCandUPA_FN_TEMPLATE_PB2NC_OUTPUT}" FCST_INPUT_DIR="${vx_fcst_input_basedir}" fi @@ -184,7 +183,7 @@ for (( i=0; i<${NUM_ENS_MEMBERS}; i++ )); do time_lag=$( bc -l <<< "${ENS_TIME_LAG_HRS[$i]}*${SECS_PER_HOUR}" ) - if [ "${VAR}" = "APCP" ] || [ "${VAR}" = "ASNOW" ]; then + if [ "${FIELD_GROUP}" = "APCP" ] || [ "${FIELD_GROUP}" = "ASNOW" ]; then template="${cdate_ensmem_subdir_or_null:+${cdate_ensmem_subdir_or_null}/}metprd/PcpCombine_fcst/${FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" else template="${FCST_SUBDIR_TEMPLATE}/${FCST_FN_TEMPLATE}" @@ -204,30 +203,54 @@ STAGING_DIR="${OUTPUT_BASE}/stage/${FIELDNAME_IN_MET_FILEDIR_NAMES}" # #----------------------------------------------------------------------- # -# Set the array of forecast hours for which to run the MET/METplus tool. -# This is done by starting with the full list of forecast hours for which -# there is forecast output and then removing from that list any forecast -# hours for which there is no corresponding observation data. +# Generate the list of forecast hours for which to run the specified +# METplus tool. +# +# If running the GenEnsProd tool, we set this to the list of forecast +# output times without filtering for the existence of observation files +# corresponding to those times. This is because GenEnsProd operates +# only on forecasts; it does not need observations. # -# Note that strictly speaking, this does not need to be done if the MET/ -# METplus tool being called is GenEnsProd (because this tool only operates -# on forecasts), but we run the check anyway in this case in order to -# keep the code here simpler and because the output of GenEnsProd for -# forecast hours with missing observations will not be used anyway in -# downstream verification tasks. 
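A minimal Python sketch of the two lead-hour modes described in the surrounding comments; the names here are illustrative only, and the actual logic lives in ush/set_leadhrs.py and may differ in detail:

    # Illustrative sketch only; see ush/set_leadhrs.py for the real implementation.
    import os

    def gen_leadhrs(lhr_min, lhr_max, lhr_intvl):
        """Full list of lead hours, e.g. gen_leadhrs(6, 24, 6) -> [6, 12, 18, 24]."""
        return list(range(lhr_min, lhr_max + 1, lhr_intvl))

    def filter_by_obs(leadhrs, obs_paths, num_missing_files_max):
        """Drop lead hours whose obs file is missing; fail if too many are missing."""
        kept = [lhr for lhr, path in zip(leadhrs, obs_paths) if os.path.isfile(path)]
        if len(leadhrs) - len(kept) > num_missing_files_max:
            raise FileNotFoundError("Number of missing obs files exceeds the allowed maximum")
        return ",".join(str(h) for h in kept)

    # GenEnsProd-style call (--skip_check_files): keep every lead hour.
    print(",".join(str(h) for h in gen_leadhrs(0, 12, 6)))   # -> 0,6,12
    # An EnsembleStat-style call would first render the obs paths from --fn_template
    # for each lead hour and then pass them through filter_by_obs().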
+# On the other hand, if running the EnsembleStat tool, we set the list of +# forecast hours to a set of times that takes into consideration whether +# or not observations exist. We do this by starting with the full list +# of forecast times for which there is forecast output and then removing +# from that list any times for which there is no corresponding observations. # #----------------------------------------------------------------------- # -set_vx_fhr_list \ - cdate="${CDATE}" \ - fcst_len_hrs="${FCST_LEN_HRS}" \ - field="$VAR" \ - accum_hh="${ACCUM_HH}" \ - base_dir="${OBS_INPUT_DIR}" \ - fn_template="${OBS_INPUT_FN_TEMPLATE}" \ - check_accum_contrib_files="FALSE" \ - num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" \ - outvarname_fhr_list="FHR_LIST" +case "$OBTYPE" in + "CCPA"|"NOHRSC") + vx_intvl="$((10#${ACCUM_HH}))" + vx_hr_start="${vx_intvl}" + ;; + *) + vx_intvl="$((${VX_FCST_OUTPUT_INTVL_HRS}))" + vx_hr_start="0" + ;; +esac +vx_hr_end="${FCST_LEN_HRS}" + +if [ "${MetplusToolName}" = "GenEnsProd" ]; then + VX_LEADHR_LIST=$( python3 $USHdir/set_leadhrs.py \ + --lhr_min="${vx_hr_start}" \ + --lhr_max="${vx_hr_end}" \ + --lhr_intvl="${vx_intvl}" \ + --skip_check_files ) || \ + print_err_msg_exit "Call to set_leadhrs.py failed with return code: $?" + +elif [ "${MetplusToolName}" = "EnsembleStat" ]; then + VX_LEADHR_LIST=$( python3 $USHdir/set_leadhrs.py \ + --date_init="${CDATE}" \ + --lhr_min="${vx_hr_start}" \ + --lhr_max="${vx_hr_end}" \ + --lhr_intvl="${vx_intvl}" \ + --base_dir="${OBS_INPUT_DIR}" \ + --fn_template="${OBS_INPUT_FN_TEMPLATE}" \ + --num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" \ + --time_lag="${time_lag%.*}" ) || \ + print_err_msg_exit "Call to set_leadhrs.py failed with return code: $?" +fi # #----------------------------------------------------------------------- # @@ -261,15 +284,15 @@ export LOGDIR # #----------------------------------------------------------------------- # -# Do not run METplus if there isn't at least one valid forecast hour for -# which to run it. +# Do not run METplus if there isn't at least one lead hour for which to +# run it. # #----------------------------------------------------------------------- # -if [ -z "${FHR_LIST}" ]; then +if [ -z "${VX_LEADHR_LIST}" ]; then print_err_msg_exit "\ -The list of forecast hours for which to run METplus is empty: - FHR_LIST = [${FHR_LIST}]" +The list of lead hours for which to run METplus is empty: + VX_LEADHR_LIST = [${VX_LEADHR_LIST}]" fi # #----------------------------------------------------------------------- @@ -284,7 +307,7 @@ fi # metplus_config_tmpl_bn="${MetplusToolName}" metplus_config_bn="${MetplusToolName}_${FIELDNAME_IN_MET_FILEDIR_NAMES}" -metplus_log_bn="${metplus_config_bn}" +metplus_log_bn="${metplus_config_bn}_$CDATE" # # Add prefixes and suffixes (extensions) to the base file names. # @@ -294,14 +317,12 @@ metplus_log_fn="metplus.log.${metplus_log_bn}" # #----------------------------------------------------------------------- # -# Load the yaml-like file containing the configuration for ensemble +# Load the yaml-like file containing the configuration for ensemble # verification. 
# #----------------------------------------------------------------------- # -det_or_ens="ens" -vx_config_fn="vx_config_${det_or_ens}.yaml" -vx_config_fp="${METPLUS_CONF}/${vx_config_fn}" +vx_config_fp="${METPLUS_CONF}/${VX_CONFIG_ENS_FN}" vx_config_dict=$(<"${vx_config_fp}") # Indent each line of vx_config_dict so that it is aligned properly when # included in the yaml-formatted variable "settings" below. @@ -334,7 +355,7 @@ settings="\ # Date and forecast hour information. # 'cdate': '$CDATE' -'fhr_list': '${FHR_LIST}' +'vx_leadhr_list': '${VX_LEADHR_LIST}' # # Input and output directory/file information. # @@ -366,13 +387,13 @@ settings="\ 'accum_hh': '${ACCUM_HH:-}' 'accum_no_pad': '${ACCUM_NO_PAD:-}' 'metplus_templates_dir': '${METPLUS_CONF:-}' -'input_field_group': '${VAR:-}' +'input_field_group': '${FIELD_GROUP:-}' 'input_level_fcst': '${FCST_LEVEL:-}' 'input_thresh_fcst': '${FCST_THRESH:-}' # # Verification configuration dictionary. # -'vx_config_dict': +'vx_config_dict': ${vx_config_dict:-} " @@ -384,7 +405,7 @@ uw template render \ -o ${metplus_config_fp} \ --verbose \ --values-file "${tmpfile}" \ - --search-path "/" + --search-path "/" err=$? rm $tmpfile diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh index abe5e3dd31..0531d21755 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx.sh @@ -22,7 +22,6 @@ done # . $USHdir/get_metplus_tool_name.sh . $USHdir/set_vx_params.sh -. $USHdir/set_vx_fhr_list.sh # #----------------------------------------------------------------------- # @@ -70,8 +69,8 @@ Entering script: \"${scrfunc_fn}\" In directory: \"${scrfunc_dir}\" This is the ex-script for the task that runs the METplus ${MetplusToolName} -tool to perform deterministic verification of the specified field (VAR) -for a single forecast. +tool to perform deterministic verification of the specified field group +(FIELD_GROUP) for a single forecast. ========================================================================" # #----------------------------------------------------------------------- @@ -95,10 +94,12 @@ FIELDNAME_IN_FCST_INPUT="" FIELDNAME_IN_MET_OUTPUT="" FIELDNAME_IN_MET_FILEDIR_NAMES="" +# Note that ACCUM_HH will not be defined for the REFC, RETOP, SFC, and +# UPA field groups. 
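For orientation, the field groups handled by these vx ex-scripts map onto obs types and verification classes roughly as follows; this is a simplified illustration, and ush/set_vx_params.sh remains the authoritative mapping:

    # Simplified illustration of the field-group mapping assumed by these scripts.
    FIELD_GROUP_INFO = {
        "APCP":  {"obtype": "CCPA",   "grid_or_point": "grid",  "uses_accum_hh": True},
        "ASNOW": {"obtype": "NOHRSC", "grid_or_point": "grid",  "uses_accum_hh": True},
        "REFC":  {"obtype": "MRMS",   "grid_or_point": "grid",  "uses_accum_hh": False},
        "RETOP": {"obtype": "MRMS",   "grid_or_point": "grid",  "uses_accum_hh": False},
        "SFC":   {"obtype": "NDAS",   "grid_or_point": "point", "uses_accum_hh": False},
        "UPA":   {"obtype": "NDAS",   "grid_or_point": "point", "uses_accum_hh": False},
    }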
set_vx_params \ obtype="${OBTYPE}" \ - field="$VAR" \ - accum_hh="${ACCUM_HH}" \ + field_group="${FIELD_GROUP}" \ + accum_hh="${ACCUM_HH:-}" \ outvarname_grid_or_point="grid_or_point" \ outvarname_fieldname_in_obs_input="FIELDNAME_IN_OBS_INPUT" \ outvarname_fieldname_in_fcst_input="FIELDNAME_IN_FCST_INPUT" \ @@ -136,11 +137,13 @@ time_lag=$( bc -l <<< "${ENS_TIME_LAG_HRS[$i]}*${SECS_PER_HOUR}" ) # vx_fcst_input_basedir=$( eval echo "${VX_FCST_INPUT_BASEDIR}" ) vx_output_basedir=$( eval echo "${VX_OUTPUT_BASEDIR}" ) + ensmem_indx=$(printf "%0${VX_NDIGITS_ENSMEM_NAMES}d" $(( 10#${ENSMEM_INDX}))) ensmem_name="mem${ensmem_indx}" if [ "${RUN_ENVIR}" = "nco" ]; then slash_cdate_or_null="" slash_ensmem_subdir_or_null="" + slash_obs_or_null="" else slash_cdate_or_null="/${CDATE}" # @@ -157,8 +160,10 @@ else # if [ $(boolify "${DO_ENSEMBLE}") = "TRUE" ]; then slash_ensmem_subdir_or_null="/${ensmem_name}" + slash_obs_or_null="/obs" else slash_ensmem_subdir_or_null="" + slash_obs_or_null="" fi fi @@ -166,26 +171,26 @@ if [ "${grid_or_point}" = "grid" ]; then case "${FIELDNAME_IN_MET_FILEDIR_NAMES}" in "APCP"*) - OBS_INPUT_DIR="${vx_output_basedir}/metprd/PcpCombine_obs" + OBS_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}${slash_obs_or_null}/metprd/PcpCombine_obs" OBS_INPUT_FN_TEMPLATE="${OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" - FCST_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}/${slash_ensmem_subdir_or_null}/metprd/PcpCombine_fcst" + FCST_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}${slash_ensmem_subdir_or_null}/metprd/PcpCombine_fcst" FCST_INPUT_FN_TEMPLATE="${FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" ;; "ASNOW"*) - OBS_INPUT_DIR="${OBS_DIR}" - OBS_INPUT_FN_TEMPLATE="${OBS_NOHRSC_ASNOW_FN_TEMPLATE}" - FCST_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}/${slash_ensmem_subdir_or_null}/metprd/PcpCombine_fcst" + OBS_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}${slash_obs_or_null}/metprd/PcpCombine_obs" + OBS_INPUT_FN_TEMPLATE="${OBS_NOHRSC_ASNOW_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" + FCST_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}${slash_ensmem_subdir_or_null}/metprd/PcpCombine_fcst" FCST_INPUT_FN_TEMPLATE="${FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" ;; "REFC") OBS_INPUT_DIR="${OBS_DIR}" - OBS_INPUT_FN_TEMPLATE="${OBS_MRMS_REFC_FN_TEMPLATE}" + OBS_INPUT_FN_TEMPLATE="${OBS_MRMS_FN_TEMPLATES[1]}" FCST_INPUT_DIR="${vx_fcst_input_basedir}" FCST_INPUT_FN_TEMPLATE="${FCST_SUBDIR_TEMPLATE:+${FCST_SUBDIR_TEMPLATE}/}${FCST_FN_TEMPLATE}" ;; "RETOP") OBS_INPUT_DIR="${OBS_DIR}" - OBS_INPUT_FN_TEMPLATE="${OBS_MRMS_RETOP_FN_TEMPLATE}" + OBS_INPUT_FN_TEMPLATE="${OBS_MRMS_FN_TEMPLATES[3]}" FCST_INPUT_DIR="${vx_fcst_input_basedir}" FCST_INPUT_FN_TEMPLATE="${FCST_SUBDIR_TEMPLATE:+${FCST_SUBDIR_TEMPLATE}/}${FCST_FN_TEMPLATE}" ;; @@ -194,7 +199,7 @@ if [ "${grid_or_point}" = "grid" ]; then elif [ "${grid_or_point}" = "point" ]; then OBS_INPUT_DIR="${vx_output_basedir}/metprd/Pb2nc_obs" - OBS_INPUT_FN_TEMPLATE="${OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE_PB2NC_OUTPUT}" + OBS_INPUT_FN_TEMPLATE="${OBS_NDAS_SFCandUPA_FN_TEMPLATE_PB2NC_OUTPUT}" FCST_INPUT_DIR="${vx_fcst_input_basedir}" FCST_INPUT_FN_TEMPLATE="${FCST_SUBDIR_TEMPLATE:+${FCST_SUBDIR_TEMPLATE}/}${FCST_FN_TEMPLATE}" @@ -202,29 +207,41 @@ fi OBS_INPUT_FN_TEMPLATE=$( eval echo ${OBS_INPUT_FN_TEMPLATE} ) FCST_INPUT_FN_TEMPLATE=$( eval echo ${FCST_INPUT_FN_TEMPLATE} ) -OUTPUT_BASE="${vx_output_basedir}${slash_cdate_or_null}/${slash_ensmem_subdir_or_null}" 
+OUTPUT_BASE="${vx_output_basedir}${slash_cdate_or_null}${slash_ensmem_subdir_or_null}" OUTPUT_DIR="${OUTPUT_BASE}/metprd/${MetplusToolName}" STAGING_DIR="${OUTPUT_BASE}/stage/${FIELDNAME_IN_MET_FILEDIR_NAMES}" # #----------------------------------------------------------------------- # -# Set the array of forecast hours for which to run the MET/METplus tool. -# This is done by starting with the full list of forecast hours for which -# there is forecast output and then removing from that list any forecast -# hours for which there is no corresponding observation data. +# Set the lead hours for which to run the MET/METplus tool. This is done +# by starting with the full list of lead hours for which we expect to +# find forecast output and then removing from that list any hours for +# which there is no corresponding observation data. # #----------------------------------------------------------------------- # -set_vx_fhr_list \ - cdate="${CDATE}" \ - fcst_len_hrs="${FCST_LEN_HRS}" \ - field="$VAR" \ - accum_hh="${ACCUM_HH}" \ - base_dir="${OBS_INPUT_DIR}" \ - fn_template="${OBS_INPUT_FN_TEMPLATE}" \ - check_accum_contrib_files="FALSE" \ - num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" \ - outvarname_fhr_list="FHR_LIST" +case "$OBTYPE" in + "CCPA"|"NOHRSC") + vx_intvl="$((10#${ACCUM_HH}))" + vx_hr_start="${vx_intvl}" + ;; + *) + vx_intvl="$((${VX_FCST_OUTPUT_INTVL_HRS}))" + vx_hr_start="0" + ;; +esac +vx_hr_end="${FCST_LEN_HRS}" + +VX_LEADHR_LIST=$( python3 $USHdir/set_leadhrs.py \ + --date_init="${CDATE}" \ + --lhr_min="${vx_hr_start}" \ + --lhr_max="${vx_hr_end}" \ + --lhr_intvl="${vx_intvl}" \ + --base_dir="${OBS_INPUT_DIR}" \ + --fn_template="${OBS_INPUT_FN_TEMPLATE}" \ + --num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" \ + --time_lag="${time_lag%.*}") || \ + print_err_msg_exit "Call to set_leadhrs.py failed with return code: $?" # #----------------------------------------------------------------------- # @@ -258,15 +275,15 @@ export LOGDIR # #----------------------------------------------------------------------- # -# Do not run METplus if there isn't at least one valid forecast hour for -# which to run it. +# Do not run METplus if there isn't at least one lead hour for which to +# run it. # #----------------------------------------------------------------------- # -if [ -z "${FHR_LIST}" ]; then +if [ -z "${VX_LEADHR_LIST}" ]; then print_err_msg_exit "\ -The list of forecast hours for which to run METplus is empty: - FHR_LIST = [${FHR_LIST}]" +The list of lead hours for which to run METplus is empty: + VX_LEADHR_LIST = [${VX_LEADHR_LIST}]" fi # #----------------------------------------------------------------------- @@ -281,7 +298,7 @@ fi # metplus_config_tmpl_bn="GridStat_or_PointStat" metplus_config_bn="${MetplusToolName}_${FIELDNAME_IN_MET_FILEDIR_NAMES}_${ensmem_name}" -metplus_log_bn="${metplus_config_bn}" +metplus_log_bn="${metplus_config_bn}_$CDATE" # # Add prefixes and suffixes (extensions) to the base file names. # @@ -296,9 +313,7 @@ metplus_log_fn="metplus.log.${metplus_log_bn}" # #----------------------------------------------------------------------- # -det_or_ens="det" -vx_config_fn="vx_config_${det_or_ens}.yaml" -vx_config_fp="${METPLUS_CONF}/${vx_config_fn}" +vx_config_fp="${METPLUS_CONF}/${VX_CONFIG_DET_FN}" vx_config_dict=$(<"${vx_config_fp}") # Indent each line of vx_config_dict so that it is aligned properly when # included in the yaml-formatted variable "settings" below. @@ -331,7 +346,7 @@ settings="\ # Date and forecast hour information. 
# 'cdate': '$CDATE' -'fhr_list': '${FHR_LIST}' +'vx_leadhr_list': '${VX_LEADHR_LIST}' # # Input and output directory/file information. # @@ -363,7 +378,7 @@ settings="\ 'accum_hh': '${ACCUM_HH:-}' 'accum_no_pad': '${ACCUM_NO_PAD:-}' 'metplus_templates_dir': '${METPLUS_CONF:-}' -'input_field_group': '${VAR:-}' +'input_field_group': '${FIELD_GROUP:-}' 'input_level_fcst': '${FCST_LEVEL:-}' 'input_thresh_fcst': '${FCST_THRESH:-}' # diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh index 2c8378c128..5ecc588316 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensmean.sh @@ -22,7 +22,6 @@ done # . $USHdir/get_metplus_tool_name.sh . $USHdir/set_vx_params.sh -. $USHdir/set_vx_fhr_list.sh # #----------------------------------------------------------------------- # @@ -70,8 +69,8 @@ Entering script: \"${scrfunc_fn}\" In directory: \"${scrfunc_dir}\" This is the ex-script for the task that runs the METplus ${MetplusToolName} -tool to perform verification of the specified field (VAR) on the ensemble -mean. +tool to perform verification of the specified field group (FIELD_GROUP) +on the ensemble mean. ========================================================================" # #----------------------------------------------------------------------- @@ -97,7 +96,7 @@ FIELDNAME_IN_MET_FILEDIR_NAMES="" set_vx_params \ obtype="${OBTYPE}" \ - field="$VAR" \ + field_group="${FIELD_GROUP}" \ accum_hh="${ACCUM_HH}" \ outvarname_grid_or_point="grid_or_point" \ outvarname_fieldname_in_obs_input="FIELDNAME_IN_OBS_INPUT" \ @@ -123,20 +122,20 @@ if [ "${grid_or_point}" = "grid" ]; then case "${FIELDNAME_IN_MET_FILEDIR_NAMES}" in "APCP"*) - OBS_INPUT_DIR="${vx_output_basedir}/metprd/PcpCombine_obs" + OBS_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}/obs/metprd/PcpCombine_obs" OBS_INPUT_FN_TEMPLATE="${OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" ;; "ASNOW"*) - OBS_INPUT_DIR="${OBS_DIR}" - OBS_INPUT_FN_TEMPLATE="${OBS_NOHRSC_ASNOW_FN_TEMPLATE}" + OBS_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}/obs/metprd/PcpCombine_obs" + OBS_INPUT_FN_TEMPLATE="${OBS_NOHRSC_ASNOW_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" ;; "REFC") OBS_INPUT_DIR="${OBS_DIR}" - OBS_INPUT_FN_TEMPLATE="${OBS_MRMS_REFC_FN_TEMPLATE}" + OBS_INPUT_FN_TEMPLATE="${OBS_MRMS_FN_TEMPLATES[1]}" ;; "RETOP") OBS_INPUT_DIR="${OBS_DIR}" - OBS_INPUT_FN_TEMPLATE="${OBS_MRMS_RETOP_FN_TEMPLATE}" + OBS_INPUT_FN_TEMPLATE="${OBS_MRMS_FN_TEMPLATES[3]}" ;; esac FCST_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}/metprd/GenEnsProd" @@ -144,7 +143,7 @@ if [ "${grid_or_point}" = "grid" ]; then elif [ "${grid_or_point}" = "point" ]; then OBS_INPUT_DIR="${vx_output_basedir}/metprd/Pb2nc_obs" - OBS_INPUT_FN_TEMPLATE="${OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE_PB2NC_OUTPUT}" + OBS_INPUT_FN_TEMPLATE="${OBS_NDAS_SFCandUPA_FN_TEMPLATE_PB2NC_OUTPUT}" FCST_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}/metprd/GenEnsProd" fi @@ -157,23 +156,35 @@ STAGING_DIR="${OUTPUT_BASE}/stage/${FIELDNAME_IN_MET_FILEDIR_NAMES}_ensmean" # #----------------------------------------------------------------------- # -# Set the array of forecast hours for which to run the MET/METplus tool. -# This is done by starting with the full list of forecast hours for which -# there is forecast output and then removing from that list any forecast -# hours for which there is no corresponding observation data. 
+# Set the lead hours for which to run the MET/METplus tool. This is done +# by starting with the full list of lead hours for which we expect to +# find forecast output and then removing from that list any hours for +# which there is no corresponding observation data. # #----------------------------------------------------------------------- # -set_vx_fhr_list \ - cdate="${CDATE}" \ - fcst_len_hrs="${FCST_LEN_HRS}" \ - field="$VAR" \ - accum_hh="${ACCUM_HH}" \ - base_dir="${OBS_INPUT_DIR}" \ - fn_template="${OBS_INPUT_FN_TEMPLATE}" \ - check_accum_contrib_files="FALSE" \ - num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" \ - outvarname_fhr_list="FHR_LIST" +case "$OBTYPE" in + "CCPA"|"NOHRSC") + vx_intvl="$((10#${ACCUM_HH}))" + vx_hr_start="${vx_intvl}" + ;; + *) + vx_intvl="$((${VX_FCST_OUTPUT_INTVL_HRS}))" + vx_hr_start="0" + ;; +esac +vx_hr_end="${FCST_LEN_HRS}" + +VX_LEADHR_LIST=$( python3 $USHdir/set_leadhrs.py \ + --date_init="${CDATE}" \ + --lhr_min="${vx_hr_start}" \ + --lhr_max="${vx_hr_end}" \ + --lhr_intvl="${vx_intvl}" \ + --base_dir="${OBS_INPUT_DIR}" \ + --fn_template="${OBS_INPUT_FN_TEMPLATE}" \ + --num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" ) || \ + print_err_msg_exit "Call to set_leadhrs.py failed with return code: $?" + # #----------------------------------------------------------------------- # @@ -216,15 +227,15 @@ export LOGDIR # #----------------------------------------------------------------------- # -# Do not run METplus if there isn't at least one valid forecast hour for -# which to run it. +# Do not run METplus if there isn't at least one lead hour for which to +# run it. # #----------------------------------------------------------------------- # -if [ -z "${FHR_LIST}" ]; then +if [ -z "${VX_LEADHR_LIST}" ]; then print_err_msg_exit "\ -The list of forecast hours for which to run METplus is empty: - FHR_LIST = [${FHR_LIST}]" +The list of lead hours for which to run METplus is empty: + VX_LEADHR_LIST = [${VX_LEADHR_LIST}]" fi # #----------------------------------------------------------------------- @@ -238,8 +249,8 @@ fi # First, set the base file names. # metplus_config_tmpl_bn="${MetplusToolName}_ensmean" -metplus_config_bn="${MetplusToolName}_ensmean_${FIELDNAME_IN_MET_FILEDIR_NAMES}" -metplus_log_bn="${metplus_config_bn}" +metplus_config_bn="${MetplusToolName}_${FIELDNAME_IN_MET_FILEDIR_NAMES}_ensmean" +metplus_log_bn="${metplus_config_bn}_$CDATE" # # Add prefixes and suffixes (extensions) to the base file names. # @@ -254,9 +265,7 @@ metplus_log_fn="metplus.log.${metplus_log_bn}" # #----------------------------------------------------------------------- # -det_or_ens="ens" -vx_config_fn="vx_config_${det_or_ens}.yaml" -vx_config_fp="${METPLUS_CONF}/${vx_config_fn}" +vx_config_fp="${METPLUS_CONF}/${VX_CONFIG_ENS_FN}" vx_config_dict=$(<"${vx_config_fp}") # Indent each line of vx_config_dict so that it is aligned properly when # included in the yaml-formatted variable "settings" below. @@ -289,7 +298,7 @@ settings="\ # Date and forecast hour information. # 'cdate': '$CDATE' -'fhr_list': '${FHR_LIST}' +'vx_leadhr_list': '${VX_LEADHR_LIST}' # # Input and output directory/file information. 
# @@ -321,7 +330,7 @@ settings="\ 'accum_hh': '${ACCUM_HH:-}' 'accum_no_pad': '${ACCUM_NO_PAD:-}' 'metplus_templates_dir': '${METPLUS_CONF:-}' -'input_field_group': '${VAR:-}' +'input_field_group': '${FIELD_GROUP:-}' 'input_level_fcst': '${FCST_LEVEL:-}' 'input_thresh_fcst': '${FCST_THRESH:-}' # diff --git a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh index eae1850ad8..c7693fe06c 100755 --- a/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh +++ b/scripts/exregional_run_met_gridstat_or_pointstat_vx_ensprob.sh @@ -22,7 +22,6 @@ done # . $USHdir/get_metplus_tool_name.sh . $USHdir/set_vx_params.sh -. $USHdir/set_vx_fhr_list.sh # #----------------------------------------------------------------------- # @@ -70,7 +69,8 @@ Entering script: \"${scrfunc_fn}\" In directory: \"${scrfunc_dir}\" This is the ex-script for the task that runs the METplus ${MetplusToolName} -tool to perform verification of the specified field (VAR) on the ensemble +tool to perform verification of the specified field group (FIELD_GROUP) +on the ensemble frequencies/probabilities. ========================================================================" # @@ -97,7 +97,7 @@ FIELDNAME_IN_MET_FILEDIR_NAMES="" set_vx_params \ obtype="${OBTYPE}" \ - field="$VAR" \ + field_group="${FIELD_GROUP}" \ accum_hh="${ACCUM_HH}" \ outvarname_grid_or_point="grid_or_point" \ outvarname_fieldname_in_obs_input="FIELDNAME_IN_OBS_INPUT" \ @@ -123,27 +123,27 @@ if [ "${grid_or_point}" = "grid" ]; then case "${FIELDNAME_IN_MET_FILEDIR_NAMES}" in "APCP"*) - OBS_INPUT_DIR="${vx_output_basedir}/metprd/PcpCombine_obs" + OBS_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}/obs/metprd/PcpCombine_obs" OBS_INPUT_FN_TEMPLATE="${OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" ;; "ASNOW"*) - OBS_INPUT_DIR="${OBS_DIR}" - OBS_INPUT_FN_TEMPLATE="${OBS_NOHRSC_ASNOW_FN_TEMPLATE}" + OBS_INPUT_DIR="${vx_output_basedir}${slash_cdate_or_null}/obs/metprd/PcpCombine_obs" + OBS_INPUT_FN_TEMPLATE="${OBS_NOHRSC_ASNOW_FN_TEMPLATE_PCPCOMBINE_OUTPUT}" ;; "REFC") OBS_INPUT_DIR="${OBS_DIR}" - OBS_INPUT_FN_TEMPLATE="${OBS_MRMS_REFC_FN_TEMPLATE}" + OBS_INPUT_FN_TEMPLATE="${OBS_MRMS_FN_TEMPLATES[1]}" ;; "RETOP") OBS_INPUT_DIR="${OBS_DIR}" - OBS_INPUT_FN_TEMPLATE="${OBS_MRMS_RETOP_FN_TEMPLATE}" + OBS_INPUT_FN_TEMPLATE="${OBS_MRMS_FN_TEMPLATES[3]}" ;; esac elif [ "${grid_or_point}" = "point" ]; then OBS_INPUT_DIR="${vx_output_basedir}/metprd/Pb2nc_obs" - OBS_INPUT_FN_TEMPLATE="${OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE_PB2NC_OUTPUT}" + OBS_INPUT_FN_TEMPLATE="${OBS_NDAS_SFCandUPA_FN_TEMPLATE_PB2NC_OUTPUT}" fi OBS_INPUT_FN_TEMPLATE=$( eval echo ${OBS_INPUT_FN_TEMPLATE} ) @@ -156,23 +156,35 @@ STAGING_DIR="${OUTPUT_BASE}/stage/${FIELDNAME_IN_MET_FILEDIR_NAMES}_ensprob" # #----------------------------------------------------------------------- # -# Set the array of forecast hours for which to run the MET/METplus tool. -# This is done by starting with the full list of forecast hours for which -# there is forecast output and then removing from that list any forecast -# hours for which there is no corresponding observation data. +# Set the lead hours for which to run the MET/METplus tool. This is done +# by starting with the full list of lead hours for which we expect to +# find forecast output and then removing from that list any hours for +# which there is no corresponding observation data. 
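The case block that follows chooses the verification interval and starting lead hour; its behavior can be summarized by this sketch (illustrative names, not code taken from the scripts):

    # Sketch of the interval/start-hour selection done by the bash case block below.
    def vx_interval_and_start(obtype, accum_hh, fcst_output_intvl_hrs):
        if obtype in ("CCPA", "NOHRSC"):
            intvl = int(accum_hh)              # accumulated fields, e.g. "06" -> 6
            start = intvl                      # first accumulation ends ACCUM_HH hours in
        else:
            intvl = int(fcst_output_intvl_hrs)
            start = 0                          # instantaneous fields: verify from hour 0
        return intvl, start

    print(vx_interval_and_start("CCPA", "06", 1))   # -> (6, 6)
    print(vx_interval_and_start("MRMS", "", 1))     # -> (1, 0)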
# #----------------------------------------------------------------------- # -set_vx_fhr_list \ - cdate="${CDATE}" \ - fcst_len_hrs="${FCST_LEN_HRS}" \ - field="$VAR" \ - accum_hh="${ACCUM_HH}" \ - base_dir="${OBS_INPUT_DIR}" \ - fn_template="${OBS_INPUT_FN_TEMPLATE}" \ - check_accum_contrib_files="FALSE" \ - num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" \ - outvarname_fhr_list="FHR_LIST" +case "$OBTYPE" in + "CCPA"|"NOHRSC") + vx_intvl="$((10#${ACCUM_HH}))" + vx_hr_start="${vx_intvl}" + ;; + *) + vx_intvl="$((${VX_FCST_OUTPUT_INTVL_HRS}))" + vx_hr_start="0" + ;; +esac +vx_hr_end="${FCST_LEN_HRS}" + +VX_LEADHR_LIST=$( python3 $USHdir/set_leadhrs.py \ + --date_init="${CDATE}" \ + --lhr_min="${vx_hr_start}" \ + --lhr_max="${vx_hr_end}" \ + --lhr_intvl="${vx_intvl}" \ + --base_dir="${OBS_INPUT_DIR}" \ + --fn_template="${OBS_INPUT_FN_TEMPLATE}" \ + --num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" ) || \ + print_err_msg_exit "Call to set_leadhrs.py failed with return code: $?" + # #----------------------------------------------------------------------- # @@ -215,15 +227,15 @@ export LOGDIR # #----------------------------------------------------------------------- # -# Do not run METplus if there isn't at least one valid forecast hour for -# which to run it. +# Do not run METplus if there isn't at least one lead hour for which to +# run it. # #----------------------------------------------------------------------- # -if [ -z "${FHR_LIST}" ]; then +if [ -z "${VX_LEADHR_LIST}" ]; then print_err_msg_exit "\ -The list of forecast hours for which to run METplus is empty: - FHR_LIST = [${FHR_LIST}]" +The list of lead hours for which to run METplus is empty: + VX_LEADHR_LIST = [${VX_LEADHR_LIST}]" fi # #----------------------------------------------------------------------- @@ -237,8 +249,8 @@ fi # First, set the base file names. # metplus_config_tmpl_bn="${MetplusToolName}_ensprob" -metplus_config_bn="${MetplusToolName}_ensprob_${FIELDNAME_IN_MET_FILEDIR_NAMES}" -metplus_log_bn="${metplus_config_bn}" +metplus_config_bn="${MetplusToolName}_${FIELDNAME_IN_MET_FILEDIR_NAMES}_ensprob" +metplus_log_bn="${metplus_config_bn}_$CDATE" # # Add prefixes and suffixes (extensions) to the base file names. # @@ -253,9 +265,7 @@ metplus_log_fn="metplus.log.${metplus_log_bn}" # #----------------------------------------------------------------------- # -det_or_ens="ens" -vx_config_fn="vx_config_${det_or_ens}.yaml" -vx_config_fp="${METPLUS_CONF}/${vx_config_fn}" +vx_config_fp="${METPLUS_CONF}/${VX_CONFIG_ENS_FN}" vx_config_dict=$(<"${vx_config_fp}") # Indent each line of vx_config_dict so that it is aligned properly when # included in the yaml-formatted variable "settings" below. @@ -288,7 +298,7 @@ settings="\ # Date and forecast hour information. # 'cdate': '$CDATE' -'fhr_list': '${FHR_LIST}' +'vx_leadhr_list': '${VX_LEADHR_LIST}' # # Input and output directory/file information. # @@ -320,7 +330,7 @@ settings="\ 'accum_hh': '${ACCUM_HH:-}' 'accum_no_pad': '${ACCUM_NO_PAD:-}' 'metplus_templates_dir': '${METPLUS_CONF:-}' -'input_field_group': '${VAR:-}' +'input_field_group': '${FIELD_GROUP:-}' 'input_level_fcst': '${FCST_LEVEL:-}' 'input_thresh_fcst': '${FCST_THRESH:-}' # diff --git a/scripts/exregional_run_met_pb2nc_obs.sh b/scripts/exregional_run_met_pb2nc_obs.sh index 7e79fb4efb..63ea5ca760 100755 --- a/scripts/exregional_run_met_pb2nc_obs.sh +++ b/scripts/exregional_run_met_pb2nc_obs.sh @@ -21,7 +21,6 @@ done # . $USHdir/get_metplus_tool_name.sh . $USHdir/set_vx_params.sh -. 
$USHdir/set_vx_fhr_list.sh # #----------------------------------------------------------------------- # @@ -74,6 +73,30 @@ to convert NDAS prep buffer observation files to NetCDF format. # #----------------------------------------------------------------------- # +# The day (in the form YYYYMMDD) associated with the current task, as set via +# the task's cycledefs attribute in the ROCOTO XML. +# +#----------------------------------------------------------------------- +# +yyyymmdd_task=${PDY} + +# Seconds since the reference time used by the DATE_UTIL utility, evaluated +# at the start of the day of the current task. This will be used below to +# find hours since the start of this day. +sec_since_ref_task=$(${DATE_UTIL} --date "${yyyymmdd_task} 0 hours" +%s) +# +#----------------------------------------------------------------------- +# +# Get the list of all the times in the current day at which to retrieve +# obs. This is an array with elements having format "YYYYMMDDHH". +# +#----------------------------------------------------------------------- +# +array_name="OBS_RETRIEVE_TIMES_${OBTYPE}_${yyyymmdd_task}" +eval obs_retrieve_times_crnt_day=\( \${${array_name}[@]} \) +# +#----------------------------------------------------------------------- +# # Get the cycle date and time in YYYYMMDDHH format. # #----------------------------------------------------------------------- @@ -95,7 +118,7 @@ FIELDNAME_IN_MET_FILEDIR_NAMES="" set_vx_params \ obtype="${OBTYPE}" \ - field="$VAR" \ + field_group="${FIELD_GROUP}" \ accum_hh="${ACCUM_HH}" \ outvarname_grid_or_point="grid_or_point" \ outvarname_fieldname_in_obs_input="FIELDNAME_IN_OBS_INPUT" \ @@ -113,29 +136,75 @@ set_vx_params \ vx_output_basedir=$( eval echo "${VX_OUTPUT_BASEDIR}" ) OBS_INPUT_DIR="${OBS_DIR}" -OBS_INPUT_FN_TEMPLATE=$( eval echo ${OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE} ) +OBS_INPUT_FN_TEMPLATE=$( eval echo ${OBS_NDAS_FN_TEMPLATES[1]} ) OUTPUT_BASE="${vx_output_basedir}" OUTPUT_DIR="${OUTPUT_BASE}/metprd/${MetplusToolName}_obs" -OUTPUT_FN_TEMPLATE=$( eval echo ${OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE_PB2NC_OUTPUT} ) +OUTPUT_FN_TEMPLATE=$( eval echo ${OBS_NDAS_SFCandUPA_FN_TEMPLATE_PB2NC_OUTPUT} ) STAGING_DIR="${OUTPUT_BASE}/stage/${MetplusToolName}_obs" # #----------------------------------------------------------------------- # -# Set the array of forecast hours for which to run the MET/METplus tool. +# Set the array of lead hours (relative to the date associated with this +# task) for which to run the MET/METplus tool. # #----------------------------------------------------------------------- # -set_vx_fhr_list \ - cdate="${CDATE}" \ - fcst_len_hrs="${FCST_LEN_HRS}" \ - field="$VAR" \ - accum_hh="${ACCUM_HH}" \ - base_dir="${OBS_INPUT_DIR}" \ - fn_template="${OBS_INPUT_FN_TEMPLATE}" \ - check_accum_contrib_files="FALSE" \ - num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" \ - outvarname_fhr_list="FHR_LIST" +LEADHR_LIST="" +num_missing_files=0 +for yyyymmddhh in ${obs_retrieve_times_crnt_day[@]}; do + yyyymmdd=$(echo ${yyyymmddhh} | cut -c1-8) + hh=$(echo ${yyyymmddhh} | cut -c9-10) + + # Set the full path to the processed obs file (fp) corresponding to the + # current observation retrieval time. + sec_since_ref=$(${DATE_UTIL} --date "${yyyymmdd} ${hh} hours" +%s) + lhr=$(( (sec_since_ref - sec_since_ref_task)/3600 )) + + fp=$( python3 $USHdir/eval_metplus_timestr_tmpl.py \ + --init_time="${yyyymmdd_task}00" \ + --lhr="${lhr}" \ + --fn_template="${OBS_DIR}/${OBS_NDAS_FN_TEMPLATES[1]}") || \ + print_err_msg_exit "Call to eval_metplus_timestr_tmpl.py failed with return code: $?"
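The eval_metplus_timestr_tmpl.py call above renders a METplus-style filename template at the valid time implied by the task day plus the lead hour. A simplified sketch of that idea, handling only the {valid?fmt=...} placeholder (the real script supports more than this):

    # Simplified sketch; not a drop-in replacement for ush/eval_metplus_timestr_tmpl.py.
    import re
    from datetime import datetime, timedelta

    def eval_valid_template(init_time, lhr, fn_template):
        """Render '{valid?fmt=...}' placeholders at valid = init + lead."""
        valid = datetime.strptime(init_time, "%Y%m%d%H") + timedelta(hours=lhr)
        return re.sub(r"\{valid\?fmt=([^}]+)\}",
                      lambda m: valid.strftime(m.group(1)),
                      fn_template)

    # Example using the NOHRSC template that appears elsewhere in this PR:
    tmpl = "{valid?fmt=%Y%m%d}/sfav2_CONUS_06h_{valid?fmt=%Y%m%d%H}_grid184.grb2"
    print(eval_valid_template("2024073000", 6, tmpl))
    # -> 20240730/sfav2_CONUS_06h_2024073006_grid184.grb2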
+ + if [[ -f "${fp}" ]]; then + print_info_msg " +Found ${OBTYPE} obs file corresponding to observation retrieval time (yyyymmddhh): + yyyymmddhh = \"${yyyymmddhh}\" + fp = \"${fp}\" +" + hh_noZero=$((10#${hh})) + LEADHR_LIST="${LEADHR_LIST},${hh_noZero}" + else + num_missing_files=$((num_missing_files+1)) + print_info_msg " +${OBTYPE} obs file corresponding to observation retrieval time (yyyymmddhh) +does not exist on disk: + yyyymmddhh = \"${yyyymmddhh}\" + fp = \"${fp}\" +Removing this time from the list of times to be processed by ${METPLUSTOOLNAME}. +" + fi +done + +# If the number of missing files is greater than the maximum allowed +# (specified by num_missing_files_max), print out an error message and +# exit. +if [ "${num_missing_files}" -gt "${NUM_MISSING_OBS_FILES_MAX}" ]; then + print_err_msg_exit "\ +The number of missing ${OBTYPE} obs files (num_missing_files) is greater +than the maximum allowed number (NUM_MISSING_FILES_MAX): + num_missing_files = ${num_missing_files} + NUM_MISSING_OBS_FILES_MAX = ${NUM_MISSING_OBS_FILES_MAX}" +fi + +# Remove leading comma from LEADHR_LIST. +LEADHR_LIST=$( echo "${LEADHR_LIST}" | $SED "s/^,//g" ) +print_info_msg "$VERBOSE" "\ +Final (i.e. after filtering for missing obs files) set of lead hours +(saved in a scalar string variable) is: + LEADHR_LIST = \"${LEADHR_LIST}\" +" # #----------------------------------------------------------------------- # @@ -169,15 +238,15 @@ export LOGDIR # #----------------------------------------------------------------------- # -# Do not run METplus if there isn't at least one valid forecast hour for -# which to run it. +# Do not run METplus if there isn't at least one lead hour for which to +# run it. # #----------------------------------------------------------------------- # -if [ -z "${FHR_LIST}" ]; then +if [ -z "${LEADHR_LIST}" ]; then print_err_msg_exit "\ -The list of forecast hours for which to run METplus is empty: - FHR_LIST = [${FHR_LIST}]" +The list of lead hours for which to run METplus is empty: + LEADHR_LIST = [${LEADHR_LIST}]" fi # #----------------------------------------------------------------------- @@ -208,8 +277,8 @@ metplus_config_tmpl_fn="${MetplusToolName}_obs" # information, but we still include that info in the file name so that # the behavior in the two modes is as similar as possible. # -metplus_config_fn="${metplus_config_tmpl_fn}_${CDATE}" -metplus_log_fn="${metplus_config_fn}" +metplus_config_fn="${metplus_config_tmpl_fn}_NDAS_${CDATE}" +metplus_log_fn="${metplus_config_fn}_NDAS" # # Add prefixes and suffixes (extensions) to the base file names. # @@ -241,10 +310,10 @@ settings="\ 'METPLUS_TOOL_NAME': '${METPLUS_TOOL_NAME}' 'metplus_verbosity_level': '${METPLUS_VERBOSITY_LEVEL}' # -# Date and forecast hour information. +# Date and lead hour information. # 'cdate': '$CDATE' - 'fhr_list': '${FHR_LIST}' + 'leadhr_list': '${LEADHR_LIST}' # # Input and output directory/file information. # @@ -285,7 +354,7 @@ uw template render \ -o ${metplus_config_fp} \ --verbose \ --values-file "${tmpfile}" \ - --search-path "/" + --search-path "/" err=$? rm $tmpfile @@ -318,6 +387,16 @@ METplus configuration file used is: # #----------------------------------------------------------------------- # +# Create flag file that indicates completion of task. This is needed by +# the workflow. 
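The completion flag created just below, like the get_obs flag files earlier in this patch, follows a simple day-stamped naming pattern that downstream workflow dependencies can test for. A sketch of that pattern with illustrative function names:

    # Illustrative only: mirrors the "<task>_<PDY>_complete.txt" convention used here.
    from pathlib import Path

    def mark_complete(flag_files_dir, task_name, pdy):
        flag_dir = Path(flag_files_dir)
        flag_dir.mkdir(parents=True, exist_ok=True)               # mkdir -p
        (flag_dir / f"{task_name}_{pdy}_complete.txt").touch()     # touch the flag file

    def is_complete(flag_files_dir, task_name, pdy):
        return (Path(flag_files_dir) / f"{task_name}_{pdy}_complete.txt").exists()

    # e.g. mark_complete(WFLOW_FLAG_FILES_DIR, "run_met_pb2nc_obs_ndas", "20240730")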
+# +#----------------------------------------------------------------------- +# +mkdir -p ${WFLOW_FLAG_FILES_DIR} +touch "${WFLOW_FLAG_FILES_DIR}/run_met_pb2nc_obs_ndas_${PDY}_complete.txt" +# +#----------------------------------------------------------------------- +# # Print message indicating successful completion of script. # #----------------------------------------------------------------------- diff --git a/scripts/exregional_run_met_pcpcombine.sh b/scripts/exregional_run_met_pcpcombine.sh index 026afb4eb2..9ff0ee5ada 100755 --- a/scripts/exregional_run_met_pcpcombine.sh +++ b/scripts/exregional_run_met_pcpcombine.sh @@ -22,7 +22,6 @@ done # . $USHdir/get_metplus_tool_name.sh . $USHdir/set_vx_params.sh -. $USHdir/set_vx_fhr_list.sh # #----------------------------------------------------------------------- # @@ -99,7 +98,7 @@ FIELDNAME_IN_MET_FILEDIR_NAMES="" set_vx_params \ obtype="${OBTYPE}" \ - field="$VAR" \ + field_group="${FIELD_GROUP}" \ accum_hh="${ACCUM_HH}" \ outvarname_grid_or_point="grid_or_point" \ outvarname_fieldname_in_obs_input="FIELDNAME_IN_OBS_INPUT" \ @@ -147,6 +146,7 @@ if [ "${FCST_OR_OBS}" = "FCST" ]; then if [ "${RUN_ENVIR}" = "nco" ]; then slash_cdate_or_null="" slash_ensmem_subdir_or_null="" + slash_obs_or_null="" else slash_cdate_or_null="/${CDATE}" # @@ -167,6 +167,13 @@ if [ "${FCST_OR_OBS}" = "FCST" ]; then slash_ensmem_subdir_or_null="" fi fi +elif [ "${FCST_OR_OBS}" = "OBS" ]; then + slash_cdate_or_null="/${CDATE}" + if [ $(boolify "${DO_ENSEMBLE}") = "TRUE" ]; then + slash_obs_or_null="/obs" + else + slash_obs_or_null="" + fi fi OBS_INPUT_DIR="" @@ -179,7 +186,7 @@ if [ "${FCST_OR_OBS}" = "FCST" ]; then FCST_INPUT_DIR="${vx_fcst_input_basedir}" FCST_INPUT_FN_TEMPLATE=$( eval echo ${FCST_SUBDIR_TEMPLATE:+${FCST_SUBDIR_TEMPLATE}/}${FCST_FN_TEMPLATE} ) - OUTPUT_BASE="${vx_output_basedir}${slash_cdate_or_null}/${slash_ensmem_subdir_or_null}" + OUTPUT_BASE="${vx_output_basedir}${slash_cdate_or_null}${slash_ensmem_subdir_or_null}" OUTPUT_DIR="${OUTPUT_BASE}/metprd/${MetplusToolName}_fcst" OUTPUT_FN_TEMPLATE=$( eval echo ${FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT} ) STAGING_DIR="${OUTPUT_BASE}/stage/${FIELDNAME_IN_MET_FILEDIR_NAMES}" @@ -187,45 +194,78 @@ if [ "${FCST_OR_OBS}" = "FCST" ]; then elif [ "${FCST_OR_OBS}" = "OBS" ]; then OBS_INPUT_DIR="${OBS_DIR}" - OBS_INPUT_FN_TEMPLATE=$( eval echo ${OBS_CCPA_APCP_FN_TEMPLATE} ) + fn_template=$(eval echo \${OBS_${OBTYPE}_FN_TEMPLATES[1]}) + OBS_INPUT_FN_TEMPLATE=$( eval echo ${fn_template} ) - OUTPUT_BASE="${vx_output_basedir}" + OUTPUT_BASE="${vx_output_basedir}${slash_cdate_or_null}${slash_obs_or_null}" OUTPUT_DIR="${OUTPUT_BASE}/metprd/${MetplusToolName}_obs" - OUTPUT_FN_TEMPLATE=$( eval echo ${OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT} ) + fn_template=$(eval echo \${OBS_${OBTYPE}_${FIELD_GROUP}_FN_TEMPLATE_PCPCOMBINE_OUTPUT}) + OUTPUT_FN_TEMPLATE=$( eval echo ${fn_template} ) STAGING_DIR="${OUTPUT_BASE}/stage/${FIELDNAME_IN_MET_FILEDIR_NAMES}" fi # #----------------------------------------------------------------------- # -# Set the array of forecast hours for which to run the MET/METplus tool. -# This is done by starting with the full list of forecast hours for which -# there is forecast output and then removing from that list any forecast -# hours for which there is no corresponding observation data (if combining -# observed APCP) or forecast data (if combining forecast APCP). +# Set the array of lead hours for which to run the MET/METplus tool. 
+# +#----------------------------------------------------------------------- +# +vx_intvl="$((10#${ACCUM_HH}))" +VX_LEADHR_LIST=$( python3 $USHdir/set_leadhrs.py \ + --lhr_min="${vx_intvl}" \ + --lhr_max="${FCST_LEN_HRS}" \ + --lhr_intvl="${vx_intvl}" \ + --skip_check_files ) || \ + print_err_msg_exit "Call to set_leadhrs.py failed with return code: $?" +# +#----------------------------------------------------------------------- +# +# Check for the presence of files (either from observations or forecasts) +# needed to create the required accumulation given by ACCUM_HH. # #----------------------------------------------------------------------- # if [ "${FCST_OR_OBS}" = "FCST" ]; then base_dir="${FCST_INPUT_DIR}" fn_template="${FCST_INPUT_FN_TEMPLATE}" - num_missing_files_max="${NUM_MISSING_FCST_FILES_MAX}" + subintvl="${VX_FCST_OUTPUT_INTVL_HRS}" elif [ "${FCST_OR_OBS}" = "OBS" ]; then base_dir="${OBS_INPUT_DIR}" fn_template="${OBS_INPUT_FN_TEMPLATE}" - num_missing_files_max="${NUM_MISSING_OBS_FILES_MAX}" + subintvl="${OBS_AVAIL_INTVL_HRS}" fi +num_missing_files_max="0" +input_accum_hh=$(printf "%02d" ${subintvl}) +# +# Convert the list of hours at which the PcpCombine tool will be run to +# an array. This represents the hours at which each accumulation period +# ends. Then use it to check the presence of all files required to build +# the full accumulations from the sub-accumulations. +# +subintvl_end_hrs=($( echo ${VX_LEADHR_LIST} | $SED "s/,//g" )) +for hr_end in ${subintvl_end_hrs[@]}; do + hr_start=$((hr_end - vx_intvl + subintvl)) + print_info_msg " +Checking for the presence of files that will contribute to the ${vx_intvl}-hour +accumulation ending at lead hour ${hr_end} (relative to ${CDATE})... +" + python3 $USHdir/set_leadhrs.py \ + --date_init="${CDATE}" \ + --lhr_min="${hr_start}" \ + --lhr_max="${hr_end}" \ + --lhr_intvl="${subintvl}" \ + --base_dir="${base_dir}" \ + --fn_template="${fn_template}" \ + --num_missing_files_max="${num_missing_files_max}" \ + --time_lag="${time_lag%.*}" || \ + print_err_msg_exit "Call to set_leadhrs.py failed with return code: $?" +done -set_vx_fhr_list \ - cdate="${CDATE}" \ - fcst_len_hrs="${FCST_LEN_HRS}" \ - field="$VAR" \ - accum_hh="${ACCUM_HH}" \ - base_dir="${base_dir}" \ - fn_template="${fn_template}" \ - check_accum_contrib_files="TRUE" \ - num_missing_files_max="${num_missing_files_max}" \ - outvarname_fhr_list="FHR_LIST" +print_info_msg " +${MetplusToolName} will be run for the following lead hours (relative to ${CDATE}): + VX_LEADHR_LIST = ${VX_LEADHR_LIST} +" # #----------------------------------------------------------------------- # @@ -259,15 +299,15 @@ export LOGDIR # #----------------------------------------------------------------------- # -# Do not run METplus if there isn't at least one valid forecast hour for -# which to run it. +# Do not run METplus if there isn't at least one lead hour for which to +# run it. # #----------------------------------------------------------------------- # -if [ -z "${FHR_LIST}" ]; then +if [ -z "${VX_LEADHR_LIST}" ]; then print_err_msg_exit "\ -The list of forecast hours for which to run METplus is empty: - FHR_LIST = [${FHR_LIST}]" +The list of lead hours for which to run METplus is empty: + VX_LEADHR_LIST = [${VX_LEADHR_LIST}]" fi # #----------------------------------------------------------------------- @@ -281,18 +321,13 @@ fi # First, set the base file names.
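The contributing-file loop above reduces to: an accumulation of length vx_intvl ending at lead hour hr_end requires every sub-interval file from hr_end - vx_intvl + subintvl through hr_end. A small sketch of that arithmetic (the sub-interval values in the examples are typical, not mandated):

    # Arithmetic behind the contributing-file check in the loop above.
    def contributing_hours(hr_end, vx_intvl, subintvl):
        """Lead hours of the sub-accumulation files needed for one accumulation."""
        hr_start = hr_end - vx_intvl + subintvl
        return list(range(hr_start, hr_end + 1, subintvl))

    # 6-h APCP accumulation ending at lead hour 12 built from 1-h CCPA files:
    print(contributing_hours(12, 6, 1))   # -> [7, 8, 9, 10, 11, 12]
    # 6-h ASNOW accumulation ending at lead hour 12 built from 6-h NOHRSC files:
    print(contributing_hours(12, 6, 6))   # -> [12]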
# metplus_config_tmpl_fn="${MetplusToolName}" -metplus_config_fn="${metplus_config_tmpl_fn}_$(echo_lowercase ${FCST_OR_OBS})_${FIELDNAME_IN_MET_FILEDIR_NAMES}${ENSMEM_INDX:+_${ensmem_name}}" -metplus_log_fn="${metplus_config_fn}_$CDATE" -# -# If operating on observation files, append the cycle date to the name -# of the configuration file because in this case, the output files from -# METplus are not placed under cycle directories (so another method is -# necessary to associate the configuration file with the cycle for which -# it is used). -# -if [ "${FCST_OR_OBS}" = "OBS" ]; then - metplus_config_fn="${metplus_log_fn}" +if [ "${FCST_OR_OBS}" = "FCST" ]; then + suffix="${ENSMEM_INDX:+_${ensmem_name}}" +elif [ "${FCST_OR_OBS}" = "OBS" ]; then + suffix="_${OBTYPE}" fi +metplus_config_fn="${metplus_config_tmpl_fn}_$(echo_lowercase ${FCST_OR_OBS})_${FIELDNAME_IN_MET_FILEDIR_NAMES}${suffix}" +metplus_log_fn="${metplus_config_fn}_$CDATE" # # Add prefixes and suffixes (extensions) to the base file names. # @@ -327,7 +362,7 @@ settings="\ # Date and forecast hour information. # 'cdate': '$CDATE' - 'fhr_list': '${FHR_LIST}' + 'vx_leadhr_list': '${VX_LEADHR_LIST}' # # Input and output directory/file information. # @@ -355,10 +390,11 @@ settings="\ 'fieldname_in_met_filedir_names': '${FIELDNAME_IN_MET_FILEDIR_NAMES}' 'obtype': '${OBTYPE}' 'FCST_OR_OBS': '${FCST_OR_OBS}' - 'accum_hh': '${ACCUM_HH:-}' + 'input_accum_hh': '${input_accum_hh}' + 'output_accum_hh': '${ACCUM_HH:-}' 'accum_no_pad': '${ACCUM_NO_PAD:-}' 'metplus_templates_dir': '${METPLUS_CONF:-}' - 'input_field_group': '${VAR:-}' + 'input_field_group': '${FIELD_GROUP:-}' 'input_level_fcst': '${FCST_LEVEL:-}' 'input_thresh_fcst': '${FCST_THRESH:-}' " diff --git a/tests/WE2E/machine_suites/comprehensive b/tests/WE2E/machine_suites/comprehensive index 8397e5d0c0..8c42aa4599 100644 --- a/tests/WE2E/machine_suites/comprehensive +++ b/tests/WE2E/machine_suites/comprehensive @@ -75,3 +75,15 @@ MET_verification_only_vx pregen_grid_orog_sfc_climo specify_EXTRN_MDL_SYSBASEDIR_ICS_LBCS specify_template_filenames +vx-det_long-fcst_custom-vx-config_aiml-fourcastnet +vx-det_long-fcst_custom-vx-config_aiml-graphcast +vx-det_long-fcst_custom-vx-config_aiml-panguweather +vx-det_long-fcst_custom-vx-config_gfs +vx-det_long-fcst_winter-wx_SRW-staged +vx-det_multicyc_fcst-overlap_ncep-hrrr +vx-det_multicyc_first-obs-00z_ncep-hrrr +vx-det_multicyc_last-obs-00z_ncep-hrrr +vx-det_multicyc_long-fcst-no-overlap_nssl-mpas +vx-det_multicyc_long-fcst-overlap_nssl-mpas +vx-det_multicyc_no-00z-obs_nssl-mpas +vx-det_multicyc_no-fcst-overlap_ncep-hrrr diff --git a/tests/WE2E/machine_suites/coverage.hera.gnu.com b/tests/WE2E/machine_suites/coverage.hera.gnu.com index c2018a6e78..e820e6327e 100644 --- a/tests/WE2E/machine_suites/coverage.hera.gnu.com +++ b/tests/WE2E/machine_suites/coverage.hera.gnu.com @@ -9,3 +9,9 @@ MET_verification_only_vx MET_ensemble_verification_only_vx_time_lag 2019_halloween_storm 2020_jan_cold_blast +vx-det_long-fcst_custom-vx-config_aiml-fourcastnet +vx-det_long-fcst_custom-vx-config_aiml-panguweather +vx-det_long-fcst_custom-vx-config_gfs +vx-det_long-fcst_winter-wx_SRW-staged +vx-det_multicyc_fcst-overlap_ncep-hrrr +vx-det_multicyc_last-obs-00z_ncep-hrrr diff --git a/tests/WE2E/machine_suites/coverage.hera.intel.nco b/tests/WE2E/machine_suites/coverage.hera.intel.nco index e4b02a90ba..82442a6835 100644 --- a/tests/WE2E/machine_suites/coverage.hera.intel.nco +++ b/tests/WE2E/machine_suites/coverage.hera.intel.nco @@ -10,3 +10,9 @@ 
grid_RRFS_CONUS_3km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v15p2 grid_RRFS_CONUS_3km_ics_FV3GFS_lbcs_FV3GFS_suite_GFS_v16 grid_RRFS_CONUScompact_3km_ics_HRRR_lbcs_RAP_suite_HRRR pregen_grid_orog_sfc_climo +vx-det_long-fcst_custom-vx-config_aiml-graphcast +vx-det_multicyc_long-fcst-overlap_nssl-mpas +vx-det_multicyc_long-fcst-no-overlap_nssl-mpas +vx-det_multicyc_first-obs-00z_ncep-hrrr +vx-det_multicyc_no-00z-obs_nssl-mpas +vx-det_multicyc_no-fcst-overlap_ncep-hrrr diff --git a/tests/WE2E/run_WE2E_tests.py b/tests/WE2E/run_WE2E_tests.py index 78655857e8..fc0a3e3268 100755 --- a/tests/WE2E/run_WE2E_tests.py +++ b/tests/WE2E/run_WE2E_tests.py @@ -142,7 +142,6 @@ def run_we2e_tests(homedir, args) -> None: pretty_list = "\n".join(str(x) for x in tests_to_run) logging.info(f'Will run {len(tests_to_run)} tests:\n{pretty_list}') - config_default_file = os.path.join(ushdir,'config_defaults.yaml') logging.debug(f"Loading config defaults file {config_default_file}") config_defaults = load_config_file(config_default_file) @@ -203,13 +202,11 @@ def run_we2e_tests(homedir, args) -> None: # obs. If so, and if the config file does not explicitly set the observation locations, # fill these in with defaults from the machine files obs_vars = ['CCPA_OBS_DIR','MRMS_OBS_DIR','NDAS_OBS_DIR','NOHRSC_OBS_DIR'] - if 'platform' not in test_cfg: - test_cfg['platform'] = {} for obvar in obs_vars: mach_path = machine_defaults['platform'].get('TEST_'+obvar) - if not test_cfg['platform'].get(obvar) and mach_path: + if not test_cfg['verification'].get(obvar) and mach_path: logging.debug(f'Setting {obvar} = {mach_path} from machine file') - test_cfg['platform'][obvar] = mach_path + test_cfg['verification'][obvar] = mach_path if args.compiler == "gnu": # 2D decomposition doesn't work with GNU compilers. 
Deactivate 2D decomposition for GNU diff --git a/tests/WE2E/test_configs/custom_grids/config.custom_ESGgrid_Great_Lakes_snow_8km.yaml b/tests/WE2E/test_configs/custom_grids/config.custom_ESGgrid_Great_Lakes_snow_8km.yaml index d773c632e2..ffacb0a8cb 100644 --- a/tests/WE2E/test_configs/custom_grids/config.custom_ESGgrid_Great_Lakes_snow_8km.yaml +++ b/tests/WE2E/test_configs/custom_grids/config.custom_ESGgrid_Great_Lakes_snow_8km.yaml @@ -55,10 +55,9 @@ task_run_fcst: task_run_post: POST_OUTPUT_DOMAIN_NAME: custom_ESGgrid_Michigan_Ontario verification: - VX_FCST_MODEL_NAME: Michigan_Ontario_snow_8km - VX_FIELDS: [ "APCP", "REFC", "RETOP", "ADPSFC", "ADPUPA", "ASNOW" ] -platform: CCPA_OBS_DIR: '{{ workflow.EXPTDIR }}/CCPA_obs' MRMS_OBS_DIR: '{{ workflow.EXPTDIR }}/MRMS_obs' NDAS_OBS_DIR: '{{ workflow.EXPTDIR }}/NDAS_obs' NOHRSC_OBS_DIR: '{{ workflow.EXPTDIR }}/NOHRSC_obs' + VX_FCST_MODEL_NAME: Michigan_Ontario_snow_8km + VX_FIELD_GROUPS: [ "APCP", "REFC", "RETOP", "SFC", "UPA", "ASNOW" ] diff --git a/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_only_vx.yaml b/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_only_vx.yaml index 812e805645..80b2e3099f 100644 --- a/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_only_vx.yaml +++ b/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_only_vx.yaml @@ -8,9 +8,6 @@ metadata: user: RUN_ENVIR: community -nco: - NET_default: rrfs - workflow: PREDEF_GRID_NAME: RRFS_CONUS_25km DATE_FIRST_CYCL: '2019061500' @@ -18,6 +15,9 @@ workflow: FCST_LEN_HRS: 6 PREEXISTING_DIR_METHOD: rename +nco: + NET_default: rrfs + rocoto: tasks: taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml", "parm/wflow/verify_ens.yaml"]|include }}' diff --git a/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_only_vx_time_lag.yaml b/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_only_vx_time_lag.yaml index f7d82cb8cd..f26ae7db21 100644 --- a/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_only_vx_time_lag.yaml +++ b/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_only_vx_time_lag.yaml @@ -16,24 +16,26 @@ workflow: FCST_LEN_HRS: 6 PREEXISTING_DIR_METHOD: rename +nco: + NET_default: 'RRFSE_CONUS' + rocoto: tasks: taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml", "parm/wflow/verify_ens.yaml"]|include }}' -nco: - NET_default: 'RRFSE_CONUS' - global: DO_ENSEMBLE: true NUM_ENS_MEMBERS: 2 ENS_TIME_LAG_HRS: '[ 0, 12 ]' -platform: + +verification: + # If the following is commented out, then the obs files staged on each + # platform will be (found and) used. 
CCPA_OBS_DIR: '{{ workflow.EXPTDIR }}/obs_data/ccpa/proc' MRMS_OBS_DIR: '{{ workflow.EXPTDIR }}/obs_data/mrms/proc' NDAS_OBS_DIR: '{{ workflow.EXPTDIR }}/obs_data/ndas/proc' - -verification: + # VX_FCST_MODEL_NAME: FV3_GFS_v15p2_CONUS_25km VX_FCST_INPUT_BASEDIR: '{{ platform.get("TEST_VX_FCST_INPUT_BASEDIR") }}' VX_NDIGITS_ENSMEM_NAMES: 1 diff --git a/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_winter_wx.yaml b/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_winter_wx.yaml index 85a515f293..7f761117bb 100644 --- a/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_winter_wx.yaml +++ b/tests/WE2E/test_configs/verification/config.MET_ensemble_verification_winter_wx.yaml @@ -31,4 +31,8 @@ global: DO_ENSEMBLE: true NUM_ENS_MEMBERS: 10 verification: - VX_FIELDS: [ "APCP", "ASNOW", "REFC", "RETOP", "ADPSFC", "ADPUPA" ] + VX_FIELD_GROUPS: [ "APCP", "ASNOW", "REFC", "RETOP", "SFC", "UPA" ] + OBS_NOHRSC_FN_TEMPLATES: [ 'ASNOW', + '{%- set data_intvl_hrs = "%02d" % NOHRSC_OBS_AVAIL_INTVL_HRS %} + {{- "{valid?fmt=%Y%m%d}/sfav2_CONUS_" ~ data_intvl_hrs ~ "h_{valid?fmt=%Y%m%d%H}_grid184.grb2" }}' ] + diff --git a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-fourcastnet.yaml b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-fourcastnet.yaml new file mode 100644 index 0000000000..f4d71ceeb8 --- /dev/null +++ b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-fourcastnet.yaml @@ -0,0 +1,63 @@ +metadata: + description: |- + SRW App configuration file to test deterministic verification of the + FourCastNet (fcnv2) global AI model. Note that this test uses a custom + verification configuration file (as opposed to the default one in the + SRW) because the AI model output often does not include many of the + fields that exist in physics-based models. + +user: + RUN_ENVIR: community + +platform: + EXTRN_MDL_DATA_STORES: hpss + +workflow: + PREEXISTING_DIR_METHOD: rename + PREDEF_GRID_NAME: RRFS_CONUS_25km + DATE_FIRST_CYCL: '2024073000' + DATE_LAST_CYCL: '2024073000' + FCST_LEN_HRS: 240 + +rocoto: + tasks: + taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' + +task_get_extrn_lbcs: + LBC_SPEC_INTVL_HRS: 3 + +task_run_post: + POST_OUTPUT_DOMAIN_NAME: 'global' + +verification: + METPLUS_VERBOSITY_LEVEL: 5 + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the required obs, create and populate them. + # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ platform.TEST_GDAS_OBS_DIR }}" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS (if it's necessary to fetch obs files). 
+ REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + # + OBS_NDAS_FN_TEMPLATES: [ 'SFCandUPA', '{valid?fmt=%Y%m%d}/gdas.t{valid?fmt=%H}z.prepbufr.nr' ] + # + VX_FCST_MODEL_NAME: 'fcnv2' + VX_CONFIG_DET_FN: 'vx_configs/vx_config_det.obs_gdas.model_aiml.yaml' + VX_FCST_INPUT_BASEDIR: '{{- platform.TEST_EXTRN_MDL_SOURCE_BASEDIR }}' + FCST_SUBDIR_TEMPLATE: '{{- verification.VX_FCST_MODEL_NAME }}.{init?fmt=%Y%m%d}/{init?fmt=%H}' + FCST_FN_TEMPLATE: '{{- verification.VX_FCST_MODEL_NAME }}.t00z.prslev.f{lead?fmt=%HHH}.grb2' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '{{- verification.VX_FCST_MODEL_NAME }}.t00z.prslev.f{lead?fmt=%HHH}_a${ACCUM_HH}h.nc' + # + VX_FCST_OUTPUT_INTVL_HRS: 6 + VX_FIELD_GROUPS: [ "SFC" ] diff --git a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-graphcast.yaml b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-graphcast.yaml new file mode 100644 index 0000000000..caa917be41 --- /dev/null +++ b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-graphcast.yaml @@ -0,0 +1,63 @@ +metadata: + description: |- + SRW App configuration file to test deterministic verification of the + GraphCast (gc) global AI model. Note that this test uses a custom + verification configuration file (as opposed to the default one in the + SRW) because the AI model output often does not include many of the + fields that exist in physics-based models. + +user: + RUN_ENVIR: community + +platform: + EXTRN_MDL_DATA_STORES: hpss + +workflow: + PREEXISTING_DIR_METHOD: rename + PREDEF_GRID_NAME: RRFS_CONUS_25km + DATE_FIRST_CYCL: '2024073000' + DATE_LAST_CYCL: '2024073000' + FCST_LEN_HRS: 240 + +rocoto: + tasks: + taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' + +task_get_extrn_lbcs: + LBC_SPEC_INTVL_HRS: 3 + +task_run_post: + POST_OUTPUT_DOMAIN_NAME: 'global' + +verification: + METPLUS_VERBOSITY_LEVEL: 5 + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the required obs, create and populate them. + # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ platform.TEST_GDAS_OBS_DIR }}" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS (if it's necessary to fetch obs files). 
+ REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + # + OBS_NDAS_FN_TEMPLATES: [ 'SFCandUPA', '{valid?fmt=%Y%m%d}/gdas.t{valid?fmt=%H}z.prepbufr.nr' ] + # + VX_FCST_MODEL_NAME: 'gc' + VX_CONFIG_DET_FN: 'vx_configs/vx_config_det.obs_gdas.model_aiml.yaml' + VX_FCST_INPUT_BASEDIR: '{{- platform.TEST_EXTRN_MDL_SOURCE_BASEDIR }}' + FCST_SUBDIR_TEMPLATE: '{{- verification.VX_FCST_MODEL_NAME }}.{init?fmt=%Y%m%d}/{init?fmt=%H}' + FCST_FN_TEMPLATE: '{{- verification.VX_FCST_MODEL_NAME }}.t00z.prslev.f{lead?fmt=%HHH}' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '{{- verification.VX_FCST_MODEL_NAME }}.t00z.prslev.f{lead?fmt=%HHH}_a${ACCUM_HH}h.nc' + # + VX_FCST_OUTPUT_INTVL_HRS: 6 + VX_FIELD_GROUPS: [ "SFC" ] diff --git a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-panguweather.yaml b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-panguweather.yaml new file mode 100644 index 0000000000..cf1fd79ad3 --- /dev/null +++ b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_aiml-panguweather.yaml @@ -0,0 +1,63 @@ +metadata: + description: |- + SRW App configuration file to test deterministic verification of the + Pangu-Weather (pw) global AI model. Note that this test uses a custom + verification configuration file (as opposed to the default one in the + SRW) because the AI model output often does not include many of the + fields that exist in physics-based models. + +user: + RUN_ENVIR: community + +platform: + EXTRN_MDL_DATA_STORES: hpss + +workflow: + PREEXISTING_DIR_METHOD: rename + PREDEF_GRID_NAME: RRFS_CONUS_25km + DATE_FIRST_CYCL: '2024073000' + DATE_LAST_CYCL: '2024073000' + FCST_LEN_HRS: 240 + +rocoto: + tasks: + taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' + +task_get_extrn_lbcs: + LBC_SPEC_INTVL_HRS: 3 + +task_run_post: + POST_OUTPUT_DOMAIN_NAME: 'global' + +verification: + METPLUS_VERBOSITY_LEVEL: 5 + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the required obs, create and populate them. + # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ platform.TEST_GDAS_OBS_DIR }}" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS (if it's necessary to fetch obs files). 
+ REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + # + OBS_NDAS_FN_TEMPLATES: [ 'SFCandUPA', '{valid?fmt=%Y%m%d}/gdas.t{valid?fmt=%H}z.prepbufr.nr' ] + # + VX_FCST_MODEL_NAME: 'pw' + VX_CONFIG_DET_FN: 'vx_configs/vx_config_det.obs_gdas.model_aiml.yaml' + VX_FCST_INPUT_BASEDIR: '{{- platform.TEST_EXTRN_MDL_SOURCE_BASEDIR }}' + FCST_SUBDIR_TEMPLATE: '{{- verification.VX_FCST_MODEL_NAME }}.{init?fmt=%Y%m%d}/{init?fmt=%H}' + FCST_FN_TEMPLATE: '{{- verification.VX_FCST_MODEL_NAME }}.t00z.prslev.f{lead?fmt=%HHH}.grb2' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '{{- verification.VX_FCST_MODEL_NAME }}.t00z.prslev.f{lead?fmt=%HHH}_a${ACCUM_HH}h.nc' + # + VX_FCST_OUTPUT_INTVL_HRS: 6 + VX_FIELD_GROUPS: [ "SFC" ] diff --git a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_gfs.yaml b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_gfs.yaml new file mode 100644 index 0000000000..5ea940f055 --- /dev/null +++ b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_custom-vx-config_gfs.yaml @@ -0,0 +1,66 @@ +metadata: + description: |- + SRW App configuration file to test deterministic verification of the + Global Forecast System (GFS) model in a way that is comparable to vx + for several AI models [GraphCast (gc), FourCastNet (fcnv2), and Pangu- + Weather (pw)]. The idea is for this test to serve as a baseline to + which the AI vx can be compared. Thus, this test uses a custom vx + verification configuration file (as opposed to the default one in the + SRW) because the AI model output often does not include many of the + fields that exist in physics-based models. + +user: + RUN_ENVIR: community + +platform: + EXTRN_MDL_DATA_STORES: hpss + +workflow: + PREEXISTING_DIR_METHOD: rename + PREDEF_GRID_NAME: RRFS_CONUS_25km + DATE_FIRST_CYCL: '2024073000' + DATE_LAST_CYCL: '2024073000' + FCST_LEN_HRS: 240 + +rocoto: + tasks: + taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' + +task_get_extrn_lbcs: + LBC_SPEC_INTVL_HRS: 3 + +task_run_post: + POST_OUTPUT_DOMAIN_NAME: 'global' + +verification: + METPLUS_VERBOSITY_LEVEL: 5 + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the required obs, create and populate them. + # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ platform.TEST_GDAS_OBS_DIR }}" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS (if it's necessary to fetch obs files). 
+ REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + # + OBS_NDAS_FN_TEMPLATES: [ 'SFCandUPA', '{valid?fmt=%Y%m%d}/gdas.t{valid?fmt=%H}z.prepbufr.nr' ] + # + VX_FCST_MODEL_NAME: 'gfs' + VX_CONFIG_DET_FN: 'vx_configs/vx_config_det.obs_gdas.model_gfs.yaml' + VX_FCST_INPUT_BASEDIR: '{{- platform.TEST_EXTRN_MDL_SOURCE_BASEDIR }}' + FCST_SUBDIR_TEMPLATE: '{{- verification.VX_FCST_MODEL_NAME }}.{init?fmt=%Y%m%d}/{init?fmt=%H}/atmos' + FCST_FN_TEMPLATE: '{{- verification.VX_FCST_MODEL_NAME }}.t00z.pgrb2.0p25.f{lead?fmt=%HHH}' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '{{- verification.VX_FCST_MODEL_NAME }}.t00z.pgrb2.0p25.f{lead?fmt=%HHH}_a${ACCUM_HH}h.nc' + # + VX_FCST_OUTPUT_INTVL_HRS: 6 + VX_FIELD_GROUPS: [ "SFC" ] diff --git a/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_winter-wx_SRW-staged.yaml b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_winter-wx_SRW-staged.yaml new file mode 100644 index 0000000000..2a9fe731a0 --- /dev/null +++ b/tests/WE2E/test_configs/verification/config.vx-det_long-fcst_winter-wx_SRW-staged.yaml @@ -0,0 +1,62 @@ +metadata: + description: |- + SRW App configuration file to first pull CCPA, NOHRSC, MRMS, and NDAS + observations from HPSS for a single cycle with a relatively long forecast + (36 hours) and then perform deterministic verification, including + first performing vx preprocessing with METplus tools such as PcpCombine + and Pb2Nc. + + The staged forecast data are from the SRW itself. + + This test uses a winter case to ensure that ASNOW is verified correctly + for both 6-hour and 24-hour accumulations. + +user: + RUN_ENVIR: community + +platform: + EXTRN_MDL_DATA_STORES: hpss + +workflow: + PREEXISTING_DIR_METHOD: rename + # This is required in the experiment generation step, although it shouldn't be + # necessary since a forecast is not being run. + PREDEF_GRID_NAME: RRFS_CONUS_25km + INCR_CYCL_FREQ: 24 + DATE_FIRST_CYCL: '2023021700' + DATE_LAST_CYCL: '2023021700' + FCST_LEN_HRS: 36 + +rocoto: + tasks: + taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' + +task_get_extrn_lbcs: + LBC_SPEC_INTVL_HRS: 3 + +task_run_post: + POST_OUTPUT_DOMAIN_NAME: 'custom_ESGgrid_Michigan_Ontario' + +verification: + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the required obs, create and populate them. + # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS.
+ REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + # + VX_FCST_MODEL_NAME: 'Michigan_Ontario_snow_8km' + VX_FIELD_GROUPS: [ "APCP", "ASNOW", "REFC", "RETOP", "SFC", "UPA" ] + VX_FCST_INPUT_BASEDIR: '{{- "/".join([platform.TEST_VX_FCST_INPUT_BASEDIR, "..", "custom_ESGgrid_Michigan_Ontario_8km"]) }}' diff --git a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_fcst-overlap_ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_fcst-overlap_ncep-hrrr.yaml new file mode 100644 index 0000000000..0be883f1e8 --- /dev/null +++ b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_fcst-overlap_ncep-hrrr.yaml @@ -0,0 +1,61 @@ +metadata: + description: |- + SRW App configuration file to first pull CCPA, MRMS, and NDAS observations + from HPSS for multiple cycles and then perform deterministic verification + for all cycles, including first performing vx preprocessing with METplus + tools such as PcpCombine and Pb2Nc. + + The staged forecast data are from NCEP's operational version of the HRRR. + + This test is for the scenario in which there are multiple, short (i.e. + shorter than 24hr), overlapping forecasts in a day. + +user: + RUN_ENVIR: community + +platform: + EXTRN_MDL_DATA_STORES: hpss + +workflow: + PREEXISTING_DIR_METHOD: rename + PREDEF_GRID_NAME: RRFS_CONUS_25km + INCR_CYCL_FREQ: 7 + DATE_FIRST_CYCL: '2024042902' + DATE_LAST_CYCL: '2024043006' + FCST_LEN_HRS: 9 + +rocoto: + tasks: + taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' + +task_get_extrn_lbcs: + LBC_SPEC_INTVL_HRS: 3 + +task_run_post: + POST_OUTPUT_DOMAIN_NAME: 'hrrr_ncep' + +verification: + METPLUS_VERBOSITY_LEVEL: 5 + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the required obs, create and populate them. + # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS. 
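+ # (For reference, the workflow settings above give cycle start times of + # 02z, 09z, 16z, and 23z on 2024-04-29 and 06z on 2024-04-30; with + # FCST_LEN_HRS = 9, consecutive forecasts overlap by 2 hours, so some of + # the obs files are needed by more than one cycle.)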
+ REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + # + VX_FCST_MODEL_NAME: 'hrrr_ncep' + VX_FCST_INPUT_BASEDIR: '{{- "/".join([platform.TEST_EXTRN_MDL_SOURCE_BASEDIR, verification.VX_FCST_MODEL_NAME]) }}' + FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${FIELD_GROUP}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_first-obs-00z_ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_first-obs-00z_ncep-hrrr.yaml new file mode 100644 index 0000000000..80654ec42d --- /dev/null +++ b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_first-obs-00z_ncep-hrrr.yaml @@ -0,0 +1,61 @@ +metadata: + description: |- + SRW App configuration file to first pull CCPA, MRMS, and NDAS observations + from HPSS for multiple cycles and then perform deterministic verification + for all cycles, including first performing vx preprocessing with METplus + tools such as PcpCombine and Pb2Nc. + + The staged forecast data are from NCEP's operational version of the HRRR. + + This test is for the scenario in which the first obs needed is at 00z. It + tests the special treatment needed for obtaining CCPA and NDAS obs at 00z. + +user: + RUN_ENVIR: community + +platform: + EXTRN_MDL_DATA_STORES: hpss + +workflow: + PREEXISTING_DIR_METHOD: rename + PREDEF_GRID_NAME: RRFS_CONUS_25km + INCR_CYCL_FREQ: 24 + DATE_FIRST_CYCL: '2024042900' + DATE_LAST_CYCL: '2024043000' + FCST_LEN_HRS: 3 + +rocoto: + tasks: + taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' + +task_get_extrn_lbcs: + LBC_SPEC_INTVL_HRS: 3 + +task_run_post: + POST_OUTPUT_DOMAIN_NAME: 'hrrr_ncep' + +verification: + METPLUS_VERBOSITY_LEVEL: 5 + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the required obs, create and populate them. + # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS. 
+ REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + # + VX_FCST_MODEL_NAME: 'hrrr_ncep' + VX_FCST_INPUT_BASEDIR: '{{- "/".join([platform.TEST_EXTRN_MDL_SOURCE_BASEDIR, verification.VX_FCST_MODEL_NAME]) }}' + FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${FIELD_GROUP}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_last-obs-00z_ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_last-obs-00z_ncep-hrrr.yaml new file mode 100644 index 0000000000..18508af72e --- /dev/null +++ b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_last-obs-00z_ncep-hrrr.yaml @@ -0,0 +1,61 @@ +metadata: + description: |- + SRW App configuration file to first pull CCPA, MRMS, and NDAS observations + from HPSS for multiple cycles and then perform deterministic verification + for all cycles, including first performing vx preprocessing with METplus + tools such as PcpCombine and Pb2Nc. + + The staged forecast data are from NCEP's operational version of the HRRR. + + This test is for the scenario in which the last obs needed is at 00z. It + tests the special treatment needed for obtaining CCPA and NDAS obs at 00z. + +user: + RUN_ENVIR: community + +platform: + EXTRN_MDL_DATA_STORES: hpss + +workflow: + PREEXISTING_DIR_METHOD: rename + PREDEF_GRID_NAME: RRFS_CONUS_25km + INCR_CYCL_FREQ: 24 + DATE_FIRST_CYCL: '2024042921' + DATE_LAST_CYCL: '2024043021' + FCST_LEN_HRS: 3 + +rocoto: + tasks: + taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' + +task_get_extrn_lbcs: + LBC_SPEC_INTVL_HRS: 3 + +task_run_post: + POST_OUTPUT_DOMAIN_NAME: 'hrrr_ncep' + +verification: + METPLUS_VERBOSITY_LEVEL: 5 + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the required obs, create and populate them. + # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS. 
+ REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + # + VX_FCST_MODEL_NAME: 'hrrr_ncep' + VX_FCST_INPUT_BASEDIR: '{{- "/".join([platform.TEST_EXTRN_MDL_SOURCE_BASEDIR, verification.VX_FCST_MODEL_NAME]) }}' + FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${FIELD_GROUP}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_long-fcst-no-overlap_nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_long-fcst-no-overlap_nssl-mpas.yaml new file mode 100644 index 0000000000..2745c580e3 --- /dev/null +++ b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_long-fcst-no-overlap_nssl-mpas.yaml @@ -0,0 +1,63 @@ +metadata: + description: |- + SRW App configuration file to first pull CCPA, MRMS, and NDAS observations + from HPSS for multiple cycles and then perform deterministic verification + for all cycles, including first performing vx preprocessing with METplus + tools such as PcpCombine and Pb2Nc. + + The staged forecast data are from one of NSSL's MPAS prototypes submitted + to the 2024 HWT Spring Forecast Experiment. + + This test is for the scenario in which there are multiple, long (i.e. + longer than 24hr) NON-overlapping forecasts with multi-day gaps between + the end of one forecast and the start of the next. + +user: + RUN_ENVIR: community + +platform: + EXTRN_MDL_DATA_STORES: hpss + +workflow: + PREEXISTING_DIR_METHOD: rename + PREDEF_GRID_NAME: RRFS_CONUS_25km + INCR_CYCL_FREQ: 96 + DATE_FIRST_CYCL: '2024042912' + DATE_LAST_CYCL: '2024051112' + FCST_LEN_HRS: 48 + +rocoto: + tasks: + taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' + +task_get_extrn_lbcs: + LBC_SPEC_INTVL_HRS: 3 + +task_run_post: + POST_OUTPUT_DOMAIN_NAME: 'mpashn4nssl' + +verification: + METPLUS_VERBOSITY_LEVEL: 5 + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the required obs, create and populate them. + # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS. 
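+ # (For reference, the workflow settings above give cycle start times of + # 12z on 2024-04-29, 2024-05-03, 2024-05-07, and 2024-05-11; with + # FCST_LEN_HRS = 48, there is a 2-day gap between the end of one forecast + # and the start of the next, so no obs file is shared between cycles.)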
+ REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + # + VX_FCST_MODEL_NAME: 'NSSL-MPAS-HN' + VX_FCST_INPUT_BASEDIR: '{{- "/".join([platform.TEST_EXTRN_MDL_SOURCE_BASEDIR, verification.VX_FCST_MODEL_NAME]) }}' + FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%H?shift=${time_lag}}.grib2' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${FIELD_GROUP}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_long-fcst-overlap_nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_long-fcst-overlap_nssl-mpas.yaml new file mode 100644 index 0000000000..fbd67884a5 --- /dev/null +++ b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_long-fcst-overlap_nssl-mpas.yaml @@ -0,0 +1,62 @@ +metadata: + description: |- + SRW App configuration file to first pull CCPA, MRMS, and NDAS observations + from HPSS for multiple cycles and then perform deterministic verification + for all cycles, including first performing vx preprocessing with METplus + tools such as PcpCombine and Pb2Nc. + + The staged forecast data are from one of NSSL's MPAS prototypes submitted + to the 2024 HWT Spring Forecast Experiment. + + This test is for the scenario in which there are multiple, long (i.e. + longer than 24hr) overlapping forecasts. + +user: + RUN_ENVIR: community + +platform: + EXTRN_MDL_DATA_STORES: hpss + +workflow: + PREEXISTING_DIR_METHOD: rename + PREDEF_GRID_NAME: RRFS_CONUS_25km + INCR_CYCL_FREQ: 24 + DATE_FIRST_CYCL: '2024042912' + DATE_LAST_CYCL: '2024050212' + FCST_LEN_HRS: 48 + +rocoto: + tasks: + taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' + +task_get_extrn_lbcs: + LBC_SPEC_INTVL_HRS: 3 + +task_run_post: + POST_OUTPUT_DOMAIN_NAME: 'mpashn4nssl' + +verification: + METPLUS_VERBOSITY_LEVEL: 5 + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the required obs, create and populate them. + # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS. 
+ REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + # + VX_FCST_MODEL_NAME: 'NSSL-MPAS-HN' + VX_FCST_INPUT_BASEDIR: '{{- "/".join([platform.TEST_EXTRN_MDL_SOURCE_BASEDIR, verification.VX_FCST_MODEL_NAME]) }}' + FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%H?shift=${time_lag}}.grib2' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${FIELD_GROUP}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_no-00z-obs_nssl-mpas.yaml b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_no-00z-obs_nssl-mpas.yaml new file mode 100644 index 0000000000..85f55c8fe4 --- /dev/null +++ b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_no-00z-obs_nssl-mpas.yaml @@ -0,0 +1,63 @@ +metadata: + description: |- + SRW App configuration file to first pull CCPA, MRMS, and NDAS observations + from HPSS for multiple cycles and then perform deterministic verification + for all cycles, including first performing vx preprocessing with METplus + tools such as PcpCombine and Pb2Nc. + + The staged forecast data are from one of NSSL's MPAS prototypes submitted + to the 2024 HWT Spring Forecast Experiment. + + This test is for the scenario in which forecasts do not include 00z. It + is the simplest case of obtaining CCPA and NDAS obs because it avoids + testing the special treatment needed at 00z for these obs types. + +user: + RUN_ENVIR: community + +platform: + EXTRN_MDL_DATA_STORES: hpss + +workflow: + PREEXISTING_DIR_METHOD: rename + PREDEF_GRID_NAME: RRFS_CONUS_25km + INCR_CYCL_FREQ: 24 + DATE_FIRST_CYCL: '2024042912' + DATE_LAST_CYCL: '2024043012' + FCST_LEN_HRS: 3 + +rocoto: + tasks: + taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' + +task_get_extrn_lbcs: + LBC_SPEC_INTVL_HRS: 3 + +task_run_post: + POST_OUTPUT_DOMAIN_NAME: 'mpashn4nssl' + +verification: + METPLUS_VERBOSITY_LEVEL: 5 + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the required obs, create and populate them. + # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS. 
+ REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + # + VX_FCST_MODEL_NAME: 'NSSL-MPAS-HN' + VX_FCST_INPUT_BASEDIR: '{{- "/".join([platform.TEST_EXTRN_MDL_SOURCE_BASEDIR, verification.VX_FCST_MODEL_NAME]) }}' + FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%H?shift=${time_lag}}.grib2' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${FIELD_GROUP}_a${ACCUM_HH}h.nc' diff --git a/tests/WE2E/test_configs/verification/config.vx-det_multicyc_no-fcst-overlap_ncep-hrrr.yaml b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_no-fcst-overlap_ncep-hrrr.yaml new file mode 100644 index 0000000000..c65fb74ec4 --- /dev/null +++ b/tests/WE2E/test_configs/verification/config.vx-det_multicyc_no-fcst-overlap_ncep-hrrr.yaml @@ -0,0 +1,63 @@ +metadata: + description: |- + SRW App configuration file to first pull CCPA, MRMS, and NDAS observations + from HPSS for multiple cycles and then perform deterministic verification + for all cycles, including first performing vx preprocessing with METplus + tools such as PcpCombine and Pb2Nc. + + The staged forecast data are from NCEP's operational version of the HRRR. + + This test is for the scenario in which there are multiple, short (i.e. + shorter than 24hr), NON-overlapping forecasts in a day with multi-hour + (but < 24hr) gaps between the end of one forecast and the start of the + next. + +user: + RUN_ENVIR: community + +platform: + EXTRN_MDL_DATA_STORES: hpss + +workflow: + PREEXISTING_DIR_METHOD: rename + PREDEF_GRID_NAME: RRFS_CONUS_25km + INCR_CYCL_FREQ: 11 + DATE_FIRST_CYCL: '2024042902' + DATE_LAST_CYCL: '2024043022' + FCST_LEN_HRS: 3 + +rocoto: + tasks: + taskgroups: '{{ ["parm/wflow/verify_pre.yaml", "parm/wflow/verify_det.yaml"]|include }}' + +task_get_extrn_lbcs: + LBC_SPEC_INTVL_HRS: 3 + +task_run_post: + POST_OUTPUT_DOMAIN_NAME: 'hrrr_ncep' + +verification: + METPLUS_VERBOSITY_LEVEL: 5 + # Base directories in which to look for obs. If these do not exist and/or + # do not contain the required obs, create and populate them. + # Note that when performing WE2E tests, the default behavior is not to + # get obs files from HPSS but to use staged obs files. This is done by + # setting these variables to the (platform-specific) locations of these + # staged files. To force the WE2E testing system to get the obs from + # HPSS, here we reset these variables to their default values in the SRW + # workflow configuration file config_defaults.yaml. + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" + # Do not remove raw obs files to be able to verify that only the necessary + # raw files are fetched from HPSS. 
+ REMOVE_RAW_OBS_CCPA: false + REMOVE_RAW_OBS_MRMS: false + REMOVE_RAW_OBS_NDAS: false + REMOVE_RAW_OBS_NOHRSC: false + # + VX_FCST_MODEL_NAME: 'hrrr_ncep' + VX_FCST_INPUT_BASEDIR: '{{- "/".join([platform.TEST_EXTRN_MDL_SOURCE_BASEDIR, verification.VX_FCST_MODEL_NAME]) }}' + FCST_FN_TEMPLATE: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}.grib2' + FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${POST_OUTPUT_DOMAIN_NAME}_{init?fmt=%Y%m%d%H?shift=-${time_lag}}f{lead?fmt=%HHH?shift=${time_lag}}_${FIELD_GROUP}_a${ACCUM_HH}h.nc' diff --git a/tests/test_python/test_set_cycle_dates.py b/tests/test_python/test_set_cycle_dates.py index eb76f579c6..8baae643ac 100644 --- a/tests/test_python/test_set_cycle_dates.py +++ b/tests/test_python/test_set_cycle_dates.py @@ -1,20 +1,22 @@ """ Test set_cycle_dates.py """ -from datetime import datetime +from datetime import datetime, timedelta import unittest -from set_cycle_dates import set_cycle_dates +from set_cycle_and_obs_timeinfo import set_cycle_dates class Testing(unittest.TestCase): """ Define the tests""" - def test_set_cycle_dates(self): + + def test_set_cycle_dates_string(self): """ Test that the proper list of dates are produced given the - intput data""" + input data and return_type left to its default value (so the + output should be a list of strings)""" cdates = set_cycle_dates( - date_start=datetime(2022, 1, 1, 6), - date_end=datetime(2022, 1, 2, 12), - incr_cycl_freq=6, + start_time_first_cycl=datetime(2022, 1, 1, 6), + start_time_last_cycl=datetime(2022, 1, 2, 12), + cycl_intvl=timedelta(hours=6), ) self.assertEqual( cdates, @@ -27,3 +29,26 @@ def test_set_cycle_dates(self): "2022010212", ], ) + + def test_set_cycle_dates_datetime(self): + + """ Test that the proper list of dates are produced given the + input data and return_type left set to "datetime" (so the output + should be a list of datetime objects)""" + cdates = set_cycle_dates( + start_time_first_cycl=datetime(2022, 1, 1, 6), + start_time_last_cycl=datetime(2022, 1, 2, 12), + cycl_intvl=timedelta(hours=6), + return_type="datetime", + ) + self.assertEqual( + cdates, + [ + datetime(2022, 1, 1, 6), + datetime(2022, 1, 1, 12), + datetime(2022, 1, 1, 18), + datetime(2022, 1, 2, 0), + datetime(2022, 1, 2, 6), + datetime(2022, 1, 2, 12), + ], + ) diff --git a/ush/bash_utils/eval_METplus_timestr_tmpl.sh b/ush/bash_utils/eval_METplus_timestr_tmpl.sh deleted file mode 100644 index 572f7c68c4..0000000000 --- a/ush/bash_utils/eval_METplus_timestr_tmpl.sh +++ /dev/null @@ -1,281 +0,0 @@ -# -#----------------------------------------------------------------------- -# -# This file defines a function that evaluates a METplus time-string -# template. -# -#----------------------------------------------------------------------- -# -function eval_METplus_timestr_tmpl() { -# -#----------------------------------------------------------------------- -# -# Save current shell options (in a global array). Then set new options -# for this script/function. -# -#----------------------------------------------------------------------- -# - { save_shell_opts; . ${USHdir}/preamble.sh; } > /dev/null 2>&1 -# -#----------------------------------------------------------------------- -# -# Get the full path to the file in which this script/function is located -# (scrfunc_fp), the name of that file (scrfunc_fn), and the directory in -# which the file is located (scrfunc_dir). 
-# -#----------------------------------------------------------------------- -# - local scrfunc_fp=$( $READLINK -f "${BASH_SOURCE[0]}" ) - local scrfunc_fn=$( basename "${scrfunc_fp}" ) - local scrfunc_dir=$( dirname "${scrfunc_fp}" ) -# -#----------------------------------------------------------------------- -# -# Get the name of this function. -# -#----------------------------------------------------------------------- -# - local func_name="${FUNCNAME[0]}" -# -#----------------------------------------------------------------------- -# -# Specify the set of valid argument names for this script/function. Then -# process the arguments provided to this script/function (which should -# consist of a set of name-value pairs of the form arg1="value1", etc). -# -#----------------------------------------------------------------------- -# - local valid_args=( \ - "init_time" \ - "fhr" \ - "METplus_timestr_tmpl" \ - "outvarname_formatted_time" \ - ) - process_args valid_args "$@" -# -#----------------------------------------------------------------------- -# -# For debugging purposes, print out values of arguments passed to this -# script. Note that these will be printed out only if VERBOSE is set to -# TRUE. -# -#----------------------------------------------------------------------- -# - print_input_args "valid_args" -# -#----------------------------------------------------------------------- -# -# Declare local variables. -# -#----------------------------------------------------------------------- -# - local fmt \ - formatted_time \ - hh_init \ - init_time_str \ - lead_hrs \ - len \ - mn_init \ - METplus_time_fmt \ - METplus_time_shift \ - METplus_time_type \ - regex_search \ - ss_init \ - valid_time_str \ - yyyymmdd_init -# -#----------------------------------------------------------------------- -# -# Run checks on input arguments. -# -#----------------------------------------------------------------------- -# - if [ -z "${METplus_timestr_tmpl}" ]; then - print_err_msg_exit "\ -The specified METplus time string template (METplus_timestr_tmpl) cannot be empty: - METplus_timestr_tmpl = \"${METplus_timestr_tmpl}\"" - fi - - len=${#init_time} - if [[ ${init_time} =~ ^[0-9]+$ ]]; then - if [ "$len" -ne 10 ] && [ "$len" -ne 12 ] && [ "$len" -ne 14 ]; then - print_err_msg_exit "\ -The specified initial time string (init_time) must contain exactly 10, -12, or 14 integers (but contains $len): - init_time = \"${init_time}\"" - fi - else - print_err_msg_exit "\ -The specified initial time string (init_time) must consist of only -integers and cannot be empty: - init_time = \"${init_time}\"" - fi - - if ! [[ $fhr =~ ^[0-9]+$ ]]; then - print_err_msg_exit "\ -The specified forecast hour (fhr) must consist of only integers and -cannot be empty: - fhr = \"${fhr}\"" - fi -# -#----------------------------------------------------------------------- -# -# Set strings for the initial and valid times that can be passed to the -# "date" utility for evaluation. 
-# -#----------------------------------------------------------------------- -# - yyyymmdd_init=${init_time:0:8} - hh_init=${init_time:8:2} - - mn_init="00" - if [ "$len" -gt "10" ]; then - mn_init=${init_time:10:2} - fi - - ss_init="00" - if [ "$len" -gt "12" ]; then - ss_init=${init_time:12:2} - fi - - init_time_str=$( printf "%s" "${yyyymmdd_init} + ${hh_init} hours + ${mn_init} minutes + ${ss_init} seconds" ) - valid_time_str=$( printf "%s" "${init_time_str} + ${fhr} hours" ) -# -#----------------------------------------------------------------------- -# -# Parse the input METplus time string template. -# -#----------------------------------------------------------------------- -# - regex_search="^\{(init|valid|lead)(\?)(fmt=)([^\?]*)(\?)?(shift=)?([^\?]*)?\}" - METplus_time_type=$( \ - printf "%s" "${METplus_timestr_tmpl}" | $SED -n -r -e "s/${regex_search}/\1/p" ) - METplus_time_fmt=$( \ - printf "%s" "${METplus_timestr_tmpl}" | $SED -n -r -e "s/${regex_search}/\4/p" ) - METplus_time_shift=$( \ - printf "%s" "${METplus_timestr_tmpl}" | $SED -n -r -e "s/${regex_search}/\7/p" ) -# -#----------------------------------------------------------------------- -# -# Get strings for the time format and time shift that can be passed to -# the "date" utility or the "printf" command. -# -#----------------------------------------------------------------------- -# - case "${METplus_time_fmt}" in - "%Y%m%d%H"|"%Y%m%d"|"%H%M%S") - fmt="${METplus_time_fmt}" - ;; - "%H") -# -# The "%H" format needs to be treated differently depending on if it's -# formatting a "lead" time type or another (e.g. "init" or "vald") because -# for "lead", the printf function is used below (which doesn't understand -# the "%H" format) whereas for the others, the date utility is used (which -# does understand "%H"). -# - if [ "${METplus_time_type}" = "lead" ]; then - fmt="%02.0f" - else - fmt="${METplus_time_fmt}" - fi - ;; - "%HHH") -# -# Print format assumes that the argument to printf (i.e. the number to -# print out) may be a float. If we instead assume an integer and use -# "%03d" as the format, the printf function below will fail if the argument -# happens to be a float. The "%03.0f" format will work for both a float -# and an integer argument (and will truncate the float and print out a -# 3-digit integer). -# - fmt="%03.0f" - ;; - *) - print_err_msg_exit "\ -Unsupported METplus time format: - METplus_time_fmt = \"${METplus_time_fmt}\" -METplus time string template passed to this function is: - METplus_timestr_tmpl = \"${METplus_timestr_tmpl}\"" - ;; - esac -# -# Calculate the time shift as an integer in units of seconds. -# - time_shift_str=$(( $(printf "%.0f" "${METplus_time_shift}") + 0 ))" seconds" -# -#----------------------------------------------------------------------- -# -# Set the formatted time string. -# -#----------------------------------------------------------------------- -# - case "${METplus_time_type}" in - "init") - formatted_time=$( ${DATE_UTIL} --date="${init_time_str} + ${time_shift_str}" +"${fmt}" ) - ;; - "valid") - formatted_time=$( ${DATE_UTIL} --date="${valid_time_str} + ${time_shift_str}" +"${fmt}" ) - ;; - "lead") - lead_secs=$(( $( ${DATE_UTIL} --date="${valid_time_str} + ${time_shift_str}" +"%s" ) \ - - $( ${DATE_UTIL} --date="${init_time_str}" +"%s" ) )) - lead_hrs=$( bc -l <<< "${lead_secs}/${SECS_PER_HOUR}" ) -# -# Check to make sure lead_hrs is an integer. 
-# - lead_hrs_trunc=$( bc <<< "${lead_secs}/${SECS_PER_HOUR}" ) - lead_hrs_rem=$( bc -l <<< "${lead_hrs} - ${lead_hrs_trunc}" ) - if [ "${lead_hrs_rem}" != "0" ]; then - print_err_msg_exit "\ -The lead in hours (lead_hrs) must be an integer but isn't: - lead_hrs = ${lead_hrs} -The lead in seconds (lead_secs) is: - lead_secs = ${lead_secs} -The remainder (lead_hrs_rem) after dividing the lead_secs by SECS_PER_HOUR -= ${SECS_PER_HOUR} is: - lead_hrs_rem = ${lead_hrs_rem}" - fi -# -# Get the lead in the proper format. -# - formatted_time=$( printf "${fmt}" "${lead_hrs}" ) - ;; - *) - print_err_msg_exit "\ -Unsupported METplus time type: - METplus_time_type = \"${METplus_time_type}\" -METplus time string template passed to this function is: - METplus_timestr_tmpl = \"${METplus_timestr_tmpl}\"" - ;; - esac - - if [ -z "${formatted_time}" ]; then - print_err_msg_exit "\ -The specified METplus time string template (METplus_timestr_tmpl) could -not be evaluated for the given initial time (init_time) and forecast -hour (fhr): - METplus_timestr_tmpl = \"${METplus_timestr_tmpl}\" - init_time = \"${init_time}\" - fhr = \"${fhr}\"" - fi -# -#----------------------------------------------------------------------- -# -# Set output variables. -# -#----------------------------------------------------------------------- -# - if [ ! -z "${outvarname_formatted_time}" ]; then - printf -v ${outvarname_formatted_time} "%s" "${formatted_time}" - fi -# -#----------------------------------------------------------------------- -# -# Restore the shell options saved at the beginning of this script/function. -# -#----------------------------------------------------------------------- -# - { restore_shell_opts; } > /dev/null 2>&1 - -} diff --git a/ush/config.community.yaml b/ush/config.community.yaml index 417b9edb91..1ce7fc0108 100644 --- a/ush/config.community.yaml +++ b/ush/config.community.yaml @@ -5,10 +5,6 @@ user: RUN_ENVIR: community MACHINE: hera ACCOUNT: an_account -platform: - CCPA_OBS_DIR: "" - MRMS_OBS_DIR: "" - NDAS_OBS_DIR: "" workflow: USE_CRON_TO_RELAUNCH: false EXPT_SUBDIR: test_community @@ -34,8 +30,6 @@ task_plot_allvars: global: DO_ENSEMBLE: false NUM_ENS_MEMBERS: 2 -verification: - VX_FCST_MODEL_NAME: FV3_GFS_v16_CONUS_25km rocoto: tasks: metatask_run_ensemble: diff --git a/ush/config_defaults.yaml b/ush/config_defaults.yaml index d298245f14..f155307a6b 100644 --- a/ush/config_defaults.yaml +++ b/ush/config_defaults.yaml @@ -265,54 +265,6 @@ platform: # #----------------------------------------------------------------------- # - # Set METplus parameters. Definitions: - # - # CCPA_OBS_DIR: - # User-specified location of the directory where CCPA hourly - # precipitation files used by METplus are located (or, if - # retrieved by the workflow, where they will be placed). See comments - # in file scripts/exregional_get_verif_obs.sh for more details about - # files and directory structure, as well as important caveats about - # errors in the metadata and file names. - # NOTE: Do not set this to the same path as other *_OBS_DIR variables; - # otherwise unexpected results and data loss may occur. - # - # NOHRSC_OBS_DIR: - # User-specified location of top-level directory where NOHRSC 6- and - # 24-hour snowfall accumulation files used by METplus are located (or, - # if retrieved by the workflow, where they will be placed). 
See comments - # in file scripts/exregional_get_verif_obs.sh for more details about - # files and directory structure - # NOTE: Do not set this to the same path as other *_OBS_DIR variables; - # otherwise unexpected results and data loss may occur. - # - # MRMS_OBS_DIR: - # User-specified location of the directory where MRMS composite - # reflectivity and echo top files used by METplus are located (or, if - # retrieved by the workflow, where they will be placed). See comments - # in the scripts/exregional_get_verif_obs.sh for more details about - # files and directory structure. - # NOTE: Do not set this to the same path as other *_OBS_DIR variables; - # otherwise unexpected results and data loss may occur. - # - # NDAS_OBS_DIR: - # User-specified location of top-level directory where NDAS prepbufr - # files used by METplus are located (or, if retrieved by the workflow, - # where they will be placed). See comments in file - # scripts/exregional_get_verif_obs.sh for more details about files - # and directory structure. - # NOTE: Do not set this to the same path as other *_OBS_DIR variables; - # otherwise unexpected results and data loss may occur. - # - #----------------------------------------------------------------------- - # - CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa/proc" - NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc/proc" - MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms/proc" - NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas/proc" - # - #----------------------------------------------------------------------- - # # DOMAIN_PREGEN_BASEDIR: # The base directory containing pregenerated grid, orography, and surface # climatology files. This is an alternative for setting GRID_DIR, @@ -518,12 +470,17 @@ workflow: # default will point to: # # EXPTDIR: "${EXPT_BASEDIR}/${EXPT_SUBDIR}" + # + # WFLOW_FLAG_FILES_DIR: + # Directory in which flag files marking completion of various workflow + # tasks can be placed. #----------------------------------------------------------------------- # EXPT_BASEDIR: '' # This will be set in setup.py prior to extend_yaml() being called EXPT_SUBDIR: 'experiment' EXEC_SUBDIR: "exec" EXPTDIR: '{{ [workflow.EXPT_BASEDIR, workflow.EXPT_SUBDIR]|path_join }}' + WFLOW_FLAG_FILES_DIR: '{{ [workflow.EXPTDIR, "wflow_flag_files"]|path_join }}' # #----------------------------------------------------------------------- # @@ -1705,7 +1662,7 @@ task_run_fcst: #----------------------------------------------------------------------- # # KMP_AFFINITY_*: - # From Intel: "The Intel® runtime library has the ability to bind OpenMP + # From Intel: "The Intel runtime library has the ability to bind OpenMP # threads to physical processing units. The interface is controlled using # the KMP_AFFINITY environment variable. Depending on the system (machine) # topology, application, and operating system, thread affinity can have a @@ -2389,149 +2346,351 @@ global: PRINT_DIFF_PGR: false #---------------------------- -# verification (vx) parameters +# Verification (VX) parameters #----------------------------- verification: # - # METPLUS_VERBOSITY_LEVEL: - # Logging verbosity level used by METplus verification tools. 0 to 5, - # with 0 quiet and 5 loud. - # - METPLUS_VERBOSITY_LEVEL: 2 + # General VX Parameters + # ------------------------------- # - # Templates for CCPA, MRMS, and NDAS observation files. - # - # OBS_CCPA_APCP_FN_TEMPLATE: - # File name template for CCPA accumulated precipitation (APCP) observations. 
- # This template is used by the workflow tasks that call the METplus PcpCombine - # tool on CCPA obs to find the input observation files containing 1-hour - # APCP and then generate NetCDF files containing either 1-hour or greater - # than 1-hour APCP. - # - # OBS_NOHRSC_ASNOW_FN_TEMPLATE: - # File name template for NOHRSC snow observations. - # - # OBS_MRMS_REFC_FN_TEMPLATE: - # File name template for MRMS reflectivity observations. - # - # OBS_MRMS_RETOP_FN_TEMPLATE: - # File name template for MRMS echo top observations. - # - # OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE: - # File name template for NDAS surface and upper air observations. - # This template is used by the workflow tasks that call the METplus Pb2nc - # tool on NDAS obs to find the input observation files containing ADP - # surface (ADPSFC) or ADP upper air (ADPUPA) fields and then generate - # NetCDF versions of these files. + # VX_FIELD_GROUPS: + # The groups of fields (some of which may consist of only a single field) + # on which to run verification. # - OBS_CCPA_APCP_FN_TEMPLATE: '{valid?fmt=%Y%m%d}/ccpa.t{valid?fmt=%H}z.01h.hrap.conus.gb2' - OBS_NOHRSC_ASNOW_FN_TEMPLATE: '{valid?fmt=%Y%m%d}/sfav2_CONUS_${ACCUM_HH}h_{valid?fmt=%Y%m%d%H}_grid184.grb2' - OBS_MRMS_REFC_FN_TEMPLATE: '{valid?fmt=%Y%m%d}/MergedReflectivityQCComposite_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2' - OBS_MRMS_RETOP_FN_TEMPLATE: '{valid?fmt=%Y%m%d}/EchoTop_18_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2' - OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE: 'prepbufr.ndas.{valid?fmt=%Y%m%d%H}' - # - # OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT: - # Template used to specify the names of the output NetCDF observation - # files generated by the worfklow verification tasks that call the METplus - # PcpCombine tool on CCPA observations. (These files will contain obs - # APCP, both for 1 hour and for > 1 hour accumulation periods, in NetCDF - # format.) - # - # OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE_PB2NC_OUTPUT: - # Template used to specify the names of the output NetCDF observation - # files generated by the worfklow verification tasks that call the - # METplus Pb2nc tool on NDAS observations. (These files will contain - # obs ADPSFC or ADPUPA fields in NetCDF format.) - # - OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${OBS_CCPA_APCP_FN_TEMPLATE}_a${ACCUM_HH}h.nc' - OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE_PB2NC_OUTPUT: '${OBS_NDAS_ADPSFCorADPUPA_FN_TEMPLATE}.nc' - # - # VX_FCST_MODEL_NAME: - # String that specifies a descriptive name for the model being verified. - # This is used in forming the names of the verification output files as - # well as in the contents of those files. - # - # VX_FIELDS: - # The fields or groups of fields on which to run verification. Because - # accumulated snow (ASNOW) is often not of interest in non-winter cases - # and because observation files for ASNOW are not available on NOAA - # HPSS for retrospective cases before March 2020, by default ASNOW is - # not included VX_FIELDS, but it may be added to this list in order to + # Since accumulated snowfall (ASNOW) is often not of interest in non-winter + # cases and because observation files for ASNOW are not available on NOAA + # HPSS for retrospective cases before March 2020, by default ASNOW is not + # included VX_FIELD_GROUPS, but it may be added to this list in order to # include the verification tasks for ASNOW in the workflow. 
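+ # For example, a winter-case configuration might set + # + # VX_FIELD_GROUPS: [ "APCP", "ASNOW", "REFC", "RETOP", "SFC", "UPA" ] + # + # to add snowfall verification, as the winter-weather WE2E test + # configurations do.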
# + VX_FIELD_GROUPS: [ "APCP", "REFC", "RETOP", "SFC", "UPA" ] + # # VX_APCP_ACCUMS_HRS: - # The 2-digit accumulation periods (in units of hours) to consider for - # APCP (accumulated precipitation). If VX_FIELDS contains "APCP", then - # VX_APCP_ACCUMS_HRS must contain at least one element. If not, + # The accumulation intervals (in hours) to include in the verification of + # accumulated precipitation (APCP). If VX_FIELD_GROUPS contains "APCP", + # then VX_APCP_ACCUMS_HRS must contain at least one element. Otherwise, # VX_APCP_ACCUMS_HRS will be ignored. # + VX_APCP_ACCUMS_HRS: [ 1, 3, 6, 24 ] + # # VX_ASNOW_ACCUMS_HRS: - # The 2-digit accumulation periods (in units of hours) to consider for - # ASNOW (accumulated snowfall). If VX_FIELDS contains "ASNOW", then - # VX_ASNOW_ACCUMS_HRS must contain at least one element. If not, + # The accumulation intervals (in hours) to include in the verification of + # accumulated snowfall (ASNOW). If VX_FIELD_GROUPS contains "ASNOW", + # then VX_ASNOW_ACCUMS_HRS must contain at least one element. Otherwise, # VX_ASNOW_ACCUMS_HRS will be ignored. # - VX_FCST_MODEL_NAME: '{{ nco.NET_default }}.{{ task_run_post.POST_OUTPUT_DOMAIN_NAME }}' - VX_FIELDS: [ "APCP", "REFC", "RETOP", "ADPSFC", "ADPUPA" ] - VX_APCP_ACCUMS_HRS: [ 1, 3, 6, 24 ] VX_ASNOW_ACCUMS_HRS: [ 6, 24 ] # - # VX_FCST_INPUT_BASEDIR: - # Template for top-level directory containing forecast (but not obs) - # files that will be used as input into METplus for verification. + # VX_CONFIG_[DET|ENS]_FN: + # Names of configuration files for deterministic and ensemble verification + # that specify the field groups, field names, levels, and (if applicable) + # thresholds for which to run verification. These are relative to the + # directory METPLUS_CONF in which the METplus config templates are + # located. They may include leading relative paths before the file + # names, e.g. "some_dir/another_dir/vx_config_det.yaml". + # + VX_CONFIG_DET_FN: 'vx_configs/vx_config_det.yaml' + VX_CONFIG_ENS_FN: 'vx_configs/vx_config_ens.yaml' # # VX_OUTPUT_BASEDIR: - # Template for top-level directory in which METplus will place its - # output. + # Template for base (i.e. top-level) directory in which METplus will place + # its output. # - VX_FCST_INPUT_BASEDIR: '{% if user.RUN_ENVIR == "nco" %}$COMOUT/../..{% else %}{{ workflow.EXPTDIR }}{% endif %}' VX_OUTPUT_BASEDIR: '{% if user.RUN_ENVIR == "nco" %}$COMOUT/metout{% else %}{{ workflow.EXPTDIR }}{% endif %}' # - # Number of digits in the ensemble member names. This is a configurable - # variable to allow users to change its value (e.g. to go from "mem004" - # to "mem04") when using staged forecast files that do not use the same - # number of digits as the SRW App. + # METplus-Specific Parameters + # ------------------------------- # - VX_NDIGITS_ENSMEM_NAMES: 3 + # METPLUS_VERBOSITY_LEVEL: + # Logging verbosity level used by METplus verification tools. 0 to 5, + # with 0 quiet and 5 loudest. + # + METPLUS_VERBOSITY_LEVEL: 2 # - # File name and path templates used in the verification tasks. + # VX Parameters for Observations + # ------------------------------- # - # FCST_SUBDIR_TEMPLATE: - # Template for the subdirectory containing forecast files that are - # inputs to the verification tasks. - # - # FCST_FN_TEMPLATE: - # Template for the names of the forecast files that are inputs to the - # verification tasks. 
+ # Note: + # The observation types that the SRW App can currently retrieve (if + # necessary) and use in verification are: + # + # * CCPA (Climatology-Calibrated Precipitation Analysis) + # * NOHRSC (National Operational Hydrologic Remote Sensing Center) + # * MRMS (Multi-Radar Multi-Sensor) + # * NDAS (NAM Data Assimilation System) + # + # The script ush/get_obs.py contains further details on the files and + # directory structure of each obs type. + # + + # + # [CCPA|NOHRSC|MRMS|NDAS]_OBS_AVAIL_INTVL_HRS: + # Time interval (in hours) at which various types of obs are available on + # NOAA's HPSS. + # + # Note that MRMS files are in fact available every few minutes, but here + # we set the obs availability interval to 1 hour because currently that + # is the shortest output interval for forecasts, i.e. the forecasts cannot + # (yet) support sub-hourly output. + # + CCPA_OBS_AVAIL_INTVL_HRS: 1 + NOHRSC_OBS_AVAIL_INTVL_HRS: 6 + MRMS_OBS_AVAIL_INTVL_HRS: 1 + NDAS_OBS_AVAIL_INTVL_HRS: 1 + # + # [CCPA|NOHRSC|MRMS|NDAS]_OBS_DIR: + # Base directory in which CCPA, NOHRSC, MRMS, or NDAS obs files needed by + # the verification tasks are located. If the files do not exist, they + # will be retrieved and placed under this directory. + # + # Note that: + # + # * If the obs files need to be retrieved (e.g. from NOAA's HPSS), because + # they are not already staged on disk, then the user must have write + # permission to this directory. Otherwise, the "get_obs" workflow tasks + # that attempt to create these files will fail. + # + # * CCPA obs contain errors in the metadata for a certain range of dates + # that need to be corrected during obs retrieval. This is described + # in more detail in the script ush/get_obs.py. + # + CCPA_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ccpa" + NOHRSC_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/nohrsc" + MRMS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/mrms" + NDAS_OBS_DIR: "{{ workflow.EXPTDIR }}/obs_data/ndas" + # + # OBS_[CCPA|NOHRSC|MRMS|NDAS]_FN_TEMPLATES: + # File name templates for various obs types. These are meant to be used + # in METplus configuration files and thus contain METplus time formatting + # strings. Each of these variables is a python list containing pairs of + # values. The first element of each pair specifies the verification field + # group(s) for which the file name template will be needed, and the second + # element is the file name template itself, which may include a leading + # relative directory. (Here, by "verification field group", we mean a + # group of fields that is verified together in the workflow; see the + # description of the variable VX_FIELD_GROUPS.) For example, for CCPA + # obs, the variable name is OBS_CCPA_FN_TEMPLATES. From the default value + # of this variable given above, we see that if CCPA_OBS_AVAIL_INTVL_HRS + # is set to 1 (i.e. the CCPA obs are assumed to be available every hour) + # and the valid time is 2024042903, then the obs file (including a relative + # path) to look for and, if necessary, create is + # + # 20240429/ccpa.t03z.01h.hrap.conus.gb2 + # + # This file will be used in the verification of fields under the APCP + # field group (which consist of accumulated precipitation for the + # accumulation intervals specified in VX_APCP_ACCUMS_HRS). + # + # Notes: + # + # * The file name templates are relative to the base directories given in + # the variables + # + # [CCPA|NOHRSC|MRMS|NDAS]_OBS_DIR + # + # defined above. Thus, the template for the full path to the obs files + # is given, e.g. 
for CCPA obs, by + # + # {CCPA_OBS_DIR}/{OBS_CCPA_FN_TEMPLATES[1]} + # + # where the [1] indicates the second element of the list OBS_CCPA_FN_TEMPLATES. + # + # * The file name templates may represent file names only, or they may + # include leading relative directories. + # + # * The default values of these variables for the CCPA, NOHRSC, and NDAS + # obs types contain only one pair of values (because these obs types + # contain only one set of files that we use in the verification) while + # the default value for the MRMS obs type contains two pairs of values, + # one for the set of files that contains composite reflectivity data + # and another for the set that contains echo top data. This is simply + # because the MRMS obs type does not group these two fields together + # one set of files as does, for example, the NDAS obs type. + # + # * Each file name template must contain full information about the year, + # month, day, and hour by including METplus time formatting strings for + # this information. Some of this information (e.g. the year, month, + # and day) may be in the relative directory portion of the template and + # the rest (e.g. the hour) in the file name, or there may be no relative + # directory portion and all of this information may be in the file name, + # but all four pieces of timing information must be present somewhere in + # each template as METplus time formatting strings. If not, obs files + # created by the "get_obs" tasks for different days might overwrite each + # other. + # + # * The workflow generation scripts create a "get_obs" task for each obs + # type that is needed in the verification and for each day on which that + # obs type is needed at at least some hours. That "get_obs" task first + # checks whether all the necessary obs files for that day already exist + # at the locations specified by the full path template(s) (which are + # obtained by combining the base directories [CCPA|NOHRSC|MRMS|NDAS]_OBS_DIR + # with the file name template(s)). If for a given day one or more of + # these obs files do not exist on disk, the "get_obs" task will retrieve + # "raw" versions of these files from a data store (e.g. NOAA's HPSS) + # and will place them in a temporary "raw" directory. It will then + # move or copy these raw files to the locations specified by the full + # path template(s). + # + # * The raw obs files, i.e. the obs files as they are named and arranged + # in the data stores and retrieved and placed in the raw directories, + # may be arranged differently and/or have names that are different from + # the ones specified in the file name templates. If so, they are renamed + # while being moved or copied from the raw directories to the locations + # specified by the full path template(s). (The lists of templates for + # searching for and retrieving files from the data stores is different + # than the METplus templates described here; the former are given in + # the data retrieval configuration file at parm/data_locations.yml.) + # + # * When the ex-scripts for the various vx tasks are converted from bash + # to python scripts, these variables should be converted from python + # lists to python dictionaries, where the first element of each pair + # becomes the key and the second becomes the value. This currently + # cannot be done due to limitations in the workflow on converting + # python dictionaries to bash variables. 
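+  #
+  # As an illustration of how these templates combine with the base
+  # directories (using the same hypothetical valid time as in the CCPA
+  # example above), the RETOP entry of the default OBS_MRMS_FN_TEMPLATES
+  # below, together with MRMS_OBS_DIR, gives the full path template
+  #
+  #   {MRMS_OBS_DIR}/{valid?fmt=%Y%m%d}/EchoTop_18_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2
+  #
+  # which, for a valid time of 2024042903, resolves to
+  #
+  #   {MRMS_OBS_DIR}/20240429/EchoTop_18_00.50_20240429-030000.grib2
+  #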
+  #
+  OBS_CCPA_FN_TEMPLATES: [ 'APCP',
+    '{%- set obs_avail_intvl_hrs = "%02d" % CCPA_OBS_AVAIL_INTVL_HRS %}
+     {{- "{valid?fmt=%Y%m%d}/ccpa.t{valid?fmt=%H}z." ~ obs_avail_intvl_hrs ~ "h.hrap.conus.gb2" }}' ]
+  OBS_NOHRSC_FN_TEMPLATES: [ 'ASNOW',
+    '{%- set obs_avail_intvl_hrs = "%d" % NOHRSC_OBS_AVAIL_INTVL_HRS %}
+     {{- "sfav2_CONUS_" ~ obs_avail_intvl_hrs ~ "h_{valid?fmt=%Y%m%d%H}_grid184.grb2" }}' ]
+  OBS_MRMS_FN_TEMPLATES: [ 'REFC', '{valid?fmt=%Y%m%d}/MergedReflectivityQCComposite_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2',
+                           'RETOP', '{valid?fmt=%Y%m%d}/EchoTop_18_00.50_{valid?fmt=%Y%m%d}-{valid?fmt=%H%M%S}.grib2' ]
+  OBS_NDAS_FN_TEMPLATES: [ 'SFCandUPA', 'prepbufr.ndas.{valid?fmt=%Y%m%d%H}' ]
+  #
+  # REMOVE_RAW_OBS_[CCPA|NOHRSC|MRMS|NDAS]:
+  # Flag specifying whether to remove the "raw" observation directories
+  # after retrieving the specified type of obs (CCPA, NOHRSC, MRMS, or
+  # NDAS) from a data store (e.g. NOAA's HPSS).  The raw directories
+  # are the ones in which the observation files are placed immediately
+  # after pulling them from the data store but before performing any
+  # processing on them such as renaming the files and/or reorganizing
+  # their directory structure.
+  #
+  REMOVE_RAW_OBS_CCPA: True
+  REMOVE_RAW_OBS_NOHRSC: True
+  REMOVE_RAW_OBS_MRMS: True
+  REMOVE_RAW_OBS_NDAS: True
   #
-  # FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT:
-  # Template used to specify the names of the output NetCDF forecast files
-  # generated by the worfklow verification tasks that call the METplus
-  # PcpCombine tool on forecasts.  (These files will contain forecast APCP,
-  # both for 1 hour and for > 1 hour accumulation periods, in NetCDF
-  # format.)
+  # OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT:
+  # METplus template for the names of the NetCDF files generated by the
+  # workflow verification tasks that call METplus's PcpCombine tool on
+  # CCPA observations.  These files will contain observed accumulated
+  # precipitation in NetCDF format for various accumulation intervals.
+  #
+  OBS_CCPA_APCP_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '{%- set obs_avail_intvl_hrs = "%02d" % CCPA_OBS_AVAIL_INTVL_HRS %}
+    {{- "ccpa.t{valid?fmt=%H}z." ~ obs_avail_intvl_hrs ~ "h.hrap.conus.gb2_a${ACCUM_HH}h.nc" }}'
+  #
+  # OBS_NOHRSC_ASNOW_FN_TEMPLATE_PCPCOMBINE_OUTPUT:
+  # METplus template for the names of the NetCDF files generated by the
+  # workflow verification tasks that call METplus's PcpCombine tool on
+  # NOHRSC observations.  These files will contain observed accumulated
+  # snowfall for various accumulation intervals.
+  #
+  OBS_NOHRSC_ASNOW_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '{%- set obs_avail_intvl_hrs = "%d" % NOHRSC_OBS_AVAIL_INTVL_HRS %}
+    {{- "sfav2_CONUS_" ~ obs_avail_intvl_hrs ~ "h_{valid?fmt=%Y%m%d%H}_grid184.grb2_a${ACCUM_HH}h.nc" }}'
+  #
+  # OBS_NDAS_SFCandUPA_FN_TEMPLATE_PB2NC_OUTPUT:
+  # METplus template for the names of the NetCDF files generated by the
+  # workflow verification tasks that call METplus's Pb2nc tool on the
+  # prepbufr files in NDAS observations.  These files will contain the
+  # observed surface (SFC) and upper-air (UPA) fields in NetCDF format
+  # (instead of NDAS's native prepbufr format).
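+  #
+  # For example (again using a hypothetical valid time, here 2024042906),
+  # the default value below expands to the file name template
+  # prepbufr.ndas.{valid?fmt=%Y%m%d%H}.nc, which METplus resolves to
+  # prepbufr.ndas.2024042906.nc.
+  #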
# - FCST_SUBDIR_TEMPLATE: '{% if user.RUN_ENVIR == "nco" %}${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}{% else %}{init?fmt=%Y%m%d%H?shift=-${time_lag}}{% if global.DO_ENSEMBLE %}/${ensmem_name}{% endif %}/postprd{% endif %}' - FCST_FN_TEMPLATE: '${NET_default}.t{init?fmt=%H?shift=-${time_lag}}z{% if user.RUN_ENVIR == "nco" and global.DO_ENSEMBLE %}.${ensmem_name}{% endif %}.prslev.f{lead?fmt=%HHH?shift=${time_lag}}.${POST_OUTPUT_DOMAIN_NAME}.grib2' - FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '${NET_default}.t{init?fmt=%H}z{% if user.RUN_ENVIR == "nco" and global.DO_ENSEMBLE %}.${ensmem_name}{% endif %}.prslev.f{lead?fmt=%HHH}.${POST_OUTPUT_DOMAIN_NAME}_${VAR}_a${ACCUM_HH}h.nc' + OBS_NDAS_SFCandUPA_FN_TEMPLATE_PB2NC_OUTPUT: '${OBS_NDAS_FN_TEMPLATES[1]}.nc' # + # NUM_MISSING_OBS_FILES_MAX: # For verification tasks that need observational data, this specifies # the maximum number of observation files that may be missing. If more # than this number are missing, the verification task will error out. - # - # Note that this is a crude way of checking that there are enough obs to - # conduct verification since this number should probably depend on the + # This is a crude way of checking that there are enough obs to conduct + # verification (crude because this number should probably depend on the # field being verified, the time interval between observations, the - # length of the forecast, etc. An alternative may be to specify the - # maximum allowed fraction of obs files that can be missing (i.e. the - # number missing divided by the number that are expected to exist). + # length of the forecast, etc; an alternative may be to specify the + # maximum allowed fraction of obs files that can be missing). # NUM_MISSING_OBS_FILES_MAX: 2 # + # VX Parameters for Forecasts + # ---------------------------- + # + # VX_FCST_MODEL_NAME: + # String that specifies a descriptive name for the model being verified. + # This is used in forming the names of the verification output files and + # is also included in the contents of those files. + # + VX_FCST_MODEL_NAME: '{{ nco.NET_default }}.{{ task_run_post.POST_OUTPUT_DOMAIN_NAME }}' + # + # VX_FCST_OUTPUT_INTVL_HRS: + # The forecast output interval (in hours) to assume for verification + # purposes. + # Note: + # If/when a variable is created in this configuration file that specifies + # the forecast output interval for native SRW forecasts, it should be + # used as the default value of this variable. + # + VX_FCST_OUTPUT_INTVL_HRS: 1 + # + # VX_FCST_INPUT_BASEDIR: + # METplus template for the name of the base (i.e. top-level) directory + # containing the forecast files to use as inputs to the verification + # tasks. + # + VX_FCST_INPUT_BASEDIR: '{% if user.RUN_ENVIR == "nco" %}$COMOUT/../..{% else %}{{ workflow.EXPTDIR }}{% endif %}' + # + # FCST_SUBDIR_TEMPLATE: + # METplus template for the name of the subdirectory containing forecast + # files to use as inputs to the verification tasks. + # + FCST_SUBDIR_TEMPLATE: '{%- if user.RUN_ENVIR == "nco" %} + {{- "${NET_default}.{init?fmt=%Y%m%d?shift=-${time_lag}}/{init?fmt=%H?shift=-${time_lag}}" }} + {%- else %} + {{- "{init?fmt=%Y%m%d%H?shift=-${time_lag}}" }} + {%- if global.DO_ENSEMBLE %} + {{- "/${ensmem_name}" }} + {%- endif %} + {{- "/postprd" }} + {%- endif %}' + # + # FCST_FN_TEMPLATE: + # METplus template for the names of the forecast files to use as inputs + # to the verification tasks. 
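+  #
+  # As a rough illustration (with hypothetical values NET_default="srw",
+  # POST_OUTPUT_DOMAIN_NAME="conus", no ensemble, and no time lag), the
+  # default template below would resolve, for an initialization hour of
+  # 00 and a lead time of 3 hours, to a file name like
+  #
+  #   srw.t00z.prslev.f003.conus.grib2
+  #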
+  #
+  FCST_FN_TEMPLATE: '{{- "${NET_default}.t{init?fmt=%H?shift=-${time_lag}}z" }}
+    {%- if user.RUN_ENVIR == "nco" and global.DO_ENSEMBLE %}
+    {{- ".${ensmem_name}" }}
+    {%- endif %}
+    {{- ".prslev.f{lead?fmt=%HHH?shift=${time_lag}}.${POST_OUTPUT_DOMAIN_NAME}.grib2" }}'
+  #
+  # FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT:
+  # METplus template for the names of the NetCDF files generated by the
+  # workflow verification tasks that call METplus's PcpCombine tool on
+  # forecast output.  These files will contain forecast accumulated
+  # precipitation in NetCDF format for various accumulation intervals.
+  #
+  FCST_FN_TEMPLATE_PCPCOMBINE_OUTPUT: '{{- "${NET_default}.t{init?fmt=%H}z" }}
+    {%- if user.RUN_ENVIR == "nco" and global.DO_ENSEMBLE %}
+    {{- ".${ensmem_name}" }}
+    {%- endif %}
+    {{- ".prslev.f{lead?fmt=%HHH}.${POST_OUTPUT_DOMAIN_NAME}_${FIELD_GROUP}_a${ACCUM_HH}h.nc" }}'
+  #
+  # VX_NDIGITS_ENSMEM_NAMES:
+  # Number of digits to assume/use in the forecast ensemble member identifier
+  # string used in directory and file names and other instances in which the
+  # ensemble member needs to be identified.  For example, if this is set to
+  # 3, the identifier for ensemble member 4 will be "mem004", while if it's
+  # set to 2, the identifier will be "mem04".  This is useful when verifying
+  # staged forecast files from a forecasting model/system other than the
+  # SRW that uses a different number of digits in the ensemble member
+  # identifier string.
+  #
+  VX_NDIGITS_ENSMEM_NAMES: 3
+  #
+  # NUM_MISSING_FCST_FILES_MAX:
   # For verification tasks that need forecast data, this specifies the
   # maximum number of post-processed forecast files that may be missing.
-  # If more than this number are missing, the verification task will not
-  # be run.
+  # If more than this number are missing, the verification task will exit
+  # with an error.
   #
   NUM_MISSING_FCST_FILES_MAX: 0
diff --git a/ush/eval_metplus_timestr_tmpl.py b/ush/eval_metplus_timestr_tmpl.py
new file mode 100644
index 0000000000..205fee1593
--- /dev/null
+++ b/ush/eval_metplus_timestr_tmpl.py
@@ -0,0 +1,63 @@
+#!/usr/bin/env python3
+import argparse
+import os
+import sys
+from datetime import datetime, timedelta
+try:
+    sys.path.append(os.environ['METPLUS_ROOT'])
+except:
+    print("\nERROR ERROR ERROR\n")
+    print("Environment variable METPLUS_ROOT must be set to use this script\n")
+    raise
+from metplus.util import string_template_substitution as sts
+
+def eval_metplus_timestr_tmpl(init_time, lhr, time_lag, fn_template, verbose=False):
+    """
+    Calls the native METplus routine for evaluating filename templates
+
+    Args:
+        init_time   (str): Date string for the initial time in YYYYMMDDHH[mmss] format,
+                           where minutes and seconds are optional.
+        lhr         (int): Lead hour (number of hours since init_time)
+        time_lag    (int): Hours of time lag for a time-lagged ensemble member
+        fn_template (str): The METplus filename template for finding the files
+        verbose    (bool): If True, print the template and the times used to resolve it
+    Returns:
+        str: The fully resolved filename based on the input parameters
+    """
+
+    if len(init_time) == 10:
+        initdate=datetime.strptime(init_time, '%Y%m%d%H')
+    elif len(init_time) == 12:
+        initdate=datetime.strptime(init_time, '%Y%m%d%H%M')
+    elif len(init_time) == 14:
+        initdate=datetime.strptime(init_time, '%Y%m%d%H%M%S')
+    else:
+        raise ValueError(f"Invalid {init_time=}; must be 10, 12, or 14 characters in length")
+
+    validdate=initdate + timedelta(hours=lhr)
+    leadsec=lhr*3600
+    # Evaluate the METplus timestring template for the current lead hour
+    if verbose:
+        print("Resolving METplus template for:")
+        print(f"{fn_template=}\ninit={initdate}\nvalid={validdate}\nlead={leadsec}\n{time_lag=}\n")
+    # Return the full path with templates resolved
+    return sts.do_string_sub(tmpl=fn_template,init=initdate,valid=validdate,
+                             lead=leadsec,time_lag=time_lag)
+
+if __name__ == "__main__":
+
+    parser = argparse.ArgumentParser(
+        description="Resolve a METplus filename template for the given initial time and lead hour and print the resulting filename.",
+    )
+    parser.add_argument("-v", "--verbose", help="Verbose output", action="store_true")
+    parser.add_argument("-i", "--init_time", help="Initial date in YYYYMMDDHH[mmss] format", type=str, default='')
+    parser.add_argument("-l", "--lhr", help="Lead hour", type=int, required=True)
+    parser.add_argument("-tl", "--time_lag", help="Hours of time lag for a time-lagged ensemble member", type=int, default=0)
+    parser.add_argument("-ft", "--fn_template", help="Template for file names to search; see ???
for details on template settings", type=str, default='') + + args = parser.parse_args() + + filename = eval_metplus_timestr_tmpl(**vars(args)) + # If called from command line, we want to print the resolved filename + print(filename) diff --git a/ush/get_crontab_contents.py b/ush/get_crontab_contents.py index f619fa4587..82bb350a0e 100644 --- a/ush/get_crontab_contents.py +++ b/ush/get_crontab_contents.py @@ -168,7 +168,7 @@ def delete_crontab_line(called_from_cron, machine, crontab_line, debug) -> None: crontab_contents = crontab_contents.replace(crontab_line + "\n", "") crontab_contents = crontab_contents.replace(crontab_line, "") else: - print(f"\nWARNING: line not found in crontab, nothing to remove:\n {crontab_line}\n") + print(f"\nWARNING: line not found in crontab, nothing to remove:\n{crontab_line}\n") run_command(f"""echo '{crontab_contents}' | {crontab_cmd}""") diff --git a/ush/get_obs.py b/ush/get_obs.py new file mode 100644 index 0000000000..da88856575 --- /dev/null +++ b/ush/get_obs.py @@ -0,0 +1,961 @@ +#!/usr/bin/env python3 + +import os +import sys +import shutil +import argparse +import logging +from pathlib import Path +import datetime as dt +from textwrap import dedent +from pprint import pprint +from math import ceil, floor +import subprocess +import retrieve_data +from python_utils import ( + load_yaml_config, +) +from mrms_pull_topofhour import mrms_pull_topofhour +try: + sys.path.append(os.environ['METPLUS_ROOT']) +except: + print("\nERROR ERROR ERROR\n") + print("Environment variable METPLUS_ROOT must be set to use this script\n") + raise +from metplus.util import string_template_substitution as sts + + +def get_obs_arcv_hr(obtype, arcv_intvl_hrs, hod): + """ + This file defines a function that, for the given observation type, obs + archive interval, and hour of day, returns the hour (counting from hour + zero of the day) corresponding to the archive file in which the obs file + for the given hour of day is included. + + Note that for cumulative fields (like CCPA and NOHRSC, as opposed to + instantaneous ones like MRMS and NDAS), the archive files corresponding + to hour 0 of the day represent accumulations over the previous day. Thus, + here, we never return an archive hour of 0 for cumulative fields. Instead, + if the specified hour-of-day is 0, we consider that to represent the 0th + hour of the NEXT day (i.e. the 24th hour of the current day) and set the + archive hour to 24. + + Args: + obtype (str): + The observation type. + + arcv_intvl_hrs (int): + Time interval (in hours) between archive files. For example, if the obs + files are bundled into 6-hourly archives, then this will be set to 6. This + must be between 1 and 24 and must divide evenly into 24. + + hod (int): + The hour of the day. This must be between 0 and 23. For cumulative fields + (CCPA and NOHRSC), hour 0 is treated as that of the next day, i.e. as the + 24th hour of the current day. + + Returns: + arcv_hr (int): + The hour since the start of day corresponding to the archive file containing + the obs file for the given hour of day. + """ + + valid_obtypes = ['CCPA', 'NOHRSC', 'MRMS', 'NDAS'] + if obtype not in valid_obtypes: + msg = dedent(f""" + The specified observation type is not supported: + {obtype = } + Valid observation types are: + {valid_obtypes} + """) + logging.error(msg) + raise ValueError(msg) + + # Ensure that the archive interval divides evenly into 24 hours. 
+ remainder = 24 % arcv_intvl_hrs + if remainder != 0: + msg = dedent(f""" + The archive interval for obs of type {obtype} must divide evenly into 24 + but doesn't: + {arcv_intvl_hrs = } + 24 % arcv_intvl_hrs = {remainder} + """) + logging.error(msg) + raise ValueError(msg) + + if (hod < 0) or (hod > 23): + msg = dedent(f""" + The specified hour-of-day must be between 0 and 23, inclusive, but isn't: + {hod = } + """) + logging.error(msg) + raise ValueError(msg) + + # Set the archive hour. This depends on the obs type because each obs + # type can organize its observation files into archives in a different + # way, e.g. a cumulative obs type may put the obs files for hours 1 + # through 6 of the day in the archive labeled with hour 6 while an + # instantaneous obs type may put the obs files for hours 0 through 5 of + # the day in the archive labeled with hour 6. + if obtype in ['CCPA']: + if hod == 0: + arcv_hr = 24 + else: + arcv_hr = ceil(hod/arcv_intvl_hrs)*arcv_intvl_hrs + elif obtype in ['NOHRSC']: + if hod == 0: + arcv_hr = 24 + else: + arcv_hr = floor(hod/arcv_intvl_hrs)*arcv_intvl_hrs + elif obtype in ['MRMS']: + arcv_hr = (floor(hod/arcv_intvl_hrs))*arcv_intvl_hrs + elif obtype in ['NDAS']: + arcv_hr = (floor(hod/arcv_intvl_hrs) + 1)*arcv_intvl_hrs + + return arcv_hr + + +def get_obs(config, obtype, yyyymmdd_task): + """ + This script checks for the existence of obs files of the specified type + at the locations specified by variables in the SRW App's configuration + file. If one or more of these files do not exist, it retrieves them from + a data store (using the ``retrieve_data.py`` script and as specified by the + configuration file ``parm/data_locations.yml`` for that script) and places + them in the locations specified by the App's configuration variables, + renaming them if necessary. + + Args: + config (dict): + The final configuration dictionary (obtained from ``var_defns.yaml``). + + obtype (str): + The observation type. + + yyyymmdd_task (datetime.datetime): + The date for which obs may be needed. + + Returns: + True (bool): + If all goes well. + + + Detailed Description: + + In this script, the main (outer) loop to obtain obs files is over a + sequence of archive hours, where each archive hour in the sequence + represents one archive (tar) file in the data store, and archive hours + are with respect to hour 0 of the day. The number of archive hours in + this sequence depends on how the obs files are arranged into archives + for the given obs type. For example, if the obs files for a given day + are arranged into four archives, then the archive interval is 6 hours, + and in order to get all the obs files for that day, the loop must + iterate over a sequence of 4 hours, either [0, 6, 12, 18] or [6, 12, + 18, 24] (which of these it will be depends on how the obs files are + arranged into the archives). + + Below, we give a description of archive layout for each obs type and + give the archive hours to loop over for the case in which we need to + obtain all available obs for the current day. + + + CCPA (Climatology-Calibrated Precipitation Analysis) precipitation accumulation obs + ----------------------------------------------------------------------------------- + For CCPA, the archive interval is 6 hours, i.e. the obs files are bundled + into 6-hourly archives. The archives are organized such that each one + contains 6 files, so that the obs availability interval is + + .. 
math:: + + \\begin{align*} + \\qquad \\text{obs_avail_intvl_hrs} + & = (\\text{24 hrs})/[(\\text{4 archives}) \\times (\\text{6 files/archive})] \\hspace{50in} \\\\ + & = \\text{1 hr/file} + \\end{align*} + + i.e. there is one obs file for each hour of the day containing the + accumulation over that one hour. The archive corresponding to hour 0 + of the current day contains 6 files representing accumulations during + the 6 hours of the previous day. The archive corresponding to hour 6 + of the current day contains 6 files for the accumulations during the + first 6 hours of the current day, and the archives corresponding to + hours 12 and 18 of the current day each contain 6 files for accumulations + during hours 6-12 and 12-18, respectively, of the current day. Thus, + to obtain all the one-hour accumulations for the current day, we must + extract all the obs files from the three archives corresponding to hours + 6, 12, and 18 of the current day and from the archive corresponding to + hour 0 of the next day. This corresponds to an archive hour sequence + of [6, 12, 18, 24]. Thus, in the simplest case in which the observation + retrieval times include all hours of the current task's day at which + obs files are available and none of the obs files for this day already + exist on disk, this sequence will be [6, 12, 18, 24]. In other cases, + the sequence we loop over will be a subset of [6, 12, 18, 24]. + + Note that CCPA files for 1-hour accumulation have incorrect metadata in + the files under the "00" directory (i.e. for hours-of-day 19 to 00 of + the next day) from 20180718 to 20210504. This script corrects these + errors if getting CCPA obs at these times. + + + NOHRSC (National Operational Hydrologic Remote Sensing Center) snow accumulation observations + --------------------------------------------------------------------------------------------- + For NOHRSC, the archive interval is 24 hours, i.e. the obs files are + bundled into 24-hourly archives. The archives are organized such that + each one contains 4 files, so that the obs availability interval is + + .. math:: + + \\begin{align*} + \\qquad \\text{obs_avail_intvl_hrs} + & = (\\text{24 hrs})/[(\\text{1 archive}) \\times (\\text{4 files/archive})] \\hspace{50in} \\\\ + & = \\text{6 hr/file} + \\end{align*} + + i.e. there is one obs file for each 6-hour interval of the day containing + the accumulation over those 6 hours. The 4 obs files within each archive + correspond to hours 0, 6, 12, and 18 of the current day. The obs file + for hour 0 contains accumulations during the last 6 hours of the previous + day, while those for hours 6, 12, and 18 contain accumulations for the + first, second, and third 6-hour chunks of the current day. Thus, to + obtain all the 6-hour accumulations for the current day, we must extract + from the archive for the current day the obs files for hours 6, 12, and + 18 and from the archive for the next day the obs file for hour 0. This + corresponds to an archive hour sequence of [0, 24]. Thus, in the simplest + case in which the observation retrieval times include all hours of the + current task's day at which obs files are available and none of the obs + files for this day already exist on disk, this sequence will be [0, 24]. + In other cases, the sequence we loop over will be a subset of [0, 24]. + + + MRMS (Multi-Radar Multi-Sensor) radar observations + -------------------------------------------------- + For MRMS, the archive interval is 24 hours, i.e. 
the obs files are
+    bundled into 24-hourly archives.  The archives are organized such that
+    each contains gzipped grib2 files for that day that are usually only a
+    few minutes apart.  However, since the forecasts cannot (yet) perform
+    sub-hourly output, we filter this data in time by using only those obs
+    files that are closest to each hour of the day for which obs are needed.
+    This effectively sets the obs availability interval for MRMS to one
+    hour, i.e.
+
+    .. math::
+
+        \\begin{align*}
+            \\qquad \\text{obs_avail_intvl_hrs}
+            & = \\text{1 hr/file} \\hspace{50in} \\\\
+        \\end{align*}
+
+    i.e. there is one obs file for each hour of the day containing values
+    at that hour (but only after filtering in time; also see notes for
+    ``MRMS_OBS_AVAIL_INTVL_HRS`` in ``config_defaults.yaml``).  Thus, to
+    obtain the obs at all hours of the day, we only need to extract files
+    from one archive.  Thus, in the simplest case in which the observation
+    retrieval times include all hours of the current task's day at which obs
+    files are available and none of the obs files for this day already exist
+    on disk, the sequence of archive hours over which we loop will be just
+    [0].  Note that:
+
+    * For cases in which MRMS data are not needed for all hours of the day,
+      we still need to retrieve and extract from this single daily archive.
+      Thus, the archive hour sequence over which we loop will always
+      be just [0] for MRMS obs.
+
+    * Because MRMS obs are split into two sets of archives -- one for
+      composite reflectivity (REFC) and another for echo top (RETOP) --
+      on any given day (and with an archive hour of 0) we actually retrieve
+      and extract two different archive files (one per field).
+
+
+    NDAS (NAM Data Assimilation System) conventional observations
+    -------------------------------------------------------------
+    For NDAS, the archive interval is 6 hours, i.e. the obs files are
+    bundled into 6-hourly archives.  The archives are organized such that
+    each one contains 7 files (not 6, as one might expect).  The archive
+    associated with time yyyymmddhh_arcv contains the hourly files at
+
+    | yyyymmddhh_arcv - 6 hours
+    | yyyymmddhh_arcv - 5 hours
+    | ...
+    | yyyymmddhh_arcv - 2 hours
+    | yyyymmddhh_arcv - 1 hour
+    | yyyymmddhh_arcv - 0 hours
+
+    These are known as the tm06, tm05, ..., tm02, tm01, and tm00 files,
+    respectively.  Thus, the tm06 file from the current archive, say the
+    one associated with time yyyymmddhh_arcv, has the same valid time as
+    the tm00 file from the previous archive, i.e. the one associated with
+    time (yyyymmddhh_arcv - 6 hours).  It turns out that the tm06 file from
+    the current archive contains more/better observations than the tm00
+    file from the previous archive.  Thus, for a given archive time
+    yyyymmddhh_arcv, we use 6 of the 7 files at tm06, ..., tm01 but not
+    the one at tm00, effectively resulting in 6 files per archive for NDAS
+    obs.  The obs availability interval is then
+
+    .. math::
+
+        \\begin{align*}
+            \\qquad \\text{obs_avail_intvl_hrs}
+            & = (\\text{24 hrs})/[(\\text{4 archives}) \\times (\\text{6 files/archive})] \\hspace{50in} \\\\
+            & = \\text{1 hr/file}
+        \\end{align*}
+
+    i.e. there is one obs file for each hour of the day containing values
+    at that hour.  The archive corresponding to hour 0 of the current day
+    contains 6 files valid at hours 18 through 23 of the previous day.  The
+    archive corresponding to hour 6 of the current day contains 6 files
+    valid at hours 0 through 5 of the current day, and the archives
+    corresponding to hours 12 and 18 of the current day each contain 6
+    files valid at hours 6 through 11 and 12 through 17 of the current day.
+    Thus, to obtain all the hourly values for the current day (from hour
+    0 to hour 23), we must extract the 6 obs files (excluding the tm00
+    ones) from the three archives corresponding to hours 6, 12, and 18 of
+    the current day and the archive corresponding to hour 0 of the next
+    day.  This corresponds to an archive hour sequence set below of [6, 12,
+    18, 24].  Thus, in the simplest case in which the observation retrieval
+    times include all hours of the current task's day at which obs files
+    are available and none of the obs files for this day already exist on
+    disk, this sequence will be [6, 12, 18, 24].  In other cases, the
+    sequence we loop over will be a subset of [6, 12, 18, 24].
+    """
+
+    # Convert obtype to upper case to simplify code below.
+    obtype = obtype.upper()
+
+    # For convenience, get the verification portion of the configuration
+    # dictionary.
+    vx_config = config['verification']
+
+    # Get the time interval (in hours) at which the obs are available.
+    obs_avail_intvl_hrs = vx_config[f'{obtype}_OBS_AVAIL_INTVL_HRS']
+
+    # The obs availability interval must divide evenly into 24 hours.  Otherwise,
+    # different days would have obs available at different hours-of-day.  Make
+    # sure this is the case.
+    remainder = 24 % obs_avail_intvl_hrs
+    if remainder != 0:
+        msg = dedent(f"""
+            The obs availability interval for obs of type {obtype} must divide evenly
+            into 24 but doesn't:
+              {obs_avail_intvl_hrs = }
+              24 % obs_avail_intvl_hrs = {remainder}
+            """)
+        logging.error(msg)
+        raise ValueError(msg)
+
+    # For convenience, convert the obs availability interval to a datetime
+    # object.
+    obs_avail_intvl = dt.timedelta(hours=obs_avail_intvl_hrs)
+
+    # Get the base directory for the observations.
+    obs_dir = vx_config[f'{obtype}_OBS_DIR']
+
+    # Get from the verification configuration dictionary the list of METplus
+    # file name template(s) corresponding to the obs type.
+    obs_fn_templates = vx_config[f'OBS_{obtype}_FN_TEMPLATES']
+
+    # Note that the list obs_fn_templates consists of pairs of elements such
+    # that the first element of the pair represents the verification field
+    # group(s) for which an obs file name template will be needed and the
+    # second element is the template itself.  For convenience, convert this
+    # information to a dictionary in which the field groups are the keys and
+    # the templates are the values.
+    #
+    # Note:
+    # Once the ex-scripts for the vx tasks are converted from bash to python,
+    # the lists in the SRW App's configuration file containing the METplus
+    # obs file name template(s) (from which the variable obs_fn_templates
+    # was obtained above) can be converted to python dictionaries.  Then the
+    # list-to-dictionary conversion step here will no longer be needed.
+    obs_fn_templates_by_fg = dict()
+    for i in range(0, len(obs_fn_templates), 2):
+        obs_fn_templates_by_fg[obs_fn_templates[i]] = obs_fn_templates[i+1]
+
+    # For convenience, get the list of verification field groups for which
+    # the various obs file templates will be used.
+    field_groups_in_obs = obs_fn_templates_by_fg.keys()
+    #
+    #-----------------------------------------------------------------------
+    #
+    # Set variables that are only needed for some obs types.
+ # + #----------------------------------------------------------------------- + # + + # For cumulative obs, set the accumulation period to use when getting obs + # files. This is simply a properly formatted version of the obs availability + # interval. + accum_obs_formatted = None + if obtype == 'CCPA': + accum_obs_formatted = f'{obs_avail_intvl_hrs:02d}' + elif obtype == 'NOHRSC': + accum_obs_formatted = f'{obs_avail_intvl_hrs:d}' + + # For MRMS obs, set field-dependent parameters needed in forming grib2 + # file names. + mrms_fields_in_obs_filenames = [] + mrms_levels_in_obs_filenames = [] + if obtype == 'MRMS': + for fg in field_groups_in_obs: + if fg == 'REFC': + mrms_fields_in_obs_filenames.append('MergedReflectivityQCComposite') + mrms_levels_in_obs_filenames.append('00.50') + elif fg == 'RETOP': + mrms_fields_in_obs_filenames.append('EchoTop') + mrms_levels_in_obs_filenames.append('18_00.50') + else: + msg = dedent(f""" + Field and level names have not been specified for this {obtype} field + group: + {obtype = } + {fg = } + """) + logging.error(msg) + raise ValueError(msg) + + # CCPA files for 1-hour accumulation have incorrect metadata in the files + # under the "00" directory from 20180718 to 20210504. Set these starting + # and ending dates as datetime objects for later use. + ccpa_bad_metadata_start = dt.datetime.strptime('20180718', '%Y%m%d') + ccpa_bad_metadata_end = dt.datetime.strptime('20210504', '%Y%m%d') + + # + #----------------------------------------------------------------------- + # + # Form a string list of all the times in the current day (each in the + # format "YYYYMMDDHH") at which to retrieve obs. + # + #----------------------------------------------------------------------- + # + yyyymmdd_task_str = dt.datetime.strftime(yyyymmdd_task, '%Y%m%d') + obs_retrieve_times_crnt_day_str = vx_config[f'OBS_RETRIEVE_TIMES_{obtype}_{yyyymmdd_task_str}'] + obs_retrieve_times_crnt_day \ + = [dt.datetime.strptime(yyyymmddhh_str, '%Y%m%d%H') for yyyymmddhh_str in obs_retrieve_times_crnt_day_str] + # + #----------------------------------------------------------------------- + # + # Obs files will be obtained by extracting them from the relevant n-hourly + # archives, where n is the archive interval in hours (denoted below by the + # variable arcv_intvl_hrs). Thus, we must first obtain the sequence of + # hours (since hour 0 of the task day) corresponding to the archive files + # from which we must extract obs files. We refer to this as the sequence + # of archive hours. + # + # To generate this sequence, we first set the archive interval and then + # set the starting and ending archive hour values. + # + # + #----------------------------------------------------------------------- + # + if obtype == 'CCPA': + arcv_intvl_hrs = 6 + elif obtype == 'NOHRSC': + arcv_intvl_hrs = 24 + elif obtype == 'MRMS': + arcv_intvl_hrs = 24 + elif obtype == 'NDAS': + arcv_intvl_hrs = 6 + arcv_intvl = dt.timedelta(hours=arcv_intvl_hrs) + + # Number of obs files within each archive. + num_obs_files_per_arcv = int(arcv_intvl/obs_avail_intvl) + + # Initial guess for starting archive hour. This is set to the archive + # hour containing obs at the first obs retrieval time of the day. + arcv_hr_start = get_obs_arcv_hr(obtype, arcv_intvl_hrs, obs_retrieve_times_crnt_day[0].hour) + + # Ending archive hour. This is set to the archive hour containing obs at + # the last obs retrieval time of the day. 
+ arcv_hr_end = get_obs_arcv_hr(obtype, arcv_intvl_hrs, obs_retrieve_times_crnt_day[-1].hour) + + # Set other variables needed below when evaluating the METplus template for + # the full path to the processed observation files. + ushdir = config['user']['USHdir'] + + # Create dictionary containing the paths to all the processed obs files + # that should exist once this script successfully completes. In this + # dictionary, the keys are the field groups, and the values are lists of + # paths. Here, by "paths to processed files" we mean the paths after any + # renaming and rearrangement of files that this script may do to the "raw" + # files, i.e. the files as they are named and arranged within the archive + # (tar) files on HPSS. + all_fp_proc_dict = {} + for fg, fn_proc_tmpl in obs_fn_templates_by_fg.items(): + fp_proc_tmpl = os.path.join(obs_dir, fn_proc_tmpl) + all_fp_proc_dict[fg] = [] + for yyyymmddhh in obs_retrieve_times_crnt_day: + # Set the lead time, a timedelta object from the beginning of the + # day at which the file is valid. + leadtime = yyyymmddhh - yyyymmdd_task + # Call METplus subroutine to evaluate the template for the full path to + # the file containing METplus timestrings at the current time. + fn = sts.do_string_sub(tmpl=fp_proc_tmpl,init=yyyymmdd_task,valid=yyyymmddhh, + lead=leadtime.total_seconds()) + all_fp_proc_dict[fg].append(fn) + + # Check whether any obs files already exist on disk in their processed + # (i.e. final) locations. If so, adjust the starting archive hour. In + # the process, keep a count of the number of obs files that already exist + # on disk. + num_existing_files = 0 + do_break = False + for fg in field_groups_in_obs: + for yyyymmddhh, fp_proc in zip(obs_retrieve_times_crnt_day, all_fp_proc_dict[fg]): + # Check whether the processed file already exists. + if os.path.isfile(fp_proc): + num_existing_files += 1 + msg = dedent(f""" + File already exists on disk: + {fp_proc = } + """) + logging.debug(msg) + else: + arcv_hr_start = get_obs_arcv_hr(obtype, arcv_intvl_hrs, yyyymmddhh.hour) + msg = dedent(f""" + File does not exist on disk: + {fp_proc = } + Setting the hour (since hour 0 of the current task day) of the first + archive to retrieve to: + {arcv_hr_start = } + """) + logging.info(msg) + do_break = True + break + if do_break: break + + # If the number of obs files that already exist on disk is equal to the + # number of obs files needed, then there is no need to retrieve any files. + # The number of obs files needed (i.e. that need to be staged) is equal + # to the number of times in the current day that obs are needed times the + # number of sets of files that the current obs type contains. + num_files_needed = len(obs_retrieve_times_crnt_day)*len(obs_fn_templates_by_fg) + if num_existing_files == num_files_needed: + + msg = dedent(f""" + All obs files needed for the current day (yyyymmdd_task) already exist + on disk: + {yyyymmdd_task = } + Thus, there is no need to retrieve any files. + """) + logging.info(msg) + return True + + # If the number of obs files that already exist on disk is not equal to + # the number of obs files needed, then we will need to retrieve files. + # In this case, set the sequence of hours corresponding to the archives + # from which files will be retrieved. 
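+    # As a concrete (purely illustrative) example: for CCPA obs with a 6-hour
+    # archive interval, if the earliest obs retrieval time whose file is
+    # missing from disk is 01Z and the last obs retrieval time of the day is
+    # 00Z of the next day, then arcv_hr_start = 6 and arcv_hr_end = 24, and
+    # the list comprehension below yields arcv_hrs = [6, 12, 18, 24], i.e.
+    # one entry per archive that must be pulled from the data store.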
+ arcv_hrs = [hr for hr in range(arcv_hr_start, arcv_hr_end+arcv_intvl_hrs, arcv_intvl_hrs)] + msg = dedent(f""" + At least some obs files needed for the current day (yyyymmdd_task) + do not exist on disk: + {yyyymmdd_task = } + The number of obs files needed for the current day is: + {num_files_needed = } + The number of obs files that already exist on disk is: + {num_existing_files = } + Will retrieve remaining files by looping over archives corresponding to + the following hours (since hour 0 of the current day): + {arcv_hrs = } + """) + logging.info(msg) + # + #----------------------------------------------------------------------- + # + # At this point, at least some obs files for the current day need to be + # retrieved. Thus, loop over the relevant archives that contain obs for + # the day given by yyyymmdd_task and retrieve files as needed. + # + # Note that the NOHRSC data on HPSS are archived by day, with the archive + # for a given day containing 6-hour as well as 24-hour grib2 files. As + # described above, the four 6-hour files are for accumulated snowfall at + # hour 0 of the current day (which represents accumulation over the last + # 6 hours of the previous day) as well as hours 6, 12, and 18, while the + # two 24-hour files are at hour 0 (which represents accumulation over all + # 24 hours of the previous day) and 12 (which represents accumulation over + # the last 12 hours of the previous day plus the first 12 hours of the + # current day). Here, we will only obtain the 6-hour files. In other + # workflow tasks, the values in these 6-hour files will be added as + # necessary to obtain accumulations over longer periods (e.g. 24 hours). + # Since the four 6-hour files are in one archive and are relatively small + # (on the order of kilobytes), we get them all with a single call to the + # retrieve_data.py script. + # + #----------------------------------------------------------------------- + # + + # Whether to remove raw observations after processed directories have + # been created from them. + remove_raw_obs = vx_config[f'REMOVE_RAW_OBS_{obtype}'] + + # Base directory that will contain the archive subdirectories in which + # the files extracted from each archive (tar) file will be placed. We + # refer to this as the "raw" base directory because it contains files + # as they are found in the archives before any processing by this script. + basedir_raw = os.path.join(obs_dir, 'raw_' + yyyymmdd_task_str) + + for arcv_hr in arcv_hrs: + + msg = dedent(f""" + Processing archive hour {arcv_hr} ... + """) + logging.info(msg) + + # Calculate the time information for the current archive. + yyyymmddhh_arcv = yyyymmdd_task + dt.timedelta(hours=arcv_hr) + yyyymmddhh_arcv_str = dt.datetime.strftime(yyyymmddhh_arcv, '%Y%m%d%H') + yyyymmdd_arcv_str = dt.datetime.strftime(yyyymmddhh_arcv, '%Y%m%d') + + # Set the subdirectory under the raw base directory that will contain the + # files retrieved from the current archive. We refer to this as the "raw" + # archive sudirectory because it will contain the files as they are in + # the archive before any processing by this script. Later below, this + # will be combined with the raw base directory (whose name depends on the + # year, month, and day of the current obs day) to obtain the full path to + # the raw archive directory (arcv_dir_raw). 
+ # + # Notes on each obs type: + # + # CCPA: + # The raw subdirectory name must include the year, month, day, and hour + # in order to avoid get_obs tasks for different days clobbering each + # others' obs files. + # + # NOHRSC: + # The hour-of-day of the archive is irrelevant because there is only one + # archive per day, so we don't include it in the raw archive subdirectory's + # name. However, we still need a subdirectory that contains the year, + # month, and day information of the archive because in the simplest case + # of having to get the NOHRSC obs for all hours of the current obs day, + # we need to extract obs files from two archives -- one for the current + # day (which includes the files for accumulations over hours 0-6, 6-12, + # and 12-18 of the current day) and another for the next day (which + # includes the file for accumulations over hours 18-24 of the current + # day). To distinguish between the raw obs files from these two archives, + # we create an archive-time dependent raw subdirectory for each possible + # archive. + # + # MRMS: + # There is only one archive per day, and it contains all the raw obs + # files needed to generate processed obs files for the current day. + # Since we will only ever need this one archive for a given day, + # for simplicity we simply do not create a raw archive subdirectory. + # + # NDAS: + # Same as for CCPA. + if obtype == 'CCPA': + arcv_subdir_raw = yyyymmddhh_arcv_str + elif obtype == 'NOHRSC': + arcv_subdir_raw = yyyymmdd_arcv_str + elif obtype == 'MRMS': + arcv_subdir_raw = '' + elif obtype == 'NDAS': + arcv_subdir_raw = yyyymmddhh_arcv_str + + # Combine the raw archive base directory with the raw archive subdirectory + # name to obtain the full path to the raw archive directory. + arcv_dir_raw = os.path.join(basedir_raw, arcv_subdir_raw) + + # Check whether any of the obs retrieval times for the day associated with + # this task fall in the time interval spanned by the current archive. If + # so, set the flag (do_retrieve) to retrieve the files in the current + # archive. + if obtype == 'CCPA': + arcv_contents_start = yyyymmddhh_arcv - (num_obs_files_per_arcv - 1)*obs_avail_intvl + arcv_contents_end = yyyymmddhh_arcv + elif obtype == 'NOHRSC': + arcv_contents_start = yyyymmddhh_arcv + arcv_contents_end = yyyymmddhh_arcv + (num_obs_files_per_arcv - 1)*obs_avail_intvl + elif obtype == 'MRMS': + arcv_contents_start = yyyymmddhh_arcv + arcv_contents_end = yyyymmddhh_arcv + (num_obs_files_per_arcv - 1)*obs_avail_intvl + elif obtype == 'NDAS': + arcv_contents_start = yyyymmddhh_arcv - num_obs_files_per_arcv*obs_avail_intvl + arcv_contents_end = yyyymmddhh_arcv - obs_avail_intvl + + do_retrieve = False + for obs_retrieve_time in obs_retrieve_times_crnt_day: + if (obs_retrieve_time >= arcv_contents_start) and \ + (obs_retrieve_time <= arcv_contents_end): + do_retrieve = True + break + + if not do_retrieve: + msg = dedent(f""" + None of the current day's observation retrieval times (possibly including + hour 0 of the next day if considering a cumulative obs type) fall in the + range spanned by the current {arcv_intvl_hrs}-hourly archive file. 
The
+                bounds of the data in the current archive are:
+                  {arcv_contents_start = }
+                  {arcv_contents_end = }
+                The times at which obs need to be retrieved are:
+                  {obs_retrieve_times_crnt_day = }
+                """)
+            logging.info(msg)
+
+        else:
+
+            # Make sure the raw archive directory exists because it is used below as
+            # the output directory of the retrieve_data.py script (so if this directory
+            # doesn't already exist, that script will fail).  Creating this directory
+            # also ensures that the raw base directory (basedir_raw) exists before we
+            # change location to it below.
+            Path(arcv_dir_raw).mkdir(parents=True, exist_ok=True)
+
+            # The retrieve_data.py script first extracts the contents of the archive
+            # file into the directory it was called from and then moves them to the
+            # specified output location (via the --output_path option).  Note that
+            # the relative paths of obs files within archives associated with different
+            # days may be the same.  Thus, if files with the same archive-relative
+            # paths are being simultaneously extracted from multiple archive files
+            # (by multiple get_obs tasks), they will likely clobber each other if the
+            # extraction is being carried out into the same location on disk.  To avoid
+            # this, we first change location to the raw base directory (whose name is
+            # obs-day dependent) and then call the retrieve_data.py script.
+            os.chdir(basedir_raw)
+
+            # Pull obs from HPSS.  This will get all the obs files in the current
+            # archive and place them in the raw archive directory.
+            #
+            # Note that for the specific case of NDAS obs, this will get all 7 obs
+            # files in the current archive, although we will make use of only 6 of
+            # these (we will not use the tm00 file).
+            parmdir = config['user']['PARMdir']
+            args = ['--debug', \
+                    '--file_set', 'obs', \
+                    '--config', os.path.join(parmdir, 'data_locations.yml'), \
+                    '--cycle_date', yyyymmddhh_arcv_str, \
+                    '--data_stores', 'hpss', \
+                    '--data_type', obtype + '_obs', \
+                    '--output_path', arcv_dir_raw, \
+                    '--summary_file', 'retrieve_data.log']
+            retrieve_data.main(args)
+
+            # Get the list of times corresponding to the obs files in the current
+            # archive.  This is a list of datetime objects.
+            if obtype == 'CCPA':
+                obs_times_in_arcv = [yyyymmddhh_arcv - i*obs_avail_intvl for i in range(0,num_obs_files_per_arcv)]
+            elif obtype == 'NOHRSC':
+                obs_times_in_arcv = [yyyymmddhh_arcv + i*obs_avail_intvl for i in range(0,num_obs_files_per_arcv)]
+            elif obtype == 'MRMS':
+                obs_times_in_arcv = [yyyymmddhh_arcv + i*obs_avail_intvl for i in range(0,num_obs_files_per_arcv)]
+            elif obtype == 'NDAS':
+                obs_times_in_arcv = [yyyymmddhh_arcv - (i+1)*obs_avail_intvl for i in range(0,num_obs_files_per_arcv)]
+            obs_times_in_arcv.sort()
+
+            # Loop over the raw obs files extracted from the current archive and
+            # generate from them the processed obs files.
+            #
+            # Notes on each obs type:
+            #
+            # CCPA:
+            # For most dates, generating the processed obs files consists of simply
+            # copying or moving the files from the raw archive directory to the processed
+            # directory, possibly renaming them in the process.  However, for dates
+            # between 20180718 and 20210504 and hours-of-day 19 through the end of the
+            # day (i.e. hour 0 of the next day), it involves using wgrib2 to correct an
+            # error in the metadata of the raw file and writing the corrected data
+            # to a new grib2 file in the processed location.
+            #
+            # NOHRSC:
+            # Generating the processed obs files consists of simply copying or moving
+            # the files from the raw archive directory to the processed directory,
+            # possibly renaming them in the process.
+            #
+            # MRMS:
+            # The MRMS obs are in fact available every few minutes, but the smallest
+            # value we allow the obs availability interval to be set to is 1 hour
+            # because the forecasts cannot (yet) perform sub-hourly output (also see
+            # notes for MRMS_OBS_AVAIL_INTVL_HRS in config_defaults.yaml).  For this
+            # reason, MRMS obs require an extra processing step on the raw files (before
+            # creating the processed files).  In this step, at each obs retrieval time
+            # we first select, from the set of all raw (and gzipped) grib2 files for the
+            # current day (which are usually only a few minutes apart), the file that is
+            # nearest in time to the obs retrieval time.  After selecting this gzipped
+            # grib2 file, we unzip it and place it in a temporary subdirectory under the
+            # raw base directory.  Only after this step do we generate the processed
+            # file by moving this intermediate file to the processed directory, possibly
+            # renaming it in the process.
+            #
+            # NDAS:
+            # Generating the processed obs files consists of simply copying or moving
+            # the files from the raw archive directory to the processed directory,
+            # possibly renaming them in the process.  Note that for a given NDAS archive,
+            # the tm06 file contains more/better observations than the tm00 file in the
+            # previous archive (their valid times being equivalent), so we always use
+            # the tm06 files.
+            for yyyymmddhh in obs_times_in_arcv:
+
+                # Create the processed obs file from the raw one (by moving, copying, or
+                # otherwise) only if the time of the current file in the current archive
+                # also exists in the list of obs retrieval times for the current day.  We
+                # need to check this because it is possible that some of the obs retrieval
+                # times come before the range of times spanned by the current archive while
+                # the others come after, but none fall within that range.  This can happen
+                # because the set of archive hours over which we are looping were constructed
+                # above without considering whether there are obs retrieve time gaps that
+                # make it unnecessary to retrieve some of the archives between the first
+                # and last ones that must be retrieved.
+                if yyyymmddhh in obs_retrieve_times_crnt_day:
+
+                    for i, fg in enumerate(field_groups_in_obs):
+
+                        # For MRMS obs, first select from the set of raw files for the current day
+                        # those that are nearest in time to the current hour.  Unzip these in a
+                        # temporary subdirectory under the raw base directory.
+                        #
+                        # Note that the function we call to do this (mrms_pull_topofhour) assumes
+                        # a certain file naming convention.  That convention must match the names
+                        # of the files that the retrieve_data.py script called above ends up
+                        # retrieving.  The list of possible templates for these names is given
+                        # in parm/data_locations.yml, but which of those is actually used is not
+                        # known until retrieve_data.py completes.  Thus, that information needs
+                        # to be passed back by retrieve_data.py and then passed to mrms_pull_topofhour.
+                        # For now, we hard-code the file name here.
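+                        # As an illustration (hypothetical times): for NDAS obs with
+                        # yyyymmddhh_arcv = 2024042906 and an obs time of 2024042902,
+                        # the code below computes hrs_ago = 4 and therefore looks for
+                        # the raw file nam.t06z.prepbufr.tm04.nr, while for CCPA obs
+                        # valid at 03Z with a 1-hour accumulation it looks for
+                        # ccpa.t03z.01h.hrap.conus.gb2.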
+ if obtype == 'MRMS': + yyyymmddhh_str = dt.datetime.strftime(yyyymmddhh, '%Y%m%d%H') + mrms_pull_topofhour(valid_time=yyyymmddhh_str, + source=basedir_raw, + outdir=os.path.join(basedir_raw, 'topofhour'), + product=mrms_fields_in_obs_filenames[i], + add_vdate_subdir=False) + + # The raw file name needs to be the same as what the retrieve_data.py + # script called above ends up retrieving. The list of possible templates + # for this name is given in parm/data_locations.yml, but which of those + # is actually used is not known until retrieve_data.py completes. Thus, + # that information needs to be passed back by the script and used here. + # For now, we hard-code the file name here. + if obtype == 'CCPA': + hr = yyyymmddhh.hour + fn_raw = 'ccpa.t' + f'{hr:02d}' + 'z.' + accum_obs_formatted + 'h.hrap.conus.gb2' + elif obtype == 'NOHRSC': + yyyymmddhh_str = dt.datetime.strftime(yyyymmddhh, '%Y%m%d%H') + fn_raw = 'sfav2_CONUS_' + accum_obs_formatted + 'h_' + yyyymmddhh_str + '_grid184.grb2' + elif obtype == 'MRMS': + hr = yyyymmddhh.hour + fn_raw = mrms_fields_in_obs_filenames[i] + '_' + mrms_levels_in_obs_filenames[i] \ + + '_' + yyyymmdd_task_str + '-' + f'{hr:02d}' + '0000.grib2' + fn_raw = os.path.join('topofhour', fn_raw) + elif obtype == 'NDAS': + time_ago = yyyymmddhh_arcv - yyyymmddhh + hrs_ago = int(time_ago.seconds/3600) + hh_arcv_str = dt.datetime.strftime(yyyymmddhh_arcv, '%H') + fn_raw = 'nam.t' + hh_arcv_str + 'z.prepbufr.tm' + f'{hrs_ago:02d}' + '.nr' + fp_raw = os.path.join(arcv_dir_raw, fn_raw) + + # Get the full path to the final processed obs file (fp_proc) we want to + # create. + indx = obs_retrieve_times_crnt_day.index(yyyymmddhh) + fp_proc = all_fp_proc_dict[fg][indx] + + # Make sure the directory in which the processed file will be created exists. + dir_proc = os.path.dirname(fp_proc) + Path(dir_proc).mkdir(parents=True, exist_ok=True) + + msg = dedent(f""" + Creating the processed obs file + {fp_proc} + from the raw file + {fp_raw} + ... + """) + logging.debug(msg) + + yyyymmdd = yyyymmddhh.replace(hour=0, minute=0, second=0) + # CCPA files for 1-hour accumulation have incorrect metadata in the files + # under the "00" directory from 20180718 to 20210504. After the data is + # pulled, reorganize into correct yyyymmdd structure. + if (obtype == 'CCPA') and \ + ((yyyymmdd >= ccpa_bad_metadata_start) and (yyyymmdd <= ccpa_bad_metadata_end)) and \ + (((hr >= 19) and (hr <= 23)) or (hr == 0)): + cmd = ' '.join(['wgrib2', fp_raw, '-set_date -24hr -grib', fp_proc, '-s']) + result = subprocess.run(cmd, shell=True, capture_output=True, text=True) + elif remove_raw_obs: + shutil.move(fp_raw, fp_proc) + else: + shutil.copy(fp_raw, fp_proc) + # + #----------------------------------------------------------------------- + # + # Clean up raw obs directories. + # + #----------------------------------------------------------------------- + # + if remove_raw_obs: + logging.info("Removing raw obs directories ...") + shutil.rmtree(basedir_raw) + + return True + + + +def parse_args(argv): + """Parse command line arguments.""" + parser = argparse.ArgumentParser( + description="Get observations." 
+ ) + + parser.add_argument( + "--obtype", + type=str, + required=True, + choices=['CCPA', 'NOHRSC', 'MRMS', 'NDAS'], + help="Observation type.", + ) + + parser.add_argument( + "--obs_day", + type=lambda d: dt.datetime.strptime(d, '%Y%m%d'), + required=True, + help="Date of observation day, in the form 'YYYYMMDD'.", + ) + + parser.add_argument( + "--var_defns_path", + type=str, + required=True, + help="Path to variable definitions file.", + ) + + choices_log_level = [pair for lvl in list(logging._nameToLevel.keys()) + for pair in (str.lower(lvl), str.upper(lvl))] + parser.add_argument( + "--log_level", + type=str, + required=False, + default='info', + choices=choices_log_level, + help=dedent(f""" + Logging level to use with the 'logging' module. + """)) + + parser.add_argument( + "--log_fp", + type=str, + required=False, + default='', + help=dedent(f""" + Name of or path (absolute or relative) to log file. If not specified, + the output goes to the screen. + """)) + + return parser.parse_args(argv) + + +if __name__ == "__main__": + args = parse_args(sys.argv[1:]) + + # Set up logging. + # If the name/path of a log file has been specified in the command line + # arguments, place the logging output in it (existing log files of the + # same name are overwritten). Otherwise, direct the output to the screen. + log_level = str.upper(args.log_level) + msg_format = "[%(levelname)s:%(name)s: %(filename)s, line %(lineno)s: %(funcName)s()] %(message)s" + if args.log_fp: + logging.basicConfig(level=log_level, format=msg_format, filename=args.log_fp, filemode='w') + else: + logging.basicConfig(level=log_level, format=msg_format) + + cfg = load_yaml_config(args.var_defns_path) + get_obs(cfg, args.obtype, args.obs_day) + + diff --git a/ush/launch_FV3LAM_wflow.sh b/ush/launch_FV3LAM_wflow.sh index 7c26511f4f..7a4a16e4b5 100644 --- a/ush/launch_FV3LAM_wflow.sh +++ b/ush/launch_FV3LAM_wflow.sh @@ -353,9 +353,9 @@ script for this experiment: # Remove CRONTAB_LINE from cron table # if [ "${called_from_cron}" = "TRUE" ]; then - python3 $USHdir/get_crontab_contents.py --remove -m=${machine} -l="${CRONTAB_LINE}" -c -d + python3 $USHdir/get_crontab_contents.py --remove -m=${machine} -l='${CRONTAB_LINE}' -c -d else - python3 $USHdir/get_crontab_contents.py --remove -m=${machine} -l="${CRONTAB_LINE}" -d + python3 $USHdir/get_crontab_contents.py --remove -m=${machine} -l='${CRONTAB_LINE}' -d fi fi # diff --git a/ush/machine/hera.yaml b/ush/machine/hera.yaml index 1ca55ae270..5644814e1d 100644 --- a/ush/machine/hera.yaml +++ b/ush/machine/hera.yaml @@ -4,9 +4,10 @@ platform: SCHED: slurm WE2E_TEST_DATA: /scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop TEST_CCPA_OBS_DIR: /scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop/obs_data/ccpa/proc + TEST_NOHRSC_OBS_DIR: /scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop/obs_data/nohrsc/proc TEST_MRMS_OBS_DIR: /scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop/obs_data/mrms/proc TEST_NDAS_OBS_DIR: /scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop/obs_data/ndas/proc - TEST_NOHRSC_OBS_DIR: /scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop/obs_data/nohrsc/proc + TEST_GDAS_OBS_DIR: /scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop/obs_data/gdas DOMAIN_PREGEN_BASEDIR: /scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop/FV3LAM_pregen PARTITION_DEFAULT: hera QUEUE_DEFAULT: batch @@ -29,6 +30,7 @@ platform: TEST_PREGEN_BASEDIR: /scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop/FV3LAM_pregen TEST_ALT_EXTRN_MDL_SYSBASEDIR_ICS:
/scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop/dummy_FV3GFS_sys_dir TEST_ALT_EXTRN_MDL_SYSBASEDIR_LBCS: /scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop/dummy_FV3GFS_sys_dir + #TEST_VX_FCST_INPUT_BASEDIR: '{{ "/scratch2/BMC/fv3lam/Gerard.Ketefian/forecasts_for_SRW_WE2E_tests.final/fcst_" }}{{ "ens" if (global.NUM_ENS_MEMBERS > 0) else "det" }}{{ "/{{workflow.PREDEF_GRID_NAME}}" }}{% raw %}{% endraw %}' TEST_VX_FCST_INPUT_BASEDIR: '{{ "/scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop/output_data/fcst_" }}{{ "ens" if (global.NUM_ENS_MEMBERS > 0) else "det" }}{{ "/{{workflow.PREDEF_GRID_NAME}}" }}{% raw %}{% endraw %}' FIXaer: /scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop/fix/fix_aer FIXgsm: /scratch1/NCEPDEV/nems/role.epic/UFS_SRW_data/develop/fix/fix_am diff --git a/ush/mrms_pull_topofhour.py b/ush/mrms_pull_topofhour.py index 30b6281503..58d24aeff1 100644 --- a/ush/mrms_pull_topofhour.py +++ b/ush/mrms_pull_topofhour.py @@ -6,7 +6,7 @@ import shutil import gzip -def main(): +def mrms_pull_topofhour(valid_time, outdir, source, product, level=None, add_vdate_subdir=True, debug=False): """Identifies the MRMS file closest to the valid time of the forecast. METplus is configured to look for a MRMS composite reflectivity file for the valid time of the forecast being verified; since MRMS composite @@ -22,55 +22,45 @@ def main(): time of the forecast """ - #Parse input arguments - parser = argparse.ArgumentParser() - parser.add_argument('-v', '--valid_time', type=str, required=True, - help='Valid time (in string format YYYYMMDDHH) to find MRMS data for') - parser.add_argument('-o', '--outdir', type=str, required=True, - help='Destination directory for extracted MRMS data; data will be placed in `dest/YYYYMMDD`') - parser.add_argument('-s', '--source', type=str, required=True, - help='Source directory where zipped MRMS data is found') - parser.add_argument('-p', '--product', type=str, required=True, choices=['MergedReflectivityQCComposite', 'EchoTop'], - help='Name of MRMS product') - parser.add_argument('-l', '--level', type=str, help='MRMS product level', - choices=['_00.50_','_18_00.50_']) - parser.add_argument('-d', '--debug', action='store_true', help='Add additional debug output') - args = parser.parse_args() # Level is determined by MRMS product; set if not provided - if args.level is None: - if args.product == "MergedReflectivityQCComposite": - args.level = "_00.50_" - elif args.product == "EchoTop": - args.level = "_18_00.50_" + if level is None: + if product == "MergedReflectivityQCComposite": + level = "_00.50_" + elif product == "EchoTop": + level = "_18_00.50_" else: raise Exception("This should never have happened") # Copy and unzip MRMS files that are closest to top of hour # Done every hour on a 20-minute lag - YYYY = int(args.valid_time[0:4]) - MM = int(args.valid_time[4:6]) - DD = int(args.valid_time[6:8]) - HH = int(args.valid_time[8:19]) + YYYY = int(valid_time[0:4]) + MM = int(valid_time[4:6]) + DD = int(valid_time[6:8]) + HH = int(valid_time[8:19]) valid = datetime.datetime(YYYY, MM, DD, HH, 0, 0) valid_str = valid.strftime("%Y%m%d") - print(f"Pulling {args.valid_time} MRMS data") + print(f"Pulling MRMS product {product} for valid time: {valid_time}") # Set up working directory - dest_dir = os.path.join(args.outdir, valid_str) + valid_str_or_empty = '' + if add_vdate_subdir: + valid_str_or_empty = valid_str + + dest_dir = os.path.join(outdir, valid_str_or_empty) if not os.path.exists(dest_dir): os.makedirs(dest_dir) # Sort list of files for each MRMS 
product - if args.debug: + if debug: print(f"Valid date: {valid_str}") - search_path = f"{args.source}/{valid_str}/{args.product}*.gz" + search_path = os.path.join(source, valid_str_or_empty, product + "*.gz") file_list = [f for f in glob.glob(search_path)] - if args.debug: + if debug: print(f"Files found: \n{file_list}") time_list = [file_list[x][-24:-9] for x in range(len(file_list))] int_list = [ @@ -90,12 +80,12 @@ def main(): # Check to make sure closest file is within +/- 15 mins of top of the hour difference = abs(closest_timestamp - valid) if difference.total_seconds() <= 900: - filename1 = f"{args.product}{args.level}{closest_timestamp.strftime('%Y%m%d-%H%M%S')}.grib2.gz" - filename2 = f"{args.product}{args.level}{valid.strftime('%Y%m%d-%H')}0000.grib2" - origfile = os.path.join(args.source, valid_str, filename1) + filename1 = f"{product}{level}{closest_timestamp.strftime('%Y%m%d-%H%M%S')}.grib2.gz" + filename2 = f"{product}{level}{valid.strftime('%Y%m%d-%H')}0000.grib2" + origfile = os.path.join(source, valid_str_or_empty, filename1) target = os.path.join(dest_dir, filename2) - if args.debug: + if debug: print(f"Unzipping file {origfile} to {target}") @@ -107,4 +97,23 @@ def main(): raise FileNotFoundError(f"Did not find a valid file within 15 minutes of {valid}") if __name__ == "__main__": - main() + #Parse input arguments + parser = argparse.ArgumentParser() + parser.add_argument('-v', '--valid_time', type=str, required=True, + help='Valid time (in string format YYYYMMDDHH) to find MRMS data for') + parser.add_argument('-o', '--outdir', type=str, required=True, + help='Destination directory for extracted MRMS data; data will be placed in `dest/YYYYMMDD`') + parser.add_argument('-s', '--source', type=str, required=True, + help='Source directory where zipped MRMS data is found') + parser.add_argument('-p', '--product', type=str, required=True, choices=['MergedReflectivityQCComposite', 'EchoTop'], + help='Name of MRMS product') + parser.add_argument('-l', '--level', type=str, help='MRMS product level', + choices=['_00.50_','_18_00.50_']) + parser.add_argument('--add_vdate_subdir', default=True, required=False, action=argparse.BooleanOptionalAction, + help='Flag to add valid-date subdirectory to source and destination directories') + parser.add_argument('-d', '--debug', action='store_true', help='Add additional debug output') + args = parser.parse_args() + + #Consistency checks + + mrms_pull_topofhour(**vars(args)) diff --git a/ush/retrieve_data.py b/ush/retrieve_data.py index 70d1ef015c..30f3c6aed7 100755 --- a/ush/retrieve_data.py +++ b/ush/retrieve_data.py @@ -50,6 +50,7 @@ def clean_up_output_dir(expected_subdir, local_archive, output_path, source_path unavailable = {} expand_source_paths = [] logging.debug(f"Cleaning up local paths: {source_paths}") + logging.debug(f"Looking for these local paths under directory: {os.getcwd()}") for p in source_paths: expand_source_paths.extend(glob.glob(p.lstrip("/"))) diff --git a/ush/set_cycle_and_obs_timeinfo.py b/ush/set_cycle_and_obs_timeinfo.py new file mode 100644 index 0000000000..7ae764ad10 --- /dev/null +++ b/ush/set_cycle_and_obs_timeinfo.py @@ -0,0 +1,847 @@ +#!/usr/bin/env python3 + +from datetime import datetime, timedelta, date +from pprint import pprint +from textwrap import dedent +from python_utils import print_input_args, print_err_msg_exit +import logging + + +def set_cycle_dates(start_time_first_cycl, start_time_last_cycl, cycl_intvl, + return_type='string'): + """ + This file defines a function that returns a list 
containing the starting + times of all the cycles in the experiment. + + If return_type is set to 'string' (the default value), the returned list + contains strings in the format 'YYYYMMDDHH'. If it is set to 'datetime', + the returned list contains a set of datetime objects. + + Args: + start_time_first_cycl (datetime.datetime): + Starting time of first cycle. + + start_time_last_cycl (datetime.datetime): + Starting time of last cycle. + + cycl_intvl (datetime.timedelta): + Time interval between cycle start times. + + return_type (str): + Type of the returned list. Can be 'string' or 'datetime'. + + Returns: + all_cdates (list): + Either a list of strings in the format 'YYYYMMDDHH' or a list of datetime + objects containing the cycle starting times, where 'YYYY' is the four- + digit year, 'MM is the two-digit month, 'DD' is the two-digit day-of- + month, and 'HH' is the two-digit hour-of-day. + """ + + print_input_args(locals()) + + valid_values = ['string', 'datetime'] + if return_type not in valid_values: + msg = dedent(f""" + Invalid value for optional argument "return_type": + {return_type = } + Valid values are: + {valid_values = } + """) + logging.error(msg) + raise ValueError(msg) + + # iterate over cycles + all_cdates = [] + cdate = start_time_first_cycl + while cdate <= start_time_last_cycl: + all_cdates.append(cdate) + cdate += cycl_intvl + + if return_type == 'string': + all_cdates = [datetime.strftime(cdate, "%Y%m%d%H") for cdate in all_cdates] + + return all_cdates + + +def check_temporal_consistency_cumul_fields( + vx_config, cycle_start_times, fcst_len, fcst_output_intvl): + """ + This function reads in a subset of the parameters in the verification + configuration dictionary and ensures that certain temporal constraints on + these parameters are satisfied. It then returns an updated version of + the verification configuration dictionary that satisfies these constraints. + + The constraints are on the accumulation intervals associated with the + cumulative field groups (and the corresponding observation types) that + are to be verified. The constraints on each such accumulation interval + are as follows: + + 1) The accumulation interval is less than or equal to the forecast length. + This ensures that the forecast(s) can accumulate the field(s) in the + field group over that interval. + + 2) The obs availability interval evenly divides the accumulation interval. + This ensures that the obs can be added together to obtain accumulated + values of the obs field, e.g. the 6-hourly NOHRSC obs can be added to + obtain 24-hour observed snowfall accumulations. Note that this also + ensures that the accumulation interval is greater than or equal to the + obs availability interval. + + 3) The forecast output interval evenly divides the accumulation interval. + This ensures that the forecast output can be added together to obtain + accumulated values of the fields in the field group. For example, if + the forecast output interval is 3 hours, the resulting 3-hourly APCP + outputs from the forecast can be added to obtain 6-hourly forecast APCP. + Note that this also ensures that the accumulation interval is greater + than or equal to the forecast output interval. + + 4) The hour-of-day at which the accumulated forecast values will be + available are a subset of the ones at which the accumulated obs + values are available. This ensures that the accumulated fields + from the obs and forecast are valid at the same times and thus can + be compared in the verification. 
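To make constraints 1 through 3 concrete, here is a minimal, hypothetical sketch of the arithmetic they reduce to; the real checks in this function additionally log messages and prune the accumulation list in place, so this helper is illustrative only.

# Hypothetical helper illustrating constraints 1-3 above; not part of the patch.
def accum_intvl_is_usable(accum_hrs, fcst_len_hrs, obs_avail_intvl_hrs, fcst_output_intvl_hrs):
    return (accum_hrs <= fcst_len_hrs                    # constraint 1
            and accum_hrs % obs_avail_intvl_hrs == 0     # constraint 2
            and accum_hrs % fcst_output_intvl_hrs == 0)  # constraint 3

# E.g. a 6-hour accumulation with hourly obs and 3-hourly forecast output passes,
# while the same accumulation fails for a forecast that is only 3 hours long.
assert accum_intvl_is_usable(6, 24, 1, 3)
assert not accum_intvl_is_usable(6, 3, 1, 3)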
+ + If for a given field-accumulation combination any of these constraints + is violated, that accumulation is removed from the list of accumulations + to verify for that field. + + Args: + vx_config (dict): + The verification configuration dictionary. + + cycle_start_times (list): + List containing the starting times of the cycles in the experiment; each + list element is a datetime object. + + fcst_len (datetime.timedelta): + The length of each forecast; a timedelta object. + + fcst_output_intvl (datetime.timedelta): + Time interval between forecast output times; a timedelta object. + + Returns: + vx_config (dict): + An updated version of the verification configuration dictionary. + + fcst_obs_matched_times_all_cycles_cumul (dict): + Dictionary containing the times (in YYYYMMDDHH string format) at + which various field/accumlation combinations are output and at + which the corresponding obs type is also available. + """ + + # Set dictionary containing all field groups that consist of cumulative + # fields (i.e. whether or not those field groups are to be verified). + # The keys are the observation types and the field groups. + obtype_to_fg_dict_cumul = {"CCPA": "APCP", "NOHRSC": "ASNOW"} + + # Convert from datetime.timedelta objects to integers. + one_hour = timedelta(hours=1) + fcst_len_hrs = int(fcst_len/one_hour) + fcst_output_intvl_hrs = int(fcst_output_intvl/one_hour) + + # Initialize one of the variables that will be returned to an empty + # dictionary. + fcst_obs_matched_times_all_cycles_cumul = dict() + + for obtype, fg in obtype_to_fg_dict_cumul.items(): + + # If the current cumulative field is not in the list of fields to be + # verified, just skip to the next field. + if fg not in vx_config["VX_FIELD_GROUPS"]: + continue + + # Initialize a sub-dictionary in one of the dictionaries to be returned. + fcst_obs_matched_times_all_cycles_cumul.update({fg: {}}) + + # + # Get the availability interval of the current observation type from the + # verification configuration dictionary and use it to calculate the hours- + # of-day at which the obs will be available. + # + # Get the obs availability interval. + config_var_name = "".join([obtype, "_OBS_AVAIL_INTVL_HRS"]) + obs_avail_intvl_hrs = vx_config[config_var_name] + # Ensure that the obs availability interval evenly divides into 24. + remainder = 24 % obs_avail_intvl_hrs + if remainder != 0: + msg = dedent(f""" + The obs availability interval for obs of type {obtype} must divide evenly + into 24 but doesn't: + {obs_avail_intvl_hrs = } + 24 % obs_avail_intvl_hrs = {remainder}" + """) + logging.error(msg) + raise ValueError(msg) + # Assume that the obs are available at hour 0 of the day regardless + # of obs type. + obs_avail_hr_start = 0 + obs_avail_hr_end = obs_avail_hr_start + 24 + # Construct list of obs availability hours-of-day. + obs_avail_hrs_of_day = list(range(obs_avail_hr_start, obs_avail_hr_end, obs_avail_intvl_hrs)) + obs_avail_hrs_of_day_str = ['%02d' % int(hr) for hr in obs_avail_hrs_of_day] + # + # Get the array of accumulation intervals for the current cumulative field. + # Then loop over them to ensure that the constraints listed above are + # satisfied. If for a given accumulation one or more of the constraints + # is not satisfied, remove that accumulation from the list of accumulations + # for the current field. 
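The hours-of-day bookkeeping above reduces to something like the following hypothetical example (6-hourly obs availability); the subset test at the end is the same comparison used further below for constraint 4.

# Illustrative values only: 6-hourly obs availability gives hours 00/06/12/18.
obs_avail_intvl_hrs = 6
obs_avail_hrs_of_day_str = ['%02d' % hr for hr in range(0, 24, obs_avail_intvl_hrs)]
print(obs_avail_hrs_of_day_str)   # ['00', '06', '12', '18']

# A forecast whose accumulated output lands on 03z/09z/15z/21z would fail the
# subset check, since those are not hours at which the obs are available.
fcst_output_hrs_of_day_str = ['03', '09', '15', '21']
print(set(fcst_output_hrs_of_day_str) <= set(obs_avail_hrs_of_day_str))   # False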
+ # + accum_intvls_array_name = "".join(["VX_", fg, "_ACCUMS_HRS"]) + accum_intvls_hrs = vx_config[accum_intvls_array_name] + # + # Loop through the accumulation intervals and check the temporal constraints + # listed above. + # + for accum_hrs in accum_intvls_hrs.copy(): + + accum_hh = f"{accum_hrs:02d}" + # Initialize a sub-sub-dictionary in one of the dictionaries to be returned. + fcst_obs_matched_times_all_cycles_cumul[fg][accum_hh] = [] + # + # Make sure that the accumulation interval is less than or equal to the + # forecast length. + # + if accum_hrs > fcst_len_hrs: + msg = dedent(f""" + The accumulation interval (accum_hrs) for the current cumulative field + group (fg) and corresponding observation type (obtype) is greater than + the forecast length (fcst_len_hrs): + {fg = } + {obtype = } + {accum_hrs = } + {fcst_len_hrs = } + Thus, the forecast(s) cannot accumulate the field(s) in this field group + over this interval. Will remove this accumulation interval from the list + of accumulation intervals to verify for this field group/obtype. + """) + logging.info(msg) + accum_intvls_hrs.remove(accum_hrs) + # + # Make sure that accumulation interval is evenly divisible by the observation + # availability interval. + # + if accum_hrs in accum_intvls_hrs: + rem_obs = accum_hrs % obs_avail_intvl_hrs + if rem_obs != 0: + msg = dedent(f""" + The accumulation interval (accum_hrs) for the current cumulative field + group (fg) and corresponding observation type (obtype) is not evenly + divisible by the observation type's availability interval (obs_avail_intvl_hrs): + {fg = } + {obtype = } + {accum_hrs = } + {obs_avail_intvl_hrs = } + accum_hrs % obs_avail_intvl_hrs = {rem_obs} + Thus, this observation type cannot be accumulated over this interval. + Will remove this accumulation interval from the list of accumulation + intervals to verify for this field group/obtype. + """) + logging.info(msg) + accum_intvls_hrs.remove(accum_hrs) + # + # Make sure that accumulation interval is evenly divisible by the forecast + # output interval. + # + if accum_hrs in accum_intvls_hrs: + rem_fcst = accum_hrs % fcst_output_intvl_hrs + if rem_fcst != 0: + msg = dedent(f""" + The accumulation interval (accum_hrs) for the current cumulative field + group (fg) and corresponding observation type (obtype) is not evenly + divisible by the forecast output interval (fcst_output_intvl): + {fg = } + {obtype = } + {accum_hrs = } + {fcst_output_intvl_hrs = } + accum_hrs % fcst_output_intvl_hrs = {rem_fcst} + Thus, the forecast(s) cannot accumulate the field(s) in this field group + over this interval. Will remove this accumulation interval from the list + of accumulation intervals to verify for this field group/obtype. + """) + logging.info(msg) + accum_intvls_hrs.remove(accum_hrs) + # + # Make sure that the hours-of-day at which the current cumulative field + # will be output are a subset of the hours-of-day at which the corresponding + # obs type is available. + # + if accum_hrs in accum_intvls_hrs: + + # Initialize sets that will contain the forecast output times of the + # current cumulative field over all cycles. + fcst_output_times_all_cycles = set() + + # Calculate the forecast output times of the current cumulative field + # for the current cycle and include them in the the set of such times + # over all cycles. 
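For example, under hypothetical settings (a 12z cycle, a 24-hour forecast, and a 6-hour accumulation), the loop below produces accumulated output valid at 18z, 00z, 06z, and 12z, i.e. hours-of-day 00/06/12/18; this small sketch mirrors that calculation.

from datetime import datetime, timedelta

# Hypothetical cycle and accumulation, used only to illustrate the loop below.
start_time_crnt_cycle = datetime(2024, 4, 29, 12)
fcst_len = timedelta(hours=24)
accum = timedelta(hours=6)
times = [start_time_crnt_cycle + (i + 1)*accum for i in range(int(fcst_len/accum))]
print([t.strftime("%Y%m%d%H") for t in times])
# ['2024042918', '2024043000', '2024043006', '2024043012']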
+ accum = timedelta(hours=accum_hrs) + num_fcst_output_times_per_cycle = int(fcst_len/accum) + for i, start_time_crnt_cycle in enumerate(cycle_start_times): + fcst_output_times_crnt_cycle \ + = [start_time_crnt_cycle + (i+1)*accum + for i in range(0, num_fcst_output_times_per_cycle)] + fcst_output_times_all_cycles \ + = fcst_output_times_all_cycles | set(fcst_output_times_crnt_cycle) + + # Get all the hours-of-day at which the current cumulative field will be + # output by the forecast. + fcst_output_times_all_cycles = sorted(fcst_output_times_all_cycles) + fcst_output_times_all_cycles_str \ + = [datetime.strftime(dt_object, "%Y%m%d%H") + for dt_object in fcst_output_times_all_cycles] + fcst_output_hrs_of_day_str = [yyyymmddhh[8:10] for yyyymmddhh in fcst_output_times_all_cycles_str] + fcst_output_hrs_of_day_str.sort() + + # Check that all the forecast output hours-of-day are a subset of the obs + # availability hours-of-day. If not, remove the current accumulation + # interval from the list of intervals to verify. + if not set(fcst_output_hrs_of_day_str) <= set(obs_avail_hrs_of_day_str): + msg = dedent(f""" + The accumulation interval (accum_hrs) for the current cumulative field + group (fg) is such that the forecast will output the field(s) in the + field group at at least one hour-of-day at which the corresponding + observation type is not available: + {fg = } + {obtype = } + {accum_hrs = } + The forecast output hours-of-day for this field group/accumulation interval + combination are: + {fcst_output_hrs_of_day_str = } + The hours-of-day at which the obs are available are: + {obs_avail_hrs_of_day_str = } + Thus, at least some of the forecast output cannot be verified. Will remove + this accumulation interval from the list of accumulation intervals to verify + for this field group/obtype. + """) + logging.info(msg) + accum_intvls_hrs.remove(accum_hrs) + else: + fcst_obs_matched_times_all_cycles_cumul[fg][accum_hh] = fcst_output_times_all_cycles_str + # + # Update the value in the experiment configuration dictionary of the list + # of accumulation intervals to verify for this cumulative field (since + # some accumulation intervals may have been removed after the checks above). + # + vx_config[accum_intvls_array_name] = accum_intvls_hrs + # + # If the updated list of accumulations for the current cumulative field + # is empty, remove the field from the list of fields to verify in the + # verification configuration dictionary. + # + if not accum_intvls_hrs: + vx_config["VX_FIELD_GROUPS"].remove(fg) + msg = dedent(f""" + The list of accumulation intervals (accum_intvls_hrs) for the current + cumulative field group to verify (fg) is empty: + {fg = } + {accum_intvls_hrs = } + Removing this field from the list of fields to verify. The updated list + is: + {vx_config["VX_FIELD_GROUPS"]} + """) + logging.info(msg) + + return vx_config, fcst_obs_matched_times_all_cycles_cumul + + +def set_fcst_output_times_and_obs_days_all_cycles( + cycle_start_times, fcst_len, fcst_output_intvl): + """ + This function returns forecast output times and observation days (i.e. + days on which obs are needed because there is forecast output on those + days) for both instantaneous (e.g. REFC, RETOP, T2m) and cumulative (e.g. + APCP) fields that need to be verified. Note that for cumulative fields, + the only accumulation interval considered is the forecast output interval. + Accumulation intervals larger than this are considered elsewhere (and + accumulation interval smaller than this are obviously not allowed). 
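The distinction between instantaneous and cumulative output times, and the obs days that follow from each, can be sketched for a single hypothetical cycle as follows (values chosen only for illustration; the 00z rule matches the convention described later in this function).

from datetime import datetime, timedelta

# One hypothetical 12-hour forecast starting at 12z with 6-hourly output.
cycle_start = datetime(2024, 4, 29, 12)
fcst_len = timedelta(hours=12)
output_intvl = timedelta(hours=6)

n = int(fcst_len/output_intvl) + 1
inst_times = [cycle_start + i*output_intvl for i in range(n)]   # 12z, 18z, 00z (next day)
cumul_times = inst_times[1:]                                    # 18z, 00z (next day)

# Obs days: for cumulative fields, a final 00z output counts toward the previous day.
inst_days = sorted({t.date() for t in inst_times})              # Apr 29 and Apr 30
tmp = list(cumul_times)
if tmp[-1].hour == 0:
    tmp.pop()
cumul_days = sorted({t.date() for t in tmp})                    # Apr 29 only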
+ + Args: + cycle_start_times (list): + List containing the starting times of the cycles in the experiment; each + list element is a datetime object. + + fcst_len (datetime.timedelta): + The length of each forecast. + + fcst_output_intvl (datetime.timedelta): + Time interval between forecast output times. + + Returns: + fcst_output_times_all_cycles (dict): + Dictionary containing a list of forecast output times over all cycles for + instantaneous fields and a second analogous list for cumulative fields. + Each element of these lists is a string of the form 'YYYYMMDDHH'. + + obs_days_all_cycles (dict): + Dictionary containing a list of observation days (i.e. days on which + observations are needed to perform verification) over all cycles for + instantaneous fields and a second analogous list for cumulative fields. + Each element of these lists is a string of the form 'YYYYMMDD'. + """ + + # Get the number of forecast output times per cycle/forecast. + num_fcst_output_times_per_cycle = int(fcst_len/fcst_output_intvl + 1) + + # Initialize dictionaries that will contain the various forecast output + # time and obs day information. Note that we initialize the contents of + # these dictionaries as sets because that better suites the data manipulation + # we will need to do, but these sets will later be converted to lists. + fcst_output_times_all_cycles = dict() + fcst_output_times_all_cycles['inst'] = set() + fcst_output_times_all_cycles['cumul'] = set() + obs_days_all_cycles = dict() + obs_days_all_cycles['inst'] = set() + obs_days_all_cycles['cumul'] = set() + + for i, start_time_crnt_cycle in enumerate(cycle_start_times): + # Create a list of forecast output times of instantaneous fields for the + # current cycle. + fcst_output_times_crnt_cycle_inst \ + = [start_time_crnt_cycle + i*fcst_output_intvl + for i in range(0,num_fcst_output_times_per_cycle)] + # Include the output times of instantaneous fields for the current cycle + # in the set of all such output times over all cycles. + fcst_output_times_all_cycles['inst'] \ + = fcst_output_times_all_cycles['inst'] | set(fcst_output_times_crnt_cycle_inst) + + # Create a list of instantaneous field obs days (i.e. days on which + # observations of instantaneous fields are needed for verification) for + # the current cycle. We do this by dropping the hour-of-day from each + # element of the list of forecast output times and keeping only unique + # elements. + tmp = [datetime_obj.date() for datetime_obj in fcst_output_times_crnt_cycle_inst] + obs_days_crnt_cycl_inst = sorted(set(tmp)) + # Include the obs days for instantaneous fields for the current cycle + # in the set of all such obs days over all cycles. + obs_days_all_cycles['inst'] = obs_days_all_cycles['inst'] | set(obs_days_crnt_cycl_inst) + + # Create a list of forecast output times of cumulative fields for the + # current cycle. This is simply the list of forecast output times for + # instantaneous fields but with the first time dropped (because nothing + # has yet accumulated at the starting time of the cycle). + fcst_output_times_crnt_cycle_cumul = fcst_output_times_crnt_cycle_inst + fcst_output_times_crnt_cycle_cumul.pop(0) + # Include the obs days for cumulative fields for the current cycle in the + # set of all such obs days over all cycles. + fcst_output_times_all_cycles['cumul'] \ + = fcst_output_times_all_cycles['cumul'] | set(fcst_output_times_crnt_cycle_cumul) + + # Create a list of cumulative field obs days (i.e. 
days on which + # observations of cumulative fields are needed for verification) for + # the current cycle. We do this by dropping the hour-of-day from each + # element of the list of forecast output times and keeping only unique + # elements. Note, however, that before dropping the hour-of-day from + # the list of forecast output times, we remove the last forecast output + # time if it happens to be the 0th hour of a day. This is because in + # the scripts/tasks that get observations of cumulative fields, the + # zeroth hour of a day is considered part of the previous day (because + # it represents accumulation that occurred on the previous day). + tmp = fcst_output_times_crnt_cycle_cumul + last_output_time_cumul = fcst_output_times_crnt_cycle_cumul[-1] + if last_output_time_cumul.hour == 0: + tmp.pop() + tmp = [datetime_obj.date() for datetime_obj in tmp] + obs_days_crnt_cycl_cumul = sorted(set(tmp)) + # Include the obs days for cumulative fields for the current cycle in the + # set of all such obs days over all cycles. + obs_days_all_cycles['cumul'] = obs_days_all_cycles['cumul'] | set(obs_days_crnt_cycl_cumul) + + # Convert the set of output times of instantaneous fields over all cycles + # to a sorted list of strings of the form 'YYYYMMDDHH'. + fcst_output_times_all_cycles['inst'] = sorted(fcst_output_times_all_cycles['inst']) + fcst_output_times_all_cycles['inst'] \ + = [datetime.strftime(fcst_output_times_all_cycles['inst'][i], "%Y%m%d%H") + for i in range(len(fcst_output_times_all_cycles['inst']))] + + # Convert the set of obs days for instantaneous fields over all cycles + # to a sorted list of strings of the form 'YYYYMMDD'. + obs_days_all_cycles['inst'] = sorted(obs_days_all_cycles['inst']) + obs_days_all_cycles['inst'] \ + = [datetime.strftime(obs_days_all_cycles['inst'][i], "%Y%m%d") + for i in range(len(obs_days_all_cycles['inst']))] + + # Convert the set of output times of cumulative fields over all cycles to + # a sorted list of strings of the form 'YYYYMMDDHH'. + fcst_output_times_all_cycles['cumul'] = sorted(fcst_output_times_all_cycles['cumul']) + fcst_output_times_all_cycles['cumul'] \ + = [datetime.strftime(fcst_output_times_all_cycles['cumul'][i], "%Y%m%d%H") + for i in range(len(fcst_output_times_all_cycles['cumul']))] + + # Convert the set of obs days for cumulative fields over all cycles to a + # sorted list of strings of the form 'YYYYMMDD'. + obs_days_all_cycles['cumul'] = sorted(obs_days_all_cycles['cumul']) + obs_days_all_cycles['cumul'] \ + = [datetime.strftime(obs_days_all_cycles['cumul'][i], "%Y%m%d") + for i in range(len(obs_days_all_cycles['cumul']))] + + return fcst_output_times_all_cycles, obs_days_all_cycles + + +def set_rocoto_cycledefs_for_obs_days(obs_days_all_cycles): + """ + Given a list of days on which observations are needed (because there is + forecast output on those days), this function generates a list of ROCOTO- + style cycledef strings that together span the days (over all cycles of an + SRW App experiment) on which obs are needed. The input list of days must + be increasing in time, but the days do not have to be consecutive, i.e. + there may be gaps between days that are greater than one day. + + Each cycledef string in the output list represents a set of consecutive + days in the input string (when used inside a tag in a ROCOTO + XML). Thus, when the cycledef strings in the output string are all + assigned to the same cycledef group in a ROCOTO XML, that group will + represent all the days on which observations are needed. 
This allows + the ROCOTO workflow to define a single set of non-consecutive days on + which obs are needed and define tasks (e.g. get_obs) only for those + days, thereby avoiding the redundant creation of these tasks for any + in-between days on which obs are not needed. + + Args: + obs_days_all_cycles (list): + A list of strings of the form 'YYYYMMDD', with each string representing + a day on which observations are needed. Note that the list must be sorted, + i.e. the days must be increasing in time, but there may be gaps between + days. + + Returns: + cycledefs_all_obs_days (list): + A list of strings, with each string being a ROCOTO-style cycledef of the + form + + '{yyyymmdd_start}0000 {yyyymmdd_end}0000 24:00:00' + + where {yyyymmdd_start} is the starting day of the first cycle in the + cycledef and {yyyymmdd_end} is the starting day of the last cycle (note + that the minutes and hours in these cycledef stirngs are always set to + '00'). For example, an element of the output list may be: + + '202404290000 202405010000 24:00:00' + """ + + # To enable arithmetic with dates, convert input sting list of observation + # days (i.e. days on which observations are needed) over all cycles to a + # list of datetime objects. + tmp = [datetime.strptime(yyyymmdd, "%Y%m%d") for yyyymmdd in obs_days_all_cycles] + + # Initialize the variable that in the loop below contains the date of + # the previous day. This is just the first element of the list of + # datetime objects constructed above. Then use it to initialize the + # list (consec_obs_days_lists) that will contain lists of consecutive + # observation days. Thus, after its construction is complete, each + # element of consec_obs_days_lists will itself be a list containing + # datetime objects that represent consecutive days (i.e. are guaranteed + # to be 24 hours apart). + day_prev = tmp[0] + consec_obs_days_lists = list() + consec_obs_days_lists.append([day_prev]) + + # Remove the first element of the list of obs days since it has already + # been used initiliaze consec_obs_days_lists. + tmp.pop(0) + + # Loop over the remaining list of obs days and construct the list of + # lists of consecutive obs days. + one_day = timedelta(days=1) + for day_crnt in tmp: + # If the current obs day comes 24 hours after the previous obs day, i.e. + # if it is the next day of the previous obs day, append it to the last + # existing list in consec_obs_days_lists. + if day_crnt == day_prev + one_day: + consec_obs_days_lists[-1].append(day_crnt) + # If the current obs day is NOT the next day of the previous obs day, + # append a new element to consec_obs_days_lists and initialize it as a + # list containing a single element -- the current obs day. + else: + consec_obs_days_lists.append([day_crnt]) + # Update the value of the previous day in preparation for the next + # iteration of the loop. + day_prev = day_crnt + + # Use the list of lists of consecutive obs days to construct a list of + # ROCOTO-style cycledef strings that each represent a set of consecutive + # obs days when included in a tag in a ROCOTO XML. Each + # string in this new list corresponds to a series of consecutive days on + # which observations are needed (where by "consecutive" we mean no days + # are skipped), and there is at least a one-day gap between each such + # series. These cycledefs together represent all the days (i.e. over all + # cycles of the experiment) on which observations are needed. 
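For instance, with hypothetical obs days that contain a one-day gap (Apr 29-30 are consecutive, May 2 is not), the grouping described above yields two cycledef strings. A usage sketch, assuming ush/ is on the Python path:

from set_cycle_and_obs_timeinfo import set_rocoto_cycledefs_for_obs_days

# Hypothetical, sorted obs days with a gap between the second and third entries.
obs_days = ["20240429", "20240430", "20240502"]
print(set_rocoto_cycledefs_for_obs_days(obs_days))
# ['202404290000 202404300000 24:00:00', '202405020000 202405020000 24:00:00']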
+ cycledefs_all_obs_days = list() + for consec_obs_days_list in consec_obs_days_lists: + cycledef_start = consec_obs_days_list[0].strftime('%Y%m%d%H%M') + cycledef_end = consec_obs_days_list[-1].strftime('%Y%m%d%H%M') + cycledefs_all_obs_days.append(' '.join([cycledef_start, cycledef_end, '24:00:00'])) + + return cycledefs_all_obs_days + + +def get_obs_retrieve_times_by_day( + vx_config, cycle_start_times, fcst_len, + fcst_output_times_all_cycles, obs_days_all_cycles): + """ + This function generates dictionary of dictionaries that, for each + combination of obs type needed and each obs day, contains a string list + of the times at which that type of observation is needed on that day. + The elements of each list are formatted as 'YYYYMMDDHH'. + + Args: + vx_config (dict): + The verification configuration dictionary. + + cycle_start_times (list): + List containing the starting times of the cycles in the experiment; each + list element is a datetime object. + + fcst_len (datetime.timedelta): + The length of each forecast. + + fcst_output_times_all_cycles (dict): + Dictionary containing a list of forecast output times over all cycles for + instantaneous fields and a second analogous list for cumulative fields. + Each element of these lists is a string of the form 'YYYYMMDDHH'. + + obs_days_all_cycles (dict): + Dictionary containing a list of observation days (i.e. days on which + observations are needed to perform verification) over all cycles for + instantaneous fields and a second analogous list for cumulative fields. + Each element of these lists is a string of the form 'YYYYMMDD'. + + Returns: + obs_retrieve_times_by_day (dict): + Dictionary of dictionaries containing times at which each type of obs is + needed on each obs day. + """ + + # Convert string contents of input dictionaries to datetime objects. + for time_type in ['cumul', 'inst']: + fcst_output_times_all_cycles[time_type] \ + = [datetime.strptime(fcst_output_times_all_cycles[time_type][i], "%Y%m%d%H") + for i in range(len(fcst_output_times_all_cycles[time_type]))] + obs_days_all_cycles[time_type] \ + = [datetime.strptime(obs_days_all_cycles[time_type][i], "%Y%m%d") + for i in range(len(obs_days_all_cycles[time_type]))] + + # Get list of field groups to be verified. + vx_field_groups = vx_config['VX_FIELD_GROUPS'] + + # Define a list of dictionaries containing information about all the obs + # types that can possibly be used for verification in the SRW App. Each + # dictionary in the list contains the name of the obs type, the temporal + # nature of that obs type (i.e. whether the obs type contains cumulative + # or instantaneous fields), and a list of the field groups that the obs + # type may be used to verify. + all_obs_info \ + = [{'obtype': 'CCPA', 'time_type': 'cumul', 'field_groups': ['APCP']}, + {'obtype': 'NOHRSC', 'time_type': 'cumul', 'field_groups': ['ASNOW']}, + {'obtype': 'MRMS', 'time_type': 'inst', 'field_groups': ['REFC', 'RETOP']}, + {'obtype': 'NDAS', 'time_type': 'inst', 'field_groups': ['SFC', 'UPA']} + ] + + # Create new list that has the same form as the list of dictionaries + # defined above but contains only those obs types that have at least one + # field group that appears in the list of field groups to verify. Note + # that for those obs types that are retained in the list, the field groups + # that will not be verified are discarded. 
+ obs_info = [] + for obs_dict in all_obs_info.copy(): + obtype = obs_dict['obtype'] + field_groups = obs_dict['field_groups'] + field_groups = [field for field in field_groups if field in vx_field_groups] + obs_dict = obs_dict.copy() + obs_dict['field_groups'] = field_groups + if field_groups: obs_info.append(obs_dict) + + # For convenience, define timedelta object representing a single day. + one_day = timedelta(days=1) + + # Generate a dictionary (of dictionaries) that, for each obs type to be + # used in the vx and for each day for which there is forecast output, + # will contain the times at which verification will be performed, i.e. + # the times at which the forecast output will be compared to observations. + # We refer to these times as the vx comparison times. + vx_compare_times_by_day = dict() + for obs_dict in obs_info: + + obtype = obs_dict['obtype'] + obs_time_type = obs_dict['time_type'] + + fcst_output_times_all_cycles_crnt_ttype = fcst_output_times_all_cycles[obs_time_type] + obs_days_all_cycles_crnt_ttype = obs_days_all_cycles[obs_time_type] + + vx_compare_times_by_day[obtype] = dict() + + # Get the availability interval for the current observation type from the + # verification configuration dictionary. Then make sure it divides evenly + # into 24. + config_var_name = "".join([obtype, "_OBS_AVAIL_INTVL_HRS"]) + obs_avail_intvl_hrs = vx_config[config_var_name] + remainder = 24 % obs_avail_intvl_hrs + if remainder != 0: + msg = dedent(f""" + The obs availability interval for obs of type {obtype} must divide evenly + into 24 but doesn't: + obs_avail_intvl_hrs = {obs_avail_intvl_hrs} + 24 % obs_avail_intvl_hrs = {remainder}" + """) + logging.error(msg) + raise Exception(msg) + obs_avail_intvl = timedelta(hours=obs_avail_intvl_hrs) + num_obs_avail_times_per_day = int(24/obs_avail_intvl_hrs) + + # Loop over all obs days over all cycles (for the current obs type). For + # each such day, get the list forecast output times and the list of obs + # availability times. Finally, set the times (on that day) that verification + # will be performed to the intersection of these two lists. + for obs_day in obs_days_all_cycles_crnt_ttype: + + next_day = obs_day + one_day + if obs_time_type == "cumul": + fcst_output_times_crnt_day \ + = [time for time in fcst_output_times_all_cycles_crnt_ttype if obs_day < time <= next_day] + elif obs_time_type == "inst": + fcst_output_times_crnt_day \ + = [time for time in fcst_output_times_all_cycles_crnt_ttype if obs_day <= time < next_day] + fcst_output_times_crnt_day = [datetime.strftime(time, "%Y%m%d%H") for time in fcst_output_times_crnt_day] + + if obs_time_type == "cumul": + obs_avail_times_crnt_day \ + = [obs_day + (i+1)*obs_avail_intvl for i in range(0,num_obs_avail_times_per_day)] + elif obs_time_type == "inst": + obs_avail_times_crnt_day \ + = [obs_day + i*obs_avail_intvl for i in range(0,num_obs_avail_times_per_day)] + obs_avail_times_crnt_day = [datetime.strftime(time, "%Y%m%d%H") for time in obs_avail_times_crnt_day] + + vx_compare_times_crnt_day = list(set(fcst_output_times_crnt_day) & set(obs_avail_times_crnt_day)) + vx_compare_times_crnt_day.sort() + + obs_day_str = datetime.strftime(obs_day, "%Y%m%d") + vx_compare_times_by_day[obtype][obs_day_str] = vx_compare_times_crnt_day + + # For each obs type to be used in the vx and for each day for which there + # is forecast output, calculate the times at which obs need to be retrieved. 
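The per-day intersection computed above can be illustrated with hypothetical numbers (3-hourly forecast output versus 6-hourly obs availability on a single obs day) before moving on to the retrieval times.

from datetime import datetime, timedelta

# Hypothetical obs day: 3-hourly forecast output, 6-hourly obs availability.
obs_day = datetime(2024, 4, 29)
fcst_output = {obs_day + timedelta(hours=h) for h in range(0, 24, 3)}
obs_avail = {obs_day + timedelta(hours=h) for h in range(0, 24, 6)}

vx_compare_times = sorted(t.strftime("%Y%m%d%H") for t in (fcst_output & obs_avail))
print(vx_compare_times)   # ['2024042900', '2024042906', '2024042912', '2024042918']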
+ # For instantaneous fields, the obs retrieval times are the same as the + # times at which vx will be performed. For cumulative fields, each field + # value needs to be constructed by adding values from previous times. For + # example, if we're verifying 6-hourly precipitation and the obs availability + # interval for precip obs (CCPA) is 1 hour, then the 6-hourly values must + # be built by adding the 1-hour values. Thus, this requires obs at every + # hour, not just every 6 hours. + # + # First, initialze the dictionary (of dictionaries) that will contain the + # obs retreival times (for all obs types and each day for which there is + # forecast output), and set the values for instantaneous obs to the vx + # comparison times calculated above. + obs_retrieve_times_by_day = dict() + for obs_dict in obs_info: + obtype = obs_dict['obtype'] + obs_time_type = obs_dict['time_type'] + if obs_time_type == 'inst': + obs_retrieve_times_by_day[obtype] = vx_compare_times_by_day[obtype] + + # Next, calculate the obs retrieval times for cumulative fields. We want + # these times grouped into days because the get_obs workflow tasks that + # will use this information are day-based (i.e. each task will get obs + # for a single day). However, it is easier to first calculate these + # times as a single group over all cycles. We do this next. + obs_retrieve_times_all_cycles = dict() + for obs_dict in obs_info: + + obtype = obs_dict['obtype'] + obs_time_type = obs_dict['time_type'] + field_groups = obs_dict['field_groups'] + + # Consider only cumulative fields. + if obs_time_type != 'cumul': + continue + + # Initialize the set that will contain the obs retrieval times over all + # cycles. + obs_retrieve_times_all_cycles[obtype] = set() + + # Get the availability interval for the current observation type from the + # verification configuration dictionary. + config_var_name = "".join([obtype, "_OBS_AVAIL_INTVL_HRS"]) + obs_avail_intvl_hrs = vx_config[config_var_name] + obs_avail_intvl = timedelta(hours=obs_avail_intvl_hrs) + + # Consider all field groups to be verified for the current obs type. + for fg in field_groups: + + # Get the list of accumulation intervals for the current cumulative obs + # type and field group combination. + accum_intvls_array_name = "".join(["VX_", fg, "_ACCUMS_HRS"]) + accum_intvls_hrs = vx_config[accum_intvls_array_name] + + for cycle_start_time in cycle_start_times: + + # Loop through the accumulation intervals for this obs type and field + # group combination. + for accum_intvl_hrs in accum_intvls_hrs: + accum_intvl = timedelta(hours=accum_intvl_hrs) + # Get the number of accumulation intervals that fits in the duration of + # the forecast. Note that the accumulation interval doesn't necessarily + # have to evenly divide the forecast duration; we simply drop any fractional + # accumulation intervals by rounding down to the nearest integer. + num_accum_intvls_in_fcst = int(fcst_len/accum_intvl) + # Calulate the times at which the current cumulative obs field will be + # compared to the forecast field(s) in the corresponding cumulative field + # group (for the current accumulation interval). + vx_compare_times_crnt_cycl = [cycle_start_time + (i+1)*accum_intvl + for i in range(0,num_accum_intvls_in_fcst)] + # For each such comparison time, get the times at which obs are needed + # to form that accumulation. 
For example, if the current accumulation + # interval is 6 hours and the obs are available every hour, then the + # times at which obs are needed will be the comparison time as well as + # the five hours preceeding it. Then put all such times over all vx + # comparison times within all cycles into a single array of times (which + # is stored in the dictionary obs_retrieve_times_all_cycles). + for vx_compare_time in vx_compare_times_crnt_cycl: + remainder = accum_intvl_hrs % obs_avail_intvl_hrs + if remainder != 0: + msg = dedent(f""" + The obs availability interval for obs of type {obtype} must divide evenly + into the current accumulation interval (accum_intvl) but doesn't: + accum_intvl_hrs = {accum_intvl_hrs} + obs_avail_intvl_hrs = {obs_avail_intvl_hrs} + accum_intvl_hrs % obs_avail_intvl_hrs = {remainder}" + """) + logging.error(msg) + raise Exception(msg) + num_obs_avail_times_in_accum_intvl = int(accum_intvl/obs_avail_intvl) + obs_retrieve_times_crnt_accum_intvl \ + = [vx_compare_time - i*obs_avail_intvl \ + for i in range(0,num_obs_avail_times_in_accum_intvl)] + obs_retrieve_times_all_cycles[obtype] \ + = obs_retrieve_times_all_cycles[obtype] | set(obs_retrieve_times_crnt_accum_intvl) + + # Convert the final set of obs retrieval times for the current obs type + # to a sorted list. Note that the sorted() function will convert a set + # to a sorted list (a set itself cannot be sorted). + obs_retrieve_times_all_cycles[obtype] = sorted(obs_retrieve_times_all_cycles[obtype]) + + # Now that the obs retrival times for cumulative fields have been obtained + # but grouped by cycle start date, regroup them by day and save results + # in obs_retrieve_times_by_day. + for obs_dict in obs_info: + + obtype = obs_dict['obtype'] + obs_time_type = obs_dict['time_type'] + + # Consider only cumulative obs/fields. + if obs_time_type != 'cumul': + continue + + # Initialize variables before looping over obs days. + obs_retrieve_times_by_day[obtype] = dict() + obs_days_all_cycles_crnt_ttype = obs_days_all_cycles[obs_time_type] + obs_retrieve_times_all_cycles_crnt_obtype = obs_retrieve_times_all_cycles[obtype] + + for obs_day in obs_days_all_cycles_crnt_ttype: + next_day = obs_day + one_day + obs_retrieve_times_crnt_day \ + = [time for time in obs_retrieve_times_all_cycles_crnt_obtype if obs_day < time <= next_day] + obs_retrieve_times_crnt_day = [datetime.strftime(time, "%Y%m%d%H") for time in obs_retrieve_times_crnt_day] + obs_day_str = datetime.strftime(obs_day, "%Y%m%d") + obs_retrieve_times_by_day[obtype][obs_day_str] = obs_retrieve_times_crnt_day + + return obs_retrieve_times_by_day diff --git a/ush/set_cycle_dates.py b/ush/set_cycle_dates.py deleted file mode 100644 index cb386407b6..0000000000 --- a/ush/set_cycle_dates.py +++ /dev/null @@ -1,33 +0,0 @@ -#!/usr/bin/env python3 - -from datetime import datetime, timedelta, date - -from python_utils import print_input_args, print_err_msg_exit - - -def set_cycle_dates(date_start, date_end, incr_cycl_freq): - """ - Sets the cycle date(s). 
- - Args: - date_start (datetime.datetime): Start date - date_end (datetime.datetime): End date - incr_cycl_freq (int): Cycle frequency increment in hours - Returns: - all_cdates: An array of cycle date-hours whose elements have the form ``YYYYMMDDHH``, - where ``YYYY`` is a four-digit year, ``MM`` is a two- digit month, ``DD`` - is a two-digit day of the month, and ``HH`` is a two-digit hour of the day - """ - - print_input_args(locals()) - - freq_delta = timedelta(hours=incr_cycl_freq) - - # iterate over cycles - all_cdates = [] - cdate = date_start - while cdate <= date_end: - cyc = datetime.strftime(cdate, "%Y%m%d%H") - all_cdates.append(cyc) - cdate += freq_delta - return all_cdates diff --git a/ush/set_leadhrs.py b/ush/set_leadhrs.py new file mode 100644 index 0000000000..3256297af2 --- /dev/null +++ b/ush/set_leadhrs.py @@ -0,0 +1,94 @@ +#!/usr/bin/env python3 +import argparse +import os +from eval_metplus_timestr_tmpl import eval_metplus_timestr_tmpl + +def set_leadhrs(date_init, lhr_min, lhr_max, lhr_intvl, base_dir, time_lag, fn_template, num_missing_files_max, + skip_check_files=False, verbose=False): + """ + Creates a list of lead hours based on the provided range and interval, + checks for the existence of corresponding files, and returns a list + of lead hours for which files exist. If too many files are missing, it fails with an exception. + + Args: + date_init (str): Date string for initial time in YYYYMMDD[mmss] format, where + minutes and seconds are optional. + lhr_min (int): Minimum lead hour to check + lhr_max (int): Maximum lead hour to check + lhr_intvl (int): Interval between lead hours + base_dir (str): Base directory for forecast/observation file + time_lag (int): Hours of time lag for a time-lagged ensemble member + fn_template (str): The METplus filename template for finding the files + verbose (bool): By default this script only outputs the list of forecast hours + (for easier parsing from bash contexts). Set the verbose flag + to True for additional debugging output. 
+ num_missing_files_max (int): If more files than this value are not found, raise exception + skip_check_files (bool): If true, return the list of forecast hours, skipping the file check + Returns: + A list of forecast hours where files were found + """ + + # Step 1: Generate lead hours without filtering for missing files + lhrs_list = list(range(lhr_min, lhr_max + 1, lhr_intvl)) + if verbose: + print(f"Initial set of lead hours (relative to {date_init}): {lhrs_list}") + + if skip_check_files: + return lhrs_list + + # Step 2: Loop through lead hours and check for corresponding file existence + final_list = [] + num_missing_files = 0 + for lhr in lhrs_list: + + # Evaluate the METplus timestring template for the current lead hour + fn = eval_metplus_timestr_tmpl(date_init, lhr, time_lag, fn_template, verbose=False) + + # Get the full path and check if the file exists + fp = os.path.join(base_dir, fn) + if os.path.isfile(fp): + if verbose: + print(f"Found file for lead hour {lhr} (relative to {date_init}): {fp}") + final_list.append(lhr) + else: + num_missing_files += 1 + + if verbose: + print(f"File for lead hour {lhr} (relative to {date_init}) is MISSING: {fp}") + + if verbose: + print(f"Final set of lead hours relative to {date_init}: {final_list}") + + # Step 3: Check if the number of missing files exceeds the maximum allowed + if num_missing_files > num_missing_files_max: + raise Exception(f"Number of missing files ({num_missing_files}) exceeds maximum allowed ({num_missing_files_max}).") + + return final_list + +if __name__ == "__main__": + + parser = argparse.ArgumentParser( + description="Print a list of forecast hours in bash-readable comma-separated format such that there is a corresponding file (can be observations or forecast files) for each list entry.", + ) + parser.add_argument("-v", "--verbose", help="Verbose output", action="store_true") + parser.add_argument("-d", "--date_init", help="Initial date in YYYYMMDDHH[mmss] format", type=str, default='') + parser.add_argument("-min", "--lhr_min", help="Minimum lead hour to check", type=int, required=True) + parser.add_argument("-max", "--lhr_max", help="Maximum lead hour to check", type=int, required=True) + parser.add_argument("-int", "--lhr_intvl", help="Interval between lead hours", type=int, required=True) + parser.add_argument("-tl", "--time_lag", help="Hours of time lag for a time-lagged ensemble member", type=int, default=0) + parser.add_argument("-bd", "--base_dir", help="Base directory for forecast/observation file", type=str, default='') + parser.add_argument("-ft", "--fn_template", help="Template for file names to search; see ??? 
for details on template settings", type=str, default='') + parser.add_argument("-n", "--num_missing_files_max", type=int, default=5, + help="Number of missing files to tolerate; if more files than this number can not be found, raise an exception") + parser.add_argument("-s", "--skip_check_files", action="store_true", + help="Flag to skip file check and just return the list of lead hours") + + args = parser.parse_args() + + #Consistency checks + if not args.skip_check_files and not args.date_init: + raise argparse.ArgumentTypeError('--date_init must be specified unless --skip_check_files is specified') + + leadhr_list = set_leadhrs(**vars(args)) + # If called from command line, we want to print a bash-parsable list + print(', '.join(str(x) for x in leadhr_list)) diff --git a/ush/set_vx_fhr_list.sh b/ush/set_vx_fhr_list.sh deleted file mode 100644 index 5cefc78365..0000000000 --- a/ush/set_vx_fhr_list.sh +++ /dev/null @@ -1,295 +0,0 @@ -# -#----------------------------------------------------------------------- -# -# This file defines a function that generates a list of forecast hours -# such that for each hour there exist a corresponding obs file. It does -# this by first generating a generic sequence of forecast hours and then -# removing from that sequence any hour for which there is no obs file. -# -#----------------------------------------------------------------------- -# -function set_vx_fhr_list() { -# -#----------------------------------------------------------------------- -# -# Save current shell options (in a global array). Then set new options -# for this script/function. -# -#----------------------------------------------------------------------- -# - { save_shell_opts; set -u +x; } > /dev/null 2>&1 -# -#----------------------------------------------------------------------- -# -# Get the full path to the file in which this script/function is located -# (scrfunc_fp), the name of that file (scrfunc_fn), and the directory in -# which the file is located (scrfunc_dir). -# -#----------------------------------------------------------------------- -# - local scrfunc_fp=$( $READLINK -f "${BASH_SOURCE[0]}" ) - local scrfunc_fn=$( basename "${scrfunc_fp}" ) - local scrfunc_dir=$( dirname "${scrfunc_fp}" ) -# -#----------------------------------------------------------------------- -# -# Get the name of this function. -# -#----------------------------------------------------------------------- -# - local func_name="${FUNCNAME[0]}" -# -#----------------------------------------------------------------------- -# -# Specify the set of valid argument names for this script/function. Then -# process the arguments provided to this script/function (which should -# consist of a set of name-value pairs of the form arg1="value1", etc). -# -#----------------------------------------------------------------------- -# - local valid_args=( \ - "cdate" \ - "fcst_len_hrs" \ - "field" \ - "accum_hh" \ - "base_dir" \ - "fn_template" \ - "check_accum_contrib_files" \ - "num_missing_files_max" \ - "outvarname_fhr_list" \ - ) - process_args valid_args "$@" -# -#----------------------------------------------------------------------- -# -# For debugging purposes, print out values of arguments passed to this -# script. Note that these will be printed out only if VERBOSE is set to -# TRUE. -# -#----------------------------------------------------------------------- -# - print_input_args valid_args -# -#----------------------------------------------------------------------- -# -# Declare local variables. 
-# -#----------------------------------------------------------------------- -# - local crnt_tmpl \ - crnt_tmpl_esc \ - fhr \ - fhr_array \ - fhr_int \ - fhr_list \ - fhr_min \ - fhr_max \ - fn \ - fp \ - i \ - num_fcst_hrs \ - num_missing_files \ - regex_search_tmpl \ - remainder \ - skip_this_fhr -# -#----------------------------------------------------------------------- -# -# Create array containing set of forecast hours for which we will check -# for the existence of corresponding observation or forecast file. -# -#----------------------------------------------------------------------- -# - case "${field}" in - "APCP") - fhr_min="${accum_hh}" - fhr_int="${accum_hh}" - ;; - "ASNOW") - if [ "${accum_hh}" = "24" ]; then - fhr_min="24" - fhr_int="12" - else - fhr_min="${accum_hh}" - fhr_int="${accum_hh}" - fi - ;; - "REFC") - fhr_min="00" - fhr_int="01" - ;; - "RETOP") - fhr_min="00" - fhr_int="01" - ;; - "ADPSFC") - fhr_min="00" - fhr_int="01" - ;; - "ADPUPA") - fhr_min="00" - fhr_int="06" - ;; - *) - print_err_msg_exit "\ -A method for setting verification parameters has not been specified for -this field (field): - field = \"${field}\"" - ;; - esac - fhr_max="${fcst_len_hrs}" - - fhr_array=($( seq ${fhr_min} ${fhr_int} ${fhr_max} )) - print_info_msg "$VERBOSE" "\ -Initial (i.e. before filtering for missing files) set of forecast hours -is: - fhr_array = ( $( printf "\"%s\" " "${fhr_array[@]}" )) -" -# -#----------------------------------------------------------------------- -# -# Loop through all forecast hours. For each one for which a corresponding -# file exists, add the forecast hour to fhr_list. fhr_list will be a -# scalar containing a comma-separated list of forecast hours for which -# corresponding files exist. Also, use the variable num_missing_files -# to keep track of the number of files that are missing. -# -#----------------------------------------------------------------------- -# - fhr_list="" - num_missing_files="0" - num_fcst_hrs=${#fhr_array[@]} - for (( i=0; i<${num_fcst_hrs}; i++ )); do - - fhr_orig="${fhr_array[$i]}" - - if [ "${check_accum_contrib_files}" = "TRUE" ]; then - fhr=$(( ${fhr_orig} - ${accum_hh} + 1 )) - num_back_hrs=${accum_hh} - else - fhr=${fhr_orig} - num_back_hrs=1 - fi - - skip_this_fhr="FALSE" - for (( j=0; j<${num_back_hrs}; j++ )); do -# -# Use the provided template to set the name of/relative path to the file -# Note that the while-loop below is over all METplus time string templates -# of the form {...} in the template fn_template; it continues until all -# such templates have been evaluated to actual time strings. -# - fn="${fn_template}" - regex_search_tmpl="(.*)(\{.*\})(.*)" - crnt_tmpl=$( printf "%s" "${fn_template}" | \ - $SED -n -r -e "s|${regex_search_tmpl}|\2|p" ) - remainder=$( printf "%s" "${fn_template}" | \ - $SED -n -r -e "s|${regex_search_tmpl}|\1\3|p" ) - while [ ! -z "${crnt_tmpl}" ]; do - - eval_METplus_timestr_tmpl \ - init_time="$cdate" \ - fhr="$fhr" \ - METplus_timestr_tmpl="${crnt_tmpl}" \ - outvarname_formatted_time="actual_value" -# -# Replace METplus time templates in fn with actual times. Note that -# when using sed, we need to escape various characters (question mark, -# closing and opening curly braces, etc) in the METplus template in -# order for the sed command below to work properly. 
-# - crnt_tmpl_esc=$( echo "${crnt_tmpl}" | \ - $SED -r -e "s/\?/\\\?/g" -e "s/\{/\\\{/g" -e "s/\}/\\\}/g" ) - fn=$( echo "${fn}" | \ - $SED -n -r "s|(.*)(${crnt_tmpl_esc})(.*)|\1${actual_value}\3|p" ) -# -# Set up values for the next iteration of the while-loop. -# - crnt_tmpl=$( printf "%s" "${remainder}" | \ - $SED -n -r -e "s|${regex_search_tmpl}|\2|p" ) - remainder=$( printf "%s" "${remainder}" | \ - $SED -n -r -e "s|${regex_search_tmpl}|\1\3|p" ) - - done -# -# Get the full path to the file and check if it exists. -# - fp="${base_dir}/${fn}" - - if [ -f "${fp}" ]; then - print_info_msg "\ -Found file (fp) for the current forecast hour (fhr; relative to the cycle -date cdate): - fhr = \"$fhr\" - cdate = \"$cdate\" - fp = \"${fp}\" -" - else - skip_this_fhr="TRUE" - num_missing_files=$(( ${num_missing_files} + 1 )) - print_info_msg "\ -The file (fp) for the current forecast hour (fhr; relative to the cycle -date cdate) is missing: - fhr = \"$fhr\" - cdate = \"$cdate\" - fp = \"${fp}\" -Excluding the current forecast hour from the list of hours passed to the -METplus configuration file. -" - break - fi - - fhr=$(( $fhr + 1 )) - - done - - if [ "${skip_this_fhr}" != "TRUE" ]; then - fhr_list="${fhr_list},${fhr_orig}" - fi - - done -# -# Remove leading comma from fhr_list. -# - fhr_list=$( echo "${fhr_list}" | $SED "s/^,//g" ) - print_info_msg "$VERBOSE" "\ -Final (i.e. after filtering for missing files) set of forecast hours is -(written as a single string): - fhr_list = \"${fhr_list}\" -" -# -#----------------------------------------------------------------------- -# -# If the number of missing files is greater than the maximum allowed -# (specified by num_missing_files_max), print out an error message and -# exit. -# -#----------------------------------------------------------------------- -# - if [ "${num_missing_files}" -gt "${num_missing_files_max}" ]; then - print_err_msg_exit "\ -The number of missing files (num_missing_files) is greater than the -maximum allowed number (num_missing_files_max): - num_missing_files = ${num_missing_files} - num_missing_files_max = ${num_missing_files_max}" - fi -# -#----------------------------------------------------------------------- -# -# Set output variables. -# -#----------------------------------------------------------------------- -# - if [ ! -z "${outvarname_fhr_list}" ]; then - printf -v ${outvarname_fhr_list} "%s" "${fhr_list}" - fi -# -#----------------------------------------------------------------------- -# -# Restore the shell options saved at the beginning of this script/function. -# -#----------------------------------------------------------------------- -# - { restore_shell_opts; } > /dev/null 2>&1 - -} diff --git a/ush/set_vx_params.sh b/ush/set_vx_params.sh index 9b67e36d22..993e45ac67 100644 --- a/ush/set_vx_params.sh +++ b/ush/set_vx_params.sh @@ -3,8 +3,10 @@ # # This file defines a function that sets various parameters needed when # performing verification. The way these parameters are set depends on -# the field being verified and, if the field is accumulated precipitation, -# the accumulation period (both of which are inputs to this function). +# the field group being verified and, if the field group consists of a +# set of cumulative fields (e.g. accumulated precipitation or accumulated +# snowfall), the accumulation interval (both of which are inputs to this +# function). 
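The set_leadhrs() function and command-line interface added above in ush/set_leadhrs.py replace the deleted ush/set_vx_fhr_list.sh. As a rough orientation only, the following sketch shows how the function might be called from Python; the date, base directory, and METplus file-name template are hypothetical and simply exercise the keyword arguments that the argparse block forwards via **vars(args).

    from set_leadhrs import set_leadhrs  # assumes ush/ is on PYTHONPATH

    # Hypothetical example: look for hourly obs files at lead hours 1-6 of a
    # cycle starting 2024-05-18 00Z, tolerating at most 2 missing files.
    lead_hrs = set_leadhrs(
        date_init="2024051800",
        lhr_min=1,
        lhr_max=6,
        lhr_intvl=1,
        time_lag=0,
        base_dir="/path/to/obs/ccpa",  # hypothetical location
        fn_template="{valid?fmt=%Y%m%d}/ccpa.t{valid?fmt=%H}z.01h.hrap.conus.gb2",
        num_missing_files_max=2,
        skip_check_files=False,
        verbose=True,
    )
    # Same comma-separated form that the __main__ block prints for bash callers.
    print(", ".join(str(h) for h in lead_hrs))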
# # As of 20220928, the verification tasks in the SRW App workflow use the # MET/METplus software (MET = Model Evaluation Tools) developed at the @@ -53,7 +55,7 @@ function set_vx_params() { # local valid_args=( \ "obtype" \ - "field" \ + "field_group" \ "accum_hh" \ "outvarname_grid_or_point" \ "outvarname_fieldname_in_obs_input" \ @@ -91,10 +93,14 @@ function set_vx_params() { # #----------------------------------------------------------------------- # - if [[ ! "${accum_hh}" =~ ^[0-9]{2}$ ]]; then - print_err_msg_exit "\ -The accumulation (accum_hh) must be a 2-digit integer: + if [ "${obtype}" = "CCPA" ] || [ "${obtype}" = "NOHRSC" ]; then + if [[ ! "${accum_hh}" =~ ^[0-9]{2}$ ]]; then + print_err_msg_exit "\ +For the given observation type (obtype), the accumulation (accum_hh) must +be a 2-digit integer: + obtype = \"${obtype}\" accum_hh = \"${accum_hh}\"" + fi fi # #----------------------------------------------------------------------- @@ -103,15 +109,17 @@ The accumulation (accum_hh) must be a 2-digit integer: # # grid_or_point: # String that is set to either "grid" or "point" depending on whether -# the field in consideration has obs that are gridded or point-based. +# obs type containing the field group is gridded or point-based. # # fieldname_in_obs_input: -# String used to search for the field in the input observation files -# read in by MET. +# If the field group represents a single field, this is the string used +# to search for that field in the input observation files read in by MET. +# If not, this is set to a null string. # # fieldname_in_fcst_input: -# String used to search for the field in the input forecast files read -# in by MET. +# If the field group represents a single field, this is the string used +# to search for that field in the input forecast files read in by MET. +# If not, this is set to a null string. 
# # fieldname_in_MET_output: # String that will be used in naming arrays defined in MET output files @@ -135,21 +143,21 @@ The accumulation (accum_hh) must be a 2-digit integer: "CCPA") _grid_or_point_="grid" - case "${field}" in + case "${field_group}" in "APCP") - fieldname_in_obs_input="${field}" - fieldname_in_fcst_input="${field}" - fieldname_in_MET_output="${field}" - fieldname_in_MET_filedir_names="${field}${accum_hh}h" + fieldname_in_obs_input="${field_group}" + fieldname_in_fcst_input="${field_group}" + fieldname_in_MET_output="${field_group}" + fieldname_in_MET_filedir_names="${field_group}${accum_hh}h" ;; *) print_err_msg_exit "\ A method for setting verification parameters has not been specified for -this observation type (obtype) and field (field) combination: +this observation type (obtype) and field group (field_group) combination: obtype = \"${obtype}\" - field = \"${field}\"" + field_group = \"${field_group}\"" ;; esac @@ -158,21 +166,21 @@ this observation type (obtype) and field (field) combination: "NOHRSC") _grid_or_point_="grid" - case "${field}" in + case "${field_group}" in "ASNOW") - fieldname_in_obs_input="${field}" - fieldname_in_fcst_input="${field}" - fieldname_in_MET_output="${field}" - fieldname_in_MET_filedir_names="${field}${accum_hh}h" + fieldname_in_obs_input="${field_group}" + fieldname_in_fcst_input="${field_group}" + fieldname_in_MET_output="${field_group}" + fieldname_in_MET_filedir_names="${field_group}${accum_hh}h" ;; *) print_err_msg_exit "\ A method for setting verification parameters has not been specified for -this observation type (obtype) and field (field) combination: +this observation type (obtype) and field group (field_group) combination: obtype = \"${obtype}\" - field = \"${field}\"" + field_group = \"${field_group}\"" ;; esac @@ -181,28 +189,28 @@ this observation type (obtype) and field (field) combination: "MRMS") _grid_or_point_="grid" - case "${field}" in + case "${field_group}" in "REFC") fieldname_in_obs_input="MergedReflectivityQCComposite" - fieldname_in_fcst_input="${field}" - fieldname_in_MET_output="${field}" - fieldname_in_MET_filedir_names="${field}" + fieldname_in_fcst_input="${field_group}" + fieldname_in_MET_output="${field_group}" + fieldname_in_MET_filedir_names="${field_group}" ;; "RETOP") fieldname_in_obs_input="EchoTop18" - fieldname_in_fcst_input="${field}" - fieldname_in_MET_output="${field}" - fieldname_in_MET_filedir_names="${field}" + fieldname_in_fcst_input="${field_group}" + fieldname_in_MET_output="${field_group}" + fieldname_in_MET_filedir_names="${field_group}" ;; *) print_err_msg_exit "\ A method for setting verification parameters has not been specified for -this observation type (obtype) and field (field) combination: +this observation type (obtype) and field group (field_group) combination: obtype = \"${obtype}\" - field = \"${field}\"" + field_group = \"${field_group}\"" ;; esac @@ -211,28 +219,28 @@ this observation type (obtype) and field (field) combination: "NDAS") _grid_or_point_="point" - case "${field}" in + case "${field_group}" in - "ADPSFC") + "SFC") fieldname_in_obs_input="" fieldname_in_fcst_input="" - fieldname_in_MET_output="${field}" - fieldname_in_MET_filedir_names="${field}" + fieldname_in_MET_output="ADP${field_group}" + fieldname_in_MET_filedir_names="ADP${field_group}" ;; - "ADPUPA") + "UPA") fieldname_in_obs_input="" fieldname_in_fcst_input="" - fieldname_in_MET_output="${field}" - fieldname_in_MET_filedir_names="${field}" + fieldname_in_MET_output="ADP${field_group}" + 
fieldname_in_MET_filedir_names="ADP${field_group}" ;; *) print_err_msg_exit "\ A method for setting verification parameters has not been specified for -this observation type (obtype) and field (field) combination: +this observation type (obtype) and field group (field_group) combination: obtype = \"${obtype}\" - field = \"${field}\"" + field_group = \"${field_group}\"" ;; esac diff --git a/ush/setup.py b/ush/setup.py index deaebbf7c8..d5ba107a04 100644 --- a/ush/setup.py +++ b/ush/setup.py @@ -11,6 +11,7 @@ import yaml from uwtools.api.config import get_yaml_config +from pprint import pprint from python_utils import ( log_info, @@ -39,31 +40,37 @@ load_xml_file, ) -from set_cycle_dates import set_cycle_dates +from set_cycle_and_obs_timeinfo import \ + set_cycle_dates, set_fcst_output_times_and_obs_days_all_cycles, \ + set_rocoto_cycledefs_for_obs_days, \ + check_temporal_consistency_cumul_fields, \ + get_obs_retrieve_times_by_day from set_predef_grid_params import set_predef_grid_params from set_gridparams_ESGgrid import set_gridparams_ESGgrid from set_gridparams_GFDLgrid import set_gridparams_GFDLgrid from link_fix import link_fix def load_config_for_setup(ushdir, default_config, user_config): - """Updates a Python dictionary in place with experiment configuration settings from the - default, machine, and user configuration files. + """Updates a Python dictionary in place with experiment configuration settings from the + default, machine, and user configuration files. Args: ushdir (str): Path to the ``ush`` directory for the SRW App default_config (str): Path to ``config_defaults.yaml`` - user_config (str): Path to the user-provided config YAML (usually named + user_config (str): Path to the user-provided config YAML (usually named ``config.yaml``) Returns: - None - + cfg_d (dict): Experiment configuration dictionary based on default, + machine, and user config files + do_vx (bool): Flag specifying whether workflow will run vx tasks + Raises: - FileNotFoundError: If the user-provided configuration file or the machine file does not + FileNotFoundError: If the user-provided configuration file or the machine file does not exist. - Exception: If (1) the user-provided configuration file cannot be loaded or (2) it contains - invalid sections/keys or (3) it does not contain mandatory information or (4) - an invalid datetime format is used. + Exception: If (1) the user-provided configuration file cannot be loaded or (2) it contains + invalid sections/keys or (3) it does not contain mandatory information or (4) + an invalid datetime format is used. """ # Load the default config. @@ -165,11 +172,13 @@ def load_config_for_setup(ushdir, default_config, user_config): if taskgroups: cfg_wflow['rocoto']['tasks']['taskgroups'] = taskgroups + # Save string specifying final workflow taskgroups for use later on. 
+ taskgroups = cfg_wflow['rocoto']['tasks']['taskgroups'] + + # Extend yaml here on just the rocoto section to include the # appropriate groups of tasks extend_yaml(cfg_wflow) - # Put the entries expanded under taskgroups in tasks rocoto_tasks = cfg_wflow["rocoto"]["tasks"] cfg_wflow["rocoto"]["tasks"] = yaml.load(rocoto_tasks.pop("taskgroups"),Loader=yaml.SafeLoader) @@ -236,7 +245,57 @@ def _add_jobname(tasks): except: pass cfg_d["workflow"]["EXPT_BASEDIR"] = os.path.abspath(expt_basedir) - + # + # ----------------------------------------------------------------------- + # + # If the workflow includes at least one verification task, ensure that + # the configuration parameters associated with cumulative fields (e.g. + # APCP) in the verification section of the experiment dictionary are + # temporally consistent, e.g. that accumulation intervals are less than + # or equal to the forecast length. Update the verification section of + # the dictionary to remove inconsistencies. + # + # ----------------------------------------------------------------------- + # + # List containing the names of all workflow config files for vx (whether + # or not they're included in the workflow). + vx_taskgroup_fns = ['verify_pre.yaml', 'verify_det.yaml', 'verify_ens.yaml'] + # Flag that specifies whether the workflow will be running any vx tasks. + do_vx = any([fn for fn in vx_taskgroup_fns if fn in taskgroups]) + + # Initialize variable containing the vx configuration. This may be + # modified within the if-statement below. + vx_config = cfg_d["verification"] + + if do_vx: + workflow_config = cfg_d["workflow"] + + date_first_cycl = workflow_config.get("DATE_FIRST_CYCL") + date_last_cycl = workflow_config.get("DATE_LAST_CYCL") + incr_cycl_freq = int(workflow_config.get("INCR_CYCL_FREQ")) + fcst_len_hrs = workflow_config.get("FCST_LEN_HRS") + vx_fcst_output_intvl_hrs = vx_config.get("VX_FCST_OUTPUT_INTVL_HRS") + + # Convert various times and time intervals from integers or strings to + # datetime or timedelta objects. + date_first_cycl_dt = datetime.datetime.strptime(date_first_cycl, "%Y%m%d%H") + date_last_cycl_dt = datetime.datetime.strptime(date_last_cycl, "%Y%m%d%H") + cycl_intvl_dt = datetime.timedelta(hours=incr_cycl_freq) + fcst_len_dt = datetime.timedelta(hours=fcst_len_hrs) + vx_fcst_output_intvl_dt = datetime.timedelta(hours=vx_fcst_output_intvl_hrs) + + # Generate a list containing the starting times of the cycles. + cycle_start_times \ + = set_cycle_dates(date_first_cycl_dt, date_last_cycl_dt, cycl_intvl_dt, + return_type='datetime') + + # Call function that runs the consistency checks on the vx parameters. + vx_config, fcst_obs_matched_times_all_cycles_cumul \ + = check_temporal_consistency_cumul_fields( + vx_config, cycle_start_times, fcst_len_dt, vx_fcst_output_intvl_dt) + + + cfg_d['verification'] = vx_config extend_yaml(cfg_d) # Do any conversions of data types @@ -263,7 +322,7 @@ def _add_jobname(tasks): Mandatory variable "{val}" not found in: user config file {user_config} OR - machine file {machine_file} + machine file {machine_file} """ ) ) @@ -282,7 +341,7 @@ def _add_jobname(tasks): ) ) - return cfg_d + return cfg_d, do_vx def set_srw_paths(ushdir, expt_config): @@ -295,17 +354,17 @@ def set_srw_paths(ushdir, expt_config): Other paths for the SRW App are set as defaults in ``config_defaults.yaml``.
Args: - ushdir (str) : Path to the system location of the ``ush`` directory under the + ushdir (str) : Path to the system location of the ``ush`` directory under the SRW App clone expt_config (dict): Contains the configuration settings for the user-defined experiment Returns: Dictionary of configuration settings and system paths as keys/values - + Raises: - KeyError: If the external repository required is not listed in the externals + KeyError: If the external repository required is not listed in the externals configuration file (e.g., ``Externals.cfg``) - FileNotFoundError: If the ``ufs-weather-model`` code containing the FV3 source code has + FileNotFoundError: If the ``ufs-weather-model`` code containing the FV3 source code has not been cloned properly """ @@ -366,23 +425,23 @@ def setup(USHdir, user_config_fn="config.yaml", debug: bool = False): time. Args: - USHdir (str): The full path of the ``ush/`` directory where this script + USHdir (str): The full path of the ``ush/`` directory where this script (``setup.py``) is located - user_config_fn (str): The name of a user-provided configuration YAML (usually + user_config_fn (str): The name of a user-provided configuration YAML (usually ``config.yaml``) debug (bool): Enable extra output for debugging Returns: None - - Raises: - ValueError: If checked configuration values are invalid (e.g., forecast length, + + Raises: + ValueError: If checked configuration values are invalid (e.g., forecast length, ``EXPTDIR`` path) - FileExistsError: If ``EXPTDIR`` already exists, and ``PREEXISTING_DIR_METHOD`` is not + FileExistsError: If ``EXPTDIR`` already exists, and ``PREEXISTING_DIR_METHOD`` is not set to a compatible handling method - FileNotFoundError: If the path to a particular file does not exist or if the file itself + FileNotFoundError: If the path to a particular file does not exist or if the file itself does not exist at the expected path - TypeError: If ``USE_CUSTOM_POST_CONFIG_FILE`` or ``USE_CRTM`` are set to true but no + TypeError: If ``USE_CUSTOM_POST_CONFIG_FILE`` or ``USE_CRTM`` are set to true but no corresponding custom configuration file or CRTM fix file directory is set KeyError: If an invalid value is provided (i.e., for ``GRID_GEN_METHOD``) """ @@ -401,7 +460,7 @@ def setup(USHdir, user_config_fn="config.yaml", debug: bool = False): # user config files. default_config_fp = os.path.join(USHdir, "config_defaults.yaml") user_config_fp = os.path.join(USHdir, user_config_fn) - expt_config = load_config_for_setup(USHdir, default_config_fp, user_config_fp) + expt_config, do_vx = load_config_for_setup(USHdir, default_config_fp, user_config_fp) # Load build settings as a dictionary; will be used later to make sure the build is consistent with the user settings build_config_fp = os.path.join(expt_config["user"].get("EXECdir"), "build_settings.yaml") @@ -487,7 +546,7 @@ def setup(USHdir, user_config_fn="config.yaml", debug: bool = False): f""" EXPTDIR ({exptdir}) already exists, and PREEXISTING_DIR_METHOD = {preexisting_dir_method} - To ignore this error, delete the directory, or set + To ignore this error, delete the directory, or set PREEXISTING_DIR_METHOD = delete, or PREEXISTING_DIR_METHOD = rename in your config file. 
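The hunk below adds a do_vx block to setup() that derives forecast output times, obs days, and obs-day cycledefs from the cycle start times, the forecast length, and the vx forecast output interval. The sketch that follows is only a rough illustration of that derivation for instantaneous fields; it is not the set_cycle_and_obs_timeinfo implementation, and all values are hypothetical.

    from datetime import datetime, timedelta

    # Hypothetical experiment: two cycles 12 h apart, 18-h forecasts, hourly vx output.
    date_first_cycl = datetime.strptime("2024051800", "%Y%m%d%H")
    date_last_cycl = datetime.strptime("2024051812", "%Y%m%d%H")
    cycl_intvl = timedelta(hours=12)
    fcst_len = timedelta(hours=18)
    vx_output_intvl = timedelta(hours=1)

    # Cycle start times, analogous to set_cycle_dates(..., return_type='datetime').
    cycle_start_times = []
    t = date_first_cycl
    while t <= date_last_cycl:
        cycle_start_times.append(t)
        t += cycl_intvl

    # Forecast output times over all cycles, and the calendar days ("obs days")
    # on which observations are needed because forecast output falls on them.
    output_times = set()
    for start in cycle_start_times:
        t = start
        while t <= start + fcst_len:
            output_times.add(t)
            t += vx_output_intvl
    obs_days = sorted({t.strftime("%Y%m%d") for t in output_times})
    print(obs_days)  # ['20240518', '20240519'] for this hypothetical setup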
@@ -577,66 +636,219 @@ def _remove_tag(tasks, tag): post_meta = rocoto_tasks.get("metatask_run_ens_post", {}) post_meta.pop("metatask_run_sub_hourly_post", None) post_meta.pop("metatask_sub_hourly_last_hour_post", None) + + + date_first_cycl = workflow_config.get("DATE_FIRST_CYCL") + date_last_cycl = workflow_config.get("DATE_LAST_CYCL") + incr_cycl_freq = int(workflow_config.get("INCR_CYCL_FREQ")) + cycl_intvl_dt = datetime.timedelta(hours=incr_cycl_freq) # # ----------------------------------------------------------------------- # - # Remove all verification [meta]tasks for which no fields are specified. + # If running vx tasks, check and possibly reset values in expt_config + # and rocoto_config. # # ----------------------------------------------------------------------- # - vx_fields_all = {} - vx_metatasks_all = {} - - vx_fields_all["CCPA"] = ["APCP"] - vx_metatasks_all["CCPA"] = ["metatask_PcpCombine_obs", - "metatask_PcpCombine_fcst_APCP_all_accums_all_mems", - "metatask_GridStat_CCPA_all_accums_all_mems", - "metatask_GenEnsProd_EnsembleStat_CCPA", - "metatask_GridStat_CCPA_ensmeanprob_all_accums"] - - vx_fields_all["NOHRSC"] = ["ASNOW"] - vx_metatasks_all["NOHRSC"] = ["task_get_obs_nohrsc", - "metatask_PcpCombine_fcst_ASNOW_all_accums_all_mems", - "metatask_GridStat_NOHRSC_all_accums_all_mems", - "metatask_GenEnsProd_EnsembleStat_NOHRSC", - "metatask_GridStat_NOHRSC_ensmeanprob_all_accums"] - - vx_fields_all["MRMS"] = ["REFC", "RETOP"] - vx_metatasks_all["MRMS"] = ["metatask_GridStat_MRMS_all_mems", - "metatask_GenEnsProd_EnsembleStat_MRMS", - "metatask_GridStat_MRMS_ensprob"] - - vx_fields_all["NDAS"] = ["ADPSFC", "ADPUPA"] - vx_metatasks_all["NDAS"] = ["task_run_MET_Pb2nc_obs", - "metatask_PointStat_NDAS_all_mems", - "metatask_GenEnsProd_EnsembleStat_NDAS", - "metatask_PointStat_NDAS_ensmeanprob"] - - # Get the vx fields specified in the experiment configuration. - vx_fields_config = expt_config["verification"]["VX_FIELDS"] - - # If there are no vx fields specified, remove those tasks that are necessary - # for all observation types. - if not vx_fields_config: - metatask = "metatask_check_post_output_all_mems" - rocoto_config['tasks'].pop(metatask) - - # If for a given obstype no fields are specified, remove all vx metatasks - # for that obstype. - for obstype in vx_fields_all: - vx_fields_obstype = [field for field in vx_fields_config if field in vx_fields_all[obstype]] - if not vx_fields_obstype: - for metatask in vx_metatasks_all[obstype]: - if metatask in rocoto_config['tasks']: - logging.info(dedent( - f""" - Removing verification [meta]task - "{metatask}" - from workflow since no fields belonging to observation type "{obstype}" - are specified for verification.""" - )) - rocoto_config['tasks'].pop(metatask) - + if do_vx: + # + # ----------------------------------------------------------------------- + # + # Set some variables needed for running checks on and creating new + # (derived) configuration variables for the verification. + # + # ----------------------------------------------------------------------- + # + vx_config = expt_config["verification"] + + fcst_len_hrs = workflow_config.get("FCST_LEN_HRS") + vx_fcst_output_intvl_hrs = vx_config.get("VX_FCST_OUTPUT_INTVL_HRS") + + # To enable arithmetic with dates and times, convert various time + # intervals from integer to datetime.timedelta objects. 
+ fcst_len_dt = datetime.timedelta(hours=fcst_len_hrs) + vx_fcst_output_intvl_dt = datetime.timedelta(hours=vx_fcst_output_intvl_hrs) + # + # ----------------------------------------------------------------------- + # + # Generate a list containing the starting times of the cycles. This will + # be needed in checking that the hours-of-day of the forecast output match + # those of the observations. + # + # ----------------------------------------------------------------------- + # + cycle_start_times \ + = set_cycle_dates(date_first_cycl, date_last_cycl, cycl_intvl_dt, + return_type='datetime') + # + # ----------------------------------------------------------------------- + # + # Generate a list of forecast output times and a list of obs days (i.e. + # days on which observations are needed to perform verification because + # there is forecast output on those days) over all cycles, both for + # instantaneous fields (e.g. T2m, REFC, RETOP) and for cumulative ones + # (e.g. APCP). Then add these lists to the dictionary containing workflow + # configuration variables. These will be needed in generating the ROCOTO + # XML. + # + # ----------------------------------------------------------------------- + # + fcst_output_times_all_cycles, obs_days_all_cycles, \ + = set_fcst_output_times_and_obs_days_all_cycles( + cycle_start_times, fcst_len_dt, vx_fcst_output_intvl_dt) + + workflow_config['OBS_DAYS_ALL_CYCLES_INST'] = obs_days_all_cycles['inst'] + workflow_config['OBS_DAYS_ALL_CYCLES_CUMUL'] = obs_days_all_cycles['cumul'] + # + # ----------------------------------------------------------------------- + # + # Generate lists of ROCOTO cycledef strings corresponding to the obs days + # for instantaneous fields and those for cumulative ones. Then save the + # lists of cycledefs in the dictionary containing values needed to + # construct the ROCOTO XML. + # + # ----------------------------------------------------------------------- + # + cycledefs_obs_days_inst = set_rocoto_cycledefs_for_obs_days(obs_days_all_cycles['inst']) + cycledefs_obs_days_cumul = set_rocoto_cycledefs_for_obs_days(obs_days_all_cycles['cumul']) + + rocoto_config['cycledefs']['cycledefs_obs_days_inst'] = cycledefs_obs_days_inst + rocoto_config['cycledefs']['cycledefs_obs_days_cumul'] = cycledefs_obs_days_cumul + # + # ----------------------------------------------------------------------- + # + # Generate a dictionary of dictionaries that, for each combination of obs + # type needed and obs day, contains a string list of the times at which + # that type of observation is needed on that day. The elements of each + # list are formatted as 'YYYYMMDDHH'. This information is used by the + # day-based get_obs tasks in the workflow to get obs only at those times + # at which they are needed (as opposed to for the whole day). + # + # ----------------------------------------------------------------------- + # + obs_retrieve_times_by_day \ + = get_obs_retrieve_times_by_day( + vx_config, cycle_start_times, fcst_len_dt, + fcst_output_times_all_cycles, obs_days_all_cycles) + + for obtype, obs_days_dict in obs_retrieve_times_by_day.items(): + for obs_day, obs_retrieve_times in obs_days_dict.items(): + array_name = '_'.join(["OBS_RETRIEVE_TIMES", obtype, obs_day]) + vx_config[array_name] = obs_retrieve_times + expt_config["verification"] = vx_config + # + # ----------------------------------------------------------------------- + # + # Remove all verification (meta)tasks for which no fields are specified.
+ # + # ----------------------------------------------------------------------- + # + vx_field_groups_all_by_obtype = {} + vx_metatasks_all_by_obtype = {} + + vx_field_groups_all_by_obtype["CCPA"] = ["APCP"] + vx_metatasks_all_by_obtype["CCPA"] \ + = ["task_get_obs_ccpa", + "metatask_PcpCombine_APCP_all_accums_obs_CCPA", + "metatask_PcpCombine_APCP_all_accums_all_mems", + "metatask_GridStat_APCP_all_accums_all_mems", + "metatask_GenEnsProd_EnsembleStat_APCP_all_accums", + "metatask_GridStat_APCP_all_accums_ensmeanprob"] + + vx_field_groups_all_by_obtype["NOHRSC"] = ["ASNOW"] + vx_metatasks_all_by_obtype["NOHRSC"] \ + = ["task_get_obs_nohrsc", + "metatask_PcpCombine_ASNOW_all_accums_obs_NOHRSC", + "metatask_PcpCombine_ASNOW_all_accums_all_mems", + "metatask_GridStat_ASNOW_all_accums_all_mems", + "metatask_GenEnsProd_EnsembleStat_ASNOW_all_accums", + "metatask_GridStat_ASNOW_all_accums_ensmeanprob"] + + vx_field_groups_all_by_obtype["MRMS"] = ["REFC", "RETOP"] + vx_metatasks_all_by_obtype["MRMS"] \ + = ["task_get_obs_mrms", + "metatask_GridStat_REFC_RETOP_all_mems", + "metatask_GenEnsProd_EnsembleStat_REFC_RETOP", + "metatask_GridStat_REFC_RETOP_ensprob"] + + vx_field_groups_all_by_obtype["NDAS"] = ["SFC", "UPA"] + vx_metatasks_all_by_obtype["NDAS"] \ + = ["task_get_obs_ndas", + "task_run_MET_Pb2nc_obs_NDAS", + "metatask_PointStat_SFC_UPA_all_mems", + "metatask_GenEnsProd_EnsembleStat_SFC_UPA", + "metatask_PointStat_SFC_UPA_ensmeanprob"] + + # If there are no field groups specified for verification, remove those + # tasks that are common to all observation types. + vx_field_groups = vx_config["VX_FIELD_GROUPS"] + if not vx_field_groups: + metatask = "metatask_check_post_output_all_mems" + rocoto_config['tasks'].pop(metatask) + + # If for a given obs type none of its field groups are specified for + # verification, remove all vx metatasks for that obs type. + for obtype in vx_field_groups_all_by_obtype: + vx_field_groups_crnt_obtype = list(set(vx_field_groups) & set(vx_field_groups_all_by_obtype[obtype])) + if not vx_field_groups_crnt_obtype: + for metatask in vx_metatasks_all_by_obtype[obtype]: + if metatask in rocoto_config['tasks']: + logging.info(dedent( + f""" + Removing verification (meta)task + "{metatask}" + from workflow since no field groups from observation type "{obtype}" are + specified for verification.""" + )) + rocoto_config['tasks'].pop(metatask) + # + # ----------------------------------------------------------------------- + # + # If there are at least some field groups to verify, then make sure that + # the base directories in which retrieved obs files will be placed are + # distinct for the different obs types. + # + # ----------------------------------------------------------------------- + # + if vx_field_groups: + obtypes_all = ['CCPA', 'NOHRSC', 'MRMS', 'NDAS'] + obs_basedir_var_names = [f'{obtype}_OBS_DIR' for obtype in obtypes_all] + obs_basedirs_dict = {key: vx_config[key] for key in obs_basedir_var_names} + obs_basedirs_orig = list(obs_basedirs_dict.values()) + obs_basedirs_uniq = list(set(obs_basedirs_orig)) + if len(obs_basedirs_orig) != len(obs_basedirs_uniq): + msg1 = dedent(f""" + The base directories for the obs files must be distinct, but at least two + are identical:""") + msg2 = '' + for obs_basedir_var_name, obs_dir in obs_basedirs_dict.items(): + msg2 = msg2 + dedent(f""" + {obs_basedir_var_name} = {obs_dir}""") + msg3 = dedent(f""" + Modify these in the SRW App's user configuration file to make them distinct + and rerun. 
+ """) + msg = msg1 + ' '.join(msg2.splitlines(True)) + msg3 + logging.error(msg) + raise ValueError(msg) + # + # ----------------------------------------------------------------------- + # + # The "cycled_from_second" cycledef in the default workflow configuration + # file (default_workflow.yaml) requires the starting date of the second + # cycle. That is difficult to calculate in the yaml file itself because + # currently, there are no utilities to perform arithmetic with dates. + # Thus, we calculate it here and save it as a variable in the workflow + # configuration dictionary. Note that correct functioning of the default + # workflow yaml file also requires that DATE_[FIRST|SECOND|LAST]_CYCL all + # be strings, not datetime objects. We perform those conversions here. + # + # ----------------------------------------------------------------------- + # + date_second_cycl = date_first_cycl + cycl_intvl_dt + workflow_config['DATE_FIRST_CYCL'] = datetime.datetime.strftime(date_first_cycl, "%Y%m%d%H") + workflow_config['DATE_SECOND_CYCL'] = datetime.datetime.strftime(date_second_cycl, "%Y%m%d%H") + workflow_config['DATE_LAST_CYCL'] = datetime.datetime.strftime(date_last_cycl, "%Y%m%d%H") # # ----------------------------------------------------------------------- # @@ -801,11 +1013,6 @@ def _get_location(xcs, fmt, expt_cfg): run_envir = expt_config["user"].get("RUN_ENVIR", "") - fcst_len_hrs = workflow_config.get("FCST_LEN_HRS") - date_first_cycl = workflow_config.get("DATE_FIRST_CYCL") - date_last_cycl = workflow_config.get("DATE_LAST_CYCL") - incr_cycl_freq = int(workflow_config.get("INCR_CYCL_FREQ")) - # set varying forecast lengths only when fcst_len_hrs=-1 if fcst_len_hrs == -1: fcst_len_cycl = workflow_config.get("FCST_LEN_CYCL") @@ -817,12 +1024,12 @@ def _get_location(xcs, fmt, expt_cfg): num_cycles = len(set_cycle_dates( date_first_cycl, date_last_cycl, - incr_cycl_freq)) + cycl_intvl_dt)) if num_cycles != len(fcst_len_cycl): logger.error(f""" The number of entries in FCST_LEN_CYCL does not divide evenly into a 24 hour day or the number of cycles - in your experiment! + in your experiment! FCST_LEN_CYCL = {fcst_len_cycl} """ ) @@ -1203,7 +1410,7 @@ def _get_location(xcs, fmt, expt_cfg): post_output_domain_name = lowercase(post_output_domain_name) # Write updated value of POST_OUTPUT_DOMAIN_NAME back to dictionary - post_config["POST_OUTPUT_DOMAIN_NAME"] = post_output_domain_name + post_config["POST_OUTPUT_DOMAIN_NAME"] = post_output_domain_name # # ----------------------------------------------------------------------- @@ -1653,8 +1860,8 @@ def clean_rocoto_dict(rocotodict): 1. A task dictionary containing no "command" key 2. A metatask dictionary containing no task dictionaries - - Args: + + Args: rocotodict (dict): A dictionary containing Rocoto workflow settings """ diff --git a/ush/source_util_funcs.sh b/ush/source_util_funcs.sh index 9feceaf68e..266975e97d 100644 --- a/ush/source_util_funcs.sh +++ b/ush/source_util_funcs.sh @@ -214,16 +214,6 @@ function source_util_funcs() { # #----------------------------------------------------------------------- # -# Source the file containing the function that evaluates a METplus time -# string template. -# -#----------------------------------------------------------------------- -# - . 
${bashutils_dir}/eval_METplus_timestr_tmpl.sh - -# -#----------------------------------------------------------------------- -# # Source the file that sources YAML files as if they were bash # #----------------------------------------------------------------------- diff --git a/ush/valid_param_vals.yaml b/ush/valid_param_vals.yaml index dc3c3b170c..a8a568605e 100644 --- a/ush/valid_param_vals.yaml +++ b/ush/valid_param_vals.yaml @@ -78,6 +78,6 @@ valid_vals_DO_AQM_CHEM_LBCS: [True, False] valid_vals_DO_AQM_GEFS_LBCS: [True, False] valid_vals_DO_AQM_SAVE_AIRNOW_HIST: [True, False] valid_vals_COLDSTART: [True, False] -valid_vals_VX_FIELDS: [ "APCP", "ASNOW", "REFC", "RETOP", "ADPSFC", "ADPUPA" ] +valid_vals_VX_FIELD_GROUPS: [ "APCP", "ASNOW", "REFC", "RETOP", "SFC", "UPA" ] valid_vals_VX_APCP_ACCUMS_HRS: [ 1, 3, 6, 24 ] -valid_vals_VX_ASNOW_ACCUMS_HRS: [ 6, 24 ] +valid_vals_VX_ASNOW_ACCUMS_HRS: [ 6, 12, 18, 24 ]
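To close, the valid_param_vals.yaml change above renames VX_FIELDS to VX_FIELD_GROUPS and replaces the NDAS entries ADPSFC/ADPUPA with SFC/UPA, while the earlier set_vx_params.sh change maps those groups back to the ADP-prefixed names that MET uses in its output. The sketch below illustrates that relationship; it is not code taken from the App, and it ignores the accumulation-interval suffix used for APCP/ASNOW directory names.

    # Mirrors valid_vals_VX_FIELD_GROUPS above.
    VALID_VX_FIELD_GROUPS = ["APCP", "ASNOW", "REFC", "RETOP", "SFC", "UPA"]

    def met_output_name(field_group: str) -> str:
        # NDAS point-obs groups regain the "ADP" prefix, per set_vx_params.sh.
        return f"ADP{field_group}" if field_group in ("SFC", "UPA") else field_group

    # Hypothetical user setting for VX_FIELD_GROUPS, expressed as a Python list.
    vx_field_groups = ["APCP", "SFC", "UPA"]
    for fg in vx_field_groups:
        if fg not in VALID_VX_FIELD_GROUPS:
            raise ValueError(f"Invalid VX_FIELD_GROUPS entry: {fg}")
        print(fg, "->", met_output_name(fg))  # APCP -> APCP, SFC -> ADPSFC, UPA -> ADPUPA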