From 47cb3cb6b439c34458422bc302486663df6eac74 Mon Sep 17 00:00:00 2001 From: Philip Hackstock <20710924+phackstock@users.noreply.github.com> Date: Mon, 8 Jul 2024 16:33:09 +0200 Subject: [PATCH] Extend Nomenclature config to specify dimensions (#353) * Add dimensions attribute to config * Use config.dimensions in DataStructureDefinition * Add tests for config * Add DataStructureDefinition config integration test * Update docs * Add subannual to allowed dimension values * Adjust test with 'subannual' --- docs/user_guide/config.rst | 16 ++++++++++++++ nomenclature/config.py | 10 +++++++++ nomenclature/definition.py | 9 +++++++- .../nomenclature.yaml | 2 ++ .../data/nomenclature_configs/dimensions.yaml | 4 ++++ tests/test_config.py | 22 +++++++++++++++++++ tests/test_definition.py | 3 +-- 7 files changed, 63 insertions(+), 3 deletions(-) create mode 100644 tests/data/nomenclature_configs/dimensions.yaml diff --git a/docs/user_guide/config.rst b/docs/user_guide/config.rst index 9da5cd70..f9fd887b 100644 --- a/docs/user_guide/config.rst +++ b/docs/user_guide/config.rst @@ -77,3 +77,19 @@ By setting *definitions.region.country* as *true* in the configuration file: the nomenclature package will add all countries to the *region* codelist. More details on the list of countries can be found here: :ref:`countries`. + +Specify dimensions to be read +----------------------------- + +The configuration file offers the possibility to set the dimensions which will be read +by *DataStructureDefinition*. + +In the below case we specify *region*, *variable* and *scenario* to be read and used for +validation: + +.. code:: yaml + + dimensions: + - region + - variable + - scenario diff --git a/nomenclature/config.py b/nomenclature/config.py index c0372749..dabb080e 100644 --- a/nomenclature/config.py +++ b/nomenclature/config.py @@ -1,3 +1,4 @@ +from enum import Enum from pathlib import Path from typing import Annotated, Optional @@ -132,7 +133,16 @@ class RegionMappingConfig(BaseModel): model_config = ConfigDict(populate_by_name=True) +class DimensionEnum(str, Enum): + model = "model" + scenario = "scenario" + variable = "variable" + region = "region" + subannual = "subannual" + + class NomenclatureConfig(BaseModel): + dimensions: None | list[DimensionEnum] = None repositories: dict[str, Repository] = Field(default_factory=dict) definitions: DataStructureConfig = Field(default_factory=DataStructureConfig) mappings: RegionMappingConfig = Field(default_factory=RegionMappingConfig) diff --git a/nomenclature/definition.py b/nomenclature/definition.py index 2c2cfff9..d66330fb 100644 --- a/nomenclature/definition.py +++ b/nomenclature/definition.py @@ -61,7 +61,14 @@ def __init__(self, path, dimensions=None): ): raise NotADirectoryError(f"Definitions directory not found: {path}") - self.dimensions = dimensions or ["region", "variable"] + self.dimensions = ( + dimensions + or self.config.dimensions + or [ + "region", + "variable", + ] + ) for dim in self.dimensions: codelist_cls = SPECIAL_CODELIST.get(dim, CodeList) self.__setattr__( diff --git a/tests/data/general-config-only-country/nomenclature.yaml b/tests/data/general-config-only-country/nomenclature.yaml index 5217775c..7616a4d1 100644 --- a/tests/data/general-config-only-country/nomenclature.yaml +++ b/tests/data/general-config-only-country/nomenclature.yaml @@ -1,3 +1,5 @@ +dimensions: + - region definitions: region: country: true diff --git a/tests/data/nomenclature_configs/dimensions.yaml b/tests/data/nomenclature_configs/dimensions.yaml new file mode 100644 index 00000000..a7f63f98 --- /dev/null +++ b/tests/data/nomenclature_configs/dimensions.yaml @@ -0,0 +1,4 @@ +dimensions: + - region + - variable + - scenario diff --git a/tests/test_config.py b/tests/test_config.py index 9674f08d..7670424e 100644 --- a/tests/test_config.py +++ b/tests/test_config.py @@ -67,3 +67,25 @@ def test_double_stacked_external_repo_raises(monkeypatch): match = "External repos cannot again refer to external repos" with raises(ValueError, match=match): repo.check_external_repo_double_stacking() + + +def test_config_dimensions(): + config = NomenclatureConfig.from_file( + TEST_DATA_DIR / "nomenclature_configs" / "dimensions.yaml" + ) + assert set(config.dimensions) == { + "scenario", + "region", + "variable", + } + + +def test_invalid_config_dimensions_raises(): + with raises( + ValueError, + match=( + "Input should be 'model', 'scenario', 'variable'," + " 'region' or 'subannual'" + ), + ): + NomenclatureConfig(dimensions=["year"]) diff --git a/tests/test_definition.py b/tests/test_definition.py index b6e1c973..5db19860 100644 --- a/tests/test_definition.py +++ b/tests/test_definition.py @@ -66,8 +66,7 @@ def test_definition_from_general_config(workflow_folder): def test_definition_general_config_country_only(): obs = DataStructureDefinition( - TEST_DATA_DIR / "general-config-only-country" / "definitions", - dimensions=["region"], + TEST_DATA_DIR / "general-config-only-country" / "definitions" ) assert all(region in obs.region for region in ("Austria", "Bolivia", "Kosovo"))