From 30fb5b14d34da4c8905f54df4d16f2a38a24abc3 Mon Sep 17 00:00:00 2001 From: Philip Hackstock <20710924+phackstock@users.noreply.github.com> Date: Wed, 25 Sep 2024 16:57:32 +0200 Subject: [PATCH] Wildcard matching (#397) * Add wildcard matching for CodeLists * Add test for wildcard matching * Remove nonsensical test * Apply suggestions from code review Co-authored-by: Daniel Huppmann * Use pyam.utils.pattern_match for wildcard validation * Update test file path * Pin common-definitions to make tests pass --------- Co-authored-by: Daniel Huppmann --- nomenclature/codelist.py | 5 +++-- .../codelist/wildcard/scenario/scenario.yaml | 1 + .../general-config-only/nomenclature.yaml | 1 + .../config/general-config/nomenclature.yaml | 1 + .../external_repo_test/nomenclature.yaml | 1 + tests/test_validation.py | 20 +++++++++++-------- 6 files changed, 19 insertions(+), 10 deletions(-) create mode 100644 tests/data/codelist/wildcard/scenario/scenario.yaml diff --git a/nomenclature/codelist.py b/nomenclature/codelist.py index 7b1bd4f3..c488df6f 100644 --- a/nomenclature/codelist.py +++ b/nomenclature/codelist.py @@ -7,7 +7,7 @@ import pandas as pd import yaml from pyam import IamDataFrame -from pyam.utils import is_list_like, write_sheet +from pyam.utils import is_list_like, write_sheet, pattern_match from pydantic import BaseModel, ValidationInfo, field_validator from pydantic_core import PydanticCustomError @@ -118,7 +118,8 @@ def validate_items(self, items: List[str]) -> List[str]: list Returns the list of items that are **not** defined in the codelist """ - return [c for c in items if c not in self.keys()] + matches = pattern_match(pd.Series(items), self.keys()) + return [item for item, match in zip(items, matches) if not match] @classmethod def replace_tags( diff --git a/tests/data/codelist/wildcard/scenario/scenario.yaml b/tests/data/codelist/wildcard/scenario/scenario.yaml new file mode 100644 index 00000000..2751c535 --- /dev/null +++ b/tests/data/codelist/wildcard/scenario/scenario.yaml @@ -0,0 +1 @@ +- scen_* diff --git a/tests/data/config/general-config-only/nomenclature.yaml b/tests/data/config/general-config-only/nomenclature.yaml index eb390c76..e9f09c33 100644 --- a/tests/data/config/general-config-only/nomenclature.yaml +++ b/tests/data/config/general-config-only/nomenclature.yaml @@ -1,6 +1,7 @@ repositories: common-definitions: url: https://github.com/IAMconsortium/common-definitions.git/ + hash: cb85704 definitions: region: repository: common-definitions diff --git a/tests/data/config/general-config/nomenclature.yaml b/tests/data/config/general-config/nomenclature.yaml index fe35a156..b6b95718 100644 --- a/tests/data/config/general-config/nomenclature.yaml +++ b/tests/data/config/general-config/nomenclature.yaml @@ -2,6 +2,7 @@ dimensions: [region, variable] repositories: common-definitions: url: https://github.com/IAMconsortium/common-definitions.git/ + hash: cb85704 definitions: region: repository: common-definitions diff --git a/tests/data/region_processing/external_repo_test/nomenclature.yaml b/tests/data/region_processing/external_repo_test/nomenclature.yaml index 6c6b2c53..5cf53bf9 100644 --- a/tests/data/region_processing/external_repo_test/nomenclature.yaml +++ b/tests/data/region_processing/external_repo_test/nomenclature.yaml @@ -1,6 +1,7 @@ repositories: common-definitions: url: https://github.com/IAMconsortium/common-definitions.git/ + hash: cb85704 definitions: region: repository: common-definitions diff --git a/tests/test_validation.py b/tests/test_validation.py index 9395ef46..a4f95ea3 100644 --- a/tests/test_validation.py +++ b/tests/test_validation.py @@ -67,14 +67,6 @@ def test_validation_fails_region(simple_definition, simple_df, caplog): ) -def test_validation_fails_region_as_int(simple_definition, simple_df): - """Using a region name as integer raises the expected error""" - simple_df.rename(region={"World": 1}, inplace=True) - - with pytest.raises(ValueError, match=MATCH_FAIL_VALIDATION): - simple_definition.validate(simple_df) - - def test_validation_with_custom_dimension(simple_df): """Check validation with a custom DataStructureDefinition dimension""" @@ -95,3 +87,15 @@ def test_validation_with_custom_dimension(simple_df): # validating against all dimensions works definition.validate(simple_df) + + +def test_wildcard_match(simple_df): + definition = DataStructureDefinition( + TEST_DATA_DIR / "codelist" / "wildcard", + dimensions=["scenario"], + ) + + assert definition.validate(simple_df) is None + + with pytest.raises(ValueError, match=MATCH_FAIL_VALIDATION): + definition.validate(simple_df.rename(scenario={"scen_a": "foo"}))