diff --git a/nomenclature/processor/data_validator.py b/nomenclature/processor/data_validator.py
index b1a2bc82..42c58749 100644
--- a/nomenclature/processor/data_validator.py
+++ b/nomenclature/processor/data_validator.py
@@ -4,6 +4,7 @@
 from pathlib import Path
 
 import yaml
+from pandas import concat
 from pyam import IamDataFrame
 from pyam.logging import adjust_log_level
 from pydantic import computed_field, field_validator, model_validator
@@ -158,9 +159,17 @@ def apply(self, df: IamDataFrame) -> IamDataFrame:
 
         with adjust_log_level():
             for item in self.criteria_items:
+                per_item_df = df
                 for criterion in item.validation:
-                    failed_validation = df.validate(**criterion.validation_args)
+                    failed_validation = per_item_df.validate(
+                        **criterion.validation_args
+                    )
                     if failed_validation is not None:
+                        per_item_df = IamDataFrame(
+                            concat([df.data, failed_validation]).drop_duplicates(
+                                keep=False
+                            )
+                        )
                         criteria_msg = "  Criteria: " + ", ".join(
                             [
                                 f"{key}: {value}"
@@ -177,7 +186,6 @@ def apply(self, df: IamDataFrame) -> IamDataFrame:
                             textwrap.indent(str(failed_validation), prefix="  ")
                             + "\n"
                         )
-                        break
         fail_msg = "(file %s):\n" % get_relative_path(self.file)
         if error:
             fail_msg = (
diff --git a/tests/test_config.py b/tests/test_config.py
index 73f9efa8..a5596501 100644
--- a/tests/test_config.py
+++ b/tests/test_config.py
@@ -93,7 +93,6 @@ def test_config_with_filter(config_file):
 
 
 def test_config_external_repo_mapping_filter():
-
     config = NomenclatureConfig.from_file(
         TEST_DATA_DIR / "config" / "filter_mappings.yaml"
     )
diff --git a/tests/test_validate_data.py b/tests/test_validate_data.py
index 89e643e8..07b1e26c 100644
--- a/tests/test_validate_data.py
+++ b/tests/test_validate_data.py
@@ -5,6 +5,7 @@
 
 from nomenclature import DataStructureDefinition
 from nomenclature.processor.data_validator import DataValidator
+from pyam import IamDataFrame
 
 DATA_VALIDATION_TEST_DIR = TEST_DATA_DIR / "validation" / "validate_data"
 
@@ -136,11 +137,12 @@ def test_DataValidator_apply_fails(simple_df, file, item_1, item_2, item_3, capl
 
 
 @pytest.mark.parametrize(
-    "file",
-    ["joined", "legacy"],
+    "file, value",
+    [("joined", 6.0), ("joined", 3.0), ("legacy", 6.0)],
 )
-def test_DataValidator_validate_with_warning(file, simple_df, caplog):
+def test_DataValidator_validate_with_warning(file, value, simple_df, caplog):
     """Checks that failed validation rows are printed in log."""
+    simple_df = IamDataFrame(simple_df._data.replace(6.0, value))
     data_validator = DataValidator.from_file(
         DATA_VALIDATION_TEST_DIR / f"validate_warning_{file}.yaml"
     )
@@ -153,7 +155,6 @@
 0 model_a scen_a World Primary Energy EJ/yr 2010 6.0 error
 1 model_a scen_b World Primary Energy EJ/yr 2010 7.0 error"""
     )
-
     if file == "legacy":
         # prints all failed warning levels for legacy format
         failed_validation_message += """
@@ -163,6 +164,17 @@
 0 model_a scen_a World Primary Energy EJ/yr 2010 6.0 low
 1 model_a scen_b World Primary Energy EJ/yr 2010 7.0 low"""
 
+    if value == 3.0:
+        # prints each warning level when each is triggered by different rows
+        failed_validation_message = """
+  Criteria: variable: ['Primary Energy'], year: [2010], upper_bound: 5.0, lower_bound: 1.0
+  model scenario region variable unit year value warning_level
+  0 model_a scen_b World Primary Energy EJ/yr 2010 7.0 error
+
+  Criteria: variable: ['Primary Energy'], year: [2010], upper_bound: 2.5, lower_bound: 1.0
+  model scenario region variable unit year value warning_level
+  0 model_a scen_a World Primary Energy EJ/yr 2010 3.0 low"""
+
     with pytest.raises(ValueError, match="Data validation failed"):
         data_validator.apply(simple_df)
     assert failed_validation_message in caplog.text
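
Note (not part of the diff): the new per_item_df logic in DataValidator.apply narrows the data after each failed criterion, so that less severe criteria within the same item are only checked against rows that have not already failed. The narrowing relies on a pandas anti-join idiom: concatenating the full data with the failed rows and dropping every row that then appears twice. The sketch below illustrates only that idiom on a made-up DataFrame; the column names and threshold are hypothetical and it does not use pyam's actual data model.

import pandas as pd

# Hypothetical long-format data, standing in for IamDataFrame.data.
data = pd.DataFrame(
    {
        "scenario": ["scen_a", "scen_b", "scen_c"],
        "year": [2010, 2010, 2010],
        "value": [6.0, 7.0, 2.0],
    }
)

# Rows failing a first, more severe criterion (illustrative upper bound of 5).
failed = data[data["value"] > 5]

# Anti-join: rows present in both frames occur twice after concat and are all
# removed by keep=False, leaving only rows that did not fail. This assumes
# `failed` is an exact row-subset of `data` (same columns, same values).
remaining = pd.concat([data, failed]).drop_duplicates(keep=False)
print(remaining)  # only the scen_c row remains for the next, less severe check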