Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add dimensions option to CLI validate-scenarios #421

Merged
merged 5 commits into from
Oct 31, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 30 additions & 2 deletions nomenclature/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from nomenclature.definition import DataStructureDefinition
from nomenclature.codelist import VariableCodeList
from nomenclature.processor import RegionProcessor
from nomenclature.config import NomenclatureConfig
from nomenclature.testing import assert_valid_structure, assert_valid_yaml

cli = click.Group()
Expand Down Expand Up @@ -287,19 +288,46 @@ def cli_run_workflow(
type=click.Path(exists=True, path_type=Path),
default="definitions",
)
def cli_validate_scenarios(input_file: Path, definitions: Path):
@click.option(
"--dimension",
"dimensions",
help="Optional list of dimensions",
type=str,
multiple=True,
default=None,
)
def cli_validate_scenarios(input_file: Path, definitions: Path, dimensions: List[str]):
"""Validate a scenario file against the codelists of a project

Example
-------
$ nomenclature validate-scenarios <input-file>
--definitions <def-folder>
--dimension <folder1>
--dimension <folder2>
--dimension <folder3>

Parameters
----------
input_file : Path
Input data file, must be IAMC format, .xlsx or .csv
definitions : Path
Definitions folder with codelists, by default "definitions"
dimensions : List[str], optional
Dimensions to be checked, defaults to all sub-folders of `definitions`

Raises
------
ValueError
If input_file validation fails against specified codelist(s).
"""
DataStructureDefinition(definitions).validate(IamDataFrame(input_file))
if not dimensions: # if "dimensions" were not specified
if definitions.parent / "nomenclature.yaml" in definitions.parent.iterdir():
dimensions = NomenclatureConfig.from_file(
definitions.parent / "nomenclature.yaml"
).dimensions
if not dimensions:
dimensions = [x.stem for x in definitions.iterdir() if x.is_dir()]
if not dimensions:
raise FileNotFoundError(f"`definitions` directory is empty: {definitions}")
DataStructureDefinition(definitions, dimensions).validate(IamDataFrame(input_file))
15 changes: 10 additions & 5 deletions nomenclature/testing.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import yaml

from nomenclature.definition import DataStructureDefinition
from nomenclature.config import NomenclatureConfig
from nomenclature.processor import (
DataValidator,
RegionProcessor,
Expand Down Expand Up @@ -144,12 +145,16 @@ def assert_valid_structure(
f"Definitions directory not found: {path / definitions}"
)

if dimensions == (): # if "dimensions" were not specified
dimensions = [x.stem for x in (path / definitions).iterdir() if x.is_dir()]
if not dimensions: # if "dimensions" were not specified
if path / "nomenclature.yaml" in path.iterdir():
dimensions = NomenclatureConfig.from_file(
path / "nomenclature.yaml"
).dimensions
if not dimensions:
raise FileNotFoundError(
f"`definitions` directory is empty: {path / definitions}"
)
dimensions = [x.stem for x in (path / definitions).iterdir() if x.is_dir()]
if not dimensions:
raise FileNotFoundError(f"`definitions` directory is empty: {definitions}")
dc-almeida marked this conversation as resolved.
Show resolved Hide resolved

_check_mappings(path, definitions, dimensions, mappings)
_check_processor_directory(
path,
Expand Down
3 changes: 3 additions & 0 deletions tests/data/cli/structure_validation/nomenclature.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
dimensions:
- region
- variable
dc-almeida marked this conversation as resolved.
Show resolved Hide resolved
60 changes: 52 additions & 8 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -423,10 +423,15 @@ def test_cli_run_workflow(tmp_path, simple_df):


@pytest.mark.parametrize(
"status, unit, exit_code", [("valid", "EJ/yr", 0), ("invalid", "EJ", 1)]
"status, unit, dimensions, exit_code",
[
("valid_1", "EJ/yr", "region", 0),
dc-almeida marked this conversation as resolved.
Show resolved Hide resolved
("invalid", "EJ", "variable", 1),
("valid_2", "EJ", "region", 0),
],
)
def test_cli_valid_scenarios(status, unit, exit_code, tmp_path):
"""Check that CLI validates an IAMC dataset according to defined codelist."""
def test_cli_valid_scenarios(status, unit, exit_code, dimensions, tmp_path):
"""Check that CLI validates an IAMC dataset according to defined codelists."""
IamDataFrame(
pd.DataFrame(
[
Expand All @@ -441,11 +446,50 @@ def test_cli_valid_scenarios(status, unit, exit_code, tmp_path):
"validate-scenarios",
str(tmp_path / f"{status}_data.xlsx"),
"--definitions",
str(
MODULE_TEST_DATA_DIR
/ "structure_validation_no_mappings"
/ "definitions"
),
str(MODULE_TEST_DATA_DIR / "structure_validation" / "definitions"),
"--dimension",
dimensions,
],
)
dc-almeida marked this conversation as resolved.
Show resolved Hide resolved
assert result_valid.exit_code == exit_code


@pytest.mark.parametrize(
    "dimensions_src, path, unit, exit_code",
    [
        # defaults to nomenclature.yaml dimensions
        ("nomenclature_yaml_dimensions", "structure_validation", "EJ/yr", 0),
        # defaults to 'definitions' subfolders dimensions
        ("subfolders_dimensions", "structure_validation_no_mappings", "EJ", 1),
    ],
)
def test_cli_valid_scenarios_implicit_dimensions(
    dimensions_src, path, unit, exit_code, tmp_path
):
    """Check `validate-scenarios` when no `--dimension` option is passed.

    A one-row IAMC dataset is written to an Excel file in `tmp_path` and the
    CLI is invoked with only `--definitions`, so the dimensions to validate
    have to be determined by the command itself.

    Parameters
    ----------
    dimensions_src : str
        Label of the expected source of the dimensions; only used here to
        name the temporary data file
    path : str
        Test-data folder (below `MODULE_TEST_DATA_DIR`) that contains the
        `definitions` directory handed to the CLI
    unit : str
        Unit written to the single "Primary Energy" data row
    exit_code : int
        Exit code the CLI invocation is expected to return
    tmp_path : Path
        Temporary directory (pytest fixture)
    """
    # Single-row scenario data in IAMC format; only the unit is parametrized
    rows = [["m_a", "s_a", "World", "Primary Energy", unit, 1, 2]]
    scenario_data = pd.DataFrame(rows, columns=IAMC_IDX + [2005, 2010])

    data_file = tmp_path / f"{dimensions_src}_data.xlsx"
    IamDataFrame(scenario_data).to_excel(data_file)

    definitions_dir = MODULE_TEST_DATA_DIR / path / "definitions"

    # No "--dimension" argument on purpose: this exercises the implicit lookup
    cli_args = [
        "validate-scenarios",
        str(data_file),
        "--definitions",
        str(definitions_dir),
    ]
    result_valid = runner.invoke(cli, cli_args)

    assert result_valid.exit_code == exit_code