diff --git a/build/utils.py b/build/utils.py
index 18f273bf08..294c3e3f93 100644
--- a/build/utils.py
+++ b/build/utils.py
@@ -4,6 +4,7 @@
 from datetime import datetime
 from pathlib import Path
+import yaml
 
 
 def get_commit_hash():
     return subprocess.check_output(['git', 'rev-parse', 'HEAD']).decode().strip()
@@ -54,12 +55,32 @@ def filter_row(row, simulation_round, product, category=None, sector=None):
     return values
 
 
+def read_definitions_file(file_path):
+    if file_path.suffix == '.json':
+        return json.loads(file_path.read_text(encoding='utf-8'))
+    elif file_path.suffix == '.yaml':
+        return [
+            dict(specifier=specifier, **definition)
+            for specifier, definition in yaml.safe_load(file_path.read_text(encoding='utf-8')).items()
+        ]
+    else:
+        return []
+
+
 def read_definitions():
     definitions_path = Path('definitions')
     definitions = {}
     for file_path in definitions_path.iterdir():
-        with open(file_path, encoding='utf-8') as fp:
-            definitions[file_path.stem] = json.loads(fp.read())
+        if file_path.is_dir():
+            definitions[file_path.stem] = []
+            for group_path in file_path.iterdir():
+                definitions[file_path.stem] += [
+                    dict(group=group_path.stem, **definition)
+                    for definition in read_definitions_file(group_path)
+                ]
+        else:
+            definitions[file_path.stem] = read_definitions_file(file_path)
+
     return definitions
diff --git a/requirements.txt b/requirements.txt
index ed90726bf6..11d67da8a8 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,3 +4,4 @@ pytest~=7.0.1
 jsonschema~=4.4.0
 markdown-customblocks~=1.2.0
 pre-commit~=3.6.2
+PyYAML~=5.4.1
diff --git a/tests/test_definitions.py b/tests/test_definitions.py
index c92a44b87a..7f69fa19e8 100644
--- a/tests/test_definitions.py
+++ b/tests/test_definitions.py
@@ -1,70 +1,91 @@
 import json
-import os
 from pathlib import Path
 
 import jsonschema
+import yaml
+
+
+def read_file(file_path):
+    if file_path.suffix == '.json':
+        return json.loads(file_path.read_text(encoding='utf-8'))
+    elif file_path.suffix == '.yaml':
+        return [
+            dict(specifier=specifier, **definition)
+            for specifier, definition in yaml.safe_load(file_path.read_text(encoding='utf-8')).items()
+        ]
+    else:
+        raise AssertionError()
+
+
+def read_instance(file_path):
+    if file_path.is_dir():
+        instance = []
+        for group_path in file_path.iterdir():
+            instance += [
+                dict(group=group_path.stem, **definition)
+                for definition in read_file(group_path)
+            ]
+        return instance
+    else:
+        return read_file(file_path)
 
 
 def test_definitions():
-    with open(os.path.join(os.path.dirname(__file__), 'meta.json')) as f:
-        schema = json.loads(f.read())
+    with Path(__file__).parent.joinpath('meta.json').open() as fp:
+        schema = json.load(fp)
 
-    for file_name in os.listdir('definitions'):
-        file_path = os.path.join('definitions', file_name)
+    for file_path in Path('definitions').iterdir():
+        # read the instance
+        instance = read_instance(file_path)
 
-        with open(file_path) as f:
-            instance = json.loads(f.read())
-
-            # validate json with meta json
-            jsonschema.validate(schema=schema, instance=instance)
+        # validate json with meta json
+        jsonschema.validate(schema=schema, instance=instance)
 
 
 def test_double_specifiers():
-    for file_name in os.listdir('definitions'):
-        if file_name not in ['subcategory.json']:
-            file_path = os.path.join('definitions', file_name)
-
-            with open(file_path) as f:
-                instance = json.loads(f.read())
+    for file_path in Path('definitions').iterdir():
+        if file_path.stem not in ['subcategory']:
+            # read the instance
+            definitions = read_instance(file_path)
 
-                # check for double specifiers
-                seen = set()
-                doubles = []
-                for row in instance:
-                    if row['specifier'] in seen:
-                        doubles.append(row['specifier'])
-                    else:
-                        seen.add(row['specifier'])
+            # check for double specifiers
+            seen = set()
+            doubles = []
+            for definition in definitions:
+                if definition['specifier'] in seen:
+                    doubles.append(definition['specifier'])
+                else:
+                    seen.add(definition['specifier'])
 
-                assert not doubles, '{} {}'.format(file_name, doubles)
+            assert not doubles, '{} {}'.format(file_path, doubles)
 
 
 def test_variable():
-    simulation_rounds = json.loads(Path('definitions').joinpath('simulation_round.json').read_text())
+    simulation_rounds = read_instance(Path('definitions') / 'simulation_round.json')
     simulation_round_specifiers = [simulation_round['specifier'] for simulation_round in simulation_rounds]
 
-    file_path = os.path.join('definitions', 'variable.json')
-    with open(file_path) as f:
-        instance = json.loads(f.read())
-        for row in instance:
-            sectors = row.get('sectors') + ['other']
-            if sectors:
-                for key, value in row.items():
-                    if isinstance(value, dict):
-                        field = '{}.{}'.format(row.get('specifier'), key)
-                        for key in value:
-                            if key in simulation_round_specifiers:
-                                for k in value[key]:
-                                    assert k in sectors, field
-                            else:
-                                assert key in sectors, field
+    # read the instance
+    instance = read_instance(Path('definitions') / 'variable.json')
+
+    for row in instance:
+        sectors = row.get('sectors') + ['other']
+        if sectors:
+            for key, value in row.items():
+                if isinstance(value, dict):
+                    field = '{}.{}'.format(row.get('specifier'), key)
+                    for key in value:
+                        if key in simulation_round_specifiers:
+                            for k in value[key]:
+                                assert k in sectors, field
+                        else:
+                            assert key in sectors, field
 
 
 def test_dataset_groups():
     for file in ['soc_dataset.json', 'geo_dataset.json']:
-        groups = json.loads(Path('definitions').joinpath('group.json').read_text())
-        datasets = json.loads(Path('definitions').joinpath(file).read_text())
+        groups = read_instance(Path('definitions') / 'group.json')
+        datasets = read_instance(Path('definitions') / file)
 
         group_specifiers = [group['specifier'] for group in groups]
 
@@ -75,8 +96,8 @@ def test_dataset_groups():
 
 
 def test_variable_groups():
-    groups = json.loads(Path('definitions').joinpath('group.json').read_text())
-    variables = json.loads(Path('definitions').joinpath('variable.json').read_text())
+    groups = read_instance(Path('definitions') / 'group.json')
+    variables = read_instance(Path('definitions') / 'variable.json')
 
     group_specifiers = [group['specifier'] for group in groups]
 
@@ -87,21 +108,19 @@ def test_variable_groups():
 
 
 def test_nested_simulation_rounds():
-    simulation_rounds = json.loads(Path('definitions').joinpath('simulation_round.json').read_text())
+    simulation_rounds = read_instance(Path('definitions') / 'simulation_round.json')
     simulation_round_specifiers = [simulation_round['specifier'] for simulation_round in simulation_rounds]
 
-    for file_name in os.listdir('definitions'):
-        if file_name not in ['experiments.json']:
-            file_path = os.path.join('definitions', file_name)
-
-            with open(file_path) as f:
-                instance = json.loads(f.read())
-
-                for row in instance:
-                    simulation_rounds = row.get('simulation_rounds', simulation_round_specifiers)
-                    for value in row.values():
-                        if isinstance(value, dict):
-                            if simulation_rounds[0] in value.keys():
-                                # assert that all (both simulation_round_specifiers) are there
-                                for simulation_round in simulation_rounds[1:]:
-                                    assert simulation_round in value.keys(), row.get('specifier')
+    for file_path in Path('definitions').iterdir():
+        if file_path.name not in ['experiments.json']:
+            # read the instance
+            instance = read_instance(file_path)
+
+            for row in instance:
+                simulation_rounds = row.get('simulation_rounds', simulation_round_specifiers)
+                for value in row.values():
+                    if isinstance(value, dict):
+                        if simulation_rounds[0] in value.keys():
+                            # assert that all (both simulation_round_specifiers) are there
+                            for simulation_round in simulation_rounds[1:]:
+                                assert simulation_round in value.keys(), row.get('specifier')
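
Note (not part of the patch): the '.yaml' branch of read_definitions_file() / read_file() flattens a mapping of specifier to definition into the same list-of-dicts shape the JSON definitions already use, and the directory branch of read_definitions() / read_instance() additionally tags each entry with the containing file's stem as its group. The sketch below illustrates that transformation; the specifiers, fields, and group name are invented for the example and are not taken from the repository.

import yaml

# Hypothetical YAML definitions file: top-level keys are the specifiers,
# values hold the remaining definition fields (names invented for this sketch).
yaml_text = """
example_a:
  title: First example definition
example_b:
  title: Second example definition
"""

# Same flattening as the '.yaml' branch in the diff: each (specifier, definition)
# pair becomes one dict that carries the specifier as an explicit field.
definitions = [
    dict(specifier=specifier, **definition)
    for specifier, definition in yaml.safe_load(yaml_text).items()
]
print(definitions)
# [{'specifier': 'example_a', 'title': 'First example definition'},
#  {'specifier': 'example_b', 'title': 'Second example definition'}]

# The directory branch additionally tags every entry with the file stem as 'group'
# ('example_group' here stands in for a hypothetical group file name).
grouped = [dict(group='example_group', **definition) for definition in definitions]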