Validate timeseries data using explicit value and rtol #866

Closed
8 changes: 8 additions & 0 deletions pyam/core.py
@@ -1073,6 +1073,8 @@ def validate(
self,
criteria: dict = None,
*,
value: float = None,
rtol: float = None,
upper_bound: float = None,
lower_bound: float = None,
exclude_on_fail: bool = False,
@@ -1088,6 +1090,10 @@

Parameters
----------
value : float, optional
Value to compare with timeseries data.
rtol : float, optional
Allowed relative deviation of the timeseries data from `value`.
upper_bound, lower_bound : float, optional
Upper and lower bounds for validation criteria of timeseries :attr:`data`.
criteria : dict, optional, deprecated
@@ -1111,6 +1117,8 @@
return _validate(
self,
criteria=criteria,
value=value,
rtol=rtol,
upper_bound=upper_bound,
lower_bound=lower_bound,
exclude_on_fail=exclude_on_fail,
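For orientation, here is a minimal usage sketch of the new keyword arguments (the input file and filter values are hypothetical; only the `validate()` signature and return behaviour follow from this diff):

import pyam

df = pyam.IamDataFrame("scenario_data.xlsx")  # hypothetical input file

# flag all "Primary Energy" values in 2030 that deviate more than 10% from 50
failed = df.validate(
    variable="Primary Energy", year=2030, value=50, rtol=0.1,
    exclude_on_fail=True,
)

Data points outside the tolerance band are returned as a dataframe and, with `exclude_on_fail=True`, the offending scenarios are marked as `exclude=True`; if everything passes, the call returns None.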
2 changes: 1 addition & 1 deletion pyam/str.py
@@ -125,7 +125,7 @@ def reduce_hierarchy(x, depth):
"""
_x = x.split("|")
depth = len(_x) + depth - 1 if depth < 0 else depth
return "|".join(_x[0: (depth + 1)])
return "|".join(_x[0 : (depth + 1)])


def escape_regexp(s):
2 changes: 1 addition & 1 deletion pyam/timeseries.py
@@ -129,7 +129,7 @@ def cross_threshold(
years = index[pre] - values[pre] / change

# if year (as int) is returned, add one because int() rounds down
if return_type == int:
if return_type is int:
return [y + 1 for y in map(int, years)]
return years

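A quick illustration of the rounding note in the comment above (the fractional crossing year is made up; only the round-down-plus-one behaviour is shown):

crossing_year = 2033.4         # hypothetical fractional crossing point
print(int(crossing_year))      # 2033 -- int() rounds down
print(int(crossing_year) + 1)  # 2034 -- first full year after the threshold is crossed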
61 changes: 39 additions & 22 deletions pyam/validation.py
@@ -9,8 +9,11 @@
logger = logging.getLogger(__name__)


def _validate(df, criteria, upper_bound, lower_bound, exclude_on_fail, **kwargs): # noqa: C901
def _validate(
df, criteria, *, value, rtol, upper_bound, lower_bound, exclude_on_fail, **kwargs
): # noqa: C901
# TODO: argument `criteria` is deprecated, remove for release >= 3.0

if criteria is not None:
deprecation_warning(
"Use `upper_bound`, `lower_bound`, and filter-arguments instead.",
@@ -22,29 +25,21 @@
)
# translate legacy `criteria` argument to explicit kwargs
if len(criteria) == 1:
key, value = list(criteria.items())[0]
key, _value = list(criteria.items())[0]
kwargs = dict(variable=key)
upper_bound, lower_bound = value.get("up", None), value.get("lo", None)
kwargs["year"] = value.get("year", None)
upper_bound, lower_bound = _value.get("up", None), _value.get("lo", None)
kwargs["year"] = _value.get("year", None)
criteria = None

# legacy implementation for multiple validation within one dictionary
else:
_df = _apply_criteria(df._data, criteria, in_range=False)

if criteria is None:
_df = df._data[df.slice(**kwargs)]
if _df.empty:
logger.warning("No data matches filters, skipping validation.")

failed_validation = []
if upper_bound is not None:
failed_validation.append(_df[_df > upper_bound])
if lower_bound is not None:
failed_validation.append(_df[_df < lower_bound])
if not failed_validation:
return
_df = pd.concat(failed_validation).sort_index()

# legcy implementation for multiple validation within one dictionary
else:
_df = _apply_criteria(df._data, criteria, in_range=False)
_df = _check_bounds(_df, value, rtol, upper_bound, lower_bound)

if not _df.empty:
msg = "{} of {} data points do not satisfy the criteria"
@@ -55,6 +50,30 @@
return _df.reset_index()


def _check_bounds(data, value=None, rtol=None, upper_bound=None, lower_bound=None):
"""Return al data points that do not satisfy the criteria"""
if value is None and rtol is not None:
raise ValueError(

"Using `rtol` is only supported in conjunction with `value`."
)
if value is not None:
if upper_bound is not None or lower_bound is not None:
raise ValueError(

"Using `value` and bounds simultaneously is not supported."
)
upper_bound = value * (1 + (rtol or 0))
lower_bound = value * (1 - (rtol or 0))

failed_validation = []
if upper_bound is not None:
failed_validation.append(data[data > upper_bound])
if lower_bound is not None:
failed_validation.append(data[data < lower_bound])
if not failed_validation:
return pd.Series([])
return pd.concat(failed_validation).sort_index()


def _check_rows(rows, check, in_range=True, return_test="any"):
"""Check all rows to be in/out of a certain range and provide testing on
return values based on provided conditions
@@ -109,10 +128,8 @@
for var, check in criteria.items():
_df = df[df.index.get_level_values("variable") == var]
for group in _df.groupby(META_IDX):
grp_idxs = _check_rows(group[-1], check, **kwargs)
idxs.append(grp_idxs)
df = df.loc[itertools.chain(*idxs)]
return df
idxs.append(_check_rows(group[-1], check, **kwargs))
return df.loc[itertools.chain(*idxs)]


def _exclude_on_fail(df, index):
@@ -124,5 +141,5 @@
df.exclude[index] = True
n = len(index)
logger.info(
f"{n} scenario{s(n)} failed validation and will be set as `exclude=True`."
f"{n} scenario{s(n)} failed validation and will be marked as `exclude=True`."
)
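To make the tolerance logic in `_check_bounds()` above concrete, here is a standalone sketch with made-up numbers (pandas only; the bound arithmetic mirrors the added lines):

import pandas as pd

data = pd.Series([1.0, 2.0, 6.0, 9.0])

value, rtol = 5, 0.5
upper_bound = value * (1 + rtol)  # 7.5
lower_bound = value * (1 - rtol)  # 2.5

# collect all points outside the tolerance band, as in `_check_bounds`
failed = pd.concat(
    [data[data > upper_bound], data[data < lower_bound]]
).sort_index()
print(failed)  # indices 0 and 1 (below 2.5) and index 3 (above 7.5)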
13 changes: 13 additions & 0 deletions tests/test_feature_validation.py
@@ -75,6 +75,7 @@ def test_require_data(test_df_year, kwargs, exclude_on_fail):
dict(criteria={"Primary Energy": {}}),
dict(variable="foo", upper_bound=10),
dict(criteria={"foo": {"up": 10}}),
dict(variable="foo", value=10),
),
)
def test_validate_none(test_df, args):
@@ -90,6 +91,10 @@
(
dict(variable="Primary Energy", upper_bound=10),
dict(criteria={"Primary Energy": {"up": 10}}),
dict(variable="Primary Energy", scenario="scen_a", year=2005, value=1),
# two alternative ways to make values 1 & 2 within tolerance
dict(variable="Primary Energy", year=2005, value=2, rtol=0.5),
dict(variable="Primary Energy", year=2005, value=0.5, rtol=3),
),
)
def test_validate_pass(test_df, args):
@@ -104,6 +109,8 @@
(
dict(variable="Primary Energy|Coal", upper_bound=2),
dict(criteria={"Primary Energy|Coal": {"up": 2}}),
dict(variable="Primary Energy|Coal", value=0.5),
dict(variable="Primary Energy|Coal", value=1, rtol=0.5),
),
)
def test_validate_nonexisting(test_df, args):
@@ -121,6 +128,8 @@
(
dict(variable="Primary Energy", upper_bound=6.5),
dict(criteria={"Primary Energy": {"up": 6.5}}),
dict(variable="Primary Energy", year=2010, value=6),
dict(variable="Primary Energy", value=2, rtol=2),
),
)
def test_validate_up(test_df, args):
@@ -141,6 +150,7 @@
(
dict(variable="Primary Energy", upper_bound=8, lower_bound=2),
dict(criteria={"Primary Energy": {"up": 8, "lo": 2}}),
dict(variable="Primary Energy", value=8, rtol=0.75),
),
)
def test_validate_lo(test_df, args):
@@ -161,6 +171,7 @@
(
dict(variable="Primary Energy", upper_bound=6.5, lower_bound=2),
dict(criteria={"Primary Energy": {"up": 6.5, "lo": 2}}),
dict(variable="Primary Energy", value=4, rtol=0.5),
),
)
def test_validate_both(test_df, args):
@@ -181,6 +192,7 @@
(
dict(variable="Primary Energy", year=2005, upper_bound=6),
dict(criteria={"Primary Energy": {"up": 6, "year": 2005}}),
dict(variable="Primary Energy", year=2005, value=1, rtol=1),
),
)
def test_validate_year_2005(test_df, args):
@@ -195,6 +207,7 @@
(
dict(variable="Primary Energy", year=2010, upper_bound=6),
dict(criteria={"Primary Energy": {"up": 6, "year": 2010}}),
dict(variable="Primary Energy", year=2010, value=6),
),
)
def test_validate_year_2010(test_df, args):
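As a quick check of the "two alternative ways" noted in `test_validate_pass` above: the bounds implied by each parametrization both contain the 2005 values 1 and 2 from the test fixture (arithmetic as in `_check_bounds`):

for value, rtol in [(2, 0.5), (0.5, 3)]:
    print(value * (1 - rtol), value * (1 + rtol))
# -> 1.0 3.0  and  -1.0 2.0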
4 changes: 1 addition & 3 deletions tests/test_utils.py
@@ -78,9 +78,7 @@ def test_pattern_match_dot():
assert (obs == [False, True]).all()


@pytest.mark.parametrize(
"bracket", ("(bar)", "[bar]", "{2}")
)
@pytest.mark.parametrize("bracket", ("(bar)", "[bar]", "{2}"))
def test_pattern_match_brackets(bracket):
s = f"foo {bracket}"
data = pd.Series([s, "foo bar"])