Skip to content

Commit

Permalink
Make function less complex by moving check to own utility
Browse files Browse the repository at this point in the history
  • Loading branch information
danielhuppmann committed Jul 19, 2024
1 parent 147fb5e commit 5ac4855
Showing 1 changed file with 30 additions and 25 deletions.
55 changes: 30 additions & 25 deletions pyam/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,31 +31,15 @@ def _validate(
kwargs["year"] = _value.get("year", None)
criteria = None

# legacy implementation for multiple validation within one dictionary
else:
_df = _apply_criteria(df._data, criteria, in_range=False)

if criteria is None:
_df = df._data[df.slice(**kwargs)]
if _df.empty:
logger.warning("No data matches filters, skipping validation.")

if value is not None:
if upper_bound or lower_bound is not None:
raise ValueError(
"Using `value` and bounds simultaneously is not supported."
)
upper_bound = value * (1 + (rtol or 0))
lower_bound = value * (1 - (rtol or 0))

failed_validation = []
if upper_bound is not None:
failed_validation.append(_df[_df > upper_bound])
if lower_bound is not None:
failed_validation.append(_df[_df < lower_bound])
if not failed_validation:
return
_df = pd.concat(failed_validation).sort_index()

# legacy implementation for multiple validation within one dictionary
else:
_df = _apply_criteria(df._data, criteria, in_range=False)
_df = _check_bounds(_df, value, rtol, upper_bound, lower_bound)

if not _df.empty:
msg = "{} of {} data points do not satisfy the criteria"
Expand All @@ -66,6 +50,29 @@ def _validate(
return _df.reset_index()


def _check_bounds(data, value=None, rtol=None, upper_bound=None, lower_bound=None):
"""Return al data points that do not satisfy the criteria"""
if value is None and rtol is not None:
raise ValueError(
"Using `rtol` is only supported in conjunction with `value`."
)
if value is not None:
if upper_bound or lower_bound is not None:
raise ValueError(
"Using `value` and bounds simultaneously is not supported."
)
upper_bound, lower_bound = value * ((1 + (rtol or 0)), (1 - (rtol or 0)))

failed_validation = []
if upper_bound is not None:
failed_validation.append(data[data > upper_bound])
if lower_bound is not None:
failed_validation.append(data[data < lower_bound])
if not failed_validation:
return pd.Series([])
return pd.concat(failed_validation).sort_index()


def _check_rows(rows, check, in_range=True, return_test="any"):
"""Check all rows to be in/out of a certain range and provide testing on
return values based on provided conditions
Expand Down Expand Up @@ -120,10 +127,8 @@ def _apply_criteria(df, criteria, **kwargs):
for var, check in criteria.items():
_df = df[df.index.get_level_values("variable") == var]
for group in _df.groupby(META_IDX):
grp_idxs = _check_rows(group[-1], check, **kwargs)
idxs.append(grp_idxs)
df = df.loc[itertools.chain(*idxs)]
return df
idxs.append(_check_rows(group[-1], check, **kwargs))
return df.loc[itertools.chain(*idxs)]


def _exclude_on_fail(df, index):
Expand Down

0 comments on commit 5ac4855

Please sign in to comment.