Validate timeseries data using explicit value and rtol #866

Closed
8 changes: 8 additions & 0 deletions pyam/core.py
@@ -1073,6 +1073,8 @@ def validate(
self,
criteria: dict = None,
*,
value: float = None,
rtol: float = None,
upper_bound: float = None,
lower_bound: float = None,
exclude_on_fail: bool = False,
@@ -1088,6 +1090,10 @@

Parameters
----------
value : float, optional
Value to compare with timeseries data.
rtol : float, optional
Allowed relative deviation of the timeseries data from `value`.
upper_bound, lower_bound : float, optional
Upper and lower bounds for validation criteria of timeseries :attr:`data`.
criteria : dict, optional, deprecated
@@ -1111,6 +1117,8 @@
return _validate(
self,
criteria=criteria,
value=value,
rtol=rtol,
upper_bound=upper_bound,
lower_bound=lower_bound,
exclude_on_fail=exclude_on_fail,
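For orientation, here is a minimal usage sketch of the new keyword arguments (the input file and filter values are hypothetical; only the `validate()` signature and return behaviour follow from this diff):

import pyam

df = pyam.IamDataFrame("scenario_data.xlsx")  # hypothetical input file

# flag all "Primary Energy" values in 2030 that deviate more than 10% from 50
failed = df.validate(
    variable="Primary Energy", year=2030, value=50, rtol=0.1,
    exclude_on_fail=True,
)

Data points outside the tolerance band are returned as a dataframe and, with `exclude_on_fail=True`, the offending scenarios are marked as `exclude=True`; if everything passes, the call returns None.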
2 changes: 1 addition & 1 deletion pyam/str.py
@@ -125,7 +125,7 @@ def reduce_hierarchy(x, depth):
"""
_x = x.split("|")
depth = len(_x) + depth - 1 if depth < 0 else depth
return "|".join(_x[0: (depth + 1)])
return "|".join(_x[0 : (depth + 1)])


def escape_regexp(s):
2 changes: 1 addition & 1 deletion pyam/timeseries.py
@@ -129,7 +129,7 @@ def cross_threshold(
years = index[pre] - values[pre] / change

# if year (as int) is returned, add one because int() rounds down
if return_type == int:
if return_type is int:
return [y + 1 for y in map(int, years)]
return years

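A quick illustration of the rounding note in the comment above (the fractional crossing year is made up; only the round-down-plus-one behaviour is shown):

crossing_year = 2033.4         # hypothetical fractional crossing point
print(int(crossing_year))      # 2033 -- int() rounds down
print(int(crossing_year) + 1)  # 2034 -- first full year after the threshold is crossed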
61 changes: 39 additions & 22 deletions pyam/validation.py
@@ -9,8 +9,11 @@
logger = logging.getLogger(__name__)


def _validate(df, criteria, upper_bound, lower_bound, exclude_on_fail, **kwargs): # noqa: C901
def _validate(
df, criteria, *, value, rtol, upper_bound, lower_bound, exclude_on_fail, **kwargs
): # noqa: C901
# TODO: argument `criteria` is deprecated, remove for release >= 3.0

if criteria is not None:
deprecation_warning(
"Use `upper_bound`, `lower_bound`, and filter-arguments instead.",
@@ -22,29 +25,21 @@
)
# translate legacy `criteria` argument to explicit kwargs
if len(criteria) == 1:
key, value = list(criteria.items())[0]
key, _value = list(criteria.items())[0]
kwargs = dict(variable=key)
upper_bound, lower_bound = value.get("up", None), value.get("lo", None)
kwargs["year"] = value.get("year", None)
upper_bound, lower_bound = _value.get("up", None), _value.get("lo", None)
kwargs["year"] = _value.get("year", None)
criteria = None

# legacy implementation for multiple validation within one dictionary
else:
_df = _apply_criteria(df._data, criteria, in_range=False)

if criteria is None:
_df = df._data[df.slice(**kwargs)]
if _df.empty:
logger.warning("No data matches filters, skipping validation.")

failed_validation = []
if upper_bound is not None:
failed_validation.append(_df[_df > upper_bound])
if lower_bound is not None:
failed_validation.append(_df[_df < lower_bound])
if not failed_validation:
return
_df = pd.concat(failed_validation).sort_index()

# legcy implementation for multiple validation within one dictionary
else:
_df = _apply_criteria(df._data, criteria, in_range=False)
_df = _check_bounds(_df, value, rtol, upper_bound, lower_bound)

if not _df.empty:
msg = "{} of {} data points do not satisfy the criteria"
@@ -55,6 +50,30 @@
return _df.reset_index()


def _check_bounds(data, value=None, rtol=None, upper_bound=None, lower_bound=None):
"""Return al data points that do not satisfy the criteria"""
if value is None and rtol is not None:
raise ValueError(

"Using `rtol` is only supported in conjunction with `value`."
)
if value is not None:
if upper_bound is not None or lower_bound is not None:
raise ValueError(

"Using `value` and bounds simultaneously is not supported."
)
upper_bound = value * (1 + (rtol or 0))
lower_bound = value * (1 - (rtol or 0))

failed_validation = []
if upper_bound is not None:
failed_validation.append(data[data > upper_bound])
if lower_bound is not None:
failed_validation.append(data[data < lower_bound])
if not failed_validation:
return pd.Series([])
return pd.concat(failed_validation).sort_index()


def _check_rows(rows, check, in_range=True, return_test="any"):
"""Check all rows to be in/out of a certain range and provide testing on
return values based on provided conditions
@@ -109,10 +128,8 @@
for var, check in criteria.items():
_df = df[df.index.get_level_values("variable") == var]
for group in _df.groupby(META_IDX):
grp_idxs = _check_rows(group[-1], check, **kwargs)
idxs.append(grp_idxs)
df = df.loc[itertools.chain(*idxs)]
return df
idxs.append(_check_rows(group[-1], check, **kwargs))
return df.loc[itertools.chain(*idxs)]


def _exclude_on_fail(df, index):
@@ -124,5 +141,5 @@
df.exclude[index] = True
n = len(index)
logger.info(
f"{n} scenario{s(n)} failed validation and will be set as `exclude=True`."
f"{n} scenario{s(n)} failed validation and will be marked as `exclude=True`."
)
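To make the tolerance logic in `_check_bounds()` above concrete, here is a standalone sketch with made-up numbers (pandas only; the bound arithmetic mirrors the added lines):

import pandas as pd

data = pd.Series([1.0, 2.0, 6.0, 9.0])

value, rtol = 5, 0.5
upper_bound = value * (1 + rtol)  # 7.5
lower_bound = value * (1 - rtol)  # 2.5

# collect all points outside the tolerance band, as in `_check_bounds`
failed = pd.concat(
    [data[data > upper_bound], data[data < lower_bound]]
).sort_index()
print(failed)  # indices 0 and 1 (below 2.5) and index 3 (above 7.5)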
13 changes: 13 additions & 0 deletions tests/test_feature_validation.py
@@ -75,6 +75,7 @@ def test_require_data(test_df_year, kwargs, exclude_on_fail):
dict(criteria={"Primary Energy": {}}),
dict(variable="foo", upper_bound=10),
dict(criteria={"foo": {"up": 10}}),
dict(variable="foo", value=10),
),
)
def test_validate_none(test_df, args):
@@ -90,6 +91,10 @@
(
dict(variable="Primary Energy", upper_bound=10),
dict(criteria={"Primary Energy": {"up": 10}}),
dict(variable="Primary Energy", scenario="scen_a", year=2005, value=1),
# two alternative ways to make values 1 & 2 within tolerance
dict(variable="Primary Energy", year=2005, value=2, rtol=0.5),
dict(variable="Primary Energy", year=2005, value=0.5, rtol=3),
),
)
def test_validate_pass(test_df, args):
@@ -104,6 +109,8 @@
(
dict(variable="Primary Energy|Coal", upper_bound=2),
dict(criteria={"Primary Energy|Coal": {"up": 2}}),
dict(variable="Primary Energy|Coal", value=0.5),
dict(variable="Primary Energy|Coal", value=1, rtol=0.5),
),
)
def test_validate_nonexisting(test_df, args):
@@ -121,6 +128,8 @@
(
dict(variable="Primary Energy", upper_bound=6.5),
dict(criteria={"Primary Energy": {"up": 6.5}}),
dict(variable="Primary Energy", year=2010, value=6),
dict(variable="Primary Energy", value=2, rtol=2),
),
)
def test_validate_up(test_df, args):
@@ -141,6 +150,7 @@
(
dict(variable="Primary Energy", upper_bound=8, lower_bound=2),
dict(criteria={"Primary Energy": {"up": 8, "lo": 2}}),
dict(variable="Primary Energy", value=8, rtol=0.75),
),
)
def test_validate_lo(test_df, args):
@@ -161,6 +171,7 @@
(
dict(variable="Primary Energy", upper_bound=6.5, lower_bound=2),
dict(criteria={"Primary Energy": {"up": 6.5, "lo": 2}}),
dict(variable="Primary Energy", value=4, rtol=0.5),
),
)
def test_validate_both(test_df, args):
@@ -181,6 +192,7 @@
(
dict(variable="Primary Energy", year=2005, upper_bound=6),
dict(criteria={"Primary Energy": {"up": 6, "year": 2005}}),
dict(variable="Primary Energy", year=2005, value=1, rtol=1),
),
)
def test_validate_year_2005(test_df, args):
@@ -195,6 +207,7 @@
(
dict(variable="Primary Energy", year=2010, upper_bound=6),
dict(criteria={"Primary Energy": {"up": 6, "year": 2010}}),
dict(variable="Primary Energy", year=2010, value=6),
),
)
def test_validate_year_2010(test_df, args):
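As a quick check of the "two alternative ways" noted in `test_validate_pass` above: the bounds implied by each parametrization both contain the 2005 values 1 and 2 from the test fixture (arithmetic as in `_check_bounds`):

for value, rtol in [(2, 0.5), (0.5, 3)]:
    print(value * (1 - rtol), value * (1 + rtol))
# -> 1.0 3.0  and  -1.0 2.0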
4 changes: 1 addition & 3 deletions tests/test_utils.py
@@ -78,9 +78,7 @@ def test_pattern_match_dot():
assert (obs == [False, True]).all()


@pytest.mark.parametrize(
"bracket", ("(bar)", "[bar]", "{2}")
)
@pytest.mark.parametrize("bracket", ("(bar)", "[bar]", "{2}"))
def test_pattern_match_brackets(bracket):
s = f"foo {bracket}"
data = pd.Series([s, "foo bar"])