Fix edge cases in metric computation (#3037)

awslabs · Nov 1, 2023 · 74e9a39
1 parent a05f219
commit 74e9a39
Show file tree

Hide file tree

Showing 4 changed files with 138 additions and 26 deletions.
diff --git a/src/gluonts/ev/aggregations.py b/src/gluonts/ev/aggregations.py
@@ -50,7 +50,7 @@ class Sum(Aggregation):
     def step(self, values: np.ndarray) -> None:
         assert self.axis is None or isinstance(self.axis, tuple)
 
-        summed_values = np.ma.sum(values, axis=self.axis)
+        summed_values = np.nansum(values, axis=self.axis)
 
         if self.axis is None or 0 in self.axis:
             if self.partial_result is None:
@@ -66,9 +66,11 @@ def get(self) -> np.ndarray:
         assert self.axis is None or isinstance(self.axis, tuple)
 
         if self.axis is None or 0 in self.axis:
-            return np.ma.copy(self.partial_result)
+            assert isinstance(self.partial_result, np.ndarray)
+            return np.copy(self.partial_result)
 
-        return np.ma.concatenate(self.partial_result)
+        assert isinstance(self.partial_result, list)
+        return np.concatenate(self.partial_result)
 
 
 @dataclass
@@ -107,7 +109,7 @@ def step(self, values: np.ndarray) -> None:
             if self.partial_result is None:
                 self.partial_result = []
 
-            mean_values = np.ma.mean(values, axis=self.axis)
+            mean_values = np.nanmean(values, axis=self.axis)
             assert isinstance(self.partial_result, list)
             self.partial_result.append(mean_values)
 
@@ -118,4 +120,5 @@ def get(self) -> np.ndarray:
             assert isinstance(self.partial_result, np.ndarray)
             return self.partial_result / self.n
 
-        return np.ma.concatenate(self.partial_result)
+        assert isinstance(self.partial_result, list)
+        return np.concatenate(self.partial_result)
diff --git a/src/gluonts/ev/metrics.py b/src/gluonts/ev/metrics.py
@@ -492,7 +492,7 @@ def mean(**quantile_losses: np.ndarray) -> np.ndarray:
             [quantile_loss for quantile_loss in quantile_losses.values()],
             axis=0,
         )
-        return np.ma.mean(stacked_quantile_losses, axis=0)
+        return np.mean(stacked_quantile_losses, axis=0)
 
     def __call__(self, axis: Optional[int] = None) -> DerivedMetric:
         return DerivedMetric(
@@ -515,7 +515,7 @@ def mean(**quantile_losses: np.ndarray) -> np.ndarray:
             [quantile_loss for quantile_loss in quantile_losses.values()],
             axis=0,
         )
-        return np.ma.mean(stacked_quantile_losses, axis=0)
+        return np.mean(stacked_quantile_losses, axis=0)
 
     def __call__(self, axis: Optional[int] = None) -> DerivedMetric:
         return DerivedMetric(
@@ -538,7 +538,7 @@ def mean(**quantile_losses: np.ndarray) -> np.ndarray:
             [quantile_loss for quantile_loss in quantile_losses.values()],
             axis=0,
         )
-        return np.ma.mean(stacked_quantile_losses, axis=0)
+        return np.mean(stacked_quantile_losses, axis=0)
 
     def __call__(self, axis: Optional[int] = None) -> DerivedMetric:
         return DerivedMetric(
@@ -565,7 +565,7 @@ def mean(
             [np.abs(coverages[f"coverage[{q}]"] - q) for q in quantile_levels],
             axis=0,
         )
-        return np.ma.mean(intermediate_result, axis=0)
+        return np.mean(intermediate_result, axis=0)
 
     def __call__(self, axis: Optional[int] = None) -> DerivedMetric:
         return DerivedMetric(

diff --git a/test/ev/test_aggregations.py b/test/ev/test_aggregations.py
@@ -24,19 +24,33 @@
     [
         (
             [
-                np.full((3, 5), np.nan),
-                np.full((3, 5), np.nan),
-                np.full((3, 5), np.nan),
+                np.full((3, 5), 0.0),
+                np.full((3, 5), 0.0),
+                np.full((3, 5), 0.0),
             ],
+            0.0,
+            np.zeros(5),
+            np.zeros(9),
+        ),
+        (
+            np.ma.masked_invalid(
+                [
+                    np.full((3, 5), np.nan),
+                    np.full((3, 5), np.nan),
+                    np.full((3, 5), np.nan),
+                ]
+            ),
             0,
             np.zeros(5),
             np.zeros(9),
         ),
         (
-            [
-                np.array([[0, np.nan], [0, 0]]),
-                np.array([[0, 5], [-5, np.nan]]),
-            ],
+            np.ma.masked_invalid(
+                [
+                    np.array([[0, np.nan], [0, 0]]),
+                    np.array([[0, 5], [-5, np.nan]]),
+                ]
+            ),
             0,
             np.array([-5, 5]),
             np.array([0, 0, 5, -5]),
@@ -58,7 +72,7 @@ def test_Sum(value_stream, res_axis_none, res_axis_0, res_axis_1):
     ):
         sum = Sum(axis=axis)
         for values in value_stream:
-            sum.step(np.ma.masked_invalid(values))
+            sum.step(values)
 
         np.testing.assert_almost_equal(sum.get(), expected_result)
 
@@ -68,19 +82,33 @@ def test_Sum(value_stream, res_axis_none, res_axis_0, res_axis_1):
     [
         (
             [
-                np.full((3, 5), np.nan),
-                np.full((3, 5), np.nan),
-                np.full((3, 5), np.nan),
+                np.full((3, 5), 0.0),
+                np.full((3, 5), 0.0),
+                np.full((3, 5), 0.0),
             ],
+            0.0,
+            np.zeros(5),
+            np.zeros(9),
+        ),
+        (
+            np.ma.masked_invalid(
+                [
+                    np.full((3, 5), np.nan),
+                    np.full((3, 5), np.nan),
+                    np.full((3, 5), np.nan),
+                ]
+            ),
             np.nan,
             np.full(5, np.nan),
             np.full(9, np.nan),
         ),
         (
-            [
-                np.array([[0, np.nan], [0, 0]]),
-                np.array([[0, 5], [-5, np.nan]]),
-            ],
+            np.ma.masked_invalid(
+                [
+                    np.array([[0, np.nan], [0, 0]]),
+                    np.array([[0, 5], [-5, np.nan]]),
+                ]
+            ),
             0,
             np.array([-1.25, 2.5]),
             np.array([0, 0, 2.5, -5]),
@@ -102,7 +130,7 @@ def test_Mean(value_stream, res_axis_none, res_axis_0, res_axis_1):
     ):
         mean = Mean(axis=axis)
         for values in value_stream:
-            mean.step(np.ma.masked_invalid(values))
+            mean.step(values)
 
         np.testing.assert_almost_equal(mean.get(), expected_result)
 

diff --git a/test/ev/test_metrics.py b/test/ev/test_metrics.py
@@ -59,7 +59,7 @@
     METRICS,
 )
 @pytest.mark.parametrize("axis", [None, (0, 1), (0,), (1,), ()])
-def test_metric(metric: MetricDefinition, axis: Optional[tuple]):
+def test_metric_shape(metric: MetricDefinition, axis: Optional[tuple]):
     input_length = 20
     label_length = 5
     num_entries = 7
@@ -98,3 +98,84 @@ def test_metric(metric: MetricDefinition, axis: Optional[tuple]):
         raise ValueError("unsupported axis")
 
     return metric_value
+
+
+@pytest.mark.parametrize(
+    "metric",
+    [
+        ND(),
+        MASE(),
+        MAPE(),
+        NRMSE(),
+        WeightedSumQuantileLoss(0.5),
+        MeanWeightedSumQuantileLoss([0.1, 0.5, 0.9]),
+        MeanScaledQuantileLoss(0.5),
+        AverageMeanScaledQuantileLoss([0.1, 0.5, 0.9]),
+    ],
+)
+@pytest.mark.parametrize("axis", [None, (0, 1), (0,), (1,), ()])
+def test_metric_inf(metric: MetricDefinition, axis: Optional[tuple]):
+    time_series_length = 3
+    number_of_entries = 2
+
+    data = {
+        "label": np.zeros((1, time_series_length)),
+        "0.5": np.ones((1, time_series_length)),
+        "0.1": np.ones((1, time_series_length)),
+        "0.9": np.ones((1, time_series_length)),
+        "mean": np.ones((1, time_series_length)),
+        "seasonal_error": 0.0,
+    }
+
+    evaluator = metric(axis=axis)
+    for _ in range(number_of_entries):
+        evaluator.update(data)
+
+    result = evaluator.get()
+    expected = np.full((number_of_entries, time_series_length), np.inf).sum(
+        axis=axis
+    )
+
+    assert result.shape == expected.shape
+    assert np.allclose(result, expected)
+
+
+@pytest.mark.parametrize(
+    "metric",
+    [
+        ND(),
+        MASE(),
+        MAPE(),
+        SMAPE(),
+        NRMSE(),
+        WeightedSumQuantileLoss(0.5),
+        MeanWeightedSumQuantileLoss([0.1, 0.5, 0.9]),
+        MeanScaledQuantileLoss(0.5),
+        AverageMeanScaledQuantileLoss([0.1, 0.5, 0.9]),
+    ],
+)
+@pytest.mark.parametrize("axis", [None, (0, 1), (0,), (1,), ()])
+def test_metric_nan(metric: MetricDefinition, axis: Optional[tuple]):
+    time_series_length = 3
+    number_of_entries = 2
+
+    data = {
+        "label": np.zeros((1, time_series_length)),
+        "0.5": np.zeros((1, time_series_length)),
+        "0.1": np.zeros((1, time_series_length)),
+        "0.9": np.zeros((1, time_series_length)),
+        "mean": np.zeros((1, time_series_length)),
+        "seasonal_error": 0.0,
+    }
+
+    evaluator = metric(axis=axis)
+    for _ in range(number_of_entries):
+        evaluator.update(data)
+
+    result = evaluator.get()
+    expected = np.full((number_of_entries, time_series_length), np.nan).sum(
+        axis=axis
+    )
+
+    assert result.shape == expected.shape
+    assert np.allclose(result, expected, equal_nan=True)