Skip to content

Commit

Permalink
Lazy median aggregator (SciTools#6167)
Browse files Browse the repository at this point in the history
* add lazy median

* add tests

* add what's new entry

* move tests to pytest

---------

Co-authored-by: Elias <[email protected]>
  • Loading branch information
fnattino and ESadek-MO authored Jan 17, 2025
1 parent 331f012 commit 869691f
Show file tree
Hide file tree
Showing 3 changed files with 114 additions and 5 deletions.
9 changes: 7 additions & 2 deletions docs/src/whatsnew/latest.rst
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,10 @@ This document explains the changes made to Iris for this release
(:issue:`6248`, :pull:`6257`)


#. `@fnattino`_ added the lazy median aggregator :class:`iris.analysis.MEDIAN`
based on the implementation discussed by `@rcomer`_ and `@stefsmeets`_ in
:issue:`4039` (:pull:`6167`).


🐛 Bugs Fixed
=============
Expand Down Expand Up @@ -98,8 +102,9 @@ This document explains the changes made to Iris for this release
Whatsnew author names (@github name) in alphabetical order. Note that,
core dev names are automatically included by the common_links.inc:
.. _@fnattino: https://github.com/fnattino
.. _@jrackham-mo: https://github.com/jrackham-mo
.. _@stefsmeets: https://github.com/stefsmeets

.. comment
Whatsnew resources in alphabetical order:
20 changes: 17 additions & 3 deletions lib/iris/analysis/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -1612,6 +1612,19 @@ def _lazy_max_run(array, axis=-1, **kwargs):
return result


def _lazy_median(data, axis=None, **kwargs):
"""Calculate the lazy median, with support for masked arrays."""
# Dask median requires the axes to be explicitly listed.
axis = range(data.ndim) if axis is None else axis

if np.issubdtype(data, np.integer):
data = data.astype(float)
filled = da.ma.filled(data, np.nan)
result = da.nanmedian(filled, axis=axis, **kwargs)
result_masked = da.ma.fix_invalid(result)
return result_masked


def _rms(array, axis, **kwargs):
rval = np.sqrt(ma.average(array**2, axis=axis, **kwargs))

Expand Down Expand Up @@ -1940,7 +1953,9 @@ def interp_order(length):
"""


MEDIAN = Aggregator("median", ma.median)
MEDIAN = Aggregator(
"median", ma.median, lazy_func=_build_dask_mdtol_function(_lazy_median)
)
"""
An :class:`~iris.analysis.Aggregator` instance that calculates
the median over a :class:`~iris.cube.Cube`, as computed by
Expand All @@ -1953,8 +1968,7 @@ def interp_order(length):
result = cube.collapsed('longitude', iris.analysis.MEDIAN)
This aggregator handles masked data, but NOT lazy data. For lazy aggregation,
please try :obj:`~.PERCENTILE`.
This aggregator handles masked data and lazy data.
"""

Expand Down
90 changes: 90 additions & 0 deletions lib/iris/tests/unit/analysis/test_MEDIAN.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
# Copyright Iris contributors
#
# This file is part of Iris and is released under the BSD license.
# See LICENSE in the root of the repository for full licensing details.
"""Unit tests for the :data:`iris.analysis.MEDIAN` aggregator."""

import numpy as np
import numpy.ma as ma

from iris._lazy_data import (
as_concrete_data,
as_lazy_data,
is_lazy_data,
is_lazy_masked_data,
)
from iris.analysis import MEDIAN
from iris.tests._shared_utils import assert_array_almost_equal, assert_array_equal


def _get_data(lazy=False, masked=False):
data = np.arange(16).reshape((4, 4))
if masked:
mask = np.eye(4)
data = ma.masked_array(data=data, mask=mask)
if lazy:
data = as_lazy_data(data)
return data


class Test_basics:
def setup_method(self):
self.data = _get_data()

def test_name(self):
assert MEDIAN.name() == "median"

def test_collapse(self):
data = MEDIAN.aggregate(self.data, axis=(0, 1))
assert_array_equal(data, [7.5])


class Test_masked:
def setup_method(self):
self.data = _get_data(masked=True)

def test_output_is_masked(self):
result = MEDIAN.aggregate(self.data, axis=1)
assert ma.isMaskedArray(result)

def test_median_is_mask_aware(self):
# the median computed along one axis differs if the array is masked
axis = 1
result = MEDIAN.aggregate(self.data, axis=axis)
data_no_mask = _get_data()
result_no_mask = MEDIAN.aggregate(data_no_mask, axis=axis)
assert not np.allclose(result, result_no_mask)


class Test_lazy:
def setup_method(self):
self.data = _get_data(lazy=True)

def test_output_is_lazy(self):
result = MEDIAN.lazy_aggregate(self.data, axis=(0, 1))
assert is_lazy_data(result)

def test_shape(self):
result = MEDIAN.lazy_aggregate(self.data, axis=1)
assert result.shape == (4,)

def test_result_values(self):
axis = 1
result = MEDIAN.lazy_aggregate(self.data, axis=axis)
expected = np.median(as_concrete_data(self.data), axis=axis)
assert_array_almost_equal(result, expected)


class Test_lazy_masked:
def setup_method(self):
self.data = _get_data(lazy=True, masked=True)

def test_output_is_lazy_and_masked(self):
result = MEDIAN.lazy_aggregate(self.data, axis=1)
assert is_lazy_masked_data(result)

def test_result_values(self):
axis = 1
result = MEDIAN.lazy_aggregate(self.data, axis=axis)
expected = ma.median(as_concrete_data(self.data), axis=axis)
assert_array_almost_equal(result, expected)

0 comments on commit 869691f

Please sign in to comment.