Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support for almost equal like equality #988

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions ChangeLog.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ BIOM-Format ChangeLog
biom-2.1.16-dev
---------------

New Features:

* `Table.allclose` is now available to provide almost equality support including equality of `nan` by wrapping NumPy's `allclose`. See issues [#982](https://github.com/biocore/biom-format/issues/982) and [#983](https://github.com/biocore/biom-format/issues/983).

Maintenance:

* Python 3.7 and 3.8 removed from CI as they are [end-of-life](https://devguide.python.org/versions/). Python 3.13 added to CI. See PR [#986](https://github.com/biocore/biom-format/pull/986).
Expand Down
98 changes: 72 additions & 26 deletions biom/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -1839,59 +1839,105 @@ def descriptive_equality(self, other):
return "Observation metadata are not the same"
if not np.array_equal(self.metadata(), other.metadata()):
return "Sample metadata are not the same"
if not self._data_equality(other._data):
if not self._data_equality(other):
return "Data elements are not the same"

return "Tables appear equal"

def __eq__(self, other):
"""Equality is determined by the data matrix, metadata, and IDs"""
"""Equality is determined by the data matrix, metadata, and IDs

Matrices are equal iff the following items are equal:
- shape
- dtype
- size (nnz)
- matrix data (more expensive, so checked last)

The sparse format does not need to be the same between the two
matrices. ``self`` and ``other`` will be converted to csr format if
necessary before performing the final comparison.

"""
if not self._data_equality_meta(other):
return False

if not self._data_equality(other):
return False

return True

def allclose(self, other, **allclose_kwargs):
    """Test whether two tables are equal within a numeric tolerance.

    The checks performed by ``_data_equality_meta`` must pass first. The
    two data matrices must then store their non-zero values at exactly the
    same coordinates. Only when both hold are the stored values compared
    within tolerance using ``np.allclose``.

    Parameters
    ----------
    other : biom.Table
        The table to compare against.
    allclose_kwargs : dict
        Any keyword arguments to provide to ``np.allclose`` (for example
        ``rtol``, ``atol`` or ``equal_nan``).

    Returns
    -------
    bool
        Whether the two tables are equal within tolerance.

    Notes
    -----
    Specify ``equal_nan=True`` to allow NaN to test equal to NaN.

    The sparse format does not need to be the same between the two
    matrices; both are viewed as canonical CSR (sorted indices, no
    duplicate entries) for the comparison. The tables themselves are not
    modified.
    """
    if not self._data_equality_meta(other):
        return False

    def canonical_csr(matrix):
        # ``tocsr`` may hand back the very same object when the matrix is
        # already CSR, so canonicalise on a copy to avoid mutating a table.
        csr = matrix.tocsr()
        if not csr.has_canonical_format:
            csr = csr.copy()
            csr.sum_duplicates()
        return csr

    self_csr = canonical_csr(self._data)
    other_csr = canonical_csr(other._data)

    # Comparing only ``.data`` would treat matrices whose non-zero values
    # sit at different coordinates as close; the sparsity structure has to
    # match before the stored values can be compared element by element.
    if not np.array_equal(self_csr.indptr, other_csr.indptr):
        return False
    if not np.array_equal(self_csr.indices, other_csr.indices):
        return False

    return np.allclose(self_csr.data, other_csr.data, **allclose_kwargs)

def _data_equality_meta(self, other):
if not isinstance(other, self.__class__):
return False

if self.type != other.type:
return False

if not np.array_equal(self.ids(axis='observation'),
other.ids(axis='observation')):
return False

if not np.array_equal(self.ids(), other.ids()):
return False

if not np.array_equal(self.metadata(axis='observation'),
other.metadata(axis='observation')):
return False

if not np.array_equal(self.metadata(), other.metadata()):
return False
if not self._data_equality(other._data):
return False

return True

def _data_equality(self, other):
"""Return ``True`` if both matrices are equal.

Matrices are equal iff the following items are equal:
- shape
- dtype
- size (nnz)
- matrix data (more expensive, so checked last)

The sparse format does not need to be the same between the two
matrices. ``self`` and ``other`` will be converted to csr format if
necessary before performing the final comparison.

"""
if self._data.shape != other.shape:
if self._data.shape != other._data.shape:
return False

if self._data.dtype != other.dtype:
if self._data.dtype != other._data.dtype:
return False

if self._data.nnz != other.nnz:
if self._data.nnz != other._data.nnz:
return False

self._data = self._data.tocsr()
other = other.tocsr()
return True

def _data_equality(self, other):
self_data = self._data.tocsr()
other_data = other._data.tocsr()

if (self._data != other).nnz > 0:
if (self_data != other_data).nnz > 0:
return False

return True
Expand Down
26 changes: 23 additions & 3 deletions biom/tests/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -2561,6 +2561,26 @@ def test_sort(self):
with self.assertRaises(UnknownAxisError):
t.sort(axis='foo')

def test_allclose(self):
    """Check tolerance-based table equality, including NaN handling"""
    base = self.st1

    # identical and equivalent tables are close; a different table is not
    self.assertTrue(base.allclose(base))
    self.assertTrue(base.allclose(self.st2))
    self.assertFalse(base.allclose(self.st3))

    # a small shift fails at default tolerance, passes at a loose one
    shifted = base.copy()
    shifted._data.data += 0.0001
    self.assertFalse(base.allclose(shifted))
    self.assertTrue(base.allclose(shifted, atol=1e-1))

    # NaN only compares equal when explicitly requested
    nan_left = base.copy()
    nan_right = base.copy()
    for table in (nan_left, nan_right):
        table._data.data[0] = np.nan

    self.assertFalse(nan_left.allclose(nan_right))
    self.assertFalse(nan_left.allclose(nan_right, atol=1e-1))
    self.assertTrue(nan_left.allclose(nan_right, equal_nan=True))

def test_eq(self):
"""sparse equality"""
self.assertTrue(self.st1 == self.st2)
Expand All @@ -2573,9 +2593,9 @@ def test_eq(self):

def test_data_equality(self):
"""check equality between tables"""
self.assertTrue(self.st1._data_equality(self.st2._data))
self.assertTrue(self.st1._data_equality(self.st1._data))
self.assertFalse(self.st1._data_equality(self.st3._data))
self.assertTrue(self.st1._data_equality(self.st2))
self.assertTrue(self.st1._data_equality(self.st1))
self.assertFalse(self.st1._data_equality(self.st3))

def test_nonzero(self):
"""Return a list of nonzero positions"""
Expand Down