Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support for almost equal like equality #988

Open
wants to merge 6 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions ChangeLog.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ BIOM-Format ChangeLog
biom-2.1.16-dev
---------------

New Features:

* `Table.allclose` is now available to test approximate (within-tolerance) equality between tables, optionally treating `nan` values as equal, by wrapping NumPy's `allclose`. See issues [#982](https://github.com/biocore/biom-format/issues/982) and [#983](https://github.com/biocore/biom-format/issues/983).

Maintenance:

* Python 3.7 and 3.8 removed from CI as they are [end-of-life](https://devguide.python.org/versions/). Python 3.13 added to CI. See PR [#986](https://github.com/biocore/biom-format/pull/986).
Expand Down
67 changes: 53 additions & 14 deletions biom/table.py
Original file line number Diff line number Diff line change
Expand Up @@ -1839,28 +1839,76 @@ def descriptive_equality(self, other):
return "Observation metadata are not the same"
if not np.array_equal(self.metadata(), other.metadata()):
return "Sample metadata are not the same"
if not self._data_equality(other._data):
if not self._data_equality(other):
return "Data elements are not the same"

return "Tables appear equal"

def __eq__(self, other):
    """Return whether two tables match in IDs, metadata, and data matrix.

    Parameters
    ----------
    other : object
        The object to compare against.

    Returns
    -------
    bool
        ``True`` only when both ``_data_equality_meta`` and
        ``_data_equality`` accept ``other``.
    """
    # The ID/metadata check runs first; the data matrices are only
    # compared when it passes.
    meta_matches = self._data_equality_meta(other)
    return bool(meta_matches and self._data_equality(other))

def allclose(self, other, **allclose_kwargs):
    """Test whether two tables are equal within a tolerance.

    IDs and metadata must match exactly (as decided by
    ``_data_equality_meta``); the stored matrix values are then compared
    with ``np.allclose``.

    Parameters
    ----------
    other : biom.Table
        The table to compare against.
    allclose_kwargs : dict
        Any keyword arguments to provide to ``np.allclose``
        (e.g. ``rtol``, ``atol``, ``equal_nan``).

    Returns
    -------
    bool
        Whether the two tables are equal within tolerance.

    Notes
    -----
    Specify ``equal_nan=True`` to allow NaN to test equal.

    Only explicitly stored values are compared, so both tables must
    store values at the same (row, column) positions. Tables whose
    sparsity patterns differ are reported as not close.
    """
    if not self._data_equality_meta(other):
        return False

    def _canonical_csr(matrix):
        # Row-major CSR with sorted column indices so that ``data`` is in
        # a comparable order. ``sorted_indices`` returns a copy, so the
        # table's own matrix is never reordered in place.
        csr = matrix.tocsr()
        if csr.has_sorted_indices:
            return csr
        return csr.sorted_indices()

    self_csr = _canonical_csr(self._data)
    other_csr = _canonical_csr(other._data)

    if self_csr.shape != other_csr.shape:
        return False

    # Comparing ``data`` alone would pair up values that live at
    # different positions; require identical sparsity structure first.
    if not np.array_equal(self_csr.indptr, other_csr.indptr):
        return False
    if not np.array_equal(self_csr.indices, other_csr.indices):
        return False

    return bool(np.allclose(self_csr.data, other_csr.data,
                            **allclose_kwargs))

def _data_equality_meta(self, other):
"""Check everything about two tables except the matrix values.

Returns ``False`` as soon as one of the following differs between
``self`` and ``other``: the class, ``type``, the observation IDs, the
IDs on the default axis, the observation metadata, the metadata on the
default axis, or the shape, dtype or ``nnz`` of ``_data``. Returns
``True`` otherwise.
"""
# other must be an instance of this table's class
if not isinstance(other, self.__class__):
return False

if self.type != other.type:
return False

# IDs on the observation axis, then on the default axis
if not np.array_equal(self.ids(axis='observation'),
other.ids(axis='observation')):
return False

if not np.array_equal(self.ids(), other.ids()):
return False

# metadata on the observation axis, then on the default axis
if not np.array_equal(self.metadata(axis='observation'),
other.metadata(axis='observation')):
return False

if not np.array_equal(self.metadata(), other.metadata()):
return False
# NOTE(review): the next line has no body and repeats the pre-change data
# check; it looks like a removed diff line carried over by the page
# rendering -- confirm before treating this listing as runnable code.
if not self._data_equality(other._data):

# structural properties of the underlying matrix
if self._data.shape != other._data.shape:
return False

if self._data.dtype != other._data.dtype:
return False

if self._data.nnz != other._data.nnz:
return False

return True
Expand All @@ -1879,19 +1927,10 @@ def _data_equality(self, other):
necessary before performing the final comparison.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are all these docstring comments ("Matrices are equal iff the following items are equal") still appropriate for this particular method? I'm wondering if they would make more sense on `__eq__`, or with some of them moving onto `_data_equality_meta`.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This particular method, `_data_equality`, is called by `__eq__` and should not exhibit different semantics from prior versions. However, I agree that they could be shuffled to more precise locations -- thanks! I'll do that in a moment.


"""
if self._data.shape != other.shape:
return False

if self._data.dtype != other.dtype:
return False

if self._data.nnz != other.nnz:
return False

self._data = self._data.tocsr()
other = other.tocsr()
self_data = self._data.tocsr()
other_data = other._data.tocsr()

if (self._data != other).nnz > 0:
if (self_data != other_data).nnz > 0:
return False

return True
Expand Down
26 changes: 23 additions & 3 deletions biom/tests/test_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -2561,6 +2561,26 @@ def test_sort(self):
with self.assertRaises(UnknownAxisError):
t.sort(axis='foo')

def test_allclose(self):
    """almost-equality through Table.allclose"""
    base = self.st1

    # a table is close to itself and to st2, but not to st3
    self.assertTrue(base.allclose(base))
    self.assertTrue(base.allclose(self.st2))
    self.assertFalse(base.allclose(self.st3))

    # nudge every stored value; only a loose atol accepts the result
    shifted = base.copy()
    shifted._data.data += 0.0001
    self.assertFalse(base.allclose(shifted))
    self.assertTrue(base.allclose(shifted, atol=1e-1))

    # put NaN in the same stored slot of two copies
    nan_left = base.copy()
    nan_right = base.copy()

    nan_left._data.data[0] = np.nan
    nan_right._data.data[0] = np.nan

    # NaN only compares equal when equal_nan is requested
    self.assertFalse(nan_left.allclose(nan_right))
    self.assertFalse(nan_left.allclose(nan_right, atol=1e-1))
    self.assertTrue(nan_left.allclose(nan_right, equal_nan=True))

def test_eq(self):
"""sparse equality"""
self.assertTrue(self.st1 == self.st2)
Expand All @@ -2573,9 +2593,9 @@ def test_eq(self):

def test_data_equality(self):
"""Check `_data_equality` accepts st2 and st1 and rejects st3 against st1"""
# NOTE(review): the first three assertions pass the raw `_data` attribute
# and the last three pass the table itself; these look like the old and
# new forms of the same assertions shown together by the diff -- confirm.
self.assertTrue(self.st1._data_equality(self.st2._data))
self.assertTrue(self.st1._data_equality(self.st1._data))
self.assertFalse(self.st1._data_equality(self.st3._data))
self.assertTrue(self.st1._data_equality(self.st2))
self.assertTrue(self.st1._data_equality(self.st1))
self.assertFalse(self.st1._data_equality(self.st3))

def test_nonzero(self):
"""Return a list of nonzero positions"""
Expand Down
Loading