From b115ca335f8b326b0de7a202f1df8e1e7d5c7711 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 17 Jan 2025 08:34:38 -0800 Subject: [PATCH 1/4] [pre-commit.ci] pre-commit autoupdate (#1231) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 05fbd3e04..03680eb2f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,7 +18,7 @@ repos: # hooks: # - id: black - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.8.6 + rev: v0.9.1 hooks: - id: ruff # - repo: https://github.com/econchick/interrogate From ff4a0aab4f1feb933a74e2b5709d34b506644733 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Tue, 21 Jan 2025 14:05:49 -0800 Subject: [PATCH 2/4] [pre-commit.ci] pre-commit autoupdate (#1236) Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .pre-commit-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 03680eb2f..80e876a58 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -18,7 +18,7 @@ repos: # hooks: # - id: black - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.9.1 + rev: v0.9.2 hooks: - id: ruff # - repo: https://github.com/econchick/interrogate From 775fa3b082dfcaa2cbd5c03704ba6758a08845f1 Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Tue, 21 Jan 2025 18:07:32 -0800 Subject: [PATCH 3/4] Move scipy to optional requirements (#1233) --- .github/workflows/run_coverage.yml | 2 +- .readthedocs.yaml | 2 +- CHANGELOG.md | 1 + pyproject.toml | 4 ++-- src/hdmf/common/sparse.py | 22 +++++++++++++++++----- tests/unit/common/test_sparse.py | 22 +++++++++++++++++++--- tox.ini | 2 +- 7 files changed, 42 insertions(+), 13 deletions(-) diff --git a/.github/workflows/run_coverage.yml b/.github/workflows/run_coverage.yml index ee1f9ff91..330bb7aba 100644 --- a/.github/workflows/run_coverage.yml +++ b/.github/workflows/run_coverage.yml @@ -56,7 +56,7 @@ jobs: - name: Install package with optional dependencies if: ${{ matrix.opt_req }} run: | - python -m pip install ".[test,tqdm,zarr,termset]" + python -m pip install ".[test,tqdm,sparse,zarr,termset]" - name: Run tests and generate coverage report run: | diff --git a/.readthedocs.yaml b/.readthedocs.yaml index b752396f4..f17c323b1 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -24,7 +24,7 @@ formats: all # Optionally set the version of Python and requirements required to build your docs python: install: - - path: .[docs,tqdm,zarr,termset] # path to the package relative to the root + - path: .[docs,tqdm,sparse,zarr,termset] # path to the package relative to the root # Optionally include all submodules submodules: diff --git a/CHANGELOG.md b/CHANGELOG.md index 32ab56940..8c72205cd 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ - Importing from `hdmf.build.map` is no longer supported. Import from `hdmf.build` instead. @rly [#1221](https://github.com/hdmf-dev/hdmf/pull/1221) - Python 3.8 has reached end of life. Dropped support for Python 3.8 and add support for Python 3.13. @mavaylon1 [#1209](https://github.com/hdmf-dev/hdmf/pull/1209) - Support for Zarr is limited to versions < 3. @rly [#1229](https://github.com/hdmf-dev/hdmf/pull/1229) +- Scipy is no longer a required dependency. Users using the `CSRMatrix` data type should install `scipy` separately or with `pip install "hdmf[sparse]"`. @rly [#1140](https://github.com/hdmf-dev/hdmf/pull/1140) ### Changed - Added checks to ensure that group and dataset spec names and default names do not contain slashes. @bendichter [#1219](https://github.com/hdmf-dev/hdmf/pull/1219) diff --git a/pyproject.toml b/pyproject.toml index 5308543d4..5a58b6cef 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,13 +35,13 @@ dependencies = [ 'numpy>=1.19.3', "pandas>=1.2.0", "ruamel.yaml>=0.16", - "scipy>=1.7", ] dynamic = ["version"] [project.optional-dependencies] tqdm = ["tqdm>=4.41.0"] zarr = ["zarr>=2.12.0,<3"] +sparse = ["scipy>=1.7"] termset = [ "linkml-runtime>=1.5.5", "schemasheets>=0.1.23", @@ -70,7 +70,7 @@ docs = [ ] # all possible dependencies -all = ["hdmf[tqdm,zarr,termset,test,docs]"] +all = ["hdmf[tqdm,zarr,sparse,termset,test,docs]"] [project.urls] "Homepage" = "https://github.com/hdmf-dev/hdmf" diff --git a/src/hdmf/common/sparse.py b/src/hdmf/common/sparse.py index db38d12e8..0dd7d9654 100644 --- a/src/hdmf/common/sparse.py +++ b/src/hdmf/common/sparse.py @@ -1,4 +1,11 @@ -import scipy.sparse as sps +try: + from scipy.sparse import csr_matrix + SCIPY_INSTALLED = True +except ImportError: + SCIPY_INSTALLED = False + class csr_matrix: # dummy class to prevent import errors + pass + from . import register_class from ..container import Container from ..utils import docval, popargs, to_uint_array, get_data_shape, AllowPositional @@ -7,7 +14,7 @@ @register_class('CSRMatrix') class CSRMatrix(Container): - @docval({'name': 'data', 'type': (sps.csr_matrix, 'array_data'), + @docval({'name': 'data', 'type': (csr_matrix, 'array_data'), 'doc': 'the data to use for this CSRMatrix or CSR data array.' 'If passing CSR data array, *indices*, *indptr*, and *shape* must also be provided'}, {'name': 'indices', 'type': 'array_data', 'doc': 'CSR index array', 'default': None}, @@ -16,13 +23,17 @@ class CSRMatrix(Container): {'name': 'name', 'type': str, 'doc': 'the name to use for this when storing', 'default': 'csr_matrix'}, allow_positional=AllowPositional.WARNING) def __init__(self, **kwargs): + if not SCIPY_INSTALLED: + raise ImportError( + "scipy must be installed to use CSRMatrix. Please install scipy using `pip install scipy`." + ) data, indices, indptr, shape = popargs('data', 'indices', 'indptr', 'shape', kwargs) super().__init__(**kwargs) - if not isinstance(data, sps.csr_matrix): + if not isinstance(data, csr_matrix): temp_shape = get_data_shape(data) temp_ndim = len(temp_shape) if temp_ndim == 2: - data = sps.csr_matrix(data) + data = csr_matrix(data) elif temp_ndim == 1: if any(_ is None for _ in (indptr, indices, shape)): raise ValueError("Must specify 'indptr', 'indices', and 'shape' arguments when passing data array.") @@ -31,9 +42,10 @@ def __init__(self, **kwargs): shape = self.__check_arr(shape, 'shape') if len(shape) != 2: raise ValueError("'shape' argument must specify two and only two dimensions.") - data = sps.csr_matrix((data, indices, indptr), shape=shape) + data = csr_matrix((data, indices, indptr), shape=shape) else: raise ValueError("'data' argument cannot be ndarray of dimensionality > 2.") + # self.__data is a scipy.sparse.csr_matrix self.__data = data @staticmethod diff --git a/tests/unit/common/test_sparse.py b/tests/unit/common/test_sparse.py index 7d94231f4..720f1f473 100644 --- a/tests/unit/common/test_sparse.py +++ b/tests/unit/common/test_sparse.py @@ -1,10 +1,25 @@ from hdmf.common import CSRMatrix from hdmf.testing import TestCase, H5RoundTripMixin - -import scipy.sparse as sps import numpy as np +import unittest + +try: + import scipy.sparse as sps + SCIPY_INSTALLED = True +except ImportError: + SCIPY_INSTALLED = False + + +@unittest.skipIf(SCIPY_INSTALLED, "scipy is installed") +class TestCSRMatrixNoScipy(TestCase): + def test_import_error(self): + data = np.array([[1, 0, 2], [0, 0, 3], [4, 5, 6]]) + with self.assertRaises(ImportError): + CSRMatrix(data=data) + +@unittest.skipIf(not SCIPY_INSTALLED, "scipy is not installed") class TestCSRMatrix(TestCase): def test_from_sparse_matrix(self): @@ -153,7 +168,7 @@ def test_array_bad_dim(self): with self.assertRaisesWith(ValueError, msg): CSRMatrix(data=data, indices=indices, indptr=indptr, shape=shape) - +@unittest.skipIf(not SCIPY_INSTALLED, "scipy is not installed") class TestCSRMatrixRoundTrip(H5RoundTripMixin, TestCase): def setUpContainer(self): @@ -164,6 +179,7 @@ def setUpContainer(self): return CSRMatrix(data=data, indices=indices, indptr=indptr, shape=shape) +@unittest.skipIf(not SCIPY_INSTALLED, "scipy is not installed") class TestCSRMatrixRoundTripFromLists(H5RoundTripMixin, TestCase): """Test that CSRMatrix works with lists as well""" diff --git a/tox.ini b/tox.ini index 775cb7592..4caa68a4b 100644 --- a/tox.ini +++ b/tox.ini @@ -29,7 +29,7 @@ extras = # which optional dependency set(s) to use (default: none) pytest: test gallery: doc - optional: tqdm,zarr,termset + optional: tqdm,sparse,zarr,termset commands = # commands to run for every environment python --version # print python version for debugging From ab8cf3b011a6b468ce87aa1470183bee5f5f44f6 Mon Sep 17 00:00:00 2001 From: Ryan Ly Date: Wed, 22 Jan 2025 08:13:56 -0800 Subject: [PATCH 4/4] Don't override col_cls in DynamicTable.add_column (#1091) Co-authored-by: Matthew Avaylon --- CHANGELOG.md | 3 ++ src/hdmf/common/io/table.py | 3 +- src/hdmf/common/table.py | 51 +++++++++++++++--------- tests/unit/common/test_generate_table.py | 49 ++++++++++++++++++++++- 4 files changed, 83 insertions(+), 23 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8c72205cd..473be3100 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,9 @@ ### Added - Added script to check Python version support for HDMF dependencies. @rly [#1230](https://github.com/hdmf-dev/hdmf/pull/1230) +### Fixed +- Fixed issue with `DynamicTable.add_column` not allowing subclasses of `DynamicTableRegion` or `EnumData`. @rly [#1091](https://github.com/hdmf-dev/hdmf/pull/1091) + ## HDMF 3.14.6 (December 20, 2024) ### Enhancements diff --git a/src/hdmf/common/io/table.py b/src/hdmf/common/io/table.py index 50395ba24..379553c07 100644 --- a/src/hdmf/common/io/table.py +++ b/src/hdmf/common/io/table.py @@ -78,12 +78,11 @@ def process_field_spec(cls, classdict, docval_args, parent_cls, attr_name, not_i required=field_spec.required ) dtype = cls._get_type(field_spec, type_map) + column_conf['class'] = dtype if issubclass(dtype, DynamicTableRegion): # the spec does not know which table this DTR points to # the user must specify the table attribute on the DTR after it is generated column_conf['table'] = True - else: - column_conf['class'] = dtype index_counter = 0 index_name = attr_name diff --git a/src/hdmf/common/table.py b/src/hdmf/common/table.py index 84ac4da3b..2f6401672 100644 --- a/src/hdmf/common/table.py +++ b/src/hdmf/common/table.py @@ -521,7 +521,7 @@ def _init_class_columns(self): description=col['description'], index=col.get('index', False), table=col.get('table', False), - col_cls=col.get('class', VectorData), + col_cls=col.get('class'), # Pass through extra kwargs for add_column that subclasses may have added **{k: col[k] for k in col.keys() if k not in DynamicTable.__reserved_colspec_keys}) @@ -564,10 +564,13 @@ def _set_dtr_targets(self, target_tables: dict): if not column_conf.get('table', False): raise ValueError("Column '%s' must be a DynamicTableRegion to have a target table." % colname) - self.add_column(name=column_conf['name'], - description=column_conf['description'], - index=column_conf.get('index', False), - table=True) + self.add_column( + name=column_conf['name'], + description=column_conf['description'], + index=column_conf.get('index', False), + table=True, + col_cls=column_conf.get('class'), + ) if isinstance(self[colname], VectorIndex): col = self[colname].target else: @@ -681,7 +684,7 @@ def add_row(self, **kwargs): index=col.get('index', False), table=col.get('table', False), enum=col.get('enum', False), - col_cls=col.get('class', VectorData), + col_cls=col.get('class'), # Pass through extra keyword arguments for add_column that # subclasses may have added **{k: col[k] for k in col.keys() @@ -753,7 +756,7 @@ def __eq__(self, other): 'default': False}, {'name': 'enum', 'type': (bool, 'array_data'), 'default': False, 'doc': ('whether or not this column contains data from a fixed set of elements')}, - {'name': 'col_cls', 'type': type, 'default': VectorData, + {'name': 'col_cls', 'type': type, 'default': None, 'doc': ('class to use to represent the column data. If table=True, this field is ignored and a ' 'DynamicTableRegion object is used. If enum=True, this field is ignored and a EnumData ' 'object is used.')}, @@ -805,29 +808,39 @@ def add_column(self, **kwargs): # noqa: C901 % (name, self.__class__.__name__, spec_index)) warn(msg, stacklevel=3) - spec_col_cls = self.__uninit_cols[name].get('class', VectorData) - if col_cls != spec_col_cls: - msg = ("Column '%s' is predefined in %s with class=%s which does not match the entered " - "col_cls argument. The predefined class spec will be ignored. " - "Please ensure the new column complies with the spec. " - "This will raise an error in a future version of HDMF." - % (name, self.__class__.__name__, spec_col_cls)) - warn(msg, stacklevel=2) - ckwargs = dict(kwargs) # Add table if it's been specified if table and enum: raise ValueError("column '%s' cannot be both a table region " "and come from an enumerable set of elements" % name) + # Update col_cls if table is specified if table is not False: - col_cls = DynamicTableRegion + if col_cls is None: + col_cls = DynamicTableRegion if isinstance(table, DynamicTable): ckwargs['table'] = table + # Update col_cls if enum is specified if enum is not False: - col_cls = EnumData + if col_cls is None: + col_cls = EnumData if isinstance(enum, (list, tuple, np.ndarray, VectorData)): ckwargs['elements'] = enum + # Update col_cls to the default VectorData if col_cls is None + if col_cls is None: + col_cls = VectorData + + if name in self.__uninit_cols: # column is a predefined optional column from the spec + # check the given values against the predefined optional column spec. if they do not match, raise a warning + # and ignore the given arguments. users should not be able to override these values + spec_col_cls = self.__uninit_cols[name].get('class') + if spec_col_cls is not None and col_cls != spec_col_cls: + msg = ("Column '%s' is predefined in %s with class=%s which does not match the entered " + "col_cls argument. The predefined class spec will be ignored. " + "Please ensure the new column complies with the spec. " + "This will raise an error in a future version of HDMF." + % (name, self.__class__.__name__, spec_col_cls)) + warn(msg, stacklevel=2) # If the user provided a list of lists that needs to be indexed, then we now need to flatten the data # We can only create the index actual VectorIndex once we have the VectorData column so we compute @@ -873,7 +886,7 @@ def add_column(self, **kwargs): # noqa: C901 if col in self.__uninit_cols: self.__uninit_cols.pop(col) - if col_cls is EnumData: + if issubclass(col_cls, EnumData): columns.append(col.elements) col.elements.parent = self diff --git a/tests/unit/common/test_generate_table.py b/tests/unit/common/test_generate_table.py index 7f7d7da40..71c15aad0 100644 --- a/tests/unit/common/test_generate_table.py +++ b/tests/unit/common/test_generate_table.py @@ -16,6 +16,13 @@ class TestDynamicDynamicTable(TestCase): def setUp(self): + + self.dtr_spec = DatasetSpec( + data_type_def='CustomDTR', + data_type_inc='DynamicTableRegion', + doc='a test DynamicTableRegion column', # this is overridden where it is used + ) + self.dt_spec = GroupSpec( 'A test extension that contains a dynamic table', data_type_def='TestTable', @@ -99,7 +106,13 @@ def setUp(self): doc='a test column', dtype='float', quantity='?', - ) + ), + DatasetSpec( + data_type_inc='CustomDTR', + name='optional_custom_dtr_col', + doc='a test DynamicTableRegion column', + quantity='?' + ), ] ) @@ -107,6 +120,7 @@ def setUp(self): writer = YAMLSpecWriter(outdir='.') self.spec_catalog = SpecCatalog() + self.spec_catalog.register_spec(self.dtr_spec, 'test.yaml') self.spec_catalog.register_spec(self.dt_spec, 'test.yaml') self.spec_catalog.register_spec(self.dt_spec2, 'test.yaml') self.namespace = SpecNamespace( @@ -124,7 +138,7 @@ def setUp(self): self.test_dir = tempfile.mkdtemp() spec_fpath = os.path.join(self.test_dir, 'test.yaml') namespace_fpath = os.path.join(self.test_dir, 'test-namespace.yaml') - writer.write_spec(dict(groups=[self.dt_spec, self.dt_spec2]), spec_fpath) + writer.write_spec(dict(datasets=[self.dtr_spec], groups=[self.dt_spec, self.dt_spec2]), spec_fpath) writer.write_namespace(self.namespace, namespace_fpath) self.namespace_catalog = NamespaceCatalog() hdmf_typemap = get_type_map() @@ -133,6 +147,7 @@ def setUp(self): self.type_map.load_namespaces(namespace_fpath) self.manager = BuildManager(self.type_map) + self.CustomDTR = self.type_map.get_dt_container_cls('CustomDTR', CORE_NAMESPACE) self.TestTable = self.type_map.get_dt_container_cls('TestTable', CORE_NAMESPACE) self.TestDTRTable = self.type_map.get_dt_container_cls('TestDTRTable', CORE_NAMESPACE) @@ -228,6 +243,22 @@ def test_dynamic_table_region_non_dtr_target(self): self.TestDTRTable(name='test_dtr_table', description='my table', target_tables={'optional_col3': test_table}) + def test_custom_dtr_class(self): + test_table = self.TestTable(name='test_table', description='my test table') + test_table.add_row(my_col=3.0, indexed_col=[1.0, 3.0], optional_col2=.5) + test_table.add_row(my_col=4.0, indexed_col=[2.0, 4.0], optional_col2=.5) + + test_dtr_table = self.TestDTRTable(name='test_dtr_table', description='my table', + target_tables={'optional_custom_dtr_col': test_table}) + + self.assertIsInstance(test_dtr_table['optional_custom_dtr_col'], self.CustomDTR) + self.assertEqual(test_dtr_table['optional_custom_dtr_col'].description, "a test DynamicTableRegion column") + self.assertIs(test_dtr_table['optional_custom_dtr_col'].table, test_table) + + test_dtr_table.add_row(ref_col=0, indexed_ref_col=[0, 1], optional_custom_dtr_col=0) + test_dtr_table.add_row(ref_col=0, indexed_ref_col=[0, 1], optional_custom_dtr_col=1) + self.assertEqual(test_dtr_table['optional_custom_dtr_col'].data, [0, 1]) + def test_attribute(self): test_table = self.TestTable(name='test_table', description='my test table') assert test_table.my_col is not None @@ -266,3 +297,17 @@ def test_roundtrip(self): for err in errors: raise Exception(err) self.reader.close() + + def test_add_custom_dtr_column(self): + test_table = self.TestTable(name='test_table', description='my test table') + test_table.add_column( + name='custom_dtr_column', + description='this is a custom DynamicTableRegion column', + col_cls=self.CustomDTR, + ) + self.assertIsInstance(test_table['custom_dtr_column'], self.CustomDTR) + self.assertEqual(test_table['custom_dtr_column'].description, 'this is a custom DynamicTableRegion column') + + test_table.add_row(my_col=3.0, indexed_col=[1.0, 3.0], custom_dtr_column=0) + test_table.add_row(my_col=4.0, indexed_col=[2.0, 4.0], custom_dtr_column=1) + self.assertEqual(test_table['custom_dtr_column'].data, [0, 1])