class Population:
    """Neuron Population Class.

    Offers an iterator over neurons within population, neurites of neurons, somas of neurons.
    It does not store the loaded neurons in memory unless they have been passed already
    loaded (instances of ``Neuron``) or ``cache=True`` was requested.
    """

    def __init__(self, files, name='Population', ignored_exceptions=(), cache=False):
        """Construct a neuron population.

        Arguments:
            files (collections.abc.Iterable[str|Path|Neuron]): neuron objects and/or paths to
                neuron files
            name (str): Optional name for this Population
            ignored_exceptions (tuple): NeuroM and MorphIO exceptions that you want to ignore when
                loading neurons. A single exception class is accepted as well.
            cache (bool): whether to cache the loaded neurons in memory. If false then a neuron
                is loaded every time it is accessed within the population, which keeps memory
                usage low for big populations. If true then all neurons are loaded upon
                construction and kept in memory.
        """
        # ``isinstance`` only accepts a class or a tuple of classes, so normalise lists etc.
        if isinstance(ignored_exceptions, type):
            ignored_exceptions = (ignored_exceptions,)
        self._ignored_exceptions = tuple(ignored_exceptions)
        self.name = name
        if cache:
            loaded = (self._load_file(f) for f in files if f is not None)
            # ``_load_file`` returns None for files skipped through ``ignored_exceptions``;
            # those must not end up in the cache.
            self._files = [nrn for nrn in loaded if nrn is not None]
        else:
            # Materialise the entries (not the neurons) so that ``len``, indexing and repeated
            # iteration also work when a one-shot iterable is passed.
            self._files = list(files)

    @property
    def neurons(self):
        """Iterator to population's neurons."""
        return iter(self)

    @property
    def somata(self):
        """Iterator to population's somas."""
        return (n.soma for n in self)

    @property
    def neurites(self):
        """Iterator to population's neurites."""
        return (neurite for n in self for neurite in n.neurites)

    def _load_file(self, f):
        """Return the neuron for a population entry.

        Arguments:
            f (str|Path|Neuron): an already loaded neuron or a path to a neuron file

        Returns:
            ``f`` itself if it is a ``Neuron``, otherwise the neuron loaded from ``f``.
            None if loading failed with one of the ignored exceptions.

        Raises:
            NeuroMError: if loading failed with an exception that is not ignored.
        """
        if isinstance(f, neurom.core.neuron.Neuron):
            return f
        try:
            return neurom.load_neuron(f)
        except (NeuroMError, MorphioError) as e:
            if isinstance(e, self._ignored_exceptions):
                # ``f`` may be a plain string, which has no ``name`` attribute
                L.info('Ignoring exception "%s" for file %s', e, getattr(f, 'name', f))
                return None
            raise NeuroMError('`load_neurons` failed') from e

    def __iter__(self):
        """Iterator to population's neurons.

        Entries whose loading fails with an ignored exception are skipped.
        """
        for f in self._files:
            nrn = self._load_file(f)
            if nrn is None:
                continue
            yield nrn

    def __len__(self):
        """Number of entries in the population.

        Without caching this counts every entry, including files that would be skipped
        on iteration because their loading fails with an ignored exception.
        """
        return len(self._files)

    def __getitem__(self, idx):
        """Get neuron at index idx.

        Negative indices count from the end. Returns None if the entry fails to load with
        an ignored exception.

        Raises:
            ValueError: if ``idx`` is out of range.
        """
        size = len(self)
        if not -size <= idx < size:
            raise ValueError(
                f'no {idx} index in "{self.name}" population, '
                f'max possible index is {size - 1}')
        return self._load_file(self._files[idx])

    def __str__(self):
        """Return a string representation."""
        return 'Population <name:%s, nneurons:%d>' % (self.name, len(self))
def load_neurons(neurons,
                 name=None,
                 ignored_exceptions=(),
                 cache=False):
    """Create a population object.

    From all morphologies in a directory or from morphologies in a list of file names.

    Arguments:
        neurons(str|Path|Iterable[Path]): path to a folder or list of paths to neuron files
        name (str): optional name of population. By default 'Population' or\
            filepath basename depending on whether neurons is list or\
            directory path respectively.
        ignored_exceptions (tuple): NeuroM and MorphIO exceptions that you want to ignore when
            loading neurons. Any iterable of exception classes, or a single class, is accepted.
        cache (bool): whether to cache the loaded neurons in memory

    Returns:
        Population: population object
    """
    if isinstance(neurons, (str, Path)):
        files = get_files_by_path(neurons)
        name = name or Path(neurons).name
    else:
        # Materialise one-shot iterables (e.g. ``map(str, paths)``): the population needs to
        # know its length and to be iterable more than once.
        files = list(neurons)
        name = name or 'Population'

    # The exceptions end up as the second argument of ``isinstance``, which only accepts
    # a class or a tuple of classes.
    if isinstance(ignored_exceptions, type):
        ignored_exceptions = (ignored_exceptions,)
    ignored_exceptions = tuple(ignored_exceptions)

    return Population(files, name=name, ignored_exceptions=ignored_exceptions, cache=cache)
b/tests/core/test_population.py @@ -29,50 +29,74 @@ from pathlib import Path from neurom.core.population import Population +from neurom.core.neuron import Neuron from neurom import load_neuron +import pytest + DATA_PATH = Path(__file__).parent.parent / 'data' -NRN1 = load_neuron(DATA_PATH / 'swc/Neuron.swc') -NRN2 = load_neuron(DATA_PATH / 'swc/Single_basal.swc') -NRN3 = load_neuron(DATA_PATH / 'swc/Neuron_small_radius.swc') +FILES = [DATA_PATH / 'swc/Neuron.swc', + DATA_PATH / 'swc/Single_basal.swc', + DATA_PATH / 'swc/Neuron_small_radius.swc'] -NEURONS = [NRN1, NRN2, NRN3] +NEURONS = [load_neuron(f) for f in FILES] TOT_NEURITES = sum(len(N.neurites) for N in NEURONS) -POP = Population(NEURONS, name='foo') +populations = [Population(NEURONS, name='foo'), + Population(FILES, name='foo', cache=True)] + +@pytest.mark.parametrize('pop', populations) +def test_names(pop): + assert pop[0].name, 'Neuron' + assert pop[1].name, 'Single_basal' + assert pop[2].name, 'Neuron_small_radius' + assert pop.name == 'foo' -def test_population(): - assert len(POP.neurons) == 3 - assert POP.neurons[0].name, 'Neuron' - assert POP.neurons[1].name, 'Single_basal' - assert POP.neurons[2].name, 'Neuron_small_radius' - assert len(POP.somata) == 3 +def test_indexing(): + pop = populations[0] + for i, n in enumerate(NEURONS): + assert n is pop[i] + with pytest.raises(ValueError, match='no 10 index'): + pop[10] - assert len(POP.neurites) == TOT_NEURITES - assert POP.name == 'foo' +def test_cache(): + pop = populations[1] + for n in pop._files: + assert isinstance(n, Neuron) -def test_neurons(): +def test_double_indexing(): + pop = populations[0] for i, n in enumerate(NEURONS): - assert n is POP.neurons[i] + assert n is pop[i] + # second time to assure that generator is available again + for i, n in enumerate(NEURONS): + assert n is pop[i] -def test_iterate_neurons(): - for a, b in zip(NEURONS, POP): +def test_iterating(): + pop = populations[0] + for a, b in zip(NEURONS, pop): assert a is b + 
for a, b in zip(NEURONS, pop.somata): + assert a.soma is b + -def test_len(): - assert len(POP) == len(NEURONS) +@pytest.mark.parametrize('pop', populations) +def test_len(pop): + assert len(pop) == len(NEURONS) def test_getitem(): + pop = populations[0] for i in range(len(NEURONS)): - assert POP[i] is NEURONS[i] + assert pop[i] is NEURONS[i] -def test_str(): - assert 'Population' in str(POP) +@pytest.mark.parametrize('pop', populations) +def test_str(pop): + assert 'Population' in str(pop) diff --git a/tests/features/test_get_features.py b/tests/features/test_get_features.py index 24408f2e..ab719f43 100644 --- a/tests/features/test_get_features.py +++ b/tests/features/test_get_features.py @@ -50,9 +50,8 @@ DATA_PATH = Path(__file__).parent.parent / 'data' NRN_FILES = [DATA_PATH / 'h5/v1' / f for f in ('Neuron.h5', 'Neuron_2_branch.h5', 'bio_neuron-001.h5')] -NRNS = load_neurons(NRN_FILES) -NRN = NRNS[0] -POP = Population(NRNS) +POP = load_neurons(NRN_FILES) +NRN = POP[0] SWC_PATH = DATA_PATH / 'swc' NEURON_PATH = SWC_PATH / 'Neuron.swc' @@ -119,18 +118,16 @@ def test_max_radial_distances(): assert_features_for_neurite(feat, POP, expected, exact=False) # Test with a list of neurites - neurites = POP[0].neurites expected = { None: [99.58945832], NeuriteType.all: [99.58945832], NeuriteType.apical_dendrite: [99.589458], } - assert_features_for_neurite(feat, neurites, expected, exact=False) + assert_features_for_neurite(feat, NRN.neurites, expected, exact=False) def test_max_radial_distance(): feat = 'max_radial_distance' - neurites = POP[0].neurites expected = { None: 99.58945832, NeuriteType.all: 99.58945832, @@ -139,9 +136,9 @@ def test_max_radial_distance(): for neurite_type, expected_values in expected.items(): if neurite_type is None: - res = get_feature(feat, neurites) + res = get_feature(feat, NRN) else: - res = get_feature(feat, neurites, neurite_type=neurite_type) + res = get_feature(feat, NRN, neurite_type=neurite_type) assert_allclose(res, expected_values) 
@@ -508,24 +505,24 @@ def vol(neurite): features.register_neurite_feature('foo', npts) - n_points_ref = [len(n.points) for n in iter_neurites(NRNS)] - n_points = get_feature('foo', NRNS) + n_points_ref = [len(n.points) for n in iter_neurites(POP)] + n_points = get_feature('foo', POP) assert_items_equal(n_points, n_points_ref) # test neurite type filtering - n_points_ref = [len(n.points) for n in iter_neurites(NRNS, filt=_is_type(NeuriteType.axon))] - n_points = get_feature('foo', NRNS, neurite_type=NeuriteType.axon) + n_points_ref = [len(n.points) for n in iter_neurites(POP, filt=_is_type(NeuriteType.axon))] + n_points = get_feature('foo', POP, neurite_type=NeuriteType.axon) assert_items_equal(n_points, n_points_ref) features.register_neurite_feature('bar', vol) - n_volume_ref = [n.volume for n in iter_neurites(NRNS)] - n_volume = get_feature('bar', NRNS) + n_volume_ref = [n.volume for n in iter_neurites(POP)] + n_volume = get_feature('bar', POP) assert_items_equal(n_volume, n_volume_ref) # test neurite type filtering - n_volume_ref = [n.volume for n in iter_neurites(NRNS, filt=_is_type(NeuriteType.axon))] - n_volume = get_feature('bar', NRNS, neurite_type=NeuriteType.axon) + n_volume_ref = [n.volume for n in iter_neurites(POP, filt=_is_type(NeuriteType.axon))] + n_volume = get_feature('bar', POP, neurite_type=NeuriteType.axon) assert_items_equal(n_volume, n_volume_ref) @@ -894,12 +891,12 @@ def test_section_path_distances_start_point(): def test_partition(): - assert np.all(get_feature('partition', NRNS)[:10] == np.array( + assert np.all(get_feature('partition', POP)[:10] == np.array( [19., 17., 15., 13., 11., 9., 7., 5., 3., 1.])) def test_partition_asymmetry(): - assert_allclose(get_feature('partition_asymmetry', NRNS)[:10], np.array([0.9, 0.88888889, 0.875, + assert_allclose(get_feature('partition_asymmetry', POP)[:10], np.array([0.9, 0.88888889, 0.875, 0.85714286, 0.83333333, 0.8, 0.75, 0.66666667, @@ -907,7 +904,7 @@ def test_partition_asymmetry(): def 
test_partition_asymmetry_length(): - assert_allclose(get_feature('partition_asymmetry_length', NRNS)[:1], np.array([0.853925])) + assert_allclose(get_feature('partition_asymmetry_length', POP)[:1], np.array([0.853925])) def test_section_strahler_orders(): diff --git a/tests/io/test_io_utils.py b/tests/io/test_io_utils.py index 7cec3628..8f740d1e 100644 --- a/tests/io/test_io_utils.py +++ b/tests/io/test_io_utils.py @@ -61,16 +61,6 @@ MISSING_PARENTS_FILE = SWC_PATH / 'Neuron_missing_parents.swc' -def _mock_load_neuron(filename): - class MockNeuron: - def __init__(self, name): - self.soma = 42 - self.neurites = list() - self.name = name - - return MockNeuron(Path(filename).stem) - - def _check_neurites_have_no_parent(nrn): for n in nrn.neurites: @@ -86,47 +76,50 @@ def test_get_morph_files(): def test_load_neurons(): # List of strings - nrns = utils.load_neurons(map(str, FILES), neuron_loader=_mock_load_neuron) + nrns = utils.load_neurons(list(map(str, FILES))) for i, nrn in enumerate(nrns): - assert nrn.name == FILES[i].stem + assert nrn.name == FILES[i].name with pytest.raises(NeuroMError): - utils.load_neurons(MISSING_PARENTS_FILE,) + list(utils.load_neurons(MISSING_PARENTS_FILE,)) # Single string - nrns = utils.load_neurons(str(FILES[0]), neuron_loader=_mock_load_neuron) - assert nrns[0].name == FILES[0].stem + nrns = utils.load_neurons(str(FILES[0])) + assert nrns[0].name == FILES[0].name # Single Path - nrns = utils.load_neurons(FILES[0], neuron_loader=_mock_load_neuron) - assert nrns[0].name == FILES[0].stem + nrns = utils.load_neurons(FILES[0]) + assert nrns[0].name == FILES[0].name - # sequence of strings - nrns = utils.load_neurons(map(str, FILES), neuron_loader=_mock_load_neuron) + # list of strings + nrns = utils.load_neurons(list(map(str, FILES))) for i, nrn in enumerate(nrns): - assert nrn.name == FILES[i].stem + assert nrn.name == FILES[i].name # sequence of Path objects - nrns = utils.load_neurons(FILES, neuron_loader=_mock_load_neuron) + nrns = 
utils.load_neurons(FILES) for nrn, file in zip(nrns, FILES): - assert nrn.name == file.stem + assert nrn.name == file.name # string path to a directory - nrns = utils.load_neurons(str(SWC_PATH), neuron_loader=_mock_load_neuron) + nrns = utils.load_neurons(str(SWC_PATH), ignored_exceptions=(MissingParentError, MorphioError)) # is subset so that if new morpho are added to SWC_PATH, the test does not break - assert {f.stem for f in FILES}.issubset({nrn.name for nrn in nrns}) + assert {f.name for f in FILES}.issubset({nrn.name for nrn in nrns}) # Path path to a directory - nrns = utils.load_neurons(SWC_PATH, neuron_loader=_mock_load_neuron) + nrns = utils.load_neurons(SWC_PATH, ignored_exceptions=(MissingParentError, MorphioError)) # is subset so that if new morpho are added to SWC_PATH, the test does not break - assert {f.stem for f in FILES}.issubset({nrn.name for nrn in nrns}) + assert {f.name for f in FILES}.issubset({nrn.name for nrn in nrns}) def test_ignore_exceptions(): with pytest.raises(NeuroMError): - utils.load_neurons(MISSING_PARENTS_FILE,) + list(utils.load_neurons(MISSING_PARENTS_FILE,)) + count = 0 pop = utils.load_neurons((MISSING_PARENTS_FILE,), ignored_exceptions=(RawDataError,)) - assert len(pop) == 0 + for _ in pop: + count += 1 + assert count == 0 def test_load_neuron(): @@ -205,7 +198,6 @@ def test_load_neuron_missing_parents_raises(): def test_load_neurons_directory(): pop = utils.load_neurons(VALID_DATA_PATH) - assert len(pop.neurons) == 4 assert len(pop) == 4 assert pop.name == 'valid_set' for nrn in pop: @@ -214,7 +206,6 @@ def test_load_neurons_directory(): def test_load_neurons_directory_name(): pop = utils.load_neurons(VALID_DATA_PATH, name='test123') - assert len(pop.neurons) == 4 assert len(pop) == 4 assert pop.name == 'test123' for nrn in pop: @@ -223,7 +214,7 @@ def test_load_neurons_directory_name(): def test_load_neurons_filenames(): pop = utils.load_neurons(FILENAMES, name='test123') - assert len(pop.neurons) == 2 + assert len(pop) 
== 2 assert pop.name == 'test123' for nrn, name in zip(pop.neurons, NRN_NAMES): assert isinstance(nrn, Neuron)