From 3a8681dd5a5604252201b8ced600a0d20b90f758 Mon Sep 17 00:00:00 2001 From: Volker Hilsenstein Date: Thu, 21 Jul 2022 11:46:20 +0200 Subject: [PATCH 1/4] Add sortfunc argument to imread to customize glob order --- dask_image/imread/__init__.py | 7 ++++--- tests/test_dask_image/test_imread/test_core.py | 12 ++++++++++++ 2 files changed, 16 insertions(+), 3 deletions(-) diff --git a/dask_image/imread/__init__.py b/dask_image/imread/__init__.py index 9cd5aaba..9c426afa 100644 --- a/dask_image/imread/__init__.py +++ b/dask_image/imread/__init__.py @@ -10,7 +10,7 @@ from . import _utils -def imread(fname, nframes=1, *, arraytype="numpy"): +def imread(fname, nframes=1, *, arraytype="numpy", sortfunc=sorted): """ Read image data into a Dask Array. @@ -25,6 +25,8 @@ def imread(fname, nframes=1, *, arraytype="numpy"): Number of the frames to include in each chunk (default: 1). arraytype : str, optional Array type for dask chunks. Available options: "numpy", "cupy". + sortfunc: Callable + A function for sorting the glob results. Returns ------- @@ -65,7 +67,7 @@ def imread(fname, nframes=1, *, arraytype="numpy"): ) # place source filenames into dask array - filenames = sorted(glob.glob(sfname)) # pims also does this + filenames = sortfunc(glob.glob(sfname)) # pims also does this if len(filenames) > 1: ar = da.from_array(filenames, chunks=(nframes,)) multiple_files = True @@ -83,7 +85,6 @@ def imread(fname, nframes=1, *, arraytype="numpy"): arrayfunc=arrayfunc, meta=arrayfunc([]).astype(dtype), # meta overwrites `dtype` argument ) - return a diff --git a/tests/test_dask_image/test_imread/test_core.py b/tests/test_dask_image/test_imread/test_core.py index 07f8d26f..8facf5ed 100644 --- a/tests/test_dask_image/test_imread/test_core.py +++ b/tests/test_dask_image/test_imread/test_core.py @@ -95,3 +95,15 @@ def test_tiff_imread(tmpdir, seed, nframes, shape, runtime_warning, dtype, is_pa assert (shape[0] % nframes) == d.chunks[0][-1] da.utils.assert_eq(a, d) + + +@pytest.mark.parametrize("sortfunc, expected", [ + pytest.param(sorted, np.array([[10],[9]])), + pytest.param(tifffile.natural_sorted, np.array([[9],[10]])) + ]) +def test_tiff_imread_glob_sort(tmpdir, sortfunc, expected): + dirpth = tmpdir.mkdir("test_imread") + tifffile.imwrite(dirpth.join("10.tif"), np.array([10])) + tifffile.imwrite(dirpth.join("9.tif"), np.array([9])) + actual = np.array(dask_image.imread.imread(dirpth.join("*.tif"), sortfunc=sortfunc)) + assert np.all(actual==expected) \ No newline at end of file From 472dcf998467e9fb9e830317484d2298ce44bf50 Mon Sep 17 00:00:00 2001 From: Volker Hilsenstein Date: Thu, 21 Jul 2022 11:53:37 +0200 Subject: [PATCH 2/4] Fix flake8 complaints --- tests/test_dask_image/test_imread/test_core.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/tests/test_dask_image/test_imread/test_core.py b/tests/test_dask_image/test_imread/test_core.py index 8facf5ed..3a6d46fc 100644 --- a/tests/test_dask_image/test_imread/test_core.py +++ b/tests/test_dask_image/test_imread/test_core.py @@ -98,12 +98,14 @@ def test_tiff_imread(tmpdir, seed, nframes, shape, runtime_warning, dtype, is_pa @pytest.mark.parametrize("sortfunc, expected", [ - pytest.param(sorted, np.array([[10],[9]])), - pytest.param(tifffile.natural_sorted, np.array([[9],[10]])) + pytest.param(sorted, np.array([[10], [9]])), + pytest.param(tifffile.natural_sorted, + np.array([[9], [10]])) ]) def test_tiff_imread_glob_sort(tmpdir, sortfunc, expected): dirpth = tmpdir.mkdir("test_imread") tifffile.imwrite(dirpth.join("10.tif"), np.array([10])) tifffile.imwrite(dirpth.join("9.tif"), np.array([9])) - actual = np.array(dask_image.imread.imread(dirpth.join("*.tif"), sortfunc=sortfunc)) - assert np.all(actual==expected) \ No newline at end of file + actual = np.array(dask_image.imread.imread(dirpth.join("*.tif"), + sortfunc=sortfunc)) + assert np.all(actual == expected) From 32459fe956277f03b7b300bdccca0afc2b05e609 Mon Sep 17 00:00:00 2001 From: Volker Hilsenstein Date: Thu, 21 Jul 2022 12:03:26 +0200 Subject: [PATCH 3/4] Refine docstring --- dask_image/imread/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/dask_image/imread/__init__.py b/dask_image/imread/__init__.py index 9c426afa..826eca9d 100644 --- a/dask_image/imread/__init__.py +++ b/dask_image/imread/__init__.py @@ -25,8 +25,8 @@ def imread(fname, nframes=1, *, arraytype="numpy", sortfunc=sorted): Number of the frames to include in each chunk (default: 1). arraytype : str, optional Array type for dask chunks. Available options: "numpy", "cupy". - sortfunc: Callable - A function for sorting the glob results. + sortfunc: Callable, optional + A function for sorting the glob results, default is "sorted". Returns ------- From 45b81a70185cf9fc6e38316d2a42064b8062ba11 Mon Sep 17 00:00:00 2001 From: Volker Hilsenstein Date: Fri, 22 Jul 2022 11:59:37 +0200 Subject: [PATCH 4/4] Remove custom sortfunc, default to natural sort --- dask_image/imread/__init__.py | 11 ++++++----- tests/test_dask_image/test_imread/test_core.py | 12 +++--------- 2 files changed, 9 insertions(+), 14 deletions(-) diff --git a/dask_image/imread/__init__.py b/dask_image/imread/__init__.py index 826eca9d..3649f147 100644 --- a/dask_image/imread/__init__.py +++ b/dask_image/imread/__init__.py @@ -6,11 +6,12 @@ import dask.array as da import numpy as np import pims +from tifffile import natural_sorted from . import _utils -def imread(fname, nframes=1, *, arraytype="numpy", sortfunc=sorted): +def imread(fname, nframes=1, *, arraytype="numpy"): """ Read image data into a Dask Array. @@ -21,12 +22,12 @@ def imread(fname, nframes=1, *, arraytype="numpy", sortfunc=sorted): ---------- fname : str or pathlib.Path A glob like string that may match one or multiple filenames. + Where multiple filenames match, they are sorted using + natural (as opposed to alphabetical) sort. nframes : int, optional Number of the frames to include in each chunk (default: 1). arraytype : str, optional Array type for dask chunks. Available options: "numpy", "cupy". - sortfunc: Callable, optional - A function for sorting the glob results, default is "sorted". Returns ------- @@ -66,8 +67,8 @@ def imread(fname, nframes=1, *, arraytype="numpy", sortfunc=sorted): RuntimeWarning ) - # place source filenames into dask array - filenames = sortfunc(glob.glob(sfname)) # pims also does this + # place source filenames into dask array after sorting + filenames = natural_sorted(glob.glob(sfname)) if len(filenames) > 1: ar = da.from_array(filenames, chunks=(nframes,)) multiple_files = True diff --git a/tests/test_dask_image/test_imread/test_core.py b/tests/test_dask_image/test_imread/test_core.py index 3a6d46fc..ddebc70c 100644 --- a/tests/test_dask_image/test_imread/test_core.py +++ b/tests/test_dask_image/test_imread/test_core.py @@ -97,15 +97,9 @@ def test_tiff_imread(tmpdir, seed, nframes, shape, runtime_warning, dtype, is_pa da.utils.assert_eq(a, d) -@pytest.mark.parametrize("sortfunc, expected", [ - pytest.param(sorted, np.array([[10], [9]])), - pytest.param(tifffile.natural_sorted, - np.array([[9], [10]])) - ]) -def test_tiff_imread_glob_sort(tmpdir, sortfunc, expected): +def test_tiff_imread_glob_natural_sort(tmpdir): dirpth = tmpdir.mkdir("test_imread") tifffile.imwrite(dirpth.join("10.tif"), np.array([10])) tifffile.imwrite(dirpth.join("9.tif"), np.array([9])) - actual = np.array(dask_image.imread.imread(dirpth.join("*.tif"), - sortfunc=sortfunc)) - assert np.all(actual == expected) + actual = np.array(dask_image.imread.imread(dirpth.join("*.tif"))) + assert np.all(actual == np.array([[9], [10]]))