From 8d16eddd9358862eb3ae279cc604296d8ac5cf00 Mon Sep 17 00:00:00 2001 From: melisande-c Date: Tue, 26 Nov 2024 14:38:00 +0100 Subject: [PATCH 1/8] feat(CLI): add predict command; refac: organisation --- src/careamics/careamist.py | 1 + src/careamics/cli/conf.py | 29 ++--------- src/careamics/cli/main.py | 103 +++++++++++++++++++++++++++++++++++-- src/careamics/cli/utils.py | 25 +++++++++ 4 files changed, 129 insertions(+), 29 deletions(-) create mode 100644 src/careamics/cli/utils.py diff --git a/src/careamics/careamist.py b/src/careamics/careamist.py index 0a78ca3cb..b42e72dc9 100644 --- a/src/careamics/careamist.py +++ b/src/careamics/careamist.py @@ -146,6 +146,7 @@ def __init__( # path to configuration file or model else: + # TODO: update this check so models can be downloaded directly from BMZ source = check_path_exists(source) # configuration file diff --git a/src/careamics/cli/conf.py b/src/careamics/cli/conf.py index 01290f7f9..95a67ea1b 100644 --- a/src/careamics/cli/conf.py +++ b/src/careamics/cli/conf.py @@ -3,7 +3,7 @@ import sys from dataclasses import dataclass from pathlib import Path -from typing import Optional, Tuple +from typing import Optional import click import typer @@ -17,6 +17,7 @@ create_n2v_configuration, save_configuration, ) +from .utils import handle_2D_3D_callback WORK_DIR = Path.cwd() @@ -92,26 +93,6 @@ def conf_options( # numpydoc ignore=PR01 ctx.obj = ConfOptions(dir, name, force, print) -def patch_size_callback(value: Tuple[int, int, int]) -> Tuple[int, ...]: - """ - Callback for --patch-size option. - - Parameters - ---------- - value : (int, int, int) - Patch size value. - - Returns - ------- - (int, int, int) | (int, int) - If the last element in `value` is -1 the tuple is reduced to the first two - values. - """ - if value[2] == -1: - return value[:2] - return value - - # TODO: Need to decide how to parse model kwargs # - Could be json style string to be loaded as dict e.g. {"depth": 3} # - Cons: Annoying to type, easily have syntax errors @@ -132,7 +113,7 @@ def care( # numpydoc ignore=PR01 "is not 3D pass the last value as -1 e.g. --patch-size 64 64 -1)." ), click_type=click.Tuple([int, int, int]), - callback=patch_size_callback, + callback=handle_2D_3D_callback, ), ], batch_size: Annotated[int, typer.Option(help="Batch size.")], @@ -219,7 +200,7 @@ def n2n( # numpydoc ignore=PR01 "is not 3D pass the last value as -1 e.g. --patch-size 64 64 -1)." ), click_type=click.Tuple([int, int, int]), - callback=patch_size_callback, + callback=handle_2D_3D_callback, ), ], batch_size: Annotated[int, typer.Option(help="Batch size.")], @@ -303,7 +284,7 @@ def n2v( # numpydoc ignore=PR01 "is not 3D pass the last value as -1 e.g. --patch-size 64 64 -1)." ), click_type=click.Tuple([int, int, int]), - callback=patch_size_callback, + callback=handle_2D_3D_callback, ), ], batch_size: Annotated[int, typer.Option(help="Batch size.")], diff --git a/src/careamics/cli/main.py b/src/careamics/cli/main.py index 8305afb0a..f539332ae 100644 --- a/src/careamics/cli/main.py +++ b/src/careamics/cli/main.py @@ -9,11 +9,13 @@ from pathlib import Path from typing import Optional +import click import typer from typing_extensions import Annotated from ..careamist import CAREamist from . import conf +from .utils import handle_2D_3D_callback app = typer.Typer( help="Run CAREamics algorithms from the command line, including Noise2Void " @@ -22,7 +24,6 @@ app.add_typer( conf.app, name="conf", - # callback=conf.conf_options ) @@ -102,7 +103,7 @@ def train( # numpydoc ignore=PR01 typer.Option( "--work-dir", "-wd", - help=("Path to working directory in which to save checkpoints and " "logs"), + help=("Path to working directory in which to save checkpoints and logs"), exists=True, file_okay=False, dir_okay=True, @@ -123,10 +124,102 @@ def train( # numpydoc ignore=PR01 @app.command() -def predict(): # numpydoc ignore=PR01 +def predict( # numpydoc ignore=PR01 + model: Annotated[ + Path, + typer.Argument( + help="Path to a configuration file or a trained model.", + exists=True, + file_okay=True, + dir_okay=False, + ), + ], + source: Annotated[ + Path, + typer.Argument( + help="Path to the training data. Can be a directory or single file.", + exists=True, + file_okay=True, + dir_okay=True, + ), + ], + batch_size: Annotated[int, typer.Option(help="Batch size.")] = 1, + tile_size: Annotated[ + Optional[click.Tuple], + typer.Option( + help=( + "Size of the tiles to use for prediction, (if the data " + "is not 3D pass the last value as -1 e.g. --tile_size 64 64 -1)." + ), + click_type=click.Tuple([int, int, int]), + callback=handle_2D_3D_callback, + ), + ] = None, + tile_overlap: Annotated[ + click.Tuple, + typer.Option( + help=( + "Overlap between tiles, (if the data is not 3D pass the last value as " + "-1 e.g. --tile_overlap 64 64 -1)." + ), + click_type=click.Tuple([int, int, int]), + callback=handle_2D_3D_callback, + ), + ] = (48, 48, -1), + axes: Annotated[ + Optional[str], + typer.Option( + help="Axes of the input data. If unused the data is assumed to have the " + "same axes as the original training data." + ), + ] = None, + data_type: Annotated[ + click.Choice, + typer.Option(click_type=click.Choice(["tiff"]), help="Type of the input data."), + ] = "tiff", + tta_transforms: Annotated[ + bool, + typer.Option( + "--tta-transforms/--no-tta-transforms", + "-t/-T", + help="Whether to apply test-time augmentation.", + ), + ] = False, + write_type: Annotated[ + click.Choice, + typer.Option( + click_type=click.Choice(["tiff"]), help="Type of the output data." + ), + ] = "tiff", + # TODO: could make dataloader_params as json, necessary? + work_dir: Annotated[ + Optional[Path], + typer.Option( + "--work-dir", + "-wd", + help=( + "Path to working directory. (Predictions will be save in a nested " + "directory named 'predictions'.)" + ), + exists=True, + file_okay=False, + dir_okay=True, + ), + ] = None, +): """Create and save predictions from CAREamics models.""" - # TODO: Need a save predict to workdir function - raise NotImplementedError + engine = CAREamist(source=model, work_dir=work_dir) + engine.predict_to_disk( + source=source, + batch_size=batch_size, + tile_size=tile_size, + tile_overlap=tile_overlap, + axes=axes, + data_type=data_type, + tta_transforms=tta_transforms, + write_type=write_type, + prediction_dir="predictions", + ) def run(): diff --git a/src/careamics/cli/utils.py b/src/careamics/cli/utils.py new file mode 100644 index 000000000..151ef3fcd --- /dev/null +++ b/src/careamics/cli/utils.py @@ -0,0 +1,25 @@ +"""Utility functions for the CAREamics CLI.""" + +from typing import Tuple + + +def handle_2D_3D_callback(value: Tuple[int, int, int]) -> Tuple[int, ...]: + """ + Callback for options that require 2D or 3D inputs. + + In the case of 2D, the 3rd element should be set to -1. + + Parameters + ---------- + value : (int, int, int) + Tile size value. + + Returns + ------- + (int, int, int) | (int, int) + If the last element in `value` is -1 the tuple is reduced to the first two + values. + """ + if value[2] == -1: + return value[:2] + return value From c12e4b181da12d30fdcb353a56fddbaa6eae6519 Mon Sep 17 00:00:00 2001 From: melisande-c Date: Tue, 26 Nov 2024 17:54:43 +0100 Subject: [PATCH 2/8] fix(predict2disk): bug with single file directory structure; refac: get input files --- src/careamics/careamist.py | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/src/careamics/careamist.py b/src/careamics/careamist.py index b42e72dc9..c5d939cd2 100644 --- a/src/careamics/careamist.py +++ b/src/careamics/careamist.py @@ -735,7 +735,7 @@ def predict_to_disk( Parameters ---------- - source : PredictDataModule, pathlib.Path or str + source : PredictDataModule or pathlib.Path, str Data to predict on. batch_size : int, default=1 Batch size for prediction. @@ -805,27 +805,36 @@ def predict_to_disk( write_extension = SupportedData.get_extension(write_type) # extract file names + source_path: Union[Path, str, NDArray] + source_data_type: Literal["array", "tiff", "custom"] if isinstance(source, PredictDataModule): - # assert not isinstance(source.pred_data, ) - source_file_paths = list_files( - source.pred_data, source.data_type, source.extension_filter - ) + source_path = source.pred_data + source_data_type = source.data_type + extension_filter = source.extension_filter elif isinstance(source, (str, Path)): - assert self.cfg.data_config.data_type != "array" - data_type = data_type or self.cfg.data_config.data_type + source_path = source + source_data_type = data_type or self.cfg.data_config.data_type extension_filter = SupportedData.get_extension_pattern( - SupportedData(data_type) + SupportedData(source_data_type) ) - source_file_paths = list_files(source, data_type, extension_filter) else: raise ValueError(f"Unsupported source type: '{type(source)}'.") + if source_data_type == "array": + raise ValueError( + "Predicting to disk is not supported for input type 'array'." + ) + assert isinstance(source_path, (Path, str)) # because data_type != "array" + source_path = Path(source_path) + + file_paths = list_files(source_path, source_data_type, extension_filter) + # predict and write each file in turn - for source_path in source_file_paths: + for file_path in file_paths: # source_path is relative to original source path... # should mirror original directory structure prediction = self.predict( - source=source_path, + source=file_path, batch_size=batch_size, tile_size=tile_size, tile_overlap=tile_overlap, @@ -841,11 +850,9 @@ def predict_to_disk( write_data = np.concatenate(prediction) # create directory structure and write path - file_write_dir = write_dir / source_path.parent.name + file_write_dir = write_dir / file_path.relative_to(source_path) file_write_dir.mkdir(parents=True, exist_ok=True) - write_path = (file_write_dir / source_path.name).with_suffix( - write_extension - ) + write_path = (file_write_dir / file_path.name).with_suffix(write_extension) # write data write_func(file_path=write_path, img=write_data) From fb3a655ff71f7ac13f3d4e13a83407ed757c2dc0 Mon Sep 17 00:00:00 2001 From: melisande-c Date: Tue, 26 Nov 2024 17:59:47 +0100 Subject: [PATCH 3/8] feat(CLI): add prediction command --- src/careamics/cli/main.py | 8 ++-- src/careamics/cli/utils.py | 8 +++- tests/cli/test_main.py | 93 ++++++++++++++++++++++++++++++++++---- 3 files changed, 94 insertions(+), 15 deletions(-) diff --git a/src/careamics/cli/main.py b/src/careamics/cli/main.py index f539332ae..d4d91a99b 100644 --- a/src/careamics/cli/main.py +++ b/src/careamics/cli/main.py @@ -19,12 +19,10 @@ app = typer.Typer( help="Run CAREamics algorithms from the command line, including Noise2Void " - "and its many variants and cousins" -) -app.add_typer( - conf.app, - name="conf", + "and its many variants and cousins", + pretty_exceptions_show_locals=False, ) +app.add_typer(conf.app, name="conf") @app.command() diff --git a/src/careamics/cli/utils.py b/src/careamics/cli/utils.py index 151ef3fcd..c25fa12a2 100644 --- a/src/careamics/cli/utils.py +++ b/src/careamics/cli/utils.py @@ -1,9 +1,11 @@ """Utility functions for the CAREamics CLI.""" -from typing import Tuple +from typing import Optional, Tuple -def handle_2D_3D_callback(value: Tuple[int, int, int]) -> Tuple[int, ...]: +def handle_2D_3D_callback( + value: Optional[Tuple[int, int, int]] +) -> Optional[Tuple[int, ...]]: """ Callback for options that require 2D or 3D inputs. @@ -20,6 +22,8 @@ def handle_2D_3D_callback(value: Tuple[int, int, int]) -> Tuple[int, ...]: If the last element in `value` is -1 the tuple is reduced to the first two values. """ + if value is None: + return value if value[2] == -1: return value[:2] return value diff --git a/tests/cli/test_main.py b/tests/cli/test_main.py index aa03be78b..821c6520e 100644 --- a/tests/cli/test_main.py +++ b/tests/cli/test_main.py @@ -2,11 +2,11 @@ import numpy as np import tifffile -import yaml from typer.testing import CliRunner +from careamics import CAREamist from careamics.cli.main import app -from careamics.config import Configuration +from careamics.config import Configuration, save_configuration from careamics.config.support import SupportedData runner = CliRunner() @@ -18,8 +18,7 @@ def test_train(tmp_path: Path, minimum_configuration: dict): config_path = tmp_path / "config.yaml" config = Configuration(**minimum_configuration) config.data_config.data_type = SupportedData.TIFF.value - with open(config_path, "w") as file: - yaml.dump(config.model_dump(), file, indent=2) + save_configuration(config, config_path) # training data train_array = np.random.rand(32, 32) @@ -40,10 +39,88 @@ def test_train(tmp_path: Path, minimum_configuration: dict): ], ) assert (tmp_path / "checkpoints").is_dir() - assert len(list((tmp_path / "checkpoints").glob("*"))) > 0 + assert len(list((tmp_path / "checkpoints").glob("*.ckpt"))) > 0 assert result.exit_code == 0 -def test_predict(): - result = runner.invoke(app, ["predict"]) - assert result.exit_code == 1 # assert exits with error (NotImplementedError) +def test_predict_single_file(tmp_path: Path, minimum_configuration: dict): + + # create & save config + config_path = tmp_path / "config.yaml" + config = Configuration(**minimum_configuration) + config.data_config.data_type = SupportedData.TIFF.value + save_configuration(config, config_path) + + # dummy data + train_array = np.random.rand(32, 32) + # save files + train_file = tmp_path / "train.tiff" + tifffile.imwrite(train_file, train_array) + + careamist = CAREamist(config, work_dir=tmp_path) + careamist.train(train_source=train_file) + + checkpoint_path = next(iter((tmp_path / "checkpoints").glob("*.ckpt"))) + + result = runner.invoke( + app, ["predict", str(checkpoint_path), str(train_file), "-wd", str(tmp_path)] + ) + assert (tmp_path / "predictions").is_dir() + assert len(list((tmp_path / "predictions").glob("*.tif*"))) > 0 + assert result.exit_code == 0 + + +def test_predict_directory(tmp_path: Path, minimum_configuration: dict): + + # create & save config + config_path = tmp_path / "config.yaml" + config = Configuration(**minimum_configuration) + config.data_config.data_type = SupportedData.TIFF.value + save_configuration(config, config_path) + + n_files = 2 + data_dir = tmp_path / "data" + data_dir.mkdir() + + # dummy data + for i in range(n_files): + train_array = np.random.rand(32, 32) + # save files + train_file = data_dir / f"train_{i}.tiff" + tifffile.imwrite(train_file, train_array) + + careamist = CAREamist(config, work_dir=tmp_path) + careamist.train(train_source=data_dir) + + checkpoint_path = next(iter((tmp_path / "checkpoints").glob("*.ckpt"))) + + result = runner.invoke( + app, ["predict", str(checkpoint_path), str(data_dir), "-wd", str(tmp_path)] + ) + assert (tmp_path / "predictions").is_dir() + assert len(list((tmp_path / "predictions").glob("*.tif*"))) > 0 + assert result.exit_code == 0 + + # create & save config + config_path = tmp_path / "config.yaml" + config = Configuration(**minimum_configuration) + config.data_config.data_type = SupportedData.TIFF.value + save_configuration(config, config_path) + + # dummy data + train_array = np.random.rand(32, 32) + # save files + train_file = tmp_path / "train.tiff" + tifffile.imwrite(train_file, train_array) + + careamist = CAREamist(config, work_dir=tmp_path) + careamist.train(train_source=train_file) + + checkpoint_path = next(iter((tmp_path / "checkpoints").glob("*.ckpt"))) + + result = runner.invoke( + app, ["predict", str(checkpoint_path), str(train_file), "-wd", str(tmp_path)] + ) + assert (tmp_path / "predictions").is_dir() + assert len(list((tmp_path / "predictions").glob("*.tif*"))) > 0 + assert result.exit_code == 0 From 090aeffb44eecabd43e97ebd86de8ca9a481396e Mon Sep 17 00:00:00 2001 From: melisande-c Date: Tue, 26 Nov 2024 18:41:31 +0100 Subject: [PATCH 4/8] feat(predict CLI): allow choice of prediction directory --- src/careamics/cli/main.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/src/careamics/cli/main.py b/src/careamics/cli/main.py index d4d91a99b..54296e330 100644 --- a/src/careamics/cli/main.py +++ b/src/careamics/cli/main.py @@ -195,15 +195,26 @@ def predict( # numpydoc ignore=PR01 typer.Option( "--work-dir", "-wd", + help=("Path to working directory."), + exists=True, + file_okay=False, + dir_okay=True, + ), + ] = None, + prediction_dir: Annotated[ + Path, + typer.Option( + "--prediction-dir", + "-pd", help=( - "Path to working directory. (Predictions will be save in a nested " - "directory named 'predictions'.)" + "Directory to save predictions to. If not an abosulte path it will be " + "relative to the set working directory." ), exists=True, file_okay=False, dir_okay=True, ), - ] = None, + ] = Path("predictions"), ): """Create and save predictions from CAREamics models.""" engine = CAREamist(source=model, work_dir=work_dir) @@ -216,7 +227,7 @@ def predict( # numpydoc ignore=PR01 data_type=data_type, tta_transforms=tta_transforms, write_type=write_type, - prediction_dir="predictions", + prediction_dir=prediction_dir, ) From 8fee2c74bda3bfdc79bdc768fd6c29c307e71c2a Mon Sep 17 00:00:00 2001 From: melisande-c Date: Wed, 27 Nov 2024 11:05:14 +0100 Subject: [PATCH 5/8] fix: directory structure bug --- src/careamics/careamist.py | 7 +++++-- tests/test_careamist.py | 6 +++--- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/src/careamics/careamist.py b/src/careamics/careamist.py index c5d939cd2..f9cd58ddc 100644 --- a/src/careamics/careamist.py +++ b/src/careamics/careamist.py @@ -850,8 +850,11 @@ def predict_to_disk( write_data = np.concatenate(prediction) # create directory structure and write path - file_write_dir = write_dir / file_path.relative_to(source_path) - file_write_dir.mkdir(parents=True, exist_ok=True) + if not source_path.is_file(): + file_write_dir = write_dir / file_path.parent.relative_to(source_path) + file_write_dir.mkdir(parents=True, exist_ok=True) + else: + file_write_dir = write_dir write_path = (file_write_dir / file_path.name).with_suffix(write_extension) # write data diff --git a/tests/test_careamist.py b/tests/test_careamist.py index 758e92d31..ce5c0e7e3 100644 --- a/tests/test_careamist.py +++ b/tests/test_careamist.py @@ -862,7 +862,7 @@ def test_predict_to_disk_path_tiff(tmp_path, minimum_configuration): careamist.predict_to_disk(source=image_dir) for i in range(n_samples): - assert (tmp_path / "predictions" / "images" / f"image_{i}.tiff").is_file() + assert (tmp_path / "predictions" / f"image_{i}.tiff").is_file() def test_predict_to_disk_datamodule_tiff(tmp_path, minimum_configuration): @@ -903,7 +903,7 @@ def test_predict_to_disk_datamodule_tiff(tmp_path, minimum_configuration): careamist.predict_to_disk(source=datamodule) for i in range(n_samples): - assert (tmp_path / "predictions" / "images" / f"image_{i}.tiff").is_file() + assert (tmp_path / "predictions" / f"image_{i}.tiff").is_file() def test_predict_to_disk_custom(tmp_path, minimum_configuration): @@ -944,7 +944,7 @@ def write_numpy(file_path: Path, img: NDArray, *args, **kwargs) -> None: ) for i in range(n_samples): - assert (tmp_path / "predictions" / "images" / f"image_{i}.npy").is_file() + assert (tmp_path / "predictions" / f"image_{i}.npy").is_file() def test_predict_to_disk_custom_raises(tmp_path, minimum_configuration): From 4fe951d860f166f17eb903811a0ffc42d4ed7b15 Mon Sep 17 00:00:00 2001 From: melisande-c Date: Wed, 27 Nov 2024 12:06:09 +0100 Subject: [PATCH 6/8] test: remove accidental code duplication; assert individual file existence --- tests/cli/test_main.py | 29 +++-------------------------- 1 file changed, 3 insertions(+), 26 deletions(-) diff --git a/tests/cli/test_main.py b/tests/cli/test_main.py index 821c6520e..01f4794e0 100644 --- a/tests/cli/test_main.py +++ b/tests/cli/test_main.py @@ -66,7 +66,7 @@ def test_predict_single_file(tmp_path: Path, minimum_configuration: dict): app, ["predict", str(checkpoint_path), str(train_file), "-wd", str(tmp_path)] ) assert (tmp_path / "predictions").is_dir() - assert len(list((tmp_path / "predictions").glob("*.tif*"))) > 0 + assert (tmp_path / "predictions" / "train.tiff").is_file() assert result.exit_code == 0 @@ -98,29 +98,6 @@ def test_predict_directory(tmp_path: Path, minimum_configuration: dict): app, ["predict", str(checkpoint_path), str(data_dir), "-wd", str(tmp_path)] ) assert (tmp_path / "predictions").is_dir() - assert len(list((tmp_path / "predictions").glob("*.tif*"))) > 0 - assert result.exit_code == 0 - - # create & save config - config_path = tmp_path / "config.yaml" - config = Configuration(**minimum_configuration) - config.data_config.data_type = SupportedData.TIFF.value - save_configuration(config, config_path) - - # dummy data - train_array = np.random.rand(32, 32) - # save files - train_file = tmp_path / "train.tiff" - tifffile.imwrite(train_file, train_array) - - careamist = CAREamist(config, work_dir=tmp_path) - careamist.train(train_source=train_file) - - checkpoint_path = next(iter((tmp_path / "checkpoints").glob("*.ckpt"))) - - result = runner.invoke( - app, ["predict", str(checkpoint_path), str(train_file), "-wd", str(tmp_path)] - ) - assert (tmp_path / "predictions").is_dir() - assert len(list((tmp_path / "predictions").glob("*.tif*"))) > 0 + for i in range(n_files): + assert (tmp_path / "predictions" / f"train_{i}.tiff").is_file() assert result.exit_code == 0 From 8b2443f3b4e56ce881074fefd9a8bca51ba33408 Mon Sep 17 00:00:00 2001 From: melisande-c Date: Wed, 27 Nov 2024 12:07:36 +0100 Subject: [PATCH 7/8] fix: prediction directory creation; discrepancy between CI and local? --- src/careamics/careamist.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/careamics/careamist.py b/src/careamics/careamist.py index f9cd58ddc..4a5800090 100644 --- a/src/careamics/careamist.py +++ b/src/careamics/careamist.py @@ -852,9 +852,9 @@ def predict_to_disk( # create directory structure and write path if not source_path.is_file(): file_write_dir = write_dir / file_path.parent.relative_to(source_path) - file_write_dir.mkdir(parents=True, exist_ok=True) else: file_write_dir = write_dir + file_write_dir.mkdir(parents=True, exist_ok=True) write_path = (file_write_dir / file_path.name).with_suffix(write_extension) # write data From 87d1d6aba886113aa0efea4713d10561a8acab6c Mon Sep 17 00:00:00 2001 From: melisande-c Date: Wed, 27 Nov 2024 17:23:41 +0100 Subject: [PATCH 8/8] fix: allow prediction directory not to exist --- src/careamics/cli/main.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/careamics/cli/main.py b/src/careamics/cli/main.py index 54296e330..e82082aee 100644 --- a/src/careamics/cli/main.py +++ b/src/careamics/cli/main.py @@ -210,7 +210,6 @@ def predict( # numpydoc ignore=PR01 "Directory to save predictions to. If not an abosulte path it will be " "relative to the set working directory." ), - exists=True, file_okay=False, dir_okay=True, ),