From 140daab706e508e4f1ee4c708dd5b0d56758a058 Mon Sep 17 00:00:00 2001 From: jdeschamps <6367888+jdeschamps@users.noreply.github.com> Date: Thu, 13 Jun 2024 18:10:26 +0200 Subject: [PATCH] (refac): passing array to BMZ export is now mandatory --- src/careamics/careamist.py | 107 ++----------------- tests/test_careamist.py | 205 +++++-------------------------------- 2 files changed, 39 insertions(+), 273 deletions(-) diff --git a/src/careamics/careamist.py b/src/careamics/careamist.py index 6184dcc51..684d04b0a 100644 --- a/src/careamics/careamist.py +++ b/src/careamics/careamist.py @@ -26,7 +26,6 @@ from careamics.lightning_prediction_datamodule import CAREamicsPredictData from careamics.lightning_prediction_loop import CAREamicsPredictionLoop from careamics.model_io import export_to_bmz, load_pretrained -from careamics.transforms import Denormalize from careamics.utils import check_path_exists, get_logger from .callbacks import HyperParametersCallback @@ -656,103 +655,20 @@ def predict( f"NDArray (got {type(source)})." ) - def _create_data_for_bmz( - self, - input_array: Optional[NDArray] = None, - seed: Optional[int] = None, - ) -> NDArray: - """Create data for BMZ export. - - If no `input_array` is provided, this method checks if there is a prediction - datamodule, or a training data module, to extract a patch. If none exists, - then a random array is created. - - The method returns a denormalized array. - - If there is a non-singleton batch dimension, this method returns only the first - element. - - Parameters - ---------- - input_array : NDArray, optional - Input array, which should not be normalized, by default None. - seed : int, optional - Seed for the random number generator used when no input array is given nor - are there data in the dataloaders, by default None. - - Returns - ------- - NDArray - Input data for BMZ export. - - Raises - ------ - ValueError - If mean and std are not provided in the configuration. - """ - if input_array is None: - if self.cfg.data_config.mean is None or self.cfg.data_config.std is None: - raise ValueError( - "Mean and std cannot be None in the configuration in order to" - "export to the BMZ format. Was the model trained?" - ) - - # generate images, priority is given to the prediction data module - if self.pred_datamodule is not None: - # unpack a batch, ignore masks or targets - input_patch, *_ = next(iter(self.pred_datamodule.predict_dataloader())) - - # convert torch.Tensor to numpy - input_patch = input_patch.numpy() - - # denormalize - denormalize = Denormalize( - mean=self.cfg.data_config.mean, std=self.cfg.data_config.std - ) - input_patch_denorm = denormalize(input_patch) - - elif self.train_datamodule is not None: - input_patch, *_ = next(iter(self.train_datamodule.train_dataloader())) - input_patch = input_patch.numpy() - - # denormalize - denormalize = Denormalize( - mean=self.cfg.data_config.mean, std=self.cfg.data_config.std - ) - input_patch_denorm = denormalize(input_patch) - else: - # create a random input array - rng = np.random.default_rng(seed) - input_patch_denorm = rng.normal( - loc=self.cfg.data_config.mean, - scale=self.cfg.data_config.std, - size=self.cfg.data_config.patch_size, - ).astype(np.float32)[ - np.newaxis, np.newaxis, ... - ] # add S & C dimensions - else: - # potentially correct shape - input_patch_denorm = reshape_array(input_array, self.cfg.data_config.axes) - - # if this a batch - if input_patch_denorm.shape[0] > 1: - input_patch_denorm = input_patch_denorm[[0], ...] # keep singleton dim - - return input_patch_denorm - def export_to_bmz( self, path: Union[Path, str], name: str, + input_array: NDArray, authors: List[dict], - input_array: Optional[NDArray] = None, general_description: str = "", channel_names: Optional[List[str]] = None, data_description: Optional[str] = None, ) -> None: """Export the model to the BioImage Model Zoo format. - Input array must be of shape SC(Z)YX, with S and C singleton dimensions. + Input array must be of the same dimensions as the axes recorded in the + configuration of the `CAREamist`. Parameters ---------- @@ -760,10 +676,10 @@ def export_to_bmz( Path to save the model. name : str Name of the model. + input_array : NDArray + Input array used to validate the model and as example. authors : list of dict List of authors of the model. - input_array : NDArray, optional - Input array for the model, must be of shape SC(Z)YX, by default None. general_description : str General description of the model, used in the metadata of the BMZ archive. channel_names : list of str, optional @@ -771,7 +687,7 @@ def export_to_bmz( data_description : str, optional Description of the data, by default None. """ - input_patch = self._create_data_for_bmz(input_array) + input_patch = reshape_array(input_array, self.cfg.data_config.axes) # axes need to be reformated for the export because reshaping was done in the # datamodule @@ -788,11 +704,10 @@ def export_to_bmz( tta_transforms=False, ) - if not isinstance(output_patch, np.ndarray): - raise ValueError( - f"Numpy array required for export to BioImage Model Zoo, got " - f"{type(output_patch)}." - ) + if isinstance(output_patch, list): + output = np.concatenate(output_patch, axis=0) + else: + output = output_patch export_to_bmz( model=self.model, @@ -802,7 +717,7 @@ def export_to_bmz( general_description=general_description, authors=authors, input_array=input_patch, - output_array=output_patch, + output_array=output, channel_names=channel_names, data_description=data_description, ) diff --git a/tests/test_careamist.py b/tests/test_careamist.py index 4912ab688..b359336fa 100644 --- a/tests/test_careamist.py +++ b/tests/test_careamist.py @@ -102,8 +102,9 @@ def test_train_single_array_no_val(tmp_path: Path, minimum_configuration: dict): careamist.export_to_bmz( path=tmp_path / "model.zip", name="TopModel", - general_description="A model that just walked in.", + input_array=train_array, authors=[{"name": "Amod", "affiliation": "El"}], + general_description="A model that just walked in.", ) assert (tmp_path / "model.zip").exists() @@ -135,8 +136,9 @@ def test_train_array(tmp_path: Path, minimum_configuration: dict): careamist.export_to_bmz( path=tmp_path / "model.zip", name="TopModel", - general_description="A model that just walked in.", + input_array=train_array, authors=[{"name": "Amod", "affiliation": "El"}], + general_description="A model that just walked in.", ) assert (tmp_path / "model.zip").exists() @@ -174,8 +176,9 @@ def test_train_array_channel( careamist.export_to_bmz( path=tmp_path / "model.zip", name="TopModel", - general_description="A model that just walked in.", + input_array=train_array, authors=[{"name": "Amod", "affiliation": "El"}], + general_description="A model that just walked in.", channel_names=["red", "green", "blue"], ) assert (tmp_path / "model.zip").exists() @@ -208,8 +211,9 @@ def test_train_array_3d(tmp_path: Path, minimum_configuration: dict): careamist.export_to_bmz( path=tmp_path / "model.zip", name="TopModel", - general_description="A model that just walked in.", + input_array=train_array, authors=[{"name": "Amod", "affiliation": "El"}], + general_description="A model that just walked in.", ) assert (tmp_path / "model.zip").exists() @@ -244,8 +248,9 @@ def test_train_tiff_files_in_memory_no_val(tmp_path: Path, minimum_configuration careamist.export_to_bmz( path=tmp_path / "model.zip", name="TopModel", - general_description="A model that just walked in.", + input_array=train_array, authors=[{"name": "Amod", "affiliation": "El"}], + general_description="A model that just walked in.", ) assert (tmp_path / "model.zip").exists() @@ -284,8 +289,9 @@ def test_train_tiff_files_in_memory(tmp_path: Path, minimum_configuration: dict) careamist.export_to_bmz( path=tmp_path / "model.zip", name="TopModel", - general_description="A model that just walked in.", + input_array=train_array, authors=[{"name": "Amod", "affiliation": "El"}], + general_description="A model that just walked in.", ) assert (tmp_path / "model.zip").exists() @@ -326,8 +332,9 @@ def test_train_tiff_files(tmp_path: Path, minimum_configuration: dict): careamist.export_to_bmz( path=tmp_path / "model.zip", name="TopModel", - general_description="A model that just walked in.", + input_array=train_array, authors=[{"name": "Amod", "affiliation": "El"}], + general_description="A model that just walked in.", ) assert (tmp_path / "model.zip").exists() @@ -366,8 +373,9 @@ def test_train_array_supervised(tmp_path: Path, supervised_configuration: dict): careamist.export_to_bmz( path=tmp_path / "model.zip", name="TopModel", - general_description="A model that just walked in.", + input_array=train_array, authors=[{"name": "Amod", "affiliation": "El"}], + general_description="A model that just walked in.", ) assert (tmp_path / "model.zip").exists() @@ -425,8 +433,9 @@ def test_train_tiff_files_in_memory_supervised( careamist.export_to_bmz( path=tmp_path / "model.zip", name="TopModel", - general_description="A model that just walked in.", + input_array=train_array, authors=[{"name": "Amod", "affiliation": "El"}], + general_description="A model that just walked in.", ) assert (tmp_path / "model.zip").exists() @@ -485,8 +494,9 @@ def test_train_tiff_files_supervised(tmp_path: Path, supervised_configuration: d careamist.export_to_bmz( path=tmp_path / "model.zip", name="TopModel", - general_description="A model that just walked in.", + input_array=train_array, authors=[{"name": "Amod", "affiliation": "El"}], + general_description="A model that just walked in.", ) assert (tmp_path / "model.zip").exists() @@ -524,8 +534,9 @@ def test_predict_on_array_tiled( careamist.export_to_bmz( path=tmp_path / "model.zip", name="TopModel", - general_description="A model that just walked in.", + input_array=train_array, authors=[{"name": "Amod", "affiliation": "El"}], + general_description="A model that just walked in.", ) assert (tmp_path / "model.zip").exists() @@ -559,8 +570,9 @@ def test_predict_arrays_no_tiling(tmp_path: Path, minimum_configuration: dict): careamist.export_to_bmz( path=tmp_path / "model.zip", name="TopModel", - general_description="A model that just walked in.", + input_array=train_array, authors=[{"name": "Amod", "affiliation": "El"}], + general_description="A model that just walked in.", ) assert (tmp_path / "model.zip").exists() @@ -637,8 +649,9 @@ def test_predict_path(tmp_path: Path, minimum_configuration: dict, batch_size): careamist.export_to_bmz( path=tmp_path / "model.zip", name="TopModel", - general_description="A model that just walked in.", + input_array=train_array, authors=[{"name": "Amod", "affiliation": "El"}], + general_description="A model that just walked in.", ) assert (tmp_path / "model.zip").exists() @@ -676,150 +689,6 @@ def test_predict_pretrained_bmz(tmp_path: Path, pre_trained_bmz: Path): assert predicted.squeeze().shape == source_array.shape -def test_data_for_bmz_random(tmp_path, minimum_configuration): - """Test the BMZ example data creation when the careamist has a training - datamodule.""" - seed = 42 - rng = np.random.default_rng(seed) - - # example data - example_data = 255 * rng.random((64, 64), dtype=np.float32) - example_mean = example_data.mean() - example_std = example_data.std() - - # create configuration - config = Configuration(**minimum_configuration) - config.training_config.num_epochs = 1 - config.data_config.axes = "YX" - config.data_config.batch_size = 2 - config.data_config.data_type = SupportedData.ARRAY.value - config.data_config.patch_size = (32, 32) - config.data_config.set_mean_and_std(example_mean, example_std) - - # instantiate CAREamist - careamist = CAREamist(source=config, work_dir=tmp_path) - - # get data for BMZ - patch = careamist._create_data_for_bmz(seed=seed) - assert patch.shape == (1, 1) + tuple(config.data_config.patch_size) - - # check that the correct image is not normalized - assert np.isclose(patch.mean(), example_mean, rtol=0.02) - assert np.isclose(patch.std(), example_std, rtol=0.02) - - -def test_data_for_bmz_with_array(tmp_path, minimum_configuration): - """Test the BMZ example data creation when the careamist has a training - datamodule.""" - seed = 42 - rng = np.random.default_rng(seed) - - # example data - example_data = 255 * rng.random((64, 64), dtype=np.float32) - example_mean = example_data.mean() - example_std = example_data.std() - - # create configuration - config = Configuration(**minimum_configuration) - config.training_config.num_epochs = 1 - config.data_config.axes = "YX" - config.data_config.batch_size = 2 - config.data_config.data_type = SupportedData.ARRAY.value - config.data_config.patch_size = (8, 8) - config.data_config.set_mean_and_std(example_mean, example_std) - - # instantiate CAREamist - careamist = CAREamist(source=config, work_dir=tmp_path) - - # get data for BMZ - patch = careamist._create_data_for_bmz(example_data, seed=seed) - assert patch.shape == (1, 1) + example_data.shape - - # check the normalization - assert np.allclose(patch.squeeze(), example_data) - - -def test_data_for_bmz_after_training(tmp_path, minimum_configuration): - """Test the BMZ example data creation when the careamist has a training - datamodule.""" - seed = 42 - rng = np.random.default_rng(seed) - - # training data - train_array = 255 * rng.random((64, 64), dtype=np.float32) - mean = train_array.mean() - std = train_array.std() - - val_array = 255 * rng.random((64, 64), dtype=np.float32) - - # create configuration - config = Configuration(**minimum_configuration) - config.training_config.num_epochs = 1 - config.data_config.axes = "YX" - config.data_config.batch_size = 2 - config.data_config.data_type = SupportedData.ARRAY.value - config.data_config.patch_size = (32, 32) - - # instantiate CAREamist - careamist = CAREamist(source=config, work_dir=tmp_path) - - # train CAREamist - careamist.train(train_source=train_array, val_source=val_array) - - # check that mean and std make sense - assert np.isclose(config.data_config.mean, mean, rtol=0.01) - assert np.isclose(config.data_config.std, std, rtol=0.01) - - # get data for BMZ - patch = careamist._create_data_for_bmz(seed=seed) - assert patch.shape == (1, 1) + tuple(config.data_config.patch_size) - - # check normalization - assert np.isclose(patch.mean(), mean, rtol=0.1) - assert np.isclose(patch.std(), std, rtol=0.1) - - -def test_data_for_bmz_after_prediction(tmp_path, minimum_configuration): - """Test the BMZ example data creation when the careamist has a prediction - datamodule.""" - seed = 42 - rng = np.random.default_rng(seed) - - # training data - train_array = 255 * rng.random((64, 64), dtype=np.float32) - val_array = 255 * rng.random((64, 64), dtype=np.float32) - - # create configuration - config = Configuration(**minimum_configuration) - config.training_config.num_epochs = 1 - config.data_config.axes = "YX" - config.data_config.batch_size = 2 - config.data_config.data_type = SupportedData.ARRAY.value - config.data_config.patch_size = (32, 32) - - # instantiate CAREamist - careamist = CAREamist(source=config, work_dir=tmp_path) - - # train CAREamist - careamist.train(train_source=train_array, val_source=val_array) - - # check that mean and std make sense - assert config.data_config.mean > 100 - assert config.data_config.std > 20 - - # predict without tiling - test_array = 255 * rng.random((64, 64), dtype=np.float32) - _ = careamist.predict(test_array) - - # get data for BMZ - patch = careamist._create_data_for_bmz() - assert patch.shape == (1, 1) + test_array.shape - - # check normalization - assert np.isclose(patch.mean(), test_array.mean(), rtol=0.1) - assert np.isclose(patch.std(), test_array.std(), rtol=0.1) - - def test_export_bmz_pretrained_prediction(tmp_path: Path, pre_trained: Path): """Test that CAREamics can be instantiated with a pre-trained network and exported to BMZ after prediction. @@ -839,27 +708,9 @@ def test_export_bmz_pretrained_prediction(tmp_path: Path, pre_trained: Path): careamist.export_to_bmz( path=tmp_path / "model.zip", name="TopModel", - general_description="A model that just walked in.", + input_array=source_array, authors=[{"name": "Amod", "affiliation": "El"}], - ) - assert (tmp_path / "model.zip").exists() - - -def test_export_bmz_pretrained_random_array(tmp_path: Path, pre_trained: Path): - """Test that CAREamics can be instantiated with a pre-trained network and exported - to BMZ. - - In this case, the careamist creates a random array for the BMZ archive test. - """ - # instantiate CAREamist - careamist = CAREamist(source=pre_trained, work_dir=tmp_path) - - # export to BMZ (random array created) - careamist.export_to_bmz( - path=tmp_path / "model.zip", - name="TopModel", general_description="A model that just walked in.", - authors=[{"name": "Amod", "affiliation": "El"}], ) assert (tmp_path / "model.zip").exists() @@ -879,7 +730,7 @@ def test_export_bmz_pretrained_with_array(tmp_path: Path, pre_trained: Path): path=tmp_path / "model2.zip", name="TopModel", input_array=array, - general_description="A model that just walked in.", authors=[{"name": "Amod", "affiliation": "El"}], + general_description="A model that just walked in.", ) assert (tmp_path / "model2.zip").exists()