diff --git a/src/careamics/careamist.py b/src/careamics/careamist.py index 62312131e..aea217dd8 100644 --- a/src/careamics/careamist.py +++ b/src/careamics/careamist.py @@ -19,11 +19,13 @@ load_configuration, ) from careamics.config.support import SupportedAlgorithm, SupportedData, SupportedLogger +from careamics.dataset.dataset_utils import reshape_array from careamics.lightning_datamodule import CAREamicsTrainData from careamics.lightning_module import CAREamicsModule from careamics.lightning_prediction_datamodule import CAREamicsPredictData from careamics.lightning_prediction_loop import CAREamicsPredictionLoop from careamics.model_io import export_to_bmz, load_pretrained +from careamics.transforms import Denormalize from careamics.utils import check_path_exists, get_logger from .callbacks import HyperParametersCallback @@ -651,38 +653,41 @@ def predict( f"np.ndarray (got {type(source)})." ) - def export_to_bmz( + def _create_data_for_bmz( self, - path: Union[Path, str], - name: str, - authors: List[dict], input_array: Optional[np.ndarray] = None, - general_description: str = "", - channel_names: Optional[List[str]] = None, - data_description: Optional[str] = None, - ) -> None: - """Export the model to the BioImage Model Zoo format. + ) -> np.ndarray: + """Create data for BMZ export. - Input array must be of shape SC(Z)YX, with S and C singleton dimensions. + If no `input_array` is provided, this method checks if there is a prediction + datamodule, or a training data module, to extract a patch. If none exists, + then a random array is created. + + If there is a non-singleton batch dimension, this method returns only the first + element. Parameters ---------- - path : Union[Path, str] - Path to save the model. - name : str - Name of the model. - authors : List[dict] - List of authors of the model. input_array : Optional[np.ndarray], optional - Input array for the model, must be of shape SC(Z)YX, by default None.
- general_description : str - General description of the model, used in the metadata of the BMZ archive. - channel_names : Optional[List[str]], optional - Channel names, by default None. - data_description : Optional[str], optional - Description of the data, by default None. + Input array, by default None. + + Returns + ------- + np.ndarray + Input data for BMZ export. + + Raises + ------ + ValueError + If mean and std are not provided in the configuration. """ if input_array is None: + if self.cfg.data_config.mean is None or self.cfg.data_config.std is None: + raise ValueError( + "Mean and std cannot be None in the configuration in order to" + "export to the BMZ format. Was the model trained?" + ) + # generate images, priority is given to the prediction data module if self.pred_datamodule is not None: # unpack a batch, ignore masks or targets @@ -690,19 +695,23 @@ def export_to_bmz( # convert torch.Tensor to numpy input_patch = input_patch.numpy() + + # denormalize + denormalize = Denormalize( + mean=self.cfg.data_config.mean, std=self.cfg.data_config.std + ) + input_patch, _ = denormalize(input_patch) + elif self.train_datamodule is not None: input_patch, *_ = next(iter(self.train_datamodule.train_dataloader())) input_patch = input_patch.numpy() - else: - if ( - self.cfg.data_config.mean is None - or self.cfg.data_config.std is None - ): - raise ValueError( - "Mean and std cannot be None in the configuration in order to" - "export to the BMZ format. Was the model trained?" - ) + # denormalize + denormalize = Denormalize( + mean=self.cfg.data_config.mean, std=self.cfg.data_config.std + ) + input_patch, _ = denormalize(input_patch) + else: # create a random input array input_patch = np.random.normal( loc=self.cfg.data_config.mean, @@ -712,11 +721,47 @@ def export_to_bmz( np.newaxis, np.newaxis, ... 
] # add S & C dimensions else: - input_patch = input_array + # potentially correct shape + input_patch = reshape_array(input_array, self.cfg.data_config.axes) - # if there is a batch dimension + # if this is a batch if input_patch.shape[0] > 1: - input_patch = input_patch[0:1, ...] # keep singleton dim + input_patch = input_patch[[0], ...] # keep singleton dim + + return input_patch + + def export_to_bmz( + self, + path: Union[Path, str], + name: str, + authors: List[dict], + input_array: Optional[np.ndarray] = None, + general_description: str = "", + channel_names: Optional[List[str]] = None, + data_description: Optional[str] = None, + ) -> None: + """Export the model to the BioImage Model Zoo format. + + Input array must be of shape SC(Z)YX, with S and C singleton dimensions. + + Parameters + ---------- + path : Union[Path, str] + Path to save the model. + name : str + Name of the model. + authors : List[dict] + List of authors of the model. + input_array : Optional[np.ndarray], optional + Input array for the model, must be of shape SC(Z)YX, by default None. + general_description : str + General description of the model, used in the metadata of the BMZ archive. + channel_names : Optional[List[str]], optional + Channel names, by default None. + data_description : Optional[str], optional + Description of the data, by default None. + """ + input_patch = self._create_data_for_bmz(input_array) # axes need to be reformated for the export because reshaping was done in the # datamodule diff --git a/src/careamics/model_io/bmz_io.py b/src/careamics/model_io/bmz_io.py index 8dd9aa667..5749bd9d1 100644 --- a/src/careamics/model_io/bmz_io.py +++ b/src/careamics/model_io/bmz_io.py @@ -104,9 +104,9 @@ def export_to_bmz( authors : List[dict] Authors of the model. input_array : np.ndarray - Input array. + Input array, should not have been normalized. output_array : np.ndarray - Output array. + Output array, should have been denormalized.
channel_names : Optional[List[str]], optional Channel names, by default None. data_description : Optional[str], optional @@ -178,7 +178,7 @@ def export_to_bmz( ) # test model description - summary: ValidationSummary = test_model(model_description, decimal=0) + summary: ValidationSummary = test_model(model_description, decimal=2) if summary.status == "failed": raise ValueError(f"Model description test failed: {summary}") diff --git a/tests/conftest.py b/tests/conftest.py index 3e41040ce..c80b89bcc 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -247,7 +247,7 @@ def overlaps() -> Tuple[int, int]: def pre_trained(tmp_path, minimum_configuration): """Fixture to create a pre-trained CAREamics model.""" # training data - train_array = np.arange(32 * 32).reshape((32, 32)) + train_array = np.arange(32 * 32).reshape((32, 32)).astype(float) # create configuration config = Configuration(**minimum_configuration) diff --git a/tests/test_careamist.py b/tests/test_careamist.py index d22062cb4..c2fafa5d5 100644 --- a/tests/test_careamist.py +++ b/tests/test_careamist.py @@ -1,4 +1,5 @@ from pathlib import Path +from typing import Tuple import numpy as np import pytest @@ -7,7 +8,10 @@ from careamics import CAREamist, Configuration, save_configuration from careamics.config.support import SupportedAlgorithm, SupportedData -# TODO test 3D and channels + +def random_array(shape: Tuple[int, ...]): + """Return a random array with values between 0 and 255.""" + return (255 * (1 + np.random.rand(*shape)) / 2).astype(np.float32) def test_no_parameters(): @@ -74,7 +78,7 @@ def test_train_error_target_unsupervised_algorithm( def test_train_single_array_no_val(tmp_path: Path, minimum_configuration: dict): """Test that CAREamics can be trained with arrays.""" # training data - train_array = np.random.rand(32, 32) + train_array = random_array((32, 32)) # create configuration config = Configuration(**minimum_configuration) @@ -106,8 +110,8 @@ def 
test_train_single_array_no_val(tmp_path: Path, minimum_configuration: dict): def test_train_array(tmp_path: Path, minimum_configuration: dict): """Test that CAREamics can be trained on arrays.""" # training data - train_array = np.random.rand(32, 32) - val_array = np.random.rand(32, 32) + train_array = random_array((32, 32)) + val_array = random_array((32, 32)) # create configuration config = Configuration(**minimum_configuration) @@ -142,8 +146,8 @@ def test_train_array_channel( ): """Test that CAREamics can be trained on arrays with channels.""" # training data - train_array = np.random.rand(32, 32, 3) - val_array = np.random.rand(32, 32, 3) + train_array = random_array((32, 32, 3)) + val_array = random_array((32, 32, 3)) # create configuration config = Configuration(**minimum_configuration) @@ -179,8 +183,8 @@ def test_train_array_channel( def test_train_array_3d(tmp_path: Path, minimum_configuration: dict): """Test that CAREamics can be trained on 3D arrays.""" # training data - train_array = np.random.rand(8, 32, 32) - val_array = np.random.rand(8, 32, 32) + train_array = random_array((8, 32, 32)) + val_array = random_array((8, 32, 32)) # create configuration minimum_configuration["data_config"]["axes"] = "ZYX" @@ -212,7 +216,7 @@ def test_train_array_3d(tmp_path: Path, minimum_configuration: dict): def test_train_tiff_files_in_memory_no_val(tmp_path: Path, minimum_configuration: dict): """Test that CAREamics can be trained with tiff files in memory.""" # training data - train_array = np.random.rand(32, 32) + train_array = random_array((32, 32)) # save files train_file = tmp_path / "train.tiff" @@ -248,8 +252,8 @@ def test_train_tiff_files_in_memory_no_val(tmp_path: Path, minimum_configuration def test_train_tiff_files_in_memory(tmp_path: Path, minimum_configuration: dict): """Test that CAREamics can be trained with tiff files in memory.""" # training data - train_array = np.random.rand(32, 32) - val_array = np.random.rand(32, 32) + train_array = 
random_array((32, 32)) + val_array = random_array((32, 32)) # save files train_file = tmp_path / "train.tiff" @@ -290,8 +294,8 @@ def test_train_tiff_files(tmp_path: Path, minimum_configuration: dict): the in memory dataset. """ # training data - train_array = np.random.rand(32, 32) - val_array = np.random.rand(32, 32) + train_array = random_array((32, 32)) + val_array = random_array((32, 32)) # save files train_file = tmp_path / "train.tiff" @@ -330,10 +334,10 @@ def test_train_tiff_files(tmp_path: Path, minimum_configuration: dict): def test_train_array_supervised(tmp_path: Path, supervised_configuration: dict): """Test that CAREamics can be trained with arrays.""" # training data - train_array = np.random.rand(32, 32) - val_array = np.random.rand(32, 32) - train_target = np.random.rand(32, 32) - val_target = np.random.rand(32, 32) + train_array = random_array((32, 32)) + val_array = random_array((32, 32)) + train_target = random_array((32, 32)) + val_target = random_array((32, 32)) # create configuration config = Configuration(**supervised_configuration) @@ -372,10 +376,10 @@ def test_train_tiff_files_in_memory_supervised( ): """Test that CAREamics can be trained with tiff files in memory.""" # training data - train_array = np.random.rand(32, 32) - val_array = np.random.rand(32, 32) - train_target = np.random.rand(32, 32) - val_target = np.random.rand(32, 32) + train_array = random_array((32, 32)) + val_array = random_array((32, 32)) + train_target = random_array((32, 32)) + val_target = random_array((32, 32)) # save files images = tmp_path / "images" @@ -431,10 +435,10 @@ def test_train_tiff_files_supervised(tmp_path: Path, supervised_configuration: d the in memory dataset. 
""" # training data - train_array = np.random.rand(32, 32) - val_array = np.random.rand(32, 32) - train_target = np.random.rand(32, 32) - val_target = np.random.rand(32, 32) + train_array = random_array((32, 32)) + val_array = random_array((32, 32)) + train_target = random_array((32, 32)) + val_target = random_array((32, 32)) # save files images = tmp_path / "images" @@ -492,7 +496,7 @@ def test_predict_on_array_tiled( ): """Test that CAREamics can predict on arrays.""" # training data - train_array = np.random.rand(32, 32) + train_array = random_array((32, 32)) # create configuration config = Configuration(**minimum_configuration) @@ -528,7 +532,7 @@ def test_predict_on_array_tiled( def test_predict_arrays_no_tiling(tmp_path: Path, minimum_configuration: dict): """Test that CAREamics can predict on arrays without tiling.""" # training data - train_array = np.random.rand(4, 32, 32) + train_array = random_array((4, 32, 32)) # create configuration config = Configuration(**minimum_configuration) @@ -563,7 +567,7 @@ def test_predict_arrays_no_tiling(tmp_path: Path, minimum_configuration: dict): def test_predict_path(tmp_path: Path, minimum_configuration: dict, batch_size): """Test that CAREamics can predict with tiff files.""" # training data - train_array = np.random.rand(32, 32) + train_array = random_array((32, 32)) # save files train_file = tmp_path / "train.tiff" @@ -603,7 +607,7 @@ def test_predict_pretrained_checkpoint(tmp_path: Path, pre_trained: Path): """Test that CAREamics can be instantiated with a pre-trained network and predict on an array.""" # prediction data - source_array = np.random.rand(32, 32) + source_array = random_array((32, 32)) # instantiate CAREamist careamist = CAREamist(source=pre_trained, work_dir=tmp_path) @@ -620,7 +624,7 @@ def test_predict_pretrained_checkpoint(tmp_path: Path, pre_trained: Path): def test_predict_pretrained_bmz(tmp_path: Path, pre_trained_bmz: Path): """Test that CAREamics can be instantiated with a BMZ archive and 
predict.""" # prediction data - source_array = np.random.rand(32, 32) + source_array = random_array((32, 32)) # instantiate CAREamist careamist = CAREamist(source=pre_trained_bmz, work_dir=tmp_path) @@ -632,6 +636,144 @@ def test_predict_pretrained_bmz(tmp_path: Path, pre_trained_bmz: Path): assert predicted.squeeze().shape == source_array.shape +def test_data_for_bmz_random(tmp_path, minimum_configuration): + """Test the BMZ example data creation when the careamist has neither been + trained nor used for prediction.""" + seed = 24 + rng = np.random.default_rng(seed) + + # example data + example_data = 255 * (1 + rng.random((32, 32), dtype=float)) / 2 + + # create configuration + config = Configuration(**minimum_configuration) + config.training_config.num_epochs = 1 + config.data_config.axes = "YX" + config.data_config.batch_size = 2 + config.data_config.data_type = SupportedData.ARRAY.value + config.data_config.patch_size = (8, 8) + config.data_config.set_mean_and_std( + mean=example_data.mean(), std=example_data.std() + ) + + # instantiate CAREamist + careamist = CAREamist(source=config, work_dir=tmp_path) + + # get data for BMZ + patch = careamist._create_data_for_bmz() + assert patch.shape == (1, 1) + tuple(config.data_config.patch_size) + + # check that it is not normalised + assert np.abs(patch.mean() - example_data.mean()) < 0.1 * example_data.mean() + + +def test_data_for_bmz_with_array(tmp_path, minimum_configuration): + """Test the BMZ example data creation when an input array is passed to the + method.""" + seed = 24 + rng = np.random.default_rng(seed) + + # example data + example_data = 255 * (1 + rng.random((32, 32), dtype=float)) / 2 + + # create configuration + config = Configuration(**minimum_configuration) + config.training_config.num_epochs = 1 + config.data_config.axes = "YX" + config.data_config.batch_size = 2 + config.data_config.data_type = SupportedData.ARRAY.value + config.data_config.patch_size = (8, 8) + config.data_config.set_mean_and_std( +
mean=example_data.mean(), std=example_data.std() + ) + + # instantiate CAREamist + careamist = CAREamist(source=config, work_dir=tmp_path) + + # get data for BMZ + patch = careamist._create_data_for_bmz(example_data) + assert patch.shape == (1, 1) + example_data.shape + + # check that it is not normalised + assert np.allclose(patch.squeeze(), example_data) + + +def test_data_for_bmz_after_training(tmp_path, minimum_configuration): + """Test the BMZ example data creation when the careamist has a training + datamodule.""" + seed = 24 + rng = np.random.default_rng(seed) + + # training data + train_array = 255 * (1 + rng.random((32, 32), dtype=float)) / 2 + val_array = 255 * (1 + rng.random((32, 32), dtype=float)) / 2 + + # create configuration + config = Configuration(**minimum_configuration) + config.training_config.num_epochs = 1 + config.data_config.axes = "YX" + config.data_config.batch_size = 2 + config.data_config.data_type = SupportedData.ARRAY.value + config.data_config.patch_size = (8, 8) + + # instantiate CAREamist + careamist = CAREamist(source=config, work_dir=tmp_path) + + # train CAREamist + careamist.train(train_source=train_array, val_source=val_array) + + # check that mean and std make sense + assert config.data_config.mean > 100 + assert config.data_config.std > 20 + + # get data for BMZ + patch = careamist._create_data_for_bmz() + assert patch.shape == (1, 1) + tuple(config.data_config.patch_size) + + # check that it is not normalised (data should be [0, 255]) + assert patch.max() > config.data_config.mean + + +def test_data_for_bmz_after_prediction(tmp_path, minimum_configuration): + """Test the BMZ example data creation when the careamist has a prediction + datamodule.""" + seed = 24 + rng = np.random.default_rng(seed) + + # training data + train_array = 255 * (1 + rng.random((32, 32), dtype=float)) / 2 + val_array = 255 * (1 + rng.random((32, 32), dtype=float)) / 2 + + # create configuration + config = Configuration(**minimum_configuration) + 
config.training_config.num_epochs = 1 + config.data_config.axes = "YX" + config.data_config.batch_size = 2 + config.data_config.data_type = SupportedData.ARRAY.value + config.data_config.patch_size = (8, 8) + + # instantiate CAREamist + careamist = CAREamist(source=config, work_dir=tmp_path) + + # train CAREamist + careamist.train(train_source=train_array, val_source=val_array) + + # check that mean and std make sense + assert config.data_config.mean > 100 + assert config.data_config.std > 20 + + # predict without tiling + test_array = 1_000 * (1 + rng.random((32, 32), dtype=float)) / 2 + _ = careamist.predict(test_array) + + # get data for BMZ + patch = careamist._create_data_for_bmz() + assert patch.shape == (1, 1) + test_array.shape + + # check that it is not normalised + assert np.allclose(patch.squeeze(), test_array) + + def test_export_bmz_pretrained_prediction(tmp_path: Path, pre_trained: Path): """Test that CAREamics can be instantiated with a pre-trained network and exported to BMZ after prediction. @@ -643,7 +785,7 @@ def test_export_bmz_pretrained_prediction(tmp_path: Path, pre_trained: Path): careamist = CAREamist(source=pre_trained, work_dir=tmp_path) # prediction data - source_array = np.random.rand(32, 32) + source_array = random_array((32, 32)) _ = careamist.predict(source_array) assert len(careamist.pred_datamodule.predict_dataloader()) > 0 @@ -686,11 +828,11 @@ def test_export_bmz_pretrained_with_array(tmp_path: Path, pre_trained: Path): careamist = CAREamist(source=pre_trained, work_dir=tmp_path) # alternatively we can pass an array - array = np.random.rand(32, 32).astype(np.float32) + array = random_array((32, 32)) careamist.export_to_bmz( path=tmp_path / "model2.zip", name="TopModel", - input_array=array[np.newaxis, np.newaxis, ...], + input_array=array, general_description="A model that just walked in.", authors=[{"name": "Amod", "affiliation": "El"}], )