aramis-lab · camillebrianceau · Nov 12, 2024 · Nov 6, 2024 · Nov 6, 2024 · Nov 6, 2024
diff --git a/clinicadl/commandline/pipelines/generate/artifacts/cli.py b/clinicadl/commandline/pipelines/generate/artifacts/cli.py
@@ -98,7 +98,7 @@ def create_artifacts_image(data_idx: int) -> pd.DataFrame:
                 [participant_id],
                 [session_id],
                 caps_config.data.caps_dict[cohort],
-                file_type.model_dump(),
+                file_type,
             )[0][0]
         )
 

diff --git a/clinicadl/commandline/pipelines/generate/hypometabolic/cli.py b/clinicadl/commandline/pipelines/generate/hypometabolic/cli.py
@@ -97,9 +97,7 @@ def cli(generated_caps_directory, **kwargs):
     sessions = [data_df.at[i, "session_id"] for i in range(caps_config.data.n_subjects)]
     cohort = caps_config.data.caps_directory
 
-    images_paths = clinicadl_file_reader(
-        participants, sessions, cohort, file_type.model_dump()
-    )[0]
+    images_paths = clinicadl_file_reader(participants, sessions, cohort, file_type)[0]
     image_nii = nib.loadsave.load(images_paths[0])
     mask_resample_nii = resample_to_img(mask_nii, image_nii, interpolation="nearest")
     mask = mask_resample_nii.get_fdata()

diff --git a/clinicadl/commandline/pipelines/generate/random/cli.py b/clinicadl/commandline/pipelines/generate/random/cli.py
@@ -99,7 +99,7 @@ def cli(generated_caps_directory, n_proc, **kwargs):
         [participant_id],
         [session_id],
         caps_config.data.caps_dict[cohort],
-        file_type.model_dump(),
+        file_type,
     )
     image_nii = nib.loadsave.load(image_paths[0][0])
     # assert isinstance(image_nii, nib.nifti1.Nifti1Image)

diff --git a/clinicadl/commandline/pipelines/generate/trivial/cli.py b/clinicadl/commandline/pipelines/generate/trivial/cli.py
@@ -95,7 +95,7 @@ def create_trivial_image(subject_id: int) -> pd.DataFrame:
                 [participant_id],
                 [session_id],
                 caps_config.data.caps_dict[cohort],
-                file_type.model_dump(),
+                file_type,
             )[0][0]
         )
 

diff --git a/clinicadl/dataset/caps_dataset.py b/clinicadl/dataset/caps_dataset.py
@@ -140,7 +140,7 @@ def _get_image_path(self, participant: str, session: str, cohort: str) -> Path:
                 [participant],
                 [session],
                 self.config.data.caps_dict[cohort],
-                file_type.model_dump(),
+                file_type,
             )
             logger.debug(f"clinicadl_file_reader output: {results}")
             filepath = Path(results[0][0])
@@ -164,7 +164,7 @@ def _get_image_path(self, participant: str, session: str, cohort: str) -> Path:
                 [participant],
                 [session],
                 self.config.data.caps_dict[cohort],
-                file_type.model_dump(),
+                file_type,
             )
             filepath = results[0]
             image_path = Path(filepath[0])
@@ -220,9 +220,9 @@ def _get_full_image(self) -> torch.Tensor:
 
         from clinicadl.utils.iotools.clinica_utils import clinicadl_file_reader
 
-        participant_id = self.df.loc[0, "participant_id"]
-        session_id = self.df.loc[0, "session_id"]
-        cohort = self.df.loc[0, "cohort"]
+        participant_id = self.df.at[0, "participant_id"]
+        session_id = self.df.at[0, "session_id"]
+        cohort = self.df.at[0, "cohort"]
 
         try:
             image_path = self._get_image_path(participant_id, session_id, cohort)
@@ -233,7 +233,7 @@ def _get_full_image(self) -> torch.Tensor:
                 [participant_id],
                 [session_id],
                 self.config.data.caps_dict[cohort],
-                file_type.model_dump(),
+                file_type,
             )
             image_nii = nib.loadsave.load(results[0])
             image_np = image_nii.get_fdata()

diff --git a/clinicadl/dataset/prepare_data/prepare_data.py b/clinicadl/dataset/prepare_data/prepare_data.py
@@ -74,9 +74,9 @@ def DeepLearningPrepareData(
     mod_subfolder, file_type = compute_folder_and_file_type(config, from_bids)
 
     # Input file:
-    input_files = clinicadl_file_reader(
-        subjects, sessions, input_directory, file_type.model_dump()
-    )[0]
+    input_files = clinicadl_file_reader(subjects, sessions, input_directory, file_type)[
+        0
+    ]
     logger.debug(f"Selected image file name list: {input_files}.")
 
     def write_output_imgs(output_mode, container, subfolder):

diff --git a/clinicadl/quality_check/pet_linear/quality_check.py b/clinicadl/quality_check/pet_linear/quality_check.py
@@ -97,7 +97,7 @@ def quality_check(
     )
     file_type = pet_linear_nii(config.preprocessing)
     input_files = clinicadl_file_reader(
-        subjects, sessions, config.data.caps_directory, file_type.model_dump()
+        subjects, sessions, config.data.caps_directory, file_type
     )[0]
 
     def write_output_data(file):

diff --git a/clinicadl/quality_check/t1_linear/utils.py b/clinicadl/quality_check/t1_linear/utils.py
@@ -67,7 +67,7 @@ def __getitem__(self, idx):
             file_type = self.config.extraction.file_type
             file_type.pattern = Path(str(file_type.pattern).replace(".nii.gz", ".pt"))
             image_output = clinicadl_file_reader(
-                [subject], [session], self.img_dir, file_type.model_dump()
+                [subject], [session], self.img_dir, file_type
             )[0]
             image_path = Path(image_output[0])
             image_filename = image_path.name
@@ -90,7 +90,7 @@ def __getitem__(self, idx):
                 [subject],
                 [session],
                 self.img_dir,
-                linear_nii(self.config.preprocessing).model_dump(),
+                linear_nii(self.config.preprocessing),
             )[0]
             image = nib.loadsave.load(image_path[0])
             image = self.nii_transform(image)

diff --git a/clinicadl/transforms/transforms.py b/clinicadl/transforms/transforms.py
@@ -1,14 +1,14 @@
-from typing import List
+from typing import Callable, List
 
 import torchio
 
 
 class Transforms:
     def __init__(
         self,
-        data_augmentation=List[torchio],
-        image_transforms=List[torchio],
-        object_transforms=List[torchio],
+        data_augmentation=List[Callable],
+        image_transforms=List[Callable],
+        object_transforms=List[Callable],
     ) -> None:
         """TO COMPLETE"""
         self.data_augmentation = data_augmentation
diff --git a/clinicadl/utils/iotools/clinica_utils.py b/clinicadl/utils/iotools/clinica_utils.py
@@ -1,23 +1,28 @@
 import hashlib
 import os
+import re
 import shutil
 import ssl
 import tempfile
-from collections import namedtuple
+from collections import defaultdict, namedtuple
 from functools import partial
 from glob import glob
+from multiprocessing import Manager
 from pathlib import Path, PurePath
 from time import localtime, strftime, time
 from typing import Callable, Dict, List, Optional, Tuple, Union
 from urllib.error import URLError
 from urllib.request import Request, urlopen
 
+import numpy as np
 import pandas as pd
-from pydantic import BaseModel
+from joblib import Parallel, delayed
+from pydantic import BaseModel, field_validator, model_validator
 
 from clinicadl.utils.exceptions import (
     ClinicaDLBIDSError,
     ClinicaDLCAPSError,
+    ClinicaDLException,
 )
 from clinicadl.utils.logger import cprint
 
@@ -29,6 +34,20 @@ class FileType(BaseModel):
     description: str
     needed_pipeline: Optional[str] = None
 
+    @field_validator("pattern", mode="before")
+    def check_pattern(cls, v):  # rejects patterns with a leading "/"; returns v unchanged otherwise
+        if str(v).startswith("/"):  # "before" mode sees raw input: str() accepts Path values, startswith() accepts ""
+            raise ValueError(
+                "pattern argument cannot start with char: / (does not work in os.path.join function). "
+                "If you want to indicate the exact name of the file, use the format "
+                "directory_name/filename.extension or filename.extension in the pattern argument."
+            )
+        return v
+
+
+class FileReader(BaseModel):  # pydantic model with a single field; no methods are defined in this hunk
+    caps_directory: Path  # NOTE(review): presumably the root of the CAPS dataset to read from - confirm
+
 
 def container_from_filename(bids_or_caps_filename: Path) -> Path:
     """Extract container from BIDS or CAPS file.
@@ -51,8 +70,6 @@ def container_from_filename(bids_or_caps_filename: Path) -> Path:
     >>> container_from_filename('caps/subjects/sub-CLNC01/ses-M000/dwi/preprocessing/sub-CLNC01_ses-M000_preproc.nii')
     'subjects/sub-CLNC01/ses-M000'
     """
-    import os
-    import re
 
     m = re.search(r"(sub-[a-zA-Z0-9]+)/(ses-[a-zA-Z0-9]+)", bids_or_caps_filename)
     if not m:
@@ -97,9 +114,6 @@ def read_participant_tsv(tsv_file: Path) -> Tuple[List[str], List[str]]:
     >>> read_participant_tsv("participant.tsv")
     (["sub-01", "sub-01", "sub-02"], ["ses-M000", "ses-M006", "ses-M000"])
     """
-    import pandas as pd
-
-    from clinicadl.utils.exceptions import ClinicaDLException
 
     try:
         df = pd.read_csv(tsv_file, sep="\t")
@@ -406,7 +420,6 @@ def check_caps_folder(caps_directory: Path) -> None:
     -----
     Keep in mind that a CAPS folder can be empty.
     """
-    from clinicadl.utils.exceptions import ClinicaDLCAPSError
 
     _common_checks(caps_directory, "CAPS")
 
@@ -598,8 +611,6 @@ def _get_entities(files: List[Path], common_suffix: str) -> dict:
         The entities dictionary.
     """
 
-    from collections import defaultdict
-
     found_entities = defaultdict(set)
     for f in files:
         entities = get_filename_no_ext(f.name).rstrip(common_suffix).split("_")
@@ -682,79 +693,26 @@ def _get_suffix(filename: Path) -> str:
 
 
 def _select_run(files: List[str]) -> str:
-    import numpy as np
-
     runs = [int(_get_run_number(f)) for f in files]
     return files[np.argmax(runs)]
 
 
 def _get_run_number(filename: str) -> str:
-    import re
-
     matches = re.match(r".*_run-(\d+).*", filename)
     if matches:
         return matches[1]
     raise ValueError(f"Filename {filename} should contain one and only one run entity.")
 
 
-def _check_information(information: Dict) -> None:
-    if not isinstance(information, (dict, list)):
-        raise TypeError(
-            "A dict or list of dicts must be provided for the argument 'information'"
-        )
-
-    if isinstance(information, list):
-        for item in information:
-            if not all(elem in item for elem in ["pattern", "description"]):
-                raise ValueError(
-                    "'information' must contain the keys 'pattern' and 'description'"
-                )
-
-            if not all(
-                elem in ["pattern", "description", "needed_pipeline"]
-                for elem in item.keys()
-            ):
-                raise ValueError(
-                    "'information' can only contain the keys 'pattern', 'description' and 'needed_pipeline'"
-                )
-
-            if item["pattern"][0] == "/":
-                raise ValueError(
-                    "pattern argument cannot start with char: / (does not work in os.path.join function). "
-                    "If you want to indicate the exact name of the file, use the format "
-                    "directory_name/filename.extension or filename.extension in the pattern argument."
-                )
-    else:
-        if not all(elem in information for elem in ["pattern", "description"]):
-            raise ValueError(
-                "'information' must contain the keys 'pattern' and 'description'"
-            )
-
-        if not all(
-            elem in ["pattern", "description", "needed_pipeline"]
-            for elem in information.keys()
-        ):
-            raise ValueError(
-                "'information' can only contain the keys 'pattern', 'description' and 'needed_pipeline'"
-            )
-
-        if information["pattern"][0] == "/":
-            raise ValueError(
-                "pattern argument cannot start with char: / (does not work in os.path.join function). "
-                "If you want to indicate the exact name of the file, use the format "
-                "directory_name/filename.extension or filename.extension in the pattern argument."
-            )
-
-
-def _format_errors(errors: List, information: Dict) -> str:
+def _format_errors(errors: List, file_type: FileType) -> str:
     error_message = (
         f"Clinica encountered {len(errors)} "
-        f"problem(s) while getting {information['description']}:\n"
+        f"problem(s) while getting {file_type.description}:\n"
     )
-    if "needed_pipeline" in information and information["needed_pipeline"]:
+    if file_type.needed_pipeline:
         error_message += (
             "Please note that the following clinica pipeline(s) must "
-            f"have run to obtain these files: {information['needed_pipeline']}\n"
+            f"have run to obtain these files: {file_type.needed_pipeline}\n"
         )
     error_message += "\n".join(errors)
 
@@ -765,7 +723,7 @@ def clinicadl_file_reader(
     subjects: List[str],
     sessions: List[str],
     input_directory: Path,
-    information: Dict,
+    file_type: Union[FileType, Dict],
     raise_exception: bool = True,
     n_procs: int = 1,
 ):
@@ -902,10 +860,9 @@ def clinicadl_file_reader(
     or even more precise: 't1/freesurfer_cross_sectional/sub-*_ses-*/surf/rh.white'
     It then gives: ['/caps/subjects/sub-ADNI011S4105/ses-M000/t1/freesurfer_cross_sectional/sub-ADNI011S4105_ses-M000/surf/rh.white']
     """
-    from clinicadl.utils.exceptions import ClinicaDLBIDSError, ClinicaDLCAPSError
+    if isinstance(file_type, Dict):
+        file_type = FileType(**file_type)
 
-    _check_information(information)
-    pattern = information["pattern"]
     is_bids = determine_caps_or_bids(input_directory)
     if is_bids:
         check_bids_folder(input_directory)
@@ -923,10 +880,10 @@ def clinicadl_file_reader(
         subjects,
         sessions,
         is_bids,
-        pattern,
+        file_type.pattern,
         n_procs=n_procs,
     )
-    error_message = _format_errors(errors_encountered, information)
+    error_message = _format_errors(errors_encountered, file_type)
     if len(errors_encountered) > 0 and raise_exception:
         if is_bids:
             raise ClinicaDLBIDSError(error_message)
@@ -944,10 +901,6 @@ def _read_files_parallel(
     pattern: str,
     n_procs: int,
 ) -> Tuple[List[str], List[str]]:
-    from multiprocessing import Manager
-
-    from joblib import Parallel, delayed
-
     manager = Manager()
     shared_results = manager.list()
     shared_errors_encountered = manager.list()