Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

FileType change in clinicadl_file_reader (minor change) #679

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
2 changes: 1 addition & 1 deletion clinicadl/commandline/pipelines/generate/artifacts/cli.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -98,7 +98,7 @@ def create_artifacts_image(data_idx: int) -> pd.DataFrame:
[participant_id],
[session_id],
caps_config.data.caps_dict[cohort],
file_type.model_dump(),
file_type,
)[0][0]
)

Expand Down
Original file line number | Diff line number | Diff line change
Expand Up @@ -97,9 +97,7 @@ def cli(generated_caps_directory, **kwargs):
sessions = [data_df.at[i, "session_id"] for i in range(caps_config.data.n_subjects)]
cohort = caps_config.data.caps_directory

images_paths = clinicadl_file_reader(
participants, sessions, cohort, file_type.model_dump()
)[0]
images_paths = clinicadl_file_reader(participants, sessions, cohort, file_type)[0]
image_nii = nib.loadsave.load(images_paths[0])
mask_resample_nii = resample_to_img(mask_nii, image_nii, interpolation="nearest")
mask = mask_resample_nii.get_fdata()
Expand Down
2 changes: 1 addition & 1 deletion clinicadl/commandline/pipelines/generate/random/cli.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -99,7 +99,7 @@ def cli(generated_caps_directory, n_proc, **kwargs):
[participant_id],
[session_id],
caps_config.data.caps_dict[cohort],
file_type.model_dump(),
file_type,
)
image_nii = nib.loadsave.load(image_paths[0][0])
# assert isinstance(image_nii, nib.nifti1.Nifti1Image)
Expand Down
2 changes: 1 addition & 1 deletion clinicadl/commandline/pipelines/generate/trivial/cli.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -95,7 +95,7 @@ def create_trivial_image(subject_id: int) -> pd.DataFrame:
[participant_id],
[session_id],
caps_config.data.caps_dict[cohort],
file_type.model_dump(),
file_type,
)[0][0]
)

Expand Down
12 changes: 6 additions & 6 deletions clinicadl/dataset/caps_dataset.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -140,7 +140,7 @@ def _get_image_path(self, participant: str, session: str, cohort: str) -> Path:
[participant],
[session],
self.config.data.caps_dict[cohort],
file_type.model_dump(),
file_type,
)
logger.debug(f"clinicadl_file_reader output: {results}")
filepath = Path(results[0][0])
Expand All @@ -164,7 +164,7 @@ def _get_image_path(self, participant: str, session: str, cohort: str) -> Path:
[participant],
[session],
self.config.data.caps_dict[cohort],
file_type.model_dump(),
file_type,
)
filepath = results[0]
image_path = Path(filepath[0])
Expand Down Expand Up @@ -220,9 +220,9 @@ def _get_full_image(self) -> torch.Tensor:

from clinicadl.utils.iotools.clinica_utils import clinicadl_file_reader

participant_id = self.df.loc[0, "participant_id"]
session_id = self.df.loc[0, "session_id"]
cohort = self.df.loc[0, "cohort"]
participant_id = self.df.at[0, "participant_id"]
session_id = self.df.at[0, "session_id"]
cohort = self.df.at[0, "cohort"]

try:
image_path = self._get_image_path(participant_id, session_id, cohort)
Expand All @@ -233,7 +233,7 @@ def _get_full_image(self) -> torch.Tensor:
[participant_id],
[session_id],
self.config.data.caps_dict[cohort],
file_type.model_dump(),
file_type,
)
image_nii = nib.loadsave.load(results[0])
image_np = image_nii.get_fdata()
Expand Down
6 changes: 3 additions & 3 deletions clinicadl/dataset/prepare_data/prepare_data.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -74,9 +74,9 @@ def DeepLearningPrepareData(
mod_subfolder, file_type = compute_folder_and_file_type(config, from_bids)

# Input file:
input_files = clinicadl_file_reader(
subjects, sessions, input_directory, file_type.model_dump()
)[0]
input_files = clinicadl_file_reader(subjects, sessions, input_directory, file_type)[
0
]
logger.debug(f"Selected image file name list: {input_files}.")

def write_output_imgs(output_mode, container, subfolder):
Expand Down
2 changes: 1 addition & 1 deletion clinicadl/quality_check/pet_linear/quality_check.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -97,7 +97,7 @@ def quality_check(
)
file_type = pet_linear_nii(config.preprocessing)
input_files = clinicadl_file_reader(
subjects, sessions, config.data.caps_directory, file_type.model_dump()
subjects, sessions, config.data.caps_directory, file_type
)[0]

def write_output_data(file):
Expand Down
4 changes: 2 additions & 2 deletions clinicadl/quality_check/t1_linear/utils.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -67,7 +67,7 @@ def __getitem__(self, idx):
file_type = self.config.extraction.file_type
file_type.pattern = Path(str(file_type.pattern).replace(".nii.gz", ".pt"))
image_output = clinicadl_file_reader(
[subject], [session], self.img_dir, file_type.model_dump()
[subject], [session], self.img_dir, file_type
)[0]
image_path = Path(image_output[0])
image_filename = image_path.name
Expand All @@ -90,7 +90,7 @@ def __getitem__(self, idx):
[subject],
[session],
self.img_dir,
linear_nii(self.config.preprocessing).model_dump(),
linear_nii(self.config.preprocessing),
)[0]
image = nib.loadsave.load(image_path[0])
image = self.nii_transform(image)
Expand Down
8 changes: 4 additions & 4 deletions clinicadl/transforms/transforms.py
Original file line number | Diff line number | Diff line change
@@ -1,14 +1,14 @@
from typing import List
from typing import Callable, List

import torchio


class Transforms:
def __init__(
self,
data_augmentation=List[torchio],
image_transforms=List[torchio],
object_transforms=List[torchio],
data_augmentation=List[Callable],
image_transforms=List[Callable],
object_transforms=List[Callable],
) -> None:
"""TO COMPLETE"""
self.data_augmentation = data_augmentation
107 changes: 30 additions & 77 deletions clinicadl/utils/iotools/clinica_utils.py
Original file line number | Diff line number | Diff line change
@@ -1,23 +1,28 @@
import hashlib
import os
import re
import shutil
import ssl
import tempfile
from collections import namedtuple
from collections import defaultdict, namedtuple
from functools import partial
from glob import glob
from multiprocessing import Manager
from pathlib import Path, PurePath
from time import localtime, strftime, time
from typing import Callable, Dict, List, Optional, Tuple, Union
from urllib.error import URLError
from urllib.request import Request, urlopen

import numpy as np
import pandas as pd
from pydantic import BaseModel
from joblib import Parallel, delayed
from pydantic import BaseModel, field_validator, model_validator

from clinicadl.utils.exceptions import (
ClinicaDLBIDSError,
ClinicaDLCAPSError,
ClinicaDLException,
)
from clinicadl.utils.logger import cprint

Expand All @@ -29,6 +34,20 @@ class FileType(BaseModel):
description: str
needed_pipeline: Optional[str] = None

@field_validator("pattern", mode="before")
def check_pattern(cls, v):
if v[0] == "/":
raise ValueError(
"pattern argument cannot start with char: / (does not work in os.path.join function). "
"If you want to indicate the exact name of the file, use the format "
"directory_name/filename.extension or filename.extension in the pattern argument."
)
return v


class FileReader(BaseModel):
caps_directory: Path


def container_from_filename(bids_or_caps_filename: Path) -> Path:
"""Extract container from BIDS or CAPS file.
Expand All @@ -51,8 +70,6 @@ def container_from_filename(bids_or_caps_filename: Path) -> Path:
>>> container_from_filename('caps/subjects/sub-CLNC01/ses-M000/dwi/preprocessing/sub-CLNC01_ses-M000_preproc.nii')
'subjects/sub-CLNC01/ses-M000'
"""
import os
import re

m = re.search(r"(sub-[a-zA-Z0-9]+)/(ses-[a-zA-Z0-9]+)", bids_or_caps_filename)
if not m:
Expand Down Expand Up @@ -97,9 +114,6 @@ def read_participant_tsv(tsv_file: Path) -> Tuple[List[str], List[str]]:
>>> read_participant_tsv("participant.tsv")
(["sub-01", "sub-01", "sub-02"], ["ses-M000", "ses-M006", "ses-M000"])
"""
import pandas as pd

from clinicadl.utils.exceptions import ClinicaDLException

try:
df = pd.read_csv(tsv_file, sep="\t")
Expand Down Expand Up @@ -406,7 +420,6 @@ def check_caps_folder(caps_directory: Path) -> None:
-----
Keep in mind that a CAPS folder can be empty.
"""
from clinicadl.utils.exceptions import ClinicaDLCAPSError

_common_checks(caps_directory, "CAPS")

Expand Down Expand Up @@ -598,8 +611,6 @@ def _get_entities(files: List[Path], common_suffix: str) -> dict:
The entities dictionary.
"""

from collections import defaultdict

found_entities = defaultdict(set)
for f in files:
entities = get_filename_no_ext(f.name).rstrip(common_suffix).split("_")
Expand Down Expand Up @@ -682,79 +693,26 @@ def _get_suffix(filename: Path) -> str:


def _select_run(files: List[str]) -> str:
import numpy as np

runs = [int(_get_run_number(f)) for f in files]
return files[np.argmax(runs)]


def _get_run_number(filename: str) -> str:
import re

matches = re.match(r".*_run-(\d+).*", filename)
if matches:
return matches[1]
raise ValueError(f"Filename {filename} should contain one and only one run entity.")


def _check_information(information: Dict) -> None:
if not isinstance(information, (dict, list)):
raise TypeError(
"A dict or list of dicts must be provided for the argument 'information'"
)

if isinstance(information, list):
for item in information:
if not all(elem in item for elem in ["pattern", "description"]):
raise ValueError(
"'information' must contain the keys 'pattern' and 'description'"
)

if not all(
elem in ["pattern", "description", "needed_pipeline"]
for elem in item.keys()
):
raise ValueError(
"'information' can only contain the keys 'pattern', 'description' and 'needed_pipeline'"
)

if item["pattern"][0] == "/":
raise ValueError(
"pattern argument cannot start with char: / (does not work in os.path.join function). "
"If you want to indicate the exact name of the file, use the format "
"directory_name/filename.extension or filename.extension in the pattern argument."
)
else:
if not all(elem in information for elem in ["pattern", "description"]):
raise ValueError(
"'information' must contain the keys 'pattern' and 'description'"
)

if not all(
elem in ["pattern", "description", "needed_pipeline"]
for elem in information.keys()
):
raise ValueError(
"'information' can only contain the keys 'pattern', 'description' and 'needed_pipeline'"
)

if information["pattern"][0] == "/":
raise ValueError(
"pattern argument cannot start with char: / (does not work in os.path.join function). "
"If you want to indicate the exact name of the file, use the format "
"directory_name/filename.extension or filename.extension in the pattern argument."
)


def _format_errors(errors: List, information: Dict) -> str:
def _format_errors(errors: List, file_type: FileType) -> str:
error_message = (
f"Clinica encountered {len(errors)} "
f"problem(s) while getting {information['description']}:\n"
f"problem(s) while getting {file_type.description}:\n"
)
if "needed_pipeline" in information and information["needed_pipeline"]:
if file_type.needed_pipeline:
error_message += (
"Please note that the following clinica pipeline(s) must "
f"have run to obtain these files: {information['needed_pipeline']}\n"
f"have run to obtain these files: {file_type.needed_pipeline}\n"
)
error_message += "\n".join(errors)

Expand All @@ -765,7 +723,7 @@ def clinicadl_file_reader(
subjects: List[str],
sessions: List[str],
input_directory: Path,
information: Dict,
file_type: Union[FileType, Dict],
raise_exception: bool = True,
n_procs: int = 1,
):
Expand Down Expand Up @@ -902,10 +860,9 @@ def clinicadl_file_reader(
or even more precise: 't1/freesurfer_cross_sectional/sub-*_ses-*/surf/rh.white'
It then gives: ['/caps/subjects/sub-ADNI011S4105/ses-M000/t1/freesurfer_cross_sectional/sub-ADNI011S4105_ses-M000/surf/rh.white']
"""
from clinicadl.utils.exceptions import ClinicaDLBIDSError, ClinicaDLCAPSError
if isinstance(file_type, Dict):
file_type = FileType(**file_type)

_check_information(information)
pattern = information["pattern"]
is_bids = determine_caps_or_bids(input_directory)
if is_bids:
check_bids_folder(input_directory)
Expand All @@ -923,10 +880,10 @@ def clinicadl_file_reader(
subjects,
sessions,
is_bids,
pattern,
file_type.pattern,
n_procs=n_procs,
)
error_message = _format_errors(errors_encountered, information)
error_message = _format_errors(errors_encountered, file_type)
if len(errors_encountered) > 0 and raise_exception:
if is_bids:
raise ClinicaDLBIDSError(error_message)
Expand All @@ -944,10 +901,6 @@ def _read_files_parallel(
pattern: str,
n_procs: int,
) -> Tuple[List[str], List[str]]:
from multiprocessing import Manager

from joblib import Parallel, delayed

manager = Manager()
shared_results = manager.list()
shared_errors_encountered = manager.list()
Expand Down