Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

MIRIAD converter to BIDS #1290

Draft
wants to merge 5 commits into
base: dev
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 26 additions & 1 deletion clinica/iotools/bids_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ class StudyName(str, Enum):
OASIS3 = "OASIS3"
UKB = "UKB"
IXI = "IXI"
MIRIAD = "MIRIAD"


BIDS_VALIDATOR_CONFIG = {
Expand Down Expand Up @@ -93,7 +94,8 @@ def bids_id_factory(study: StudyName) -> Type[BIDSSubjectID]:
return HABSBIDSSubjectID
if study == StudyName.IXI:
return IXIBIDSSubjectID

if study == StudyName.MIRIAD:
return MIRIADBIDSSubjectID

class ADNIBIDSSubjectID(BIDSSubjectID):
"""Implementation for ADNI of the BIDSSubjectIDClass, allowing to go from the source id XXX_S_XXXX
Expand Down Expand Up @@ -319,6 +321,29 @@ def from_original_study_id(cls, study_id: str) -> str:
def to_original_study_id(self) -> str:
return str(self.replace("sub-", ""))

class MIRIADBIDSSubjectID(BIDSSubjectID):
"""Implementation for MIRIAD of the BIDSSubjectIDClass, allowing to go from the source id MIRIAD###
to a bids id sub-MIRAD### and reciprocally."""
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
to a bids id sub-MIRAD### and reciprocally."""
to a bids id sub-MIRIAD### and reciprocally."""


def validate(self, value: str) -> str:
if re.fullmatch(r"sub-MIRIAD\d{3}", value):
return value
raise ValueError(
f"BIDS MIRIAD subject ID {value} is not properly formatted. "
"Expecting a 'sub-MIRIAD' format."
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
"Expecting a 'sub-MIRIAD' format."
"Expecting a 'sub-MIRIADXXX' format."

)

@classmethod
def from_original_study_id(cls, study_id: str) -> str:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As written, IDs must always have exactly 3 digits (so zero-padding would be needed if an ID can have only 1 or 2 digits). That said, I don't think you are using this class for now.

if re.fullmatch(r"MIRIAD\d{3}", study_id):
return f"sub-{study_id}"
raise ValueError(
f"Raw MIRIAD subject ID {study_id} is not properly formatted. "
"Expecting a 'Y' format."
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
"Expecting a 'Y' format."
"Expecting a 'MIRIADXXX' format."

)

def to_original_study_id(self) -> str:
return str(self.replace("sub-", ""))

# -- Methods for the clinical data --
def create_participants_df(
Expand Down
2 changes: 2 additions & 0 deletions clinica/iotools/converters/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from .oasis3_to_bids import oasis3_to_bids_cli
from .oasis_to_bids import oasis_to_bids_cli
from .ukb_to_bids import ukb_to_bids_cli
from .miriad_to_bids import miriad_to_bids_cli


@click.group("convert")
Expand All @@ -26,6 +27,7 @@ def cli() -> None:
cli.add_command(ukb_to_bids_cli.cli)
cli.add_command(genfi_to_bids_cli.cli)
cli.add_command(ixi_to_bids_cli.cli)
cli.add_command(miriad_to_bids_cli.cli)

if __name__ == "__main__":
cli()
4 changes: 4 additions & 0 deletions clinica/iotools/converters/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ def get_converter_name(study: Union[str, StudyName]) -> str:
return "UkbToBids"
if study == StudyName.IXI:
return "IxiToBids"
if study == StudyName.MIRIAD:
return "MiriadToBids"


def converter_factory(study: Union[str, StudyName]) -> Callable:
Expand All @@ -62,4 +64,6 @@ def converter_factory(study: Union[str, StudyName]) -> Callable:
from .ukb_to_bids import convert
if study == StudyName.IXI:
from .ixi_to_bids import convert
if study == StudyName.MIRIAD:
from .miriad_to_bids import convert
return convert
3 changes: 3 additions & 0 deletions clinica/iotools/converters/miriad_to_bids/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
from .miriad_to_bids import convert

__all__ = ["convert"]
124 changes: 124 additions & 0 deletions clinica/iotools/converters/miriad_to_bids/miriad_to_bids.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
"""Convert MIRIAD dataset to BIDS."""

from pathlib import Path
from typing import Optional

import os
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
import os
import re

Using Path objects consistently (see below!) lets us rely on pathlib alone, so we don't need os anymore.

import shutil
import csv
import pandas as pd
from clinica.utils.filemanip import UserProvidedPath

def convert(
path_to_dataset: str,
bids_dir: str,
path_to_clinical: str,
subjects: Optional[str] = None,
n_procs: Optional[int] = 1,
**kwargs,
):
"""Convert MIRIAD data to BIDS format without removing original .nii files."""
from clinica.iotools.converters.miriad_to_bids.miriad_to_bids_utils import create_bids_structure, parse_filename, convert_to_nii_gz
metadata_csv = 'metadata.csv'

# Load clinical data
clinical_data_file = None
for file in os.listdir(path_to_clinical):
if file.endswith('.csv'):
clinical_data_file = os.path.join(path_to_clinical, file)
break

if not clinical_data_file:
raise FileNotFoundError(f"No clinical data CSV found in {path_to_clinical}")

clinical_data = pd.read_csv(clinical_data_file)

# Prepare CSV
with open(metadata_csv, 'w', newline='') as csvfile:
csvwriter = csv.writer(csvfile)
csvwriter.writerow(['cohort', 'subject_id', 'diagnosis', 'gender', 'session', 'run', 'input_file', 'output_file'])

participants_data = {}
sessions_data = []

# Traverse the input directory
for root, dirs, files in os.walk(path_to_dataset):
for file in files:
if file.endswith('.nii'):
# Extract information from filename
parts = file.split('_')
cohort = parts[0] # miriad
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
cohort = parts[0] # miriad

If it is always the same, as I assumed above in my regex, you do not need to retrieve this information anymore.

subject_id = parts[1] # 215
diagnosis = parts[2] # AD (Alzheimer's) or HC (Healthy Control)
gender = parts[3] # M or F
session = parts[4].lstrip('0') # Session number
run_number = parts[6].replace('.nii', '') # Scan number from MR_1 or MR_2

bids_subject_id = f"sub-{subject_id}"
bids_session_id = f"ses-{session}"

# Original file path
original_file_path = os.path.join(root, file)

# Extract MR ID
mr_id = f"{cohort}_{subject_id}_{session}_MR_{run_number}"

# Extract relevant clinical information from the clinical data
clinical_row = clinical_data[clinical_data['MR ID'] == mr_id]
if clinical_row.empty:
print(f"Clinical data not found for MR ID: {mr_id}")
continue

age = clinical_row['Age'].values[0]
group = clinical_row['Group'].values[0] # HC or AD
gender_clinical = clinical_row['M/F'].values[0] # M or F

# Write metadata CSV
csvwriter.writerow([cohort, subject_id, diagnosis, gender, session, run_number, original_file_path, bids_subject_id])

# Track baseline age (minimum age for each subject)
if subject_id not in participants_data or participants_data[subject_id]['age'] > age:
participants_data[subject_id] = {
'participant_id': f"sub-MIRIAD{subject_id}",
'sex': gender_clinical,
'diagnosis': group,
'age': age
}

# Prepare sessions data
sessions_data.append([f"sub-MIRIAD{subject_id}", f"ses-{session}", age])

# Create BIDS structure and copy file with run number
create_bids_structure(subject_id, session, run_number, cohort, diagnosis, gender, original_file_path, path_to_dataset, bids_dir, path_to_clinical)

# Write participants.csv with baseline age (minimum age for each subject)
participants_csv = os.path.join(bids_dir, 'participants.csv')
with open(participants_csv, 'w', newline='') as participants_file:
participants_writer = csv.writer(participants_file)
participants_writer.writerow(['participant_id', 'sex', 'diagnosis', 'age'])

# Write the baseline age (minimum age) for each subject
for participant_info in participants_data.values():
participants_writer.writerow([participant_info['participant_id'],
participant_info['sex'],
participant_info['diagnosis'],
participant_info['age']])

# Write sessions.tsv for each subject
subject_sessions = {}
for session in sessions_data:
subject_id, session_id, age = session
if subject_id not in subject_sessions:
subject_sessions[subject_id] = []
subject_sessions[subject_id].append([session_id, age])

for subject_id, sessions in subject_sessions.items():
sessions_file = os.path.join(bids_dir, subject_id, 'sessions.tsv')
os.makedirs(os.path.dirname(sessions_file), exist_ok=True)

with open(sessions_file, 'w', newline='') as session_file:
session_writer = csv.writer(session_file, delimiter='\t')
session_writer.writerow(['session_id', 'age'])
session_writer.writerows(sessions)

print(f"BIDS conversion completed, clinical data loaded from {clinical_data_file}.")
27 changes: 27 additions & 0 deletions clinica/iotools/converters/miriad_to_bids/miriad_to_bids_cli.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
from os import PathLike
from typing import Optional

import click

from clinica.iotools.converters import cli_param


@click.command(name="miriad-to-bids")
@cli_param.dataset_directory
@cli_param.bids_directory
@cli_param.clinical_data_directory
@cli_param.subjects_list
def cli(
    dataset_directory: PathLike,
    bids_directory: PathLike,
    clinical_data_directory: PathLike,
    subjects_list: Optional[PathLike] = None,
) -> None:
    """MIRIAD to BIDS converter."""
    # NOTE: the docstring above is what click displays as the command help,
    # so it is kept short and user-facing; developer notes live in comments.
    #
    # The converter is imported inside the command body rather than at module
    # level (presumably so it is only loaded when the command actually runs).
    from .miriad_to_bids import convert

    # Forward the CLI options to the converter entry point, naming each
    # argument explicitly so the mapping between option and parameter is clear.
    convert(
        path_to_dataset=dataset_directory,
        bids_dir=bids_directory,
        path_to_clinical=clinical_data_directory,
        subjects=subjects_list,
    )


if __name__ == "__main__":
cli()
46 changes: 46 additions & 0 deletions clinica/iotools/converters/miriad_to_bids/miriad_to_bids_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import os
import shutil
import nibabel as nib

# Helper function to create BIDS folders and move files
def create_bids_structure(subject_id, session, run_label, cohort, diagnosis, gender, input_file, path_to_dataset, output_dir, path_to_clinical):
"""Create BIDS folder structure and move files into it."""
sub_id = f"sub-MIRIAD{subject_id}"
ses_id = f"ses-{session}"
run_id = f"run-{run_label}" # Run number (e.g., run-01)

# Create output directory for this subject/session
anat_dir = os.path.join(output_dir, sub_id, ses_id, 'anat')
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
anat_dir = os.path.join(output_dir, sub_id, ses_id, 'anat')
anat_dir = output_dir / sub_id / ses_id / 'anat'

os.makedirs(anat_dir, exist_ok=True)

# Convert the input file to .nii.gz if necessary
input_file_gz = convert_to_nii_gz(input_file)

# Destination filename in BIDS format with run number
bids_filename = f"{sub_id}_{ses_id}_{run_id}_T1w.nii.gz"

# Copy and rename the file to BIDS format
shutil.copy(input_file_gz, os.path.join(anat_dir, bids_filename))


# Function to extract subject, session, and run info from filenames
def parse_filename(filename):
    """Extract the subject, session and run identifiers from a MIRIAD filename.

    The filename is split on underscores and is expected to hold at least
    seven fields, in this order: cohort, subject, diagnosis, gender, session,
    modality and run (e.g. ``"miriad_215_AD_M_01_MR_1.nii"``).

    Parameters
    ----------
    filename : str
        Name of the image file (without any directory component).

    Returns
    -------
    tuple of (str, str, str)
        ``(subject_id, session_id, run_id)``, e.g. ``("215", "01", "1")``.
        The session identifier is returned as found in the filename
        (leading zeros are kept).

    Raises
    ------
    IndexError
        If the filename holds fewer than seven underscore-separated fields.
    """
    parts = filename.split("_")
    subject_id = parts[1]  # e.g., "215"
    session_id = parts[4]  # e.g., "01"
    # The run is the last field, so it still carries the file extension
    # (".nii" or ".nii.gz"); drop everything from the first dot onwards so
    # that the run identifier is "1" rather than "1.nii".
    run_id = parts[6].split(".", 1)[0]  # e.g., "1"

    return subject_id, session_id, run_id

def convert_to_nii_gz(input_file):
    """Convert a .nii file to .nii.gz format without deleting the original .nii file.

    Parameters
    ----------
    input_file : str or os.PathLike
        Path to the image to compress.

    Returns
    -------
    str
        Path to the compressed image. If ``input_file`` already ends with
        ``.nii.gz`` it is returned as is and nothing is written. Otherwise
        the compressed copy is written next to the input file.
    """
    # Accept both plain strings and path-like objects.
    input_path = os.fspath(input_file)
    if input_path.endswith(".nii.gz"):
        return input_path

    if input_path.endswith(".nii"):
        # Only touch the suffix: a blanket str.replace(".nii", ".nii.gz")
        # would also rewrite any ".nii" found in a parent folder name.
        output_file = input_path + ".gz"
    else:
        # Never write the output over the input file itself.
        output_file = os.path.splitext(input_path)[0] + ".nii.gz"

    img = nib.load(input_path)
    nib.save(img, output_file)
    return output_file
Loading