Fix: Update CuBIDS to allow both longitudinal and cross-sectional structure by adding is_longitudinal attribute to CuBIDS class (#406)

* Add is_longitudinal attribute to the CuBIDS class

* fix lint issues

* still fixing lint issues

* Update cubids.py

* unset default value for is_longitudinal and add a method to infer is_longitudinal from data structure

* fix lint issues

* remove is_longitudinal from CuBIDS class docstring as Taylor suggested

* add is_longitudinal as an attribute in docstring

---------

Co-authored-by: Taylor Salo <[email protected]>
tientong98 and tsalo authored Jan 18, 2025
1 parent 07c0947 commit 5f94248
Showing 6 changed files with 119 additions and 62 deletions.
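The upshot: CuBIDS now infers whether a dataset is longitudinal (any path containing "ses-") rather than assuming it, threads that flag through path construction, and rejects session-level grouping for cross-sectional data. A minimal sketch of the new guard, assuming an illustrative cross-sectional dataset path and that acq_group_level is accepted as a constructor keyword (both are assumptions, not taken from this diff):

>>> from cubids import CuBIDS
>>> cubids = CuBIDS("/data/cross_sectional_dset", acq_group_level="session")
Traceback (most recent call last):
ValueError: Data is not longitudinal, so "session" is not a valid grouping level.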
1 change: 1 addition & 0 deletions cubids/config.py
@@ -4,6 +4,7 @@
import importlib.resources
import yaml


def load_config(config_file):
"""Load a YAML file containing a configuration for param groups.
91 changes: 76 additions & 15 deletions cubids/cubids.py
@@ -84,6 +84,8 @@ class CuBIDS(object):
A data dictionary for TSV outputs.
use_datalad : :obj:`bool`
If True, use datalad to track changes to the BIDS dataset.
is_longitudinal : :obj:`bool`
If True, "ses" is included in file paths.
"""

def __init__(
@@ -110,11 +112,17 @@ def __init__(
self.cubids_code_dir = Path(self.path + "/code/CuBIDS").is_dir()
self.data_dict = {} # data dictionary for TSV outputs
self.use_datalad = use_datalad # True if flag set, False if flag unset
self.is_longitudinal = self._infer_longitudinal() # inferred from dataset structure

if self.use_datalad:
self.init_datalad()

if self.acq_group_level == "session":
if self.is_longitudinal and self.acq_group_level == "session":
NON_KEY_ENTITIES.remove("session")
elif not self.is_longitudinal and self.acq_group_level == "session":
raise ValueError(
'Data is not longitudinal, so "session" is not a valid grouping level.'
)

@property
def layout(self):
@@ -128,6 +136,10 @@ def layout(self):
# print("LAYOUT OBJECT SET")
return self._layout

def _infer_longitudinal(self):
"""Infer if the dataset is longitudinal based on its structure."""
return any("ses-" in str(f) for f in Path(self.path).rglob("*"))

def reset_bids_layout(self, validate=False):
"""Reset the BIDS layout.
@@ -473,6 +485,7 @@ def change_filename(self, filepath, entities):
filepath=filepath,
entities=entities,
out_dir=str(self.path),
is_longitudinal=self.is_longitudinal,
)

exts = Path(filepath).suffixes
@@ -481,7 +494,8 @@
suffix = entities["suffix"]

sub = get_entity_value(filepath, "sub")
ses = get_entity_value(filepath, "ses")
if self.is_longitudinal:
ses = get_entity_value(filepath, "ses")

# Add the scan path + new path to the lists of old, new filenames
self.old_filenames.append(filepath)
@@ -577,7 +591,10 @@ def change_filename(self, filepath, entities):
self.new_filenames.append(new_labeling)

# RENAME INTENDED FORS!
ses_path = self.path + "/" + sub + "/" + ses
if self.is_longitudinal:
ses_path = self.path + "/" + sub + "/" + ses
elif not self.is_longitudinal:
ses_path = self.path + "/" + sub
files_with_if = []
files_with_if += Path(ses_path).rglob("fmap/*.json")
files_with_if += Path(ses_path).rglob("perf/*_m0scan.json")
@@ -600,6 +617,7 @@
data["IntendedFor"].remove(item)
# add new filename
data["IntendedFor"].append(_get_participant_relative_path(new_path))

if item == _get_bidsuri(filepath, self.path):
# remove old filename
data["IntendedFor"].remove(item)
@@ -1363,6 +1381,7 @@ def get_layout(self):
return self.layout


# XXX: Remove _validate_json?
def _validate_json():
"""Validate a JSON file's contents.
@@ -1402,8 +1421,29 @@ def _get_participant_relative_path(scan):
This is what will appear in the IntendedFor field of any association.
Examples:
>>> _get_participant_relative_path(
... "/path/to/dset/sub-01/ses-01/func/sub-01_ses-01_bold.nii.gz",
... )
'ses-01/func/sub-01_ses-01_bold.nii.gz'
>>> _get_participant_relative_path(
... "/path/to/dset/sub-01/func/sub-01_bold.nii.gz",
... )
'func/sub-01_bold.nii.gz'
>>> _get_participant_relative_path(
... "/path/to/dset/ses-01/func/ses-01_bold.nii.gz",
... )
Traceback (most recent call last):
ValueError: Could not find subject in ...
"""
return "/".join(Path(scan).parts[-3:])
parts = Path(scan).parts
# Find the first part that starts with "sub-"
for i, part in enumerate(parts):
if part.startswith("sub-"):
return "/".join(parts[i + 1 :])
raise ValueError(f"Could not find subject in {scan}")


def _get_bidsuri(filename, dataset_root):
@@ -1734,7 +1774,7 @@ def get_entity_value(path, key):
return part


def build_path(filepath, entities, out_dir):
def build_path(filepath, entities, out_dir, is_longitudinal):
"""Build a new path for a file based on its BIDS entities.
Parameters
@@ -1746,6 +1786,8 @@ def build_path(filepath, entities, out_dir):
This should include all of the entities in the filename *except* for subject and session.
out_dir : str
The output directory for the new file.
is_longitudinal : bool
If True, add "ses" to file path.
Returns
-------
@@ -1758,6 +1800,7 @@
... "/input/sub-01/ses-01/anat/sub-01_ses-01_T1w.nii.gz",
... {"acquisition": "VAR", "suffix": "T2w"},
... "/output",
... True,
... )
'/output/sub-01/ses-01/anat/sub-01_ses-01_acq-VAR_T2w.nii.gz'
@@ -1766,6 +1809,7 @@
... "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-01_bold.nii.gz",
... {"task": "rest", "run": "2", "acquisition": "VAR", "suffix": "bold"},
... "/output",
... True,
... )
'/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_run-2_bold.nii.gz'
@@ -1775,6 +1819,7 @@
... "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-00001_bold.nii.gz",
... {"task": "rest", "run": 2, "acquisition": "VAR", "suffix": "bold"},
... "/output",
... True,
... )
'/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_run-00002_bold.nii.gz'
@@ -1784,6 +1829,7 @@
... "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-1_bold.nii.gz",
... {"task": "rest", "run": 2, "acquisition": "VAR", "suffix": "bold"},
... "/output",
... True,
... )
'/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_run-2_bold.nii.gz'
@@ -1792,6 +1838,7 @@
... "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-1_bold.nii.gz",
... {"task": "rest", "run": "2", "acquisition": "VAR", "suffix": "bold"},
... "/output",
... True,
... )
'/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_run-2_bold.nii.gz'
@@ -1801,6 +1848,7 @@
... "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-01_bold.nii.gz",
... {"task": "rest", "acquisition": "VAR", "suffix": "bold"},
... "/output",
... True,
... )
'/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_bold.nii.gz'
@@ -1809,6 +1857,7 @@
... "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-01_bold.nii.gz",
... {"subject": "02", "task": "rest", "acquisition": "VAR", "suffix": "bold"},
... "/output",
... True,
... )
'/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_bold.nii.gz'
@@ -1817,6 +1866,7 @@
... "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-01_bold.nii.gz",
... {"task": "rest", "acquisition": "VAR", "echo": 1, "suffix": "bold"},
... "/output",
... True,
... )
'/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_bold.nii.gz'
@@ -1825,19 +1875,19 @@
... "/input/sub-01/ses-01/anat/sub-01_ses-01_asl.nii.gz",
... {"datatype": "perf", "acquisition": "VAR", "suffix": "asl"},
... "/output",
... True,
... )
WARNING: DATATYPE CHANGE DETECTED
'/output/sub-01/ses-01/perf/sub-01_ses-01_acq-VAR_asl.nii.gz'
It expects a longitudinal structure, so providing a cross-sectional filename won't work.
XXX: This is a bug.
It also works for cross-sectional filenames.
>>> build_path(
... "/input/sub-01/func/sub-01_task-rest_run-01_bold.nii.gz",
... {"task": "rest", "acquisition": "VAR", "echo": 1, "suffix": "bold"},
... {"task": "rest", "acquisition": "VAR", "suffix": "bold"},
... "/output",
... False,
... )
Traceback (most recent call last):
ValueError: Could not extract subject or session from ...
'/output/sub-01/func/sub-01_task-rest_acq-VAR_bold.nii.gz'
"""
exts = Path(filepath).suffixes
old_ext = "".join(exts)
@@ -1853,9 +1903,13 @@
entity_file_keys.append(key)

sub = get_entity_value(filepath, "sub")
ses = get_entity_value(filepath, "ses")
if sub is None or ses is None:
raise ValueError(f"Could not extract subject or session from {filepath}")
if sub is None:
raise ValueError(f"Could not extract subject from {filepath}")

if is_longitudinal:
ses = get_entity_value(filepath, "ses")
if ses is None:
raise ValueError(f"Could not extract session from {filepath}")

# Add leading zeros to run entity if it's an integer.
# If it's a string, respect the value provided.
@@ -1874,7 +1928,10 @@
.replace("reconstruction", "rec")
)
if len(filename) > 0:
filename = f"{sub}_{ses}_{filename}_{suffix}{old_ext}"
if is_longitudinal:
filename = f"{sub}_{ses}_{filename}_{suffix}{old_ext}"
elif not is_longitudinal:
filename = f"{sub}_{filename}_{suffix}{old_ext}"
else:
raise ValueError(f"Could not construct new filename for {filepath}")

@@ -1894,5 +1951,9 @@
dtype_new = dtype_orig

# Construct the new filename
new_path = str(Path(out_dir) / sub / ses / dtype_new / filename)
if is_longitudinal:
new_path = str(Path(out_dir) / sub / ses / dtype_new / filename)
elif not is_longitudinal:
new_path = str(Path(out_dir) / sub / dtype_new / filename)

return new_path
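One case the docstring examples don't cover: requesting longitudinal handling for a session-less path now fails loudly at the session check (a sketch consistent with the checks above, not a doctest from the diff):

>>> build_path(
...     "/input/sub-01/func/sub-01_task-rest_bold.nii.gz",
...     {"task": "rest", "suffix": "bold"},
...     "/output",
...     True,
... )
Traceback (most recent call last):
ValueError: Could not extract session from ...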
34 changes: 22 additions & 12 deletions cubids/metadata_merge.py
@@ -276,13 +276,13 @@ def merge_json_into_json(from_file, to_file, raise_on_error=False):
return 0


def get_acq_dictionary():
def get_acq_dictionary(is_longitudinal=False):
"""Create a BIDS data dictionary from dataframe columns.
Parameters
----------
df : :obj:`pandas.DataFrame`
Pre export TSV that will be converted to a json dictionary.
is_longitudinal : :obj:`bool`, optional
If True, add "session" to acq_dict. Default is False.
Returns
-------
@@ -291,15 +291,16 @@ def get_acq_dictionary():
"""
acq_dict = {}
acq_dict["subject"] = {"Description": "Participant ID"}
acq_dict["session"] = {"Description": "Session ID"}
if is_longitudinal:
acq_dict["session"] = {"Description": "Session ID"}
docs = " https://cubids.readthedocs.io/en/latest/about.html#definitions"
desc = "Acquisition Group. See Read the Docs for more information"
acq_dict["AcqGroup"] = {"Description": desc + docs}

return acq_dict
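A quick check of both modes (doctest-style sketch, not part of the diff):

>>> get_acq_dictionary()["subject"]
{'Description': 'Participant ID'}
>>> "session" in get_acq_dictionary()
False
>>> get_acq_dictionary(is_longitudinal=True)["session"]
{'Description': 'Session ID'}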


def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level):
def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level, is_longitudinal=False):
"""Find unique sets of Key/Param groups across subjects.
This writes out the following files:
@@ -317,6 +318,8 @@ def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level):
Prefix for output files.
acq_group_level : {"subject", "session"}
Level at which to group acquisitions.
is_longitudinal : :obj:`bool`, optional
If True, add "session" to acq_dict. Default is False.
"""
from bids import config
from bids.layout import parse_file_entities
@@ -331,9 +334,12 @@
file_entities = parse_file_entities(row.FilePath)

if acq_group_level == "subject":
acq_id = (file_entities.get("subject"), file_entities.get("session"))
if is_longitudinal:
acq_id = (file_entities.get("subject"), file_entities.get("session"))
elif not is_longitudinal:
acq_id = file_entities.get("subject")
acq_groups[acq_id].append((row.EntitySet, row.ParamGroup))
else:
elif is_longitudinal and acq_group_level == "session":
acq_id = (file_entities.get("subject"), None)
acq_groups[acq_id].append(
(row.EntitySet, row.ParamGroup, file_entities.get("session"))
@@ -359,17 +365,21 @@
for groupnum, content_id_row in enumerate(descending_order, start=1):
content_id = content_ids[content_id_row]
acq_group_info.append((groupnum, content_id_counts[content_id_row]) + content_id)
for subject, session in contents_to_subjects[content_id]:
grouped_sub_sess.append(
{"subject": "sub-" + subject, "session": session, "AcqGroup": groupnum}
)
if is_longitudinal:
for subject, session in contents_to_subjects[content_id]:
grouped_sub_sess.append(
{"subject": "sub-" + subject, "session": session, "AcqGroup": groupnum}
)
elif not is_longitudinal:
for subject in contents_to_subjects[content_id]:
grouped_sub_sess.append({"subject": "sub-" + subject, "AcqGroup": groupnum})

# Write the mapping of subject/session to
acq_group_df = pd.DataFrame(grouped_sub_sess)
acq_group_df.to_csv(output_prefix + "_AcqGrouping.tsv", sep="\t", index=False)

# Create data dictionary for acq group tsv
acq_dict = get_acq_dictionary()
acq_dict = get_acq_dictionary(is_longitudinal)
with open(output_prefix + "_AcqGrouping.json", "w") as outfile:
json.dump(acq_dict, outfile, indent=4)
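Downstream, the acquisition-group key changes shape to match: longitudinal data keys on a (subject, session) tuple, cross-sectional data on the subject alone. A sketch using pybids' parse_file_entities on illustrative paths (not from the PR):

>>> from bids.layout import parse_file_entities
>>> ents = parse_file_entities("/dset/sub-01/ses-01/func/sub-01_ses-01_task-rest_bold.nii.gz")
>>> (ents.get("subject"), ents.get("session"))  # longitudinal acq_id
('01', '01')
>>> parse_file_entities("/dset/sub-01/func/sub-01_task-rest_bold.nii.gz").get("subject")
'01'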

(Diffs for the remaining three changed files were not loaded.)