Fix: Update CuBIDS to allow both longitudinal and cross-sectional structure by adding is_longitudinal attribute to CuBIDS class (#406)

* Add is_longitudinal attribute to the CuBIDS class

* fix lint issues

* still fixing lint issues

* Update cubids.py

* unset default value for is_longitudinal and add a method to infer is_longitudinal from data structure

* fix lint issues

* remove is_longitudinal from CuBIDS class docstring as Taylor suggested

* add is_longitudinal as an attribute in docstring

---------

Co-authored-by: Taylor Salo <[email protected]>
tientong98 and tsalo authored Jan 18, 2025
1 parent 07c0947 commit 5f94248
Showing 6 changed files with 119 additions and 62 deletions.
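The upshot: CuBIDS now infers whether a dataset is longitudinal (any path containing "ses-") rather than assuming it, threads that flag through path construction, and rejects session-level grouping for cross-sectional data. A minimal sketch of the new guard, assuming an illustrative cross-sectional dataset path and that acq_group_level is accepted as a constructor keyword (both are assumptions, not taken from this diff):

>>> from cubids import CuBIDS
>>> cubids = CuBIDS("/data/cross_sectional_dset", acq_group_level="session")
Traceback (most recent call last):
ValueError: Data is not longitudinal, so "session" is not a valid grouping level.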
1 change: 1 addition & 0 deletions cubids/config.py
@@ -4,6 +4,7 @@
import importlib.resources
import yaml


def load_config(config_file):
"""Load a YAML file containing a configuration for param groups.
91 changes: 76 additions & 15 deletions cubids/cubids.py
@@ -84,6 +84,8 @@ class CuBIDS(object):
A data dictionary for TSV outputs.
use_datalad : :obj:`bool`
If True, use datalad to track changes to the BIDS dataset.
is_longitudinal : :obj:`bool`
If True, "ses" is included in file paths.
"""

def __init__(
@@ -110,11 +112,17 @@ def __init__(
self.cubids_code_dir = Path(self.path + "/code/CuBIDS").is_dir()
self.data_dict = {} # data dictionary for TSV outputs
self.use_datalad = use_datalad # True if flag set, False if flag unset
self.is_longitudinal = self._infer_longitudinal() # inferred from dataset structure

if self.use_datalad:
self.init_datalad()

if self.acq_group_level == "session":
if self.is_longitudinal and self.acq_group_level == "session":
NON_KEY_ENTITIES.remove("session")
elif not self.is_longitudinal and self.acq_group_level == "session":
raise ValueError(
'Data is not longitudinal, so "session" is not a valid grouping level.'
)

@property
def layout(self):
@@ -128,6 +136,10 @@ def layout(self):
# print("LAYOUT OBJECT SET")
return self._layout

def _infer_longitudinal(self):
"""Infer if the dataset is longitudinal based on its structure."""
return any("ses-" in str(f) for f in Path(self.path).rglob("*"))

def reset_bids_layout(self, validate=False):
"""Reset the BIDS layout.
@@ -473,6 +485,7 @@ def change_filename(self, filepath, entities):
filepath=filepath,
entities=entities,
out_dir=str(self.path),
is_longitudinal=self.is_longitudinal,
)

exts = Path(filepath).suffixes
@@ -481,7 +494,8 @@
suffix = entities["suffix"]

sub = get_entity_value(filepath, "sub")
ses = get_entity_value(filepath, "ses")
if self.is_longitudinal:
ses = get_entity_value(filepath, "ses")

# Add the scan path + new path to the lists of old, new filenames
self.old_filenames.append(filepath)
@@ -577,7 +591,10 @@ def change_filename(self, filepath, entities):
self.new_filenames.append(new_labeling)

# RENAME INTENDED FORS!
ses_path = self.path + "/" + sub + "/" + ses
if self.is_longitudinal:
ses_path = self.path + "/" + sub + "/" + ses
elif not self.is_longitudinal:
ses_path = self.path + "/" + sub
files_with_if = []
files_with_if += Path(ses_path).rglob("fmap/*.json")
files_with_if += Path(ses_path).rglob("perf/*_m0scan.json")
@@ -600,6 +617,7 @@
data["IntendedFor"].remove(item)
# add new filename
data["IntendedFor"].append(_get_participant_relative_path(new_path))

if item == _get_bidsuri(filepath, self.path):
# remove old filename
data["IntendedFor"].remove(item)
@@ -1363,6 +1381,7 @@ def get_layout(self):
return self.layout


# XXX: Remove _validate_json?
def _validate_json():
"""Validate a JSON file's contents.
@@ -1402,8 +1421,29 @@ def _get_participant_relative_path(scan):
This is what will appear in the IntendedFor field of any association.
Examples:
>>> _get_participant_relative_path(
... "/path/to/dset/sub-01/ses-01/func/sub-01_ses-01_bold.nii.gz",
... )
'ses-01/func/sub-01_ses-01_bold.nii.gz'
>>> _get_participant_relative_path(
... "/path/to/dset/sub-01/func/sub-01_bold.nii.gz",
... )
'func/sub-01_bold.nii.gz'
>>> _get_participant_relative_path(
... "/path/to/dset/ses-01/func/ses-01_bold.nii.gz",
... )
Traceback (most recent call last):
ValueError: Could not find subject in ...
"""
return "/".join(Path(scan).parts[-3:])
parts = Path(scan).parts
# Find the first part that starts with "sub-"
for i, part in enumerate(parts):
if part.startswith("sub-"):
return "/".join(parts[i + 1 :])
raise ValueError(f"Could not find subject in {scan}")


def _get_bidsuri(filename, dataset_root):
@@ -1734,7 +1774,7 @@ def get_entity_value(path, key):
return part


def build_path(filepath, entities, out_dir):
def build_path(filepath, entities, out_dir, is_longitudinal):
"""Build a new path for a file based on its BIDS entities.
Parameters
@@ -1746,6 +1786,8 @@ def build_path(filepath, entities, out_dir):
This should include all of the entities in the filename *except* for subject and session.
out_dir : str
The output directory for the new file.
is_longitudinal : bool
If True, add "ses" to file path.
Returns
-------
@@ -1758,6 +1800,7 @@
... "/input/sub-01/ses-01/anat/sub-01_ses-01_T1w.nii.gz",
... {"acquisition": "VAR", "suffix": "T2w"},
... "/output",
... True,
... )
'/output/sub-01/ses-01/anat/sub-01_ses-01_acq-VAR_T2w.nii.gz'
@@ -1766,6 +1809,7 @@
... "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-01_bold.nii.gz",
... {"task": "rest", "run": "2", "acquisition": "VAR", "suffix": "bold"},
... "/output",
... True,
... )
'/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_run-2_bold.nii.gz'
@@ -1775,6 +1819,7 @@
... "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-00001_bold.nii.gz",
... {"task": "rest", "run": 2, "acquisition": "VAR", "suffix": "bold"},
... "/output",
... True,
... )
'/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_run-00002_bold.nii.gz'
@@ -1784,6 +1829,7 @@
... "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-1_bold.nii.gz",
... {"task": "rest", "run": 2, "acquisition": "VAR", "suffix": "bold"},
... "/output",
... True,
... )
'/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_run-2_bold.nii.gz'
@@ -1792,6 +1838,7 @@
... "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-1_bold.nii.gz",
... {"task": "rest", "run": "2", "acquisition": "VAR", "suffix": "bold"},
... "/output",
... True,
... )
'/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_run-2_bold.nii.gz'
@@ -1801,6 +1848,7 @@
... "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-01_bold.nii.gz",
... {"task": "rest", "acquisition": "VAR", "suffix": "bold"},
... "/output",
... True,
... )
'/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_bold.nii.gz'
@@ -1809,6 +1857,7 @@
... "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-01_bold.nii.gz",
... {"subject": "02", "task": "rest", "acquisition": "VAR", "suffix": "bold"},
... "/output",
... True,
... )
'/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_bold.nii.gz'
@@ -1817,6 +1866,7 @@
... "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-01_bold.nii.gz",
... {"task": "rest", "acquisition": "VAR", "echo": 1, "suffix": "bold"},
... "/output",
... True,
... )
'/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_bold.nii.gz'
@@ -1825,19 +1875,19 @@
... "/input/sub-01/ses-01/anat/sub-01_ses-01_asl.nii.gz",
... {"datatype": "perf", "acquisition": "VAR", "suffix": "asl"},
... "/output",
... True,
... )
WARNING: DATATYPE CHANGE DETECTED
'/output/sub-01/ses-01/perf/sub-01_ses-01_acq-VAR_asl.nii.gz'
It expects a longitudinal structure, so providing a cross-sectional filename won't work.
XXX: This is a bug.
It also works for cross-sectional filenames.
>>> build_path(
... "/input/sub-01/func/sub-01_task-rest_run-01_bold.nii.gz",
... {"task": "rest", "acquisition": "VAR", "echo": 1, "suffix": "bold"},
... {"task": "rest", "acquisition": "VAR", "suffix": "bold"},
... "/output",
... False,
... )
Traceback (most recent call last):
ValueError: Could not extract subject or session from ...
'/output/sub-01/func/sub-01_task-rest_acq-VAR_bold.nii.gz'
"""
exts = Path(filepath).suffixes
old_ext = "".join(exts)
@@ -1853,9 +1903,13 @@
entity_file_keys.append(key)

sub = get_entity_value(filepath, "sub")
ses = get_entity_value(filepath, "ses")
if sub is None or ses is None:
raise ValueError(f"Could not extract subject or session from {filepath}")
if sub is None:
raise ValueError(f"Could not extract subject from {filepath}")

if is_longitudinal:
ses = get_entity_value(filepath, "ses")
if ses is None:
raise ValueError(f"Could not extract session from {filepath}")

# Add leading zeros to run entity if it's an integer.
# If it's a string, respect the value provided.
@@ -1874,7 +1928,10 @@
.replace("reconstruction", "rec")
)
if len(filename) > 0:
filename = f"{sub}_{ses}_{filename}_{suffix}{old_ext}"
if is_longitudinal:
filename = f"{sub}_{ses}_{filename}_{suffix}{old_ext}"
elif not is_longitudinal:
filename = f"{sub}_{filename}_{suffix}{old_ext}"
else:
raise ValueError(f"Could not construct new filename for {filepath}")

@@ -1894,5 +1951,9 @@
dtype_new = dtype_orig

# Construct the new filename
new_path = str(Path(out_dir) / sub / ses / dtype_new / filename)
if is_longitudinal:
new_path = str(Path(out_dir) / sub / ses / dtype_new / filename)
elif not is_longitudinal:
new_path = str(Path(out_dir) / sub / dtype_new / filename)

return new_path
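One case the docstring examples don't cover: requesting longitudinal handling for a session-less path now fails loudly at the session check (a sketch consistent with the checks above, not a doctest from the diff):

>>> build_path(
...     "/input/sub-01/func/sub-01_task-rest_bold.nii.gz",
...     {"task": "rest", "suffix": "bold"},
...     "/output",
...     True,
... )
Traceback (most recent call last):
ValueError: Could not extract session from ...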
34 changes: 22 additions & 12 deletions cubids/metadata_merge.py
@@ -276,13 +276,13 @@ def merge_json_into_json(from_file, to_file, raise_on_error=False):
return 0


def get_acq_dictionary():
def get_acq_dictionary(is_longitudinal=False):
"""Create a BIDS data dictionary from dataframe columns.
Parameters
----------
df : :obj:`pandas.DataFrame`
Pre export TSV that will be converted to a json dictionary.
is_longitudinal : :obj:`bool`, optional
If True, add "session" to acq_dict. Default is False.
Returns
-------
@@ -291,15 +291,16 @@ def get_acq_dictionary():
"""
acq_dict = {}
acq_dict["subject"] = {"Description": "Participant ID"}
acq_dict["session"] = {"Description": "Session ID"}
if is_longitudinal:
acq_dict["session"] = {"Description": "Session ID"}
docs = " https://cubids.readthedocs.io/en/latest/about.html#definitions"
desc = "Acquisition Group. See Read the Docs for more information"
acq_dict["AcqGroup"] = {"Description": desc + docs}

return acq_dict
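A quick check of both modes (doctest-style sketch, not part of the diff):

>>> get_acq_dictionary()["subject"]
{'Description': 'Participant ID'}
>>> "session" in get_acq_dictionary()
False
>>> get_acq_dictionary(is_longitudinal=True)["session"]
{'Description': 'Session ID'}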


def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level):
def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level, is_longitudinal=False):
"""Find unique sets of Key/Param groups across subjects.
This writes out the following files:
@@ -317,6 +318,8 @@ def group_by_acquisition_sets(files_tsv, output_prefix, acq_group_level):
Prefix for output files.
acq_group_level : {"subject", "session"}
Level at which to group acquisitions.
is_longitudinal : :obj:`bool`, optional
If True, add "session" to acq_dict. Default is False.
"""
from bids import config
from bids.layout import parse_file_entities
@@ -331,9 +334,12 @@
file_entities = parse_file_entities(row.FilePath)

if acq_group_level == "subject":
acq_id = (file_entities.get("subject"), file_entities.get("session"))
if is_longitudinal:
acq_id = (file_entities.get("subject"), file_entities.get("session"))
elif not is_longitudinal:
acq_id = file_entities.get("subject")
acq_groups[acq_id].append((row.EntitySet, row.ParamGroup))
else:
elif is_longitudinal and acq_group_level == "session":
acq_id = (file_entities.get("subject"), None)
acq_groups[acq_id].append(
(row.EntitySet, row.ParamGroup, file_entities.get("session"))
@@ -359,17 +365,21 @@
for groupnum, content_id_row in enumerate(descending_order, start=1):
content_id = content_ids[content_id_row]
acq_group_info.append((groupnum, content_id_counts[content_id_row]) + content_id)
for subject, session in contents_to_subjects[content_id]:
grouped_sub_sess.append(
{"subject": "sub-" + subject, "session": session, "AcqGroup": groupnum}
)
if is_longitudinal:
for subject, session in contents_to_subjects[content_id]:
grouped_sub_sess.append(
{"subject": "sub-" + subject, "session": session, "AcqGroup": groupnum}
)
elif not is_longitudinal:
for subject in contents_to_subjects[content_id]:
grouped_sub_sess.append({"subject": "sub-" + subject, "AcqGroup": groupnum})

# Write the mapping of subject/session to
acq_group_df = pd.DataFrame(grouped_sub_sess)
acq_group_df.to_csv(output_prefix + "_AcqGrouping.tsv", sep="\t", index=False)

# Create data dictionary for acq group tsv
acq_dict = get_acq_dictionary()
acq_dict = get_acq_dictionary(is_longitudinal)
with open(output_prefix + "_AcqGrouping.json", "w") as outfile:
json.dump(acq_dict, outfile, indent=4)
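Downstream, the acquisition-group key changes shape to match: longitudinal data keys on a (subject, session) tuple, cross-sectional data on the subject alone. A sketch using pybids' parse_file_entities on illustrative paths (not from the PR):

>>> from bids.layout import parse_file_entities
>>> ents = parse_file_entities("/dset/sub-01/ses-01/func/sub-01_ses-01_task-rest_bold.nii.gz")
>>> (ents.get("subject"), ents.get("session"))  # longitudinal acq_id
('01', '01')
>>> parse_file_entities("/dset/sub-01/func/sub-01_task-rest_bold.nii.gz").get("subject")
'01'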

(Diffs for the remaining three changed files were not loaded.)