diff --git a/cubids/cubids.py b/cubids/cubids.py index 7b2f657f..8b236a15 100644 --- a/cubids/cubids.py +++ b/cubids/cubids.py @@ -22,12 +22,12 @@ import pandas as pd from bids.layout import parse_file_entities from bids.utils import listify -from sklearn.cluster import AgglomerativeClustering from tqdm import tqdm from cubids.config import load_config -from cubids.constants import ID_VARS, NON_KEY_ENTITIES +from cubids.constants import NON_KEY_ENTITIES from cubids.metadata_merge import check_merging_operations, group_by_acquisition_sets +from cubids import utils warnings.simplefilter(action="ignore", category=FutureWarning) bids.config.set_option("extension_initial_dot", True) @@ -343,7 +343,7 @@ def add_nifti_info(self): voxel_sizes = img.header.get_zooms() matrix_dims = img.shape # add nifti info to corresponding sidecars​ - sidecar = img_to_new_ext(str(path), ".json") + sidecar = utils.img_to_new_ext(str(path), ".json") if Path(sidecar).exists(): try: with open(sidecar) as f: @@ -430,9 +430,9 @@ def apply_tsv_changes(self, summary_tsv, files_tsv, new_prefix, raise_on_error=T # Get a source json file img_full_path = self.path + source_files.iloc[0].FilePath - source_json = img_to_new_ext(img_full_path, ".json") + source_json = utils.img_to_new_ext(img_full_path, ".json") for dest_nii in dest_files.FilePath: - dest_json = img_to_new_ext(self.path + dest_nii, ".json") + dest_json = utils.img_to_new_ext(self.path + dest_nii, ".json") if Path(dest_json).exists() and Path(source_json).exists(): merge_commands.append(f"bids-sidecar-merge {source_json} {dest_json}") @@ -475,7 +475,7 @@ def apply_tsv_changes(self, summary_tsv, files_tsv, new_prefix, raise_on_error=T new_key = entity_sets[orig_key_param] - new_entities = _entity_set_to_entities(new_key) + new_entities = utils._entity_set_to_entities(new_key) # generate new filenames according to new entity set self.change_filename(file_path, new_entities) @@ -544,7 +544,7 @@ def change_filename(self, filepath, entities): ----- This is the function I need to spend the most time on, since it has entities hardcoded. """ - new_path = build_path( + new_path = utils.build_path( filepath=filepath, entities=entities, out_dir=str(self.path), @@ -556,9 +556,9 @@ def change_filename(self, filepath, entities): suffix = entities["suffix"] - sub = get_entity_value(filepath, "sub") + sub = utils.get_entity_value(filepath, "sub") if self.is_longitudinal: - ses = get_entity_value(filepath, "ses") + ses = utils.get_entity_value(filepath, "ses") # Add the scan path + new path to the lists of old, new filenames self.old_filenames.append(filepath) @@ -577,21 +577,24 @@ def change_filename(self, filepath, entities): # ensure assoc not an IntendedFor reference if ".nii" not in str(assoc_path): self.old_filenames.append(assoc_path) - new_ext_path = img_to_new_ext(new_path, "".join(Path(assoc_path).suffixes)) + new_ext_path = utils.img_to_new_ext( + new_path, + "".join(Path(assoc_path).suffixes), + ) self.new_filenames.append(new_ext_path) # MAKE SURE THESE AREN'T COVERED BY get_associations!!! 
# Update DWI-specific files if "/dwi/" in filepath: # add the bval and bvec if there - bval_old = img_to_new_ext(filepath, ".bval") - bval_new = img_to_new_ext(new_path, ".bval") + bval_old = utils.img_to_new_ext(filepath, ".bval") + bval_new = utils.img_to_new_ext(new_path, ".bval") if Path(bval_old).exists() and bval_old not in self.old_filenames: self.old_filenames.append(bval_old) self.new_filenames.append(bval_new) - bvec_old = img_to_new_ext(filepath, ".bvec") - bvec_new = img_to_new_ext(new_path, ".bvec") + bvec_old = utils.img_to_new_ext(filepath, ".bvec") + bvec_new = utils.img_to_new_ext(new_path, ".bvec") if Path(bvec_old).exists() and bvec_old not in self.old_filenames: self.old_filenames.append(bvec_old) self.new_filenames.append(bvec_new) @@ -666,7 +669,7 @@ def change_filename(self, filepath, entities): self.IF_rename_paths.append(filename_with_if) # json_file = self.layout.get_file(filename_with_if) # data = json_file.get_dict() - data = get_sidecar_metadata(filename_with_if) + data = utils.get_sidecar_metadata(filename_with_if) if data == "Erroneous sidecar": print("Error parsing sidecar: ", filename_with_if) continue @@ -675,20 +678,20 @@ def change_filename(self, filepath, entities): # Coerce IntendedFor to a list. data["IntendedFor"] = listify(data["IntendedFor"]) for item in data["IntendedFor"]: - if item == _get_participant_relative_path(filepath): + if item == utils._get_participant_relative_path(filepath): # remove old filename data["IntendedFor"].remove(item) # add new filename - data["IntendedFor"].append(_get_participant_relative_path(new_path)) + data["IntendedFor"].append(utils._get_participant_relative_path(new_path)) - if item == _get_bidsuri(filepath, self.path): + if item == utils._get_bidsuri(filepath, self.path): # remove old filename data["IntendedFor"].remove(item) # add new filename - data["IntendedFor"].append(_get_bidsuri(new_path, self.path)) + data["IntendedFor"].append(utils._get_bidsuri(new_path, self.path)) # update the json with the new data dictionary - _update_json(filename_with_if, data) + utils._update_json(filename_with_if, data) # save IntendedFor purges so that you can datalad run the # remove association file commands on a clean dataset @@ -807,12 +810,12 @@ def _purge_associations(self, scans): # sub, ses, modality only (no self.path) if_scans = [] for scan in scans: - if_scans.append(_get_participant_relative_path(self.path + scan)) + if_scans.append(utils._get_participant_relative_path(self.path + scan)) for path in Path(self.path).rglob("sub-*/*/fmap/*.json"): # json_file = self.layout.get_file(str(path)) # data = json_file.get_dict() - data = get_sidecar_metadata(str(path)) + data = utils.get_sidecar_metadata(str(path)) if data == "Erroneous sidecar": print("Error parsing sidecar: ", str(path)) continue @@ -826,7 +829,7 @@ def _purge_associations(self, scans): data["IntendedFor"].remove(item) # update the json with the new data dictionary - _update_json(str(path), data) + utils._update_json(str(path), data) # save IntendedFor purges so that you can datalad run the # remove association file commands on a clean dataset @@ -855,14 +858,14 @@ def _purge_associations(self, scans): if ".nii" not in str(path): if "/dwi/" in str(path): # add the bval and bvec if there - if Path(img_to_new_ext(str(path), ".bval")).exists(): - to_remove.append(img_to_new_ext(str(path), ".bval")) - if Path(img_to_new_ext(str(path), ".bvec")).exists(): - to_remove.append(img_to_new_ext(str(path), ".bvec")) + if Path(utils.img_to_new_ext(str(path), 
".bval")).exists(): + to_remove.append(utils.img_to_new_ext(str(path), ".bval")) + if Path(utils.img_to_new_ext(str(path), ".bvec")).exists(): + to_remove.append(utils.img_to_new_ext(str(path), ".bvec")) if "/func/" in str(path): # add tsvs - tsv = img_to_new_ext(str(path), ".tsv").replace("_bold", "_events") + tsv = utils.img_to_new_ext(str(path), ".tsv").replace("_bold", "_events") if Path(tsv).exists(): to_remove.append(tsv) # add tsv json (if exists) @@ -954,8 +957,8 @@ def _cache_fieldmaps(self): files_to_fmaps = defaultdict(list) for fmap_file in tqdm(fmap_files): # intentions = listify(fmap_file.get_metadata().get("IntendedFor")) - fmap_json = img_to_new_ext(fmap_file.path, ".json") - metadata = get_sidecar_metadata(fmap_json) + fmap_json = utils.img_to_new_ext(fmap_file.path, ".json") + metadata = utils.get_sidecar_metadata(fmap_json) if metadata == "Erroneous sidecar": print("Error parsing sidecar: ", str(fmap_json)) continue @@ -996,7 +999,7 @@ def get_param_groups_from_entity_set(self, entity_set): """ if not self.fieldmaps_cached: raise Exception("Fieldmaps must be cached to find parameter groups.") - key_entities = _entity_set_to_entities(entity_set) + key_entities = utils._entity_set_to_entities(entity_set) key_entities["extension"] = ".nii[.gz]*" matching_files = self.layout.get( @@ -1007,7 +1010,7 @@ def get_param_groups_from_entity_set(self, entity_set): # entities do not also get added to matching_files to_include = [] for filepath in matching_files: - f_entity_set = _file_to_entity_set(filepath) + f_entity_set = utils._file_to_entity_set(filepath) if f_entity_set == entity_set: to_include.append(filepath) @@ -1023,7 +1026,7 @@ def get_param_groups_from_entity_set(self, entity_set): print(f"Unusual Modality Detected: {filepath}") modality = "other" - ret = _get_param_groups( + ret = utils._get_param_groups( to_include, self.fieldmap_lookup, entity_set, @@ -1210,14 +1213,14 @@ def get_param_groups_dataframes(self): param_group_summaries.append(param_summary) labeled_files.append(labeled_file_params) - big_df = _order_columns(pd.concat(labeled_files, ignore_index=True)) + big_df = utils._order_columns(pd.concat(labeled_files, ignore_index=True)) # make Filepaths relative to bids dir for row in range(len(big_df)): long_name = big_df.loc[row, "FilePath"] big_df.loc[row, "FilePath"] = long_name.replace(self.path, "") - summary = _order_columns(pd.concat(param_group_summaries, ignore_index=True)) + summary = utils._order_columns(pd.concat(param_group_summaries, ignore_index=True)) # create new col that strings key and param group together summary["KeyParamGroup"] = summary["EntitySet"] + "__" + summary["ParamGroup"].map(str) @@ -1274,7 +1277,7 @@ def get_param_groups_dataframes(self): if relational["IntendedForKey"]["display_mode"] == "bool": rename_cols.append("UsedAsFieldmap") - summary = assign_variants(summary, rename_cols) + summary = utils.assign_variants(summary, rename_cols) return big_df, summary @@ -1365,10 +1368,10 @@ def get_entity_sets(self): continue if str(path).endswith(".nii") or str(path).endswith(".nii.gz"): - entity_sets.update((_file_to_entity_set(path),)) + entity_sets.update((utils._file_to_entity_set(path),)) # Fill the dictionary of entity set, list of filenames pairrs - ret = _file_to_entity_set(path) + ret = utils._file_to_entity_set(path) if ret not in self.keys_files.keys(): self.keys_files[ret] = [] @@ -1410,7 +1413,7 @@ def change_metadata(self, filters, metadata): for bidsfile in files_to_change: # get the sidecar file # bidsjson_file = 
bidsfile.get_associations() - bidsjson_file = img_to_new_ext(str(bidsfile), ".json") + bidsjson_file = utils.img_to_new_ext(str(bidsfile), ".json") if not bidsjson_file: print("NO JSON FILES FOUND IN ASSOCIATIONS") continue @@ -1427,7 +1430,7 @@ def change_metadata(self, filters, metadata): sidecar.update(metadata) # write out - _update_json(json_file.path, sidecar) + utils._update_json(json_file.path, sidecar) def get_all_metadata_fields(self): """Return all metadata fields in a BIDS directory. @@ -1516,845 +1519,3 @@ def get_fieldmap_lookup(self): def get_layout(self): """Get layout.""" return self.layout - - -# XXX: Remove _validate_json? -def _validate_json(): - """Validate a JSON file's contents. - - This is currently not implemented, but would accept metadata as its param. - """ - # TODO: implement this or delete ??? - return True - - -def _update_json(json_file, metadata): - """Update a JSON file with the provided metadata. - - This function writes the given metadata to the specified JSON file if the - JSON data is valid. If the JSON data is invalid, it prints an error message. - - Parameters - ---------- - json_file : str - The path to the JSON file to be updated. - metadata : dict - The metadata to be written to the JSON file. - - Returns - ------- - None - """ - if _validate_json(): - with open(json_file, "w", encoding="utf-8") as f: - json.dump(metadata, f, ensure_ascii=False, indent=4) - else: - print("INVALID JSON DATA") - - -def _entity_set_to_entities(entity_set): - """Split an entity_set name into a pybids dictionary of entities. - - Parameters - ---------- - entity_set : str - A string representing a set of entities, where each entity is - separated by an underscore and each key-value pair is separated by a hyphen. - - Returns - ------- - dict - A dictionary where the keys are entity names and the values are entity values. - - Examples - -------- - >>> _entity_set_to_entities("sub-01_ses-02_task-rest") - {'sub': '01', 'ses': '02', 'task': 'rest'} - """ - return dict([group.split("-") for group in entity_set.split("_")]) - - -def _entities_to_entity_set(entities): - """Convert a pybids entities dictionary into an entity set name. - - Parameters - ---------- - entities : dict - A dictionary containing pybids entities where keys are entity names - and values are entity values. - - Returns - ------- - str - A string representing the entity set name, constructed by joining - the sorted entity keys and their corresponding values, separated by hyphens. - """ - group_keys = sorted(entities.keys() - NON_KEY_ENTITIES) - return "_".join([f"{key}-{entities[key]}" for key in group_keys]) - - -def _file_to_entity_set(filename): - """Identify and return the entity set of a BIDS valid filename. - - Parameters - ---------- - filename : str - The filename to parse for BIDS entities. - - Returns - ------- - set - A set of entities extracted from the filename. - """ - entities = parse_file_entities(str(filename)) - return _entities_to_entity_set(entities) - - -def _get_participant_relative_path(scan): - """Build the relative-from-subject version of a Path to a file. - - Parameters - ---------- - scan : str - The full path to the scan file. - - Returns - ------- - str - The relative path from the subject directory. - - Raises - ------ - ValueError - If the subject directory cannot be found in the path. - - Examples - -------- - >>> _get_participant_relative_path( - ... "/path/to/dset/sub-01/ses-01/func/sub-01_ses-01_bold.nii.gz", - ... 
) - 'ses-01/func/sub-01_ses-01_bold.nii.gz' - - >>> _get_participant_relative_path( - ... "/path/to/dset/sub-01/func/sub-01_bold.nii.gz", - ... ) - 'func/sub-01_bold.nii.gz' - - >>> _get_participant_relative_path( - ... "/path/to/dset/ses-01/func/ses-01_bold.nii.gz", - ... ) - Traceback (most recent call last): - ValueError: Could not find subject in ... - """ - parts = Path(scan).parts - # Find the first part that starts with "sub-" - for i, part in enumerate(parts): - if part.startswith("sub-"): - return "/".join(parts[i + 1 :]) - raise ValueError(f"Could not find subject in {scan}") - - -def _get_bidsuri(filename, dataset_root): - """Convert a file path to a BIDS URI. - - Parameters - ---------- - filename : str - The full path to the file within the BIDS dataset. - dataset_root : str - The root directory of the BIDS dataset. - - Returns - ------- - str - The BIDS URI corresponding to the given file path. - - Raises - ------ - ValueError - If the filename is not within the dataset_root. - - Examples - -------- - >>> _get_bidsuri("/path/to/bids/sub-01/ses-01/dataset_description.json", "/path/to/bids") - 'bids::sub-01/ses-01/dataset_description.json' - """ - if dataset_root in filename: - return filename.replace(dataset_root, "bids::").replace("bids::/", "bids::") - raise ValueError(f"Only local datasets are supported: {filename}") - - -def _get_param_groups( - files, - fieldmap_lookup, - entity_set_name, - grouping_config, - modality, - keys_files, -): - """Find a list of *parameter groups* from a list of files. - - For each file in `files`, find critical parameters for metadata. Then find - unique sets of these critical parameters. - - Parameters - ---------- - files : :obj:`list` of :obj:`str` - List of file names - fieldmap_lookup : :obj:`dict` - mapping of filename strings relative to the bids root - (e.g. "sub-X/ses-Y/func/sub-X_ses-Y_task-rest_bold.nii.gz") - grouping_config : :obj:`dict` - configuration for defining parameter groups - - Returns - ------- - ordered_labeled_files : :obj:`pandas.DataFrame` - A data frame with one row per file where the ParamGroup column - indicates which group each scan is a part of. - param_groups_with_counts : :obj:`pandas.DataFrame` - A data frame with param group summaries. 
- """ - if not files: - print("WARNING: no files for", entity_set_name) - return None, None - - # Split the config into separate parts - imaging_params = grouping_config.get("sidecar_params", {}) - imaging_params = imaging_params[modality] - - relational_params = grouping_config.get("relational_params", {}) - - derived_params = grouping_config.get("derived_params") - derived_params = derived_params[modality] - - imaging_params.update(derived_params) - - dfs = [] - # path needs to be relative to the root with no leading prefix - - for path in files: - # metadata = layout.get_metadata(path) - metadata = get_sidecar_metadata(img_to_new_ext(path, ".json")) - if metadata == "Erroneous sidecar": - print("Error parsing sidecar: ", img_to_new_ext(path, ".json")) - else: - intentions = metadata.get("IntendedFor", []) - slice_times = metadata.get("SliceTiming", []) - - wanted_keys = metadata.keys() & imaging_params - example_data = {key: metadata[key] for key in wanted_keys} - example_data["EntitySet"] = entity_set_name - - # Get the fieldmaps out and add their types - if "FieldmapKey" in relational_params: - fieldmap_types = sorted( - [_file_to_entity_set(fmap.path) for fmap in fieldmap_lookup[path]] - ) - - # check if config says columns or bool - if relational_params["FieldmapKey"]["display_mode"] == "bool": - if len(fieldmap_types) > 0: - example_data["HasFieldmap"] = True - else: - example_data["HasFieldmap"] = False - else: - for fmap_num, fmap_type in enumerate(fieldmap_types): - example_data[f"FieldmapKey{fmap_num:02d}"] = fmap_type - - # Add the number of slice times specified - if "NSliceTimes" in derived_params: - example_data["NSliceTimes"] = len(slice_times) - - example_data["FilePath"] = path - - # If it's a fieldmap, see what entity set it's intended to correct - if "IntendedForKey" in relational_params: - intended_entity_sets = sorted( - [_file_to_entity_set(intention) for intention in intentions] - ) - - # check if config says columns or bool - if relational_params["IntendedForKey"]["display_mode"] == "bool": - if len(intended_entity_sets) > 0: - example_data["UsedAsFieldmap"] = True - else: - example_data["UsedAsFieldmap"] = False - else: - for intention_num, intention_entity_set in enumerate(intended_entity_sets): - example_data[f"IntendedForKey{intention_num:02d}"] = intention_entity_set - - dfs.append(example_data) - - # Assign each file to a ParamGroup - - # round param groups based on precision - df = round_params(pd.DataFrame(dfs), grouping_config, modality) - - # cluster param groups based on tolerance - df = format_params(df, grouping_config, modality) - # param_group_cols = list(set(df.columns.to_list()) - set(["FilePath"])) - - # get the subset of columns to drop duplicates by - check_cols = [] - for col in list(df.columns): - if f"Cluster_{col}" not in list(df.columns) and col != "FilePath": - check_cols.append(col) - - # Find the unique ParamGroups and assign ID numbers in "ParamGroup"\ - try: - deduped = df.drop("FilePath", axis=1) - except Exception: - return "erroneous sidecar found" - - deduped = deduped.drop_duplicates(subset=check_cols, ignore_index=True) - deduped["ParamGroup"] = np.arange(deduped.shape[0]) + 1 - - # add the modality as a column - deduped["Modality"] = modality - - # add entity set count column (will delete later) - deduped["EntitySetCount"] = len(keys_files[entity_set_name]) - - # Add the ParamGroup to the whole list of files - labeled_files = pd.merge(df, deduped, on=check_cols) - - value_counts = labeled_files.ParamGroup.value_counts() - - 
param_group_counts = pd.DataFrame( - {"Counts": value_counts.to_numpy(), "ParamGroup": value_counts.index.to_numpy()} - ) - - param_groups_with_counts = pd.merge(deduped, param_group_counts, on=["ParamGroup"]) - - # Sort by counts and relabel the param groups - param_groups_with_counts.sort_values(by=["Counts"], inplace=True, ascending=False) - param_groups_with_counts["ParamGroup"] = np.arange(param_groups_with_counts.shape[0]) + 1 - - # Send the new, ordered param group ids to the files list - ordered_labeled_files = pd.merge( - df, param_groups_with_counts, on=check_cols, suffixes=("_x", "") - ) - - # sort ordered_labeled_files by param group - ordered_labeled_files.sort_values(by=["Counts"], inplace=True, ascending=False) - - return ordered_labeled_files, param_groups_with_counts - - -def round_params(param_group_df, config, modality): - """Round columns' values in a DataFrame according to requested precision. - - Parameters - ---------- - param_group_df : pandas.DataFrame - DataFrame containing the parameters to be rounded. - config : dict - Configuration dictionary containing rounding precision information. - modality : str - The modality key to access the relevant rounding precision settings in the config. - - Returns - ------- - pandas.DataFrame - DataFrame with the specified columns' values rounded to the requested precision. - """ - to_format = config["sidecar_params"][modality] - to_format.update(config["derived_params"][modality]) - - for column_name, column_fmt in to_format.items(): - if column_name not in param_group_df: - continue - - if "precision" in column_fmt: - if isinstance(param_group_df[column_name], float): - param_group_df[column_name] = param_group_df[column_name].round( - column_fmt["precision"] - ) - - return param_group_df - - -def get_sidecar_metadata(json_file): - """Get all metadata values in a file's sidecar. - - Transform JSON dictionary to Python dictionary. - - Parameters - ---------- - json_file : str - Path to the JSON sidecar file. - - Returns - ------- - dict or str - Returns a dictionary containing the metadata if the file is successfully read, - otherwise returns the string "Erroneous sidecar". - - Raises - ------ - Exception - If there is an error loading the JSON file. - """ - try: - with open(json_file) as json_file: - data = json.load(json_file) - return data - except Exception: - # print("Error loading sidecar: ", json_filename) - return "Erroneous sidecar" - - -def format_params(param_group_df, config, modality): - """Run AgglomerativeClustering on param groups and add columns to dataframe. - - Parameters - ---------- - param_group_df : :obj:`pandas.DataFrame` - A data frame with one row per file where the ParamGroup column - indicates which group each scan is a part of. - config : :obj:`dict` - Configuration for defining parameter groups. - This dictionary has two keys: ``'sidecar_params'`` and ``'derived_params'``. - modality : :obj:`str` - Modality of the scan. - This is used to select the correct configuration from the config dict. - - Returns - ------- - param_group_df : :obj:`pandas.DataFrame` - An updated version of the input data frame, - with a new column added for each element in the modality's - ``'sidecar_params'`` and ``'derived_params'`` dictionaries. - The new columns will have the name ``'Cluster_' + column_name``, - and will contain the cluster labels for each parameter group. - - Notes - ----- - ``'sidecar_params'`` is a dictionary of dictionaries, where keys are modalities. 
- The modality-wise dictionary's keys are names of BIDS fields to directly include - in the Parameter Groupings, - and the values describe the parameters by which those BIDS' fields are compared. - For example, - {"RepetitionTime": {"tolerance": 0.000001, "precision": 6, "suggest_variant_rename": True} - means that the RepetitionTime field should be compared across files and flagged as a - variant if it differs from others by 0.000001 or more. - - ``'derived_params'`` is a dictionary of dictionaries, where keys are modalities. - The modality-wise dictionary's keys are names of BIDS fields to derive from the - NIfTI header and include in the Parameter Groupings. - """ - to_format = config["sidecar_params"][modality] - to_format.update(config["derived_params"][modality]) - - for column_name, column_fmt in to_format.items(): - if column_name not in param_group_df: - continue - - if "tolerance" in column_fmt and len(param_group_df) > 1: - array = param_group_df[column_name].to_numpy().reshape(-1, 1) - - for i in range(len(array)): - if np.isnan(array[i, 0]): - array[i, 0] = -999 - - tolerance = to_format[column_name]["tolerance"] - clustering = AgglomerativeClustering( - n_clusters=None, distance_threshold=tolerance, linkage="complete" - ).fit(array) - - for i in range(len(array)): - if array[i, 0] == -999: - array[i, 0] = np.nan - - # now add clustering_labels as a column - param_group_df[f"Cluster_{column_name}"] = clustering.labels_ - - return param_group_df - - -def _order_columns(df): - """Organize columns of the summary and files DataFrames. - - Parameters - ---------- - df : pandas.DataFrame - The DataFrame whose columns need to be organized. - - Returns - ------- - pandas.DataFrame - The DataFrame with columns organized such that 'EntitySet' and - 'ParamGroup' are the first two columns, 'FilePath' is the last - column (if present), and the remaining columns are sorted - alphabetically. - - Notes - ----- - This is the only place where the constant ID_VARS is used, - and the strings in that constant are hardcoded here, - so we might not need that constant at all. - """ - cols = set(df.columns.to_list()) - non_id_cols = cols - ID_VARS - new_columns = ["EntitySet", "ParamGroup"] + sorted(non_id_cols) - if "FilePath" in cols: - new_columns.append("FilePath") - - df = df[new_columns] - - return df[new_columns] - - -def img_to_new_ext(img_path, new_ext): - """Convert an image file path to a new extension. - - Parameters - ---------- - img_path : str - The file path of the image to be converted. - new_ext : str - The new extension to be applied to the image file path. - - Returns - ------- - str - The file path with the new extension applied. - - Examples - -------- - >>> img_to_new_ext('/path/to/file_image.nii.gz', '.tsv') - '/path/to/file_events.tsv' - - >>> img_to_new_ext('/path/to/file_image.nii.gz', '.tsv.gz') - '/path/to/file_physio.tsv.gz' - - >>> img_to_new_ext('/path/to/file_image.nii.gz', '.json') - '/path/to/file_image.json' - - Notes - ----- - The hardcoded suffix associated with each extension may not be comprehensive. - BIDS has been extended a lot in recent years. - """ - # handle .tsv edge case - if new_ext == ".tsv": - # take out suffix - return img_path.rpartition("_")[0] + "_events" + new_ext - elif new_ext == ".tsv.gz": - return img_path.rpartition("_")[0] + "_physio" + new_ext - else: - return img_path.replace(".nii.gz", "").replace(".nii", "") + new_ext - - -def get_entity_value(path, key): - """Given a filepath and BIDS key name, return the value associated with the key. 
- - Parameters - ---------- - path : str - The file path to be parsed. - key : str - The BIDS key name to search for in the file path. - - Returns - ------- - str or None - The value associated with the BIDS key if found, otherwise None. - - Examples - -------- - >>> get_entity_value('/path/to/sub-01/ses-01/func/sub-01_ses-02_task-rest_bold.nii.gz', 'sub') - 'sub-01' - >>> get_entity_value('/path/to/sub-01/ses-02/func/sub-01_ses-02_task-rest_bold.nii.gz', 'ses') - 'ses-02' - """ - parts = Path(path).parts - for part in parts: - if part.startswith(key + "-"): - return part - - -def build_path(filepath, entities, out_dir, is_longitudinal): - """Build a new path for a file based on its BIDS entities. - - Parameters - ---------- - filepath : str - The original file path. - entities : dict - A dictionary of BIDS entities. - This should include all of the entities in the filename *except* for subject and session. - out_dir : str - The output directory for the new file. - is_longitudinal : bool - If True, add "ses" to file path. - - Returns - ------- - new_path : str - The new file path. - - Examples - -------- - >>> build_path( - ... "/input/sub-01/ses-01/anat/sub-01_ses-01_T1w.nii.gz", - ... {"acquisition": "VAR", "suffix": "T2w"}, - ... "/output", - ... True, - ... ) - '/output/sub-01/ses-01/anat/sub-01_ses-01_acq-VAR_T2w.nii.gz' - - The function does not add an extra leading zero to the run entity when it's a string. - >>> build_path( - ... "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-01_bold.nii.gz", - ... {"task": "rest", "run": "2", "acquisition": "VAR", "suffix": "bold"}, - ... "/output", - ... True, - ... ) - '/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_run-2_bold.nii.gz' - - The function adds an extra leading zero to the run entity when it's an integer - and the original filename has a leading zero. - >>> build_path( - ... "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-00001_bold.nii.gz", - ... {"task": "rest", "run": 2, "acquisition": "VAR", "suffix": "bold"}, - ... "/output", - ... True, - ... ) - '/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_run-00002_bold.nii.gz' - - The function does not add an extra leading zero to the run entity when it's an integer - and the original filename doesn't have a leading zero. - >>> build_path( - ... "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-1_bold.nii.gz", - ... {"task": "rest", "run": 2, "acquisition": "VAR", "suffix": "bold"}, - ... "/output", - ... True, - ... ) - '/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_run-2_bold.nii.gz' - - The function doesn't add an extra leading zero to the run entity when there isn't a zero. - >>> build_path( - ... "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-1_bold.nii.gz", - ... {"task": "rest", "run": "2", "acquisition": "VAR", "suffix": "bold"}, - ... "/output", - ... True, - ... ) - '/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_run-2_bold.nii.gz' - - Entities in the original path, but not the entity dictionary, are not included, - like run in this case. - >>> build_path( - ... "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-01_bold.nii.gz", - ... {"task": "rest", "acquisition": "VAR", "suffix": "bold"}, - ... "/output", - ... True, - ... ) - '/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_bold.nii.gz' - - Entities outside of the prescribed list are ignored, such as "subject"... - >>> build_path( - ... "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-01_bold.nii.gz", - ... 
{"subject": "02", "task": "rest", "acquisition": "VAR", "suffix": "bold"}, - ... "/output", - ... True, - ... ) - '/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_bold.nii.gz' - - or "echo". - >>> build_path( - ... "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-01_bold.nii.gz", - ... {"task": "rest", "acquisition": "VAR", "echo": 1, "suffix": "bold"}, - ... "/output", - ... True, - ... ) - '/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_bold.nii.gz' - - It can change the datatype, but will warn the user. - >>> build_path( - ... "/input/sub-01/ses-01/anat/sub-01_ses-01_asl.nii.gz", - ... {"datatype": "perf", "acquisition": "VAR", "suffix": "asl"}, - ... "/output", - ... True, - ... ) - WARNING: DATATYPE CHANGE DETECTED - '/output/sub-01/ses-01/perf/sub-01_ses-01_acq-VAR_asl.nii.gz' - - It also works for cross-sectional filename. - >>> build_path( - ... "/input/sub-01/func/sub-01_task-rest_run-01_bold.nii.gz", - ... {"task": "rest", "acquisition": "VAR", "suffix": "bold"}, - ... "/output", - ... False, - ... ) - '/output/sub-01/func/sub-01_task-rest_acq-VAR_bold.nii.gz' - """ - exts = Path(filepath).suffixes - old_ext = "".join(exts) - - suffix = entities["suffix"] - entity_file_keys = [] - - # Entities that may be in the filename? - file_keys = ["task", "acquisition", "direction", "reconstruction", "run"] - - for key in file_keys: - if key in list(entities.keys()): - entity_file_keys.append(key) - - sub = get_entity_value(filepath, "sub") - if sub is None: - raise ValueError(f"Could not extract subject from {filepath}") - - if is_longitudinal: - ses = get_entity_value(filepath, "ses") - if ses is None: - raise ValueError(f"Could not extract session from {filepath}") - - # Add leading zeros to run entity if it's an integer. - # If it's a string, respect the value provided. - if "run" in entities.keys() and isinstance(entities["run"], int): - # Infer the number of leading zeros needed from the original filename - n_leading = 2 # default to 1 leading zero - if "_run-" in filepath: - run_str = filepath.split("_run-")[1].split("_")[0] - n_leading = len(run_str) - entities["run"] = str(entities["run"]).zfill(n_leading) - - filename = "_".join([f"{key}-{entities[key]}" for key in entity_file_keys]) - filename = ( - filename.replace("acquisition", "acq") - .replace("direction", "dir") - .replace("reconstruction", "rec") - ) - if len(filename) > 0: - if is_longitudinal: - filename = f"{sub}_{ses}_{filename}_{suffix}{old_ext}" - elif not is_longitudinal: - filename = f"{sub}_{filename}_{suffix}{old_ext}" - else: - raise ValueError(f"Could not construct new filename for {filepath}") - - # CHECK TO SEE IF DATATYPE CHANGED - # datatype may be overridden/changed if the original file is located in the wrong folder. - dtypes = ["anat", "func", "perf", "fmap", "dwi"] - dtype_orig = "" - for dtype in dtypes: - if dtype in filepath: - dtype_orig = dtype - - if "datatype" in entities.keys(): - dtype_new = entities["datatype"] - if entities["datatype"] != dtype_orig: - print("WARNING: DATATYPE CHANGE DETECTED") - else: - dtype_new = dtype_orig - - # Construct the new filename - if is_longitudinal: - new_path = str(Path(out_dir) / sub / ses / dtype_new / filename) - elif not is_longitudinal: - new_path = str(Path(out_dir) / sub / dtype_new / filename) - - return new_path - - -def assign_variants(summary, rename_cols): - """Assign variant names to files based on differences from dominant group. 
- - Parameters - ---------- - summary : pandas.DataFrame - The summary DataFrame containing the metadata for each file. - The columns that are used include "ParamGroup", "EntitySet", - the columns in ``rename_cols``, - and any columns in ``rename_cols`` that are prefixed with "Cluster_". - rename_cols : list of str - A list of column names to use for renaming files. - The values in these columns will be compared against the dominant group - and labeled with a variant name if they differ. - - Returns - ------- - pandas.DataFrame - The updated summary DataFrame with a new column "RenameEntitySet" - containing the new entity set names for each file. - """ - # loop through summary tsv and create dom_dict - dom_dict = {} - for row in range(len(summary)): - # if dominant group identified - if str(summary.loc[row, "ParamGroup"]) == "1": - val = {} - # grab col, all vals send to dict - key = summary.loc[row, "EntitySet"] - for col in rename_cols: - summary[col] = summary[col].apply(str) - val[col] = summary.loc[row, col] - - if f"Cluster_{col}" in summary.columns: - val[f"Cluster_{col}"] = summary.loc[row, f"Cluster_{col}"] - - dom_dict[key] = val - - # now loop through again and ID variance - for row in range(len(summary)): - # check to see if renaming has already happened - renamed = False - entities = _entity_set_to_entities(summary.loc[row, "EntitySet"]) - if "VARIANT" in summary.loc[row, "EntitySet"]: - renamed = True - - if summary.loc[row, "ParamGroup"] != 1 and not renamed: - acq_str = "VARIANT" - # now we know we have a deviant param group - # check if TR is same as param group 1 - entity_set = summary.loc[row, "EntitySet"] - for col in rename_cols: - dom_entity_set = dom_dict[entity_set] - summary[col] = summary[col].apply(str) - - if f"Cluster_{col}" in dom_entity_set.keys(): - if summary.loc[row, f"Cluster_{col}"] != dom_entity_set[f"Cluster_{col}"]: - acq_str += col - elif summary.loc[row, col] != dom_entity_set[col]: - if col == "HasFieldmap": - if dom_entity_set[col] == "True": - acq_str += "NoFmap" - else: - acq_str += "HasFmap" - elif col == "UsedAsFieldmap": - if dom_entity_set[col] == "True": - acq_str += "Unused" - else: - acq_str += "IsUsed" - else: - acq_str += col - - if acq_str == "VARIANT": - acq_str += "Other" - - if "acquisition" in entities.keys(): - acq = f"acquisition-{entities['acquisition'] + acq_str}" - - new_name = summary.loc[row, "EntitySet"].replace( - f"acquisition-{entities['acquisition']}", - acq, - ) - else: - acq = f"acquisition-{acq_str}" - new_name = acq + "_" + summary.loc[row, "EntitySet"] - - summary.at[row, "RenameEntitySet"] = new_name - - # convert all "nan" to empty str - # so they don't show up in the summary tsv - if summary.loc[row, "RenameEntitySet"] == "nan": - summary.at[row, "RenameEntitySet"] = "" - - for col in rename_cols: - if summary.loc[row, col] == "nan": - summary.at[row, col] = "" - - return summary diff --git a/cubids/utils.py b/cubids/utils.py new file mode 100644 index 00000000..376c2244 --- /dev/null +++ b/cubids/utils.py @@ -0,0 +1,853 @@ +"""Utility functions for CuBIDS.""" + +import json +from pathlib import Path + +import numpy as np +import pandas as pd +from bids.layout import parse_file_entities +from sklearn.cluster import AgglomerativeClustering + +from cubids.constants import ID_VARS, NON_KEY_ENTITIES + + +# XXX: Remove _validate_json? +def _validate_json(): + """Validate a JSON file's contents. + + This is currently not implemented, but would accept metadata as its param. + """ + # TODO: implement this or delete ??? 
+ return True + + +def _update_json(json_file, metadata): + """Update a JSON file with the provided metadata. + + This function writes the given metadata to the specified JSON file if the + JSON data is valid. If the JSON data is invalid, it prints an error message. + + Parameters + ---------- + json_file : str + The path to the JSON file to be updated. + metadata : dict + The metadata to be written to the JSON file. + + Returns + ------- + None + """ + if _validate_json(): + with open(json_file, "w", encoding="utf-8") as f: + json.dump(metadata, f, ensure_ascii=False, indent=4) + else: + print("INVALID JSON DATA") + + +def _entity_set_to_entities(entity_set): + """Split an entity_set name into a pybids dictionary of entities. + + Parameters + ---------- + entity_set : str + A string representing a set of entities, where each entity is + separated by an underscore and each key-value pair is separated by a hyphen. + + Returns + ------- + dict + A dictionary where the keys are entity names and the values are entity values. + + Examples + -------- + >>> _entity_set_to_entities("sub-01_ses-02_task-rest") + {'sub': '01', 'ses': '02', 'task': 'rest'} + """ + return dict([group.split("-") for group in entity_set.split("_")]) + + +def _entities_to_entity_set(entities): + """Convert a pybids entities dictionary into an entity set name. + + Parameters + ---------- + entities : dict + A dictionary containing pybids entities where keys are entity names + and values are entity values. + + Returns + ------- + str + A string representing the entity set name, constructed by joining + the sorted entity keys and their corresponding values, separated by hyphens. + """ + group_keys = sorted(entities.keys() - NON_KEY_ENTITIES) + return "_".join([f"{key}-{entities[key]}" for key in group_keys]) + + +def _file_to_entity_set(filename): + """Identify and return the entity set of a BIDS valid filename. + + Parameters + ---------- + filename : str + The filename to parse for BIDS entities. + + Returns + ------- + set + A set of entities extracted from the filename. + """ + entities = parse_file_entities(str(filename)) + return _entities_to_entity_set(entities) + + +def _get_participant_relative_path(scan): + """Build the relative-from-subject version of a Path to a file. + + Parameters + ---------- + scan : str + The full path to the scan file. + + Returns + ------- + str + The relative path from the subject directory. + + Raises + ------ + ValueError + If the subject directory cannot be found in the path. + + Examples + -------- + >>> _get_participant_relative_path( + ... "/path/to/dset/sub-01/ses-01/func/sub-01_ses-01_bold.nii.gz", + ... ) + 'ses-01/func/sub-01_ses-01_bold.nii.gz' + + >>> _get_participant_relative_path( + ... "/path/to/dset/sub-01/func/sub-01_bold.nii.gz", + ... ) + 'func/sub-01_bold.nii.gz' + + >>> _get_participant_relative_path( + ... "/path/to/dset/ses-01/func/ses-01_bold.nii.gz", + ... ) + Traceback (most recent call last): + ValueError: Could not find subject in ... + """ + parts = Path(scan).parts + # Find the first part that starts with "sub-" + for i, part in enumerate(parts): + if part.startswith("sub-"): + return "/".join(parts[i + 1 :]) + raise ValueError(f"Could not find subject in {scan}") + + +def _get_bidsuri(filename, dataset_root): + """Convert a file path to a BIDS URI. + + Parameters + ---------- + filename : str + The full path to the file within the BIDS dataset. + dataset_root : str + The root directory of the BIDS dataset. 
+ + Returns + ------- + str + The BIDS URI corresponding to the given file path. + + Raises + ------ + ValueError + If the filename is not within the dataset_root. + + Examples + -------- + >>> _get_bidsuri("/path/to/bids/sub-01/ses-01/dataset_description.json", "/path/to/bids") + 'bids::sub-01/ses-01/dataset_description.json' + """ + if dataset_root in filename: + return filename.replace(dataset_root, "bids::").replace("bids::/", "bids::") + raise ValueError(f"Only local datasets are supported: {filename}") + + +def _get_param_groups( + files, + fieldmap_lookup, + entity_set_name, + grouping_config, + modality, + keys_files, +): + """Find a list of *parameter groups* from a list of files. + + For each file in `files`, find critical parameters for metadata. Then find + unique sets of these critical parameters. + + Parameters + ---------- + files : :obj:`list` of :obj:`str` + List of file names + fieldmap_lookup : :obj:`dict` + mapping of filename strings relative to the bids root + (e.g. "sub-X/ses-Y/func/sub-X_ses-Y_task-rest_bold.nii.gz") + grouping_config : :obj:`dict` + configuration for defining parameter groups + + Returns + ------- + ordered_labeled_files : :obj:`pandas.DataFrame` + A data frame with one row per file where the ParamGroup column + indicates which group each scan is a part of. + param_groups_with_counts : :obj:`pandas.DataFrame` + A data frame with param group summaries. + """ + if not files: + print("WARNING: no files for", entity_set_name) + return None, None + + # Split the config into separate parts + imaging_params = grouping_config.get("sidecar_params", {}) + imaging_params = imaging_params[modality] + + relational_params = grouping_config.get("relational_params", {}) + + derived_params = grouping_config.get("derived_params") + derived_params = derived_params[modality] + + imaging_params.update(derived_params) + + dfs = [] + # path needs to be relative to the root with no leading prefix + + for path in files: + # metadata = layout.get_metadata(path) + metadata = get_sidecar_metadata(img_to_new_ext(path, ".json")) + if metadata == "Erroneous sidecar": + print("Error parsing sidecar: ", img_to_new_ext(path, ".json")) + else: + intentions = metadata.get("IntendedFor", []) + slice_times = metadata.get("SliceTiming", []) + + wanted_keys = metadata.keys() & imaging_params + example_data = {key: metadata[key] for key in wanted_keys} + example_data["EntitySet"] = entity_set_name + + # Get the fieldmaps out and add their types + if "FieldmapKey" in relational_params: + fieldmap_types = sorted( + [_file_to_entity_set(fmap.path) for fmap in fieldmap_lookup[path]] + ) + + # check if config says columns or bool + if relational_params["FieldmapKey"]["display_mode"] == "bool": + if len(fieldmap_types) > 0: + example_data["HasFieldmap"] = True + else: + example_data["HasFieldmap"] = False + else: + for fmap_num, fmap_type in enumerate(fieldmap_types): + example_data[f"FieldmapKey{fmap_num:02d}"] = fmap_type + + # Add the number of slice times specified + if "NSliceTimes" in derived_params: + example_data["NSliceTimes"] = len(slice_times) + + example_data["FilePath"] = path + + # If it's a fieldmap, see what entity set it's intended to correct + if "IntendedForKey" in relational_params: + intended_entity_sets = sorted( + [_file_to_entity_set(intention) for intention in intentions] + ) + + # check if config says columns or bool + if relational_params["IntendedForKey"]["display_mode"] == "bool": + if len(intended_entity_sets) > 0: + example_data["UsedAsFieldmap"] = True + else: + 
example_data["UsedAsFieldmap"] = False + else: + for intention_num, intention_entity_set in enumerate(intended_entity_sets): + example_data[f"IntendedForKey{intention_num:02d}"] = intention_entity_set + + dfs.append(example_data) + + # Assign each file to a ParamGroup + + # round param groups based on precision + df = round_params(pd.DataFrame(dfs), grouping_config, modality) + + # cluster param groups based on tolerance + df = format_params(df, grouping_config, modality) + # param_group_cols = list(set(df.columns.to_list()) - set(["FilePath"])) + + # get the subset of columns to drop duplicates by + check_cols = [] + for col in list(df.columns): + if f"Cluster_{col}" not in list(df.columns) and col != "FilePath": + check_cols.append(col) + + # Find the unique ParamGroups and assign ID numbers in "ParamGroup"\ + try: + deduped = df.drop("FilePath", axis=1) + except Exception: + return "erroneous sidecar found" + + deduped = deduped.drop_duplicates(subset=check_cols, ignore_index=True) + deduped["ParamGroup"] = np.arange(deduped.shape[0]) + 1 + + # add the modality as a column + deduped["Modality"] = modality + + # add entity set count column (will delete later) + deduped["EntitySetCount"] = len(keys_files[entity_set_name]) + + # Add the ParamGroup to the whole list of files + labeled_files = pd.merge(df, deduped, on=check_cols) + + value_counts = labeled_files.ParamGroup.value_counts() + + param_group_counts = pd.DataFrame( + {"Counts": value_counts.to_numpy(), "ParamGroup": value_counts.index.to_numpy()} + ) + + param_groups_with_counts = pd.merge(deduped, param_group_counts, on=["ParamGroup"]) + + # Sort by counts and relabel the param groups + param_groups_with_counts.sort_values(by=["Counts"], inplace=True, ascending=False) + param_groups_with_counts["ParamGroup"] = np.arange(param_groups_with_counts.shape[0]) + 1 + + # Send the new, ordered param group ids to the files list + ordered_labeled_files = pd.merge( + df, param_groups_with_counts, on=check_cols, suffixes=("_x", "") + ) + + # sort ordered_labeled_files by param group + ordered_labeled_files.sort_values(by=["Counts"], inplace=True, ascending=False) + + return ordered_labeled_files, param_groups_with_counts + + +def round_params(param_group_df, config, modality): + """Round columns' values in a DataFrame according to requested precision. + + Parameters + ---------- + param_group_df : pandas.DataFrame + DataFrame containing the parameters to be rounded. + config : dict + Configuration dictionary containing rounding precision information. + modality : str + The modality key to access the relevant rounding precision settings in the config. + + Returns + ------- + pandas.DataFrame + DataFrame with the specified columns' values rounded to the requested precision. + """ + to_format = config["sidecar_params"][modality] + to_format.update(config["derived_params"][modality]) + + for column_name, column_fmt in to_format.items(): + if column_name not in param_group_df: + continue + + if "precision" in column_fmt: + if isinstance(param_group_df[column_name], float): + param_group_df[column_name] = param_group_df[column_name].round( + column_fmt["precision"] + ) + + return param_group_df + + +def get_sidecar_metadata(json_file): + """Get all metadata values in a file's sidecar. + + Transform JSON dictionary to Python dictionary. + + Parameters + ---------- + json_file : str + Path to the JSON sidecar file. 
+ + Returns + ------- + dict or str + Returns a dictionary containing the metadata if the file is successfully read, + otherwise returns the string "Erroneous sidecar". + + Raises + ------ + Exception + If there is an error loading the JSON file. + """ + try: + with open(json_file) as json_file: + data = json.load(json_file) + return data + except Exception: + # print("Error loading sidecar: ", json_filename) + return "Erroneous sidecar" + + +def format_params(param_group_df, config, modality): + """Run AgglomerativeClustering on param groups and add columns to dataframe. + + Parameters + ---------- + param_group_df : :obj:`pandas.DataFrame` + A data frame with one row per file where the ParamGroup column + indicates which group each scan is a part of. + config : :obj:`dict` + Configuration for defining parameter groups. + This dictionary has two keys: ``'sidecar_params'`` and ``'derived_params'``. + modality : :obj:`str` + Modality of the scan. + This is used to select the correct configuration from the config dict. + + Returns + ------- + param_group_df : :obj:`pandas.DataFrame` + An updated version of the input data frame, + with a new column added for each element in the modality's + ``'sidecar_params'`` and ``'derived_params'`` dictionaries. + The new columns will have the name ``'Cluster_' + column_name``, + and will contain the cluster labels for each parameter group. + + Notes + ----- + ``'sidecar_params'`` is a dictionary of dictionaries, where keys are modalities. + The modality-wise dictionary's keys are names of BIDS fields to directly include + in the Parameter Groupings, + and the values describe the parameters by which those BIDS' fields are compared. + For example, + {"RepetitionTime": {"tolerance": 0.000001, "precision": 6, "suggest_variant_rename": True} + means that the RepetitionTime field should be compared across files and flagged as a + variant if it differs from others by 0.000001 or more. + + ``'derived_params'`` is a dictionary of dictionaries, where keys are modalities. + The modality-wise dictionary's keys are names of BIDS fields to derive from the + NIfTI header and include in the Parameter Groupings. + """ + to_format = config["sidecar_params"][modality] + to_format.update(config["derived_params"][modality]) + + for column_name, column_fmt in to_format.items(): + if column_name not in param_group_df: + continue + + if "tolerance" in column_fmt and len(param_group_df) > 1: + array = param_group_df[column_name].to_numpy().reshape(-1, 1) + + for i in range(len(array)): + if np.isnan(array[i, 0]): + array[i, 0] = -999 + + tolerance = to_format[column_name]["tolerance"] + clustering = AgglomerativeClustering( + n_clusters=None, distance_threshold=tolerance, linkage="complete" + ).fit(array) + + for i in range(len(array)): + if array[i, 0] == -999: + array[i, 0] = np.nan + + # now add clustering_labels as a column + param_group_df[f"Cluster_{column_name}"] = clustering.labels_ + + return param_group_df + + +def _order_columns(df): + """Organize columns of the summary and files DataFrames. + + Parameters + ---------- + df : pandas.DataFrame + The DataFrame whose columns need to be organized. + + Returns + ------- + pandas.DataFrame + The DataFrame with columns organized such that 'EntitySet' and + 'ParamGroup' are the first two columns, 'FilePath' is the last + column (if present), and the remaining columns are sorted + alphabetically. 
+ + Notes + ----- + This is the only place where the constant ID_VARS is used, + and the strings in that constant are hardcoded here, + so we might not need that constant at all. + """ + cols = set(df.columns.to_list()) + non_id_cols = cols - ID_VARS + new_columns = ["EntitySet", "ParamGroup"] + sorted(non_id_cols) + if "FilePath" in cols: + new_columns.append("FilePath") + + df = df[new_columns] + + return df[new_columns] + + +def img_to_new_ext(img_path, new_ext): + """Convert an image file path to a new extension. + + Parameters + ---------- + img_path : str + The file path of the image to be converted. + new_ext : str + The new extension to be applied to the image file path. + + Returns + ------- + str + The file path with the new extension applied. + + Examples + -------- + >>> img_to_new_ext('/path/to/file_image.nii.gz', '.tsv') + '/path/to/file_events.tsv' + + >>> img_to_new_ext('/path/to/file_image.nii.gz', '.tsv.gz') + '/path/to/file_physio.tsv.gz' + + >>> img_to_new_ext('/path/to/file_image.nii.gz', '.json') + '/path/to/file_image.json' + + Notes + ----- + The hardcoded suffix associated with each extension may not be comprehensive. + BIDS has been extended a lot in recent years. + """ + # handle .tsv edge case + if new_ext == ".tsv": + # take out suffix + return img_path.rpartition("_")[0] + "_events" + new_ext + elif new_ext == ".tsv.gz": + return img_path.rpartition("_")[0] + "_physio" + new_ext + else: + return img_path.replace(".nii.gz", "").replace(".nii", "") + new_ext + + +def get_entity_value(path, key): + """Given a filepath and BIDS key name, return the value associated with the key. + + Parameters + ---------- + path : str + The file path to be parsed. + key : str + The BIDS key name to search for in the file path. + + Returns + ------- + str or None + The value associated with the BIDS key if found, otherwise None. + + Examples + -------- + >>> get_entity_value('/path/to/sub-01/ses-01/func/sub-01_ses-02_task-rest_bold.nii.gz', 'sub') + 'sub-01' + >>> get_entity_value('/path/to/sub-01/ses-02/func/sub-01_ses-02_task-rest_bold.nii.gz', 'ses') + 'ses-02' + """ + parts = Path(path).parts + for part in parts: + if part.startswith(key + "-"): + return part + + +def build_path(filepath, entities, out_dir, is_longitudinal): + """Build a new path for a file based on its BIDS entities. + + Parameters + ---------- + filepath : str + The original file path. + entities : dict + A dictionary of BIDS entities. + This should include all of the entities in the filename *except* for subject and session. + out_dir : str + The output directory for the new file. + is_longitudinal : bool + If True, add "ses" to file path. + + Returns + ------- + new_path : str + The new file path. + + Examples + -------- + >>> build_path( + ... "/input/sub-01/ses-01/anat/sub-01_ses-01_T1w.nii.gz", + ... {"acquisition": "VAR", "suffix": "T2w"}, + ... "/output", + ... True, + ... ) + '/output/sub-01/ses-01/anat/sub-01_ses-01_acq-VAR_T2w.nii.gz' + + The function does not add an extra leading zero to the run entity when it's a string. + >>> build_path( + ... "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-01_bold.nii.gz", + ... {"task": "rest", "run": "2", "acquisition": "VAR", "suffix": "bold"}, + ... "/output", + ... True, + ... ) + '/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_run-2_bold.nii.gz' + + The function adds an extra leading zero to the run entity when it's an integer + and the original filename has a leading zero. + >>> build_path( + ... 
"/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-00001_bold.nii.gz", + ... {"task": "rest", "run": 2, "acquisition": "VAR", "suffix": "bold"}, + ... "/output", + ... True, + ... ) + '/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_run-00002_bold.nii.gz' + + The function does not add an extra leading zero to the run entity when it's an integer + and the original filename doesn't have a leading zero. + >>> build_path( + ... "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-1_bold.nii.gz", + ... {"task": "rest", "run": 2, "acquisition": "VAR", "suffix": "bold"}, + ... "/output", + ... True, + ... ) + '/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_run-2_bold.nii.gz' + + The function doesn't add an extra leading zero to the run entity when there isn't a zero. + >>> build_path( + ... "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-1_bold.nii.gz", + ... {"task": "rest", "run": "2", "acquisition": "VAR", "suffix": "bold"}, + ... "/output", + ... True, + ... ) + '/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_run-2_bold.nii.gz' + + Entities in the original path, but not the entity dictionary, are not included, + like run in this case. + >>> build_path( + ... "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-01_bold.nii.gz", + ... {"task": "rest", "acquisition": "VAR", "suffix": "bold"}, + ... "/output", + ... True, + ... ) + '/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_bold.nii.gz' + + Entities outside of the prescribed list are ignored, such as "subject"... + >>> build_path( + ... "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-01_bold.nii.gz", + ... {"subject": "02", "task": "rest", "acquisition": "VAR", "suffix": "bold"}, + ... "/output", + ... True, + ... ) + '/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_bold.nii.gz' + + or "echo". + >>> build_path( + ... "/input/sub-01/ses-01/func/sub-01_ses-01_task-rest_run-01_bold.nii.gz", + ... {"task": "rest", "acquisition": "VAR", "echo": 1, "suffix": "bold"}, + ... "/output", + ... True, + ... ) + '/output/sub-01/ses-01/func/sub-01_ses-01_task-rest_acq-VAR_bold.nii.gz' + + It can change the datatype, but will warn the user. + >>> build_path( + ... "/input/sub-01/ses-01/anat/sub-01_ses-01_asl.nii.gz", + ... {"datatype": "perf", "acquisition": "VAR", "suffix": "asl"}, + ... "/output", + ... True, + ... ) + WARNING: DATATYPE CHANGE DETECTED + '/output/sub-01/ses-01/perf/sub-01_ses-01_acq-VAR_asl.nii.gz' + + It also works for cross-sectional filename. + >>> build_path( + ... "/input/sub-01/func/sub-01_task-rest_run-01_bold.nii.gz", + ... {"task": "rest", "acquisition": "VAR", "suffix": "bold"}, + ... "/output", + ... False, + ... ) + '/output/sub-01/func/sub-01_task-rest_acq-VAR_bold.nii.gz' + """ + exts = Path(filepath).suffixes + old_ext = "".join(exts) + + suffix = entities["suffix"] + entity_file_keys = [] + + # Entities that may be in the filename? + file_keys = ["task", "acquisition", "direction", "reconstruction", "run"] + + for key in file_keys: + if key in list(entities.keys()): + entity_file_keys.append(key) + + sub = get_entity_value(filepath, "sub") + if sub is None: + raise ValueError(f"Could not extract subject from {filepath}") + + if is_longitudinal: + ses = get_entity_value(filepath, "ses") + if ses is None: + raise ValueError(f"Could not extract session from {filepath}") + + # Add leading zeros to run entity if it's an integer. + # If it's a string, respect the value provided. 
+ if "run" in entities.keys() and isinstance(entities["run"], int): + # Infer the number of leading zeros needed from the original filename + n_leading = 2 # default to 1 leading zero + if "_run-" in filepath: + run_str = filepath.split("_run-")[1].split("_")[0] + n_leading = len(run_str) + entities["run"] = str(entities["run"]).zfill(n_leading) + + filename = "_".join([f"{key}-{entities[key]}" for key in entity_file_keys]) + filename = ( + filename.replace("acquisition", "acq") + .replace("direction", "dir") + .replace("reconstruction", "rec") + ) + if len(filename) > 0: + if is_longitudinal: + filename = f"{sub}_{ses}_{filename}_{suffix}{old_ext}" + elif not is_longitudinal: + filename = f"{sub}_{filename}_{suffix}{old_ext}" + else: + raise ValueError(f"Could not construct new filename for {filepath}") + + # CHECK TO SEE IF DATATYPE CHANGED + # datatype may be overridden/changed if the original file is located in the wrong folder. + dtypes = ["anat", "func", "perf", "fmap", "dwi"] + dtype_orig = "" + for dtype in dtypes: + if dtype in filepath: + dtype_orig = dtype + + if "datatype" in entities.keys(): + dtype_new = entities["datatype"] + if entities["datatype"] != dtype_orig: + print("WARNING: DATATYPE CHANGE DETECTED") + else: + dtype_new = dtype_orig + + # Construct the new filename + if is_longitudinal: + new_path = str(Path(out_dir) / sub / ses / dtype_new / filename) + elif not is_longitudinal: + new_path = str(Path(out_dir) / sub / dtype_new / filename) + + return new_path + + +def assign_variants(summary, rename_cols): + """Assign variant names to files based on differences from dominant group. + + Parameters + ---------- + summary : pandas.DataFrame + The summary DataFrame containing the metadata for each file. + The columns that are used include "ParamGroup", "EntitySet", + the columns in ``rename_cols``, + and any columns in ``rename_cols`` that are prefixed with "Cluster_". + rename_cols : list of str + A list of column names to use for renaming files. + The values in these columns will be compared against the dominant group + and labeled with a variant name if they differ. + + Returns + ------- + pandas.DataFrame + The updated summary DataFrame with a new column "RenameEntitySet" + containing the new entity set names for each file. 
+ """ + # loop through summary tsv and create dom_dict + dom_dict = {} + for row in range(len(summary)): + # if dominant group identified + if str(summary.loc[row, "ParamGroup"]) == "1": + val = {} + # grab col, all vals send to dict + key = summary.loc[row, "EntitySet"] + for col in rename_cols: + summary[col] = summary[col].apply(str) + val[col] = summary.loc[row, col] + + if f"Cluster_{col}" in summary.columns: + val[f"Cluster_{col}"] = summary.loc[row, f"Cluster_{col}"] + + dom_dict[key] = val + + # now loop through again and ID variance + for row in range(len(summary)): + # check to see if renaming has already happened + renamed = False + entities = _entity_set_to_entities(summary.loc[row, "EntitySet"]) + if "VARIANT" in summary.loc[row, "EntitySet"]: + renamed = True + + if summary.loc[row, "ParamGroup"] != 1 and not renamed: + acq_str = "VARIANT" + # now we know we have a deviant param group + # check if TR is same as param group 1 + entity_set = summary.loc[row, "EntitySet"] + for col in rename_cols: + dom_entity_set = dom_dict[entity_set] + summary[col] = summary[col].apply(str) + + if f"Cluster_{col}" in dom_entity_set.keys(): + if summary.loc[row, f"Cluster_{col}"] != dom_entity_set[f"Cluster_{col}"]: + acq_str += col + elif summary.loc[row, col] != dom_entity_set[col]: + if col == "HasFieldmap": + if dom_entity_set[col] == "True": + acq_str += "NoFmap" + else: + acq_str += "HasFmap" + elif col == "UsedAsFieldmap": + if dom_entity_set[col] == "True": + acq_str += "Unused" + else: + acq_str += "IsUsed" + else: + acq_str += col + + if acq_str == "VARIANT": + acq_str += "Other" + + if "acquisition" in entities.keys(): + acq = f"acquisition-{entities['acquisition'] + acq_str}" + + new_name = summary.loc[row, "EntitySet"].replace( + f"acquisition-{entities['acquisition']}", + acq, + ) + else: + acq = f"acquisition-{acq_str}" + new_name = acq + "_" + summary.loc[row, "EntitySet"] + + summary.at[row, "RenameEntitySet"] = new_name + + # convert all "nan" to empty str + # so they don't show up in the summary tsv + if summary.loc[row, "RenameEntitySet"] == "nan": + summary.at[row, "RenameEntitySet"] = "" + + for col in rename_cols: + if summary.loc[row, col] == "nan": + summary.at[row, col] = "" + + return summary