feat: convert models to macro templates (#94)

* feat: convert all models to jinja2 macro templates * update tests * add coverage
AllenNeuralDynamics · Nov 6, 2024 · d198752 · d198752
1 parent 1dd8d04
commit d198752
Show file tree

Hide file tree

Showing 43 changed files with 84,372 additions and 603 deletions.
diff --git a/README.md b/README.md
@@ -25,6 +25,16 @@ pip install -e .[dev]
 
 ## Contributing
 
+### How to add a new model class
+
+The model class files, `brain_atlas.py` etc., are auto-generated. **You should never need to modify the class files directly.**
+
+Instead, take a look at the `jinja2` templates in the folder `_generators/templates`. The filename of the template is used to pull the corresponding `.csv` file and populate the `data` DataFrame. In the template you can pull data from the various columns and use them to populate each of the fields in your class.
+
+To re-build all the models, run the `run_all.sh` bash script in the root folder, which loops through the template files and runs them through the `generate_code` function.
+
+There are a few special cases, e.g. values that are missing from a column will show up as `float: nan` in the `data` DataFrame. See the `organizations.txt` template for examples of how to handle this.
+
 ### Linters and testing
 
 There are several libraries used to run linters, check documentation, and run tests.

diff --git a/pyproject.toml b/pyproject.toml
@@ -31,7 +31,9 @@ dev = [
     'isort',
     'Sphinx',
     'furo',
-    'aind-flake8-extensions==0.5.2'
+    'aind-flake8-extensions==0.5.2',
+    'jinja2',
+    'pandas'
 ]
 
 build = [

diff --git a/run_all.sh b/run_all.sh
@@ -0,0 +1,7 @@
+#!/usr/bin/env bash
+# Re-build every model module: one generator run per template file.
+for file in src/aind_data_schema_models/_generators/templates/*.txt; do
+    # When the glob matches nothing the pattern is left as a literal string;
+    # skip it instead of calling the generator with "*" as the type.
+    [ -e "$file" ] || continue
+
+    # Extract the filename without the directory and extension
+    type_name=$(basename "$file" .txt)
+
+    # Call the Python script with the --type parameter
+    python src/aind_data_schema_models/_generators/generator.py --type "$type_name"
+done
diff --git a/src/aind_data_schema_models/_generators/__init__.py b/src/aind_data_schema_models/_generators/__init__.py
@@ -0,0 +1 @@
+"""Generators"""
diff --git a/src/aind_data_schema_models/_generators/generator.py b/src/aind_data_schema_models/_generators/generator.py
@@ -0,0 +1,67 @@
+import argparse
+from jinja2 import Environment
+import pandas as pd
+from aind_data_schema_models.utils import to_class_name, to_class_name_underscored
+from pathlib import Path
+import subprocess
+
+
+def generate_code(data_type: str, root_path: str, isort: bool = True, black: bool = True):
+    """Generate code from the template type
+
+    Parameters
+    ----------
+    data_type : str
+        Which template file to use
+    isort : bool, optional
+        Whether to run isort on the output, by default True
+    black : bool, optional
+        Whether to run black on the output, by default True
+    """
+    ROOT_DIR = Path(root_path)
+    data_file = ROOT_DIR / "_generators" / "models" / f"{data_type}.csv"
+    template_file = ROOT_DIR / "_generators" / "templates" / f"{data_type}.txt"
+    output_file = ROOT_DIR / f"{data_type}.py"
+
+    # Load data
+    data = pd.read_csv(data_file)
+
+    # Load template
+    with open(template_file) as f:
+        template = f.read()
+
+    # Set up Jinja2 environment
+    env = Environment()
+    env.filters["to_class_name"] = to_class_name
+    env.filters["to_class_name_underscored"] = to_class_name_underscored
+    rendered_template = env.from_string(template)
+
+    # Render template with data
+    rendered_code = rendered_template.render(data=data)
+
+    # Write generated code to file
+    with open(output_file, "w") as f:
+        f.write(rendered_code)
+
+    print(f"Code generated in {output_file}")
+
+    # Optionally, format with isort and black
+    if isort:
+        subprocess.run(["isort", str(output_file)])
+
+    if black:
+        subprocess.run(["black", str(output_file)])
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(description="Generate code from templates.")
+    parser.add_argument("--type", required=True, help="The data type to generate code for (e.g., 'platforms').")
+    parser.add_argument(
+        "--root-path",
+        required=False,
+        default="./src/aind_data_schema_models/",
+        help="Path to the source folder of the project",
+    )
+    args = parser.parse_args()
+
+    generate_code(args.type, args.root_path)
diff --git a/...ma_models/models/mouse_ccf_structures.csv → ...models/_generators/models/brain_atlas.csv b/...ma_models/models/mouse_ccf_structures.csv → ...models/_generators/models/brain_atlas.csv
@@ -744,7 +744,7 @@ ee,964,"corpus callosum, extreme capsule",/997/1009/983/776/964/,776,#CCCCCC
 fp,971,"corpus callosum, posterior forceps",/997/1009/983/776/971/,776,#CCCCCC
 ccs,986,"corpus callosum, splenium",/997/1009/983/776/986/,776,#CCCCCC
 cst,784,corticospinal tract,/997/1009/983/784/,983,#CCCCCC
-cm,967,cranial nerves,/997/1009/967/,1009,#CCCCCC
+cne,967,cranial nerves,/997/1009/967/,1009,#CCCCCC
 tspc,1043,crossed tectospinal pathway,/997/1009/1000/877/1043/,877,#CCCCCC
 cuf,380,cuneate fascicle,/997/1009/967/792/932/514/380/,514,#CCCCCC
 tspd,1051,direct tectospinal pathway,/997/1009/1000/877/1051/,877,#CCCCCC

diff --git a/..._data_schema_models/models/harp_types.csv → ..._models/_generators/models/harp_types.csv b/..._data_schema_models/models/harp_types.csv → ..._models/_generators/models/harp_types.csv
diff --git a/src/aind_data_schema_models/_generators/models/modalities.csv b/src/aind_data_schema_models/_generators/models/modalities.csv
@@ -0,0 +1,15 @@
+name,abbreviation,subject,data_description,procedures,session,rig,processing,acquisition,instrument,quality_control
+Behavior,behavior,1,1,1,1,1,0,-1,-1,0
+Behavior videos,behavior-videos,1,1,1,1,1,0,-1,-1,0
+Confocal microscopy,confocal,1,1,1,-1,-1,1,1,1,0
+Electromyography,EMG,1,1,1,1,1,0,-1,-1,0
+Extracellular electrophysiology,ecephys,1,1,1,1,1,0,-1,-1,0
+Fiber photometry,fib,1,1,1,1,1,0,-1,-1,0
+Fluorescence micro-optical sectioning tomography,fMOST,1,1,1,-1,-1,1,1,1,0
+Intracellular electrophysiology,icephys,1,1,1,1,1,0,-1,-1,0
+Intrinsic signal imaging,ISI,1,1,1,1,1,0,-1,-1,0
+Magnetic resonance imaging,MRI,1,1,1,1,1,0,-1,-1,0
+Multiplexed error-robust fluorescence in situ hybridization,merfish,1,1,1,-1,-1,1,1,1,0
+Planar optical physiology,pophys,1,1,1,1,1,0,-1,-1,0
+Scanned line projection imaging,slap,1,1,1,1,1,0,-1,-1,0
+Selective plane illumination microscopy,SPIM,1,1,1,-1,-1,1,1,1,0
diff --git a/...models/models/mouse_dev_anat_ontology.csv → ...dels/_generators/models/mouse_anatomy.csv b/...models/models/mouse_dev_anat_ontology.csv → ...dels/_generators/models/mouse_anatomy.csv
diff --git a/...ta_schema_models/models/organizations.csv → ...dels/_generators/models/organizations.csv b/...ta_schema_models/models/organizations.csv → ...dels/_generators/models/organizations.csv
diff --git a/...d_data_schema_models/models/platforms.csv → ...a_models/_generators/models/platforms.csv b/...d_data_schema_models/models/platforms.csv → ...a_models/_generators/models/platforms.csv
diff --git a/...ta_schema_models/models/process_names.csv → ...dels/_generators/models/process_names.csv b/...ta_schema_models/models/process_names.csv → ...dels/_generators/models/process_names.csv
diff --git a/..._data_schema_models/models/registries.csv → ..._models/_generators/models/registries.csv b/..._data_schema_models/models/registries.csv → ..._models/_generators/models/registries.csv
diff --git a/...ind_data_schema_models/models/species.csv → ...ema_models/_generators/models/species.csv b/...ind_data_schema_models/models/species.csv → ...ema_models/_generators/models/species.csv
diff --git a/...odels/models/specimen_procedure_types.csv → ...ators/models/specimen_procedure_types.csv b/...odels/models/specimen_procedure_types.csv → ...ators/models/specimen_procedure_types.csv
diff --git a/src/aind_data_schema_models/_generators/templates/brain_atlas.txt b/src/aind_data_schema_models/_generators/templates/brain_atlas.txt
@@ -0,0 +1,40 @@
+"""Brain atlas"""
+{% raw -%}
+from pydantic import BaseModel, Field, ConfigDict
+from typing import Literal, Union
+from typing_extensions import Annotated
+{% endraw %}
+
+class _BrainStructureModel(BaseModel):
+    """Base model for brain structures"""
+    model_config = ConfigDict(frozen=True)
+    atlas: str
+    name: str
+    acronym: str
+    id: str
+
+{% for _, row in data.iterrows() %}
+class {{ row['acronym'] | to_class_name_underscored }}(_BrainStructureModel):
+    """Model {{row['acronym']}}"""
+    atlas: Literal["CCFv3"] = "CCFv3"
+    name: Literal["{{ row['name'] }}"] = "{{ row['name'] }}"
+    acronym: Literal["{{ row['acronym'] }}"] = "{{ row['acronym'] }}"
+    id: Literal["{{ row['id'] }}"] = "{{ row['id'] }}"
+
+{% endfor %}
+class CCFStructure:
+    """CCFStructure"""
+{% for _, row in data.iterrows() %}
+    {{ row['acronym'] | to_class_name | upper }} = {{ row['acronym'] | to_class_name_underscored }}()
+{%- endfor %}
+
+    ALL = tuple(_BrainStructureModel.__subclasses__())
+
+    ONE_OF = Annotated[Union[tuple(_BrainStructureModel.__subclasses__())], Field(discriminator="name")]
+
+    # Keyed by each structure's ``id`` field, which the models declare as a string
+    id_map = {m().id: m() for m in ALL}
+
+    @classmethod
+    def from_id(cls, id: Union[int, str]):
+        """Get structure from id
+
+        Parameters
+        ----------
+        id : Union[int, str]
+            Structure id, as an integer or as its string form
+
+        Returns
+        -------
+        The matching structure instance, or None if the id is unknown
+        """
+        # id_map keys are strings, so normalise integer ids before the lookup
+        return cls.id_map.get(str(id), None)
diff --git a/src/aind_data_schema_models/_generators/templates/harp_types.txt b/src/aind_data_schema_models/_generators/templates/harp_types.txt
@@ -0,0 +1,29 @@
+"""Harp device types"""
+{% raw -%}
+from pydantic import BaseModel, Field, ConfigDict
+from typing import Literal, Union
+from typing_extensions import Annotated
+{% endraw %}
+
+class _HarpDeviceTypeModel(BaseModel):
+    """Base model for Harp device types"""
+    model_config = ConfigDict(frozen=True)
+    whoami: int = Field(..., title="Harp whoami value")
+    name: str = Field(..., title="Harp device type name")
+
+{% for _, row in data.iterrows() %}
+class {{ row['name'] | to_class_name_underscored }}(_HarpDeviceTypeModel):
+    """Model {{row['name']}}"""
+    name: Literal["{{ row['name'] }}"] = "{{ row['name'] }}"
+    whoami: Literal[{{ row['whoami'] }}] = {{ row['whoami'] }}
+
+{% endfor %}
+class HarpDeviceType:
+    """Harp device types"""
+{% for _, row in data.iterrows() %}
+    {{ row['name'] | to_class_name | upper }} = {{ row['name'] | to_class_name_underscored }}()
+{%- endfor %}
+
+    ALL = tuple(_HarpDeviceTypeModel.__subclasses__())
+
+    ONE_OF = Annotated[Union[tuple(_HarpDeviceTypeModel.__subclasses__())], Field(discriminator="name")]
diff --git a/src/aind_data_schema_models/_generators/templates/modalities.txt b/src/aind_data_schema_models/_generators/templates/modalities.txt
@@ -0,0 +1,88 @@
+"""Modalities"""
+{% raw -%}
+from pydantic import BaseModel, Field, ConfigDict
+from typing import Literal, Union
+from typing_extensions import Annotated
+from enum import IntEnum
+from aind_data_schema_models.pid_names import BaseName
+{% endraw %}
+
+class _ModalityModel(BaseName):
+    """Base model for modality"""
+    model_config = ConfigDict(frozen=True)
+    name: str
+    abbreviation: str
+
+{% for _, row in data.iterrows() %}
+class {{ row['abbreviation'] | to_class_name_underscored }}(_ModalityModel):
+    """Model {{row['abbreviation']}}"""
+    name: Literal["{{ row['name'] }}"] = "{{ row['name'] }}"
+    abbreviation: Literal["{{ row['abbreviation'] }}"] = "{{ row['abbreviation'] }}"
+
+{% endfor %}
+class Modality:
+    """Modalities"""
+{% for _, row in data.iterrows() %}
+    {{ row['abbreviation'] | to_class_name | upper }} = {{ row['abbreviation'] | to_class_name_underscored }}()
+{%- endfor %}
+
+    ALL = tuple(_ModalityModel.__subclasses__())
+
+    ONE_OF = Annotated[Union[tuple(_ModalityModel.__subclasses__())], Field(discriminator="abbreviation")]
+
+    abbreviation_map = {m().abbreviation: m() for m in ALL}
+
+    @classmethod
+    def from_abbreviation(cls, abbreviation: str):
+        """Get modality from abbreviation"""
+        return cls.abbreviation_map.get(abbreviation, None)
+
+
+class FileRequirement(IntEnum):
+    """Whether a file is required for a specific modality"""
+
+    REQUIRED = 1
+    OPTIONAL = 0
+    EXCLUDED = -1
+
+
+class _ExpectedFilesModel(BaseModel):
+    """Base model for the expected files of a modality"""
+    model_config = ConfigDict(frozen=True)
+    name: str
+    modality_abbreviation: str
+    # One FileRequirement per metadata file column in modalities.csv
+    subject: FileRequirement
+    data_description: FileRequirement
+    procedures: FileRequirement
+    session: FileRequirement
+    rig: FileRequirement
+    processing: FileRequirement
+    acquisition: FileRequirement
+    instrument: FileRequirement
+    quality_control: FileRequirement
+
+{% for _, row in data.iterrows() %}
+class {{ row['abbreviation'] | to_class_name_underscored }}_Files(_ExpectedFilesModel):
+    """Model {{row['abbreviation']}}_Files"""
+    name: Literal["{{ row['name'] }}"] = "{{ row['name'] }}"
+    modality_abbreviation: Literal["{{ row['abbreviation'] }}"] = "{{ row['abbreviation'] }}"
+    subject: FileRequirement = {% if row['subject'] == 1 %} FileRequirement.REQUIRED {% elif row['subject'] == 0 %} FileRequirement.OPTIONAL {% else %} FileRequirement.EXCLUDED {% endif %}
+    data_description: FileRequirement = {% if row['data_description'] == 1 %} FileRequirement.REQUIRED {% elif row['data_description'] == 0 %} FileRequirement.OPTIONAL {% else %} FileRequirement.EXCLUDED {% endif %}
+    procedures: FileRequirement = {% if row['procedures'] == 1 %} FileRequirement.REQUIRED {% elif row['procedures'] == 0 %} FileRequirement.OPTIONAL {% else %} FileRequirement.EXCLUDED {% endif %}
+    session: FileRequirement = {% if row['session'] == 1 %} FileRequirement.REQUIRED {% elif row['session'] == 0 %} FileRequirement.OPTIONAL {% else %} FileRequirement.EXCLUDED {% endif %}
+    rig: FileRequirement = {% if row['rig'] == 1 %} FileRequirement.REQUIRED {% elif row['rig'] == 0 %} FileRequirement.OPTIONAL {% else %} FileRequirement.EXCLUDED {% endif %}
+    processing: FileRequirement = {% if row['processing'] == 1 %} FileRequirement.REQUIRED {% elif row['processing'] == 0 %} FileRequirement.OPTIONAL {% else %} FileRequirement.EXCLUDED {% endif %}
+    acquisition: FileRequirement = {% if row['acquisition'] == 1 %} FileRequirement.REQUIRED {% elif row['acquisition'] == 0 %} FileRequirement.OPTIONAL {% else %} FileRequirement.EXCLUDED {% endif %}
+    instrument: FileRequirement = {% if row['instrument'] == 1 %} FileRequirement.REQUIRED {% elif row['instrument'] == 0 %} FileRequirement.OPTIONAL {% else %} FileRequirement.EXCLUDED {% endif %}
+    quality_control: FileRequirement = {% if row['quality_control'] == 1 %} FileRequirement.REQUIRED {% elif row['quality_control'] == 0 %} FileRequirement.OPTIONAL {% else %} FileRequirement.EXCLUDED {% endif %}
+
+{% endfor %}
+class ExpectedFiles:
+    """Expected files for each modality"""
+{% for _, row in data.iterrows() %}
+    {{ row['abbreviation'] | to_class_name | upper }} = {{ row['abbreviation'] | to_class_name_underscored }}_Files()
+{%- endfor %}
+
+    ALL = tuple(_ExpectedFilesModel.__subclasses__())
+
+    # The expected-files models declare ``modality_abbreviation`` (there is no
+    # ``abbreviation`` field), so that is the field the union must discriminate on
+    ONE_OF = Annotated[Union[tuple(_ExpectedFilesModel.__subclasses__())], Field(discriminator="modality_abbreviation")]
diff --git a/src/aind_data_schema_models/_generators/templates/mouse_anatomy.txt b/src/aind_data_schema_models/_generators/templates/mouse_anatomy.txt
@@ -0,0 +1,65 @@
+"""Mouse anatomy"""
+{% raw -%}
+from pydantic import BaseModel, Field, ConfigDict
+from typing import Literal, Union
+from typing_extensions import Annotated
+from aind_data_schema_models.registries import Registry
+{% endraw %}
+
+class _MouseAnatomicalStructureModel(BaseModel):
+    """Base model for mouse anatomy"""
+    model_config = ConfigDict(frozen=True)
+    name: str
+    registry: Registry.ONE_OF
+    registry_identifier: str
+
+{% for _, row in data.iterrows() %}
+class {{ row['name'] | to_class_name_underscored }}(_MouseAnatomicalStructureModel):
+    """Model {{row['name']}}"""
+    name: Literal["{{ row['name'] }}"] = "{{ row['name'] }}"
+    registry: Registry.ONE_OF = Registry.EMAPA
+    registry_identifier: Literal["{{ row['registry_identifier'] }}"] = "{{ row['registry_identifier'] }}"
+
+{% endfor %}
+class MouseAnatomicalStructure:
+    """MouseAnatomicalStructure"""
+{% for _, row in data.iterrows() %}
+    {{ row['name'] | to_class_name | upper }} = {{ row['name'] | to_class_name_underscored }}()
+{%- endfor %}
+
+    ALL = tuple(_MouseAnatomicalStructureModel.__subclasses__())
+
+    ONE_OF = Annotated[Union[tuple(_MouseAnatomicalStructureModel.__subclasses__())], Field(discriminator="registry_identifier")]
+
+
+MouseAnatomicalStructure.EMG_MUSCLES = Annotated[Union[
+    _Deltoid,
+    _Pectoralis_Major,
+    _Triceps_Brachii,
+    _Lateral_Head_Of_Triceps_Brachii,
+    _Long_Head_Of_Triceps_Brachii,
+    _Medial_Head_Of_Triceps_Brachii,
+    _Biceps_Brachii,
+    _Long_Head_Of_Biceps_Brachii,
+    _Short_Head_Of_Biceps_Brachii,
+    _Tendon_Of_Biceps_Brachii,
+    _Pars_Scapularis_Of_Deltoid,
+    _Extensor_Carpi_Radialis_Longus,
+    _Extensor_Digitorum_Communis,
+    _Extensor_Digitorum_Lateralis,
+    _Extensor_Carpi_Ulnaris,
+    _Flexor_Carpi_Radialis,
+    _Flexor_Carpi_Ulnaris,
+    _Flexor_Digitorum_Profundus,
+], Field(discriminator="registry_identifier")]
+
+MouseAnatomicalStructure.BODY_PARTS = Annotated[Union[
+    _Forelimb,
+    _Head,
+    _Hindlimb,
+    _Neck,
+    _Tail,
+    _Trunk,
+], Field(discriminator="registry_identifier")]
+
+