Skip to content

Commit

Permalink
feat: convert models to macro templates (#94)
Browse files Browse the repository at this point in the history
* feat: convert all models to jinja2 macro templates
* update tests
* add coverage
  • Loading branch information
dbirman authored Nov 6, 2024
1 parent 1dd8d04 commit d198752
Show file tree
Hide file tree
Showing 43 changed files with 84,372 additions and 603 deletions.
10 changes: 10 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,16 @@ pip install -e .[dev]

## Contributing

### How to add a new model class

The model class files (`brain_atlas.py`, etc.) are auto-generated. **You should never need to modify the class files directly.**

Instead, take a look at the `jinja2` templates in the folder `_generators/templates`. The template's filename (without the `.txt` extension) selects the `.csv` file of the same name in `_generators/models`, which is loaded into the `data` DataFrame that the template receives. In the template you can pull data from the various columns and use them to populate each of the fields in your class.

To re-build all the models, run the `run_all.sh` bash script in the root folder, which loops through the template files and runs them through the `generate_code` function.

There are a few special cases, e.g. a value that is missing from a column shows up in the template as a `float` `nan`. See the `organizations.txt` template for examples of how to handle this.

### Linters and testing

There are several libraries used to run linters, check documentation, and run tests.
Expand Down
4 changes: 3 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,9 @@ dev = [
'isort',
'Sphinx',
'furo',
'aind-flake8-extensions==0.5.2'
'aind-flake8-extensions==0.5.2',
'jinja2',
'pandas'
]

build = [
Expand Down
7 changes: 7 additions & 0 deletions run_all.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/usr/bin/env bash
# Regenerate every model module from its jinja2 template.
#
# Each template in _generators/templates is named <type>.txt; the generator is
# invoked once per template with --type <type>. All paths are relative, so run
# this script from the repository root.
for file in src/aind_data_schema_models/_generators/templates/*.txt; do
    # When the glob matches nothing, bash leaves the literal pattern in $file;
    # skip it instead of calling the generator with a bogus type name.
    [ -e "$file" ] || continue

    # Extract the filename without the directory and extension
    type_name=$(basename "$file" .txt)

    # Call the Python script with the --type parameter
    python src/aind_data_schema_models/_generators/generator.py --type "$type_name"
done
1 change: 1 addition & 0 deletions src/aind_data_schema_models/_generators/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
"""Generators"""
67 changes: 67 additions & 0 deletions src/aind_data_schema_models/_generators/generator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
import argparse
from jinja2 import Environment
import pandas as pd
from aind_data_schema_models.utils import to_class_name, to_class_name_underscored
from pathlib import Path
import subprocess


def generate_code(data_type: str, root_path: str, isort: bool = True, black: bool = True):
    """Generate a model module from the template and CSV for ``data_type``

    Reads ``_generators/models/{data_type}.csv`` into a DataFrame, renders
    ``_generators/templates/{data_type}.txt`` with that DataFrame exposed to
    the template as ``data``, and writes the result to ``{data_type}.py``.
    All three paths are resolved relative to ``root_path``.

    Parameters
    ----------
    data_type : str
        Which template file to use
    root_path : str
        Path to the source folder that contains the ``_generators`` folder
    isort : bool, optional
        Whether to run isort on the output, by default True
    black : bool, optional
        Whether to run black on the output, by default True
    """
    root_dir = Path(root_path)
    generators_dir = root_dir / "_generators"
    data_file = generators_dir / "models" / f"{data_type}.csv"
    template_file = generators_dir / "templates" / f"{data_type}.txt"
    output_file = root_dir / f"{data_type}.py"

    # Load data
    data = pd.read_csv(data_file)

    # Load template. The encoding is pinned so that the template is decoded
    # the same way on every platform instead of using the locale default.
    with open(template_file, encoding="utf-8") as f:
        template = f.read()

    # Set up Jinja2 environment with the class-name helpers available as filters
    env = Environment()
    env.filters["to_class_name"] = to_class_name
    env.filters["to_class_name_underscored"] = to_class_name_underscored
    rendered_template = env.from_string(template)

    # Render template with data
    rendered_code = rendered_template.render(data=data)

    # Write generated code to file, again with an explicit encoding
    with open(output_file, "w", encoding="utf-8") as f:
        f.write(rendered_code)

    print(f"Code generated in {output_file}")

    # Optionally, format with isort and then black (same order as before).
    # A failing formatter is reported rather than silently ignored; it does
    # not raise, so the generated file is still left in place.
    for enabled, tool in ((isort, "isort"), (black, "black")):
        if not enabled:
            continue
        result = subprocess.run([tool, str(output_file)])
        if result.returncode != 0:
            print(f"Warning: {tool} exited with code {result.returncode} for {output_file}")


if __name__ == "__main__":
    # Command-line entry point: parse the template type and source root, then
    # generate the corresponding module.
    cli = argparse.ArgumentParser(description="Generate code from templates.")
    cli.add_argument(
        "--type",
        required=True,
        help="The data type to generate code for (e.g., 'platforms').",
    )
    cli.add_argument(
        "--root-path",
        required=False,
        default="./src/aind_data_schema_models/",
        help="Path to the source folder of the project",
    )
    options = cli.parse_args()

    generate_code(data_type=options.type, root_path=options.root_path)
Original file line number Diff line number Diff line change
Expand Up @@ -744,7 +744,7 @@ ee,964,"corpus callosum, extreme capsule",/997/1009/983/776/964/,776,#CCCCCC
fp,971,"corpus callosum, posterior forceps",/997/1009/983/776/971/,776,#CCCCCC
ccs,986,"corpus callosum, splenium",/997/1009/983/776/986/,776,#CCCCCC
cst,784,corticospinal tract,/997/1009/983/784/,983,#CCCCCC
cm,967,cranial nerves,/997/1009/967/,1009,#CCCCCC
cne,967,cranial nerves,/997/1009/967/,1009,#CCCCCC
tspc,1043,crossed tectospinal pathway,/997/1009/1000/877/1043/,877,#CCCCCC
cuf,380,cuneate fascicle,/997/1009/967/792/932/514/380/,514,#CCCCCC
tspd,1051,direct tectospinal pathway,/997/1009/1000/877/1051/,877,#CCCCCC
Expand Down
15 changes: 15 additions & 0 deletions src/aind_data_schema_models/_generators/models/modalities.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
name,abbreviation,subject,data_description,procedures,session,rig,processing,acquisition,instrument,quality_control
Behavior,behavior,1,1,1,1,1,0,-1,-1,0
Behavior videos,behavior-videos,1,1,1,1,1,0,-1,-1,0
Confocal microscopy,confocal,1,1,1,-1,-1,1,1,1,0
Electromyography,EMG,1,1,1,1,1,0,-1,-1,0
Extracellular electrophysiology,ecephys,1,1,1,1,1,0,-1,-1,0
Fiber photometry,fib,1,1,1,1,1,0,-1,-1,0
Fluorescence micro-optical sectioning tomography,fMOST,1,1,1,-1,-1,1,1,1,0
Intracellular electrophysiology,icephys,1,1,1,1,1,0,-1,-1,0
Intrinsic signal imaging,ISI,1,1,1,1,1,0,-1,-1,0
Magnetic resonance imaging,MRI,1,1,1,1,1,0,-1,-1,0
Multiplexed error-robust fluorescence in situ hybridization,merfish,1,1,1,-1,-1,1,1,1,0
Planar optical physiology,pophys,1,1,1,1,1,0,-1,-1,0
Scanned line projection imaging,slap,1,1,1,1,1,0,-1,-1,0
Selective plane illumination microscopy,SPIM,1,1,1,-1,-1,1,1,1,0
40 changes: 40 additions & 0 deletions src/aind_data_schema_models/_generators/templates/brain_atlas.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
"""Brain atlas"""
{# Template for the brain atlas models. `data` is the DataFrame passed in by the generator; this template reads its `acronym`, `name` and `id` columns. -#}
{% raw -%}
from pydantic import BaseModel, Field, ConfigDict
from typing import Literal, Union
from typing_extensions import Annotated
{% endraw %}

class _BrainStructureModel(BaseModel):
    """Base model for brain structures"""
    model_config = ConfigDict(frozen=True)
    atlas: str
    name: str
    acronym: str
    id: str

{# One frozen subclass per row, named from the acronym. #}
{% for _, row in data.iterrows() %}
class {{ row['acronym'] | to_class_name_underscored }}(_BrainStructureModel):
    """Model {{row['acronym']}}"""
    atlas: Literal["CCFv3"] = "CCFv3"
    name: Literal["{{ row['name'] }}"] = "{{ row['name'] }}"
    acronym: Literal["{{ row['acronym'] }}"] = "{{ row['acronym'] }}"
    id: Literal["{{ row['id'] }}"] = "{{ row['id'] }}"

{% endfor %}
class CCFStructure:
    """CCFStructure"""
    {% for _, row in data.iterrows() %}
    {{ row['acronym'] | to_class_name | upper }} = {{ row['acronym'] | to_class_name_underscored }}()
    {%- endfor %}

    ALL = tuple(_BrainStructureModel.__subclasses__())

    ONE_OF = Annotated[Union[tuple(_BrainStructureModel.__subclasses__())], Field(discriminator="name")]

    id_map = {m().id: m() for m in ALL}

    @classmethod
    def from_id(cls, id: Union[int, str]):
        """Get structure from id

        The models store ids as strings, so the keys of ``id_map`` are
        strings. The argument is converted to ``str`` before the lookup so
        that integer ids resolve as well. Returns None for an unknown id.
        """
        return cls.id_map.get(str(id), None)
29 changes: 29 additions & 0 deletions src/aind_data_schema_models/_generators/templates/harp_types.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
"""Harp device types"""
{# Template for the Harp device type models. `data` is the DataFrame passed in by the generator; this template reads its `name` and `whoami` columns. -#}
{% raw -%}
from pydantic import BaseModel, Field, ConfigDict
from typing import Literal, Union
from typing_extensions import Annotated
{% endraw %}

class _HarpDeviceTypeModel(BaseModel):
    """Base model for harp device type"""
    model_config = ConfigDict(frozen=True)
    whoami: int = Field(..., title="Harp whoami value")
    name: str = Field(..., title="Harp device type name")

{# One frozen subclass per row; `whoami` is emitted unquoted, i.e. as a Python literal rather than a string. #}
{% for _, row in data.iterrows() %}
class {{ row['name'] | to_class_name_underscored }}(_HarpDeviceTypeModel):
    """Model {{row['name']}}"""
    name: Literal["{{ row['name'] }}"] = "{{ row['name'] }}"
    whoami: Literal[{{ row['whoami'] }}] = {{ row['whoami'] }}

{% endfor %}
class HarpDeviceType:
    """Harp device types"""
    {% for _, row in data.iterrows() %}
    {{ row['name'] | to_class_name | upper }} = {{ row['name'] | to_class_name_underscored }}()
    {%- endfor %}

    ALL = tuple(_HarpDeviceTypeModel.__subclasses__())

    ONE_OF = Annotated[Union[tuple(_HarpDeviceTypeModel.__subclasses__())], Field(discriminator="name")]
88 changes: 88 additions & 0 deletions src/aind_data_schema_models/_generators/templates/modalities.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
"""Modalities"""
{# Template for the modality models and their expected-file requirements. `data` is the DataFrame passed in by the generator; this template reads `name`, `abbreviation` and one column per metadata file listed in `file_fields` below. -#}
{% raw -%}
from pydantic import BaseModel, Field, ConfigDict
from typing import Literal, Union
from typing_extensions import Annotated
from enum import IntEnum
from aind_data_schema_models.pid_names import BaseName
{% endraw %}
{# Translate a CSV cell into a FileRequirement member: 1 -> REQUIRED, 0 -> OPTIONAL, anything else -> EXCLUDED. #}
{% macro file_requirement(value) %}{% if value == 1 %}FileRequirement.REQUIRED{% elif value == 0 %}FileRequirement.OPTIONAL{% else %}FileRequirement.EXCLUDED{% endif %}{% endmacro %}
{# CSV columns that hold a file requirement, in the order the fields are emitted. #}
{% set file_fields = ['subject', 'data_description', 'procedures', 'session', 'rig', 'processing', 'acquisition', 'instrument', 'quality_control'] %}

class _ModalityModel(BaseName):
    """Base model for modality"""
    model_config = ConfigDict(frozen=True)
    name: str
    abbreviation: str

{% for _, row in data.iterrows() %}
class {{ row['abbreviation'] | to_class_name_underscored }}(_ModalityModel):
    """Model {{row['abbreviation']}}"""
    name: Literal["{{ row['name'] }}"] = "{{ row['name'] }}"
    abbreviation: Literal["{{ row['abbreviation'] }}"] = "{{ row['abbreviation'] }}"

{% endfor %}
class Modality:
    """Modalities"""
    {% for _, row in data.iterrows() %}
    {{ row['abbreviation'] | to_class_name | upper }} = {{ row['abbreviation'] | to_class_name_underscored }}()
    {%- endfor %}

    ALL = tuple(_ModalityModel.__subclasses__())

    ONE_OF = Annotated[Union[tuple(_ModalityModel.__subclasses__())], Field(discriminator="abbreviation")]

    abbreviation_map = {m().abbreviation: m() for m in ALL}

    @classmethod
    def from_abbreviation(cls, abbreviation: str):
        """Get modality from abbreviation"""
        return cls.abbreviation_map.get(abbreviation, None)


class FileRequirement(IntEnum):
    """Whether a file is required for a specific modality"""

    REQUIRED = 1
    OPTIONAL = 0
    EXCLUDED = -1


class _ExpectedFilesModel(BaseModel):
    """Base model for expected files"""
    model_config = ConfigDict(frozen=True)
    name: str
    modality_abbreviation: str
    subject: FileRequirement
    data_description: FileRequirement
    procedures: FileRequirement
    session: FileRequirement
    rig: FileRequirement
    processing: FileRequirement
    acquisition: FileRequirement
    instrument: FileRequirement
    quality_control: FileRequirement

{% for _, row in data.iterrows() %}
class {{ row['abbreviation'] | to_class_name_underscored }}_Files(_ExpectedFilesModel):
    """Model {{row['abbreviation']}}_Files"""
    name: Literal["{{ row['name'] }}"] = "{{ row['name'] }}"
    modality_abbreviation: Literal["{{ row['abbreviation'] }}"] = "{{ row['abbreviation'] }}"
    {% for field in file_fields %}
    {{ field }}: FileRequirement = {{ file_requirement(row[field]) }}
    {%- endfor %}

{% endfor %}
class ExpectedFiles:
    """Expected files for each modality"""
    {% for _, row in data.iterrows() %}
    {{ row['abbreviation'] | to_class_name | upper }} = {{ row['abbreviation'] | to_class_name_underscored }}_Files()
    {%- endfor %}

    ALL = tuple(_ExpectedFilesModel.__subclasses__())

    {# The expected-files models carry `modality_abbreviation`, not `abbreviation`, so that is the field the union must discriminate on. #}
    ONE_OF = Annotated[Union[tuple(_ExpectedFilesModel.__subclasses__())], Field(discriminator="modality_abbreviation")]
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
"""Mouse anatomy"""
{# Template for the mouse anatomy models. `data` is the DataFrame passed in by the generator; this template reads its `name` and `registry_identifier` columns. The comments in this file end with a whitespace-trimming marker so they add nothing to the rendered output. -#}
{% raw -%}
from pydantic import BaseModel, Field, ConfigDict
from typing import Literal, Union
from typing_extensions import Annotated
from aind_data_schema_models.registries import Registry
{% endraw %}

{# Frozen base class shared by every generated structure model. -#}
class _MouseAnatomicalStructureModel(BaseModel):
"""Base model for mouse anatomy"""
model_config = ConfigDict(frozen=True)
name: str
registry: Registry.ONE_OF
registry_identifier: str

{# One subclass per row, named from the `name` column; every row is assigned the EMAPA registry. -#}
{% for _, row in data.iterrows() %}
class {{ row['name'] | to_class_name_underscored }}(_MouseAnatomicalStructureModel):
"""Model {{row['name']}}"""
name: Literal["{{ row['name'] }}"] = "{{ row['name'] }}"
registry: Registry.ONE_OF = Registry.EMAPA
registry_identifier: Literal["{{ row['registry_identifier'] }}"] = "{{ row['registry_identifier'] }}"

{% endfor %}
{# Namespace class: one instance attribute per row, plus the ALL tuple and a ONE_OF union discriminated on `registry_identifier`. -#}
class MouseAnatomicalStructure:
"""MouseAnatomicalStructure"""
{% for _, row in data.iterrows() %}
{{ row['name'] | to_class_name | upper }} = {{ row['name'] | to_class_name_underscored }}()
{%- endfor %}

ALL = tuple(_MouseAnatomicalStructureModel.__subclasses__())

ONE_OF = Annotated[Union[tuple(_MouseAnatomicalStructureModel.__subclasses__())], Field(discriminator="registry_identifier")]


{# The two unions below list class names by hand. NOTE(review): presumably each must match a class generated from a CSV row by `to_class_name_underscored`; verify against the CSV when rows are renamed or removed. -#}
MouseAnatomicalStructure.EMG_MUSCLES = Annotated[Union[
_Deltoid,
_Pectoralis_Major,
_Triceps_Brachii,
_Lateral_Head_Of_Triceps_Brachii,
_Long_Head_Of_Triceps_Brachii,
_Medial_Head_Of_Triceps_Brachii,
_Biceps_Brachii,
_Long_Head_Of_Biceps_Brachii,
_Short_Head_Of_Biceps_Brachii,
_Tendon_Of_Biceps_Brachii,
_Pars_Scapularis_Of_Deltoid,
_Extensor_Carpi_Radialis_Longus,
_Extensor_Digitorum_Communis,
_Extensor_Digitorum_Lateralis,
_Extensor_Carpi_Ulnaris,
_Flexor_Carpi_Radialis,
_Flexor_Carpi_Ulnaris,
_Flexor_Digitorum_Profundus,
], Field(discriminator="registry_identifier")]

MouseAnatomicalStructure.BODY_PARTS = Annotated[Union[
_Forelimb,
_Head,
_Hindlimb,
_Neck,
_Tail,
_Trunk,
], Field(discriminator="registry_identifier")]


Loading

0 comments on commit d198752

Please sign in to comment.