diff --git a/.gitattributes b/.gitattributes index 9e2f1b1..806a512 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1 +1,2 @@ tests/data/*.json* filter=lfs diff=lfs merge=lfs -text +tests/data/seqrepo/** filter=lfs diff=lfs merge=lfs -text diff --git a/.gitignore b/.gitignore index c2f3f75..9e5486b 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,7 @@ +__pycache__ + +/data + # Coverage information. .coverage coverage.lcov diff --git a/Makefile b/Makefile index a4fcde7..06f420b 100644 --- a/Makefile +++ b/Makefile @@ -69,4 +69,5 @@ ci: \ .PHONY: serve serve: + DATA_DIR=$(PWD)/tests/data \ pipenv run uvicorn dotty.main:app --host 0.0.0.0 --port 8080 --reload --workers 8 diff --git a/Pipfile b/Pipfile index 62c7de4..988be76 100644 --- a/Pipfile +++ b/Pipfile @@ -7,6 +7,8 @@ name = "pypi" cdot = "*" fastapi = "*" hgvs = "*" +pydantic-settings = "*" +uvicorn = "*" [dev-packages] black = "*" @@ -15,6 +17,7 @@ isort = "*" mypy = "*" pytest = "*" pytest-coverage = "*" +httpx = "*" [requires] python_version = "3.10" diff --git a/Pipfile.lock b/Pipfile.lock index 6e97dae..93ced5a 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "9356930bd1e2014315ae74f120406c6cf7a5dbf99581c6a7e1f3a858a915c95f" + "sha256": "877b8027e128a110ae2c18e5e9577a7ec941ff6ba77ac2fbba69e686d54a34cc" }, "pipfile-spec": 6, "requires": { @@ -203,6 +203,14 @@ "markers": "python_full_version >= '3.7.0'", "version": "==3.3.0" }, + "click": { + "hashes": [ + "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28", + "sha256:ca9853ad459e787e2192211578cc907e7594e294c7ccc834310722b41b9ca6de" + ], + "markers": "python_version >= '3.7'", + "version": "==8.1.7" + }, "coloredlogs": { "hashes": [ "sha256:612ee75c546f53e92e70049c9dbfcc18c935a2b9a53b66085ce9ef6a6e5c0934", @@ -330,6 +338,14 @@ "markers": "python_version >= '3.7'", "version": "==0.103.2" }, + "h11": { + "hashes": [ + 
"sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d", + "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761" + ], + "markers": "python_version >= '3.7'", + "version": "==0.14.0" + }, "hgvs": { "hashes": [ "sha256:06abb6363bb0c8ef9f3f8f9dc333d3a346ab5f9ebcb20a5bb56c69256262559f", @@ -695,6 +711,15 @@ "markers": "python_version >= '3.7'", "version": "==2.10.1" }, + "pydantic-settings": { + "hashes": [ + "sha256:962dc3672495aad6ae96a4390fac7e593591e144625e5112d359f8f67fb75945", + "sha256:ddd907b066622bd67603b75e2ff791875540dc485b7307c4fffc015719da8625" + ], + "index": "pypi", + "markers": "python_version >= '3.7'", + "version": "==2.0.3" + }, "pyee": { "hashes": [ "sha256:5c7e60f8df95710dbe17550e16ce0153f83990c00ef744841b43f371ed53ebea", @@ -752,6 +777,14 @@ "markers": "python_version >= '3.6'", "version": "==0.21.0" }, + "python-dotenv": { + "hashes": [ + "sha256:a8df96034aae6d2d50a4ebe8216326c61c3eb64836776504fcca410e5937a3ba", + "sha256:f5971a9226b701070a4bf2c38c89e5a3f0d64de8debda981d1db98583009122a" + ], + "markers": "python_version >= '3.8'", + "version": "==1.0.0" + }, "requests": { "hashes": [ "sha256:58cd2187c01e70e6e26505bca751777aa9f2ee0b7f4300988b709f44e013003f", @@ -851,7 +884,7 @@ "sha256:8f92fc8806f9a6b641eaa5318da32b44d401efaac0f6678c9bc448ba3605faa0", "sha256:df8e4339e9cb77357558cbdbceca33c303714cf861d1eef15e1070055ae8b7ef" ], - "markers": "python_version >= '3.8'", + "markers": "python_version < '3.11'", "version": "==4.8.0" }, "urllib3": { @@ -862,6 +895,15 @@ "markers": "python_version >= '2.7' and python_version not in '3.0, 3.1, 3.2, 3.3, 3.4, 3.5'", "version": "==1.26.17" }, + "uvicorn": { + "hashes": [ + "sha256:1f9be6558f01239d4fdf22ef8126c39cb1ad0addf76c40e760549d2c2f43ab53", + "sha256:4d3cc12d7727ba72b64d12d3cc7743124074c0a69f7b201512fc50c3e3f1569a" + ], + "index": "pypi", + "markers": "python_version >= '3.8'", + "version": "==0.23.2" + }, "w3lib": { "hashes": [ 
"sha256:c4432926e739caa8e3f49f5de783f336df563d9490416aebd5d39fb896d264e7", @@ -969,6 +1011,14 @@ } }, "develop": { + "anyio": { + "hashes": [ + "sha256:44a3c9aba0f5defa43261a8b3efb97891f2bd7d804e0e1f56419befa1adfc780", + "sha256:91dee416e570e92c64041bd18b900d1d6fa78dff7048769ce5ac5ddad004fbb5" + ], + "markers": "python_version >= '3.7'", + "version": "==3.7.1" + }, "black": { "hashes": [ "sha256:031e8c69f3d3b09e1aa471a926a1eeb0b9071f80b17689a655f7885ac9325a6f", @@ -998,6 +1048,14 @@ "markers": "python_version >= '3.8'", "version": "==23.9.1" }, + "certifi": { + "hashes": [ + "sha256:539cc1d13202e33ca466e88b2807e29f4c13049d6d87031a3c110744495cb082", + "sha256:92d6037539857d8206b8f6ae472e8b77db8058fec5937a1ef3f54304089edbb9" + ], + "markers": "python_version >= '3.6'", + "version": "==2023.7.22" + }, "click": { "hashes": [ "sha256:ae74fb96c20a0277a1d615f1e4d73c8414f5a98db8b799a7931d1582f3390c28", @@ -1084,6 +1142,39 @@ "markers": "python_full_version >= '3.8.1'", "version": "==6.1.0" }, + "h11": { + "hashes": [ + "sha256:8f19fbbe99e72420ff35c00b27a34cb9937e902a8b810e2c88300c6f0a3b699d", + "sha256:e3fe4ac4b851c468cc8363d500db52c2ead036020723024a109d37346efaa761" + ], + "markers": "python_version >= '3.7'", + "version": "==0.14.0" + }, + "httpcore": { + "hashes": [ + "sha256:13b5e5cd1dca1a6636a6aaea212b19f4f85cd88c366a2b82304181b769aab3c9", + "sha256:adc5398ee0a476567bf87467063ee63584a8bce86078bf748e48754f60202ced" + ], + "markers": "python_version >= '3.8'", + "version": "==0.18.0" + }, + "httpx": { + "hashes": [ + "sha256:181ea7f8ba3a82578be86ef4171554dd45fec26a02556a744db029a0a27b7100", + "sha256:47ecda285389cb32bb2691cc6e069e3ab0205956f681c5b2ad2325719751d875" + ], + "index": "pypi", + "markers": "python_version >= '3.8'", + "version": "==0.25.0" + }, + "idna": { + "hashes": [ + "sha256:814f528e8dead7d329833b91c5faa87d60bf71824cd12a7530b5526063d02cb4", + "sha256:90b77e79eaa3eba6de819a0c442c0b4ceefc341a7a2ab77d7562bf49f425c5c2" + ], + "markers": "python_version >= 
'3.5'", + "version": "==3.4" + }, "iniconfig": { "hashes": [ "sha256:2d91e135bf72d31a410b17c16da610a82cb55f6b0477d1a902134b24a455b8b3", @@ -1231,6 +1322,14 @@ "index": "pypi", "version": "==0.0" }, + "sniffio": { + "hashes": [ + "sha256:e60305c5e5d314f5389259b7f22aaa33d8f7dee49763119234af3755c55b9101", + "sha256:eecefdce1e5bbfb7ad2eeaabf7c1eeb404d7757c379bd1f7e5cce9d8bf425384" + ], + "markers": "python_version >= '3.7'", + "version": "==1.3.0" + }, "tomli": { "hashes": [ "sha256:939de3e7a6161af0c887ef91b7d41a53e7c5a1ca976325f429cb46ea9bc30ecc", @@ -1244,7 +1343,7 @@ "sha256:8f92fc8806f9a6b641eaa5318da32b44d401efaac0f6678c9bc448ba3605faa0", "sha256:df8e4339e9cb77357558cbdbceca33c303714cf861d1eef15e1070055ae8b7ef" ], - "markers": "python_version >= '3.8'", + "markers": "python_version < '3.11'", "version": "==4.8.0" } } diff --git a/README.md b/README.md index ea5f72c..4b23196 100644 --- a/README.md +++ b/README.md @@ -3,6 +3,28 @@ # dotty - cdot-based position projection +## Obtaining Data + +`datasets` is the NCBI `datasets` tool. 
+ +``` +$ mkdir -p data +$ cd data + +$ wget \ + https://github.com/SACGF/cdot/releases/download/v0.2.21/cdot-0.2.21.ensembl.grch37.json.gz \ + https://github.com/SACGF/cdot/releases/download/v0.2.21/cdot-0.2.21.ensembl.grch38.json.gz \ + https://github.com/SACGF/cdot/releases/download/v0.2.21/cdot-0.2.21.refseq.grch37.json.gz \ + https://github.com/SACGF/cdot/releases/download/v0.2.21/cdot-0.2.21.refseq.grch38.json.gz + +$ datasets download genome accession GCF_000001405.25 --filename GRCh37.zip +$ datasets download genome accession GCF_000001405.40 --filename GRCh38.zip +$ unzip GRCh37.zip +$ unzip GRCh38.zip +$ seqrepo --root-directory $PWD load --namespace ncbi --instance-name seqrepo ncbi_dataset/data/GCF_000001405.*/*.fna +$ rm -rf GRCh3?.zip ncbi_dataset +``` + ## Terraform Project Management ``` diff --git a/dotty/config.py b/dotty/config.py new file mode 100644 index 0000000..677afec --- /dev/null +++ b/dotty/config.py @@ -0,0 +1,29 @@ +import logging +import os +import secrets +from typing import Any + +from pydantic import AnyHttpUrl, BaseModel, EmailStr, HttpUrl, PostgresDsn, field_validator +from pydantic_core.core_schema import ValidationInfo +from pydantic_settings import BaseSettings, SettingsConfigDict + +logger = logging.getLogger(__name__) + + +class Settings(BaseSettings): +    """Configuration of dotty web server.""" + +    #: Enable loading variables from ``.env`` files. +    model_config = SettingsConfigDict( +        env_file=".env", env_file_encoding="utf-8", case_sensitive=True +    ) + +    #: Path to the directory with the cdot ``.json.gz`` files. +    DATA_DIR: str = "/data" + +    #: Whether seqrepo is available for the reference, allows normalization +    #: of reference-level variants. 
+ HAVE_SEQREPO: bool = True + + +settings = Settings(_env_file=".env", _env_file_encoding="utf-8") # type: ignore[call-arg] diff --git a/dotty/core.py b/dotty/core.py index b9136d5..bb77025 100644 --- a/dotty/core.py +++ b/dotty/core.py @@ -1,19 +1,34 @@ import enum import logging +import os import pathlib import time from datetime import timedelta +from unittest import mock +import bioutils.assemblies import hgvs.parser from cdot.hgvs.dataproviders import JSONDataProvider from hgvs.assemblymapper import AssemblyMapper -from hgvs.dataproviders.seqfetcher import SeqFetcher -from hgvs.exceptions import HGVSDataNotAvailableError +from hgvs.dataproviders.interface import Interface +from hgvs.extras import babelfish + +from dotty.config import settings #: Logger used in this module. _logger = logging.getLogger(__name__) +class Babelfish(babelfish.Babelfish): + """Custom Babelfish that also knows about GRCh37.""" + + def __init__(self, hdp: Interface, assembly_name: str): + super().__init__(hdp, assembly_name) + for assembly_name in ("GRCh37", "GRCh38"): + for sr in bioutils.assemblies.get_assembly(assembly_name)["sequences"]: + self.ac_to_chr_name_map[sr["refseq_ac"]] = sr["name"] + + class Assembly(enum.Enum): """Enumeration for supported assemblies.""" @@ -23,17 +38,6 @@ class Assembly(enum.Enum): GRCH38 = "GRCh38" -class NullSeqFetcher(SeqFetcher): - """A null sequence fetcher that always returns None.""" - - def __init__(self): - _logger.info("(Not) fetching sequences with NullSeqFetcher") - - def fetch_seq(self, ac, start_i=None, end_i=None): # pragma: no cover - _ = ac, start_i, end_i - raise HGVSDataNotAvailableError("dotty cannot fetch sequences") - - class Driver: """Provides references to the data files.""" @@ -56,6 +60,8 @@ def __init__(self, cdot_dir: str): self.data_providers: dict[Assembly, JSONDataProvider] = {} #: The assembly mapper to use for each genome. self.assembly_mappers: dict[Assembly, AssemblyMapper] = {} + #: One Babelfish for each assembly. 
+ self.babelfishes: dict[Assembly, Babelfish] = {} #: The HGVS parser. self.parser = hgvs.parser.Parser() @@ -63,23 +69,30 @@ def load(self): """Loads the data from the files.""" _logger.info("Loading data from %s: %s ...", self.cdot_dir, self.assembly_file_names) start_time = time.time() - self.data_providers = { - assembly: JSONDataProvider( - [str(self.cdot_dir / fname) for fname in assembly_file_names], - seqfetcher=NullSeqFetcher(), - ) - for assembly, assembly_file_names in self.assembly_file_names.items() - } - self.assembly_mappers = { - assembly: AssemblyMapper( - self.data_providers[assembly], - assembly_name=assembly.value, - alt_aln_method="splign", - normalize=False, - replace_reference=False, - prevalidation_level=None, - ) - for assembly in self.assembly_file_names - } + + # We temporarily override the HGVS_SEQREPO_DIR environment variable for construction + # of hgvs / cdot objects. + with mock.patch.dict(os.environ, {"HGVS_SEQREPO_DIR": str(self.cdot_dir / "seqrepo")}): + self.data_providers = { + assembly: JSONDataProvider( + [str(self.cdot_dir / fname) for fname in assembly_file_names], + ) + for assembly, assembly_file_names in self.assembly_file_names.items() + } + self.assembly_mappers = { + assembly: AssemblyMapper( + self.data_providers[assembly], + assembly_name=assembly.value, + alt_aln_method="splign", + normalize=settings.HAVE_SEQREPO, + replace_reference=settings.HAVE_SEQREPO, + prevalidation_level=None, + ) + for assembly in Assembly + } + self.babelfishes = { + assembly: Babelfish(hdp=self.data_providers[assembly], assembly_name=assembly.value) + for assembly in Assembly + } elapsed = timedelta(seconds=time.time() - start_time) _logger.info("... 
loaded in %s", elapsed) diff --git a/dotty/main.py b/dotty/main.py index 48dfe51..2ad073a 100644 --- a/dotty/main.py +++ b/dotty/main.py @@ -1,22 +1,91 @@ import logging +from contextlib import asynccontextmanager +import bioutils.assemblies import pydantic -from fastapi import FastAPI +from fastapi import FastAPI, HTTPException + +from dotty.config import settings +from dotty.core import Assembly, Driver logging.basicConfig(level=logging.DEBUG) -logger = logging.getLogger(__name__) +_logger = logging.getLogger(__name__) + +#: The global Driver instance. +driver: Driver = None # type: ignore[assignment] + +#: Contig names per assembly. +contig_names: dict[Assembly, set[str]] = { + assembly: set( + sr["refseq_ac"] for sr in bioutils.assemblies.get_assembly(assembly.value)["sequences"] + ) + for assembly in Assembly +} + + +@asynccontextmanager +async def lifespan(app: FastAPI): # pragma: no cover + global driver + _ = app + driver = Driver(cdot_dir=settings.DATA_DIR) + driver.load() + _logger.info("driver loaded") + yield + app = FastAPI( title="dotty", + lifespan=lifespan, ) +class Spdi(pydantic.BaseModel): + """SPDI representation of a variant.""" + + #: Assembly name. + assembly: str + #: Reference sequence ID. + contig: str + #: 1-based position. + pos: int + #: Reference allele / deleted sequence. + reference_deleted: str + #: Alternate allele / inserted sequence. + alternate_inserted: str + + class Result(pydantic.BaseModel): - payload: str + """The result of the query.""" + + #: The actual payload / SPDI representation of the variant. 
+ spdi: Spdi + + +@app.get("/api/v1/to-spdi", response_model=Result) +async def to_spdi(q: str, assembly: Assembly = Assembly.GRCH38) -> Result: + """Resolve the given HGVS variant to SPDI representation.""" + parsed_var = driver.parser.parse(q) + + if parsed_var.type == "c": + var_g = driver.assembly_mappers[assembly].c_to_g(parsed_var) + elif parsed_var.type == "n": + var_g = driver.assembly_mappers[assembly].n_to_g(parsed_var) + elif parsed_var.type == "g": + var_g = parsed_var + if var_g.ac in contig_names[Assembly.GRCH37]: + assembly = Assembly.GRCH37 + else: # pragma: no cover + raise HTTPException(status_code=400, detail="Invalid variant type") + contig, pos, reference, alternative, type_ = driver.babelfishes[assembly].hgvs_to_vcf(var_g) -@app.get("/", response_model=list[Result]) -async def index() -> list[Result]: - """Render the index.html page at the root URL""" - return [Result(payload="foo")] + return Result( + spdi=Spdi( + assembly=assembly.value, + contig=contig, + pos=pos, + reference_deleted=reference, + alternate_inserted=alternative, + ) + ) diff --git a/stubs/bioutils/__init__.pyi b/stubs/bioutils/__init__.pyi new file mode 100644 index 0000000..e69de29 diff --git a/stubs/bioutils/assemblies.pyi b/stubs/bioutils/assemblies.pyi new file mode 100644 index 0000000..0f21e13 --- /dev/null +++ b/stubs/bioutils/assemblies.pyi @@ -0,0 +1,3 @@ +import typing + +def get_assembly(name: str) -> typing.Any: ... 
diff --git a/stubs/cdot/hgvs/dataproviders/__init__.pyi b/stubs/cdot/hgvs/dataproviders/__init__.pyi new file mode 100644 index 0000000..c2c1bb6 --- /dev/null +++ b/stubs/cdot/hgvs/dataproviders/__init__.pyi @@ -0,0 +1,3 @@ +from cdot.hgvs.dataproviders.json_data_provider import JSONDataProvider + +__all__ = ("JSONDataProvider",) diff --git a/stubs/cdot/hgvs/dataproviders.pyi b/stubs/cdot/hgvs/dataproviders/json_data_provider.pyi similarity index 100% rename from stubs/cdot/hgvs/dataproviders.pyi rename to stubs/cdot/hgvs/dataproviders/json_data_provider.pyi diff --git a/stubs/hgvs/extras/__init__.pyi b/stubs/hgvs/extras/__init__.pyi new file mode 100644 index 0000000..e69de29 diff --git a/stubs/hgvs/extras/babelfish.pyi b/stubs/hgvs/extras/babelfish.pyi new file mode 100644 index 0000000..0bfaf38 --- /dev/null +++ b/stubs/hgvs/extras/babelfish.pyi @@ -0,0 +1,10 @@ +from hgvs.dataproviders.interface import Interface +from hgvs.posedit import PosEdit +from hgvs.sequencevariant import SequenceVariant + +def _as_interbase(posedit: PosEdit) -> tuple[int, int]: ... + +class Babelfish: + ac_to_chr_name_map: dict[str, str] + def __init__(self, hdp: Interface, assembly_name: str): ... + def hgvs_to_vcf(self, var_g: SequenceVariant) -> tuple[str, int, str, str, str]: ... 
diff --git a/stubs/hgvs/normalizer.pyi b/stubs/hgvs/normalizer.pyi new file mode 100644 index 0000000..953e6a2 --- /dev/null +++ b/stubs/hgvs/normalizer.pyi @@ -0,0 +1,2 @@ +class Normalizer: + pass diff --git a/stubs/hgvs/posedit.pyi b/stubs/hgvs/posedit.pyi new file mode 100644 index 0000000..5f9dc16 --- /dev/null +++ b/stubs/hgvs/posedit.pyi @@ -0,0 +1,2 @@ +class PosEdit: + pass diff --git a/stubs/hgvs/sequencevariant.pyi b/stubs/hgvs/sequencevariant.pyi index a1df7ff..e4dbdc8 100644 --- a/stubs/hgvs/sequencevariant.pyi +++ b/stubs/hgvs/sequencevariant.pyi @@ -1,2 +1,3 @@ class SequenceVariant: - pass + type: str + ac: str diff --git a/tests/conftest.py b/tests/conftest.py index 5ef2e23..5bf7123 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1,6 +1,17 @@ import pytest +from _pytest.monkeypatch import MonkeyPatch +from cdot.hgvs.dataproviders.json_data_provider import AbstractJSONDataProvider +from fastapi.testclient import TestClient +from hgvs.dataproviders.seqfetcher import SeqFetcher +from dotty.config import settings from dotty.core import Driver +from dotty.main import app + + +@pytest.fixture +def test_client() -> TestClient: + return TestClient(app) @pytest.fixture(scope="session") @@ -8,3 +19,15 @@ def dotty_driver(): driver = Driver("tests/data") driver.load() yield driver + + +@pytest.fixture +def settings_no_seqrepo(monkeypatch: MonkeyPatch) -> None: + monkeypatch.setattr(settings, "HAVE_SEQREPO", False) + + +@pytest.fixture +def mock_seqrepo_fetching(monkeypatch: MonkeyPatch) -> None: + """Mock out the sequence fetching as we do not want to have any data in tests/data/seqrepo.""" + monkeypatch.setattr(AbstractJSONDataProvider, "get_seq", lambda *args: "NN") + monkeypatch.setattr(SeqFetcher, "fetch_seq", lambda *args: "NN") diff --git a/tests/data/seqrepo/aliases.sqlite3 b/tests/data/seqrepo/aliases.sqlite3 new file mode 100644 index 0000000..784f05f --- /dev/null +++ b/tests/data/seqrepo/aliases.sqlite3 @@ -0,0 +1,3 @@ +version 
https://git-lfs.github.com/spec/v1 +oid sha256:5b23a47769ee7a993518f54bb2efc8d46ce35afc4d0bb791a8de8d68ff05f4d5 +size 69632 diff --git a/tests/data/seqrepo/sequences/db.sqlite3 b/tests/data/seqrepo/sequences/db.sqlite3 new file mode 100644 index 0000000..7f8131c --- /dev/null +++ b/tests/data/seqrepo/sequences/db.sqlite3 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d11053a482fd4f5a8d6ffe9c8ddc65ed9fc7833da8255132d07c03372ad68b3f +size 61440 diff --git a/tests/test_core.py b/tests/test_core.py index 9031336..cf3d0cc 100644 --- a/tests/test_core.py +++ b/tests/test_core.py @@ -4,7 +4,9 @@ from dotty.core import Assembly, Driver -def test_driver_c_to_g(dotty_driver: Driver): +def test_driver_c_to_g( + settings_no_seqrepo: None, mock_seqrepo_fetching: None, dotty_driver: Driver +): var_c = dotty_driver.parser.parse("NM_007294.3:c.5588A>G") result = { assembly.value: str(dotty_driver.assembly_mappers[assembly].c_to_g(var_c)) @@ -17,7 +19,9 @@ def test_driver_c_to_g(dotty_driver: Driver): assert result == expected -def test_driver_n_to_g(dotty_driver: Driver): +def test_driver_n_to_g( + settings_no_seqrepo: None, mock_seqrepo_fetching: None, dotty_driver: Driver +): var_n = dotty_driver.parser.parse("NR_027676.2:n.5765A>G") result = { assembly.value: str(dotty_driver.assembly_mappers[assembly].n_to_g(var_n)) @@ -30,7 +34,9 @@ def test_driver_n_to_g(dotty_driver: Driver): assert result == expected -def test_driver_t_to_g_with_var_c(dotty_driver: Driver): +def test_driver_t_to_g_with_var_c( + settings_no_seqrepo: None, mock_seqrepo_fetching: None, dotty_driver: Driver +): var_c = dotty_driver.parser.parse("NM_007294.3:c.5588A>G") result = { assembly.value: str(dotty_driver.assembly_mappers[assembly].c_to_g(var_c)) @@ -43,7 +49,9 @@ def test_driver_t_to_g_with_var_c(dotty_driver: Driver): assert result == expected -def test_driver_t_to_g_with_var_n(dotty_driver: Driver): +def test_driver_t_to_g_with_var_n( + settings_no_seqrepo: None, 
mock_seqrepo_fetching: None, dotty_driver: Driver +): var_n = dotty_driver.parser.parse("NR_027676.2:n.5765A>G") result = { assembly.value: str(dotty_driver.assembly_mappers[assembly].n_to_g(var_n)) diff --git a/tests/test_main.py b/tests/test_main.py new file mode 100644 index 0000000..064145e --- /dev/null +++ b/tests/test_main.py @@ -0,0 +1,105 @@ +from unittest.mock import Mock + +from _pytest.monkeypatch import MonkeyPatch +from fastapi.testclient import TestClient + +from dotty import main as dotty_main +from dotty.core import Assembly + + +def _setup_mock_driver(var_type: str, parsed_var_ac: str) -> Mock: + parsed_var = Mock() + parsed_var.type = var_type + parsed_var.ac = parsed_var_ac + + g_var = Mock() + + grch38_am = Mock() + grch38_am.c_to_g = Mock() + grch38_am.c_to_g.return_value = g_var + + grch37_bf = Mock() + grch37_bf.hgvs_to_vcf = Mock() + grch37_bf.hgvs_to_vcf.return_value = ("chr1", 100, "A", "C", "g") + + grch38_bf = Mock() + grch38_bf.hgvs_to_vcf = Mock() + grch38_bf.hgvs_to_vcf.return_value = ("chr1", 100, "A", "C", "g") + + mock_driver = Mock() + mock_driver.assembly_mappers = { + Assembly.GRCH38: grch38_am, + } + mock_driver.babelfishes = { + Assembly.GRCH37: grch37_bf, + Assembly.GRCH38: grch38_bf, + } + mock_driver.parser = Mock() + mock_driver.parser.parse = Mock() + mock_driver.parser.parse.return_value = parsed_var + + return mock_driver + + +def test_to_spdi_c(test_client: TestClient, monkeypatch: MonkeyPatch): + monkeypatch.setattr(dotty_main, "driver", _setup_mock_driver("c", "NC_000017.10")) + response = test_client.get("/api/v1/to-spdi?q=NM_000059.3:c.274G>A") + assert response.status_code == 200 + expected = { + "spdi": { + "alternate_inserted": "C", + "contig": "chr1", + "pos": 100, + "reference_deleted": "A", + "assembly": "GRCh38", + } + } + assert response.json() == expected + + +def test_to_spdi_n(test_client: TestClient, monkeypatch: MonkeyPatch): + monkeypatch.setattr(dotty_main, "driver", _setup_mock_driver("n", 
"NC_000017.10")) + response = test_client.get("/api/v1/to-spdi?q=NM_000059.3:n.274G>A") + assert response.status_code == 200 + expected = { + "spdi": { + "alternate_inserted": "C", + "contig": "chr1", + "pos": 100, + "reference_deleted": "A", + "assembly": "GRCh38", + } + } + assert response.json() == expected + + +def test_to_spdi_g_37(test_client: TestClient, monkeypatch: MonkeyPatch): + monkeypatch.setattr(dotty_main, "driver", _setup_mock_driver("g", "NC_000017.10")) + response = test_client.get("/api/v1/to-spdi?q=NC_000017.10:g.41197699T>C") + assert response.status_code == 200 + expected = { + "spdi": { + "alternate_inserted": "C", + "contig": "chr1", + "pos": 100, + "reference_deleted": "A", + "assembly": "GRCh37", + } + } + assert response.json() == expected + + +def test_to_spdi_g_38(test_client: TestClient, monkeypatch: MonkeyPatch): + monkeypatch.setattr(dotty_main, "driver", _setup_mock_driver("g", "NC_000017.11")) + response = test_client.get("/api/v1/to-spdi?q=NC_000017.11:g.43045682T>C") + assert response.status_code == 200 + expected = { + "spdi": { + "alternate_inserted": "C", + "contig": "chr1", + "pos": 100, + "reference_deleted": "A", + "assembly": "GRCh38", + } + } + assert response.json() == expected