Skip to content

Commit

Permalink
Merge pull request #31 from multimeric/ci-fixes
Browse files (browse the repository at this point in the history)
CI fixes
  • Loading branch information
multimeric authored May 18, 2024
2 parents fed6ef3 + ad330f4 commit 7a97b8a
Show file tree
Hide file tree
Showing 8 changed files with 2,036 additions and 1,456 deletions.
3,334 changes: 1,907 additions & 1,427 deletions poetry.lock

Large diffs are not rendered by default.

9 changes: 7 additions & 2 deletions test/test_dataset/test_request.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,12 @@ def test_main_example():
fields=["length", "gene_names"],
).each_record():
assert isinstance(record, dict)
assert set(record.keys()) == {"primaryAccession", "genes", "sequence"}
assert set(record.keys()) == {
"primaryAccession",
"genes",
"sequence",
"entryType",
}
assert record["sequence"]["length"] > 5000


Expand All @@ -104,7 +109,7 @@ def test_date_field():
fields=["date_created", "protein_name"],
).each_record()
)
assert len(records) == 544
assert len(records) == 542


def test_uniref():
Expand Down
2 changes: 1 addition & 1 deletion test/test_dataset/test_validate_fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,4 +48,4 @@ def test_valid_return_fields(client: Type[DatasetClient]):
).each_response()
)
)
response.raise_for_status()
assert response.status_code == 200, response.json()
2 changes: 1 addition & 1 deletion unipressed/dataset/generated_types/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ class DatabaseQueryDict(TypedDict):
"pubmed_id",
"doi_id",
"link_type",
"server",
"servers",
"dbUrl",
"category",
"statistics",
Expand Down
15 changes: 14 additions & 1 deletion unipressed/dataset/generated_types/keywords.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,19 @@

from typing_extensions import Literal, NotRequired, TypeAlias, TypedDict

# Closed set of values accepted by the ``category`` field of
# ``KeywordsQueryDict``. Each value names one keyword category; the field's
# docstring pairs them with the accessions KW-9990 (technical_term) through
# KW-9999 (biological_process), in the same order as listed here.
Category: TypeAlias = Literal[
    "technical_term",
    "ptm",
    "molecular_function",
    "ligand",
    "domain",
    "disease",
    "developmental_stage",
    "coding_sequence_diversity",
    "cellular_component",
    "biological_process",
]


class KeywordsQueryDict(TypedDict):
and_: NotRequired[Iterable["KeywordsQuery"]]
Expand All @@ -17,7 +30,7 @@ class KeywordsQueryDict(TypedDict):
"Name\ne.g. 2Fe-2S"
keyword_id: NotRequired[str]
"Keyword [AC]\ne.g. KW-0001"
category: NotRequired[str]
category: NotRequired[Category]
"Category\ne.g. Domain\n* technical_term: Technical term [KW-9990]\n* ptm: PTM [KW-9991]\n* molecular_function: Molecular function [KW-9992]\n* ligand: Ligand [KW-9993]\n* domain: Domain [KW-9994]\n* disease: Disease [KW-9995]\n* developmental_stage: Developmental stage [KW-9996]\n* coding_sequence_diversity: Coding sequence diversity [KW-9997]\n* cellular_component: Cellular component [KW-9998]\n* biological_process: Biological process [KW-9999]"


Expand Down
2 changes: 1 addition & 1 deletion unipressed/dataset/generated_types/taxonomy.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ class TaxonomyQueryDict(TypedDict):
host: NotRequired[int]
"Virus host\ne.g. 85621"
linked: NotRequired[bool]
"With external info\ne.g. true\n* true: Yes\n* false: No"
"With external info\ne.g. true"
parent: NotRequired[str]
"Parent\ne.g. 9606"
ancestor: NotRequired[str]
Expand Down
92 changes: 83 additions & 9 deletions unipressed/dataset/generated_types/uniparc.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,80 @@

from typing_extensions import Literal, NotRequired, TypeAlias, TypedDict

# Closed set of values accepted by the ``database`` field of
# ``UniparcQueryDict``. The values are the query tokens, which are
# case-sensitive and not always equal to the display name given in the field's
# docstring (e.g. "embl-cds" is shown as "EMBL CDS", "UniProt" as "UniProtKB",
# "isoforms" as "UniProtKB/Swiss-Prot isoforms").
Database: TypeAlias = Literal[
    "EnsemblBacteria",
    "EnsemblFungi",
    "EnsemblMetazoa",
    "EnsemblPlants",
    "EnsemblProtists",
    "embl-cds",
    "EMBL_CON",
    "EMBL_TPA",
    "EMBL_TSA",
    "EMBLWGS",
    "Ensembl",
    "EnsemblRapid",
    "EPO",
    "FlyBase",
    "FusionGDB",
    "H-InvDB",
    "IPI",
    "JPO",
    "KIPO",
    "PATRIC",
    "PDB",
    "PIR",
    "PIRARC",
    "PRF",
    "RefSeq",
    "REMTREMBL",
    "SEED",
    "SGD",
    "UniProt",
    "isoforms",
    "TAIR",
    "TREMBLNEW",
    "TREMBL_VARSPLIC",
    "TROME",
    "UNIMES",
    "USPTO",
    "VectorBase",
    "VEGA",
    "WBParaSite",
    "WormBase",
]
# Closed set of values accepted by the ``active`` field of
# ``UniparcQueryDict``. This is a strict subset of ``Database``: it omits
# EMBL_TPA, H-InvDB, IPI, PIR, PIRARC, PRF, REMTREMBL, TREMBLNEW,
# TREMBL_VARSPLIC, UNIMES and VectorBase.
# NOTE(review): presumably the omitted sources are retired/inactive upstream —
# confirm against the UniProt API before relying on that.
Active: TypeAlias = Literal[
    "EnsemblBacteria",
    "EnsemblFungi",
    "EnsemblMetazoa",
    "EnsemblPlants",
    "EnsemblProtists",
    "embl-cds",
    "EMBL_CON",
    "EMBL_TSA",
    "EMBLWGS",
    "Ensembl",
    "EnsemblRapid",
    "EPO",
    "FlyBase",
    "FusionGDB",
    "JPO",
    "KIPO",
    "PATRIC",
    "PDB",
    "RefSeq",
    "SEED",
    "SGD",
    "UniProt",
    "isoforms",
    "TAIR",
    "TROME",
    "USPTO",
    "VEGA",
    "WBParaSite",
    "WormBase",
]


class UniparcQueryDict(TypedDict):
and_: NotRequired[Iterable["UniparcQuery"]]
Expand All @@ -22,19 +96,19 @@ class UniparcQueryDict(TypedDict):
upid: NotRequired[str]
"Proteome ID\ne.g. UP123456789"
taxonomy_name: NotRequired[str]
"Taxonomy [OC]\ne.g. sample name"
"Taxonomy [OC]\ne.g. Human"
taxonomy_id: NotRequired[str]
"Taxonomy id"
gene: NotRequired[str]
"Gene name [GN]\ne.g. sample gene"
"Gene name [GN]\ne.g. PROZ"
protein: NotRequired[str]
"Protein name\ne.g. sample protein"
database: NotRequired[str]
"Database\ne.g. sample database\n* EnsemblBacteria: EnsemblBacteria\n* EnsemblFungi: EnsemblFungi\n* EnsemblMetazoa: EnsemblMetazoa\n* EnsemblPlants: EnsemblPlants\n* EnsemblProtists: EnsemblProtists\n* embl-cds: EMBL CDS\n* EMBL_CON: EMBL_CON\n* EMBL_TPA: EMBL_TPA\n* EMBL_TSA: EMBL_TSA\n* EMBLWGS: EMBLWGS\n* Ensembl: Ensembl\n* EnsemblRapid: EnsemblRapid\n* EPO: EPO\n* FlyBase: FlyBase\n* FusionGDB: FusionGDB\n* H-InvDB: H-InvDB\n* IPI: IPI\n* JPO: JPO\n* KIPO: KIPO\n* PATRIC: PATRIC\n* PDB: PDB\n* PIR: PIR\n* PIRARC: PIRARC\n* PRF: PRF\n* RefSeq: RefSeq\n* REMTREMBL: REMTREMBL\n* SEED: SEED\n* SGD: SGD\n* UniProt: UniProtKB\n* isoforms: UniProtKB/Swiss-Prot isoforms\n* TAIR: TAIR\n* TREMBLNEW: TREMBLNEW\n* TREMBL_VARSPLIC: TREMBL_VARSPLIC\n* TROME: TROME\n* UNIMES: UNIMES\n* USPTO: USPTO\n* VectorBase: VectorBase\n* VEGA: VEGA\n* WBParaSite: WBParaSite\n* WormBase: WormBase"
active: NotRequired[str]
"Active\ne.g. sample active\n* EnsemblBacteria: EnsemblBacteria\n* EnsemblFungi: EnsemblFungi\n* EnsemblMetazoa: EnsemblMetazoa\n* EnsemblPlants: EnsemblPlants\n* EnsemblProtists: EnsemblProtists\n* embl-cds: EMBL CDS\n* EMBL_CON: EMBL_CON\n* EMBL_TSA: EMBL_TSA\n* EMBLWGS: EMBLWGS\n* Ensembl: Ensembl\n* EnsemblRapid: EnsemblRapid\n* EPO: EPO\n* FlyBase: FlyBase\n* FusionGDB: FusionGDB\n* JPO: JPO\n* KIPO: KIPO\n* PATRIC: PATRIC\n* PDB: PDB\n* RefSeq: RefSeq\n* SEED: SEED\n* SGD: SGD\n* UniProt: UniProtKB\n* isoforms: UniProtKB/Swiss-Prot isoforms\n* TAIR: TAIR\n* TROME: TROME\n* USPTO: USPTO\n* VEGA: VEGA\n* WBParaSite: WBParaSite\n* WormBase: WormBase"
"Protein name\ne.g. Protein Z"
database: NotRequired[Database]
"Database\ne.g. Gene3D\n* EnsemblBacteria: EnsemblBacteria\n* EnsemblFungi: EnsemblFungi\n* EnsemblMetazoa: EnsemblMetazoa\n* EnsemblPlants: EnsemblPlants\n* EnsemblProtists: EnsemblProtists\n* embl-cds: EMBL CDS\n* EMBL_CON: EMBL_CON\n* EMBL_TPA: EMBL_TPA\n* EMBL_TSA: EMBL_TSA\n* EMBLWGS: EMBLWGS\n* Ensembl: Ensembl\n* EnsemblRapid: EnsemblRapid\n* EPO: EPO\n* FlyBase: FlyBase\n* FusionGDB: FusionGDB\n* H-InvDB: H-InvDB\n* IPI: IPI\n* JPO: JPO\n* KIPO: KIPO\n* PATRIC: PATRIC\n* PDB: PDB\n* PIR: PIR\n* PIRARC: PIRARC\n* PRF: PRF\n* RefSeq: RefSeq\n* REMTREMBL: REMTREMBL\n* SEED: SEED\n* SGD: SGD\n* UniProt: UniProtKB\n* isoforms: UniProtKB/Swiss-Prot isoforms\n* TAIR: TAIR\n* TREMBLNEW: TREMBLNEW\n* TREMBL_VARSPLIC: TREMBL_VARSPLIC\n* TROME: TROME\n* UNIMES: UNIMES\n* USPTO: USPTO\n* VectorBase: VectorBase\n* VEGA: VEGA\n* WBParaSite: WBParaSite\n* WormBase: WormBase"
active: NotRequired[Active]
"Active\ne.g. Gene3D\n* EnsemblBacteria: EnsemblBacteria\n* EnsemblFungi: EnsemblFungi\n* EnsemblMetazoa: EnsemblMetazoa\n* EnsemblPlants: EnsemblPlants\n* EnsemblProtists: EnsemblProtists\n* embl-cds: EMBL CDS\n* EMBL_CON: EMBL_CON\n* EMBL_TSA: EMBL_TSA\n* EMBLWGS: EMBLWGS\n* Ensembl: Ensembl\n* EnsemblRapid: EnsemblRapid\n* EPO: EPO\n* FlyBase: FlyBase\n* FusionGDB: FusionGDB\n* JPO: JPO\n* KIPO: KIPO\n* PATRIC: PATRIC\n* PDB: PDB\n* RefSeq: RefSeq\n* SEED: SEED\n* SGD: SGD\n* UniProt: UniProtKB\n* isoforms: UniProtKB/Swiss-Prot isoforms\n* TAIR: TAIR\n* TROME: TROME\n* USPTO: USPTO\n* VEGA: VEGA\n* WBParaSite: WBParaSite\n* WormBase: WormBase"
checksum: NotRequired[str]
"Checksum (CRC64/MD5)\ne.g. sample checksum"
"Checksum (CRC64/MD5)\ne.g. B8824CE1ECAEEEAE"
length: NotRequired[
tuple[
Union[
Expand Down Expand Up @@ -75,7 +149,7 @@ class UniparcQueryDict(TypedDict):
"SFLD",
"SMART",
"SUPFAM",
"TIGRFAMs",
"NCBIfam",
]
UniparcFields: TypeAlias = Literal[
UniparcNamesTaxonomy,
Expand Down
36 changes: 22 additions & 14 deletions unipressed/dataset/generated_types/uniprotkb.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
Existence: TypeAlias = Literal["1", "2", "3", "4", "5"]
Organelle: TypeAlias = Literal[
"mitochondrion",
"plasmid",
"plastid",
"chloroplast",
"cyanelle",
Expand Down Expand Up @@ -245,7 +246,7 @@ class UniprotkbQueryDict(TypedDict):
cc_subunit_exp: NotRequired[str]
"Cc subunit exp\ne.g. homodimer"
structure_3d: NotRequired[bool]
"3D Structure\ne.g. true\n* true: Yes\n* false: No"
"3D Structure\ne.g. true"
ft_secstruct: NotRequired[str]
"Ft secstruct\ne.g. *"
ft_secstruct_exp: NotRequired[str]
Expand Down Expand Up @@ -375,11 +376,11 @@ class UniprotkbQueryDict(TypedDict):
ft_positional_exp: NotRequired[str]
"Ft positional exp\ne.g. colorectal"
fragment: NotRequired[bool]
"Fragment\ne.g. true\n* true: Yes\n* false: No"
"Fragment\ne.g. true"
organelle: NotRequired[Organelle]
"Encoded in\ne.g. mitochondrion\n* mitochondrion: Mitochondrion\n* plastid: Plastid\n* chloroplast: Chloroplast\n* cyanelle: Cyanelle\n* apicoplast: Apicoplast\n* organellar chromatophore: Organellar chromatophore\n* non-photosynthetic plastid: Non-photosynthetic plastid\n* nucleomorph: Nucleomorph\n* hydrogenosome: Hydrogenosome"
"Encoded in\ne.g. mitochondrion\n* mitochondrion: Mitochondrion\n* plasmid: Plasmid\n* plastid: Plastid\n* chloroplast: Chloroplast\n* cyanelle: Cyanelle\n* apicoplast: Apicoplast\n* organellar chromatophore: Organellar chromatophore\n* non-photosynthetic plastid: Non-photosynthetic plastid\n* nucleomorph: Nucleomorph\n* hydrogenosome: Hydrogenosome"
precursor: NotRequired[bool]
"Precursor\ne.g. true\n* true: Yes\n* false: No"
"Precursor\ne.g. true"
tissue: NotRequired[str]
"Tissue\ne.g. head"
strain: NotRequired[str]
Expand All @@ -403,9 +404,9 @@ class UniprotkbQueryDict(TypedDict):
ft_coiled_exp: NotRequired[str]
"Ft coiled exp\ne.g. *"
ft_compbias: NotRequired[str]
"Ft compbias\ne.g. glu-rich"
"Ft compbias\ne.g. basic residues"
ft_compbias_exp: NotRequired[str]
"Ft compbias exp\ne.g. glu-rich"
"Ft compbias exp\ne.g. basic residues"
ft_motif: NotRequired[str]
"Ft motif\ne.g. motif"
ft_motif_exp: NotRequired[str]
Expand Down Expand Up @@ -561,9 +562,9 @@ class UniprotkbQueryDict(TypedDict):
scope: NotRequired[str]
"Cited for\ne.g. microtubule"
reviewed: NotRequired[bool]
"Reviewed\ne.g. true\n* true: Yes\n* false: No"
"Reviewed\ne.g. true"
active: NotRequired[bool]
"Active\ne.g. true\n* true: Yes\n* false: No"
"Active\ne.g. true"
uniref_cluster_50: NotRequired[str]
"UniRef50\ne.g. UniRef50_P05067"
uniref_cluster_90: NotRequired[str]
Expand Down Expand Up @@ -677,7 +678,7 @@ class UniprotkbQueryDict(TypedDict):
UniprotkbStructure: TypeAlias = Literal[
"structure_3d", "ft_strand", "ft_helix", "ft_turn"
]
UniprotkbPublications: TypeAlias = Literal["lit_pubmed_id",]
UniprotkbPublications: TypeAlias = Literal["lit_pubmed_id", "lit_doi_id"]
UniprotkbDateOf: TypeAlias = Literal[
"date_created", "date_modified", "date_sequence_modified", "version"
]
Expand All @@ -690,14 +691,16 @@ class UniprotkbQueryDict(TypedDict):
"protein_families",
"ft_region",
"ft_repeat",
"cc_similarity",
"ft_zn_fing",
]
UniprotkbSequence: TypeAlias = Literal[
"xref_ccds", "xref_embl", "xref_pir", "xref_refseq"
"xref_ccds", "xref_embl", "xref_generif", "xref_pir", "xref_refseq"
]
UniprotkbThreedStructure: TypeAlias = Literal[
"xref_alphafolddb",
"xref_bmrb",
"xref_emdb",
"xref_pcddb",
"xref_pdb",
"xref_pdbsum",
Expand Down Expand Up @@ -749,7 +752,7 @@ class UniprotkbQueryDict(TypedDict):
"xref_iptmnet",
]
UniprotkbPolymorphismAndMutation: TypeAlias = Literal[
"xref_biomuta", "xref_dmdm", "xref_dbsnp"
"xref_alzforum", "xref_biomuta", "xref_dmdm", "xref_dbsnp"
]
UniprotkbTwodGel: TypeAlias = Literal[
"xref_compluyeast-2dpage",
Expand All @@ -770,6 +773,7 @@ class UniprotkbQueryDict(TypedDict):
"xref_peptideatlas",
"xref_promex",
"xref_proteomicsdb",
"xref_pumba",
"xref_topdownproteomics",
"xref_jpost",
]
Expand All @@ -791,7 +795,6 @@ class UniprotkbQueryDict(TypedDict):
"xref_ucsc",
"xref_vectorbase",
"xref_wbparasite",
"xref_wbparasitetranscriptprotein",
]
UniprotkbOrganismSpecific: TypeAlias = Literal[
"xref_agr",
Expand All @@ -807,6 +810,8 @@ class UniprotkbQueryDict(TypedDict):
"xref_genereviews",
"xref_hgnc",
"xref_hpa",
"xref_ic4r",
"xref_japonicusdb",
"xref_legiolist",
"xref_leproma",
"xref_mgi",
Expand Down Expand Up @@ -860,10 +865,14 @@ class UniprotkbQueryDict(TypedDict):
"xref_evolutionarytrace",
"xref_genewiki",
"xref_genomernai",
"xref_orcid",
"xref_pgenn",
"xref_phi-base",
"xref_pro",
"xref_pharos",
"xref_pubtator",
"xref_rnact",
"xref_emind",
]
UniprotkbGeneExpression: TypeAlias = Literal[
"xref_bgee",
Expand All @@ -879,16 +888,15 @@ class UniprotkbQueryDict(TypedDict):
"xref_hamap",
"xref_ideal",
"xref_interpro",
"xref_ncbifam",
"xref_panther",
"xref_pirsf",
"xref_prints",
"xref_prosite",
"xref_pfam",
"xref_prodom",
"xref_sfld",
"xref_smart",
"xref_supfam",
"xref_tigrfams",
]
UniprotkbFields: TypeAlias = Literal[
UniprotkbNamesTaxonomy,
Expand Down

0 comments on commit 7a97b8a

Please sign in to comment.