From a0cfde13b6c5a3e6cebae5c20f18b89e07f92e85 Mon Sep 17 00:00:00 2001 From: Michael Milton Date: Wed, 14 Aug 2024 23:10:22 +1000 Subject: [PATCH 1/4] Regenerate type definitions --- unipressed/dataset/generated_types/uniparc.py | 2 ++ .../dataset/generated_types/uniprotkb.py | 18 ++---------------- unipressed/dataset/generated_types/uniref.py | 6 +++--- 3 files changed, 7 insertions(+), 19 deletions(-) diff --git a/unipressed/dataset/generated_types/uniparc.py b/unipressed/dataset/generated_types/uniparc.py index 9dcf220..b6f440c 100644 --- a/unipressed/dataset/generated_types/uniparc.py +++ b/unipressed/dataset/generated_types/uniparc.py @@ -128,6 +128,8 @@ class UniparcQueryDict(TypedDict): "Feature ID\ne.g. IPR004251" proteomecomponent: NotRequired[str] "Proteome Component\ne.g. chromosome" + organism_id: NotRequired[int] + "Organism ID\ne.g. 10254" UniparcQuery: TypeAlias = Union[UniparcQueryDict, str] diff --git a/unipressed/dataset/generated_types/uniprotkb.py b/unipressed/dataset/generated_types/uniprotkb.py index faa30d2..26ca202 100644 --- a/unipressed/dataset/generated_types/uniprotkb.py +++ b/unipressed/dataset/generated_types/uniprotkb.py @@ -728,7 +728,6 @@ class UniprotkbQueryDict(TypedDict): UniprotkbProteinFamilygroup: TypeAlias = Literal[ "xref_allergome", "xref_cazy", - "xref_clae", "xref_esther", "xref_imgt_gene-db", "xref_merops", @@ -755,19 +754,11 @@ class UniprotkbQueryDict(TypedDict): "xref_alzforum", "xref_biomuta", "xref_dmdm", "xref_dbsnp" ] UniprotkbTwodGel: TypeAlias = Literal[ - "xref_compluyeast-2dpage", - "xref_dosac-cobs-2dpage", - "xref_ogp", - "xref_reproduction-2dpage", - "xref_swiss-2dpage", - "xref_ucd-2dpage", - "xref_world-2dpage", + "xref_compluyeast-2dpage", "xref_ogp", "xref_reproduction-2dpage" ] UniprotkbProteomic: TypeAlias = Literal[ "xref_cptac", - "xref_epd", "xref_massive", - "xref_maxqb", "xref_pride", "xref_paxdb", "xref_peptideatlas", @@ -841,7 +832,6 @@ class UniprotkbQueryDict(TypedDict): "xref_genetree", 
"xref_hogenom", "xref_inparanoid", - "xref_ko", "xref_oma", "xref_orthodb", "xref_phylomedb", @@ -875,11 +865,7 @@ class UniprotkbQueryDict(TypedDict): "xref_emind", ] UniprotkbGeneExpression: TypeAlias = Literal[ - "xref_bgee", - "xref_cleanex", - "xref_collectf", - "xref_expressionatlas", - "xref_genevisible", + "xref_bgee", "xref_cleanex", "xref_collectf", "xref_expressionatlas" ] UniprotkbFamilyAndDomain: TypeAlias = Literal[ "xref_cdd", diff --git a/unipressed/dataset/generated_types/uniref.py b/unipressed/dataset/generated_types/uniref.py index 3430d8d..832ee52 100644 --- a/unipressed/dataset/generated_types/uniref.py +++ b/unipressed/dataset/generated_types/uniref.py @@ -47,7 +47,7 @@ class UnirefQueryDict(TypedDict): ] ] "Sequence length\ne.g. [100 TO 300]" - created: NotRequired[ + date_modified: NotRequired[ tuple[ Union[ date, @@ -59,7 +59,7 @@ class UnirefQueryDict(TypedDict): ], ] ] - "Date published\ne.g. [2011-10-10 TO 2019-10-10]" + "Date of last modification\ne.g. [2011-10-10 TO 2019-10-10]" uniprot_id: NotRequired[str] "UniProtKB ID/AC\ne.g. 
sample uniprot id" upi: NotRequired[str] @@ -78,7 +78,7 @@ class UnirefQueryDict(TypedDict): ] UnirefSequences: TypeAlias = Literal["identity", "length", "sequence"] UnirefMiscellaneous: TypeAlias = Literal["types", "members", "count"] -UnirefDateOf: TypeAlias = Literal["created",] +UnirefDateOf: TypeAlias = Literal["date_modified",] UnirefFields: TypeAlias = Literal[ UnirefNamesTaxonomy, UnirefSequences, UnirefMiscellaneous, UnirefDateOf ] From d6e846962e5f665ccfecf2f8406b8f651d99313e Mon Sep 17 00:00:00 2001 From: Michael Milton Date: Wed, 14 Aug 2024 23:15:04 +1000 Subject: [PATCH 2/4] Update changelog --- CHANGELOG.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a599d45..c4db802 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,6 +1,12 @@ ## Changelog +### 1.4.0 + +#### Changed + +* Auto-generated type definitions for the datasets have been regenerated [[#37](https://github.com/multimeric/Unipressed/pull/37)]. This pulls upstream changes from Uniprot. For a full list of changes [view this commit diff](https://github.com/multimeric/Unipressed/pull/31/commits/7e620c46175b6ec03e073fc78444a43e96821c31). 
+ ### 1.3.0 #### Changed From b7f1c310442a934a88e2df7e4c291e162c77a69c Mon Sep 17 00:00:00 2001 From: Michael Milton Date: Wed, 14 Aug 2024 23:21:43 +1000 Subject: [PATCH 3/4] Sleep longer, allow extraAttributes --- test/test_dataset/test_request.py | 1 + test/test_id_mapping/test_id_mapping.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/test/test_dataset/test_request.py b/test/test_dataset/test_request.py index 1ee17ae..c2a0886 100644 --- a/test/test_dataset/test_request.py +++ b/test/test_dataset/test_request.py @@ -88,6 +88,7 @@ def test_main_example(): ).each_record(): assert isinstance(record, dict) assert set(record.keys()) == { + "extraAttributes", "primaryAccession", "genes", "sequence", diff --git a/test/test_id_mapping/test_id_mapping.py b/test/test_id_mapping/test_id_mapping.py index 6b21ef9..c683ef0 100644 --- a/test/test_id_mapping/test_id_mapping.py +++ b/test/test_id_mapping/test_id_mapping.py @@ -126,7 +126,7 @@ def test_gene_names(): ) # At this point it might be already finished assert request.get_status() in {"RUNNING", "FINISHED"} - sleep(1) + sleep(5) # At this point it should definitely be finished assert request.get_status() == "FINISHED" for result in request.each_result(): From 5e6d806355e46c24c7c55eb825a27e61bfde97ff Mon Sep 17 00:00:00 2001 From: Michael Milton Date: Wed, 14 Aug 2024 23:26:32 +1000 Subject: [PATCH 4/4] Expand on the upstream type changes --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index c4db802..c70cd5b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,8 @@ #### Changed * Auto-generated type definitions for the datasets have been regenerated [[#37](https://github.com/multimeric/Unipressed/pull/37)]. This pulls upstream changes from Uniprot. For a full list of changes [view this commit diff](https://github.com/multimeric/Unipressed/pull/31/commits/7e620c46175b6ec03e073fc78444a43e96821c31). 
+ * Uniref's `created` `query` parameter has been replaced by `date_modified` + * UniprotKB has lost various `fields`: `xref_clae`, `xref_dosac-cobs-2dpage`, `xref_swiss-2dpage`, `xref_ucd-2dpage`, `xref_world-2dpage`, `xref_epd`, `xref_maxqb`, `xref_ko` and `xref_genevisible` ### 1.3.0