From 232b39b2735585c2565474c091a85d4a811b6c37 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Tue, 9 Apr 2024 18:18:13 +0200 Subject: [PATCH 1/2] Track obsoletions in EC Closes #178 --- src/pyobo/sources/expasy.py | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/src/pyobo/sources/expasy.py b/src/pyobo/sources/expasy.py index 658445c7..a116fbee 100644 --- a/src/pyobo/sources/expasy.py +++ b/src/pyobo/sources/expasy.py @@ -4,11 +4,11 @@ import logging from collections import defaultdict -from typing import Dict, Iterable, Mapping, Optional, Set, Tuple +from typing import Any, Dict, Iterable, Mapping, Optional, Set, Tuple from .utils import get_go_mapping from ..struct import Obo, Reference, Synonym, Term -from ..struct.typedef import enables, has_member +from ..struct.typedef import enables, has_member, term_replaced_by from ..utils.path import ensure_path __all__ = [ @@ -98,7 +98,24 @@ def get_terms(version: str, force: bool = False) -> Iterable[Term]: ec2go = get_ec2go(version=version) ec_code_to_alt_ids = {} - for ec_code, data in _data.items(): + for ec_code, data in id_to_data.items(): + if data.get("deleted"): + terms[ec_code] = Term( + reference=Reference(prefix=PREFIX, identifier=ec_code), is_obsolete=True + ) + continue + + transfer_ids = data.get("transfer_id") + if transfer_ids: + term = terms[ec_code] = Term( + reference=Reference(prefix=PREFIX, identifier=ec_code), is_obsolete=True + ) + for transfer_id in transfer_ids: + term.append_relationship( + term_replaced_by, Reference(prefix=PREFIX, identifier=transfer_id) + ) + continue + parent_ec_code = data["parent"]["identifier"] parent_term = terms[parent_ec_code] @@ -210,7 +227,7 @@ def get_database(lines: Iterable[str]) -> Mapping: for groups in _group_by_id(lines): _, expasy_id = groups[0] - rv[expasy_id] = ec_data_entry = { + ec_data_entry: Dict[str, Any] = { "concept": { "namespace": PREFIX, "identifier": expasy_id, @@ -230,10 +247,10 
@@ def get_database(lines: Iterable[str]) -> Mapping: if descriptor == "//": continue elif descriptor == DE and value == "Deleted entry.": - continue + ec_data_entry["deleted"] = True elif descriptor == DE and value.startswith("Transferred entry: "): - value = value[len("Transferred entry: ") :].rstrip() - ec_data_entry["transfer_id"] = value + value = value[len("Transferred entry: ") :].rstrip().rstrip(".") + ec_data_entry["transfer_id"] = value.split(" and ") elif descriptor == DE: ec_data_entry["concept"]["name"] = value.rstrip(".") # type:ignore elif descriptor == AN: @@ -259,11 +276,7 @@ def get_database(lines: Iterable[str]) -> Mapping: ) ) - for expasy_id, data in rv.items(): - transfer_id = data.pop("transfer_id", None) - if transfer_id is not None: - rv[expasy_id]["alt_ids"].append(transfer_id) # type:ignore - + rv[expasy_id] = ec_data_entry return rv From 51743496d51eae0b786fb201b8febc36dbf23bf4 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Wed, 17 Apr 2024 17:07:26 +0200 Subject: [PATCH 2/2] Rename _data to id_to_data in expasy.get_terms --- src/pyobo/sources/expasy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/pyobo/sources/expasy.py b/src/pyobo/sources/expasy.py index a116fbee..acd7b028 100644 --- a/src/pyobo/sources/expasy.py +++ b/src/pyobo/sources/expasy.py @@ -93,7 +93,7 @@ def get_terms(version: str, force: bool = False) -> Iterable[Term]: database_path = ensure_path(PREFIX, url=EXPASY_DATABASE_URL, version=version) with open(database_path) as file: - _data = get_database(file) + id_to_data = get_database(file) ec2go = get_ec2go(version=version)