From 1b6450a713dd05b3d364084addb9d3cdfddd5f80 Mon Sep 17 00:00:00 2001 From: Charles Tapley Hoyt Date: Wed, 21 Feb 2024 10:53:16 +0100 Subject: [PATCH] Handle CVX obsolete/replaced/placeholder entries (#171) * Skip CVX obsolete/placeholder entries * Add replacement info --- src/pyobo/sources/cvx.py | 20 ++++++++++++++++++-- src/pyobo/struct/struct.py | 6 ++++++ 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/src/pyobo/sources/cvx.py b/src/pyobo/sources/cvx.py index 82410d07..d48a16e5 100644 --- a/src/pyobo/sources/cvx.py +++ b/src/pyobo/sources/cvx.py @@ -7,7 +7,7 @@ import pandas as pd -from pyobo import Obo, Term +from pyobo import Obo, Reference, Term __all__ = [ "CVXGetter", @@ -28,6 +28,11 @@ def iter_terms(self, force: bool = False) -> Iterable[Term]: return iter_terms() +# This got split, and it's not obvious how to deal with it +MANUAL_OBSOLETE = {"15"} +REPLACEMENTS = {"31": "85", "154": "86", "180": "13"} + + def iter_terms() -> Iterable[Term]: """Iterate over terms in CVX.""" dd = defaultdict(set) @@ -60,11 +65,22 @@ def iter_terms() -> Iterable[Term]: cvx_df[col] = cvx_df[col].map(lambda s: s.strip() if pd.notna(s) else s) terms = {} for cvx, short_name, full_name, notes, status, nonvaccine, _updated in cvx_df.values: - term = Term.from_triple(PREFIX, cvx, full_name) + if cvx == "99": + continue # this is a placeholder + + is_obsolete = cvx in MANUAL_OBSOLETE or (pd.notna(notes) and "do not use" in notes.lower()) + term = Term( + reference=Reference(prefix=PREFIX, identifier=cvx, name=full_name), + is_obsolete=is_obsolete, + ) if short_name != full_name: term.append_synonym(short_name) if pd.notna(notes): term.append_comment(notes) + if is_obsolete: + replacement_identifier = REPLACEMENTS.get(cvx) + if replacement_identifier: + term.append_replaced_by(Reference(prefix=PREFIX, identifier=replacement_identifier)) if pd.notna(status): term.append_property("status", status) if pd.notna(nonvaccine): diff --git a/src/pyobo/struct/struct.py 
b/src/pyobo/struct/struct.py index 03c79f80..db9d6ec8 100644 --- a/src/pyobo/struct/struct.py +++ b/src/pyobo/struct/struct.py @@ -53,6 +53,7 @@ orthologous, part_of, see_also, + term_replaced_by, ) from .utils import comma_separate, obo_escape_slim from ..constants import ( @@ -299,6 +300,11 @@ def append_comment(self, value: str) -> "Term": self.append_property(comment.curie, value) return self + def append_replaced_by(self, reference: ReferenceHint) -> "Term": + """Add a replaced by relationship.""" + self.append_relationship(term_replaced_by, reference) + return self + def append_parent(self, reference: ReferenceHint) -> "Term": """Add a parent to this entity.""" reference = _ensure_ref(reference)