Skip to content

Commit

Permalink
Handle CVX obsolete/replaced/placeholder entries (#171)
Browse files Browse the repository at this point in the history
* Skip CVX obsolete/placeholder entries

* Add replacement info
  • Loading branch information
cthoyt authored Feb 21, 2024
1 parent ca4ea65 commit 1b6450a
Show file tree
Hide file tree
Showing 2 changed files with 24 additions and 2 deletions.
20 changes: 18 additions & 2 deletions src/pyobo/sources/cvx.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@

import pandas as pd

from pyobo import Obo, Term
from pyobo import Obo, Reference, Term

__all__ = [
"CVXGetter",
Expand All @@ -28,6 +28,11 @@ def iter_terms(self, force: bool = False) -> Iterable[Term]:
return iter_terms()


# CVX identifiers that are treated as obsolete even when their notes
# do not contain a "do not use" marker.
# This entry got split, and it's not obvious how to deal with that.
MANUAL_OBSOLETE = {"15"}
# Maps an obsolete CVX identifier to the identifier of the CVX term that
# replaces it; only consulted for terms already flagged as obsolete.
REPLACEMENTS = {"31": "85", "154": "86", "180": "13"}


def iter_terms() -> Iterable[Term]:
"""Iterate over terms in CVX."""
dd = defaultdict(set)
Expand Down Expand Up @@ -60,11 +65,22 @@ def iter_terms() -> Iterable[Term]:
cvx_df[col] = cvx_df[col].map(lambda s: s.strip() if pd.notna(s) else s)
terms = {}
for cvx, short_name, full_name, notes, status, nonvaccine, _updated in cvx_df.values:
term = Term.from_triple(PREFIX, cvx, full_name)
if cvx == "99":
continue # this is a placeholder

is_obsolete = cvx in MANUAL_OBSOLETE or (pd.notna(notes) and "do not use" in notes.lower())
term = Term(
reference=Reference(prefix=PREFIX, identifier=cvx, name=full_name),
is_obsolete=is_obsolete,
)
if short_name != full_name:
term.append_synonym(short_name)
if pd.notna(notes):
term.append_comment(notes)
if is_obsolete:
replacement_identifier = REPLACEMENTS.get(cvx)
if replacement_identifier:
term.append_replaced_by(Reference(prefix=PREFIX, identifier=replacement_identifier))
if pd.notna(status):
term.append_property("status", status)
if pd.notna(nonvaccine):
Expand Down
6 changes: 6 additions & 0 deletions src/pyobo/struct/struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@
orthologous,
part_of,
see_also,
term_replaced_by,
)
from .utils import comma_separate, obo_escape_slim
from ..constants import (
Expand Down Expand Up @@ -299,6 +300,11 @@ def append_comment(self, value: str) -> "Term":
self.append_property(comment.curie, value)
return self

def append_replaced_by(self, reference: ReferenceHint) -> "Term":
    """Add a replaced by relationship.

    :param reference: The reference for the term that replaces this one
    :returns: This term, so calls can be chained
    """
    # Stored as an ordinary relationship keyed by the ``term_replaced_by`` type definition
    self.append_relationship(term_replaced_by, reference)
    return self

def append_parent(self, reference: ReferenceHint) -> "Term":
"""Add a parent to this entity."""
reference = _ensure_ref(reference)
Expand Down

0 comments on commit 1b6450a

Please sign in to comment.