Skip to content

Commit

Permalink
Update synonym parsing to handle OMO CURIEs (#166)
Browse files Browse the repository at this point in the history
* Update synonym parsing

* Handle preferred curies

* Enable searching for synonym type even without specificity
  • Loading branch information
cthoyt authored Jan 4, 2024
1 parent dc9ad7d commit 58ab2b8
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 14 deletions.
31 changes: 17 additions & 14 deletions src/pyobo/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -444,20 +444,23 @@ def _extract_synonym(
break

stype: Optional[SynonymTypeDef] = None
if specificity is not None: # go fishing for a synonym type definition
for _stype in synonym_typedefs.values():
# Since there aren't a lot of carefully defined synonym definitions, it
# can appear as a string or curie. Therefore, we might see temporary prefixes
# get added, so we should check against full curies as well as local unique
# identifiers
if rest.startswith(_stype.curie):
rest = rest[len(_stype.curie) :].strip()
stype = _stype
break
elif rest.startswith(_stype.identifier):
rest = rest[len(_stype.identifier) :].strip()
stype = _stype
break
for _stype in synonym_typedefs.values():
# Synonym types are rarely declared carefully, so the type may be written
# either as a CURIE or as a bare string. Temporary prefixes may therefore
# have been added, so check against the full CURIE, the preferred CURIE,
# and the local unique identifier.
if rest.startswith(_stype.curie):
rest = rest[len(_stype.curie) :].strip()
stype = _stype
break
elif rest.startswith(_stype.preferred_curie):
rest = rest[len(_stype.preferred_curie) :].strip()
stype = _stype
break
elif rest.startswith(_stype.identifier):
rest = rest[len(_stype.identifier) :].strip()
stype = _stype
break

if not rest.startswith("[") or not rest.endswith("]"):
logger.warning("[%s:%s] problem with synonym: %s", prefix, identifier, s)
Expand Down
18 changes: 18 additions & 0 deletions tests/test_get.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
iterate_node_synonyms,
iterate_node_xrefs,
)
from pyobo.struct.struct import acronym
from tests.constants import TEST_CHEBI_OBO_PATH, chebi_patch


Expand Down Expand Up @@ -105,6 +106,7 @@ def test_extract_synonym(self):
iupac_name = SynonymTypeDef.from_text("IUPAC NAME", lower=False)
synoynym_typedefs = {
"IUPAC_NAME": iupac_name,
acronym.curie: acronym,
}

for expected_synonym, text in [
Expand Down Expand Up @@ -137,6 +139,22 @@ def test_extract_synonym(self):
Synonym(name="LTEC I", specificity="EXACT"),
'"LTEC I" []',
),
(
Synonym(name="HAdV-A", specificity="BROAD", type=acronym),
'"HAdV-A" BROAD OMO:0003012 []',
),
(
Synonym(name="HAdV-A", specificity="BROAD", type=acronym),
'"HAdV-A" BROAD omo:0003012 []',
),
(
Synonym(name="HAdV-A", specificity="EXACT", type=acronym),
'"HAdV-A" OMO:0003012 []',
),
(
Synonym(name="HAdV-A", specificity="EXACT", type=acronym),
'"HAdV-A" omo:0003012 []',
),
]:
with self.subTest(s=text):
actual_synonym = _extract_synonym(
Expand Down

0 comments on commit 58ab2b8

Please sign in to comment.