Skip to content

Commit

Permalink
Additional improvements for OFN output (#301)
Browse files Browse the repository at this point in the history
1. The debio issue seems to be getting closer to being solved, but for
InChI and SMILES the chemrof predicates work where the debio ones don't.
The debio predicates do seem to work in the miRBase exporter when they
are used for object properties.
2. Skip more bad IDs from BiGG
3. Output the oboInOwl prefix to OFN if auto-generated-by is used, but
not to OBO, since it's built in there
  • Loading branch information
cthoyt authored Jan 13, 2025
1 parent 27e7708 commit 8cc1c97
Show file tree
Hide file tree
Showing 8 changed files with 60 additions and 40 deletions.
7 changes: 5 additions & 2 deletions src/pyobo/sources/bigg/bigg_metabolite.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Converter for metabolites in BiGG."""

import logging
import re
from collections.abc import Iterable

Expand All @@ -15,6 +16,8 @@
"BiGGMetaboliteGetter",
]

logger = logging.getLogger(__name__)

PREFIX = "bigg.metabolite"
URL = "http://bigg.ucsd.edu/static/namespace/bigg_models_metabolites.txt"
PATTERN = re.compile("^[a-z_A-Z0-9]+$")
Expand Down Expand Up @@ -105,7 +108,7 @@ def iterate_terms(force: bool = False, version: str | None = None) -> Iterable[T
)
if pd.notna(bigg_compartmental_id):
if not PATTERN.match(bigg_compartmental_id):
tqdm.write(
logger.debug(
f"[{PREFIX}:{universal_bigg_id}] invalid compartment ID: {bigg_compartmental_id}"
)
else:
Expand All @@ -116,7 +119,7 @@ def iterate_terms(force: bool = False, version: str | None = None) -> Iterable[T
if not PATTERN.match(old_bigg_id):
if not old_bigg_id.endswith("]"):
# if it ends with ']' then it's a compartment identifier
tqdm.write(f"[{PREFIX}:{universal_bigg_id}] invalid alt ID: {old_bigg_id}")
logger.debug(f"[{PREFIX}:{universal_bigg_id}] invalid alt ID: {old_bigg_id}")
continue
term.append_alt(Reference(prefix=PREFIX, identifier=old_bigg_id))
_parse_model_links(term, model_list)
Expand Down
6 changes: 6 additions & 0 deletions src/pyobo/sources/bigg/bigg_reaction.py
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,10 @@ def iterate_terms(force: bool = False, version: str | None = None) -> Iterable[T
for bigg_id, name, reaction_string, model_list, database_links, old_bigg_ids in tqdm(
bigg_reaction_df.values, unit_scale=True, unit="reaction", desc=f"[{PREFIX}] processing"
):
if "(" in bigg_id:
tqdm.write(f"[{PREFIX}] identifier has open paren. can't encode in OWL: {bigg_id}")
continue

term = Term(
reference=Reference(
prefix=PREFIX, identifier=bigg_id, name=name if pd.notna(name) else None
Expand All @@ -56,6 +60,8 @@ def iterate_terms(force: bool = False, version: str | None = None) -> Iterable[T
for old_bigg_id in _split(old_bigg_ids):
if old_bigg_id == bigg_id:
continue
if "(" in old_bigg_id:
continue
term.append_alt(Reference(prefix=PREFIX, identifier=old_bigg_id))
_parse_model_links(term, model_list)

Expand Down
49 changes: 25 additions & 24 deletions src/pyobo/sources/rhea.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,17 +7,8 @@
import pystow

from pyobo.api.utils import get_version
from pyobo.struct import Obo, Reference, Term
from pyobo.struct.typedef import (
enabled_by,
has_bidirectional_reaction,
has_input,
has_left_to_right_reaction,
has_output,
has_participant,
has_right_to_left_reaction,
reaction_enabled_by_molecular_function,
)
from pyobo.struct import Obo, Reference, Term, TypeDef
from pyobo.struct import typedef as v
from pyobo.utils.path import ensure_df

if TYPE_CHECKING:
Expand All @@ -31,6 +22,16 @@
PREFIX = "rhea"
RHEA_RDF_GZ_URL = "ftp://ftp.expasy.org/databases/rhea/rdf/rhea.rdf.gz"

has_left_to_right_reaction = TypeDef.default(
PREFIX, "hasLeftToRightReaction", name="has left to right reaction", is_metadata_tag=True
).append_xref(v.has_left_to_right_reaction)
has_right_to_left_reaction = TypeDef.default(
PREFIX, "hasRightToLeftReaction", name="has right to left reaction", is_metadata_tag=True
).append_xref(v.has_right_to_left_reaction)
has_bidirectional_reaction = TypeDef.default(
PREFIX, "hasBidirectionalReaction", name="has bidirectional reaction", is_metadata_tag=True
).append_xref(v.has_bidirectional_reaction)


class RheaGetter(Obo):
"""An ontology representation of Rhea's chemical reaction database."""
Expand All @@ -40,11 +41,11 @@ class RheaGetter(Obo):
has_left_to_right_reaction,
has_bidirectional_reaction,
has_right_to_left_reaction,
enabled_by,
has_input,
has_output,
has_participant,
reaction_enabled_by_molecular_function,
v.enabled_by,
v.has_input,
v.has_output,
v.has_participant,
v.reaction_enabled_by_molecular_function,
]

def iter_terms(self, force: bool = False) -> Iterable[Term]:
Expand Down Expand Up @@ -159,10 +160,10 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
right_rhea_id = master_to_left[master_rhea_id]
else:
raise ValueError(f"Invalid side: {side_uri}")
terms[master_rhea_id].annotate_object(has_participant, chebi_reference)
terms[master_to_bi[master_rhea_id]].annotate_object(has_participant, chebi_reference)
terms[left_rhea_id].append_relationship(has_input, chebi_reference)
terms[right_rhea_id].append_relationship(has_output, chebi_reference)
terms[master_rhea_id].annotate_object(v.has_participant, chebi_reference)
terms[master_to_bi[master_rhea_id]].annotate_object(v.has_participant, chebi_reference)
terms[left_rhea_id].append_relationship(v.has_input, chebi_reference)
terms[right_rhea_id].append_relationship(v.has_output, chebi_reference)

hierarchy = ensure_df(
PREFIX,
Expand All @@ -181,8 +182,8 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
("reactome", "rhea2reactome", None),
("macie", "rhea2macie", None),
("metacyc", "rhea2metacyc", None),
("go", "rhea2go", reaction_enabled_by_molecular_function),
("uniprot", "rhea2uniprot", enabled_by),
("go", "rhea2go", v.reaction_enabled_by_molecular_function),
("uniprot", "rhea2uniprot", v.enabled_by),
]:
xref_df = ensure_df(
PREFIX,
Expand Down Expand Up @@ -223,11 +224,11 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
_iubmb,
) in ec_df.values:
terms[directional_rhea_id].append_relationship(
enabled_by, Reference(prefix="eccode", identifier=ec)
v.enabled_by, Reference(prefix="eccode", identifier=ec)
)

yield from terms.values()


if __name__ == "__main__":
RheaGetter.cli()
RheaGetter.cli(["--owl"])
2 changes: 2 additions & 0 deletions src/pyobo/struct/functional/dsl.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,6 +164,8 @@ def to_funowl(self) -> str:
"""Represent this identifier for functional OWL."""
if isinstance(self.identifier, term.URIRef):
return f"<{self.identifier}>"
if any(c in self.identifier.identifier for c in "()"):
raise ValueError(f"Can't encode CURIE with parentheses to OFN: {self.identifier}")
return self.identifier.curie

def to_funowl_args(self) -> str: # pragma: no cover
Expand Down
8 changes: 7 additions & 1 deletion src/pyobo/struct/struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -658,6 +658,8 @@ def _get_prefixes(self) -> set[str]:
# _iterate_property_pairs covers metadata, root terms,
# and properties in self.property_values
prefixes.update(_get_prefixes_from_annotations(self._iterate_property_pairs() or []))
if self.auto_generated_by:
prefixes.add("oboInOwl")
return prefixes

def _get_version(self) -> str | None:
Expand Down Expand Up @@ -808,12 +810,16 @@ def iterate_obo_lines(
# 10 TODO namespace-id-rule
# 11
for prefix, url in sorted(self._get_clean_idspaces().items()):
if prefix in DEFAULT_PREFIX_MAP or prefix == "obo":
if prefix in DEFAULT_PREFIX_MAP:
# we don't need to write out the 4 default prefixes from
# table 2 in https://www.w3.org/TR/owl2-syntax/#IRIs since
# they're considered to always be builtin
continue

# additional assumptions about built in
if prefix in {"obo", "oboInOwl"}:
continue

# ROBOT assumes that all OBO foundry prefixes are builtin,
# so don't re-declare them
if bioregistry.is_obo_foundry(prefix):
Expand Down
18 changes: 7 additions & 11 deletions src/pyobo/struct/typedef.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@
from curies import ReferenceTuple

from . import vocabulary as v
from .reference import Reference
from .reference import Reference, default_reference
from .struct import TypeDef
from ..resources.ro import load_ro

Expand Down Expand Up @@ -73,10 +73,12 @@
has_left_to_right_reaction = TypeDef(v.has_left_to_right_reaction, is_metadata_tag=True)
has_right_to_left_reaction = TypeDef(v.has_right_to_left_reaction, is_metadata_tag=True)
has_bidirectional_reaction = TypeDef(
Reference(prefix="debio", identifier="0000009", name="has bi-directional reaction"),
reference=default_reference("RO", "hasBiDirectionalReaction"),
is_metadata_tag=True,
)
).append_xref(Reference(prefix="debio", identifier="0000009", name="has bi-directional reaction"))
reaction_enabled_by_molecular_function = TypeDef(
reference=default_reference("RO", "reactionEnabledByMolecularFunction")
).append_xref(
Reference(prefix="debio", identifier="0000047", name="reaction enabled by molecular function")
)

Expand Down Expand Up @@ -249,15 +251,9 @@
range=Reference(prefix="IAO", identifier="0000013", name="journal article"),
)

has_smiles = TypeDef(
reference=Reference(prefix="debio", identifier="0000022", name="has SMILES"),
is_metadata_tag=True,
)
has_smiles = TypeDef(reference=v.has_smiles, is_metadata_tag=True).append_xref(v.debio_has_smiles)

has_inchi = TypeDef(
reference=Reference(prefix="debio", identifier="0000020", name="has InChI"),
is_metadata_tag=True,
)
has_inchi = TypeDef(reference=v.has_inchi, is_metadata_tag=True).append_xref(v.debio_has_inchi)

has_homepage = TypeDef(
reference=Reference(prefix="foaf", identifier="homepage", name="homepage"), is_metadata_tag=True
Expand Down
6 changes: 6 additions & 0 deletions src/pyobo/struct/vocabulary.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,12 @@ def _c(c: curies.NamedReference) -> Reference:
has_right_to_left_reaction = Reference(
prefix="debio", identifier="0000008", name="has right-to-left reaction"
)
debio_has_inchi = Reference(prefix="debio", identifier="0000020", name="has InChI")
has_inchi = Reference(prefix="chemrof", identifier="inchi_string")

debio_has_smiles = Reference(prefix="debio", identifier="0000022", name="has SMILES")
has_smiles = Reference(prefix="chemrof", identifier="smiles_string")

# TODO update to use debio, or put in RO
has_citation = default_reference(prefix="RO", identifier="hasCitation", name="has citation")
has_description = Reference(prefix="dcterms", identifier="description", name="description")
Expand Down
4 changes: 2 additions & 2 deletions tests/test_struct/test_obo/test_typedef.py
Original file line number Diff line number Diff line change
Expand Up @@ -373,15 +373,15 @@ def test_11_property_value(self) -> None:
[Typedef]
id: RO:0000087
property_value: dcterms:contributor orcid:0000-0003-4423-4370 ! contributor Charles Tapley Hoyt
property_value: debio:0000020 "abc" xsd:string
property_value: ChEMROF:inchi_string "abc" xsd:string
""",
typedef,
)
self.assert_funowl_lines(
"""\
Declaration(ObjectProperty(RO:0000087))
AnnotationAssertion(dcterms:contributor RO:0000087 orcid:0000-0003-4423-4370)
AnnotationAssertion(debio:0000020 RO:0000087 "abc"^^xsd:string)
AnnotationAssertion(ChEMROF:inchi_string RO:0000087 "abc"^^xsd:string)
""",
typedef,
)
Expand Down

0 comments on commit 8cc1c97

Please sign in to comment.