Skip to content

Commit

Permalink
Update interface for Stanza.annotate_literal (#325)
Browse files Browse the repository at this point in the history
This PR adds `Stanza.annotate_X` methods for common datatypes and makes
`Stanza.annotate_literal` accept only OBO literals. This will make it
easier to extend the OBO literal interface, e.g., with language codes.
  • Loading branch information
cthoyt authored Jan 19, 2025
1 parent 4ac0271 commit 4509aab
Show file tree
Hide file tree
Showing 15 changed files with 109 additions and 48 deletions.
4 changes: 2 additions & 2 deletions src/pyobo/sources/chembl/chembl_compound.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,9 +54,9 @@ def iter_terms(version: str) -> Iterable[Term]:
# TODO add xrefs?
term = Term.from_triple(prefix=PREFIX, identifier=chembl_id, name=name)
if smiles:
term.annotate_literal(has_smiles, smiles)
term.annotate_string(has_smiles, smiles)
if inchi:
term.annotate_literal(has_inchi, inchi)
term.annotate_string(has_inchi, inchi)
if inchi_key:
term.append_exact_match(Reference(prefix="inchikey", identifier=inchi_key))
yield term
Expand Down
2 changes: 1 addition & 1 deletion src/pyobo/sources/cvx.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def iter_terms() -> Iterable[Term]:
if replacement_identifier:
term.append_replaced_by(Reference(prefix=PREFIX, identifier=replacement_identifier))
if pd.notna(status):
term.annotate_literal(STATUS, status)
term.annotate_string(STATUS, status)
if pd.notna(nonvaccine):
term.annotate_boolean(NONVACCINE, nonvaccine)
terms[cvx] = term
Expand Down
6 changes: 3 additions & 3 deletions src/pyobo/sources/drugbank/drugbank.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,10 +115,10 @@ def _make_term(drug_info: Mapping[str, Any]) -> Term:
if identifier:
term.append_xref(Reference(prefix=xref_prefix, identifier=identifier))

for prop, debio_curie in [("smiles", has_smiles), ("inchi", has_inchi)]:
identifier = drug_info.get(prop)
for key, typedef_ in [("smiles", has_smiles), ("inchi", has_inchi)]:
identifier = drug_info.get(key)
if identifier:
term.annotate_literal(debio_curie, identifier)
term.annotate_string(typedef_, identifier)

for salt in drug_info.get("salts", []):
term.annotate_object(
Expand Down
4 changes: 2 additions & 2 deletions src/pyobo/sources/drugcentral.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,9 +87,9 @@ def iter_terms() -> Iterable[Term]:
if inchi_key:
term.append_exact_match(Reference(prefix="inchikey", identifier=inchi_key))
if smiles:
term.annotate_literal(has_smiles, smiles)
term.annotate_string(has_smiles, smiles)
if inchi:
term.annotate_literal(has_inchi, inchi)
term.annotate_string(has_inchi, inchi)
if cas:
term.append_exact_match(Reference(prefix="cas", identifier=cas))
yield term
Expand Down
6 changes: 3 additions & 3 deletions src/pyobo/sources/geonames/geonames.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ def get_code_to_country(*, force: bool = False) -> Mapping[str, Term]:
term.append_synonym(fips)
if pd.notna(iso3):
term.append_synonym(iso3)
term.annotate_literal(CODE_TYPEDEF, code)
term.annotate_string(CODE_TYPEDEF, code)
code_to_country[code] = term
logger.info(f"got {len(code_to_country):,} country records")
return code_to_country
Expand Down Expand Up @@ -151,7 +151,7 @@ def get_code_to_admin1(
type="Instance",
)
term.append_parent(ADMIN_1)
term.annotate_literal(CODE_TYPEDEF, code)
term.annotate_string(CODE_TYPEDEF, code)
code_to_admin1[code] = term

country_code = code.split(".")[0]
Expand Down Expand Up @@ -183,7 +183,7 @@ def get_code_to_admin2(
type="Instance",
)
term.append_parent(ADMIN_2)
term.annotate_literal(CODE_TYPEDEF, code)
term.annotate_string(CODE_TYPEDEF, code)
code_to_admin2[code] = term
admin1_code = code.rsplit(".", 1)[0]
admin1_term = code_to_admin1.get(admin1_code)
Expand Down
6 changes: 3 additions & 3 deletions src/pyobo/sources/hgnc/hgnc.py
Original file line number Diff line number Diff line change
Expand Up @@ -396,7 +396,7 @@ def get_terms(version: str | None = None, force: bool = False) -> Iterable[Term]
for prop, td in [("location", HAS_LOCATION)]:
value = entry.pop(prop, None)
if value:
term.annotate_literal(td, value)
term.annotate_string(td, value)

locus_type = entry.pop("locus_type")
locus_group = entry.pop("locus_group")
Expand All @@ -408,8 +408,8 @@ def get_terms(version: str | None = None, force: bool = False) -> Iterable[Term]
Reference(prefix="SO", identifier="0000704", name=get_so_name("0000704"))
) # gene
unhandle_locus_types[locus_type][identifier] = term
term.annotate_literal(HAS_LOCUS_TYPE, locus_type)
term.annotate_literal(HAS_LOCUS_GROUP, locus_group)
term.annotate_string(HAS_LOCUS_TYPE, locus_type)
term.annotate_string(HAS_LOCUS_GROUP, locus_group)

term.set_species(identifier="9606", name="Homo sapiens")

Expand Down
2 changes: 1 addition & 1 deletion src/pyobo/sources/icd/icd10.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ def _extract_icd10(res_json: Mapping[str, Any]) -> Term:
synonyms=synonyms,
parents=parents,
)
rv.annotate_literal(has_category, res_json["classKind"])
rv.annotate_string(has_category, res_json["classKind"])

return rv

Expand Down
2 changes: 1 addition & 1 deletion src/pyobo/sources/interpro.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,7 @@ def iter_terms(*, version: str, proteins: bool = False, force: bool = False) ->
term.append_relationship(
enables, Reference(prefix="go", identifier=go_id, name=go_name)
)
term.annotate_literal(has_category, entry_type)
term.annotate_string(has_category, entry_type)
for uniprot_id in interpro_to_proteins.get(identifier, []):
term.append_relationship(has_member, Reference(prefix="uniprot", identifier=uniprot_id))
yield term
Expand Down
2 changes: 1 addition & 1 deletion src/pyobo/sources/msigdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,7 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:

for key, typedef in PROPERTIES:
if value := attrib[key].strip():
term.annotate_literal(typedef, value)
term.annotate_string(typedef, value)

term.set_species(tax_id)

Expand Down
2 changes: 1 addition & 1 deletion src/pyobo/sources/pathbank.py
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ def iter_terms(version: str, force: bool = False) -> Iterable[Term]:
# but there are weird parser errors
)
term.append_exact_match(Reference(prefix="smpdb", identifier=smpdb_id))
term.annotate_literal(has_category, subject.lower().replace(" ", "_"))
term.annotate_string(has_category, subject.lower().replace(" ", "_"))
for participant in chain(smpdb_id_to_proteins[smpdb_id], smpdb_id_to_metabolites[smpdb_id]):
term.append_relationship(has_participant, participant)
yield term
Expand Down
2 changes: 1 addition & 1 deletion src/pyobo/sources/pombase.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def get_terms(version: str, force: bool = False) -> Iterable[Term]:
name=symbol if pd.notna(symbol) else None,
definition=name if pd.notna(name) else None,
)
term.annotate_literal(CHROMOSOME, chromosome[len("chromosome_") :])
term.annotate_string(CHROMOSOME, chromosome[len("chromosome_") :])
term.append_parent(so[gtype])
term.set_species(identifier="4896", name="Schizosaccharomyces pombe")
for hgnc_id in identifier_to_hgnc_ids.get(identifier, []):
Expand Down
6 changes: 3 additions & 3 deletions src/pyobo/sources/slm.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,18 +86,18 @@ def iter_terms(version: str, force: bool = False):
raise ValueError(identifier)
term = Term.from_triple(PREFIX, identifier, name)
if pd.notna(level):
term.annotate_literal(LEVEL, level)
term.annotate_string(LEVEL, level)
if pd.notna(abbreviation):
term.append_synonym(abbreviation, type=abbreviation_typedef)
if pd.notna(synonyms):
for synonym in synonyms.split("|"):
term.append_synonym(synonym.strip())
if pd.notna(smiles):
term.annotate_literal(has_smiles, smiles)
term.annotate_string(has_smiles, smiles)
if pd.notna(inchi) and inchi != "InChI=none":
if inchi.startswith("InChI="):
inchi = inchi[len("InChI=") :]
term.annotate_literal(has_inchi, inchi)
term.annotate_string(has_inchi, inchi)
if pd.notna(inchikey):
inchikey = inchikey.removeprefix("InChIKey=").strip()
if inchikey and inchikey != "none":
Expand Down
29 changes: 28 additions & 1 deletion src/pyobo/struct/reference.py
Original file line number Diff line number Diff line change
Expand Up @@ -302,10 +302,37 @@ class OBOLiteral(NamedTuple):
datatype: Reference

@classmethod
def string(cls, value: str) -> OBOLiteral:
def string(cls, value: str, *, language: str | None = None) -> OBOLiteral:
"""Get a string literal."""
if language:
raise NotImplementedError
return cls(value, Reference(prefix="xsd", identifier="string"))

@classmethod
def boolean(cls, value: bool) -> OBOLiteral:
"""Get a boolean literal."""
return cls(str(value).lower(), Reference(prefix="xsd", identifier="boolean"))

@classmethod
def decimal(cls, value) -> OBOLiteral:
"""Get a decimal literal."""
return cls(str(value), Reference(prefix="xsd", identifier="decimal"))

@classmethod
def float(cls, value) -> OBOLiteral:
"""Get a float literal."""
return cls(str(value), Reference(prefix="xsd", identifier="float"))

@classmethod
def integer(cls, value: int | str) -> OBOLiteral:
"""Get a integer literal."""
return cls(str(int(value)), Reference(prefix="xsd", identifier="integer"))

@classmethod
def year(cls, value: int | str) -> OBOLiteral:
"""Get a year (gYear) literal."""
return cls(str(int(value)), Reference(prefix="xsd", identifier="gYear"))

@classmethod
def uri(cls, uri: str) -> OBOLiteral:
"""Get a string literal for a URI."""
Expand Down
82 changes: 58 additions & 24 deletions src/pyobo/struct/struct_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -335,46 +335,76 @@ def append_property(
def annotate_literal(
self,
prop: ReferenceHint,
value: str | OBOLiteral,
datatype: Reference | None = None,
value: OBOLiteral,
*,
annotations: Iterable[Annotation] | None = None,
) -> Self:
"""Append an object annotation."""
prop = _ensure_ref(prop)
if isinstance(value, str):
literal = OBOLiteral(value, datatype or v.xsd_string)
elif datatype is not None:
raise ValueError("can not pass pre-instantiated literal with a datatype")
else: # the value is a pre-instantiated OBOLiteral
literal = value
self.properties[prop].append(literal)
self._extend_annotations(prop, literal, annotations)
self.properties[prop].append(value)
self._extend_annotations(prop, value, annotations)
return self

def annotate_boolean(self, prop: ReferenceHint, value: bool) -> Self:
def annotate_string(
self,
prop: ReferenceHint,
value: str,
*,
annotations: Iterable[Annotation] | None = None,
language: str | None = None,
) -> Self:
"""Append an object annotation."""
return self.annotate_literal(prop, str(value).lower(), v.xsd_boolean)
return self.annotate_literal(
prop, OBOLiteral.string(value, language=language), annotations=annotations
)

def annotate_integer(self, prop: ReferenceHint, value: int | str) -> Self:
def annotate_boolean(
self,
prop: ReferenceHint,
value: bool,
*,
annotations: Iterable[Annotation] | None = None,
) -> Self:
"""Append an object annotation."""
return self.annotate_literal(prop, str(int(value)), v.xsd_integer)
return self.annotate_literal(prop, OBOLiteral.boolean(value), annotations=annotations)

def annotate_float(self, prop: ReferenceHint, value: float) -> Self:
def annotate_integer(
self,
prop: ReferenceHint,
value: int | str,
*,
annotations: Iterable[Annotation] | None = None,
) -> Self:
"""Append an object annotation."""
return self.annotate_literal(prop, OBOLiteral.integer(value), annotations=annotations)

def annotate_float(
self, prop: ReferenceHint, value: float, *, annotations: Iterable[Annotation] | None = None
) -> Self:
"""Append a float annotation."""
return self.annotate_literal(prop, str(value), v.xsd_float)
return self.annotate_literal(prop, OBOLiteral.float(value), annotations=annotations)

def annotate_decimal(self, prop: ReferenceHint, value: float) -> Self:
def annotate_decimal(
self, prop: ReferenceHint, value: float, *, annotations: Iterable[Annotation] | None = None
) -> Self:
"""Append a decimal annotation."""
return self.annotate_literal(prop, str(value), v.xsd_decimal)
return self.annotate_literal(prop, OBOLiteral.decimal(value), annotations=annotations)

def annotate_year(self, prop: ReferenceHint, value: int | str) -> Self:
def annotate_year(
self,
prop: ReferenceHint,
value: int | str,
*,
annotations: Iterable[Annotation] | None = None,
) -> Self:
"""Append a year annotation."""
return self.annotate_literal(prop, str(int(value)), v.xsd_year)
return self.annotate_literal(prop, OBOLiteral.year(value), annotations=annotations)

def annotate_uri(self, prop: ReferenceHint, value: str) -> Self:
def annotate_uri(
self, prop: ReferenceHint, value: str, *, annotations: Iterable[Annotation] | None = None
) -> Self:
"""Append a URI annotation."""
return self.annotate_literal(prop, value, v.xsd_uri)
return self.annotate_literal(prop, OBOLiteral.uri(value), annotations=annotations)

def _iterate_obo_properties(
self,
Expand Down Expand Up @@ -549,10 +579,14 @@ def append_see_also(
return self.annotate_object(v.see_also, _reference, annotations=annotations)

def append_comment(
self, value: str, *, annotations: Iterable[Annotation] | None = None
self,
value: str,
*,
annotations: Iterable[Annotation] | None = None,
language: str | None = None,
) -> Self:
"""Add a comment property."""
return self.annotate_literal(v.comment, value, annotations=annotations)
return self.annotate_string(v.comment, value, annotations=annotations, language=language)

@property
def alt_ids(self) -> Sequence[Reference]:
Expand Down
2 changes: 1 addition & 1 deletion tests/test_struct/test_obo/test_struct_term.py
Original file line number Diff line number Diff line change
Expand Up @@ -585,7 +585,7 @@ def test_12_property_default_reference(self) -> None:
def test_12_property_literal(self) -> None:
"""Test emitting property literals."""
term = Term(reference=LYSINE_DEHYDROGENASE_ACT)
term.annotate_literal(RO_DUMMY, "value")
term.annotate_string(RO_DUMMY, "value")
self.assert_obo_stanza(
term,
obo="""\
Expand Down

0 comments on commit 4509aab

Please sign in to comment.