Skip to content

Commit

Permalink
Add more language handling
Browse files Browse the repository at this point in the history
  • Loading branch information
cthoyt committed Jan 19, 2025
1 parent cf30d04 commit 8de5183
Show file tree
Hide file tree
Showing 5 changed files with 35 additions and 22 deletions.
8 changes: 6 additions & 2 deletions src/pyobo/api/properties.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,9 +74,13 @@ def get_literal_properties(
(
Reference.from_curie(s),
Reference.from_curie(p),
OBOLiteral(value, Reference.from_curie(datatype)),
OBOLiteral(
value,
Reference.from_curie(datatype),
language if language and pd.notna(language) else None,
),
)
for s, p, value, datatype in tqdm(
for s, p, value, datatype, language in tqdm(
df.values,
desc=f"[{prefix}] parsing properties",
unit_scale=True,
Expand Down
2 changes: 1 addition & 1 deletion src/pyobo/reader.py
Original file line number Diff line number Diff line change
Expand Up @@ -1178,7 +1178,7 @@ def _handle_prop(
if datatype_reference is None:
logger.warning("[%s] had unparsable datatype %s", node.curie, prop_value_type)
return None
return Annotation(prop_reference, OBOLiteral(value, datatype_reference))
return Annotation(prop_reference, OBOLiteral(value, datatype_reference, None))


def _get_prop(
Expand Down
17 changes: 9 additions & 8 deletions src/pyobo/struct/reference.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ def comma_separate_references(elements: Iterable[Reference | OBOLiteral]) -> str
match element:
case Reference():
parts.append(get_preferred_curie(element))
case OBOLiteral(value, _datatype):
case OBOLiteral(value, _datatype, _language):
# TODO check datatype is URI
parts.append(value)
return ", ".join(parts)
Expand Down Expand Up @@ -300,43 +300,44 @@ class OBOLiteral(NamedTuple):

value: str
datatype: Reference
language: str | None

@classmethod
def string(cls, value: str, *, language: str | None = None) -> OBOLiteral:
"""Get a string literal."""
if language:
raise NotImplementedError
return cls(value, Reference(prefix="xsd", identifier="string"))
return cls(value, Reference(prefix="xsd", identifier="string"), language)

@classmethod
def boolean(cls, value: bool) -> OBOLiteral:
"""Get a boolean literal."""
return cls(str(value).lower(), Reference(prefix="xsd", identifier="boolean"))
return cls(str(value).lower(), Reference(prefix="xsd", identifier="boolean"), None)

@classmethod
def decimal(cls, value) -> OBOLiteral:
"""Get a decimal literal."""
return cls(str(value), Reference(prefix="xsd", identifier="decimal"))
return cls(str(value), Reference(prefix="xsd", identifier="decimal"), None)

@classmethod
def float(cls, value) -> OBOLiteral:
"""Get a float literal."""
return cls(str(value), Reference(prefix="xsd", identifier="float"))
return cls(str(value), Reference(prefix="xsd", identifier="float"), None)

@classmethod
def integer(cls, value: int | str) -> OBOLiteral:
"""Get an integer literal."""
return cls(str(int(value)), Reference(prefix="xsd", identifier="integer"))
return cls(str(int(value)), Reference(prefix="xsd", identifier="integer"), None)

@classmethod
def year(cls, value: int | str) -> OBOLiteral:
"""Get a year (gYear) literal."""
return cls(str(int(value)), Reference(prefix="xsd", identifier="gYear"))
return cls(str(int(value)), Reference(prefix="xsd", identifier="gYear"), None)

@classmethod
def uri(cls, uri: str) -> OBOLiteral:
"""Get a string literal for a URI."""
return cls(uri, Reference(prefix="xsd", identifier="anyURI"))
return cls(uri, Reference(prefix="xsd", identifier="anyURI"), None)


def _reference_list_tag(
Expand Down
24 changes: 16 additions & 8 deletions src/pyobo/struct/struct.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,7 +134,7 @@ def _get_prefixes(self) -> set[str]:
match provenance:
case Reference():
rv.add(provenance.prefix)
case OBOLiteral(_, datatype):
case OBOLiteral(_, datatype, _language):
rv.add(datatype.prefix)
rv.update(_get_prefixes_from_annotations(self.annotations))
return rv
Expand Down Expand Up @@ -329,7 +329,7 @@ def get_property_literals(self, prop: ReferenceHint) -> list[str]:
match t:
case Reference():
rv.append(get_preferred_curie(t))
case OBOLiteral(value, _):
case OBOLiteral(value, _datatype, _language):
rv.append(value)
return rv

Expand Down Expand Up @@ -894,7 +894,7 @@ def _iterate_property_pairs(self) -> Iterable[Annotation]:
# TODO add SPDX to idspaces and use as a CURIE?
if license_spdx_id := bioregistry.get_license(self.ontology):
if license_spdx_id.startswith("http"):
license_literal = OBOLiteral(license_spdx_id, v.xsd_uri)
license_literal = OBOLiteral.uri(license_spdx_id)
else:
license_literal = OBOLiteral.string(license_spdx_id)
yield Annotation(v.has_license, license_literal)
Expand Down Expand Up @@ -1446,7 +1446,7 @@ def iterate_properties(self, *, use_tqdm: bool = False) -> Iterable[tuple[Term,
@property
def properties_header(self):
"""Property dataframe header."""
return [f"{self.ontology}_id", "property", "value", "datatype"]
return [f"{self.ontology}_id", "property", "value", "datatype", "language"]

@property
def object_properties_header(self):
Expand All @@ -1458,15 +1458,23 @@ def literal_properties_header(self):
"""Property dataframe header."""
return ["source", "predicate", "target", "datatype"]

def _iter_property_rows(self, *, use_tqdm: bool = False) -> Iterable[tuple[str, str, str, str]]:
def _iter_property_rows(
self, *, use_tqdm: bool = False
) -> Iterable[tuple[str, str, str, str, str]]:
"""Iterate property rows."""
for term, t in self.iterate_properties(use_tqdm=use_tqdm):
pred = term._reference(t.predicate, ontology_prefix=self.ontology)
match t.value:
case OBOLiteral(value, datatype):
yield (term.identifier, pred, value, get_preferred_curie(datatype))
case OBOLiteral(value, datatype, language):
yield (
term.identifier,
pred,
value,
get_preferred_curie(datatype),
language or "",
)
case Reference() as obj:
yield (term.identifier, pred, get_preferred_curie(obj), "")
yield term.identifier, pred, get_preferred_curie(obj), "", ""
case _:
raise TypeError(f"got: {type(t)} - {t}")

Expand Down
6 changes: 3 additions & 3 deletions src/pyobo/struct/struct_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ class Annotation(NamedTuple):
@classmethod
def float(cls, predicate: Reference, value: float) -> Self:
"""Return a literal property for a float."""
return cls(predicate, OBOLiteral(str(value), v.xsd_float))
return cls(predicate, OBOLiteral.float(value))

@staticmethod
def _sort_key(x: Annotation):
Expand Down Expand Up @@ -805,7 +805,7 @@ def _iterate_obo_relations(
start = f"{pc} "
for value in sorted(values, key=_reference_or_literal_key):
match value:
case OBOLiteral(dd, datatype):
case OBOLiteral(dd, datatype, _language):
if predicate in skip_predicate_literals:
continue
# TODO how to clean/escape value?
Expand Down Expand Up @@ -867,7 +867,7 @@ def _format_obo_trailing_modifiers(
match prop.value:
case Reference():
right = reference_escape(prop.value, ontology_prefix=ontology_prefix)
case OBOLiteral(value, _datatype):
case OBOLiteral(value, _datatype, _language):
right = value
modifiers.append((left, right))
inner = ", ".join(f"{key}={value}" for key, value in modifiers)
Expand Down

0 comments on commit 8de5183

Please sign in to comment.