Skip to content

Commit

Permalink
Improve import and export functions (#818)
Browse files Browse the repository at this point in the history
* Improve import and export functions.

* Properly document the options `all_triples` (all statements where an ontology individual is the subject) and `all_statements` (all RDF statements), which enable importing and exporting RDF statements that are not supported by SimPhoNy.

* Add tests for `all_triples` and `all_statements`. Fix `test_api_importexport_data.json` (did not match the turtle and xml versions).

* Rename `path_or_filelike` argument of `import_file` to `file`.

* Treat IRIs with type `owl:NamedIndividual` as individuals of class `owl:Thing`.

* Warn when ignoring RDF statements where an individual that is being added is the subject.

* Add SKOS to included ontologies.

* Emit warnings on `add` when uninterpretable statements are included with the entities to be added. Remove the warning for references to individuals that are not being added simultaneously.

* Exceptions on import/export.

* Raise exception when importing references to unknown individuals, terminological knowledge, or individuals of an unknown class.

* Do not overwrite individuals when importing.

* Fix docstrings.
  • Loading branch information
kysrpex authored Sep 26, 2022
1 parent 180de4c commit b0a7ba2
Show file tree
Hide file tree
Showing 11 changed files with 610 additions and 148 deletions.
2 changes: 2 additions & 0 deletions simphony_osp/ontology/files/owl.ttl
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@
@base <http://www.w3.org/2002/07/owl#> .

owl:Thing rdf:type owl:Class.
owl:NamedIndividual rdf:type owl:Class.
owl:NamedIndividual rdfs:subClassOf owl:Thing.
owl:topObjectProperty rdf:type owl:ObjectProperty.
owl:bottomObjectProperty rdf:type owl:ObjectProperty.
owl:topDataProperty rdf:type owl:DatatypeProperty.
Expand Down
5 changes: 5 additions & 0 deletions simphony_osp/ontology/files/skos.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Ontology package descriptor for SKOS.
# NOTE(review): field meanings below are inferred from the key names; confirm
# against the ontology package file specification.
# Presumably the name under which the package is installed/referenced.
identifier: skos
# Location of the ontology document (a remote URL in this case).
ontology_file: http://www.w3.org/TR/skos-reference/skos.rdf
# Serialization of `ontology_file`, given as a MIME type (RDF/XML).
format: "application/rdf+xml"
# Presumably maps namespace prefixes to their IRIs.
namespaces:
skos: http://www.w3.org/2004/02/skos/core#
7 changes: 2 additions & 5 deletions simphony_osp/ontology/individual.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,6 @@
logger = logging.getLogger(__name__)

RDF_type = RDF.type
OWL_NamedIndividual = OWL.NamedIndividual


class ResultEmptyError(Exception):
Expand Down Expand Up @@ -384,7 +383,6 @@ def __iter__(self) -> Iterator[OntologyIndividual]:
in (
subclass
for c in graph.objects(identifier, RDF_type)
if c != OWL_NamedIndividual
for subclass in ontology.from_identifier_typed(
c, typing=OntologyClass
).superclasses
Expand All @@ -409,8 +407,8 @@ def __iter__(self) -> Iterator[OntologyIndividual]:
except KeyError:
logger.warning(
f"Ignoring identifier {identifier}, which does not "
f"match an ontology individual belonging to a class in"
f"the ontology."
f"match an ontology individual belonging to a class "
f"in the ontology."
)

def __contains__(self, item: OntologyIndividual) -> bool:
Expand Down Expand Up @@ -847,7 +845,6 @@ def classes(self) -> FrozenSet[OntologyClass]:
o, typing=OntologyClass
)
for o in self.session.graph.objects(self.identifier, RDF_type)
if o != OWL_NamedIndividual
)

@classes.setter
Expand Down
2 changes: 0 additions & 2 deletions simphony_osp/ontology/oclass.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,8 +33,6 @@

logger = logging.getLogger(__name__)

BLACKLIST = {OWL.Nothing, OWL.Thing, OWL.NamedIndividual}

# The RDFLib namespace object is not as fast as it should be, so it is
# useful to reuse some IRIs throughout the file.
# TODO: Send PR to RDFLib to fix this upstream.
Expand Down
139 changes: 125 additions & 14 deletions simphony_osp/session/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import itertools
import logging
from datetime import datetime
from functools import wraps
from functools import lru_cache, wraps
from inspect import isclass
from typing import (
TYPE_CHECKING,
Expand Down Expand Up @@ -624,13 +624,23 @@ def add(
details).
exists_ok: Merge or overwrite individuals when they already exist
in the session rather than raising an exception.
all_triples:
When the individual is attached through an object property
to another one which is not properly defined (i.e. has no type
assigned), such connection is generally dropped. Setting this
option to `True` keeps such connections on the copy. Can give
rise to bugs. A common case in which you might want to do this
involves the `dcat:accessURL` object property.
all_triples: When an individual is added to the session, SimPhoNy
only copies the details that are relevant from an ontological
point of view: the individual's attributes, the classes it
belongs to, and its connections to other ontology individuals
that are also being copied at the same time.
However, in some cases, it is necessary to keep all the
information about the individual, even if it cannot be
understood by SimPhoNy. Set this option to `True` to copy all
RDF statements describing the individual, that is, all RDF
statements where the individual is the subject.
One example of a situation where this option is useful is
when the individual is attached through an object property to
another one which is not properly defined (i.e. has no type
assigned). This situation commonly arises when using the
`dcat:accessURL` object property.
Returns:
The new copies of the individuals.
Expand Down Expand Up @@ -691,17 +701,118 @@ def add(
for individual in individuals
if individual.session is not self
)

@lru_cache(maxsize=4096)
def is_known(
    p: Node,
) -> Optional[
    Union[OntologyAttribute, OntologyRelationship, OntologyAnnotation]
]:
    """Look up a predicate among the entities of the session's ontology.

    Results are memoized, as the same predicate is typically evaluated
    for many statements.

    Args:
        p: Identifier of the predicate to look up.

    Returns:
        The matching ontology attribute, relationship or annotation.
        `None` when the lookup fails or when the identifier resolves to
        a different kind of entity.
    """
    predicate_types = (
        OntologyRelationship,
        OntologyAttribute,
        OntologyAnnotation,
    )
    try:
        candidate = self.ontology.from_identifier(p)
    except KeyError:
        # The ontology lookup failed for this identifier.
        return None
    return candidate if isinstance(candidate, predicate_types) else None

def is_valid(
    s: Node, p: Node, o: Node, exception: bool = False
) -> bool:
    """Check whether a predicate is known and has a valid target.

    Check whether the predicate is a known relationship, attribute
    or annotation in this session's ontology, and points to a
    "valid" target:

    - Attributes must point to literals.
    - Relationships must point to individuals being copied
      simultaneously into the session.
    - Annotations can point to anything.

    The word "valid" is written with quotation marks because it is
    arguably just a superset of what is really valid (e.g. it is
    not checked that the data type of literals match the range of
    the attributes).

    Args:
        s: Subject of the statement. Only used to compose the warning
            or error message.
        p: Predicate to be evaluated.
        o: Target of the predicate.
        exception: When `True`, a `RuntimeError` is raised instead of
            a warning being logged if the statement cannot be
            interpreted (unknown predicate, or attribute pointing to
            a non-literal).

    Returns:
        Whether the predicate points to a "valid" target.

    Raises:
        RuntimeError: `exception` is `True` and the predicate is
            unknown, or it is an attribute whose target is not a
            literal.
    """
    predicate = is_known(p)
    if isinstance(predicate, OntologyAttribute):
        result = isinstance(o, Literal)
    elif isinstance(predicate, OntologyRelationship):
        result = o in identifiers
    elif isinstance(predicate, OntologyAnnotation):
        result = True
    else:  # predicate is None
        result = False

    if result:
        return True

    if predicate is None:
        text = (
            f"Individual {s} is the subject of a statement "
            f"that has {p} as predicate, which does not match any "
            f"annotation, relationship or attribute from the "
            f"installed ontologies."
        )
    elif isinstance(predicate, OntologyAttribute):
        text = (
            f"Individual {s} is the subject of a RDF "
            f"statement that has {predicate} as predicate. "
            f"{predicate} is an ontology attribute, but "
            f"the object of the statement "
            f"is not a literal."
        )
    else:
        # Relationship pointing to an individual that is not being
        # copied: the statement is rejected without any message.
        text = ""

    if text:
        if exception:
            raise RuntimeError(
                text + " Set the keyword argument `all_triples` "
                "to `True` to ignore this error."
            )
        logger.warning(
            "Accepting uninterpretable RDF statement: " + text
        )

    return False

add = (
(s, p, o)
for individual in individuals
for s, p, o in individual.session.graph.triples(
(individual.identifier, None, None)
)
if (
all_triples
or p == RDF.type
or isinstance(o, Literal)
or o in identifiers
p == RDF.type
or is_valid(s, p, o, exception=not all_triples)
or all_triples
)
)
if not merge:
Expand Down Expand Up @@ -1437,7 +1548,7 @@ def iter_identifiers(self) -> Iterator[Union[BNode, URIRef]]:
}
)

# Yield the entities from the TBox (literals filtered out above).
# Yield the entities from the TBox (literals filtered out).
if self.ontology is self:
yield from (
s
Expand All @@ -1446,7 +1557,7 @@ def iter_identifiers(self) -> Iterator[Union[BNode, URIRef]]:
if not isinstance(s, Literal)
)

# Yield the entities from the ABox (literals filtered out below).
# Yield the entities from the ABox (literals filtered out).
yield from (
t[0]
for t in self._graph.triples((None, RDF.type, None))
Expand Down
Loading

0 comments on commit b0a7ba2

Please sign in to comment.