Improve performance and adapt existing benchmarks to SimPhoNy v4 (#789)
* Improve performance when creating individuals.

* Introduced the `lru_cache_weak` and `lru_cache_timestamp` decorators, as well as the `take` method, which takes a limited number of items from an iterator (a brief usage sketch of the caching decorator follows below).

* Apply these decorators to methods of `OntologyEntity` and `OntologyClass` to improve performance while keeping the cache coherent when changes are made to a `Session`'s graph.

* Adapt existing benchmarks to SimPhoNy v4.

* Fix performance in various places so that it is on par with SimPhoNy v3.

* Fetch FOAF from the web archive (the original source is currently unavailable).

* Code review changes.
kysrpex authored Jul 5, 2022
1 parent 53b0c6a commit 90313f0
Showing 13 changed files with 1,114 additions and 656 deletions.
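
A minimal usage sketch of the caching decorator applied throughout the diffs below. It assumes only what the diffs themselves show (a timestamp attribute that the session bumps whenever its graph changes, and the decorator's call signature); the class and property names are hypothetical.

from simphony_osp.utils.cache import lru_cache_timestamp


class CachedExample:
    """Hypothetical class mirroring how the diffs decorate properties."""

    def __init__(self):
        # Bumped whenever the underlying graph changes, so that values
        # cached before the change are not reused.
        self.entity_cache_timestamp: int = 0

    @property
    @lru_cache_timestamp(lambda self: self.entity_cache_timestamp)
    def expensive_property(self) -> int:
        # Recomputed only when `entity_cache_timestamp` has changed since
        # the last call; otherwise served from the cache.
        return 42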
1 change: 0 additions & 1 deletion simphony_osp/ontology/attribute.py
@@ -69,7 +69,6 @@ def __init__(
the existing ones.
"""
super().__init__(uid, session, triples, merge=merge)
logger.debug("Instantiated ontology attribute %s." % self)

def convert_to_datatype(self, value: Any) -> Any:
"""Convert the given value to a Python object.
6 changes: 6 additions & 0 deletions simphony_osp/ontology/entity.py
@@ -19,6 +19,7 @@
from rdflib import Graph, Literal, URIRef
from rdflib.term import Identifier

from simphony_osp.utils.cache import lru_cache_timestamp
from simphony_osp.utils.datatypes import UID, Triple

if TYPE_CHECKING:
@@ -101,6 +102,7 @@ def label_lang(self, value: str) -> None:
self.label_literal = Literal(self.label_literal, lang=value)

@property
@lru_cache_timestamp(lambda self: self.session.entity_cache_timestamp)
def namespace(self) -> Optional[OntologyNamespace]:
"""Return the ontology namespace to which this entity is associated."""
return next((x for x in self.session.namespaces if self in x), None)
@@ -123,6 +125,7 @@ def session(self, value: Session) -> None:
self._session = value

@property
@lru_cache_timestamp(lambda self: self.session.entity_cache_timestamp)
def direct_superclasses(self) -> FrozenSet[OntologyEntity]:
"""Get the direct superclasses of the entity.
@@ -132,6 +135,7 @@ def direct_superclasses(self) -> FrozenSet[OntologyEntity]:
return frozenset(self._get_direct_superclasses())

@property
@lru_cache_timestamp(lambda self: self.session.entity_cache_timestamp)
def direct_subclasses(self) -> FrozenSet[OntologyEntity]:
"""Get the direct subclasses of the entity.
@@ -141,6 +145,7 @@ def direct_subclasses(self) -> FrozenSet[OntologyEntity]:
return frozenset(self._get_direct_subclasses())

@property
@lru_cache_timestamp(lambda self: self.session.entity_cache_timestamp)
def superclasses(self) -> FrozenSet[OntologyEntity]:
"""Get the superclass of the entity.
@@ -151,6 +156,7 @@ def superclasses(self) -> FrozenSet[OntologyEntity]:
return frozenset(self._get_superclasses())

@property
@lru_cache_timestamp(lambda self: self.session.entity_cache_timestamp)
def subclasses(self) -> FrozenSet[OntologyEntity]:
"""Get the subclasses of the entity.
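
For orientation, one way such a timestamp-keyed cache can be built is sketched below. This is an illustration only, not the code in simphony_osp/utils/cache.py (which, per the commit message, also provides `lru_cache_weak` so that cached entries do not keep entities alive through strong references).

import functools
from typing import Callable


def lru_cache_timestamp_sketch(key: Callable, maxsize: int = 128):
    """Cache a method's result until the value of `key(self)` changes.

    Illustrative sketch; the real `lru_cache_timestamp` may differ.
    """

    def decorator(func):
        @functools.lru_cache(maxsize=maxsize)
        def cached(self, timestamp, *args, **kwargs):
            # The timestamp is part of the cache key: a bumped timestamp
            # misses the cache and forces recomputation.
            return func(self, *args, **kwargs)

        @functools.wraps(func)
        def wrapper(self, *args, **kwargs):
            # Arguments (including `self`) must be hashable for lru_cache.
            return cached(self, key(self), *args, **kwargs)

        return wrapper

    return decorator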
166 changes: 94 additions & 72 deletions simphony_osp/ontology/individual.py
@@ -50,6 +50,9 @@

logger = logging.getLogger(__name__)

RDF_type = RDF.type
OWL_NamedIndividual = OWL.NamedIndividual


class ResultEmptyError(Exception):
"""The result is unexpectedly empty."""
@@ -185,7 +188,7 @@ def _predicates(
Such predicates, or `None` if no main predicate is
associated with this `ObjectSet`.
"""
return (
return set(
self._predicate.subclasses if self._predicate is not None else None
)

@@ -359,32 +362,70 @@ def __iter__(self) -> Iterator[OntologyIndividual]:
Returns:
The mentioned underlying set.
"""
individual = self._individual.identifier
graph = self._individual.session.graph
ontology = self._individual.session.ontology
predicates = self._predicates

# Get the predicate IRIs to be considered.
predicates_direct = {predicate.identifier for predicate in predicates}
predicates_inverse = {
p.identifier
for predicate in predicates
for p in (predicate.inverse,)
if p is not None
}
if self._inverse:
predicates_direct, predicates_inverse = (
predicates_inverse,
predicates_direct,
)

# Get the identifiers of the individuals connected to
# `self._individual` through the allowed predicates.
connected = set()
triples = graph.triples((individual, None, None))
connected |= {o for s, p, o in triples if p in predicates_direct}
triples = graph.triples((None, None, individual))
connected |= {s for s, p, o in triples if p in predicates_inverse}
identifiers = (
tuple(uid.to_identifier() for uid in self._uid_filter)
if self._uid_filter
else tuple()
)
if identifiers:
connected &= set(identifiers)
if self._class_filter:
connected &= {
identifier
for identifier in connected
if self._class_filter
in (
subclass
for c in graph.objects(identifier, RDF_type)
if c != OWL_NamedIndividual
for subclass in ontology.from_identifier_typed(
c, typing=OntologyClass
).superclasses
)
}

if self._uid_filter:
last_identifier = None
for i, r, t in self.iter_low_level():
if i == last_identifier:
continue
elif (r, t) == (None, None):
yield None
else:
item = self._individual.session.from_identifier_typed(
i, typing=OntologyIndividual
)
if not self._class_filter or item.is_a(self._class_filter):
yield item
else:
yield None
last_identifier = i
yield from (
self._individual.session.from_identifier_typed(
identifier, typing=OntologyIndividual
)
if identifier in connected
else None
for identifier in identifiers
)
else:
yielded: Set[Node] = set()
for i, r, t in self.iter_low_level():
item = self._individual.session.from_identifier(i)
if i in yielded or (
self._class_filter and not item.is_a(self._class_filter)
):
continue
yielded.add(i)
yield item
yield from (
self._individual.session.from_identifier_typed(
identifier, typing=OntologyIndividual
)
for identifier in connected
)

def __contains__(self, item: OntologyIndividual) -> bool:
"""Check if an individual is connected via the relationship."""
@@ -624,7 +665,10 @@ def __init__(

def iter_low_level(
self,
) -> Iterator[Tuple[Node, Optional[Node], Optional[bool]]]:
) -> Union[
Iterator[Tuple[Node, Optional[Node], Optional[bool]]],
Iterator[Tuple[Node, Optional[Node], Optional[bool], Node]],
]:
"""Iterate over individuals assigned to `self._predicates`.
Note: no class filter.
Expand Down Expand Up @@ -653,13 +697,13 @@ def iter_low_level(
if self._uid_filter is None:
predicate_individual_direct = (
(o, p)
for p, o in graph.predicate_objects(individual)
if p in direct_allowed
for p in direct_allowed
for o in graph.objects(individual, p)
)
predicate_individual_inverse = (
(s, p)
for s, p in graph.subject_predicates(individual)
if p in inverse_allowed
for p in inverse_allowed
for s in graph.subjects(p, individual)
)
individuals_and_relationships = chain(
((o, p, True) for o, p in predicate_individual_direct),
Expand All @@ -676,13 +720,13 @@ def individuals_and_relationships():
found = chain(
(
(p, True)
for p in graph.predicates(individual, identifier)
if p in direct_allowed
for p in direct_allowed
if (individual, p, identifier) in graph
),
(
(p, False)
for p in graph.predicates(identifier, individual)
if p in inverse_allowed
for p in inverse_allowed
if (identifier, p, individual) in graph
),
)
first = next(found, (None, None))
@@ -784,7 +828,6 @@ def __init__(
f"Tried to initialize an ontology individual with "
f"uid {uid}, which is not a UID object."
)
self._ontology_classes = []
triples = set(triples) if triples is not None else set()
# Attribute triples.
attributes = attributes or dict()
@@ -800,27 +843,8 @@
# Class triples.
if class_:
triples |= {(uid.to_iri(), RDF.type, class_.iri)}
self._ontology_classes += [class_]
# extra_class = False
# Extra triples
# for s, p, o in triples:
# if p == RDF.type:
# extra_class = True
# triples.add((s, p, o))
# TODO: grab extra class from tbox, add it to _ontology_classes.

# Determine whether class was assigned (currently unused).
# class_assigned = bool(class_) or extra_class
# if not class_assigned:
# raise TypeError(f"No ontology class associated with {self}! "
# f"Did you install the required ontology?")
# logger.warning(f"No ontology class associated with {self}! "
# f"Did you install the required ontology?")
# pass

# When the construction is complete, the session is switched.

super().__init__(uid, session, triples or None, merge=merge)
logger.debug("Instantiated ontology individual %s" % self)

# Public API
# ↓ ------ ↓
@@ -837,7 +861,8 @@ def classes(self) -> FrozenSet[OntologyClass]:
self.session.ontology.from_identifier_typed(
o, typing=OntologyClass
)
for o in self.session.graph.objects(self.identifier, RDF.type)
for o in self.session.graph.objects(self.identifier, RDF_type)
if o != OWL_NamedIndividual
)

@classes.setter
@@ -1600,12 +1625,9 @@ def iter(
"first using `session.add`."
)

if isinstance(x, str):
if not isinstance(x, Identifier):
x = URIRef(x)
identifiers[n] = UID(x)
elif isinstance(x, OntologyIndividual):
identifiers[n] = UID(x.identifier)
identifiers[n] = UID(
x.identifier if isinstance(x, OntologyIndividual) else x
)

if isinstance(rel, Identifier):
rel = self.session.ontology.from_identifier_typed(
@@ -1714,6 +1736,16 @@ def __exit__(self, *args):

raise AttributeError("__exit__")

@property
def attributes(
self,
) -> Mapping[OntologyAttribute, FrozenSet[AttributeValue]]:
"""Get the attributes of this individual as a dictionary."""
generator = self.attributes_attribute_and_value_generator()
return MappingProxyType(
{attr: frozenset(gen) for attr, gen in generator}
)

# ↑ ------ ↑
# Public API

@@ -1978,16 +2010,6 @@ def annotations_iter(
# Attribute handling
# ↓ -------------- ↓

@property
def attributes(
self,
) -> Mapping[OntologyAttribute, FrozenSet[AttributeValue]]:
"""Get the attributes of this individual as a dictionary."""
generator = self.attributes_attribute_and_value_generator()
return MappingProxyType(
{attr: frozenset(gen) for attr, gen in generator}
)

def _attributes_get_by_name(self, name: str) -> Set[OntologyAttribute]:
"""Get an attribute of this individual by name."""
attributes = (
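
The recurring pattern in the `__iter__` and `iter_low_level` hunks above is to loop over the small set of allowed predicates and let the triple store index each lookup, instead of scanning every triple touching the individual and filtering predicates in Python. A standalone rdflib sketch of the same idea (the graph contents and IRIs are made up; the indexed form is typically cheaper when the individual has many triples but only a few allowed predicates):

from rdflib import Graph, Namespace

EX = Namespace("http://example.org/")
graph = Graph()
graph.add((EX.a, EX.knows, EX.b))
graph.add((EX.c, EX.likes, EX.a))

direct_allowed = {EX.knows}
inverse_allowed = {EX.likes}

# Scan everything touching EX.a, then filter predicates in Python.
scan = {o for p, o in graph.predicate_objects(EX.a) if p in direct_allowed}
scan |= {s for s, p in graph.subject_predicates(EX.a) if p in inverse_allowed}

# One indexed lookup per allowed predicate.
indexed = {o for p in direct_allowed for o in graph.objects(EX.a, p)}
indexed |= {s for p in inverse_allowed for s in graph.subjects(p, EX.a)}

assert scan == indexed == {EX.b, EX.c}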
13 changes: 13 additions & 0 deletions simphony_osp/ontology/namespace.py
@@ -10,6 +10,7 @@
from rdflib.term import Identifier

from simphony_osp.ontology.entity import OntologyEntity
from simphony_osp.utils.cache import lru_cache_timestamp

if TYPE_CHECKING:
from simphony_osp.session.session import Session
@@ -188,6 +189,9 @@ def __len__(self) -> int:
"""Return the number of entities in the namespace."""
return sum(1 for _ in self)

@lru_cache_timestamp(
lambda self: self.ontology.entity_cache_timestamp, maxsize=4096
)
def get(self, name: str, default: Optional[Any] = None) -> OntologyEntity:
"""Get ontology entities from the registry by suffix or label.
@@ -244,6 +248,9 @@ def from_suffix(self, suffix: str) -> OntologyEntity:
"""
return self.from_iri(str(self._iri) + suffix)

@lru_cache_timestamp(
lambda self: self.ontology.entity_cache_timestamp, maxsize=4096
)
def from_iri(self, iri: Union[str, URIRef]) -> OntologyEntity:
"""Get an ontology entity directly from its IRI.
@@ -273,6 +280,9 @@ def from_iri(self, iri: Union[str, URIRef]) -> OntologyEntity:
f"The IRI {iri} does not belong to the namespace" f"{self}."
)

@lru_cache_timestamp(
lambda self: self.ontology.entity_cache_timestamp, maxsize=4096
)
def from_label(
self,
label: str,
@@ -341,6 +351,9 @@ def __repr__(self) -> str:
"""
return f"<{self.name}: {self.iri}>"

@lru_cache_timestamp(
lambda self: self.ontology.entity_cache_timestamp, maxsize=4096
)
def _from_label_set(
self,
label: str,
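
The decorated lookups above (`get`, `from_iri`, `from_label`, `_from_label_set`) back most entity resolution, so repeated lookups are now served from the cache until the ontology changes. A usage sketch, assuming the FOAF ontology has been installed so its namespace is importable; the import path, installation step and label are assumptions for illustration, not part of this diff:

# Assumes FOAF was installed beforehand (e.g. with SimPhoNy's `pico` tool);
# otherwise the import below fails.
from simphony_osp.namespaces import foaf

person = foaf.get("Person")                               # lookup by suffix or label, now cached
same = foaf.from_iri("http://xmlns.com/foaf/0.1/Person")  # repeated calls hit the LRU cache
also = foaf.from_label("Person")
# All three resolve to the ontology class foaf:Person.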
