Skip to content

Commit

Permalink
init commit
Browse files Browse the repository at this point in the history
using amigo yaml files as test
see geneontology/amigo#617
  • Loading branch information
cmungall committed Jul 24, 2021
1 parent 98c602d commit 3a9f75a
Show file tree
Hide file tree
Showing 22 changed files with 4,634 additions and 24 deletions.
18 changes: 18 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Prefix that runs a command inside the project's pipenv environment
RUN = pipenv run

# These targets name actions, not files, so always run them when requested
.PHONY: all test test_data

all: test

# Run the unit test suite
test:
	$(RUN) python -m unittest

# Regenerate the derived artefacts for the kitchen_sink test schema
test_data: tests/test_models/kitchen_sink.context.jsonld tests/test_models/kitchen_sink.py

# Every generated file is derived from the YAML schema sharing its stem
SRC = tests/test_models/%.yaml
tests/test_models/%.context.jsonld: $(SRC)
	$(RUN) gen-jsonld-context $< > $@
tests/test_models/%.py: $(SRC)
	$(RUN) gen-python $< > $@
tests/test_models/%.ttl: $(SRC)
	$(RUN) gen-rdf $< > $@
# Build the amigo schema by running the golr schema utility over the golr YAML files
tests/test_models/amigo.yaml: linkml_solr/utils/golr_schema_utils.py
	$(RUN) python $< tests/test_golr/*yaml > $@
18 changes: 18 additions & 0 deletions Pipfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
[[source]]
url = "https://pypi.org/simple"
verify_ssl = true
name = "pypi"

[packages]
linkml = ">=1.0.2"
linkml-runtime = "*"
linkml-model = "*"
pyparsing = "~=2.4"
sparqlwrapper = "*"
pysolr = "*"

[dev-packages]
tox = "*"

[pipenv]
allow_prereleases = true
87 changes: 87 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
# linkml-sparql: ORM for SPARQL endpoints

This provides a way of querying either in-memory RDF graphs or remote SPARQL endpoints using a LinkML specified datamodel.

## Step 1: Define your datamodel and RDF bindings

As an example, a small schema for a portion of DBpedia:

```yaml
id: http://dbpedia.org/ontology/
imports:
- linkml:types
prefixes:
dbont: http://dbpedia.org/ontology/
dbproperty: http://dbpedia.org/property/
dbr: http://dbpedia.org/resource/
linkml: https://w3id.org/linkml/
geo: http://www.w3.org/2003/01/geo/wgs84_pos#
wd: http://www.wikidata.org/entity/
rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns#
rdfs: http://www.w3.org/2000/01/rdf-schema#

default_prefix: dbont

classes:
Thing:
slots:
- id
- label
- comment
- type

Food:
is_a: Thing
exact_mappings:
- wd:Q2095
slots:
- servingTemperature
- servingSize
- alias
- cuisine
- ingredientName
- origin
- region
- distributor
- approximateCalories
- carbohydrate
- fat
- protein
```
## Step 2: Create Python dataclasses
```bash
pipenv run gen-python dbont.yaml > dbont.py
```

## Step 3: Code

```python

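from linkml.generators.yamlgen import YAMLGenerator

from dbont import Food
from linkml_sparql import QueryEngine, SparqlEndpoint

SCHEMA = 'dbont.yaml'  # the schema file from Step 1

schema = YAMLGenerator(SCHEMA).schema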
qe = QueryEngine(schema=schema,
endpoint=SparqlEndpoint(url='http://dbpedia.org/sparql/'),
lang='en')
objs = qe.query(type=Food.class_class_curie,
origin='dbr:Scotland',
target_class=Food)
for obj in objs:
print(f'{obj.id} {obj.label} distributed by {obj.distributor}')
```

## More documentation

More documentation coming soon. For now, consult the tests.

## TODOs

Functionality is very incomplete:

- remote querying via DESCRIBE can be inefficient
- ...lots more
1 change: 1 addition & 0 deletions linkml_solr/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from linkml_solr.query import SolrEndpoint, SolrQueryEngine
66 changes: 66 additions & 0 deletions linkml_solr/mapper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import logging

from linkml_runtime.utils.formatutils import underscore
from linkml_model.meta import SchemaDefinition, ClassDefinition, YAMLRoot, ElementName, SlotDefinition
from rdflib import BNode, URIRef, Literal
from rdflib.term import Node

from linkml_solr.solrmodel import *

# Key of a raw document whose value, when present, names the schema class
# that LinkMLMapper._get_linkml_class looks up for that document
ASSERTED_TYPE_FIELD = '_type'


@dataclass
class Mapper:
    """
    Base class for mappers between URIs, RDF/SOLR entities and Python
    datamodel entities.

    Declares no fields or behaviour of its own; subclasses supply both.
    """

@dataclass
class LinkMLMapper(Mapper):
    """
    Mapper driven by a LinkML schema.

    Field names on the Python/SOLR side are the underscored form of the
    slot names declared in ``schema``.
    """

    # Schema whose slots and classes are consulted by every lookup below
    schema: SchemaDefinition

    def _get_slot(self, sn: str) -> Optional[SlotDefinition]:
        """
        Find the schema slot whose underscored name equals ``sn``.

        :param sn: underscored slot name
        :return: the matching slot, or None if the schema declares no such slot
        """
        for slot in self.schema.slots.values():
            if underscore(slot.name) == sn:
                return slot
        return None

    def _get_python_field_for_slot(self, slot: SlotDefinition) -> str:
        """
        Name of the Python attribute corresponding to ``slot``.

        :param slot: slot definition
        :return: underscored slot name
        """
        return underscore(slot.name)  # TODO: map to pythongen

    def pyval_to_solr_atom(self, v: Any, range: ElementName = None, query: SolrQuery = None) -> str:
        """
        Convert a Python value into the string used in a SOLR constraint.

        :param v: value to convert
        :param range: range of the slot being constrained (currently unused)
        :param query: query being built (currently unused)
        :return: ``str(v)``
        """
        return str(v)

    def _get_linkml_class(self, in_obj: Dict) -> Optional[ClassDefinition]:
        """
        Look up the schema class asserted by a raw document.

        :param in_obj: raw document
        :return: the entry of ``schema.classes`` named by the document's
            ``ASSERTED_TYPE_FIELD`` value, or None if the document has no such key
        :raises KeyError: presumably, if the named class is not in the schema
        """
        if ASSERTED_TYPE_FIELD in in_obj:
            cn = in_obj[ASSERTED_TYPE_FIELD]
            return self.schema.classes[cn]
        return None

    def _instance_of_linkml_class(self, v) -> bool:
        """
        Test whether the type of ``v`` exposes a ``class_name`` attribute.

        :param v: any object
        :return: True if ``type(v)`` has a ``class_name`` attribute
        """
        # hasattr only swallows AttributeError, unlike the former bare except
        # which also hid KeyboardInterrupt and genuine errors
        return hasattr(type(v), 'class_name')

    def _lookup_slot(self, cls: ClassDefinition, field: str) -> Optional[SlotDefinition]:
        """
        Find the slot of ``cls`` whose underscored name or alias equals ``field``.

        :param cls: class definition whose slots are searched
        :param field: underscored field name
        :return: the matching slot; None (after logging an error) if none matches
        """
        for sn in cls.slots:
            s: SlotDefinition = self.schema.slots[sn]
            if underscore(s.name) == field:
                return s
            if s.alias and underscore(s.alias) == field:
                return s
        logging.error(f'Did not find {field} in {cls.name} slots = {cls.slots}')
        return None

    def _slot_to_solr_prop(self, slot, prefixmap):
        """
        Name of the SOLR field corresponding to ``slot``.

        :param slot: slot definition
        :param prefixmap: currently unused
        :return: underscored slot name
        """
        # TODO: allow mapping
        return underscore(slot.name)
122 changes: 122 additions & 0 deletions linkml_solr/query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
import logging
from typing import Union, Dict, Tuple, Type, List
import pysolr
from dataclasses import dataclass

from linkml_runtime.utils.formatutils import underscore
from linkml_model.meta import SchemaDefinition, ClassDefinition, YAMLRoot, ElementName, SlotDefinition

from linkml_solr.solrmodel import SolrEndpoint, SolrQuery, SolrQueryResult, RawSolrResult

from linkml_solr.mapper import LinkMLMapper

# https://stackoverflow.com/questions/1176136/convert-string-to-python-class-object
def class_for_name(module_name, class_name):
    """
    Resolve an attribute (typically a class) of a module from their names.

    :param module_name: dotted name of the module to import
    :param class_name: name of the attribute to fetch from that module
    :return: the object bound to ``class_name`` in the module
    :raises ImportError: if the module cannot be loaded
    :raises AttributeError: if the module has no attribute ``class_name``
    """
    # Imported locally so the block does not depend on a file-level import.
    import importlib

    # import_module returns the named (sub)module itself. The previous
    # __import__ call passed a plain string as ``fromlist``, which the import
    # machinery iterates character by character.
    module = importlib.import_module(module_name)
    return getattr(module, class_name)


@dataclass
class SolrQueryEngine(object):
    """
    ORM wrapper for SOLR endpoint

    Translates keyword parameters into a SOLR filter query, executes it
    against ``endpoint`` and instantiates a Python object for each document
    returned.
    """

    # Endpoint whose ``url`` is handed to pysolr
    endpoint: SolrEndpoint
    # Schema used to resolve query parameter names to slots
    schema: SchemaDefinition
    # Mapper between schema and SOLR names; created from ``schema`` if omitted
    mapper: LinkMLMapper = None

    def __post_init__(self):
        """Create a default mapper, or give a schema-less mapper this engine's schema."""
        if self.mapper is None:
            self.mapper = LinkMLMapper(schema=self.schema)
        if self.mapper.schema is None:
            self.mapper.schema = self.schema

    def query(self, target_class: Type[YAMLRoot] = None, **params) -> List[YAMLRoot]:
        """
        As search, but just returns items, discarding facet info etc

        :param target_class: class to instantiate for documents that do not assert a type
        :param params: key-value parameters. Keys should be in the schema
        :return: one object per document returned
        """
        return self.search(target_class, **params).items

    def search(self, target_class: Type[YAMLRoot] = None, **params) -> SolrQueryResult:
        """
        Query a SOLR endpoint for a list of objects

        :param target_class: class to instantiate for documents that do not assert a type
        :param params: key-value parameters. Keys should be in the schema
        :return: result holding one object per document returned
        """
        sq = self.generate_query(**params)
        rawres = self.execute(sq)
        items = [self.fetch_object(row, sq, target_class=target_class) for row in rawres.docs]
        # pysolr results expose the total match count as ``hits``; fall back
        # to the previous default of 0 for result objects that lack it
        return SolrQueryResult(items=items, num_found=getattr(rawres, 'hits', 0))

    def generate_query(self, **params) -> SolrQuery:
        """
        Generate a solr query given query parameters

        :param params: key-value parameters. Keys should be in the schema
        :return: query with one constraint per parameter
        """
        sq = SolrQuery(prefixmap={})
        self._generate_query_for_params(sq, params)
        return sq

    def _generate_query_for_params(self, sq: SolrQuery, params: Dict) -> None:
        """
        Add one constraint to ``sq`` for each entry of ``params``.

        :param sq: query to extend in place
        :param params: maps underscored slot names to the values to match
        """
        mapper = self.mapper
        for sn, v in params.items():
            slot = mapper._get_slot(sn)
            if slot is None:
                logging.error(f'Unknown slot name: {sn}')
                slot_range = None
                # Use the name as given rather than dereferencing a missing
                # slot, which previously raised AttributeError right after
                # the error was logged
                solr_prop = sn
            else:
                slot_range = slot.range
                solr_prop = mapper._slot_to_solr_prop(slot, sq.prefixmap)
            solr_val = mapper.pyval_to_solr_atom(v, range=slot_range, query=sq)
            sq.add_constraint(solr_prop, solr_val)

    def fetch_object(self, row: Dict,
                     original_query: SolrQuery = None,
                     target_class: Type[YAMLRoot] = None) -> YAMLRoot:
        """
        Instantiate an object from one document of a result set

        Fields whose value is None or an empty list are dropped before
        instantiation.

        :param row: raw document, mapping field names to values
        :param original_query: query that produced the row (currently unused)
        :param target_class: class to instantiate when the row does not assert a type
        :return: instance populated from the remaining fields
        :raises ValueError: if the row asserts no type and no target_class is given
        """
        new_obj = {k: v for k, v in row.items() if v is not None and v != []}
        # NOTE(review): the mapper returns an entry of schema.classes here,
        # which looks like a schema element rather than a Python class --
        # confirm it is callable with the row's fields
        cls = self.mapper._get_linkml_class(new_obj)
        if cls is None:
            cls = target_class
        if cls is None:
            raise ValueError(f'Cannot determine class for row and no target_class given: {new_obj}')
        return cls(**new_obj)

    def execute(self, query: SolrQuery) -> RawSolrResult:
        """
        Execute a solr query on endpoint

        Every constraint is sent as a filter query; the main query is
        always ``*:*``.

        :param query: query to execute
        :return: the results object returned by pysolr
        """
        solr = pysolr.Solr(self.endpoint.url)
        params = query.http_params()
        logging.debug('SOLR query params: %s', params)
        results = solr.search('*:*', **params)
        logging.debug('SOLR result docs: %s', results.docs)
        return results

53 changes: 53 additions & 0 deletions linkml_solr/solrmodel.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@

from dataclasses import dataclass
from typing import Dict, List, Any, Optional

from linkml_model.meta import YAMLRoot

# Type of SolrQuery.prefixmap; presumably maps a prefix to its expansion (TODO confirm)
PREFIXMAP = Dict[str, str]
# Name of a SOLR field, as used in SolrQuery.fields and as filter_query keys
FIELD = str
# Annotation for the unprocessed result of executing a query
RawSolrResult = Dict


@dataclass
class SolrEndpoint:
    """
    Description of a SOLR endpoint to run queries against.
    """

    # URL of the endpoint; None until one is supplied
    url: Optional[str] = None
    # NOTE(review): presumably the field holding a document's type; it is not
    # read anywhere in this module -- confirm against callers
    type_property: str = 'type'


@dataclass
class SolrQuery:
    """
    A SOLR query under construction: field constraints plus the fields to return.
    """

    # Not read in this class; see PREFIXMAP
    prefixmap: Optional[PREFIXMAP] = None
    # Fields to request; None leaves the parameter out of the request
    fields: Optional[List[FIELD]] = None
    # Maps a field name to the value (or list of values) it must match
    filter_query: Optional[Dict[FIELD, Any]] = None

    def http_params(self) -> dict:
        """
        Build the keyword parameters to send with the search request.

        :return: dict with ``fq`` (one ``field:value`` string per constraint)
            when constraints exist, and ``fields`` (comma-joined) when fields
            were specified
        """
        params = {}
        # A query without constraints has filter_query None; emit no filter
        # instead of failing on None.items()
        if self.filter_query is not None:
            params['fq'] = [f'{k}:{_quote(v)}' for k, v in self.filter_query.items()]
        if self.fields is not None:
            # NOTE(review): SOLR's field-list parameter is usually ``fl`` --
            # confirm ``fields`` is what the endpoint expects
            params['fields'] = ','.join(self.fields)
        return params

    def add_constraint(self, solr_prop, solr_val):
        """
        Require ``solr_prop`` to match ``solr_val``, replacing any earlier
        constraint on the same field.

        :param solr_prop: SOLR field name
        :param solr_val: value, or list of alternative values
        """
        if self.filter_query is None:
            self.filter_query = {}
        self.filter_query[solr_prop] = solr_val


@dataclass
class SolrQueryResult:
    """
    Outcome of a search: the instantiated objects plus optional result metadata.
    """
    # Number of matching documents; 0 unless set by the creator
    num_found: int = 0
    # Objects instantiated from the returned documents
    items: Optional[List[YAMLRoot]] = None
    # NOTE(review): presumably facet counts reported by SOLR -- confirm shape against caller
    facet_counts: Optional[Dict] = None
    # NOTE(review): presumably highlighting info reported by SOLR -- confirm type against caller
    highlighting: Optional[str] = None


def _quote(v, operator="OR"):
if isinstance(v, list):
if len(v) == 1:
return _quote(v[0], operator)
else:
v2 = f" {operator} ".join([_quote(x) for x in v])
return f'({v2})'
else:
return f'"{v}"'
Loading

0 comments on commit 3a9f75a

Please sign in to comment.