-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
using amigo yaml files as test see geneontology/amigo#617
- Loading branch information
Showing
22 changed files
with
4,634 additions
and
24 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
# Prefix used to run every command inside the project's pipenv environment.
RUN = pipenv run

# These targets do not name files, so always treat them as out of date.
.PHONY: all test test_data

all: test

# Run the unit tests.
test:
	$(RUN) python -m unittest

# Regenerate the derived artefacts for the kitchen_sink test model.
test_data: tests/test_models/kitchen_sink.context.jsonld tests/test_models/kitchen_sink.py

# Pattern rules: each artefact is generated from the YAML schema of the same stem.
SRC = tests/test_models/%.yaml
tests/test_models/%.context.jsonld: $(SRC)
	$(RUN) gen-jsonld-context $< > $@
tests/test_models/%.py: $(SRC)
	$(RUN) gen-python $< > $@
tests/test_models/%.ttl: $(SRC)
	$(RUN) gen-rdf $< > $@

# Generate amigo.yaml by running golr_schema_utils.py over the YAML files in tests/test_golr/.
tests/test_models/amigo.yaml: linkml_solr/utils/golr_schema_utils.py
	$(RUN) python $< tests/test_golr/*yaml > $@
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
[[source]] | ||
url = "https://pypi.org/simple" | ||
verify_ssl = true | ||
name = "pypi" | ||
|
||
[packages] | ||
linkml = ">=1.0.2" | ||
linkml-runtime = "*" | ||
linkml-model = "*" | ||
pyparsing = "~=2.4" | ||
sparqlwrapper = "*" | ||
pysolr = "*" | ||
|
||
[dev-packages] | ||
tox = "*" | ||
|
||
[pipenv] | ||
allow_prereleases = true |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,87 @@ | ||
# linkml-sparql: ORM for SPARQL endpoints | ||
|
||
This provides a way of querying either in-memory RDF graphs or remote SPARQL endpoints using a LinkML specified datamodel. | ||
|
||
## Step 1: Define your datamodel and RDF bindings | ||
|
||
As an example, a small schema for a portion of DBpedia: | ||
|
||
```yaml | ||
id: http://dbpedia.org/ontology/ | ||
imports: | ||
- linkml:types | ||
prefixes: | ||
dbont: http://dbpedia.org/ontology/ | ||
dbproperty: http://dbpedia.org/property/ | ||
dbr: http://dbpedia.org/resource/ | ||
linkml: https://w3id.org/linkml/ | ||
geo: http://www.w3.org/2003/01/geo/wgs84_pos# | ||
wd: http://www.wikidata.org/entity/ | ||
rdf: http://www.w3.org/1999/02/22-rdf-syntax-ns# | ||
rdfs: http://www.w3.org/2000/01/rdf-schema# | ||
|
||
default_prefix: dbont | ||
|
||
classes: | ||
Thing: | ||
slots: | ||
- id | ||
- label | ||
- comment | ||
- type | ||
|
||
Food: | ||
is_a: Thing | ||
exact_mappings: | ||
- wd:Q2095 | ||
slots: | ||
- servingTemperature | ||
- servingSize | ||
- alias | ||
- cuisine | ||
- ingredientName | ||
- origin | ||
- region | ||
- distributor | ||
- approximateCalories | ||
- carbohydrate | ||
- fat | ||
- protein | ||
``` | ||
## Step 2: Create Python dataclasses | ||
```bash | ||
pipenv run gen-python dbont.yaml > dbont.py | ||
``` | ||
|
||
## Step 3: Code | ||
|
||
```python | ||
|
||
from dbont import Food | ||
from linkml.generators.yamlgen import YAMLGenerator | ||
from linkml_sparql import QueryEngine, SparqlEndpoint | ||
|
||
|
||
|
||
SCHEMA = 'dbont.yaml'  # the schema file passed to gen-python in Step 2 | ||
schema = YAMLGenerator(SCHEMA).schema | ||
qe = QueryEngine(schema=schema, | ||
endpoint=SparqlEndpoint(url='http://dbpedia.org/sparql/'), | ||
lang='en') | ||
objs = qe.query(type=Food.class_class_curie, | ||
origin='dbr:Scotland', | ||
target_class=Food) | ||
for obj in objs: | ||
print(f'{obj.id} {obj.label} distributed by {obj.distributor}') | ||
``` | ||
|
||
## More documentation | ||
|
||
More documentation coming soon. For now, consult the tests. | ||
|
||
## TODOs | ||
|
||
Functionality is very incomplete | ||
|
||
- remote querying via DESCRIBE can be inefficient | ||
- ...lots more |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
from linkml_solr.query import SolrEndpoint, SolrQueryEngine |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,66 @@ | ||
import logging | ||
|
||
from linkml_runtime.utils.formatutils import underscore | ||
from linkml_model.meta import SchemaDefinition, ClassDefinition, YAMLRoot, ElementName, SlotDefinition | ||
from rdflib import BNode, URIRef, Literal | ||
from rdflib.term import Node | ||
|
||
from linkml_solr.solrmodel import * | ||
|
||
# Key that LinkMLMapper._get_linkml_class looks up in an incoming dict to find
# the name of the schema class asserted for that object.
ASSERTED_TYPE_FIELD = '_type' | ||
|
||
|
||
@dataclass
class Mapper:
    """
    Base class for mappers between URIs, RDF/SOLR entities and Python
    datamodel entities.

    Declares no fields and no methods of its own.
    """
|
||
@dataclass
class LinkMLMapper(Mapper):
    """
    Mapper driven by a LinkML schema.

    Slot names are matched in their underscored form (see ``underscore``).
    """

    schema: SchemaDefinition

    def _get_slot(self, sn: str) -> Optional[SlotDefinition]:
        """
        Find the schema slot whose underscored name equals ``sn``.

        :param sn: underscored slot name to look for
        :return: the first matching slot, or None when nothing matches
        """
        for slot in self.schema.slots.values():
            if underscore(slot.name) == sn:
                return slot
        return None

    def _get_python_field_for_slot(self, slot: SlotDefinition) -> str:
        """
        Return the Python field name for a slot (its underscored name).

        :param slot: slot definition
        :return: underscored slot name
        """
        return underscore(slot.name)  # TODO: map to pythongen

    def pyval_to_solr_atom(self, v: Any, range: ElementName = None, query: SolrQuery = None) -> str:
        """
        Convert a Python value into the string used in a SOLR constraint.

        :param v: value to convert
        :param range: range of the slot the value belongs to (currently unused)
        :param query: query being built (currently unused)
        :return: ``str(v)``
        """
        return str(v)

    def _get_linkml_class(self, in_obj: Dict) -> Optional[ClassDefinition]:
        """
        Look up the schema class named by the asserted-type field of ``in_obj``.

        :param in_obj: dictionary that may carry ``ASSERTED_TYPE_FIELD``
        :return: the matching entry of ``schema.classes``; None when the field
            is absent or empty, or when the schema has no class of that name
        """
        cn = in_obj.get(ASSERTED_TYPE_FIELD)
        if cn is None:
            return None
        cls_def = self.schema.classes.get(cn)
        if cls_def is None:
            # an unknown type name should not abort processing; the caller can
            # still fall back to a class of its own choosing
            logging.warning(f'Unknown class name: {cn}')
        return cls_def

    def _instance_of_linkml_class(self, v) -> bool:
        """
        Test whether the type of ``v`` exposes a ``class_name`` attribute.

        :param v: any object
        :return: True when ``type(v).class_name`` exists, otherwise False
        """
        return hasattr(type(v), 'class_name')

    def _lookup_slot(self, cls: ClassDefinition, field: str) -> Optional[SlotDefinition]:
        """
        Find the slot of ``cls`` whose underscored name or alias equals ``field``.

        :param cls: class definition whose ``slots`` are searched
        :param field: underscored field name
        :return: the matching slot; None (after logging an error) when no slot
            of the class matches
        """
        for sn in cls.slots:
            s: SlotDefinition = self.schema.slots[sn]
            if underscore(s.name) == field:
                return s
            if s.alias and underscore(s.alias) == field:
                return s
        logging.error(f'Did not find {field} in {cls.name} slots = {cls.slots}')
        return None

    def _slot_to_solr_prop(self, slot, prefixmap):
        """
        Return the SOLR property name for a slot (its underscored name).

        :param slot: slot definition; must not be None
        :param prefixmap: prefix map of the query (currently unused)
        :return: underscored slot name
        """
        # TODO: allow mapping
        return underscore(slot.name)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,122 @@ | ||
import logging | ||
from typing import Union, Dict, Tuple, Type, List | ||
import pysolr | ||
from dataclasses import dataclass | ||
|
||
from linkml_runtime.utils.formatutils import underscore | ||
from linkml_model.meta import SchemaDefinition, ClassDefinition, YAMLRoot, ElementName, SlotDefinition | ||
|
||
from linkml_solr.solrmodel import SolrEndpoint, SolrQuery, SolrQueryResult, RawSolrResult | ||
|
||
from linkml_solr.mapper import LinkMLMapper | ||
|
||
# https://stackoverflow.com/questions/1176136/convert-string-to-python-class-object | ||
def class_for_name(module_name, class_name):
    """
    Resolve an attribute (typically a class) from a module given both names.

    :param module_name: absolute dotted name of the module to import
    :param class_name: name of the attribute to fetch from that module
    :return: the object bound to ``class_name`` in the module
    :raises ImportError: if the module cannot be loaded
    :raises AttributeError: if the module has no such attribute
    """
    # local import keeps this helper self-contained
    import importlib

    # import_module returns the named (sub)module itself, so no fromlist is needed
    module = importlib.import_module(module_name)
    return getattr(module, class_name)
|
||
|
||
@dataclass
class SolrQueryEngine(object):
    """
    ORM wrapper for SOLR endpoint

    Builds a SOLR query from keyword parameters, executes it with pysolr
    against ``endpoint.url`` and instantiates a Python object per result row.
    """

    endpoint: SolrEndpoint
    schema: SchemaDefinition
    mapper: LinkMLMapper = None

    def __post_init__(self):
        # create a default mapper, and make sure a supplied mapper has a schema
        if self.mapper is None:
            self.mapper = LinkMLMapper(schema=self.schema)
        if self.mapper.schema is None:
            self.mapper.schema = self.schema

    def query(self, target_class: Type[YAMLRoot] = None, **params) -> List[YAMLRoot]:
        """
        As search, but just returns items, discarding facet info etc

        :param target_class: class to instantiate for each row
        :param params: key-value parameters, passed through to search
        :return: list of instantiated objects
        """
        return self.search(target_class, **params).items

    def search(self, target_class: Type[YAMLRoot] = None, **params) -> SolrQueryResult:
        """
        Query a SOLR endpoint for a list of objects

        :param target_class: class to instantiate for each row when the row
            itself does not determine a usable class
        :param params: key-value parameters. Keys should be in the schema
        :return: result whose ``items`` holds one object per returned document
        """
        sq = self.generate_query(**params)
        rawres = self.execute(sq)
        items = [self.fetch_object(row, sq, target_class=target_class) for row in rawres.docs]
        # pysolr reports the total match count as ``hits``; fall back to 0 (the
        # previous behaviour) for result objects that do not carry it
        return SolrQueryResult(num_found=getattr(rawres, 'hits', 0), items=items)

    def generate_query(self, **params) -> SolrQuery:
        """
        Generate a solr query given query parameters

        :param params: key-value parameters; each becomes one constraint
        :return: query with an empty prefixmap and one constraint per parameter
        """
        sq = SolrQuery(prefixmap={})
        self._generate_query_for_params(sq, params)
        return sq

    def _generate_query_for_params(self, sq: SolrQuery, params: Dict) -> None:
        """
        Add one constraint to ``sq`` for every entry of ``params``.

        :param sq: query to extend in place
        :param params: mapping of slot name to value
        """
        mapper = self.mapper
        for sn, v in params.items():
            slot = mapper._get_slot(sn)
            if slot is None:
                # Best effort: report the unknown name but still constrain on it
                # verbatim, instead of failing on ``slot.name`` inside the mapper.
                logging.error(f'Unknown slot name: {sn}')
                slot_range = None
                solr_prop = sn
            else:
                slot_range = slot.range
                solr_prop = mapper._slot_to_solr_prop(slot, sq.prefixmap)
            solr_val = mapper.pyval_to_solr_atom(v, range=slot_range, query=sq)
            sq.add_constraint(solr_prop, solr_val)

    def fetch_object(self, row: Dict,
                     original_query: SolrQuery = None,
                     target_class: Type[YAMLRoot] = None) -> YAMLRoot:
        """
        Build an object from a result row

        Entries whose value is None or an empty list are dropped; the
        remaining entries are passed as keyword arguments to the class.

        :param row: one document returned by SOLR
        :param original_query: query that produced the row (currently unused)
        :param target_class: class to instantiate when the mapper does not
            yield something callable for this row
        :return: the instantiated object
        :raises TypeError: if no callable class is available for the row
        """
        new_obj = {k: v for k, v in row.items() if v is not None and v != []}
        cls = self.mapper._get_linkml_class(new_obj)
        # The mapper looks the type up in ``schema.classes``; what it returns
        # is only usable here if it can be called as a constructor.
        if not callable(cls):
            cls = target_class
        if cls is None:
            raise TypeError('Cannot instantiate row: no target_class given and '
                            'no class could be determined from the row')
        return cls(**new_obj)

    def execute(self, query: SolrQuery) -> RawSolrResult:
        """
        Execute a solr query on endpoint

        Always searches for ``*:*``; the constraints come from
        ``query.http_params()``.

        :param query: query to execute
        :return: the object returned by ``pysolr.Solr.search``
        """
        solr = pysolr.Solr(self.endpoint.url)
        params = query.http_params()
        logging.debug('SOLR params: %s', params)
        results = solr.search('*:*', **params)
        logging.debug('SOLR docs: %s', results.docs)
        return results
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
|
||
from dataclasses import dataclass | ||
from typing import Dict, List, Any, Optional | ||
|
||
from linkml_model.meta import YAMLRoot | ||
|
||
# Type of SolrQuery.prefixmap: a dict with string keys and string values.
PREFIXMAP = Dict[str, str] | ||
# Alias for str, used where a value names a SOLR field.
FIELD = str | ||
# Alias for a plain Dict, used as the annotation for raw SOLR results.
RawSolrResult = Dict | ||
|
||
|
||
@dataclass
class SolrEndpoint:
    """
    Settings describing a SOLR endpoint.
    """

    # URL of the endpoint; None when not set
    url: Optional[str] = None
    # defaults to 'type'
    type_property: str = 'type'
|
||
|
||
@dataclass
class SolrQuery:
    """
    A SOLR query: filter constraints plus an optional list of fields to return.
    """

    # prefix map associated with the query
    prefixmap: PREFIXMAP = None
    # fields to return; None means no field list is sent
    fields: List[FIELD] = None
    # field -> value constraints; None until the first constraint is added
    filter_query: Dict[FIELD, Any] = None

    def http_params(self) -> dict:
        """
        Render the query as keyword parameters for the SOLR request.

        :return: dict with ``fq`` (one ``field:value`` entry per constraint,
            empty when there are none) and, when ``fields`` is set, ``fl``
            (comma-separated field list)
        """
        # a query without constraints has filter_query None; treat it as empty
        constraints = self.filter_query or {}
        params = {'fq': [f'{k}:{_quote(v)}' for (k, v) in constraints.items()]}
        if self.fields is not None:
            # SOLR's field-list parameter is called 'fl'
            params['fl'] = ','.join(self.fields)
        return params

    def add_constraint(self, solr_prop, solr_val):
        """
        Constrain ``solr_prop`` to ``solr_val``, replacing any earlier
        constraint on the same property.

        :param solr_prop: SOLR field name
        :param solr_val: value (or list of values) the field must match
        """
        if self.filter_query is None:
            self.filter_query = {}
        self.filter_query[solr_prop] = solr_val
|
||
|
||
@dataclass
class SolrQueryResult:
    """
    Result of a SOLR search.
    """

    # number of matches; defaults to 0
    num_found: int = 0
    # objects built from the returned documents
    items: List[YAMLRoot] = None
    # facet counts, when available
    facet_counts: Dict = None
    # highlighting information, when available
    highlighting: str = None
highlighting: str = None | ||
|
||
|
||
def _quote(v, operator="OR"): | ||
if isinstance(v, list): | ||
if len(v) == 1: | ||
return _quote(v[0], operator) | ||
else: | ||
v2 = f" {operator} ".join([_quote(x) for x in v]) | ||
return f'({v2})' | ||
else: | ||
return f'"{v}"' |
Oops, something went wrong.