-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #17 from scaife-viewer/atlas/cts-ingestion
Refactor ATLAS config, provide isolated ATLAS database and support library ingestion from CTS Collections
- Loading branch information
Showing
32 changed files
with
641 additions
and
137 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1 +1,76 @@ | ||
# Aligned Text and Linguistic Annotation Server (ATLAS) | ||
|
||
## Settings | ||
|
||
Settings can be overridden at a project level using the `SV_ATLAS_<name>` | ||
naming convention. | ||
|
||
### Data model | ||
|
||
**DATA_DIR** | ||
Default: `None` | ||
|
||
The path to the directory containing ATLAS data | ||
|
||
**DATA_MODEL_ID** | ||
Default: A base64 encoded representation of the last release (in `YYYY-MM-DD-###` format) where a | ||
backwards incompatible schema change occurred. | ||
|
||
Site developers can use the value of this setting to help inform when ATLAS content should be re-ingested | ||
due to backwards-incompatible (BI) schema changes. | ||
|
||
**INGESTION_CONCURRENCY** | ||
Default: `None` | ||
|
||
Sets the number of processes available to ProcessPoolExecutors during ingestion. | ||
|
||
When `None`, defaults to the number of processors as reported by `multiprocessing.cpu_count()`. | ||
|
||
**NODE_ALPHABET** | ||
Default: `"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"` | ||
|
||
Used by `django-treebeard` to calculate the maximum path steps. | ||
|
||
See the [django-treebeard docs](https://django-treebeard.readthedocs.io/en/latest/mp_tree.html#treebeard.mp_tree.MP_Node.alphabet) for more information. | ||
|
||
|
||
### Database | ||
|
||
**DB_LABEL** | ||
Default: `"atlas"` | ||
|
||
The label to use for the ATLAS-specific database (required when using the `ATLASRouter` database router) | ||
|
||
**DB_PATH** | ||
Default: `None` | ||
|
||
The path to the SQLite database referenced by `DB_LABEL`. | ||
|
||
|
||
### GraphQL | ||
|
||
**IN_MEMORY_PASSAGE_CHUNK_MAX** | ||
Default: `2500` | ||
|
||
Sets the upper limit on the number of text parts used for in-memory passage chunking. | ||
|
||
When the number of text parts exceeds this limit, ATLAS will fall back to a database-backed | ||
chunking algorithm. | ||
|
||
For most smaller passages, the in-memory chunking is faster than using the database. | ||
|
||
|
||
### Other | ||
|
||
**HOOKSET** | ||
Default: `"scaife_viewer.atlas.hooks.DefaultHookSet"` | ||
|
||
The path to a hookset that can be used to customize ATLAS functionality. | ||
|
||
## GraphQL Endpoint | ||
URL Name: `sv_atlas:graphql_endpoint` | ||
|
||
Primary GraphQL endpoint for `scaife-viewer-atlas` projects. | ||
|
||
When accessed [via a browser](https://github.com/graphql-python/graphene-django/blob/2e806384f60505a29745752bf9c477c71668f0fa/graphene_django/views.py#L154), delivers a [GraphiQL Playground](https://github.com/graphql/graphiql#graphiql) that can be used | ||
to explore ATLAS GraphQL fields. |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,12 +1,56 @@ | ||
import base64 | ||
import importlib | ||
|
||
from django.conf import settings # noqa | ||
from django.core.exceptions import ImproperlyConfigured | ||
|
||
from appconf import AppConf | ||
|
||
|
||
def load_path_attr(path):
    """
    Import and return the object named by a dotted path.

    Everything before the last dot in `path` is imported as a module and the
    final segment is looked up as an attribute of that module, e.g.
    ``"package.module.ClassName"``.

    Raises `ImproperlyConfigured` when `path` has no module or attribute
    portion, when the module cannot be imported, or when the module does
    not define the requested attribute.
    """
    module, _, attr = path.rpartition(".")
    if not module or not attr:
        # Without this guard a dotless path would be sliced into a bogus
        # module name and a leading-dot path would leak a ValueError from
        # importlib instead of a configuration error.
        raise ImproperlyConfigured(
            f"'{path}' is not a valid dotted path to a module attribute"
        )
    try:
        mod = importlib.import_module(module)
    except ImportError as e:
        raise ImproperlyConfigured(
            "Error importing {0}: '{1}'".format(module, e)
        ) from e
    try:
        return getattr(mod, attr)
    except AttributeError as e:
        raise ImproperlyConfigured(
            "Module '{0}' does not define a '{1}'".format(module, attr)
        ) from e
|
||
|
||
class ATLASAppConf(AppConf):
    """
    Default values for the ATLAS settings.

    Settings are looked up with the prefix declared on `Meta`, so a project
    overrides e.g. `DATA_DIR` by defining `SV_ATLAS_DATA_DIR`.
    """

    # Data model
    DATA_DIR = None
    DATA_MODEL_ID = base64.b64encode(b"2020-09-08-001\n").decode()
    # `INGESTION_CONCURRENCY` defaults to number of processors
    # as reported by multiprocessing.cpu_count()
    INGESTION_CONCURRENCY = None
    NODE_ALPHABET = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"

    # GraphQL settings
    IN_MEMORY_PASSAGE_CHUNK_MAX = 2500

    # Database settings
    DB_LABEL = "atlas"
    DB_PATH = None

    # Other
    HOOKSET = "scaife_viewer.atlas.hooks.DefaultHookSet"

    class Meta:
        # Single source of truth for the settings prefix (`SV_ATLAS_<name>`).
        prefix = "sv_atlas"

    def configure_hookset(self, value):
        """Import the hookset class named by `value` and return an instance of it."""
        return load_path_attr(value)()

    def configure_data_dir(self, value):
        """
        Return `value` unchanged, raising `ImproperlyConfigured` when it is None.
        """
        # NOTE: We've chosen an explicit `configure` method
        # vs making `DATA_DIR` a required field so we can check
        # that DATA_DIR is a non-None value.
        if value is None:
            msg = f"{self._meta.prefixed_name('DATA_DIR')} must be defined"
            raise ImproperlyConfigured(msg)
        return value
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,54 @@ | ||
from scaife_viewer.atlas.conf import ATLASAppConf | ||
|
||
|
||
# Instantiate the ATLAS configuration once at import time and keep the
# configured database label in a module-level constant for the router.
atlas_conf = ATLASAppConf()
ATLAS_DB_LABEL = atlas_conf.DB_LABEL
|
||
|
||
class ATLASRouter:
    """
    Database router that sends every operation on the routed app labels
    to the database labelled `ATLAS_DB_LABEL`.
    """

    route_app_labels = {"scaife_viewer_atlas"}

    def db_for_read(self, model, **hints):
        """
        Return `ATLAS_DB_LABEL` for reads of routed models, otherwise None.
        """
        routed = model._meta.app_label in self.route_app_labels
        return ATLAS_DB_LABEL if routed else None

    def db_for_write(self, model, **hints):
        """
        Return `ATLAS_DB_LABEL` for writes of routed models, otherwise None.
        """
        routed = model._meta.app_label in self.route_app_labels
        return ATLAS_DB_LABEL if routed else None

    def allow_relation(self, obj1, obj2, **hints):
        """
        Return True when either object belongs to a routed app,
        otherwise None (no opinion).
        """
        involved = any(
            obj._meta.app_label in self.route_app_labels for obj in (obj1, obj2)
        )
        return True if involved else None

    def allow_migrate(self, db, app_label, model_name=None, **hints):
        """
        Allow only routed apps on the `ATLAS_DB_LABEL` database and keep
        routed apps off every other database; None for everything else.
        """
        routed = app_label in self.route_app_labels
        if db == ATLAS_DB_LABEL:
            # The ATLAS database accepts routed apps and nothing else.
            return routed
        if routed:
            # A routed app on any other database is rejected.
            return False
        return None
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
from . import constants | ||
from .resolvers.default import resolve_library | ||
|
||
|
||
def ensure_trailing_colon(urn):
    """Return `urn` as-is when it already ends with ":", else with ":" appended."""
    return urn if urn.endswith(":") else f"{urn}:"
|
||
|
||
class DefaultHookSet:
    """
    Default implementations of the ATLAS hooks.
    """

    def resolve_library(self):
        """Delegate to the module-level `resolve_library` resolver."""
        return resolve_library()

    def can_access_urn(self, request, urn):
        """Always grant access; `request` and `urn` are not inspected."""
        return True

    def get_human_lang(self, value):
        """Return the friendly name mapped to `value`, or `value` itself if unmapped."""
        friendly_names = constants.HUMAN_FRIENDLY_LANGUAGE_MAP
        return friendly_names.get(value, value)

    def get_importer_class(self):
        """Return the importer class used for ingestion."""
        from .importers.versions import CTSImporter  # noqa: avoids circular import

        return CTSImporter

    def extract_cts_text_group_metadata(self, text_group):
        """Build the metadata dict (`urn`, `name`) for a CTS text group."""
        return {
            "urn": f"{ensure_trailing_colon(text_group.urn)}",
            "name": [{"lang": "eng", "value": str(text_group.label)}],
        }

    def extract_cts_work_metadata(self, work):
        """Build the metadata dict (`urn`, `lang`, `title`) for a CTS work."""
        # FIXME: backport `lang` attr to scaife-viewer-core
        lang = getattr(work, "lang", work.metadata.lang)
        urn = f"{ensure_trailing_colon(work.urn)}"
        # TODO: provide a better api for work.label lang
        title = {"lang": work.label._language, "value": str(work.label)}
        return {"urn": urn, "lang": lang, "title": [title]}

    def extract_cts_version_metadata(self, version):
        """
        Build the metadata dict for a CTS version: `urn`, `version_kind`,
        `citation_scheme`, `label`, `description` and `lang`.
        """
        # TODO: provide first_passage_urn
        return {
            "urn": f"{ensure_trailing_colon(version.urn)}",
            "version_kind": version.kind,
            "citation_scheme": [c.name for c in version.metadata.citation],
            # TODO: provide a better api for version.label lang
            "label": [
                {"lang": version.label._language, "value": str(version.label)}
            ],
            # TODO: provide a better api for version.description lang
            "description": [
                {
                    "lang": version.description._language,
                    "value": str(version.description),
                }
            ],
            "lang": version.lang,
        }
|
||
|
||
class HookProxy:
    """
    Forward attribute lookups to the hookset stored in
    `settings.SV_ATLAS_HOOKSET`, resolving the settings on every access.
    """

    def __getattr__(self, attr):
        from .conf import settings  # noqa; avoids race condition

        configured_hookset = settings.SV_ATLAS_HOOKSET
        return getattr(configured_hookset, attr)
|
||
|
||
# Shared proxy instance; attribute access on it is forwarded to
# `settings.SV_ATLAS_HOOKSET` by `HookProxy.__getattr__`.
hookset = HookProxy()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.