From d5aefb1e419d9167324452f92a5f1555aa73361c Mon Sep 17 00:00:00 2001 From: Jacob Wegner Date: Tue, 8 Sep 2020 10:33:31 -0500 Subject: [PATCH 01/34] backport cts collection resolver --- .../scaife_viewer/atlas/importers/versions.py | 122 ++++++++++--- .../scaife_viewer/atlas/resolvers/__init__.py | 0 atlas/scaife_viewer/atlas/resolvers/cts.py | 167 ++++++++++++++++++ 3 files changed, 264 insertions(+), 25 deletions(-) create mode 100644 atlas/scaife_viewer/atlas/resolvers/__init__.py create mode 100644 atlas/scaife_viewer/atlas/resolvers/cts.py diff --git a/atlas/scaife_viewer/atlas/importers/versions.py b/atlas/scaife_viewer/atlas/importers/versions.py index 63006ab..5bed501 100644 --- a/atlas/scaife_viewer/atlas/importers/versions.py +++ b/atlas/scaife_viewer/atlas/importers/versions.py @@ -1,4 +1,5 @@ import json +import logging import os import sys from collections import defaultdict @@ -10,11 +11,15 @@ from treebeard.exceptions import PathOverflow from scaife_viewer.atlas import constants +from tqdm import tqdm from ..models import Node +from ..resolvers.cts import CTSCollectionResolver from ..urn import URN +logger = logging.getLogger(__name__) + LIBRARY_DATA_PATH = os.path.join(settings.ATLAS_CONFIG["DATA_DIR"], "library") @@ -81,9 +86,17 @@ def __init__(self, library, version_data, nodes=dict()): self.library = library self.version_data = version_data self.nodes = nodes + # TODO: Decouple "version_data" further self.urn = URN(self.version_data["urn"].strip()) self.work_urn = self.urn.up_to(self.urn.WORK) - self.label = get_first_value_for_language(version_data["label"], "eng") + + try: + label = get_first_value_for_language(version_data["label"], "eng") + except ValueError: + # TODO: Do we need this or can we support a fallback value above? 
+ label = self.work_urn + self.label = label + self.citation_scheme = self.version_data["citation_scheme"] self.idx_lookup = defaultdict(int) @@ -133,25 +146,63 @@ def get_root_urn_scheme(self, node_urn): def get_urn_scheme(self, node_urn): return [*self.get_root_urn_scheme(node_urn), *self.citation_scheme] - def get_textgroup_metadata(self, urn): - metadata = self.library.text_groups[urn.up_to(URN.TEXTGROUP)] - return {"label": get_first_value_for_language(metadata["name"], "eng")} - - def get_work_metadata(self, urn): - metadata = self.library.works[urn.up_to(URN.WORK)] - return {"label": get_first_value_for_language(metadata["title"], "eng")} + # TODO: Move some metadata extraction out to our "resolvers" + + # def get_textgroup_metadata(self): + # metadata = self.library.text_groups[self.urn.up_to(URN.TEXTGROUP)] + # return {"label": get_first_value_for_language(metadata["name"], "eng")} + + def get_text_group_metadata(self): + text_group_urn = self.urn.up_to(self.urn.TEXTGROUP) + metadata = self.library.text_groups[text_group_urn] + name = metadata["name"][0] + return dict( + label=name["value"], lang=name["lang"], **(metadata.get("meta_") or {}) + ) + + # def get_work_metadata(self): + # metadata = self.library.works[self.urn.up_to(URN.WORK)] + # return {"label": get_first_value_for_language(metadata["title"], "eng")} + + def get_work_metadata(self): + work_urn = self.urn.up_to(self.urn.WORK) + metadata = self.library.works[work_urn] + return dict(lang=metadata["lang"], label=metadata["title"][0]["value"]) + + # def get_version_metadata(self): + # return { + # # @@@ how much of the `metadata.json` do we + # # "pass through" via GraphQL vs + # # apply to particular node kinds in the heirarchy + # "citation_scheme": self.citation_scheme, + # "label": self.label, + # "lang": self.version_data["lang"], + # "first_passage_urn": self.version_data["first_passage_urn"], + # "default_toc_urn": self.version_data.get("default_toc_urn"), + # } def 
get_version_metadata(self): - return { + default = { # @@@ how much of the `metadata.json` do we # "pass through" via GraphQL vs # apply to particular node kinds in the heirarchy "citation_scheme": self.citation_scheme, - "label": self.label, - "lang": self.version_data["lang"], - "first_passage_urn": self.version_data["first_passage_urn"], + "work_title": self.label, + "first_passage_urn": self.version_data.get("first_passage_urn"), "default_toc_urn": self.version_data.get("default_toc_urn"), } + # @@@ label + default.update( + dict( + label=self.version_data["label"][0]["value"], + description=self.version_data["description"][0]["value"], + lang=self.version_data["lang"], + kind=self.version_data["version_kind"], + tracking_title=self.version_data["tracking_title"], + image=self.version_data["image"], + ) + ) + return default def add_child_bulk(self, parent, node_data): # @@@ forked version of `Node._inc_path` @@ -197,20 +248,32 @@ def generate_node(self, idx, node_data, parent_urn): return self.add_child_bulk(parent, node_data) return self.add_child(parent, node_data) - def destructure_urn(self, node_urn, tokens): + def destructure_urn(self, node_urn, tokens, extract_text_parts): node_data = [] for kind in self.get_urn_scheme(node_urn): data = {"kind": kind} - if kind not in self.citation_scheme: + # TODO: Determine when we're dealing with a passage reference portion vs + # work part of the urn. 
+ # May be done with parts of `get_urn_scheme` + # And maybe the "presence" / absence of tokens could help slightly too + # @@@ duplicate; we might need a cts_ prefix for work, for example + urn_is_work_part = ( + kind not in self.citation_scheme or kind == "work" and not tokens + ) + if urn_is_work_part: data.update({"urn": self.get_partial_urn(kind, node_urn)}) if kind == "textgroup": - data.update({"metadata": self.get_textgroup_metadata(node_urn)}) + data.update({"metadata": self.get_text_group_metadata()}) elif kind == "work": - data.update({"metadata": self.get_work_metadata(node_urn)}) + data.update({"metadata": self.get_work_metadata()}) elif kind == "version": data.update({"metadata": self.get_version_metadata()}) + # TODO: Handle exemplars else: + if not extract_text_parts: + continue + ref_index = self.citation_scheme.index(kind) ref = ".".join(node_urn.passage_nodes[: ref_index + 1]) urn = f"{node_urn.up_to(node_urn.NO_PASSAGE)}{ref}" @@ -223,16 +286,19 @@ def destructure_urn(self, node_urn, tokens): return node_data def extract_urn_and_tokens(self, line): - if self.format == "cex": + if not line: + tokens = "" + urn = f"{self.urn}" + elif self.format == "cex": urn, tokens = line.strip().split("#", maxsplit=1) else: ref, tokens = line.strip().split(maxsplit=1) urn = f"{self.urn}{ref}" return URN(urn), tokens - def generate_branch(self, line): + def generate_branch(self, line, extract_text_parts=True): node_urn, tokens = self.extract_urn_and_tokens(line) - branch_data = self.destructure_urn(node_urn, tokens) + branch_data = self.destructure_urn(node_urn, tokens, extract_text_parts) for idx, node_data in enumerate(branch_data): node = self.nodes.get(node_data["urn"]) if node is None: @@ -261,20 +327,26 @@ def finalize(self): def apply(self): full_content_path = self.library.versions[self.urn.absolute]["path"] - with open(full_content_path, "r") as f: - for line in f: - self.generate_branch(line) + if full_content_path: + with open(full_content_path, "r") 
as f: + for line in f: + self.generate_branch(line) + else: + self.generate_branch("", extract_text_parts=False) count = self.finalize() - print(f"{self.label}: {count} nodes.", file=sys.stderr) + logger.debug(f"{self.label}: {count} nodes.") def resolve_library(): - text_groups, works, versions = LibraryDataResolver(LIBRARY_DATA_PATH).resolved + # TODO: Customize library resolver class + # text_groups, works, versions = LibraryDataResolver(LIBRARY_DATA_PATH).resolved + text_groups, works, versions = CTSCollectionResolver().resolved return Library(text_groups, works, versions) def get_first_value_for_language(values, lang, fallback=True): + # TODO: When this is called, how would we pass a fallback? value = next(iter(filter(lambda x: x["lang"] == lang, values)), None) if value is None: if fallback: @@ -291,6 +363,6 @@ def import_versions(reset=False): library = resolve_library() nodes = {} - for _, version_data in library.versions.items(): + for _, version_data in tqdm(library.versions.items()): CTSImporter(library, version_data, nodes).apply() print(f"{Node.objects.count()} total nodes on the tree.", file=sys.stderr) diff --git a/atlas/scaife_viewer/atlas/resolvers/__init__.py b/atlas/scaife_viewer/atlas/resolvers/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/atlas/scaife_viewer/atlas/resolvers/cts.py b/atlas/scaife_viewer/atlas/resolvers/cts.py new file mode 100644 index 0000000..c78c2dc --- /dev/null +++ b/atlas/scaife_viewer/atlas/resolvers/cts.py @@ -0,0 +1,167 @@ +import re + +from scaife_viewer.core.cts import text_inventory + + +def get_lang_value(value): + if re.match(r"^[a-z]+-[A-Z][a-z]+$", value): + return value.split("-")[0] + else: + return value + + +class CTSCollectionResolver: + def __init__(self): + self.text_groups = {} + self.works = {} + self.versions = {} + self.resolved = self.resolve_text_inventory() + + def extract_text_group_metadata(self, text_group): + """ + { + "urn": "urn:cts:greekLit:tlg0012:", + "node_kind": 
"textgroup", + "name": [ + { + "lang": "eng", + "value": "Homer" + } + ] + } + """ + return dict( + urn=f"{text_group.urn}:", + node_kind="textgroup", + name=[dict(lang="eng", value=str(text_group.label))], + meta_=text_group.structured_metadata(), + ) + + def extract_work_metadata(self, work): + """ + { + "urn": "urn:cts:greekLit:tlg0012.tlg001:", + "group_urn": "urn:cts:greekLit:tlg0012:", + "node_kind": "work", + "lang": "grc", + "title": [ + { + "lang": "eng", + "value": "Iliad" + } + ], + "versions": [ + { + "urn": "urn:cts:greekLit:tlg0012.tlg001.perseus-grc2:", + "node_kind": "version", + "version_kind": "edition", + "first_passage_urn": "urn:cts:greekLit:tlg0012.tlg001.perseus-grc2:1.1-1.7", + "citation_scheme": ["book", "line"], + "label": [ + { + "lang": "eng", + "value": "Iliad (Greek Text of Munro & Allen)" + } + ], + "description": [ + { + "lang": "eng", + "value": "Homer, creator; Monro, D. B. (David Binning), 1836-1905, creator; Monro, D. B. (David Binning), 1836-1905, editor; Allen, Thomas W. (Thomas William), b. 
1862, editor" + } + ] + } + ] + } + """ + return dict( + urn=f"{work.urn}:", + # @@@ + group_urn=f'{work.urn.rsplit(".", maxsplit=1)[0]}:', + node_kind="work", + lang=get_lang_value(work.metadata.lang), + # @@@ label vs title wa + title=[ + { + # @@@ hacky + "lang": work.label._language, + "value": str(work.label), + } + ], + ) + + def extract_version_metadata(self, version): + return dict( + urn=f"{version.urn}:", + node_kind="version", + version_kind=version.kind, + # @@@ + # first_passage_urn + citation_scheme=[c.name for c in version.metadata.citation], + label=[ + { + # @@@ hacky + "lang": version.label._language, + "value": str(version.label), + } + ], + description=[ + { + # @@@ hacky + "lang": version.description._language, + "value": str(version.description), + } + ], + lang=get_lang_value(version.metadata.lang), + tracking_title=str(version.tracking_title), + image=version.image, + ) + + def resolve_versions(self, work): + """ + { + "urn": "urn:cts:greekLit:tlg0012.tlg001.perseus-grc2:", + "node_kind": "version", + "version_kind": "edition", + "first_passage_urn": "urn:cts:greekLit:tlg0012.tlg001.perseus-grc2:1.1-1.7", + "citation_scheme": ["book", "line"], + "label": [ + { + "lang": "eng", + "value": "Iliad (Greek Text of Munro & Allen)" + } + ], + "description": [ + { + "lang": "eng", + "value": "Homer, creator; Monro, D. B. (David Binning), 1836-1905, creator; Monro, D. B. (David Binning), 1836-1905, editor; Allen, Thomas W. (Thomas William), b. 
1862, editor" + } + ] + } + """ + for version in work.texts(): + version_metadata = self.extract_version_metadata(version) + # TODO: More validation around "path" + version_metadata["path"] = None + self.versions[version_metadata["urn"]] = version_metadata + + def resolve_works(self, text_group): + for work in text_group.works(): + if work.urn.count(" ") > 0: + # @@@ defensive coding around bad URNs + continue + work_metadata = self.extract_work_metadata(work) + self.works[work_metadata["urn"]] = work_metadata + self.resolve_versions(work) + + def resolve_text_inventory(self): + """ + Resolves the library from `cts.TextInventory`. + + Since Node instances are ordered by their `path` value, + `cts.collections.SORT_OVERRIDES` is respected by ATLAS. + """ + for text_group in text_inventory().text_groups(): + text_group_metadata = self.extract_text_group_metadata(text_group) + self.text_groups[text_group_metadata["urn"]] = text_group_metadata + self.resolve_works(text_group) + return self.text_groups, self.works, self.versions From 17bbd603cd8af7031a8c9775aa5ad08ed7ddb9e8 Mon Sep 17 00:00:00 2001 From: Jacob Wegner Date: Tue, 8 Sep 2020 10:38:02 -0500 Subject: [PATCH 02/34] add text group and work to schema --- atlas/scaife_viewer/atlas/schema.py | 46 +++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/atlas/scaife_viewer/atlas/schema.py b/atlas/scaife_viewer/atlas/schema.py index 2aaf97f..8b846f6 100644 --- a/atlas/scaife_viewer/atlas/schema.py +++ b/atlas/scaife_viewer/atlas/schema.py @@ -238,6 +238,41 @@ def resolve_metadata(obj, *args, **kwargs): return camelize(obj.metadata) +class TextGroupNode(AbstractTextPartNode): + # @@@ work or version relations + + @classmethod + def get_queryset(cls, queryset, info): + return queryset.filter(kind="textgroup").order_by("pk") + + # TODO: extract to AbstractTextPartNode + def resolve_label(obj, *args, **kwargs): + # @@@ consider a direct field or faster mapping + return obj.metadata["label"] + + def 
resolve_metadata(obj, *args, **kwargs): + metadata = obj.metadata + return camelize(metadata) + + +class WorkNode(AbstractTextPartNode): + # @@@ apply a subfilter here? + versions = LimitedConnectionField(lambda: VersionNode) + + @classmethod + def get_queryset(cls, queryset, info): + return queryset.filter(kind="work").order_by("pk") + + # TODO: extract to AbstractTextPartNode + def resolve_label(obj, *args, **kwargs): + # @@@ consider a direct field or faster mapping + return obj.metadata["label"] + + def resolve_metadata(obj, *args, **kwargs): + metadata = obj.metadata + return camelize(metadata) + + class VersionNode(AbstractTextPartNode): text_alignment_chunks = LimitedConnectionField(lambda: TextAlignmentChunkNode) @@ -245,6 +280,11 @@ class VersionNode(AbstractTextPartNode): def get_queryset(cls, queryset, info): return queryset.filter(kind="version").order_by("urn") + # TODO: extract to AbstractTextPartNode + def resolve_label(obj, *args, **kwargs): + # @@@ consider a direct field or faster mapping + return obj.metadata["label"] + def resolve_metadata(obj, *args, **kwargs): metadata = obj.metadata work = obj.get_parent() @@ -438,6 +478,12 @@ class Meta: class Query(ObjectType): + text_group = relay.Node.Field(TextGroupNode) + text_groups = LimitedConnectionField(TextGroupNode) + + work = relay.Node.Field(WorkNode) + works = LimitedConnectionField(WorkNode) + version = relay.Node.Field(VersionNode) versions = LimitedConnectionField(VersionNode) From 0982922c9c1339763d3b03cebff10d490c6e6125 Mon Sep 17 00:00:00 2001 From: Jacob Wegner Date: Tue, 8 Sep 2020 10:52:15 -0500 Subject: [PATCH 03/34] expand Version schema --- atlas/scaife_viewer/atlas/schema.py | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/atlas/scaife_viewer/atlas/schema.py b/atlas/scaife_viewer/atlas/schema.py index 8b846f6..59964d0 100644 --- a/atlas/scaife_viewer/atlas/schema.py +++ b/atlas/scaife_viewer/atlas/schema.py @@ -276,15 +276,42 @@ def 
resolve_metadata(obj, *args, **kwargs): class VersionNode(AbstractTextPartNode): text_alignment_chunks = LimitedConnectionField(lambda: TextAlignmentChunkNode) + description = String() + lang = String() + human_lang = String() + kind = String() + @classmethod def get_queryset(cls, queryset, info): - return queryset.filter(kind="version").order_by("urn") + # TODO: set a default somewhere + # return queryset.filter(kind="version").order_by("urn") + return queryset.filter(kind="version").order_by("pk") + + # TODO: Determine how tightly coupled these fields + # should be to metadata (including ["key"] vs .get("key")) + def resolve_human_lang(obj, *args, **kwargs): + lang = obj.metadata["lang"] + # @@@ make the language map decoupled from cts + # TODO: fix cts.constants + return cts.constants.LANGAUGE_MAP.get(lang, lang) + + def resolve_lang(obj, *args, **kwargs): + return obj.metadata["lang"] + + def resolve_description(obj, *args, **kwargs): + # @@@ consider a direct field or faster mapping + return obj.metadata["description"] + + def resolve_kind(obj, *args, **kwargs): + # @@@ consider a direct field or faster mapping + return obj.metadata["kind"] # TODO: extract to AbstractTextPartNode def resolve_label(obj, *args, **kwargs): # @@@ consider a direct field or faster mapping return obj.metadata["label"] + # TODO: convert metadata to proper fields def resolve_metadata(obj, *args, **kwargs): metadata = obj.metadata work = obj.get_parent() From f080b7fddb0a1ddb26cd7d7b72eadf322d6ccbff Mon Sep 17 00:00:00 2001 From: Jacob Wegner Date: Tue, 8 Sep 2020 10:56:11 -0500 Subject: [PATCH 04/34] backport access checking --- atlas/scaife_viewer/atlas/schema.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/atlas/scaife_viewer/atlas/schema.py b/atlas/scaife_viewer/atlas/schema.py index 59964d0..93dfeb9 100644 --- a/atlas/scaife_viewer/atlas/schema.py +++ b/atlas/scaife_viewer/atlas/schema.py @@ -1,7 +1,7 @@ from django.db.models import Q import 
django_filters -from graphene import Connection, Field, ObjectType, String, relay +from graphene import Boolean, Connection, Field, ObjectType, String, relay from graphene.types import generic from graphene_django import DjangoObjectType from graphene_django.filter import DjangoFilterConnectionField @@ -276,6 +276,7 @@ def resolve_metadata(obj, *args, **kwargs): class VersionNode(AbstractTextPartNode): text_alignment_chunks = LimitedConnectionField(lambda: TextAlignmentChunkNode) + access = Boolean() description = String() lang = String() human_lang = String() @@ -289,6 +290,11 @@ def get_queryset(cls, queryset, info): # TODO: Determine how tightly coupled these fields # should be to metadata (including ["key"] vs .get("key")) + def resolve_access(obj, info, *args, **kwargs): + request = info.context + # TODO: fix auth_request via a hookset + return auth_request(request, obj.urn) + def resolve_human_lang(obj, *args, **kwargs): lang = obj.metadata["lang"] # @@@ make the language map decoupled from cts From 0259ff905b1ec05a679b6f3f8982103d50bef2fb Mon Sep 17 00:00:00 2001 From: Jacob Wegner Date: Tue, 8 Sep 2020 11:00:03 -0500 Subject: [PATCH 05/34] stub out management command for ATLAS db --- atlas/scaife_viewer/atlas/db_routers.py | 51 ++++++++++++++++++ .../atlas/management/__init__.py | 0 .../atlas/management/commands/__init__.py | 0 .../management/commands/prepare_atlas_db.py | 54 +++++++++++++++++++ atlas/scaife_viewer/atlas/models.py | 5 ++ 5 files changed, 110 insertions(+) create mode 100644 atlas/scaife_viewer/atlas/db_routers.py create mode 100644 atlas/scaife_viewer/atlas/management/__init__.py create mode 100644 atlas/scaife_viewer/atlas/management/commands/__init__.py create mode 100644 atlas/scaife_viewer/atlas/management/commands/prepare_atlas_db.py diff --git a/atlas/scaife_viewer/atlas/db_routers.py b/atlas/scaife_viewer/atlas/db_routers.py new file mode 100644 index 0000000..5a079d5 --- /dev/null +++ b/atlas/scaife_viewer/atlas/db_routers.py @@ 
-0,0 +1,51 @@ +# TODO: Document setting up ATLAS_DB_LABEL bits; possibly via appconf +ATLAS_DB_LABEL = "atlas" + + +class ATLASRouter: + """ + A router to control all database operations on models in the + library application. + """ + + route_app_labels = {"library"} + + def db_for_read(self, model, **hints): + """ + Attempts to read library models go to ATLAS_DB_LABEL. + """ + if model._meta.app_label in self.route_app_labels: + return ATLAS_DB_LABEL + return None + + def db_for_write(self, model, **hints): + """ + Attempts to write library models go to ATLAS_DB_LABEL. + """ + if model._meta.app_label in self.route_app_labels: + return ATLAS_DB_LABEL + return None + + def allow_relation(self, obj1, obj2, **hints): + """ + Allow relations if a model in library app is + involved. + """ + if ( + obj1._meta.app_label in self.route_app_labels + or obj2._meta.app_label in self.route_app_labels + ): + return True + return None + + def allow_migrate(self, db, app_label, model_name=None, **hints): + """ + Only add library apps to the ATLAS_DB_LABEL database. + + Do not add library apps to any other database. 
+ """ + if db == ATLAS_DB_LABEL: + return app_label in self.route_app_labels + elif app_label in self.route_app_labels: + return db == ATLAS_DB_LABEL + return None diff --git a/atlas/scaife_viewer/atlas/management/__init__.py b/atlas/scaife_viewer/atlas/management/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/atlas/scaife_viewer/atlas/management/commands/__init__.py b/atlas/scaife_viewer/atlas/management/commands/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/atlas/scaife_viewer/atlas/management/commands/prepare_atlas_db.py b/atlas/scaife_viewer/atlas/management/commands/prepare_atlas_db.py new file mode 100644 index 0000000..a43e102 --- /dev/null +++ b/atlas/scaife_viewer/atlas/management/commands/prepare_atlas_db.py @@ -0,0 +1,54 @@ +# TODO: Revisit cts assumptions +import os +import shutil + +from django.conf import settings +from django.core.management import call_command +from django.core.management.base import BaseCommand + +from scaife_viewer.atlas.library.models import Node +from scaife_viewer.cts import text_inventory + +from ... 
import importers + + +class Command(BaseCommand): + """ + Prepares data used by ATLAS + """ + + help = "Prepares data used by ATLAS" + + def add_arguments(self, parser): + parser.add_argument( + "--force", + action="store_true", + help="Forces the ATLAS management command to run", + ) + + def handle(self, *args, **options): + database_path = settings.ATLAS_CONFIG["ATLAS_DB_PATH"] + db_path_exists = os.path.exists(database_path) + + reset_data = options.get("force") or not db_path_exists + if not reset_data: + self.stdout.write(f"Found existing ATLAS data at {database_path}") + return + + if db_path_exists: + os.remove(database_path) + self.stdout.write("--[Removed existing ATLAS database]--") + + self.stdout.write('--[Running database migrations on "atlas"]--') + call_command("migrate", database="atlas") + + resolver_path = settings.CTS_RESOLVER_CACHE_LOCATION + if os.path.exists(resolver_path): + shutil.rmtree(resolver_path) + self.stdout.write("--[Removed existing CTS resolver cache]--") + + self.stdout.write("--[Priming CTS Resolver cache]--") + text_inventory() + + self.stdout.write("--[Populating ATLAS db]--") + importers.versions.import_versions() diff --git a/atlas/scaife_viewer/atlas/models.py b/atlas/scaife_viewer/atlas/models.py index 285630c..8ea5f68 100644 --- a/atlas/scaife_viewer/atlas/models.py +++ b/atlas/scaife_viewer/atlas/models.py @@ -295,6 +295,9 @@ def resolve_references(self): self.text_parts.set(reference_objs) +# TODO: Review https://docs.djangoproject.com/en/3.0/topics/db/multi-db/ +# to see if there are more settings we can expose for "mixed" +# database backends class Node(MP_Node): # @@@ used to pivot siblings; may be possible if we hook into path field idx = models.IntegerField(help_text="0-based index", blank=True, null=True) @@ -307,6 +310,8 @@ class Node(MP_Node): # @@@ we may want to furthe de-norm label from metadata metadata = JSONField(default=dict, blank=True, null=True) + # TODO: standardize on ATLAS_CONFIG vs 
SCAIFE_VIEWER_ATLAS_CONFIG vs + # settings; consistency preferred alphabet = settings.NODE_ALPHABET def __str__(self): From 47b927b27fc9ed7c49a182c931832b4113a1ca12 Mon Sep 17 00:00:00 2001 From: Jacob Wegner Date: Tue, 8 Sep 2020 13:55:31 -0500 Subject: [PATCH 06/34] configure ATLAS using appconf --- atlas/scaife_viewer/atlas/conf.py | 18 ++++++++++++++++++ .../atlas/importers/alignments.py | 4 ++-- .../atlas/importers/audio_annotations.py | 4 ++-- .../atlas/importers/image_annotations.py | 4 ++-- .../atlas/importers/metrical_annotations.py | 4 ++-- .../atlas/importers/named_entities.py | 6 +++--- .../atlas/importers/text_annotations.py | 4 ++-- .../atlas/importers/token_annotations.py | 4 ++-- .../scaife_viewer/atlas/importers/versions.py | 6 +++--- .../management/commands/prepare_atlas_db.py | 2 +- atlas/scaife_viewer/atlas/models.py | 6 ++---- atlas/scaife_viewer/atlas/tests/settings.py | 10 +--------- atlas/scaife_viewer/atlas/utils.py | 5 +++-- atlas/setup.py | 1 + 14 files changed, 44 insertions(+), 34 deletions(-) create mode 100644 atlas/scaife_viewer/atlas/conf.py diff --git a/atlas/scaife_viewer/atlas/conf.py b/atlas/scaife_viewer/atlas/conf.py new file mode 100644 index 0000000..e859384 --- /dev/null +++ b/atlas/scaife_viewer/atlas/conf.py @@ -0,0 +1,18 @@ +import base64 + +from django.conf import settings # noqa + +from appconf import AppConf + + +class ATLASAppConf(AppConf): + IN_MEMORY_PASSAGE_CHUNK_MAX = 2500 + NODE_ALPHABET = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" + DATA_MODEL_ID = base64.b64encode(b"2020-09-08-001\n").decode() + + # required settings + # DATA_DIR + + class Meta: + prefix = "sv_atlas" + required = ["DATA_DIR"] diff --git a/atlas/scaife_viewer/atlas/importers/alignments.py b/atlas/scaife_viewer/atlas/importers/alignments.py index f3879b8..ebd93eb 100644 --- a/atlas/scaife_viewer/atlas/importers/alignments.py +++ b/atlas/scaife_viewer/atlas/importers/alignments.py @@ -3,12 +3,12 @@ import os import re -from 
django.conf import settings +from scaife_viewer.atlas.conf import settings from ..models import Node, TextAlignment, TextAlignmentChunk -ALIGNMENTS_DATA_PATH = os.path.join(settings.ATLAS_CONFIG["DATA_DIR"], "alignments") +ALIGNMENTS_DATA_PATH = os.path.join(settings.SV_ATLAS_DATA_DIR, "alignments") ALIGNMENTS_METADATA_PATH = os.path.join(ALIGNMENTS_DATA_PATH, "metadata.json") LINE_KIND_UNKNOWN = None diff --git a/atlas/scaife_viewer/atlas/importers/audio_annotations.py b/atlas/scaife_viewer/atlas/importers/audio_annotations.py index 491964f..1d174c0 100644 --- a/atlas/scaife_viewer/atlas/importers/audio_annotations.py +++ b/atlas/scaife_viewer/atlas/importers/audio_annotations.py @@ -1,7 +1,7 @@ import csv import os -from django.conf import settings +from scaife_viewer.atlas.conf import settings from ..models import AudioAnnotation @@ -10,7 +10,7 @@ COPYRIGHT_FRAGMENT = "© 2016 David Chamberlain under CC BY 4.0 License, https://creativecommons.org/licenses/by/4.0/" ANNOTATIONS_DATA_PATH = os.path.join( - settings.ATLAS_CONFIG["DATA_DIR"], "annotations", "audio-annotations" + settings.SV_ATLAS_DATA_DIR, "annotations", "audio-annotations" ) CITE_IDENTIFIER = "urn:cite2:exploreHomer:audio.v1:" diff --git a/atlas/scaife_viewer/atlas/importers/image_annotations.py b/atlas/scaife_viewer/atlas/importers/image_annotations.py index 780327d..55fbb83 100644 --- a/atlas/scaife_viewer/atlas/importers/image_annotations.py +++ b/atlas/scaife_viewer/atlas/importers/image_annotations.py @@ -1,7 +1,7 @@ import json import os -from django.conf import settings +from scaife_viewer.atlas.conf import settings from ..models import ( IMAGE_ANNOTATION_KIND_CANVAS, @@ -12,7 +12,7 @@ ANNOTATIONS_DATA_PATH = os.path.join( - settings.ATLAS_CONFIG["DATA_DIR"], "annotations", "image-annotations" + settings.SV_ATLAS_DATA_DIR, "annotations", "image-annotations" ) diff --git a/atlas/scaife_viewer/atlas/importers/metrical_annotations.py b/atlas/scaife_viewer/atlas/importers/metrical_annotations.py 
index dda621c..93070a0 100644 --- a/atlas/scaife_viewer/atlas/importers/metrical_annotations.py +++ b/atlas/scaife_viewer/atlas/importers/metrical_annotations.py @@ -1,7 +1,7 @@ import csv import os -from django.conf import settings +from scaife_viewer.atlas.conf import settings from ..models import MetricalAnnotation @@ -10,7 +10,7 @@ COPYRIGHT_FRAGMENT = "© 2016 David Chamberlain under CC BY 4.0 License, https://creativecommons.org/licenses/by/4.0/" ANNOTATIONS_DATA_PATH = os.path.join( - settings.ATLAS_CONFIG["DATA_DIR"], "annotations", "metrical-annotations" + settings.SV_ATLAS_DATA_DIR, "annotations", "metrical-annotations" ) CITE_IDENTIFIER = "urn:cite2:exploreHomer:metrical_annotation.v1:" diff --git a/atlas/scaife_viewer/atlas/importers/named_entities.py b/atlas/scaife_viewer/atlas/importers/named_entities.py index fa20bba..f7a9251 100644 --- a/atlas/scaife_viewer/atlas/importers/named_entities.py +++ b/atlas/scaife_viewer/atlas/importers/named_entities.py @@ -1,15 +1,15 @@ import csv import os -from django.conf import settings - import logfmt +from scaife_viewer.atlas.conf import settings + from ..models import NamedEntity, Node NAMED_ENTITIES_DATA_PATH = os.path.join( - settings.ATLAS_CONFIG["DATA_DIR"], "annotations", "named-entities" + settings.SV_ATLAS_DATA_DIR, "annotations", "named-entities" ) ENTITIES_DIR = os.path.join(NAMED_ENTITIES_DATA_PATH, "processed", "entities") STANDOFF_DIR = os.path.join(NAMED_ENTITIES_DATA_PATH, "processed", "standoff") diff --git a/atlas/scaife_viewer/atlas/importers/text_annotations.py b/atlas/scaife_viewer/atlas/importers/text_annotations.py index 47cce33..1147124 100644 --- a/atlas/scaife_viewer/atlas/importers/text_annotations.py +++ b/atlas/scaife_viewer/atlas/importers/text_annotations.py @@ -1,13 +1,13 @@ import json import os -from django.conf import settings +from scaife_viewer.atlas.conf import settings from ..models import TEXT_ANNOTATION_KIND_SCHOLIA, TextAnnotation ANNOTATIONS_DATA_PATH = os.path.join( - 
settings.ATLAS_CONFIG["DATA_DIR"], "annotations", "text-annotations" + settings.SV_ATLAS_DATA_DIR, "annotations", "text-annotations" ) diff --git a/atlas/scaife_viewer/atlas/importers/token_annotations.py b/atlas/scaife_viewer/atlas/importers/token_annotations.py index b7b5ff5..c5d41db 100644 --- a/atlas/scaife_viewer/atlas/importers/token_annotations.py +++ b/atlas/scaife_viewer/atlas/importers/token_annotations.py @@ -1,13 +1,13 @@ import csv import os -from django.conf import settings +from scaife_viewer.atlas.conf import settings from ..models import Node, Token ANNOTATIONS_DATA_PATH = os.path.join( - settings.ATLAS_CONFIG["DATA_DIR"], "annotations", "token-annotations" + settings.SV_ATLAS_DATA_DIR, "annotations", "token-annotations" ) diff --git a/atlas/scaife_viewer/atlas/importers/versions.py b/atlas/scaife_viewer/atlas/importers/versions.py index 5bed501..e081b79 100644 --- a/atlas/scaife_viewer/atlas/importers/versions.py +++ b/atlas/scaife_viewer/atlas/importers/versions.py @@ -4,14 +4,14 @@ import sys from collections import defaultdict -from django.conf import settings from django.db.models import Max from django.utils.translation import ugettext_noop +from tqdm import tqdm from treebeard.exceptions import PathOverflow from scaife_viewer.atlas import constants -from tqdm import tqdm +from scaife_viewer.atlas.conf import settings from ..models import Node from ..resolvers.cts import CTSCollectionResolver @@ -20,7 +20,7 @@ logger = logging.getLogger(__name__) -LIBRARY_DATA_PATH = os.path.join(settings.ATLAS_CONFIG["DATA_DIR"], "library") +LIBRARY_DATA_PATH = os.path.join(settings.SV_ATLAS_DATA_DIR, "library") class LibraryDataResolver: diff --git a/atlas/scaife_viewer/atlas/management/commands/prepare_atlas_db.py b/atlas/scaife_viewer/atlas/management/commands/prepare_atlas_db.py index a43e102..8a3e844 100644 --- a/atlas/scaife_viewer/atlas/management/commands/prepare_atlas_db.py +++ b/atlas/scaife_viewer/atlas/management/commands/prepare_atlas_db.py @@ 
-2,10 +2,10 @@ import os import shutil -from django.conf import settings from django.core.management import call_command from django.core.management.base import BaseCommand +from scaife_viewer.atlas.conf import settings from scaife_viewer.atlas.library.models import Node from scaife_viewer.cts import text_inventory diff --git a/atlas/scaife_viewer/atlas/models.py b/atlas/scaife_viewer/atlas/models.py index 8ea5f68..9bbe6cc 100644 --- a/atlas/scaife_viewer/atlas/models.py +++ b/atlas/scaife_viewer/atlas/models.py @@ -2,7 +2,6 @@ import re from collections import defaultdict -from django.conf import settings from django.core import serializers from django.db import models @@ -13,6 +12,7 @@ from treebeard.mp_tree import MP_Node from scaife_viewer.atlas import constants +from scaife_viewer.atlas.conf import settings class TextAlignment(models.Model): @@ -310,9 +310,7 @@ class Node(MP_Node): # @@@ we may want to furthe de-norm label from metadata metadata = JSONField(default=dict, blank=True, null=True) - # TODO: standardize on ATLAS_CONFIG vs SCAIFE_VIEWER_ATLAS_CONFIG vs - # settings; consistency preferred - alphabet = settings.NODE_ALPHABET + alphabet = settings.SV_ATLAS_NODE_ALPHABET def __str__(self): return f"{self.kind}: {self.urn}" diff --git a/atlas/scaife_viewer/atlas/tests/settings.py b/atlas/scaife_viewer/atlas/tests/settings.py index 3cfdad3..245a981 100644 --- a/atlas/scaife_viewer/atlas/tests/settings.py +++ b/atlas/scaife_viewer/atlas/tests/settings.py @@ -41,12 +41,4 @@ ROOT_URLCONF = "scaife_viewer.atlas.tests.urls" SECRET_KEY = "notasecret" -NODE_ALPHABET = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" - -# @@@ proper config -ATLAS_CONFIG = dict( - IN_MEMORY_PASSAGE_CHUNK_MAX=int( - os.environ.get("ATLAS_IN_MEMORY_PASSAGE_CHUNK_MAX", 2500) - ), - DATA_DIR=os.path.join(PROJECT_ROOT, "data"), -) +SV_ATLAS_DATA_DIR = os.path.join(PROJECT_ROOT, "data") diff --git a/atlas/scaife_viewer/atlas/utils.py b/atlas/scaife_viewer/atlas/utils.py 
index 735000d..104ffa3 100644 --- a/atlas/scaife_viewer/atlas/utils.py +++ b/atlas/scaife_viewer/atlas/utils.py @@ -1,7 +1,8 @@ -from django.conf import settings from django.db.models import Max, Min, Q from django.utils.functional import cached_property +from scaife_viewer.atlas.conf import settings + class BaseSiblingChunker: def __init__(self, queryset, start_idx, chunk_length, queryset_values=None): @@ -85,7 +86,7 @@ def get_prev_next_boundaries(self): def get_chunker(queryset, start_idx, chunk_length, **kwargs): - if chunk_length < settings.ATLAS_CONFIG["IN_MEMORY_PASSAGE_CHUNK_MAX"]: + if chunk_length < settings.SV_ATLAS_IN_MEMORY_PASSAGE_CHUNK_MAX: return InMemorySiblingChunker(queryset, start_idx, chunk_length, **kwargs) return SQLSiblingChunker(queryset, start_idx, chunk_length, **kwargs) diff --git a/atlas/setup.py b/atlas/setup.py index 41e898b..48734e2 100644 --- a/atlas/setup.py +++ b/atlas/setup.py @@ -27,6 +27,7 @@ }, test_suite="runtests.runtests", install_requires=[ + "django_appconf>=1.0.4", "django-extensions>=2.2.6,<3", "django-filter>=2.3.0,<3", "django-sortedm2m>=2.0.0,<3", From a086bd0fd871068fe227dff3c5320794dd59b8f8 Mon Sep 17 00:00:00 2001 From: Jacob Wegner Date: Tue, 8 Sep 2020 14:06:33 -0500 Subject: [PATCH 07/34] add tqdm dependency --- atlas/setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/atlas/setup.py b/atlas/setup.py index 48734e2..6924514 100644 --- a/atlas/setup.py +++ b/atlas/setup.py @@ -37,6 +37,7 @@ "django-jsonfield-backport==1.0.0", "graphene-django==2.6.0", "logfmt==0.4", + "tqdm>= 4.48.2,<5", ], tests_require=tests_require, extras_require={ From eb5ea68494753bc3eb820c9d9b5980c56a45c005 Mon Sep 17 00:00:00 2001 From: Jacob Wegner Date: Tue, 8 Sep 2020 16:59:56 -0500 Subject: [PATCH 08/34] configure resolver via hooksets --- atlas/scaife_viewer/atlas/conf.py | 22 +++++++ atlas/scaife_viewer/atlas/hooks.py | 17 +++++ .../scaife_viewer/atlas/importers/versions.py | 66 +------------------ 
atlas/scaife_viewer/atlas/resolvers/common.py | 5 ++ atlas/scaife_viewer/atlas/resolvers/cts.py | 7 ++ .../scaife_viewer/atlas/resolvers/default.py | 57 ++++++++++++++++ 6 files changed, 110 insertions(+), 64 deletions(-) create mode 100644 atlas/scaife_viewer/atlas/hooks.py create mode 100644 atlas/scaife_viewer/atlas/resolvers/common.py create mode 100644 atlas/scaife_viewer/atlas/resolvers/default.py diff --git a/atlas/scaife_viewer/atlas/conf.py b/atlas/scaife_viewer/atlas/conf.py index e859384..f44274b 100644 --- a/atlas/scaife_viewer/atlas/conf.py +++ b/atlas/scaife_viewer/atlas/conf.py @@ -1,14 +1,33 @@ import base64 +import importlib from django.conf import settings # noqa +from django.core.exceptions import ImproperlyConfigured from appconf import AppConf +def load_path_attr(path): + i = path.rfind(".") + module, attr = path[:i], path[i + 1 :] + try: + mod = importlib.import_module(module) + except ImportError as e: + raise ImproperlyConfigured("Error importing {0}: '{1}'".format(module, e)) + try: + attr = getattr(mod, attr) + except AttributeError: + raise ImproperlyConfigured( + "Module '{0}' does not define a '{1}'".format(module, attr) + ) + return attr + + class ATLASAppConf(AppConf): IN_MEMORY_PASSAGE_CHUNK_MAX = 2500 NODE_ALPHABET = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" DATA_MODEL_ID = base64.b64encode(b"2020-09-08-001\n").decode() + HOOKSET = "scaife_viewer.atlas.hooks.DefaultHookSet" # required settings # DATA_DIR @@ -16,3 +35,6 @@ class ATLASAppConf(AppConf): class Meta: prefix = "sv_atlas" required = ["DATA_DIR"] + + def configure_hookset(self, value): + return load_path_attr(value)() diff --git a/atlas/scaife_viewer/atlas/hooks.py b/atlas/scaife_viewer/atlas/hooks.py new file mode 100644 index 0000000..c3d77d4 --- /dev/null +++ b/atlas/scaife_viewer/atlas/hooks.py @@ -0,0 +1,17 @@ +class DefaultHookSet: + def resolve_library(self): + # TODO: Document included resolvers + # from .resolvers.cts import 
resolve_cts_collection_library as resolver_func + from .resolvers.default import resolve_library as resolver_func + + return resolver_func() + + +class HookProxy: + def __getattr__(self, attr): + from .conf import settings # noqa; avoids race condition + + return getattr(settings.SV_ATLAS_HOOKSET, attr) + + +hookset = HookProxy() diff --git a/atlas/scaife_viewer/atlas/importers/versions.py b/atlas/scaife_viewer/atlas/importers/versions.py index e081b79..8956f3a 100644 --- a/atlas/scaife_viewer/atlas/importers/versions.py +++ b/atlas/scaife_viewer/atlas/importers/versions.py @@ -1,6 +1,4 @@ -import json import logging -import os import sys from collections import defaultdict @@ -11,67 +9,14 @@ from treebeard.exceptions import PathOverflow from scaife_viewer.atlas import constants -from scaife_viewer.atlas.conf import settings +from ..hooks import hookset from ..models import Node -from ..resolvers.cts import CTSCollectionResolver from ..urn import URN logger = logging.getLogger(__name__) -LIBRARY_DATA_PATH = os.path.join(settings.SV_ATLAS_DATA_DIR, "library") - - -class LibraryDataResolver: - def __init__(self, data_dir_path): - self.text_groups = {} - self.works = {} - self.versions = {} - self.resolved = self.resolve_data_dir_path(data_dir_path) - - def populate_versions(self, dirpath, data): - for version in data: - version_part = version["urn"].rsplit(":", maxsplit=2)[1] - - if version.get("format") == "cex": - extension = "cex" - else: - extension = "txt" - - version_path = os.path.join(dirpath, f"{version_part}.{extension}") - if not os.path.exists(version_path): - raise FileNotFoundError(version_path) - - self.versions[version["urn"]] = { - "format": extension, - "path": version_path, - **version, - } - - def resolve_data_dir_path(self, data_dir_path): - for dirpath, dirnames, filenames in sorted(os.walk(data_dir_path)): - if "metadata.json" not in filenames: - continue - - metadata = json.load(open(os.path.join(dirpath, "metadata.json"))) - assert 
metadata["node_kind"] in ["textgroup", "work"] - - if metadata["node_kind"] == "textgroup": - self.text_groups[metadata["urn"]] = metadata - elif metadata["node_kind"] == "work": - self.works[metadata["urn"]] = metadata - self.populate_versions(dirpath, metadata["versions"]) - - return self.text_groups, self.works, self.versions - - -class Library: - def __init__(self, text_groups, works, versions): - self.text_groups = text_groups - self.works = works - self.versions = versions - class CTSImporter: """ @@ -338,13 +283,6 @@ def apply(self): logger.debug(f"{self.label}: {count} nodes.") -def resolve_library(): - # TODO: Customize library resolver class - # text_groups, works, versions = LibraryDataResolver(LIBRARY_DATA_PATH).resolved - text_groups, works, versions = CTSCollectionResolver().resolved - return Library(text_groups, works, versions) - - def get_first_value_for_language(values, lang, fallback=True): # TODO: When this is called, how would we pass a fallback? value = next(iter(filter(lambda x: x["lang"] == lang, values)), None) @@ -360,7 +298,7 @@ def import_versions(reset=False): if reset: Node.objects.filter(kind="nid").delete() - library = resolve_library() + library = hookset.resolve_library() nodes = {} for _, version_data in tqdm(library.versions.items()): diff --git a/atlas/scaife_viewer/atlas/resolvers/common.py b/atlas/scaife_viewer/atlas/resolvers/common.py new file mode 100644 index 0000000..8d41ad4 --- /dev/null +++ b/atlas/scaife_viewer/atlas/resolvers/common.py @@ -0,0 +1,5 @@ +class Library: + def __init__(self, text_groups, works, versions): + self.text_groups = text_groups + self.works = works + self.versions = versions diff --git a/atlas/scaife_viewer/atlas/resolvers/cts.py b/atlas/scaife_viewer/atlas/resolvers/cts.py index c78c2dc..b47120e 100644 --- a/atlas/scaife_viewer/atlas/resolvers/cts.py +++ b/atlas/scaife_viewer/atlas/resolvers/cts.py @@ -2,6 +2,8 @@ from scaife_viewer.core.cts import text_inventory +from .common import Library + 
def get_lang_value(value): if re.match(r"^[a-z]+-[A-Z][a-z]+$", value): @@ -165,3 +167,8 @@ def resolve_text_inventory(self): self.text_groups[text_group_metadata["urn"]] = text_group_metadata self.resolve_works(text_group) return self.text_groups, self.works, self.versions + + +def resolve_cts_collection_library(): + text_groups, works, versions = CTSCollectionResolver().resolved + return Library(text_groups, works, versions) diff --git a/atlas/scaife_viewer/atlas/resolvers/default.py b/atlas/scaife_viewer/atlas/resolvers/default.py new file mode 100644 index 0000000..020e917 --- /dev/null +++ b/atlas/scaife_viewer/atlas/resolvers/default.py @@ -0,0 +1,57 @@ +import json +import os + +from scaife_viewer.atlas.conf import settings + +from .common import Library + + +LIBRARY_DATA_PATH = os.path.join(settings.SV_ATLAS_DATA_DIR, "library") + + +class LibraryDataResolver: + def __init__(self, data_dir_path): + self.text_groups = {} + self.works = {} + self.versions = {} + self.resolved = self.resolve_data_dir_path(data_dir_path) + + def populate_versions(self, dirpath, data): + for version in data: + version_part = version["urn"].rsplit(":", maxsplit=2)[1] + + if version.get("format") == "cex": + extension = "cex" + else: + extension = "txt" + + version_path = os.path.join(dirpath, f"{version_part}.{extension}") + if not os.path.exists(version_path): + raise FileNotFoundError(version_path) + + self.versions[version["urn"]] = { + "format": extension, + "path": version_path, + **version, + } + + def resolve_data_dir_path(self, data_dir_path): + for dirpath, dirnames, filenames in sorted(os.walk(data_dir_path)): + if "metadata.json" not in filenames: + continue + + metadata = json.load(open(os.path.join(dirpath, "metadata.json"))) + assert metadata["node_kind"] in ["textgroup", "work"] + + if metadata["node_kind"] == "textgroup": + self.text_groups[metadata["urn"]] = metadata + elif metadata["node_kind"] == "work": + self.works[metadata["urn"]] = metadata + 
self.populate_versions(dirpath, metadata["versions"]) + + return self.text_groups, self.works, self.versions + + +def resolve_library(): + text_groups, works, versions = LibraryDataResolver(LIBRARY_DATA_PATH).resolved + return Library(text_groups, works, versions) From 6a6ee7ddc5ea43c395718ffe1e4d97a91f987097 Mon Sep 17 00:00:00 2001 From: Jacob Wegner Date: Tue, 8 Sep 2020 17:00:26 -0500 Subject: [PATCH 09/34] move access checking out to hookset --- atlas/scaife_viewer/atlas/hooks.py | 3 +++ atlas/scaife_viewer/atlas/schema.py | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/atlas/scaife_viewer/atlas/hooks.py b/atlas/scaife_viewer/atlas/hooks.py index c3d77d4..1f6191a 100644 --- a/atlas/scaife_viewer/atlas/hooks.py +++ b/atlas/scaife_viewer/atlas/hooks.py @@ -6,6 +6,9 @@ def resolve_library(self): return resolver_func() + def can_access_urn(self, request, urn): + return True + class HookProxy: def __getattr__(self, attr): diff --git a/atlas/scaife_viewer/atlas/schema.py b/atlas/scaife_viewer/atlas/schema.py index 93dfeb9..02ca4de 100644 --- a/atlas/scaife_viewer/atlas/schema.py +++ b/atlas/scaife_viewer/atlas/schema.py @@ -9,6 +9,7 @@ # @@@ ensure convert signal is registered from .compat import convert_jsonfield_to_string # noqa +from .hooks import hookset # from .models import Node as TextPart from .models import ( @@ -292,8 +293,7 @@ def get_queryset(cls, queryset, info): # should be to metadata (including ["key"] vs .get("key")) def resolve_access(obj, info, *args, **kwargs): request = info.context - # TODO: fix auth_request via a hookset - return auth_request(request, obj.urn) + return hookset.can_access_urn(request, obj.urn) def resolve_human_lang(obj, *args, **kwargs): lang = obj.metadata["lang"] From 8dccd035bd8a847fa0ea371b234c0fbc8cf4b960 Mon Sep 17 00:00:00 2001 From: Jacob Wegner Date: Tue, 8 Sep 2020 17:00:47 -0500 Subject: [PATCH 10/34] use hookset to get human lang values --- atlas/scaife_viewer/atlas/constants.py | 10 
++++++++++ atlas/scaife_viewer/atlas/hooks.py | 6 ++++++ atlas/scaife_viewer/atlas/schema.py | 11 ++--------- 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/atlas/scaife_viewer/atlas/constants.py b/atlas/scaife_viewer/atlas/constants.py index 9abe3a0..ef06575 100644 --- a/atlas/scaife_viewer/atlas/constants.py +++ b/atlas/scaife_viewer/atlas/constants.py @@ -81,3 +81,13 @@ (NAMED_ENTITY_KIND_PERSON, "Person"), (NAMED_ENTITY_KIND_PLACE, "Place"), ] + +HUMAN_FRIENDLY_LANGUAGE_MAP = { + "eng": "English", + "fa": "Farsi", + "fre": "French", + "ger": "German", + "grc": "Greek", + "heb": "Hebrew", + "lat": "Latin", +} diff --git a/atlas/scaife_viewer/atlas/hooks.py b/atlas/scaife_viewer/atlas/hooks.py index 1f6191a..0bb5ff5 100644 --- a/atlas/scaife_viewer/atlas/hooks.py +++ b/atlas/scaife_viewer/atlas/hooks.py @@ -1,3 +1,6 @@ +from . import constants + + class DefaultHookSet: def resolve_library(self): # TODO: Document included resolvers @@ -9,6 +12,9 @@ def resolve_library(self): def can_access_urn(self, request, urn): return True + def get_human_lang(self, value): + return constants.HUMAN_FRIENDLY_LANGUAGE_MAP.get(value, value) + class HookProxy: def __getattr__(self, attr): diff --git a/atlas/scaife_viewer/atlas/schema.py b/atlas/scaife_viewer/atlas/schema.py index 02ca4de..515e1b3 100644 --- a/atlas/scaife_viewer/atlas/schema.py +++ b/atlas/scaife_viewer/atlas/schema.py @@ -297,9 +297,7 @@ def resolve_access(obj, info, *args, **kwargs): def resolve_human_lang(obj, *args, **kwargs): lang = obj.metadata["lang"] - # @@@ make the language map decoupled from cts - # TODO: fix cts.constants - return cts.constants.LANGAUGE_MAP.get(lang, lang) + return hookset.get_human_lang(lang) def resolve_lang(obj, *args, **kwargs): return obj.metadata["lang"] @@ -322,17 +320,12 @@ def resolve_metadata(obj, *args, **kwargs): metadata = obj.metadata work = obj.get_parent() text_group = work.get_parent() - # @@@ backport lang map - lang_map = { - "eng": "English", - "grc": 
"Greek", - } metadata.update( { "work_label": work.label, "text_group_label": text_group.label, "lang": metadata["lang"], - "human_lang": lang_map[metadata["lang"]], + "human_lang": hookset.get_human_lang(metadata["lang"]), } ) return camelize(metadata) From 734e7529935611fd6d9136d6c6c712fca423fe11 Mon Sep 17 00:00:00 2001 From: Jacob Wegner Date: Tue, 8 Sep 2020 16:36:45 -0500 Subject: [PATCH 11/34] factor out DB_LABEL to appconf --- atlas/scaife_viewer/atlas/conf.py | 1 + atlas/scaife_viewer/atlas/db_routers.py | 15 +++++++++------ 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/atlas/scaife_viewer/atlas/conf.py b/atlas/scaife_viewer/atlas/conf.py index f44274b..ac411dc 100644 --- a/atlas/scaife_viewer/atlas/conf.py +++ b/atlas/scaife_viewer/atlas/conf.py @@ -27,6 +27,7 @@ class ATLASAppConf(AppConf): IN_MEMORY_PASSAGE_CHUNK_MAX = 2500 NODE_ALPHABET = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" DATA_MODEL_ID = base64.b64encode(b"2020-09-08-001\n").decode() + DB_LABEL = "atlas" HOOKSET = "scaife_viewer.atlas.hooks.DefaultHookSet" # required settings diff --git a/atlas/scaife_viewer/atlas/db_routers.py b/atlas/scaife_viewer/atlas/db_routers.py index 5a079d5..661b594 100644 --- a/atlas/scaife_viewer/atlas/db_routers.py +++ b/atlas/scaife_viewer/atlas/db_routers.py @@ -1,5 +1,8 @@ -# TODO: Document setting up ATLAS_DB_LABEL bits; possibly via appconf -ATLAS_DB_LABEL = "atlas" +from scaife_viewer.atlas.conf import ATLASAppConf + + +atlas_conf = ATLASAppConf() +ATLAS_DB_LABEL = atlas_conf.DB_LABEL class ATLASRouter: @@ -8,11 +11,11 @@ class ATLASRouter: library application. """ - route_app_labels = {"library"} + route_app_labels = {"scaife_viewer_atlas"} def db_for_read(self, model, **hints): """ - Attempts to read library models go to ATLAS_DB_LABEL. + Attempts to read library models go to DB_LABEL. 
""" if model._meta.app_label in self.route_app_labels: return ATLAS_DB_LABEL @@ -20,7 +23,7 @@ def db_for_read(self, model, **hints): def db_for_write(self, model, **hints): """ - Attempts to write library models go to ATLAS_DB_LABEL. + Attempts to write library models go to DB_LABEL. """ if model._meta.app_label in self.route_app_labels: return ATLAS_DB_LABEL @@ -40,7 +43,7 @@ def allow_relation(self, obj1, obj2, **hints): def allow_migrate(self, db, app_label, model_name=None, **hints): """ - Only add library apps to the ATLAS_DB_LABEL database. + Only add library apps to the DB_LABEL database. Do not add library apps to any other database. """ From 6bfa2c5312818c93afe51393e40b9207dfb99dc1 Mon Sep 17 00:00:00 2001 From: Jacob Wegner Date: Tue, 8 Sep 2020 16:53:53 -0500 Subject: [PATCH 12/34] decouple prepare_atlas_db from CTS resolver --- atlas/scaife_viewer/atlas/conf.py | 1 + .../management/commands/prepare_atlas_db.py | 19 ++++++++++--------- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/atlas/scaife_viewer/atlas/conf.py b/atlas/scaife_viewer/atlas/conf.py index ac411dc..354d2d6 100644 --- a/atlas/scaife_viewer/atlas/conf.py +++ b/atlas/scaife_viewer/atlas/conf.py @@ -28,6 +28,7 @@ class ATLASAppConf(AppConf): NODE_ALPHABET = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" DATA_MODEL_ID = base64.b64encode(b"2020-09-08-001\n").decode() DB_LABEL = "atlas" + DB_PATH = None HOOKSET = "scaife_viewer.atlas.hooks.DefaultHookSet" # required settings diff --git a/atlas/scaife_viewer/atlas/management/commands/prepare_atlas_db.py b/atlas/scaife_viewer/atlas/management/commands/prepare_atlas_db.py index 8a3e844..0562db1 100644 --- a/atlas/scaife_viewer/atlas/management/commands/prepare_atlas_db.py +++ b/atlas/scaife_viewer/atlas/management/commands/prepare_atlas_db.py @@ -1,13 +1,11 @@ -# TODO: Revisit cts assumptions import os import shutil +from django.core.exceptions import ImproperlyConfigured from django.core.management import 
call_command from django.core.management.base import BaseCommand from scaife_viewer.atlas.conf import settings -from scaife_viewer.atlas.library.models import Node -from scaife_viewer.cts import text_inventory from ... import importers @@ -27,7 +25,12 @@ def add_arguments(self, parser): ) def handle(self, *args, **options): - database_path = settings.ATLAS_CONFIG["ATLAS_DB_PATH"] + database_path = settings.SV_ATLAS_DB_PATH + + if database_path is None: + msg = "The SV_ATLAS_DB_PATH setting is missing and is required for this management command to work." + raise ImproperlyConfigured(msg) + db_path_exists = os.path.exists(database_path) reset_data = options.get("force") or not db_path_exists @@ -39,16 +42,14 @@ def handle(self, *args, **options): os.remove(database_path) self.stdout.write("--[Removed existing ATLAS database]--") - self.stdout.write('--[Running database migrations on "atlas"]--') - call_command("migrate", database="atlas") + db_label = settings.SV_ATLAS_DB_LABEL + self.stdout.write(f'--[Running database migrations on "{db_label}"]--') + call_command("migrate", database=db_label) resolver_path = settings.CTS_RESOLVER_CACHE_LOCATION if os.path.exists(resolver_path): shutil.rmtree(resolver_path) self.stdout.write("--[Removed existing CTS resolver cache]--") - self.stdout.write("--[Priming CTS Resolver cache]--") - text_inventory() - self.stdout.write("--[Populating ATLAS db]--") importers.versions.import_versions() From 981d1d7d2b284e22da7ee847fdaefbba22dc48b3 Mon Sep 17 00:00:00 2001 From: Jacob Wegner Date: Tue, 8 Sep 2020 16:57:08 -0500 Subject: [PATCH 13/34] decouple from CTS text_inventory --- atlas/scaife_viewer/atlas/resolvers/cts.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/atlas/scaife_viewer/atlas/resolvers/cts.py b/atlas/scaife_viewer/atlas/resolvers/cts.py index b47120e..22edd3d 100644 --- a/atlas/scaife_viewer/atlas/resolvers/cts.py +++ b/atlas/scaife_viewer/atlas/resolvers/cts.py @@ -1,7 +1,5 @@ 
import re -from scaife_viewer.core.cts import text_inventory - from .common import Library @@ -13,11 +11,11 @@ def get_lang_value(value): class CTSCollectionResolver: - def __init__(self): + def __init__(self, text_inventory): self.text_groups = {} self.works = {} self.versions = {} - self.resolved = self.resolve_text_inventory() + self.resolved = self.resolve_text_inventory(text_inventory) def extract_text_group_metadata(self, text_group): """ @@ -155,20 +153,22 @@ def resolve_works(self, text_group): self.works[work_metadata["urn"]] = work_metadata self.resolve_versions(work) - def resolve_text_inventory(self): + def resolve_text_inventory(self, text_inventory): """ Resolves the library from `cts.TextInventory`. Since Node instances are ordered by their `path` value, `cts.collections.SORT_OVERRIDES` is respected by ATLAS. """ - for text_group in text_inventory().text_groups(): + for text_group in text_inventory.text_groups(): text_group_metadata = self.extract_text_group_metadata(text_group) self.text_groups[text_group_metadata["urn"]] = text_group_metadata self.resolve_works(text_group) return self.text_groups, self.works, self.versions -def resolve_cts_collection_library(): - text_groups, works, versions = CTSCollectionResolver().resolved +def resolve_cts_collection_library(text_inventory): + # TODO: Document text_inventory typing + # TODO: consider a hookset + text_groups, works, versions = CTSCollectionResolver(text_inventory).resolved return Library(text_groups, works, versions) From d905cc07b64e3bb7811015e80e6df0796cf52115 Mon Sep 17 00:00:00 2001 From: Jacob Wegner Date: Tue, 8 Sep 2020 23:10:37 -0500 Subject: [PATCH 14/34] clean up extraction methods --- atlas/scaife_viewer/atlas/resolvers/cts.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/atlas/scaife_viewer/atlas/resolvers/cts.py b/atlas/scaife_viewer/atlas/resolvers/cts.py index 22edd3d..23725c5 100644 --- a/atlas/scaife_viewer/atlas/resolvers/cts.py +++ 
b/atlas/scaife_viewer/atlas/resolvers/cts.py @@ -34,7 +34,6 @@ def extract_text_group_metadata(self, text_group): urn=f"{text_group.urn}:", node_kind="textgroup", name=[dict(lang="eng", value=str(text_group.label))], - meta_=text_group.structured_metadata(), ) def extract_work_metadata(self, work): @@ -112,8 +111,6 @@ def extract_version_metadata(self, version): } ], lang=get_lang_value(version.metadata.lang), - tracking_title=str(version.tracking_title), - image=version.image, ) def resolve_versions(self, work): From f56996540c91f3d79da813dd6988612b9c243fe4 Mon Sep 17 00:00:00 2001 From: Jacob Wegner Date: Tue, 8 Sep 2020 23:27:52 -0500 Subject: [PATCH 15/34] push cts importer customizations out to hookset --- atlas/scaife_viewer/atlas/hooks.py | 6 ++++++ atlas/scaife_viewer/atlas/importers/versions.py | 13 +++++++------ 2 files changed, 13 insertions(+), 6 deletions(-) diff --git a/atlas/scaife_viewer/atlas/hooks.py b/atlas/scaife_viewer/atlas/hooks.py index 0bb5ff5..0175969 100644 --- a/atlas/scaife_viewer/atlas/hooks.py +++ b/atlas/scaife_viewer/atlas/hooks.py @@ -1,4 +1,5 @@ from . 
import constants +from .importers.versions import CTSImporter class DefaultHookSet: @@ -15,6 +16,11 @@ def can_access_urn(self, request, urn): def get_human_lang(self, value): return constants.HUMAN_FRIENDLY_LANGUAGE_MAP.get(value, value) + def get_importer_class(self): + from .importers.versions import CTSImporter # noqa: avoids circular import + + return CTSImporter + class HookProxy: def __getattr__(self, attr): diff --git a/atlas/scaife_viewer/atlas/importers/versions.py b/atlas/scaife_viewer/atlas/importers/versions.py index 8956f3a..1c6bb3f 100644 --- a/atlas/scaife_viewer/atlas/importers/versions.py +++ b/atlas/scaife_viewer/atlas/importers/versions.py @@ -101,9 +101,9 @@ def get_text_group_metadata(self): text_group_urn = self.urn.up_to(self.urn.TEXTGROUP) metadata = self.library.text_groups[text_group_urn] name = metadata["name"][0] - return dict( - label=name["value"], lang=name["lang"], **(metadata.get("meta_") or {}) - ) + # TODO: allow additional passthrough from a `meta_` or `extra` + # key + return dict(label=name["value"], lang=name["lang"]) # def get_work_metadata(self): # metadata = self.library.works[self.urn.up_to(URN.WORK)] @@ -137,14 +137,14 @@ def get_version_metadata(self): "default_toc_urn": self.version_data.get("default_toc_urn"), } # @@@ label + # TODO: allow additional passthrough from a `meta_` or `extra` + # key default.update( dict( label=self.version_data["label"][0]["value"], description=self.version_data["description"][0]["value"], lang=self.version_data["lang"], kind=self.version_data["version_kind"], - tracking_title=self.version_data["tracking_title"], - image=self.version_data["image"], ) ) return default @@ -300,7 +300,8 @@ def import_versions(reset=False): library = hookset.resolve_library() + importer_class = hookset.get_importer_class() nodes = {} for _, version_data in tqdm(library.versions.items()): - CTSImporter(library, version_data, nodes).apply() + importer_class(library, version_data, nodes).apply() 
print(f"{Node.objects.count()} total nodes on the tree.", file=sys.stderr) From 337b84b8fb9686eeed3324c06350802f54c43932 Mon Sep 17 00:00:00 2001 From: Jacob Wegner Date: Tue, 8 Sep 2020 23:32:43 -0500 Subject: [PATCH 16/34] simplify resolve_library hook --- atlas/scaife_viewer/atlas/hooks.py | 8 ++------ atlas/scaife_viewer/atlas/resolvers/cts.py | 9 +++++---- 2 files changed, 7 insertions(+), 10 deletions(-) diff --git a/atlas/scaife_viewer/atlas/hooks.py b/atlas/scaife_viewer/atlas/hooks.py index 0175969..e3843aa 100644 --- a/atlas/scaife_viewer/atlas/hooks.py +++ b/atlas/scaife_viewer/atlas/hooks.py @@ -1,14 +1,10 @@ from . import constants -from .importers.versions import CTSImporter +from .resolvers.default import resolve_library class DefaultHookSet: def resolve_library(self): - # TODO: Document included resolvers - # from .resolvers.cts import resolve_cts_collection_library as resolver_func - from .resolvers.default import resolve_library as resolver_func - - return resolver_func() + return resolve_library() def can_access_urn(self, request, urn): return True diff --git a/atlas/scaife_viewer/atlas/resolvers/cts.py b/atlas/scaife_viewer/atlas/resolvers/cts.py index 23725c5..335da06 100644 --- a/atlas/scaife_viewer/atlas/resolvers/cts.py +++ b/atlas/scaife_viewer/atlas/resolvers/cts.py @@ -164,8 +164,9 @@ def resolve_text_inventory(self, text_inventory): return self.text_groups, self.works, self.versions -def resolve_cts_collection_library(text_inventory): - # TODO: Document text_inventory typing - # TODO: consider a hookset - text_groups, works, versions = CTSCollectionResolver(text_inventory).resolved +def resolve_cts_collection_library(text_inventory, resolver_class=None): + if resolver_class is None: + resolver_class = CTSCollectionResolver + + text_groups, works, versions = resolver_class(text_inventory).resolved return Library(text_groups, works, versions) From fb0320392d10c31e8a2e93a9c8f1defdc74abb12 Mon Sep 17 00:00:00 2001 From: Jacob Wegner Date: 
Wed, 9 Sep 2020 00:12:24 -0500 Subject: [PATCH 17/34] move extraction methods out to collection objs --- atlas/scaife_viewer/atlas/resolvers/cts.py | 142 ++------------------- 1 file changed, 10 insertions(+), 132 deletions(-) diff --git a/atlas/scaife_viewer/atlas/resolvers/cts.py b/atlas/scaife_viewer/atlas/resolvers/cts.py index 335da06..7dcfaae 100644 --- a/atlas/scaife_viewer/atlas/resolvers/cts.py +++ b/atlas/scaife_viewer/atlas/resolvers/cts.py @@ -1,15 +1,6 @@ -import re - from .common import Library -def get_lang_value(value): - if re.match(r"^[a-z]+-[A-Z][a-z]+$", value): - return value.split("-")[0] - else: - return value - - class CTSCollectionResolver: def __init__(self, text_inventory): self.text_groups = {} @@ -17,137 +8,23 @@ def __init__(self, text_inventory): self.versions = {} self.resolved = self.resolve_text_inventory(text_inventory) - def extract_text_group_metadata(self, text_group): - """ - { - "urn": "urn:cts:greekLit:tlg0012:", - "node_kind": "textgroup", - "name": [ - { - "lang": "eng", - "value": "Homer" - } - ] - } - """ - return dict( - urn=f"{text_group.urn}:", - node_kind="textgroup", - name=[dict(lang="eng", value=str(text_group.label))], - ) - - def extract_work_metadata(self, work): - """ - { - "urn": "urn:cts:greekLit:tlg0012.tlg001:", - "group_urn": "urn:cts:greekLit:tlg0012:", - "node_kind": "work", - "lang": "grc", - "title": [ - { - "lang": "eng", - "value": "Iliad" - } - ], - "versions": [ - { - "urn": "urn:cts:greekLit:tlg0012.tlg001.perseus-grc2:", - "node_kind": "version", - "version_kind": "edition", - "first_passage_urn": "urn:cts:greekLit:tlg0012.tlg001.perseus-grc2:1.1-1.7", - "citation_scheme": ["book", "line"], - "label": [ - { - "lang": "eng", - "value": "Iliad (Greek Text of Munro & Allen)" - } - ], - "description": [ - { - "lang": "eng", - "value": "Homer, creator; Monro, D. B. (David Binning), 1836-1905, creator; Monro, D. B. (David Binning), 1836-1905, editor; Allen, Thomas W. (Thomas William), b. 
1862, editor" - } - ] - } - ] - } - """ - return dict( - urn=f"{work.urn}:", - # @@@ - group_urn=f'{work.urn.rsplit(".", maxsplit=1)[0]}:', - node_kind="work", - lang=get_lang_value(work.metadata.lang), - # @@@ label vs title wa - title=[ - { - # @@@ hacky - "lang": work.label._language, - "value": str(work.label), - } - ], - ) - - def extract_version_metadata(self, version): - return dict( - urn=f"{version.urn}:", - node_kind="version", - version_kind=version.kind, - # @@@ - # first_passage_urn - citation_scheme=[c.name for c in version.metadata.citation], - label=[ - { - # @@@ hacky - "lang": version.label._language, - "value": str(version.label), - } - ], - description=[ - { - # @@@ hacky - "lang": version.description._language, - "value": str(version.description), - } - ], - lang=get_lang_value(version.metadata.lang), - ) - def resolve_versions(self, work): - """ - { - "urn": "urn:cts:greekLit:tlg0012.tlg001.perseus-grc2:", - "node_kind": "version", - "version_kind": "edition", - "first_passage_urn": "urn:cts:greekLit:tlg0012.tlg001.perseus-grc2:1.1-1.7", - "citation_scheme": ["book", "line"], - "label": [ - { - "lang": "eng", - "value": "Iliad (Greek Text of Munro & Allen)" - } - ], - "description": [ - { - "lang": "eng", - "value": "Homer, creator; Monro, D. B. (David Binning), 1836-1905, creator; Monro, D. B. (David Binning), 1836-1905, editor; Allen, Thomas W. (Thomas William), b. 
1862, editor" - } - ] - } - """ for version in work.texts(): - version_metadata = self.extract_version_metadata(version) + version_metadata = version.get_atlas_metadata() + # version_urn is required within CTSImporter + version_urn = version_metadata["urn"] # TODO: More validation around "path" version_metadata["path"] = None - self.versions[version_metadata["urn"]] = version_metadata + self.versions[version_urn] = version_metadata def resolve_works(self, text_group): for work in text_group.works(): if work.urn.count(" ") > 0: # @@@ defensive coding around bad URNs continue - work_metadata = self.extract_work_metadata(work) - self.works[work_metadata["urn"]] = work_metadata + work_metadata = work.get_atlas_metadata() + work_urn = work_metadata.pop("urn") + self.works[work_urn] = work_metadata self.resolve_versions(work) def resolve_text_inventory(self, text_inventory): @@ -158,8 +35,9 @@ def resolve_text_inventory(self, text_inventory): `cts.collections.SORT_OVERRIDES` is respected by ATLAS. 
""" for text_group in text_inventory.text_groups(): - text_group_metadata = self.extract_text_group_metadata(text_group) - self.text_groups[text_group_metadata["urn"]] = text_group_metadata + text_group_metadata = text_group.get_atlas_metadata() + tg_urn = text_group_metadata.pop("urn") + self.text_groups[tg_urn] = text_group_metadata self.resolve_works(text_group) return self.text_groups, self.works, self.versions From a354141e604de3fc2e839cbe1e67555856de2dbb Mon Sep 17 00:00:00 2001 From: Jacob Wegner Date: Wed, 9 Sep 2020 00:46:14 -0500 Subject: [PATCH 18/34] standardize on "extra" key to pass extra metadata --- .../scaife_viewer/atlas/importers/versions.py | 44 +++++-------------- 1 file changed, 11 insertions(+), 33 deletions(-) diff --git a/atlas/scaife_viewer/atlas/importers/versions.py b/atlas/scaife_viewer/atlas/importers/versions.py index 1c6bb3f..1d6f639 100644 --- a/atlas/scaife_viewer/atlas/importers/versions.py +++ b/atlas/scaife_viewer/atlas/importers/versions.py @@ -91,40 +91,20 @@ def get_root_urn_scheme(self, node_urn): def get_urn_scheme(self, node_urn): return [*self.get_root_urn_scheme(node_urn), *self.citation_scheme] - # TODO: Move some metadata extraction out to our "resolvers" - - # def get_textgroup_metadata(self): - # metadata = self.library.text_groups[self.urn.up_to(URN.TEXTGROUP)] - # return {"label": get_first_value_for_language(metadata["name"], "eng")} - def get_text_group_metadata(self): text_group_urn = self.urn.up_to(self.urn.TEXTGROUP) metadata = self.library.text_groups[text_group_urn] - name = metadata["name"][0] - # TODO: allow additional passthrough from a `meta_` or `extra` - # key - return dict(label=name["value"], lang=name["lang"]) - - # def get_work_metadata(self): - # metadata = self.library.works[self.urn.up_to(URN.WORK)] - # return {"label": get_first_value_for_language(metadata["title"], "eng")} + label = get_first_value_for_language(metadata["name"], "eng") + # TODO: do we actually use `lang` yet? 
+ extra = metadata.get("extra", {}) + return dict(label=label, **extra) def get_work_metadata(self): work_urn = self.urn.up_to(self.urn.WORK) metadata = self.library.works[work_urn] - return dict(lang=metadata["lang"], label=metadata["title"][0]["value"]) - - # def get_version_metadata(self): - # return { - # # @@@ how much of the `metadata.json` do we - # # "pass through" via GraphQL vs - # # apply to particular node kinds in the heirarchy - # "citation_scheme": self.citation_scheme, - # "label": self.label, - # "lang": self.version_data["lang"], - # "first_passage_urn": self.version_data["first_passage_urn"], - # "default_toc_urn": self.version_data.get("default_toc_urn"), - # } + title = get_first_value_for_language(metadata["title"], "eng") + extra = metadata.get("extra", {}) + return dict(label=title, lang=metadata["lang"], **extra) def get_version_metadata(self): default = { @@ -132,19 +112,17 @@ def get_version_metadata(self): # "pass through" via GraphQL vs # apply to particular node kinds in the heirarchy "citation_scheme": self.citation_scheme, - "work_title": self.label, + "label": self.label, + "lang": self.version_data["lang"], "first_passage_urn": self.version_data.get("first_passage_urn"), "default_toc_urn": self.version_data.get("default_toc_urn"), } - # @@@ label - # TODO: allow additional passthrough from a `meta_` or `extra` - # key + # TODO: how "universal" should these defaults be? 
default.update( dict( - label=self.version_data["label"][0]["value"], description=self.version_data["description"][0]["value"], - lang=self.version_data["lang"], kind=self.version_data["version_kind"], + **self.version_data.get("extra", {}), ) ) return default From 43d5fffb9502e8658c0b60d236b67e28f4349fc9 Mon Sep 17 00:00:00 2001 From: Jacob Wegner Date: Wed, 9 Sep 2020 01:44:11 -0500 Subject: [PATCH 19/34] decouple extraction in atlas from core --- atlas/scaife_viewer/atlas/hooks.py | 48 ++++++++++++++++++++++ atlas/scaife_viewer/atlas/resolvers/cts.py | 7 ++-- 2 files changed, 52 insertions(+), 3 deletions(-) diff --git a/atlas/scaife_viewer/atlas/hooks.py b/atlas/scaife_viewer/atlas/hooks.py index e3843aa..3f81ff9 100644 --- a/atlas/scaife_viewer/atlas/hooks.py +++ b/atlas/scaife_viewer/atlas/hooks.py @@ -17,6 +17,54 @@ def get_importer_class(self): return CTSImporter + def extract_cts_text_group_metadata(self, text_group): + return dict( + # TODO: urn with trailing colon + urn=f"{text_group.urn}:", + name=[dict(lang="eng", value=str(text_group.label))], + ) + + def extract_cts_work_metadata(self, work): + # FIXME: backport `lang` attr + lang = getattr(work, "lang", work.metadata.lang) + return dict( + # TODO: urn with trailing colon + urn=f"{work.urn}:", + lang=lang, + title=[ + { + # TODO: provide a better api for work.label lang + "lang": work.label._language, + "value": str(work.label), + } + ], + ) + + def extract_cts_version_metadata(self, version): + return dict( + # TODO: urn with trailing colon + urn=f"{version.urn}:", + version_kind=version.kind, + # TODO: + # first_passage_urn + citation_scheme=[c.name for c in version.metadata.citation], + label=[ + { + # TODO: provide a better api for version.label lang + "lang": version.label._language, + "value": str(version.label), + } + ], + description=[ + { + # TODO: provide a better api for version.description lang + "lang": version.description._language, + "value": str(version.description), + } + ], + 
lang=version.lang, + ) + class HookProxy: def __getattr__(self, attr): diff --git a/atlas/scaife_viewer/atlas/resolvers/cts.py b/atlas/scaife_viewer/atlas/resolvers/cts.py index 7dcfaae..841a98c 100644 --- a/atlas/scaife_viewer/atlas/resolvers/cts.py +++ b/atlas/scaife_viewer/atlas/resolvers/cts.py @@ -1,4 +1,5 @@ from .common import Library +from ..hooks import hookset class CTSCollectionResolver: @@ -10,7 +11,7 @@ def __init__(self, text_inventory): def resolve_versions(self, work): for version in work.texts(): - version_metadata = version.get_atlas_metadata() + version_metadata = hookset.extract_cts_version_metadata(version) # version_urn is required within CTSImporter version_urn = version_metadata["urn"] # TODO: More validation around "path" @@ -22,7 +23,7 @@ def resolve_works(self, text_group): if work.urn.count(" ") > 0: # @@@ defensive coding around bad URNs continue - work_metadata = work.get_atlas_metadata() + work_metadata = hookset.extract_cts_work_metadata(work) work_urn = work_metadata.pop("urn") self.works[work_urn] = work_metadata self.resolve_versions(work) @@ -35,7 +36,7 @@ def resolve_text_inventory(self, text_inventory): `cts.collections.SORT_OVERRIDES` is respected by ATLAS. 
""" for text_group in text_inventory.text_groups(): - text_group_metadata = text_group.get_atlas_metadata() + text_group_metadata = hookset.extract_cts_text_group_metadata(text_group) tg_urn = text_group_metadata.pop("urn") self.text_groups[tg_urn] = text_group_metadata self.resolve_works(text_group) From 35d3d567a398ef53a6fc0a51be965be7d981589d Mon Sep 17 00:00:00 2001 From: Jacob Wegner Date: Wed, 9 Sep 2020 01:58:12 -0500 Subject: [PATCH 20/34] rename module as cts_collection (to better describe _what_ we're resolving, as technically anything we're resolving is CTS-based) --- .../scaife_viewer/atlas/resolvers/{cts.py => cts_collection.py} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename atlas/scaife_viewer/atlas/resolvers/{cts.py => cts_collection.py} (100%) diff --git a/atlas/scaife_viewer/atlas/resolvers/cts.py b/atlas/scaife_viewer/atlas/resolvers/cts_collection.py similarity index 100% rename from atlas/scaife_viewer/atlas/resolvers/cts.py rename to atlas/scaife_viewer/atlas/resolvers/cts_collection.py index 841a98c..7f189f7 100644 --- a/atlas/scaife_viewer/atlas/resolvers/cts.py +++ b/atlas/scaife_viewer/atlas/resolvers/cts_collection.py @@ -1,5 +1,5 @@ -from .common import Library from ..hooks import hookset +from .common import Library class CTSCollectionResolver: From 4948b36ff1169c44f445162cac89e01a40e910e5 Mon Sep 17 00:00:00 2001 From: Jacob Wegner Date: Thu, 10 Sep 2020 13:31:15 -0500 Subject: [PATCH 21/34] remove assumptions from prepare_atlas_db --- .../atlas/management/commands/prepare_atlas_db.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/atlas/scaife_viewer/atlas/management/commands/prepare_atlas_db.py b/atlas/scaife_viewer/atlas/management/commands/prepare_atlas_db.py index 0562db1..e9b22b3 100644 --- a/atlas/scaife_viewer/atlas/management/commands/prepare_atlas_db.py +++ b/atlas/scaife_viewer/atlas/management/commands/prepare_atlas_db.py @@ -41,15 +41,19 @@ def handle(self, *args, **options): if 
db_path_exists: os.remove(database_path) self.stdout.write("--[Removed existing ATLAS database]--") + else: + db_dir = os.path.dirname(database_path) + os.makedirs(db_dir, exist_ok=True) db_label = settings.SV_ATLAS_DB_LABEL self.stdout.write(f'--[Running database migrations on "{db_label}"]--') call_command("migrate", database=db_label) - resolver_path = settings.CTS_RESOLVER_CACHE_LOCATION - if os.path.exists(resolver_path): - shutil.rmtree(resolver_path) - self.stdout.write("--[Removed existing CTS resolver cache]--") + if hasattr(settings, "CTS_RESOLVER_CACHE_LOCATION"): + resolver_path = settings.CTS_RESOLVER_CACHE_LOCATION + if os.path.exists(resolver_path): + shutil.rmtree(resolver_path) + self.stdout.write("--[Removed existing CTS resolver cache]--") self.stdout.write("--[Populating ATLAS db]--") importers.versions.import_versions() From a6c7d14cabb2aa3637f033b9b5890a44f6cb5fb4 Mon Sep 17 00:00:00 2001 From: Jacob Wegner Date: Tue, 29 Sep 2020 09:02:40 -0500 Subject: [PATCH 22/34] retire ATLAS_CONFIG --- atlas/scaife_viewer/atlas/importers/alignments.py | 2 +- atlas/scaife_viewer/atlas/importers/text_annotations.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/atlas/scaife_viewer/atlas/importers/alignments.py b/atlas/scaife_viewer/atlas/importers/alignments.py index 4ba2e8e..54fc729 100644 --- a/atlas/scaife_viewer/atlas/importers/alignments.py +++ b/atlas/scaife_viewer/atlas/importers/alignments.py @@ -17,7 +17,7 @@ ANNOTATIONS_DATA_PATH = os.path.join( - settings.ATLAS_CONFIG["DATA_DIR"], "annotations", "text-alignments" + settings.SV_ATLAS_DATA_DIR, "annotations", "text-alignments" ) RAW_PATH = os.path.join(ANNOTATIONS_DATA_PATH, "raw") diff --git a/atlas/scaife_viewer/atlas/importers/text_annotations.py b/atlas/scaife_viewer/atlas/importers/text_annotations.py index 8b59b03..b4556ec 100644 --- a/atlas/scaife_viewer/atlas/importers/text_annotations.py +++ b/atlas/scaife_viewer/atlas/importers/text_annotations.py @@ -14,7 +14,7 @@ 
settings.SV_ATLAS_DATA_DIR, "annotations", "text-annotations" ) SYNTAX_TREES_ANNOTATIONS_PATH = os.path.join( - settings.ATLAS_CONFIG["DATA_DIR"], "annotations", "syntax-trees" + settings.SV_ATLAS_DATA_DIR, "annotations", "syntax-trees" ) From 73b877c016231d9a10b7e6812762f7f311a7bd3d Mon Sep 17 00:00:00 2001 From: Jacob Wegner Date: Tue, 29 Sep 2020 09:04:48 -0500 Subject: [PATCH 23/34] lint --- atlas/scaife_viewer/atlas/importers/alignments.py | 3 +-- atlas/scaife_viewer/atlas/importers/versions.py | 2 +- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/atlas/scaife_viewer/atlas/importers/alignments.py b/atlas/scaife_viewer/atlas/importers/alignments.py index 54fc729..fecfb49 100644 --- a/atlas/scaife_viewer/atlas/importers/alignments.py +++ b/atlas/scaife_viewer/atlas/importers/alignments.py @@ -2,9 +2,8 @@ import os from collections import defaultdict -from scaife_viewer.atlas.conf import settings - from scaife_viewer.atlas.backports.scaife_viewer.cts.utils import natural_keys +from scaife_viewer.atlas.conf import settings from scaife_viewer.atlas.urn import URN from ..models import ( diff --git a/atlas/scaife_viewer/atlas/importers/versions.py b/atlas/scaife_viewer/atlas/importers/versions.py index 1d6f639..3c86ff4 100644 --- a/atlas/scaife_viewer/atlas/importers/versions.py +++ b/atlas/scaife_viewer/atlas/importers/versions.py @@ -5,10 +5,10 @@ from django.db.models import Max from django.utils.translation import ugettext_noop -from tqdm import tqdm from treebeard.exceptions import PathOverflow from scaife_viewer.atlas import constants +from tqdm import tqdm from ..hooks import hookset from ..models import Node From 57e8607b252600b99fc805973fc52f20c49978e5 Mon Sep 17 00:00:00 2001 From: Jacob Wegner Date: Tue, 29 Sep 2020 10:23:23 -0500 Subject: [PATCH 24/34] add note about DATA_MODEL_ID --- atlas/scaife_viewer/atlas/conf.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/atlas/scaife_viewer/atlas/conf.py b/atlas/scaife_viewer/atlas/conf.py 
index 8d1459a..2cea66b 100644 --- a/atlas/scaife_viewer/atlas/conf.py +++ b/atlas/scaife_viewer/atlas/conf.py @@ -30,6 +30,10 @@ class ATLASAppConf(AppConf): IN_MEMORY_PASSAGE_CHUNK_MAX = 2500 NODE_ALPHABET = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" + # `DATA_MODEL_ID` should be incremented when BI schema changes are made + # to ATLAS models. + # Site developers can use the value of this setting to help inform + # that ATLAS content must be re-ingested when schema changes occur DATA_MODEL_ID = base64.b64encode(b"2020-09-08-001\n").decode() DB_LABEL = "atlas" DB_PATH = None From 27c1fa91b0c0d2f630b8e499e73370b75f6442c6 Mon Sep 17 00:00:00 2001 From: Jacob Wegner Date: Tue, 29 Sep 2020 10:52:13 -0500 Subject: [PATCH 25/34] document available settings --- atlas/README.md | 67 +++++++++++++++++++++++++++++++ atlas/scaife_viewer/atlas/conf.py | 29 +++++++------ 2 files changed, 84 insertions(+), 12 deletions(-) diff --git a/atlas/README.md b/atlas/README.md index 2dddb85..69347b8 100644 --- a/atlas/README.md +++ b/atlas/README.md @@ -1 +1,68 @@ # Aligned Text and Linguistic Annotation Server (ATLAS) + +## Settings + +Settings can be overridden at a project level via the `SV_ATLAS_` +naming convention. + +### Data model + +**DATA_DIR** +Default: `None` + +The path to the directory containing ATLAS data + +**DATA_MODEL_ID** +Default: A base64 encoded representation of the last release (in `YYYY-MM-DD-###` format) where a +backwards incompatible schema change occurred. + +Site developers can use the value of this setting to help inform when ATLAS content should be re-ingested +due to BI schema changes. + +**INGESTION_CONCURRENCY** +Default: `None` + +Sets the number of processes available to ProcessPoolExecutors during ingestion. 
+ +When `None`, defaults to number of processors as reported by multiprocessing.cpu_count() + +**NODE_ALPHABET** +Default: `"0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz"` + +Used by `django-treebeard` to calculate the maximum path steps. + +See the [django-treebeard docs](https://django-treebeard.readthedocs.io/en/latest/mp_tree.html#treebeard.mp_tree.MP_Node.alphabet) for more information. + + +### Database + +**DB_LABEL** +Default: `"atlas"` + +The label to use for the ATLAS-specific database (required when using the `ATLASRouter` database router) + +**DB_PATH** +Default: `None` + +The path to the SQLite database referenced by `DB_LABEL`. + + +### GraphQL + +**IN_MEMORY_PASSAGE_CHUNK_MAX** +Default: `2500` + +Sets the upper limit on the number of text parts used for in-memory passage chunking. + +When the number of text parts exceeds this limit, ATLAS will fall back to a database-backed +chunking algorithm. + +For most smaller passages, the in-memory chunking is faster than using the database. + + +### Other + +**HOOKSET** +Default: `"scaife_viewer.atlas.hooks.DefaultHookSet"` + +The path to a hookset that can be used to customize ATLAS functionality. 
diff --git a/atlas/scaife_viewer/atlas/conf.py b/atlas/scaife_viewer/atlas/conf.py index 2cea66b..bad7da5 100644 --- a/atlas/scaife_viewer/atlas/conf.py +++ b/atlas/scaife_viewer/atlas/conf.py @@ -24,27 +24,32 @@ def load_path_attr(path): class ATLASAppConf(AppConf): - # `INGESTION_CONCURRENCY` defaults to number of processors - # as reported by multiprocessing.cpu_count() + # Data model + DATA_DIR = None + DATA_MODEL_ID = base64.b64encode(b"2020-09-08-001\n").decode() INGESTION_CONCURRENCY = None + NODE_ALPHABET = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" + # GraphQL settings IN_MEMORY_PASSAGE_CHUNK_MAX = 2500 - NODE_ALPHABET = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz" - # `DATA_MODEL_ID` should be incremented when BI schema changes are made - # to ATLAS models. - # Site developers can use the value of this setting to help inform - # that ATLAS content must be re-ingested when schema changes occur - DATA_MODEL_ID = base64.b64encode(b"2020-09-08-001\n").decode() + + # Database settings DB_LABEL = "atlas" DB_PATH = None - HOOKSET = "scaife_viewer.atlas.hooks.DefaultHookSet" - # required settings - # DATA_DIR + # Other + HOOKSET = "scaife_viewer.atlas.hooks.DefaultHookSet" class Meta: prefix = "sv_atlas" - required = ["DATA_DIR"] def configure_hookset(self, value): return load_path_attr(value)() + + def configure_data_dir(self, value): + # NOTE: We've chosen an explicit `configure` method + # vs making `DATA_DIR` a required field so we can check + # that DATA_DIR is a non-None value. 
+ if value is None: + msg = f"{self._meta.prefixed_name('DATA_DIR')} must be defined" + raise ImproperlyConfigured(msg) From 7500d1a3fad0648a57566fcab3849dae35fb021c Mon Sep 17 00:00:00 2001 From: Jacob Wegner Date: Tue, 29 Sep 2020 10:52:57 -0500 Subject: [PATCH 26/34] lint --- atlas/scaife_viewer/atlas/importers/versions.py | 2 +- atlas/scaife_viewer/atlas/tests/strategies.py | 1 - atlas/scaife_viewer/atlas/tests/test_importer_fuzz.py | 1 - atlas/scaife_viewer/atlas/tests/test_importer_integration.py | 2 +- atlas/scaife_viewer/atlas/tests/test_node.py | 1 - atlas/scaife_viewer/atlas/tests/test_urn.py | 1 - atlas/scaife_viewer/atlas/tests/test_urn_fuzz.py | 1 - 7 files changed, 2 insertions(+), 7 deletions(-) diff --git a/atlas/scaife_viewer/atlas/importers/versions.py b/atlas/scaife_viewer/atlas/importers/versions.py index 3c86ff4..1d6f639 100644 --- a/atlas/scaife_viewer/atlas/importers/versions.py +++ b/atlas/scaife_viewer/atlas/importers/versions.py @@ -5,10 +5,10 @@ from django.db.models import Max from django.utils.translation import ugettext_noop +from tqdm import tqdm from treebeard.exceptions import PathOverflow from scaife_viewer.atlas import constants -from tqdm import tqdm from ..hooks import hookset from ..models import Node diff --git a/atlas/scaife_viewer/atlas/tests/strategies.py b/atlas/scaife_viewer/atlas/tests/strategies.py index 0dd8013..06b7143 100644 --- a/atlas/scaife_viewer/atlas/tests/strategies.py +++ b/atlas/scaife_viewer/atlas/tests/strategies.py @@ -1,5 +1,4 @@ from hypothesis import strategies - from scaife_viewer.atlas import constants diff --git a/atlas/scaife_viewer/atlas/tests/test_importer_fuzz.py b/atlas/scaife_viewer/atlas/tests/test_importer_fuzz.py index 2b2bd21..c059ab8 100644 --- a/atlas/scaife_viewer/atlas/tests/test_importer_fuzz.py +++ b/atlas/scaife_viewer/atlas/tests/test_importer_fuzz.py @@ -1,7 +1,6 @@ import copy import hypothesis - from scaife_viewer.atlas.importers.versions import CTSImporter, Library from 
scaife_viewer.atlas.tests.strategies import URNs from scaife_viewer.atlas.urn import URN diff --git a/atlas/scaife_viewer/atlas/tests/test_importer_integration.py b/atlas/scaife_viewer/atlas/tests/test_importer_integration.py index 6bf0573..1973ed8 100644 --- a/atlas/scaife_viewer/atlas/tests/test_importer_integration.py +++ b/atlas/scaife_viewer/atlas/tests/test_importer_integration.py @@ -1,9 +1,9 @@ import copy from unittest import mock -import pytest from treebeard.exceptions import PathOverflow +import pytest from scaife_viewer.atlas.importers.versions import CTSImporter, Library from scaife_viewer.atlas.models import Node from scaife_viewer.atlas.tests import constants diff --git a/atlas/scaife_viewer/atlas/tests/test_node.py b/atlas/scaife_viewer/atlas/tests/test_node.py index 52a20f6..c567a50 100644 --- a/atlas/scaife_viewer/atlas/tests/test_node.py +++ b/atlas/scaife_viewer/atlas/tests/test_node.py @@ -1,7 +1,6 @@ from collections import OrderedDict import pytest - from scaife_viewer.atlas.models import Node from scaife_viewer.atlas.tests import constants diff --git a/atlas/scaife_viewer/atlas/tests/test_urn.py b/atlas/scaife_viewer/atlas/tests/test_urn.py index 4110b40..aafe7bb 100644 --- a/atlas/scaife_viewer/atlas/tests/test_urn.py +++ b/atlas/scaife_viewer/atlas/tests/test_urn.py @@ -1,7 +1,6 @@ from unittest import mock import pytest - from scaife_viewer.atlas.urn import URN diff --git a/atlas/scaife_viewer/atlas/tests/test_urn_fuzz.py b/atlas/scaife_viewer/atlas/tests/test_urn_fuzz.py index b209714..9f78382 100644 --- a/atlas/scaife_viewer/atlas/tests/test_urn_fuzz.py +++ b/atlas/scaife_viewer/atlas/tests/test_urn_fuzz.py @@ -1,5 +1,4 @@ import hypothesis - from scaife_viewer.atlas.tests.strategies import URNs from scaife_viewer.atlas.urn import URN From 182ce09e03bbd49196a81c01b6cf580d1b94de5f Mon Sep 17 00:00:00 2001 From: Jacob Wegner Date: Tue, 29 Sep 2020 14:46:54 -0500 Subject: [PATCH 27/34] add namespace to ATLS urls --- 
atlas/scaife_viewer/atlas/urls.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/atlas/scaife_viewer/atlas/urls.py b/atlas/scaife_viewer/atlas/urls.py index d7f0607..df8ac81 100644 --- a/atlas/scaife_viewer/atlas/urls.py +++ b/atlas/scaife_viewer/atlas/urls.py @@ -4,6 +4,11 @@ from graphene_django.views import GraphQLView +app_name = "sv_atlas" urlpatterns = [ - path("graphql/", csrf_exempt(GraphQLView.as_view(graphiql=True))), + path( + "graphql/", + csrf_exempt(GraphQLView.as_view(graphiql=True)), + name="graphql_endpoint", + ), ] From 36c718471994e8e24cdfa7137128b4a41b8d4cf8 Mon Sep 17 00:00:00 2001 From: Jacob Wegner Date: Tue, 29 Sep 2020 15:07:53 -0500 Subject: [PATCH 28/34] document GraphQL / GraphiQL endpoint --- atlas/README.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/atlas/README.md b/atlas/README.md index 69347b8..533bfb6 100644 --- a/atlas/README.md +++ b/atlas/README.md @@ -66,3 +66,11 @@ For most smaller passages, the in-memory chunking is faster than using the datab Default: `"scaife_viewer.atlas.hooks.DefaultHookSet"` The path to a hookset that can be used to customize ATLAS functionality. + +## GraphQL Endpoint +URL Name: `sv_atlas:graphql_endpoint` + +Primary GraphQL endpoint for `scaife-viewer-atlas` projects. + +When accessed [via a browser](https://github.com/graphql-python/graphene-django/blob/2e806384f60505a29745752bf9c477c71668f0fa/graphene_django/views.py#L154), delivers a [GraphiQL Playground](https://github.com/graphql/graphiql#graphiql) that can be used +to explore ATLAS GraphQL fields. 
From c99d53fe63ccd1a7e0554326e76369595a733299 Mon Sep 17 00:00:00 2001 From: Jacob Wegner Date: Tue, 29 Sep 2020 15:34:47 -0500 Subject: [PATCH 29/34] fix configuration error for DATA_DIR --- atlas/scaife_viewer/atlas/conf.py | 1 + 1 file changed, 1 insertion(+) diff --git a/atlas/scaife_viewer/atlas/conf.py b/atlas/scaife_viewer/atlas/conf.py index bad7da5..6169bf2 100644 --- a/atlas/scaife_viewer/atlas/conf.py +++ b/atlas/scaife_viewer/atlas/conf.py @@ -53,3 +53,4 @@ def configure_data_dir(self, value): if value is None: msg = f"{self._meta.prefixed_name('DATA_DIR')} must be defined" raise ImproperlyConfigured(msg) + return value From 7ae497ac18b767b866e1f8a7fc9c5dd096a4653b Mon Sep 17 00:00:00 2001 From: Jacob Wegner Date: Tue, 29 Sep 2020 15:50:03 -0500 Subject: [PATCH 30/34] fix failing tests --- atlas/scaife_viewer/atlas/importers/versions.py | 6 ++++-- atlas/scaife_viewer/atlas/tests/constants.py | 2 ++ atlas/scaife_viewer/atlas/tests/strategies.py | 1 + atlas/scaife_viewer/atlas/tests/test_importer.py | 15 ++++++++++----- .../atlas/tests/test_importer_fuzz.py | 4 +++- .../atlas/tests/test_importer_integration.py | 5 +++-- atlas/scaife_viewer/atlas/tests/test_node.py | 1 + atlas/scaife_viewer/atlas/tests/test_urn.py | 1 + atlas/scaife_viewer/atlas/tests/test_urn_fuzz.py | 1 + 9 files changed, 26 insertions(+), 10 deletions(-) diff --git a/atlas/scaife_viewer/atlas/importers/versions.py b/atlas/scaife_viewer/atlas/importers/versions.py index 1d6f639..e082232 100644 --- a/atlas/scaife_viewer/atlas/importers/versions.py +++ b/atlas/scaife_viewer/atlas/importers/versions.py @@ -171,7 +171,7 @@ def generate_node(self, idx, node_data, parent_urn): return self.add_child_bulk(parent, node_data) return self.add_child(parent, node_data) - def destructure_urn(self, node_urn, tokens, extract_text_parts): + def destructure_urn(self, node_urn, tokens, extract_text_parts=True): node_data = [] for kind in self.get_urn_scheme(node_urn): data = {"kind": kind} @@ -221,7 +221,9 
@@ def extract_urn_and_tokens(self, line): def generate_branch(self, line, extract_text_parts=True): node_urn, tokens = self.extract_urn_and_tokens(line) - branch_data = self.destructure_urn(node_urn, tokens, extract_text_parts) + branch_data = self.destructure_urn( + node_urn, tokens, extract_text_parts=extract_text_parts + ) for idx, node_data in enumerate(branch_data): node = self.nodes.get(node_data["urn"]) if node is None: diff --git a/atlas/scaife_viewer/atlas/tests/constants.py b/atlas/scaife_viewer/atlas/tests/constants.py index 28923d1..ae2e142 100644 --- a/atlas/scaife_viewer/atlas/tests/constants.py +++ b/atlas/scaife_viewer/atlas/tests/constants.py @@ -70,6 +70,8 @@ "label": "Iliad, Homeri Opera", "lang": "grc", "first_passage_urn": "urn:cts:greekLit:tlg0012.tlg001.perseus-grc2:1.1-1.7", + "description": "Homer, creator; Monro, D. B. (David Binning), 1836-1905, creator; Monro, D. B. (David Binning), 1836-1905, editor; Allen, Thomas W. (Thomas William), b. 1862, editor", + "kind": "edition", "default_toc_urn": None, } LIBRARY_DATA = { diff --git a/atlas/scaife_viewer/atlas/tests/strategies.py b/atlas/scaife_viewer/atlas/tests/strategies.py index 06b7143..0dd8013 100644 --- a/atlas/scaife_viewer/atlas/tests/strategies.py +++ b/atlas/scaife_viewer/atlas/tests/strategies.py @@ -1,4 +1,5 @@ from hypothesis import strategies + from scaife_viewer.atlas import constants diff --git a/atlas/scaife_viewer/atlas/tests/test_importer.py b/atlas/scaife_viewer/atlas/tests/test_importer.py index 068e151..3c21971 100644 --- a/atlas/scaife_viewer/atlas/tests/test_importer.py +++ b/atlas/scaife_viewer/atlas/tests/test_importer.py @@ -1,7 +1,8 @@ import copy from unittest import mock -from scaife_viewer.atlas.importers.versions import CTSImporter, Library +from scaife_viewer.atlas.importers.versions import CTSImporter +from scaife_viewer.atlas.resolvers.common import Library from scaife_viewer.atlas.tests import constants from scaife_viewer.atlas.urn import URN @@ -26,7 
+27,7 @@ def test_destructure(): { "kind": "work", "urn": "urn:cts:greekLit:tlg0012.tlg001:", - "metadata": {"label": "Iliad"}, + "metadata": {"label": "Iliad", "lang": "grc"}, }, { "kind": "version", @@ -71,7 +72,7 @@ def test_destructure_alphanumeric(): { "kind": "work", "urn": "urn:cts:greekLit:tlg0012.tlg001:", - "metadata": {"label": "Iliad"}, + "metadata": {"label": "Iliad", "lang": "grc"}, }, { "kind": "version", @@ -136,7 +137,7 @@ def test_importer(mock_node, mock_generate, mock_open): { "kind": "work", "urn": "urn:cts:greekLit:tlg0012.tlg001:", - "metadata": {"label": "Iliad"}, + "metadata": {"label": "Iliad", "lang": "grc"}, "idx": 0, }, "urn:cts:greekLit:tlg0012:", @@ -151,6 +152,8 @@ def test_importer(mock_node, mock_generate, mock_open): "label": "Iliad, Homeri Opera", "lang": "grc", "first_passage_urn": "urn:cts:greekLit:tlg0012.tlg001.perseus-grc2:1.1-1.7", + "description": "Homer, creator; Monro, D. B. (David Binning), 1836-1905, creator; Monro, D. B. (David Binning), 1836-1905, editor; Allen, Thomas W. (Thomas William), b. 1862, editor", + "kind": "edition", "default_toc_urn": None, }, "idx": 0, @@ -292,7 +295,7 @@ def test_importer_with_exemplar(mock_node, mock_generate, mock_open): { "kind": "work", "urn": "urn:cts:greekLit:tlg0012.tlg001:", - "metadata": {"label": "Iliad"}, + "metadata": {"label": "Iliad", "lang": "grc"}, "idx": 0, }, "urn:cts:greekLit:tlg0012:", @@ -307,6 +310,8 @@ def test_importer_with_exemplar(mock_node, mock_generate, mock_open): "label": "Iliad, Homeri Opera", "lang": "grc", "first_passage_urn": "urn:cts:greekLit:tlg0012.tlg001.perseus-grc2:1.1-1.7", + "description": "Homer, creator; Monro, D. B. (David Binning), 1836-1905, creator; Monro, D. B. (David Binning), 1836-1905, editor; Allen, Thomas W. (Thomas William), b. 
1862, editor", + "kind": "edition", "default_toc_urn": None, }, "idx": 0, diff --git a/atlas/scaife_viewer/atlas/tests/test_importer_fuzz.py b/atlas/scaife_viewer/atlas/tests/test_importer_fuzz.py index c059ab8..60c7a1e 100644 --- a/atlas/scaife_viewer/atlas/tests/test_importer_fuzz.py +++ b/atlas/scaife_viewer/atlas/tests/test_importer_fuzz.py @@ -1,7 +1,9 @@ import copy import hypothesis -from scaife_viewer.atlas.importers.versions import CTSImporter, Library + +from scaife_viewer.atlas.importers.versions import CTSImporter +from scaife_viewer.atlas.resolvers.common import Library from scaife_viewer.atlas.tests.strategies import URNs from scaife_viewer.atlas.urn import URN diff --git a/atlas/scaife_viewer/atlas/tests/test_importer_integration.py b/atlas/scaife_viewer/atlas/tests/test_importer_integration.py index 1973ed8..810b9ff 100644 --- a/atlas/scaife_viewer/atlas/tests/test_importer_integration.py +++ b/atlas/scaife_viewer/atlas/tests/test_importer_integration.py @@ -1,11 +1,12 @@ import copy from unittest import mock +import pytest from treebeard.exceptions import PathOverflow -import pytest -from scaife_viewer.atlas.importers.versions import CTSImporter, Library +from scaife_viewer.atlas.importers.versions import CTSImporter from scaife_viewer.atlas.models import Node +from scaife_viewer.atlas.resolvers.common import Library from scaife_viewer.atlas.tests import constants diff --git a/atlas/scaife_viewer/atlas/tests/test_node.py b/atlas/scaife_viewer/atlas/tests/test_node.py index c567a50..52a20f6 100644 --- a/atlas/scaife_viewer/atlas/tests/test_node.py +++ b/atlas/scaife_viewer/atlas/tests/test_node.py @@ -1,6 +1,7 @@ from collections import OrderedDict import pytest + from scaife_viewer.atlas.models import Node from scaife_viewer.atlas.tests import constants diff --git a/atlas/scaife_viewer/atlas/tests/test_urn.py b/atlas/scaife_viewer/atlas/tests/test_urn.py index aafe7bb..4110b40 100644 --- a/atlas/scaife_viewer/atlas/tests/test_urn.py +++ 
b/atlas/scaife_viewer/atlas/tests/test_urn.py @@ -1,6 +1,7 @@ from unittest import mock import pytest + from scaife_viewer.atlas.urn import URN diff --git a/atlas/scaife_viewer/atlas/tests/test_urn_fuzz.py b/atlas/scaife_viewer/atlas/tests/test_urn_fuzz.py index 9f78382..b209714 100644 --- a/atlas/scaife_viewer/atlas/tests/test_urn_fuzz.py +++ b/atlas/scaife_viewer/atlas/tests/test_urn_fuzz.py @@ -1,4 +1,5 @@ import hypothesis + from scaife_viewer.atlas.tests.strategies import URNs from scaife_viewer.atlas.urn import URN From 1b038dd6f0a144ce63b1b0527b3756d0c76c2816 Mon Sep 17 00:00:00 2001 From: Jacob Wegner Date: Tue, 29 Sep 2020 15:56:30 -0500 Subject: [PATCH 31/34] only tweak PRAGMA when connection matches DB_LABEL --- atlas/scaife_viewer/atlas/apps.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/atlas/scaife_viewer/atlas/apps.py b/atlas/scaife_viewer/atlas/apps.py index e91ba88..7d223c0 100644 --- a/atlas/scaife_viewer/atlas/apps.py +++ b/atlas/scaife_viewer/atlas/apps.py @@ -1,4 +1,5 @@ from django.apps import AppConfig as BaseAppConfig +from django.conf import settings from django.db.backends.signals import connection_created from django.utils.translation import ugettext_lazy as _ @@ -14,9 +15,7 @@ def tweak_sqlite_pragma(sender, connection, **kwargs): """ Customize PRAGMA settings for SQLite """ - # TODO: Bind this only to the ATLAS database, - # rather than assuming any SQLite connection - if connection.vendor == "sqlite": + if connection.vendor == "sqlite" and connection.alias == settings.SV_ATLAS_DB_LABEL: cursor = connection.cursor() cursor.execute("PRAGMA synchronous=OFF;") cursor.execute("PRAGMA cache_size=100000;") From fad8c458d222d29fe6c7749012eb7cf47b71725f Mon Sep 17 00:00:00 2001 From: Jacob Wegner Date: Tue, 29 Sep 2020 16:14:09 -0500 Subject: [PATCH 32/34] resolve a few TODOs --- atlas/scaife_viewer/atlas/hooks.py | 20 ++++++++++--------- .../atlas/resolvers/cts_collection.py | 1 - 2 files changed, 11 insertions(+), 
10 deletions(-) diff --git a/atlas/scaife_viewer/atlas/hooks.py b/atlas/scaife_viewer/atlas/hooks.py index 3f81ff9..4fd98bb 100644 --- a/atlas/scaife_viewer/atlas/hooks.py +++ b/atlas/scaife_viewer/atlas/hooks.py @@ -2,6 +2,12 @@ from .resolvers.default import resolve_library +def ensure_trailing_colon(urn): + if not urn.endswith(":"): + return f"{urn}:" + return urn + + class DefaultHookSet: def resolve_library(self): return resolve_library() @@ -19,17 +25,15 @@ def get_importer_class(self): def extract_cts_text_group_metadata(self, text_group): return dict( - # TODO: urn with trailing colon - urn=f"{text_group.urn}:", + urn=f"{ensure_trailing_colon(text_group.urn)}", name=[dict(lang="eng", value=str(text_group.label))], ) def extract_cts_work_metadata(self, work): - # FIXME: backport `lang` attr + # FIXME: backport `lang` attr to scaife-viewer-core lang = getattr(work, "lang", work.metadata.lang) return dict( - # TODO: urn with trailing colon - urn=f"{work.urn}:", + urn=f"{ensure_trailing_colon(work.urn)}", lang=lang, title=[ { @@ -42,11 +46,9 @@ def extract_cts_work_metadata(self, work): def extract_cts_version_metadata(self, version): return dict( - # TODO: urn with trailing colon - urn=f"{version.urn}:", + urn=f"{ensure_trailing_colon(version.urn)}", version_kind=version.kind, - # TODO: - # first_passage_urn + # TODO: provide first_passage_urn citation_scheme=[c.name for c in version.metadata.citation], label=[ { diff --git a/atlas/scaife_viewer/atlas/resolvers/cts_collection.py b/atlas/scaife_viewer/atlas/resolvers/cts_collection.py index 7f189f7..14ee6cd 100644 --- a/atlas/scaife_viewer/atlas/resolvers/cts_collection.py +++ b/atlas/scaife_viewer/atlas/resolvers/cts_collection.py @@ -14,7 +14,6 @@ def resolve_versions(self, work): version_metadata = hookset.extract_cts_version_metadata(version) # version_urn is required within CTSImporter version_urn = version_metadata["urn"] - # TODO: More validation around "path" version_metadata["path"] = None 
self.versions[version_urn] = version_metadata From 7b6bad56bfd81efb74e1fc8b8f1af2647ca4eb4d Mon Sep 17 00:00:00 2001 From: Jacob Wegner Date: Tue, 29 Sep 2020 16:24:10 -0500 Subject: [PATCH 33/34] update INGESTION_CONCURRENCY name --- atlas/scaife_viewer/atlas/tokenizers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/atlas/scaife_viewer/atlas/tokenizers.py b/atlas/scaife_viewer/atlas/tokenizers.py index a84105b..732d0e4 100644 --- a/atlas/scaife_viewer/atlas/tokenizers.py +++ b/atlas/scaife_viewer/atlas/tokenizers.py @@ -26,7 +26,7 @@ def tokenize_text_parts(version_exemplar_urn, force=True): def tokenize_all_text_parts(reset=False): exceptions = False with concurrent.futures.ProcessPoolExecutor( - max_workers=settings.SCAIFE_VIEWER_ATLAS_INGESTION_CONCURRENCY + max_workers=settings.SV_ATLAS_INGESTION_CONCURRENCY ) as executor: version_exemplar_nodes = Node.objects.filter(kind__in=["version", "exemplar"]) urn_futures = { From c5a6dc96a5875e854e7f301ff6d7bb44cd9be78d Mon Sep 17 00:00:00 2001 From: Jacob Wegner Date: Tue, 29 Sep 2020 16:27:44 -0500 Subject: [PATCH 34/34] bump alpha version --- atlas/setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/atlas/setup.py b/atlas/setup.py index b9e26c1..0645fa1 100644 --- a/atlas/setup.py +++ b/atlas/setup.py @@ -18,7 +18,7 @@ author_email="jtauber+scaife@jtauber.com", description="Aligned Text and Linguistic Annotation Server (ATLAS)", name="scaife-viewer-atlas", - version="0.1a4", + version="0.1a5", url="http://github.com/scaife-viewer/backend/", license="MIT", packages=find_packages(),