Skip to content

Commit

Permalink
Merge pull request #14 from scaife-viewer/core/urn-redirects
Browse files Browse the repository at this point in the history
Improve library / reader URN heuristics
  • Loading branch information
jacobwegner authored Sep 1, 2020
2 parents 65acfdc + 51da386 commit 4cf714d
Show file tree
Hide file tree
Showing 12 changed files with 193 additions and 20 deletions.
11 changes: 10 additions & 1 deletion core/README.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,12 @@
# Scaife Viewer :: Core functionality

This package was extracted from [https://github.com/scaife-viewer/scaife-viewer](https://github.com/scaife-viewer/scaife-viewer)
This package was extracted from
[https://github.com/scaife-viewer/scaife-viewer](https://github.com/scaife-viewer/scaife-viewer)

## Settings

### ALLOW_TRAILING_COLON
Default: `False`

When `False`, to maintain compatibility with the MyCapytain resolver,
the trailing colon will be stripped from URNs.
10 changes: 10 additions & 0 deletions core/scaife_viewer/core/conf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
from django.conf import settings # noqa

from appconf import AppConf


class CoreAppConf(AppConf):
    """
    Default settings for the scaife_viewer core app.

    With the ``scaife_viewer_core`` prefix declared in ``Meta``, each value
    is read from Django settings as ``SCAIFE_VIEWER_CORE_<NAME>``.
    """

    # Default for SCAIFE_VIEWER_CORE_ALLOW_TRAILING_COLON; when False, a
    # trailing colon is stripped from URNs.
    ALLOW_TRAILING_COLON = False

    class Meta:
        # Prefix applied to every setting name declared on this class.
        prefix = "scaife_viewer_core"
4 changes: 2 additions & 2 deletions core/scaife_viewer/core/cts/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,13 @@
)
from .exceptions import ( # noqa
CollectionDoesNotExist,
PassageDoesNotExist,
InvalidPassageReference,
InvalidURN,
PassageDoesNotExist,
)
from .heal import heal
from .passage import Passage
from .reference import URN
from .heal import heal


def text_inventory() -> TextInventory:
Expand Down
20 changes: 15 additions & 5 deletions core/scaife_viewer/core/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from django.conf import settings
from django.urls import reverse
from django.utils.functional import SimpleLazyObject

import regex
from elasticsearch import Elasticsearch
Expand All @@ -10,11 +11,20 @@
from . import cts


es = Elasticsearch(
hosts=settings.ELASTICSEARCH_HOSTS,
sniff_on_start=settings.ELASTICSEARCH_SNIFF_ON_START,
sniff_on_connection_fail=settings.ELASTICSEARCH_SNIFF_ON_CONNECTION_FAIL,
)
def default_es_client_config():
    """
    Build the keyword arguments used to construct the Elasticsearch client.

    Every value is read from Django settings when the function is called.
    """
    return {
        "hosts": settings.ELASTICSEARCH_HOSTS,
        "sniff_on_start": settings.ELASTICSEARCH_SNIFF_ON_START,
        "sniff_on_connection_fail": settings.ELASTICSEARCH_SNIFF_ON_CONNECTION_FAIL,
    }


def get_es_client():
    """Create an Elasticsearch client from the default client configuration."""
    client_kwargs = default_es_client_config()
    return Elasticsearch(**client_kwargs)


# Wrapped in SimpleLazyObject so get_es_client() is not called when this
# module is imported; the client is built the first time `es` is used.
es = SimpleLazyObject(get_es_client)


"""
From https://www.elastic.co/guide/en/elasticsearch/reference/6.0/search-request-highlighting.html#boundary-scanners:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
{% comment %} left intentionally blank {% endcomment %}
49 changes: 49 additions & 0 deletions core/scaife_viewer/core/tests/fixtures/ti.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
<GetCapabilities xmlns="http://chs.harvard.edu/xmlns/cts">
<request>
<requestName>GetInventory</requestName>
<requestFilters>urn=None</requestFilters>
</request>
<reply>
<TextInventory tiid="defaultTic"
xmlns:ns1="http://purl.org/dc/elements/1.1/"
xmlns:skos="http://www.w3.org/2004/02/skos/core#"
xmlns:dts="http://w3id.org/dts-ontology/"
xmlns="http://chs.harvard.edu/xmlns/cts"
xmlns:ns2="http://purl.org/dc/terms/"
xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
xmlns:rdfs="http://www.w3.org/2000/01/rdf-schema#">
<textgroup urn="urn:cts:greekLit:tlg0096">
<groupname xml:lang="eng">Aesop</groupname>
<work urn="urn:cts:greekLit:tlg0096.tlg002" xml:lang="grc" groupUrn="urn:cts:greekLit:tlg0096">
<title xml:lang="lat">Fabulae</title>
<edition urn="urn:cts:greekLit:tlg0096.tlg002.First1K-grc1" xml:lang="grc" workUrn="urn:cts:greekLit:tlg0096.tlg002">
<label xml:lang="lat">Fabulae Aesopicae Collectae</label>
<label xml:lang="grc">ΑΙΣΩΠΕΙΩΝ ΜΥΘΩΝ ΣΥΝΑΓΩΓΗ</label>
<description xml:lang="lat">Fabulae Aesopicae Collectae, Halm, Teubner, 1872</description>
<online>
<citationMapping>
<citation xpath="/tei:div[@n='?']" scope="/tei:TEI/tei:text/tei:body/tei:div" label="fabula"></citation>
</citationMapping>
</online>
</edition>
</work>
</textgroup>
<textgroup urn="urn:cts:greekLit:tlg4031">
<groupname xml:lang="lat">Eustratius</groupname>
<work urn="urn:cts:greekLit:tlg4031.tlg002" xml:lang="grc" groupUrn="urn:cts:greekLit:tlg4031">
<title xml:lang="lat">In Aristotelis Ethica Nicomachea I Commentaria</title>
<edition urn="urn:cts:greekLit:tlg4031.tlg002.opp-grc1" xml:lang="grc" workUrn="urn:cts:greekLit:tlg4031.tlg002">
<label xml:lang="lat">In Aristotelis Ethica Nicomachea I Commentaria</label>
<description xml:lang="mul">Eustratius, In Aristotelis Ethica Nicomachea I Commentaria, Commentaria in Aristotelem Graeca 20,
Reimer, Heylbut, 1892</description>
<online>
<citationMapping>
<citation xpath="/tei:div[@n='?']" scope="/tei:TEI/tei:text/tei:body/tei:div" label="chapter"></citation>
</citationMapping>
</online>
</edition>
</work>
</textgroup>
</TextInventory>
</reply>
</GetCapabilities>
15 changes: 12 additions & 3 deletions core/scaife_viewer/core/tests/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,7 @@
TEMPLATES = [
{
"BACKEND": "django.template.backends.django.DjangoTemplates",
"DIRS": [
# insert your TEMPLATE_DIRS here
],
"DIRS": ["scaife_viewer/core/tests/fixtures/templates"],
"APP_DIRS": True,
"OPTIONS": {
"context_processors": [
Expand All @@ -39,3 +37,14 @@
SITE_ID = 1
ROOT_URLCONF = "scaife_viewer.core.tests.urls"
SECRET_KEY = "notasecret"

# CTS API endpoint used by the test settings; can be overridden through the
# CTS_API_ENDPOINT environment variable.
CTS_API_ENDPOINT = os.environ.get(
    "CTS_API_ENDPOINT", "https://scaife-cts-dev.perseus.org/api/cts"
)
# Resolver configuration: an "api"-type resolver pointed at CTS_API_ENDPOINT.
CTS_RESOLVER = {
    "type": "api",
    "kwargs": {"endpoint": CTS_API_ENDPOINT},
}
# Path to the text inventory fixture shipped with the tests.
CTS_LOCAL_TEXT_INVENTORY = "scaife_viewer/core/tests/fixtures/ti.xml"

DEPLOYMENT_TIMESTAMP_VAR_NAME = "foo"
44 changes: 40 additions & 4 deletions core/scaife_viewer/core/tests/tests.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,42 @@
from django.test import TestCase
from django.test import TestCase, override_settings
from django.urls import reverse

from ..utils import normalize_urn

class Tests(TestCase):
def setUp(self):
pass

class URNTests(TestCase):
    """Exercise normalize_urn for URNs with and without a trailing colon."""

    def test_urn_normalized(self):
        # Without overriding settings, the trailing colon is removed.
        with_colon = "urn:cts:greekLit:tlg0096.tlg002.First1K-grc1:"
        expected = "urn:cts:greekLit:tlg0096.tlg002.First1K-grc1"
        assert normalize_urn(with_colon) == expected

    @override_settings(SCAIFE_VIEWER_CORE_ALLOW_TRAILING_COLON=True)
    def test_urn_trailing_colon_not_normalized(self):
        # With trailing colons allowed, the URN is returned as given.
        with_colon = "urn:cts:greekLit:tlg0096.tlg002.First1K-grc1:"
        assert normalize_urn(with_colon) == with_colon

    def test_urn_unmodified(self):
        # A URN that has no trailing colon passes through untouched.
        without_colon = "urn:cts:greekLit:tlg0096.tlg002.First1K-grc1"
        assert normalize_urn(without_colon) == without_colon


class ViewTests(TestCase):
    """Check where the reader view redirects for URNs lacking a passage."""

    def test_reader_version_urn_redirects_to_first_passage(self):
        # A version-level URN should land on the reader for its first passage.
        version_urn = "urn:cts:greekLit:tlg0096.tlg002.First1K-grc1"
        url = reverse("reader", kwargs={"urn": version_urn})
        expected_path = "/reader/urn:cts:greekLit:tlg0096.tlg002.First1K-grc1:1-4b/"
        response = self.client.get(url, follow=True)
        assert len(response.redirect_chain) == 2
        assert response.wsgi_request.path == expected_path

    def test_reader_work_urn_redirects_to_library(self):
        # A work-level URN should land on the library page for that work.
        work_urn = "urn:cts:greekLit:tlg0096.tlg002"
        url = reverse("reader", kwargs={"urn": work_urn})
        expected_path = "/library/urn:cts:greekLit:tlg0096.tlg002/"
        response = self.client.get(url, follow=True)
        assert len(response.redirect_chain) == 2
        assert response.wsgi_request.path == expected_path
19 changes: 18 additions & 1 deletion core/scaife_viewer/core/urls.py
Original file line number Diff line number Diff line change
@@ -1 +1,18 @@
urlpatterns = []
from django.urls import path

from .views import LibraryCollectionView, Reader, library_text_redirect


# Routes for the core app: library collection pages, the redirect helper for
# text URNs, and the reader.
urlpatterns = [
    # HTML rendering of a library collection identified by its URN.
    path(
        "library/<str:urn>/",
        LibraryCollectionView.as_view(format="html"),
        name="library_collection",
    ),
    # Redirect endpoint for a library URN, handled by library_text_redirect.
    path(
        "library/<str:urn>/redirect/",
        library_text_redirect,
        name="library_text_redirect",
    ),
    # Reader view for a URN.
    path("reader/<str:urn>/", Reader.as_view(), name="reader"),
]
14 changes: 14 additions & 0 deletions core/scaife_viewer/core/utils.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,13 @@
import logging
import math

from django.urls import reverse

from . import cts
from .conf import settings


logger = logging.getLogger(__name__)


def link_collection(urn) -> dict:
Expand Down Expand Up @@ -117,3 +122,12 @@ def get_pagination_info(total_count, page_num):
"has_next": has_next,
"num_pages": num_pages,
}


def normalize_urn(urn):
    """
    Return ``urn`` without its trailing colon, unless trailing colons are allowed.

    When ``settings.SCAIFE_VIEWER_CORE_ALLOW_TRAILING_COLON`` is falsy and
    ``urn`` ends with ":", the final character is removed and the change is
    logged at INFO level. In every other case ``urn`` is returned unchanged.
    Only a single trailing colon is removed.
    """
    if settings.SCAIFE_VIEWER_CORE_ALLOW_TRAILING_COLON or not urn.endswith(":"):
        return urn
    new_urn = urn[:-1]
    # Pass the values as logging arguments so the message is only formatted
    # when INFO logging is actually enabled.
    logger.info('Normalized "%s" to "%s"', urn, new_urn)
    return new_urn
25 changes: 21 additions & 4 deletions core/scaife_viewer/core/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,13 @@
from .http import ConditionMixin
from .precomputed import library_view_json
from .search import SearchQuery
from .utils import apify, encode_link_header, get_pagination_info, link_passage
from .utils import (
apify,
encode_link_header,
get_pagination_info,
link_passage,
normalize_urn,
)


class BaseLibraryView(View):
Expand Down Expand Up @@ -74,6 +80,10 @@ def get_collection(self):
raise Http404()

def as_html(self):
normalized_urn = normalize_urn(self.kwargs["urn"])
if normalized_urn != self.kwargs["urn"]:
return redirect("library_collection", urn=normalized_urn)

collection = self.get_collection()
collection_name = collection.__class__.__name__.lower()
ctx = {collection_name: collection}
Expand Down Expand Up @@ -201,10 +211,15 @@ class Reader(TemplateView):

template_name = "reader/reader.html"

def get(self, request, *args, **kwargs):
    """
    Render the reader, or redirect when the URN has no reference part.

    The parsed URN is stored on ``self.urn`` for later use by the view.
    """
    requested_urn = self.kwargs["urn"]
    self.urn = cts.URN(requested_urn)
    if self.urn.reference:
        return super().get(request, *args, **kwargs)
    # No reference on the URN: hand off to the library text redirect.
    return redirect("library_text_redirect", urn=requested_urn)

def get_text(self):
urn = cts.URN(self.kwargs["urn"])
try:
text = cts.collection(urn.upTo(cts.URN.NO_PASSAGE))
text = cts.collection(self.urn.upTo(cts.URN.NO_PASSAGE))
except cts.CollectionDoesNotExist:
raise Http404()
return text
Expand All @@ -220,12 +235,14 @@ def library_text_redirect(request, urn):
Given a text URN redirect to the first chunk. Required to prevent
TOCing on the top-level library page.
"""
urn = normalize_urn(urn)

try:
text = cts.collection(urn)
except cts.CollectionDoesNotExist:
raise Http404()
if not isinstance(text, cts.Text):
raise Http404()
return redirect("library_collection", urn=urn)
passage = text.first_passage()
if not passage:
raise Http404()
Expand Down
1 change: 1 addition & 0 deletions core/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
"anytree==2.4.3",
"certifi==2018.11.29",
"dask[bag]==1.1.0",
"django_appconf>=1.0.4",
"Django>=2.2,<3.0",
"elasticsearch==6.3.1",
"google-auth==1.6.2",
Expand Down

0 comments on commit 4cf714d

Please sign in to comment.