diff --git a/web/app/controllers/resources/Application.java b/web/app/controllers/resources/Application.java
index 0deb916c5..77125807a 100644
--- a/web/app/controllers/resources/Application.java
+++ b/web/app/controllers/resources/Application.java
@@ -72,6 +72,7 @@
 import views.html.query;
 import views.html.rss;
 import views.html.stars;
+import views.html.tags.result_doc;
 
 /**
  * The main application controller.
@@ -456,6 +457,34 @@ public static Promise<Result> resourceDotFormat(final String id,
 		return resource(id, format);
 	}
 
+	/**
+	 * @param id The resource ID.
+	 * @return The preview page for the resource with the given ID.
+	 */
+	public static Promise<Result> preview(final String id) {
+		String cacheId = String.format("show(%s,%s)", id, "preview");
+		@SuppressWarnings("unchecked")
+		Promise<Result> cachedResult = (Promise<Result>) Cache.get(cacheId);
+		if (cachedResult != null)
+			return cachedResult;
+		Promise<Result> promise = Promise.promise(() -> {
+			JsonNode result =
+					new Search.Builder().build().getResource(id).getResult();
+			if (result == null) {
+				String movedTo = idSearchResult(id);
+				if (movedTo != null) {
+					return movedPermanently(routes.Application.preview(movedTo));
+				}
+			}
+			return result != null
+					? ok(result_doc
+							.render(play.api.libs.json.Json.parse(result.toString())))
+					: notFound(details.render(CONFIG, "", id));
+		});
+		cacheOnRedeem(cacheId, promise, ONE_DAY);
+		return promise;
+	}
+
 	/**
 	 * @param id The resource ID.
 	 * @param format The response format (see {@code Accept.Format})
diff --git a/web/app/controllers/resources/Reconcile.java b/web/app/controllers/resources/Reconcile.java
new file mode 100644
index 000000000..7dd307745
--- /dev/null
+++ b/web/app/controllers/resources/Reconcile.java
@@ -0,0 +1,328 @@
+/* Copyright 2017-2023 Fabian Steeg, hbz. Licensed under the EPL 2.0 */
+
+package controllers.resources;
+
+import java.time.Instant;
+import java.time.ZoneId;
+import java.time.format.DateTimeFormatter;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Iterator;
+import java.util.List;
+import java.util.Map;
+import java.util.Map.Entry;
+import java.util.regex.Pattern;
+import java.util.stream.Collectors;
+
+import org.elasticsearch.index.query.BoolQueryBuilder;
+import org.elasticsearch.index.query.Operator;
+import org.elasticsearch.index.query.QueryBuilders;
+import org.elasticsearch.index.query.QueryStringQueryBuilder;
+import org.elasticsearch.search.SearchHit;
+
+import com.fasterxml.jackson.core.JsonProcessingException;
+import com.fasterxml.jackson.core.type.TypeReference;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.ObjectMapper;
+import com.fasterxml.jackson.databind.node.ObjectNode;
+
+import play.Logger;
+import play.libs.Json;
+import play.mvc.Controller;
+import play.mvc.Result;
+import play.mvc.Results;
+
+/**
+ * OpenRefine reconciliation service controller.
+ *
+ * Serves reconciliation service meta data and multi query requests.
+ *
+ * @author Fabian Steeg (fsteeg)
+ *
+ */
+public class Reconcile extends Controller {
+
+	private static final JsonNode TYPES =
+			Json.toJson(Arrays.asList("BibliographicResource"));
+
+	private static final DateTimeFormatter TIME_FORMATTER = DateTimeFormatter
+			.ofPattern("dd/MMM/yyyy:HH:mm:ss Z").withZone(ZoneId.systemDefault());
+
+	/** Shared JSON mapper, reused instead of creating one per call. */
+	private static final ObjectMapper MAPPER = new ObjectMapper();
+
+	/** Accepted JSONP callback names: dotted or indexed JS identifiers. */
+	private static final Pattern VALID_CALLBACK =
+			Pattern.compile("[A-Za-z_$][\\w$.\\[\\]]*");
+
+	/**
+	 * @param callback The name of the JSONP function to wrap the response in
+	 * @param queries The queries. If this and extend are empty, return service
+	 *          metadata
+	 * @param extend The extension data. If this and queries are empty, return
+	 *          service metadata
+	 * @return OpenRefine reconciliation results (if queries is not empty),
+	 *         endpoint meta data (if queries and extend are empty), or a 'not
+	 *         implemented' response (if only extend is given), wrapped in
+	 *         `callback`; a 'bad request' response for an invalid callback name
+	 */
+	public static Result main(String callback, String queries, String extend) {
+		response().setHeader("Access-Control-Allow-Origin", "*");
+		if (!callback.isEmpty() && !VALID_CALLBACK.matcher(callback).matches()) {
+			// The callback is echoed into the response, so reject anything
+			// that is not a plain function reference
+			return badRequest("Invalid JSONP callback name");
+		}
+		if (queries.isEmpty() && !extend.isEmpty()) {
+			// Data extension is not implemented, answer like reconcile() does
+			// instead of serializing a null result
+			return Results.TODO;
+		}
+		ObjectNode result = queries.isEmpty() ? metadata() : queries(queries);
+		final String resultString = prettyJsonString(result);
+		return (callback.isEmpty() ? ok(resultString)
+				: ok(String.format("/**/%s(%s);", callback, resultString)))
+						.as("application/json; charset=utf-8");
+	}
+
+	/** @return The service metadata (name, ID space, types, view, preview) */
+	private static ObjectNode metadata() {
+		final String host = Application.CONFIG.getString("host");
+		ObjectNode result = Json.newObject();
+		result.putArray("versions").add("0.1").add("0.2");
+		result.put("name",
+				"lobid-resources reconciliation for OpenRefine (" + host + ")");
+		result.put("identifierSpace", host + "/resources");
+		result.put("schemaSpace", "http://purl.org/dc/terms/BibliographicResource");
+		result.set("defaultTypes", TYPES);
+		result.set("view", Json.newObject().put("url", host + "/resources/{{id}}"));
+		result.set("preview", Json.newObject()//
+				.put("height", 300)//
+				.put("width", 600)//
+				.put("url", host + "/resources/{{id}}.preview"));
+		return result;
+	}
+
+	/**
+	 * @param jsonNode The JSON to serialize
+	 * @return The pretty printed JSON, or its compact form if that fails
+	 */
+	private static String prettyJsonString(JsonNode jsonNode) {
+		try {
+			return MAPPER.writerWithDefaultPrettyPrinter()
+					.writeValueAsString(jsonNode);
+		} catch (JsonProcessingException x) {
+			Logger.error("Could not pretty print JSON, using compact form", x);
+			return String.valueOf(jsonNode);
+		}
+	}
+
+	/** @return Reconciliation data for the queries in the request */
+	public static Result reconcile() {
+		Map<String, String[]> body = request().body().asFormUrlEncoded();
+		response().setHeader("Access-Control-Allow-Origin", "*");
+		Result result = resultFor(body);
+		// Apache-compatible POST logging, see
+		// https://github.com/hbz/lobid-gnd/issues/207#issuecomment-526571646
+		Logger.info("{} {} - [{}] \"{} {}\" {}",
+				request().headers().getOrDefault("X-Forwarded-For",
+						new String[] { request().remoteAddress() }),
+				request().host(), TIME_FORMATTER.format(Instant.now()),
+				request().method(), request().path(), result.status());
+		return result;
+	}
+
+	/**
+	 * @param body The form data of the request, null if it had none
+	 * @return The reconciliation results, 'not implemented' for data
+	 *         extension requests, or 'bad request' if there are no queries
+	 */
+	private static Result resultFor(Map<String, String[]> body) {
+		if (body == null) {
+			return badRequest("Expected a form URL encoded request body");
+		}
+		if (body.containsKey("extend")) {
+			return Results.TODO;
+		}
+		String[] queriesParam = body.get("queries");
+		if (queriesParam == null || queriesParam.length == 0) {
+			return badRequest("Missing form parameter: queries");
+		}
+		return ok(queries(queriesParam[0]));
+	}
+
+	/**
+	 * @param src The JSON object with the queries, keyed by query ID
+	 * @return The results for each query, keyed by query ID
+	 */
+	private static ObjectNode queries(String src) {
+		JsonNode request = Json.parse(src);
+		Iterator<Entry<String, JsonNode>> inputQueries = request.fields();
+		ObjectNode response = Json.newObject();
+		while (inputQueries.hasNext()) {
+			Entry<String, JsonNode> inputQuery = inputQueries.next();
+			Logger.info("q: " + inputQuery);
+			String mainQuery = mainQuery(inputQuery);
+			Search searchResponse = executeQuery(inputQuery,
+					preprocess(mainQuery), propQuery(inputQuery));
+			List<ObjectNode> results = mapToResults(mainQuery, searchResponse);
+			ObjectNode resultsForInputQuery = Json.newObject();
+			resultsForInputQuery.set("result", Json.toJson(results));
+			Logger.info("r: " + resultsForInputQuery);
+			response.set(inputQuery.getKey(), resultsForInputQuery);
+		}
+		return response;
+	}
+
+	/**
+	 * @param mainQuery The main query string (currently unused)
+	 * @param searchHits The search with the hits for the query
+	 * @return The hits as reconciliation candidates (id, name, score, match,
+	 *         type)
+	 */
+	private static List<ObjectNode> mapToResults(String mainQuery,
+			Search searchHits) {
+		List<ObjectNode> result = new ArrayList<>();
+		searchHits.getResult().elements().forEachRemaining(hit -> {
+			Map<String, Object> map = MAPPER.convertValue(hit,
+					new TypeReference<Map<String, Object>>() {/**/
+					});
+			ObjectNode resultForHit = Json.newObject();
+			String[] elements = hit.get("id").asText().split("/");
+			resultForHit.put("id", elements[elements.length - 1].replace("#!", ""));
+			Object nameObject = map.get("title");
+			String name = nameObject == null ? "" : nameObject + "";
+			resultForHit.put("name", name);
+			// TODO: temp, need a proper score solution, with query, see
+			// https://github.com/hbz/lobid-resources/issues/635
+			resultForHit.set("score", hit.get("_score"));
+			resultForHit.put("match", false);
+			resultForHit.set("type", hit.get("type"));
+			result.add(resultForHit);
+		});
+		markMatch(result);
+		return result;
+	}
+
+	/**
+	 * Marks the top result as a match if it scores above 50 and is either the
+	 * only result or scores more than 10 above the second result.
+	 */
+	private static void markMatch(List<ObjectNode> result) {
+		if (!result.isEmpty()) {
+			ObjectNode topResult = result.get(0);
+			int bestScore = topResult.get("score").asInt();
+			if (bestScore > 50 && (result.size() == 1
+					|| bestScore - result.get(1).get("score").asInt() > 10)) {
+				topResult.put("match", true);
+			}
+		}
+	}
+
+	/**
+	 * @param entry The input query (optional fields used here: limit, type)
+	 * @param queryString The preprocessed main query string
+	 * @param propString The query string for the properties, may be empty
+	 * @return The executed search, its hits include their score as `_score`
+	 */
+	private static Search executeQuery(Entry<String, JsonNode> entry,
+			String queryString, String propString) {
+		JsonNode limitNode = entry.getValue().get("limit");
+		int limit = limitNode == null ? -1 : limitNode.asInt();
+		JsonNode typeNode = entry.getValue().get("type");
+		String filter = typeNode == null ? "" : "type:" + typeNode.asText();
+		QueryStringQueryBuilder mainQuery =
+				QueryBuilders.queryStringQuery(queryString)//
+						.field("title", 8f)//
+						.field("alternativeTitle", 4f)//
+						.field("otherTitleInformation", 2f)//
+						.field("responsibilityStatement")//
+						.field("rpbId", 8f)//
+						.field("hbzId", 8f)//
+						.field("almaMmsId", 8f)//
+						.field("sameAs.id", 2f)//
+						.field("id", 8f);//
+
+		BoolQueryBuilder query = QueryBuilders.boolQuery().must(mainQuery)
+				// TODO: temp, don't reconcile against RPB records:
+				.mustNot(queryStringQuery("_exists_:rpbId"));
+		if (!filter.isEmpty()) {
+			query = query.should(queryStringQuery(filter).boost(8f));
+		}
+		if (propString != null && !propString.trim().isEmpty()) {
+			query = query.should(queryStringQuery(propString).boost(5f));
+		}
+		return new Search.Builder().query(query).from(0).size(limit).build()
+				.queryResources((SearchHit hit) -> {
+					Map<String, Object> source = hit.getSource();
+					// TODO: temp, need a proper score solution, with query, see
+					// https://github.com/hbz/lobid-resources/issues/635
+					source.put("_score", hit.getScore());
+					return Json.toJson(source);
+				});
+	}
+
+	/** @return A query string query for q, combining its terms with AND */
+	private static QueryStringQueryBuilder queryStringQuery(String q) {
+		return QueryBuilders.queryStringQuery(q).defaultOperator(Operator.AND);
+	}
+
+	/**
+	 * @param entry The input query, with optional `properties` (pid, v)
+	 * @return The property values as field queries joined with OR, empty if
+	 *         there are no usable properties
+	 */
+	private static String propQuery(Entry<String, JsonNode> entry) {
+		List<String> segments = new ArrayList<>();
+		JsonNode props = entry.getValue().get("properties");
+		if (props != null) {
+			Logger.debug("Properties: {}", props);
+			for (JsonNode p : props) {
+				JsonNode pid = p.get("pid");
+				JsonNode v = p.get("v");
+				if (pid == null || v == null) {
+					// Skip incomplete properties instead of failing the query
+					continue;
+				}
+				String value = preprocess(v.asText().trim());
+				if (!value.isEmpty()) {
+					segments.add("(" + pid.asText() + ":" + value + ")");
+				}
+			}
+		}
+		String queryString = segments.stream().collect(Collectors.joining(" OR "));
+		Logger.debug("Property query string: {}", queryString);
+		return queryString;
+	}
+
+	/**
+	 * @param s The raw query string
+	 * @return The string quoted if it is a URL or GND ID, else with query
+	 *         syntax characters replaced by spaces
+	 */
+	static String preprocess(String s) {
+		return s.startsWith("http") || isGndId(s) ? "\"" + s + "\""
+				: /* index.validate(s) ? s : */ clean(s); // TODO add validation
+	}
+
+	/** @return True if the string has the format of a GND ID */
+	private static boolean isGndId(String string) {
+		return string.matches(
+				// https://www.wikidata.org/wiki/Property:P227#P1793
+				"1[012]?\\d{7}[0-9X]|[47]\\d{6}-\\d|[1-9]\\d{0,7}-[0-9X]|3\\d{7}[0-9X]");
+	}
+
+	/** @return The string with query syntax characters replaced by spaces */
+	private static String clean(String in) {
+		String out = in.replaceAll("[\"!/:+\\-=<>(){}\\[\\]^]", " ");
+		if (!in.equals(out)) {
+			Logger.info("Cleaned query string '{}' to: '{}'", in, out);
+		}
+		return out;
+	}
+
+	/** @return The text of the `query` field of the input query */
+	private static String mainQuery(Entry<String, JsonNode> entry) {
+		return entry.getValue().get("query").asText();
+	}
+}
diff --git a/web/app/controllers/resources/Search.java b/web/app/controllers/resources/Search.java
index 5a71751ff..1ee1fa4d1 100644
--- a/web/app/controllers/resources/Search.java
+++ b/web/app/controllers/resources/Search.java
@@ -1,4 +1,4 @@
-/* Copyright 2015-2019 Fabian Steeg, hbz. Licensed under the EPL 2.0 */
+/* Copyright 2015-2023 Fabian Steeg, hbz. Licensed under the EPL 2.0 */
 
 package controllers.resources;
 
@@ -144,6 +144,10 @@ public long totalHits() {
 	 *         {@link #getTotal()}
 	 */
 	public Search queryResources() {
+		return queryResources((SearchHit hit) -> Json.toJson(hit.getSource()));
+	}
+
+	Search queryResources(Function<SearchHit, JsonNode> transformer) {
 		Search resultIndex = withClient((Client client) -> {
 			validate(client, query);
 			Logger.trace("queryResources: q={}, from={}, size={}, sort={}, query={}",
@@ -163,7 +167,7 @@ public Search queryResources() {
 			List<JsonNode> results = new ArrayList<>();
 			this.aggregations = response.getAggregations();
 			for (SearchHit sh : hits.getHits()) {
-				results.add(Json.toJson(sh.getSource()));
+				results.add(transformer.apply(sh));
 			}
 			result = Json.toJson(results);
 			total = hits.getTotalHits();
diff --git a/web/conf/resources.routes b/web/conf/resources.routes
index a29139b38..e65ea6f90 100644
--- a/web/conf/resources.routes
+++ b/web/conf/resources.routes
@@ -12,6 +12,10 @@ GET /resources/advanced controllers.resources.Application.advanc
 GET /resources/search controllers.resources.Application.query(q?="", agent?="", name?="", subject?="", id?="", publisher?="", issued?="", medium ?= "", from:Int?=0, size:Int?=15, owner?="", t?="", sort ?= "", word?="", format ?= null, aggregations ?= "", location ?= "", nested ?= "", filter ?= "")
 GET /resources/facets controllers.resources.Application.facets(q,agent?="", name?="", subject?="", id?="", publisher?="", issued?="", medium ?= "", from:Int,size:Int,owner,t,field,sort,word?="", location ?= "", nested ?= "", filter ?= "")
 
+#OpenRefine reconciliation endpoint
+GET /resources/reconcile controllers.resources.Reconcile.main(callback ?= "", queries ?= "", extend ?= "")
+POST /resources/reconcile controllers.resources.Reconcile.reconcile()
+
 GET /resources/stars controllers.resources.Application.showStars(format?="", ids?="")
 GET /resources/stars/clear controllers.resources.Application.clearStars(ids ?= "")
 GET /resources/stars/all controllers.resources.Application.starAll(ids)
@@ -21,6 +25,7 @@ DELETE /resources/stars/:id controllers.resources.Application.unstar
 GET /resources/context.jsonld controllers.resources.Application.context()
 GET /resources/dataset.jsonld controllers.resources.Application.dataset(format="json")
 GET /resources/dataset controllers.resources.Application.dataset(format?="")
+GET /resources/:id.preview controllers.resources.Application.preview(id)
 GET /resources/:id.:format controllers.resources.Application.resourceDotFormat(id, format)
 GET /items/:id.$format controllers.resources.Application.itemDotFormat(id, format)
 GET /resources/:id controllers.resources.Application.resource(id, format ?= null)