From 3f6c92d539abe151745825ce3782b30620b0a551 Mon Sep 17 00:00:00 2001 From: Esha Datta Date: Fri, 29 Mar 2019 11:05:10 -0400 Subject: [PATCH 1/4] added optional argument to index-command function to only output hash-map without the esindex id --- src/cayenne/tasks/funder.clj | 56 ++++++++++++++++++++---------------- 1 file changed, 31 insertions(+), 25 deletions(-) diff --git a/src/cayenne/tasks/funder.clj b/src/cayenne/tasks/funder.clj index 5e854650..b63b80e5 100644 --- a/src/cayenne/tasks/funder.clj +++ b/src/cayenne/tasks/funder.clj @@ -139,7 +139,7 @@ (defn index-command "Build an Elastic Search object from a resource in the context of a model. The unique ID is taken from the Resource ID." - [model funder-resource] + [model funder-resource & json-only] (let [primary-name (-> model (get-labels funder-resource "prefLabel") first) alt-names (-> model (get-labels funder-resource "altLabel")) ancestors (resource-ancestors model funder-resource) @@ -147,30 +147,36 @@ ancestor-ids (->> ancestors (map res->id) distinct sort) descendant-ids (->> descendants (map res->id) distinct sort) level (-> ancestor-ids count (+ 1)) - hierarchy (build-hierarchy model funder-resource (id-name model funder-resource))] - [{:index {:_id (res->id funder-resource)}} - {:doi (res->doi funder-resource) - :id (res->id funder-resource) - :primary-name primary-name - :name alt-names - :token (concat - (util/tokenize-name primary-name) - (flatten (map util/tokenize-name alt-names))) - :country (get-country-literal-name model funder-resource) - :parent (-> model (broader funder-resource) first res->doi) - :ancestor ancestor-ids - :level level - :child (distinct (map res->id (narrower model funder-resource))) - :descendant descendant-ids - :affiliated (distinct (map res->id (affiliated model funder-resource))) - :replaced-by (distinct (map res->id (replaced-by model funder-resource))) - :replaces (distinct (map res->id (replaces model funder-resource))) - :hierarchy-names (reduce - (fn [m [k 
v]] - (assoc m k v)) - (if (> level 1) {:more nil} {}) - (partition 2 (util/get-all-in hierarchy [:id :name]))) - :hierarchy hierarchy}])) + hierarchy (build-hierarchy model funder-resource (id-name model funder-resource)) + es-index [{:index {:_id (res->id funder-resource)}}] + funder-hash-map [ + {:doi (res->doi funder-resource) + :id (res->id funder-resource) + :primary-name primary-name + :name alt-names + :token (concat + (util/tokenize-name primary-name) + (flatten (map util/tokenize-name alt-names))) + :country (get-country-literal-name model funder-resource) + :parent (-> model (broader funder-resource) first res->doi) + :ancestor ancestor-ids + :level level + :child (distinct (map res->id (narrower model funder-resource))) + :descendant descendant-ids + :affiliated (distinct (map res->id (affiliated model funder-resource))) + :replaced-by (distinct (map res->id (replaced-by model funder-resource))) + :replaces (distinct (map res->id (replaces model funder-resource))) + :hierarchy-names (reduce + (fn [m [k v]] + (assoc m k v)) + (if (> level 1) {:more nil} {}) + (partition 2 (util/get-all-in hierarchy [:id :name]))) + :hierarchy hierarchy}]] + (if (not json-only) + (->> (conj es-index funder-hash-map) flatten) + ;; to call without the es index id - (->> "true" (map (partial index-command model) funder-resource-lazy-seq) + funder-hash-map + ))) (defn index-funders "Retrieve funder information RDF and index into Elastic." 
From da04adb25edfd45ae5a39bcdcd6fdce2d5ab435b Mon Sep 17 00:00:00 2001 From: Esha Datta Date: Fri, 29 Mar 2019 11:05:55 -0400 Subject: [PATCH 2/4] started writing tests for the index-command function --- test/cayenne/funders_test.clj | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/test/cayenne/funders_test.clj b/test/cayenne/funders_test.clj index d2af3238..30f9e956 100644 --- a/test/cayenne/funders_test.clj +++ b/test/cayenne/funders_test.clj @@ -26,6 +26,14 @@ (let [response (api-get (str "/v1/funders/" funder "/works?rows=1000")) expected-response (read-string (slurp (resource (str "funders/" funder "-works.edn"))))] (is (= expected-response response)))))) +(deftest ^:unit check-index-command-output + (testing "index-command output with and without optional argument yields same output except for elastic search index id") + (let [model (-> (java.net.URL. (conf/get-param [:location :cr-funder-registry])) rdf/document->model) + funders (first (->> model find-funders (partition-all 5))) + with-es-id-output (->> funders (map (partial index-command model)) flatten) + without-es-id-output (->> "true" (map (partial index-command model) funders) flatten)] + + ) (use-fixtures :once From 6ca62c06596f290b38d46f9111f8af2d4defeb96 Mon Sep 17 00:00:00 2001 From: Esha Datta Date: Mon, 1 Apr 2019 12:48:23 -0400 Subject: [PATCH 3/4] added test and change to funders to separate elastic search id and object generation --- src/cayenne/tasks/funder.clj | 74 +++++++++++++++++++----------------- 1 file changed, 39 insertions(+), 35 deletions(-) diff --git a/src/cayenne/tasks/funder.clj b/src/cayenne/tasks/funder.clj index b63b80e5..564491d4 100644 --- a/src/cayenne/tasks/funder.clj +++ b/src/cayenne/tasks/funder.clj @@ -135,11 +135,15 @@ (if (not-empty ancestors) (build-hierarchy model (first ancestors) (hierarcy-node model funder-resource descendants child)) (hierarcy-node model funder-resource descendants child)))) - -(defn index-command - "Build an Elastic Search object 
from a resource in the context of a model. - The unique ID is taken from the Resource ID." - [model funder-resource & json-only] +(defn generate-es-id + "Generate the Elastic Search {:index {:_id ...}} entry for a funder resource. The unique id is taken from the resource ID" + [funder-resource] + {:index {:_id (res->id funder-resource)}} + ) + +(defn generate-es-object + "Generates an Elastic Search object from a resource in the context of a model. The unique ID is taken from the Resource ID" + [model funder-resource] (let [primary-name (-> model (get-labels funder-resource "prefLabel") first) alt-names (-> model (get-labels funder-resource "altLabel")) ancestors (resource-ancestors model funder-resource) @@ -147,30 +147,36 @@ ancestor-ids (->> ancestors (map res->id) distinct sort) descendant-ids (->> descendants (map res->id) distinct sort) level (-> ancestor-ids count (+ 1)) - hierarchy (build-hierarchy model funder-resource (id-name model funder-resource)) - es-index [{:index {:_id (res->id funder-resource)}}] - funder-hash-map [ - {:doi (res->doi funder-resource) - :id (res->id funder-resource) - :primary-name primary-name - :name alt-names - :token (concat - (util/tokenize-name primary-name) - (flatten (map util/tokenize-name alt-names))) - :country (get-country-literal-name model funder-resource) - :parent (-> model (broader funder-resource) first res->doi) - :ancestor ancestor-ids - :level level - :child (distinct (map res->id (narrower model funder-resource))) - :descendant descendant-ids - :affiliated (distinct (map res->id (affiliated model funder-resource))) - :replaced-by (distinct (map res->id (replaced-by model funder-resource))) - :replaces (distinct (map res->id (replaces model funder-resource))) - :hierarchy-names (reduce - (fn [m [k v]] - (assoc m k v)) - (if (> level 1) {:more nil} {}) - (partition 2 (util/get-all-in hierarchy [:id :name]))) - :hierarchy hierarchy}]] - (if (not json-only) - (->> (conj es-index funder-hash-map) flatten) - ;; to call 
without the es index id - (->> "true" (map (partial index-command model) funder-resource-lazy-seq) - funder-hash-map - ))) + hierarchy (build-hierarchy model funder-resource (id-name model funder-resource))] + {:doi (res->doi funder-resource) + :id (res->id funder-resource) + :primary-name primary-name + :name alt-names + :token (concat + (util/tokenize-name primary-name) + (flatten (map util/tokenize-name alt-names))) + :country (get-country-literal-name model funder-resource) + :parent (-> model (broader funder-resource) first res->doi) + :ancestor ancestor-ids + :level level + :child (distinct (map res->id (narrower model funder-resource))) + :descendant descendant-ids + :affiliated (distinct (map res->id (affiliated model funder-resource))) + :replaced-by (distinct (map res->id (replaced-by model funder-resource))) + :replaces (distinct (map res->id (replaces model funder-resource))) + :hierarchy-names (reduce + (fn [m [k v]] + (assoc m k v)) + (if (> level 1) {:more nil} {}) + (partition 2 (util/get-all-in hierarchy [:id :name]))) + :hierarchy hierarchy})) + +(defn index-command + "Build an Elastic Search object from a resource in the context of a model. + The unique ID is taken from the Resource ID." + [model funder-resource] + [(generate-es-id funder-resource) (generate-es-object model funder-resource)] + ) (defn index-funders "Retrieve funder information RDF and index into Elastic." 
From 924058e8597e130f25f81208afe873951cfe74f3 Mon Sep 17 00:00:00 2001 From: Esha Datta Date: Mon, 1 Apr 2019 12:48:36 -0400 Subject: [PATCH 4/4] added test and change to funders to separate elastic search id and object generation --- test/cayenne/funders_test.clj | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/test/cayenne/funders_test.clj b/test/cayenne/funders_test.clj index 30f9e956..58218c6e 100644 --- a/test/cayenne/funders_test.clj +++ b/test/cayenne/funders_test.clj @@ -1,6 +1,9 @@ (ns cayenne.funders-test (:require [cayenne.api-fixture :refer [api-root api-get api-with]] [clojure.java.io :refer [resource]] + [cayenne.rdf :as rdf] + [cayenne.conf :as conf] + [cayenne.tasks.funder :as funder] [clojure.test :refer [use-fixtures deftest testing is]])) (deftest ^:integration querying-funders @@ -26,14 +29,15 @@ (let [response (api-get (str "/v1/funders/" funder "/works?rows=1000")) expected-response (read-string (slurp (resource (str "funders/" funder "-works.edn"))))] (is (= expected-response response)))))) + (deftest ^:unit check-index-command-output - (testing "index-command output with and without optional argument yields same output except for elastic search index id") + (testing "index-command output which returns es-id and function generate-es-object yields same output except for the inclusion of elastic search index id" (let [model (-> (java.net.URL. (conf/get-param [:location :cr-funder-registry])) rdf/document->model) - funders (first (->> model find-funders (partition-all 5))) - with-es-id-output (->> funders (map (partial index-command model)) flatten) - without-es-id-output (->> "true" (map (partial index-command model) funders) flatten)] - - ) + funders (first (->> model funder/find-funders (partition-all 5))) + with-es-id-output (->> funders (map (partial funder/index-command model)) flatten) + remove-id (filter #(not(contains? 
% :index)) with-es-id-output) + without-es-id-output (->> funders (map (partial funder/generate-es-object model)) flatten)] + (is (= remove-id without-es-id-output))))) (use-fixtures :once