diff --git a/pom.xml b/pom.xml index 81a613b8..18452a6c 100644 --- a/pom.xml +++ b/pom.xml @@ -27,7 +27,7 @@ 6.0.0-SNAPSHOT 6.0.0-SNAPSHOT UTF-8 - 9.7.0 + 9.10.0 -Xmx3g -Dfile.encoding=UTF-8 -ea --add-opens=java.base/java.lang=ALL-UNNAMED --add-opens=java.base/java.lang.reflect=ALL-UNNAMED --add-opens java.base/sun.nio.ch=ALL-UNNAMED --add-opens java.base/java.io=ALL-UNNAMED ${settings.localRepository}/org/springframework/spring-instrument/${spring-framework.version}/spring-instrument-${spring-framework.version}.jar diff --git a/src/main/java/cpath/service/IndexImpl.java b/src/main/java/cpath/service/IndexImpl.java index 6de7c88e..576082fb 100644 --- a/src/main/java/cpath/service/IndexImpl.java +++ b/src/main/java/cpath/service/IndexImpl.java @@ -524,13 +524,8 @@ public long count(String queryString) { private void addDatasources(Set set, Document doc) { for (Provenance p : set) { //store but do not index/tokenize the URI - doc.add(new StoredField(FIELD_DATASOURCE, p.getUri())); - - //index the last/local (collection prefix) part of the Provenance uri - String u = p.getUri(); - if (u.endsWith("/")) u = u.substring(0, u.length() - 1); - u = u.replaceAll(".*[/#:]", ""); - doc.add(new TextField(FIELD_DATASOURCE, u.toLowerCase(), Field.Store.NO)); +// doc.add(new StoredField(FIELD_DATASOURCE, p.getUri())); + doc.add(new TextField(FIELD_DATASOURCE, p.getUri(), Field.Store.YES)); //index names (including the datasource identifier from metadata json config; see premerge/merge) //different data sources can have the same name e.g. 'intact'; tokenized - to search by partial name @@ -542,8 +537,8 @@ private void addDatasources(Set set, Document doc) { private void addOrganisms(Set set, Document doc) { for(BioSource bs : set) { - // store but do not index URI (see transform method above, where the organism URIs are added to search hits) - doc.add(new StoredField(FIELD_ORGANISM, bs.getUri())); + //doc.add(new StoredField(FIELD_ORGANISM, bs.getUri())); + doc.add(new TextField(FIELD_ORGANISM, bs.getUri(), Field.Store.YES)); // add organism names for(String s : bs.getName()) { @@ -558,8 +553,9 @@ private void addOrganisms(Set set, Document doc) { } // include tissue type terms if (bs.getTissue() != null) { - for (String s : bs.getTissue().getTerm()) + for (String s : bs.getTissue().getTerm()) { doc.add(new TextField(FIELD_ORGANISM, s.toLowerCase(), Field.Store.NO)); + } } // include cell type terms if (bs.getCellType() != null) { diff --git a/src/test/java/cpath/service/IndexIT.java b/src/test/java/cpath/service/IndexIT.java index 006e4b9f..86da34aa 100644 --- a/src/test/java/cpath/service/IndexIT.java +++ b/src/test/java/cpath/service/IndexIT.java @@ -95,19 +95,12 @@ public final void search() throws IOException { assertEquals(2, response.getSearchHit().size()); response = index.search("*", 0, Provenance.class, new String[] {"kegg"}, null); assertEquals(1, response.getSearchHit().size()); - //datasource filter using Provenance absolute URI - not needed anymore - still stored but not indexed anymore - assertTrue(index.search("*", 0, Pathway.class, new String[] {"http://identifiers.org/reactome/"}, null).isEmpty()); - assertTrue(index.search("*", 0, Pathway.class, new String[] {"test:kegg_test"}, null).isEmpty()); - //using the local/last part of the URI (standard bio collection prefix/name) - response = index.search("*", 0, Pathway.class, new String[] {"kegg_test"}, null); - assertFalse(response.isEmpty()); - assertEquals(1, response.getSearchHit().size()); - assertTrue(response.getSearchHit().stream().anyMatch(h -> h.getDataSource().contains("test:kegg_test"))); //find by partial name of a datasource - "pathway" of "KEGG Pathway"... response = index.search("*", 0, Pathway.class, new String[] {"pathway"}, null); assertFalse(response.isEmpty()); assertEquals(1, response.getSearchHit().size()); + assertTrue(response.getSearchHit().stream().anyMatch(h -> h.getDataSource().contains("test:kegg_test"))); response = index.search("pathway:glycolysis", 0, SmallMoleculeReference.class, null, null); assertEquals(5, response.getSearchHit().size()); diff --git a/src/test/resources/test-index-it.owl b/src/test/resources/test-index-it.owl new file mode 100644 index 00000000..864fa540 --- /dev/null +++ b/src/test/resources/test-index-it.owl @@ -0,0 +1,51 @@ + + + + + + + + BIND + BIND + Biomolecular Interaction Network Database + bind + Source http://download.baderlab.org/BINDTranslation/release1_0/PSIMI25_XML/taxid9606_PSIMI25.xml type: PSI_MI, BIND (human), 15-Dec-2010 + + + + 1ZDT_B + + Protein Chain B, NR5A1[221-461]. This residue range is taken from the PDB file and may not match the GI given. + experimental form entity + + + + + + + + + + + + Nuclear Receptor Coactivator 2 + Chain P; The Crystal Structure Of Human Steroidogenic Factor-1 + + + + + + + + 67463979 + genbank indentifier + + + + + \ No newline at end of file