From 348957178250a248d0f570e010fb694f2e9232c9 Mon Sep 17 00:00:00 2001 From: IgorRodchenkov Date: Sat, 22 Jun 2024 23:03:54 -0400 Subject: [PATCH] Modified console command: -i (reindex), etc. to delete all biopax el. idx. documents first... --- .../cpath/service/ConsoleApplication.java | 16 +++++--- src/main/java/cpath/service/IndexImpl.java | 38 ++++++++++--------- src/main/java/cpath/service/Merger.java | 3 +- src/main/java/cpath/service/ServiceImpl.java | 8 ---- .../java/cpath/service/metadata/Index.java | 4 +- .../java/cpath/service/metadata/Mappings.java | 8 ---- .../cpath/service/ConsoleApplicationIT.java | 3 +- src/test/java/cpath/service/IndexIT.java | 8 +++- 8 files changed, 43 insertions(+), 45 deletions(-) diff --git a/src/main/java/cpath/service/ConsoleApplication.java b/src/main/java/cpath/service/ConsoleApplication.java index 5dde6f9f..00532cc7 100644 --- a/src/main/java/cpath/service/ConsoleApplication.java +++ b/src/main/java/cpath/service/ConsoleApplication.java @@ -194,16 +194,22 @@ private void modifyModel(String analysisClass) throws IOException { LOG.info("Over-writing model: {}...", service.settings().mainModelFile()); new SimpleIOHandler(BioPAXLevel.L3).convertToOWL(model, new GZIPOutputStream(new FileOutputStream(service.settings().mainModelFile()))); - //init the lucene index as read-write - service.initIndex(model, service.settings().indexDir(), false); - //re-index the model - service.index().save(model); + //re-index + reindex(model); } - private void reindex() throws IOException { + private void reindex() { Model model = CPathUtils.importFromTheArchive(service.settings().mainModelFile()); + reindex(model); + } + + private void reindex(Model model) { service.initIndex(model, service.settings().indexDir(), false); + //remove biopax but not id-mapping docs + service.index().drop(); + //re-index service.index().save(model); + service.index().close(); } /* diff --git a/src/main/java/cpath/service/IndexImpl.java b/src/main/java/cpath/service/IndexImpl.java index 6f59ac86..a2ef1c79 100644 --- a/src/main/java/cpath/service/IndexImpl.java +++ b/src/main/java/cpath/service/IndexImpl.java @@ -179,10 +179,10 @@ private SearchResponse transform(Query query, IndexSearcher searcher, TopDocs to throw new IllegalArgumentException("topDocs is null"); } SearchResponse response = new SearchResponse(); - response.setMaxHitsPerPage(maxHitsPerPage); - long numTotalHits = topDocs.totalHits.value; //todo: call searcher.count(q) instead or it's same?.. + response.setMaxHitsPerPage(getMaxHitsPerPage()); + long numTotalHits = topDocs.totalHits.value; //todo: call searcher.count(q) instead or it's the same? response.setNumHits(numTotalHits); - List hits = response.getSearchHit();//empty list + List hits = response.getSearchHit();//empty list to be filled from top docs assert hits!=null && hits.isEmpty(); LOG.debug("transform, no. TopDocs to process:" + topDocs.scoreDocs.length); for(ScoreDoc scoreDoc : topDocs.scoreDocs) { @@ -459,6 +459,7 @@ public void save(BioPAXElement bpe) { @Override public void save(Model model) { + setModel(model); final int numObjectsToIndex = model.getObjects(Entity.class).size() + model.getObjects(EntityReference.class).size() + model.getObjects(Provenance.class).size(); @@ -477,7 +478,6 @@ public void save(Model model) { commit(); //force refreshing the index state (for new readers) refresh(); - setModel(model); LOG.info("build(), all done."); } @@ -485,7 +485,6 @@ public void save(Model model) { public void commit() { try { indexWriter.commit(); - indexWriter.flush(); } catch (Exception e) { throw new RuntimeException(e); } @@ -495,7 +494,7 @@ public void commit() { public void close() { try { if (indexWriter != null && indexWriter.isOpen()) { - indexWriter.flush(); + indexWriter.commit(); indexWriter.close(); } } catch (Exception e) { @@ -513,21 +512,25 @@ public synchronized void refresh() { } @Override - public boolean isClosed() { - return indexWriter == null || !indexWriter.isOpen(); - } - - @Override - public long count(String queryString) { - return 0; + public void drop() { + if(indexWriter==null) { + throw new IllegalStateException("read-only index"); + } + try { + Query q = new FieldExistsQuery(FIELD_KEYWORD); + indexWriter.deleteDocuments(q); + indexWriter.commit(); + indexWriter.deleteUnusedFiles(); + setModel(null); + LOG.info("dropped (deleted) BioPAX index"); + } catch (IOException e) { + throw new RuntimeException(e); + } } private void addDatasources(Set set, Document doc) { for (Provenance p : set) { - //store but do not index/tokenize the URI -// doc.add(new StoredField(FIELD_DATASOURCE, p.getUri())); doc.add(new TextField(FIELD_DATASOURCE, p.getUri(), Field.Store.YES)); - //index names (including the datasource identifier from metadata json config; see premerge/merge) //different data sources can have the same name e.g. 'intact'; tokenized - to search by partial name for (String s : p.getName()) { @@ -538,7 +541,6 @@ private void addDatasources(Set set, Document doc) { private void addOrganisms(Set set, Document doc) { for(BioSource bs : set) { - //doc.add(new StoredField(FIELD_ORGANISM, bs.getUri())); doc.add(new TextField(FIELD_ORGANISM, bs.getUri(), Field.Store.YES)); // add organism names @@ -815,6 +817,6 @@ public void save(Mapping mapping) { } catch (IOException e) { throw new RuntimeException(e); } - //call commit(), refresh() after one or several save(mapping) + //call commit(), refresh() after several save(mapping) } } diff --git a/src/main/java/cpath/service/Merger.java b/src/main/java/cpath/service/Merger.java index 99e2e459..1797a528 100644 --- a/src/main/java/cpath/service/Merger.java +++ b/src/main/java/cpath/service/Merger.java @@ -81,8 +81,9 @@ public void merge() { simpleMerger.merge(m, providerModel); } - //remove dangling SPEs (such non-participant/components molecules are not useful for pathway analyses...) + //remove dangling SPEs and Genes (such non-participant/components are not useful for pathway analyses...) ModelUtils.removeObjectsIfDangling(m, SimplePhysicalEntity.class); + ModelUtils.removeObjectsIfDangling(m, Gene.class); //now, remove dangling xrefs, CV et al. utility type individuals ModelUtils.removeObjectsIfDangling(m, UtilityClass.class); diff --git a/src/main/java/cpath/service/ServiceImpl.java b/src/main/java/cpath/service/ServiceImpl.java index 237badb0..8b205ffe 100644 --- a/src/main/java/cpath/service/ServiceImpl.java +++ b/src/main/java/cpath/service/ServiceImpl.java @@ -139,14 +139,6 @@ public void setBlacklist(Blacklist blacklist) { this.blacklist = blacklist; } - IndexImpl getIndex() { - return index; - } - - void setIndex(IndexImpl index) { - this.index = index; - } - public ServiceResponse search(String queryStr, int page, Class biopaxClass, String[] dsources, String[] organisms) { diff --git a/src/main/java/cpath/service/metadata/Index.java b/src/main/java/cpath/service/metadata/Index.java index 8c873139..5877a5fa 100644 --- a/src/main/java/cpath/service/metadata/Index.java +++ b/src/main/java/cpath/service/metadata/Index.java @@ -37,7 +37,7 @@ public interface Index { /** * Full-text search for an object. * - * @param query String (keywords or Lucene query string) + * @param query String (keywords or Lucene query string) * @param page hits page number (when the number of hits exceeds a threshold) * @param type - filter by class * @param datasources - filter by datasource @@ -56,5 +56,5 @@ public interface Index { void refresh(); - boolean isClosed(); + void drop(); } diff --git a/src/main/java/cpath/service/metadata/Mappings.java b/src/main/java/cpath/service/metadata/Mappings.java index b9646d59..43e6fb00 100644 --- a/src/main/java/cpath/service/metadata/Mappings.java +++ b/src/main/java/cpath/service/metadata/Mappings.java @@ -27,12 +27,4 @@ public interface Mappings { void close(); - boolean isClosed(); - - /** - * Total number of search hits for the given lucene query. - * @param queryString - * @return - */ - long count(String queryString); } diff --git a/src/test/java/cpath/service/ConsoleApplicationIT.java b/src/test/java/cpath/service/ConsoleApplicationIT.java index f73f6baf..5b0b4f04 100644 --- a/src/test/java/cpath/service/ConsoleApplicationIT.java +++ b/src/test/java/cpath/service/ConsoleApplicationIT.java @@ -272,8 +272,7 @@ public void premergeAndMerge() throws IOException { merger.replaceConflictingUris(providerModel, mainModel); mainModel.merge(providerModel); -// //in prod, we bremove dangling SPEs, but here/below we need them for merge assertions; so commented out... -// ModelUtils.removeObjectsIfDangling(mainModel, SimplePhysicalEntity.class); + //in prod, we also remove dangling SPEs and Genes but here below we need them for merge assertions... ModelUtils.removeObjectsIfDangling(mainModel, UtilityClass.class); //it's vital to save to and then read the main model back from file, diff --git a/src/test/java/cpath/service/IndexIT.java b/src/test/java/cpath/service/IndexIT.java index b4c60841..9ab90feb 100644 --- a/src/test/java/cpath/service/IndexIT.java +++ b/src/test/java/cpath/service/IndexIT.java @@ -31,7 +31,6 @@ public final void search() throws IOException { .getResource("classpath:merge/pathwaydata1.owl").getInputStream()); IndexImpl index = new IndexImpl(model, "target/test-idx", false); index.save(model); - index.refresh(); //close index writer and re-open the index searcher in the read-only mode //(optional; tests should pass regardless; if you remove the following two lines, keep index.close() at the end) @@ -209,6 +208,13 @@ public final void search() throws IOException { assertFalse(response.getSearchHit().isEmpty()); assertEquals(1, response.getSearchHit().size()); + //re-open to write + index.close(); + index = new IndexImpl(model, "target/test-idx", false); + index.drop(); + response = index.search("*", 1, null, null, null); + assertTrue(response.getSearchHit().isEmpty()); + index.close(); }