Skip to content

Commit

Permalink
Modified console command: -i (reindex), etc. to delete all biopax el.…
Browse files Browse the repository at this point in the history
… idx. documents first...
  • Loading branch information
IgorRodchenkov committed Jun 23, 2024
1 parent 4f58c6e commit 3489571
Show file tree
Hide file tree
Showing 8 changed files with 43 additions and 45 deletions.
16 changes: 11 additions & 5 deletions src/main/java/cpath/service/ConsoleApplication.java
Original file line number Diff line number Diff line change
Expand Up @@ -194,16 +194,22 @@ private void modifyModel(String analysisClass) throws IOException {
LOG.info("Over-writing model: {}...", service.settings().mainModelFile());
new SimpleIOHandler(BioPAXLevel.L3).convertToOWL(model,
new GZIPOutputStream(new FileOutputStream(service.settings().mainModelFile())));
//init the lucene index as read-write
service.initIndex(model, service.settings().indexDir(), false);
//re-index the model
service.index().save(model);
//re-index
reindex(model);
}

private void reindex() throws IOException {
private void reindex() {
  // Import the model from the configured main model file and hand it straight
  // to reindex(Model), which does the actual index rebuild.
  reindex(CPathUtils.importFromTheArchive(service.settings().mainModelFile()));
}

/**
 * Rebuilds the BioPAX part of the index from the given model.
 *
 * Steps: initialise the index at the configured index directory, drop the
 * existing BioPAX documents, save (index) the model, and close the index.
 * The index is closed in a {@code finally} block so that a failure in
 * {@code drop()} or {@code save()} does not leave it open.
 *
 * @param model the BioPAX model to index
 */
private void reindex(Model model) {
  // NOTE(review): the 'false' flag presumably opens the index read-write (an earlier
  // comment on the same call in this file says so) - confirm against initIndex.
  service.initIndex(model, service.settings().indexDir(), false);
  try {
    //remove biopax but not id-mapping docs
    service.index().drop();
    //re-index
    service.index().save(model);
  } finally {
    // always release the index, even when drop/save fails
    service.index().close();
  }
}

/*
Expand Down
38 changes: 20 additions & 18 deletions src/main/java/cpath/service/IndexImpl.java
Original file line number Diff line number Diff line change
Expand Up @@ -179,10 +179,10 @@ private SearchResponse transform(Query query, IndexSearcher searcher, TopDocs to
throw new IllegalArgumentException("topDocs is null");
}
SearchResponse response = new SearchResponse();
response.setMaxHitsPerPage(maxHitsPerPage);
long numTotalHits = topDocs.totalHits.value; //todo: call searcher.count(q) instead or it's same?..
response.setMaxHitsPerPage(getMaxHitsPerPage());
long numTotalHits = topDocs.totalHits.value; //todo: call searcher.count(q) instead or it's the same?
response.setNumHits(numTotalHits);
List<SearchHit> hits = response.getSearchHit();//empty list
List<SearchHit> hits = response.getSearchHit();//empty list to be filled from top docs
assert hits!=null && hits.isEmpty();
LOG.debug("transform, no. TopDocs to process:" + topDocs.scoreDocs.length);
for(ScoreDoc scoreDoc : topDocs.scoreDocs) {
Expand Down Expand Up @@ -459,6 +459,7 @@ public void save(BioPAXElement bpe) {

@Override
public void save(Model model) {
setModel(model);
final int numObjectsToIndex = model.getObjects(Entity.class).size()
+ model.getObjects(EntityReference.class).size()
+ model.getObjects(Provenance.class).size();
Expand All @@ -477,15 +478,13 @@ public void save(Model model) {
commit();
//force refreshing the index state (for new readers)
refresh();
setModel(model);
LOG.info("build(), all done.");
}

@Override
public void commit() {
try {
indexWriter.commit();
indexWriter.flush();
} catch (Exception e) {
throw new RuntimeException(e);
}
Expand All @@ -495,7 +494,7 @@ public void commit() {
public void close() {
try {
if (indexWriter != null && indexWriter.isOpen()) {
indexWriter.flush();
indexWriter.commit();
indexWriter.close();
}
} catch (Exception e) {
Expand All @@ -513,21 +512,25 @@ public synchronized void refresh() {
}

@Override
public boolean isClosed() {
  // "closed" means: there is no writer at all, or the writer is no longer open
  return !(indexWriter != null && indexWriter.isOpen());
}

@Override
public long count(String queryString) {
return 0;
public void drop() {
  // Without a writer nothing can be deleted; reported as a read-only index.
  if (indexWriter == null) {
    throw new IllegalStateException("read-only index");
  }
  try {
    // Delete every document that has the FIELD_KEYWORD field, then commit the deletion.
    // NOTE(review): presumably only BioPAX element documents carry this field,
    // so id-mapping documents survive - confirm against the indexing code.
    indexWriter.deleteDocuments(new FieldExistsQuery(FIELD_KEYWORD));
    indexWriter.commit();
    indexWriter.deleteUnusedFiles();
    // the previously set model no longer matches the (now emptied) index
    setModel(null);
    LOG.info("dropped (deleted) BioPAX index");
  } catch (IOException ioe) {
    throw new RuntimeException(ioe);
  }
}

private void addDatasources(Set<Provenance> set, Document doc) {
for (Provenance p : set) {
//store but do not index/tokenize the URI
// doc.add(new StoredField(FIELD_DATASOURCE, p.getUri()));
doc.add(new TextField(FIELD_DATASOURCE, p.getUri(), Field.Store.YES));

//index names (including the datasource identifier from metadata json config; see premerge/merge)
//different data sources can have the same name e.g. 'intact'; tokenized - to search by partial name
for (String s : p.getName()) {
Expand All @@ -538,7 +541,6 @@ private void addDatasources(Set<Provenance> set, Document doc) {

private void addOrganisms(Set<BioSource> set, Document doc) {
for(BioSource bs : set) {
//doc.add(new StoredField(FIELD_ORGANISM, bs.getUri()));
doc.add(new TextField(FIELD_ORGANISM, bs.getUri(), Field.Store.YES));

// add organism names
Expand Down Expand Up @@ -815,6 +817,6 @@ public void save(Mapping mapping) {
} catch (IOException e) {
throw new RuntimeException(e);
}
//call commit(), refresh() after one or several save(mapping)
//call commit(), refresh() after several save(mapping)
}
}
3 changes: 2 additions & 1 deletion src/main/java/cpath/service/Merger.java
Original file line number Diff line number Diff line change
Expand Up @@ -81,8 +81,9 @@ public void merge() {
simpleMerger.merge(m, providerModel);
}

//remove dangling SPEs (such non-participant/components molecules are not useful for pathway analyses...)
//remove dangling SPEs and Genes (such non-participant/components are not useful for pathway analyses...)
ModelUtils.removeObjectsIfDangling(m, SimplePhysicalEntity.class);
ModelUtils.removeObjectsIfDangling(m, Gene.class);
//now, remove dangling xrefs, CV et al. utility type individuals
ModelUtils.removeObjectsIfDangling(m, UtilityClass.class);

Expand Down
8 changes: 0 additions & 8 deletions src/main/java/cpath/service/ServiceImpl.java
Original file line number Diff line number Diff line change
Expand Up @@ -139,14 +139,6 @@ public void setBlacklist(Blacklist blacklist) {
this.blacklist = blacklist;
}

/**
 * Package-private accessor for the index held by this service.
 *
 * @return the current value of the {@code index} field
 */
IndexImpl getIndex() {
return index;
}

/**
 * Package-private mutator that replaces the index held by this service.
 *
 * @param index the index implementation to store in the {@code index} field
 */
void setIndex(IndexImpl index) {
this.index = index;
}

public ServiceResponse search(String queryStr,
int page, Class<? extends BioPAXElement> biopaxClass,
String[] dsources, String[] organisms) {
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/cpath/service/metadata/Index.java
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ public interface Index {
/**
* Full-text search for an object.
*
* @param query String (keywords or Lucene query string)
* @param query String (keywords or Lucene query string)
* @param page hits page number (when the number of hits exceeds a threshold)
* @param type - filter by class
* @param datasources - filter by datasource
Expand All @@ -56,5 +56,5 @@ public interface Index {

void refresh();

boolean isClosed();
void drop();
}
8 changes: 0 additions & 8 deletions src/main/java/cpath/service/metadata/Mappings.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,4 @@ public interface Mappings {

void close();

boolean isClosed();

/**
* Gets the total number of search hits for the given Lucene query.
* @param queryString the Lucene query string to count hits for
* @return the total number of hits matching the query
*/
long count(String queryString);
}
3 changes: 1 addition & 2 deletions src/test/java/cpath/service/ConsoleApplicationIT.java
Original file line number Diff line number Diff line change
Expand Up @@ -272,8 +272,7 @@ public void premergeAndMerge() throws IOException {
merger.replaceConflictingUris(providerModel, mainModel);
mainModel.merge(providerModel);

// //in prod, we remove dangling SPEs, but here/below we need them for merge assertions; so commented out...
// ModelUtils.removeObjectsIfDangling(mainModel, SimplePhysicalEntity.class);
//in prod, we also remove dangling SPEs and Genes but here below we need them for merge assertions...
ModelUtils.removeObjectsIfDangling(mainModel, UtilityClass.class);

//it's vital to save to and then read the main model back from file,
Expand Down
8 changes: 7 additions & 1 deletion src/test/java/cpath/service/IndexIT.java
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@ public final void search() throws IOException {
.getResource("classpath:merge/pathwaydata1.owl").getInputStream());
IndexImpl index = new IndexImpl(model, "target/test-idx", false);
index.save(model);
index.refresh();

//close index writer and re-open the index searcher in the read-only mode
//(optional; tests should pass regardless; if you remove the following two lines, keep index.close() at the end)
Expand Down Expand Up @@ -209,6 +208,13 @@ public final void search() throws IOException {
assertFalse(response.getSearchHit().isEmpty());
assertEquals(1, response.getSearchHit().size());

//re-open to write
index.close();
index = new IndexImpl(model, "target/test-idx", false);
index.drop();
response = index.search("*", 1, null, null, null);
assertTrue(response.getSearchHit().isEmpty());

index.close();
}

Expand Down

0 comments on commit 3489571

Please sign in to comment.