Skip to content

Commit

Permalink
Merge pull request #5264 from inception-project/bugfix/5263-Assistant-index-does-not-update-correctly-when-documents-are-added-or-removed
Browse files Browse the repository at this point in the history

#5263 - Assistant index does not update correctly when documents are added or removed
  • Loading branch information
reckart authored Jan 27, 2025
2 parents 86a25bd + 41308c4 commit 7a34a08
Showing 1 changed file with 11 additions and 3 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,6 @@
import org.apache.lucene.document.LongPoint;
import org.apache.lucene.document.StoredField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.search.FieldExistsQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.uima.cas.CAS;
import org.slf4j.Logger;
Expand Down Expand Up @@ -100,6 +99,10 @@ public void execute() throws Exception
{
var documents = documentService.listSourceDocuments(getProject());
if (documents.isEmpty()) {
try (var index = documentQueryService.borrowIndex(getProject())) {
index.getIndexWriter().deleteAll();
index.getIndexWriter().commit();
}
return;
}

Expand All @@ -114,15 +117,16 @@ public void execute() throws Exception
try (var index = documentQueryService.borrowIndex(getProject())) {
try (var reader = DirectoryReader.open(index.getIndexWriter())) {
var searcher = new IndexSearcher(reader);
var query = new FieldExistsQuery(FIELD_SOURCE_DOC_COMPLETE);
var query = LongPoint.newRangeQuery(FIELD_SOURCE_DOC_COMPLETE, Long.MIN_VALUE,
Long.MAX_VALUE);
var result = searcher.search(query, Integer.MAX_VALUE);

var documentsToIndex = new HashMap<Long, SourceDocument>();
for (var doc : documents) {
documentsToIndex.put(doc.getId(), doc);
}
var documentsToUnindex = new ArrayList<Long>();

var documentsToUnindex = new ArrayList<Long>();
for (var doc : result.scoreDocs) {
var fields = searcher.getIndexReader().storedFields().document(doc.doc);
var sourceDocId = fields.getField(FIELD_SOURCE_DOC_COMPLETE).numericValue()
Expand Down Expand Up @@ -181,6 +185,8 @@ private void unindexDocument(LuceneIndexPool.PooledIndex aIndex, long aSourceDoc
try {
aIndex.getIndexWriter().deleteDocuments(
LongPoint.newExactQuery(FIELD_SOURCE_DOC_ID, aSourceDocumentId));
aIndex.getIndexWriter().deleteDocuments(
LongPoint.newExactQuery(FIELD_SOURCE_DOC_COMPLETE, aSourceDocumentId));
}
catch (IOException e) {
LOG.error("Error unindexing document [{}]", aSourceDocumentId, e);
Expand Down Expand Up @@ -232,6 +238,7 @@ private void markDocumentAsIndexed(LuceneIndexPool.PooledIndex aIndex,
throws IOException
{
var doc = new Document();
doc.add(new LongPoint(FIELD_SOURCE_DOC_COMPLETE, aSourceDocument.getId()));
doc.add(new StoredField(FIELD_SOURCE_DOC_COMPLETE, aSourceDocument.getId()));
aIndex.getIndexWriter().addDocument(doc);
}
Expand All @@ -251,6 +258,7 @@ private void indexChunks(LuceneIndexPool.PooledIndex aIndex, SourceDocument aSou
var normalizedEmbedding = l2normalize(aEmbeddedChunks.getValue(), false);
doc.add(new KnnFloatVectorField(FIELD_EMBEDDING, normalizedEmbedding, DOT_PRODUCT));
doc.add(new IntRange(FIELD_RANGE, new int[] { chunk.begin() }, new int[] { chunk.end() }));
doc.add(new LongPoint(FIELD_SOURCE_DOC_ID, aSourceDocument.getId()));
doc.add(new StoredField(FIELD_SOURCE_DOC_ID, aSourceDocument.getId()));
doc.add(new StoredField(FIELD_SECTION, ""));
doc.add(new StoredField(FIELD_TEXT, chunk.text()));
Expand Down

0 comments on commit 7a34a08

Please sign in to comment.