Skip to content

Commit

Permalink
#13867: add tooling to generate int7_hnsw.9.10.zip bwc index
Browse files Browse the repository at this point in the history
  • Loading branch information
mikemccand committed Oct 9, 2024
1 parent 9dd068b commit a5204d9
Show file tree
Hide file tree
Showing 5 changed files with 164 additions and 4 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,8 @@ public abstract class BackwardsCompatibilityTestBase extends LuceneTestCase {
* This is a base constructor for parameterized BWC tests. The constructor arguments are provided
* by {@link com.carrotsearch.randomizedtesting.RandomizedRunner} during test execution. A {@link
* com.carrotsearch.randomizedtesting.annotations.ParametersFactory} specified in a subclass
* provides a list lists of arguments for the tests and RandomizedRunner will execute the test for
* each of the argument list.
* provides a list of argument lists for the tests and RandomizedRunner will execute the test once
* for each argument list.
*
* @param version the version this test should run for
* @param indexPattern an index pattern in order to open an index of see {@link
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -511,7 +511,7 @@ private static void doTestHits(ScoreDoc[] hits, int expectedCount, IndexReader r
}
}

private static ScoreDoc[] assertKNNSearch(
static ScoreDoc[] assertKNNSearch(
IndexSearcher searcher,
float[] queryVector,
int k,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ public class TestGenerateBwcIndices extends LuceneTestCase {
// To generate backcompat indexes with the current default codec, run the following gradle
// command:
// gradlew test -Ptests.bwcdir=/path/to/store/indexes -Ptests.codec=default
// -Ptests.useSecurityManager=false --tests TestGenerateBwcIndices
// -Ptests.useSecurityManager=false --tests TestGenerateBwcIndices --max-workers=1
//
// Also add testmethod with one of the index creation methods below, for example:
// -Ptestmethod=testCreateCFS
Expand Down Expand Up @@ -82,6 +82,16 @@ public void testCreateSortedIndex() throws IOException {
sortedTest.createBWCIndex();
}

/**
 * Builds a {@link TestInt7HnswBackwardsCompatibility} for {@link Version#LATEST}, using the
 * pattern derived from that test's index name and suffix, and asks it to create its BWC index.
 *
 * @throws IOException if index creation fails
 */
public void testCreateInt7HNSWIndices() throws IOException {
  // Resolve the index pattern first so the constructor call stays on one short line.
  String indexPattern =
      createPattern(
          TestInt7HnswBackwardsCompatibility.INDEX_NAME,
          TestInt7HnswBackwardsCompatibility.SUFFIX);
  TestInt7HnswBackwardsCompatibility generator =
      new TestInt7HnswBackwardsCompatibility(Version.LATEST, indexPattern);
  generator.createBWCIndex();
}

/**
 * Tells whether {@link Version#LATEST} is the {@code major.0.0} release of its major version.
 *
 * @return {@code true} if the latest version equals the version built from its own major number
 *     with the remaining two components set to zero
 */
private boolean isInitialMajorVersionRelease() {
  Version firstReleaseOfMajor = Version.fromBits(Version.LATEST.major, 0, 0);
  return Version.LATEST.equals(firstReleaseOfMajor);
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.backward_index;

import static org.apache.lucene.backward_index.TestBasicBackwardsCompatibility.assertKNNSearch;

import com.carrotsearch.randomizedtesting.annotations.ParametersFactory;
import java.io.IOException;
import org.apache.lucene.codecs.Codec;
import org.apache.lucene.codecs.KnnVectorsFormat;
import org.apache.lucene.codecs.lucene99.Lucene99Codec;
import org.apache.lucene.codecs.lucene99.Lucene99HnswScalarQuantizedVectorsFormat;
import org.apache.lucene.codecs.lucene99.Lucene99HnswVectorsFormat;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.document.KnnFloatVectorField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.NoMergePolicy;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.analysis.MockAnalyzer;
import org.apache.lucene.tests.util.TestUtil;
import org.apache.lucene.util.Version;

/**
 * Backwards-compatibility test for indices whose KNN vector field is written with {@link
 * Lucene99HnswScalarQuantizedVectorsFormat}.
 *
 * <p>{@link #createIndex(Directory)} writes {@link #DOC_COUNT} documents, each carrying a
 * three-dimensional float vector and a stored {@code id} field. The test methods then open the
 * inherited {@code directory} and run KNN searches against it.
 */
public class TestInt7HnswBackwardsCompatibility extends BackwardsCompatibilityTestBase {

  static final String INDEX_NAME = "int7_hnsw";
  static final String SUFFIX = "";
  private static final Version FIRST_INT7_HNSW_VERSION = Version.LUCENE_9_10_0;
  private static final String KNN_VECTOR_FIELD = "knn_field";

  /** Number of documents written by {@link #createIndex(Directory)}. */
  private static final int DOC_COUNT = 30;

  /** Number of documents appended on top of the existing index by the index-and-search test. */
  private static final int EXTRA_DOC_COUNT = 10;

  private static final FieldType KNN_VECTOR_FIELD_TYPE =
      KnnFloatVectorField.createFieldType(3, VectorSimilarityFunction.COSINE);

  /** Query vector; it is also the exact vector stored for the document with id 0. */
  private static final float[] KNN_VECTOR = {0.2f, -0.1f, 0.1f};

  public TestInt7HnswBackwardsCompatibility(Version version, String pattern) {
    super(version, pattern);
  }

  /**
   * Provides the versions returned by {@code allVersion(INDEX_NAME, SUFFIX)} to the test
   * framework.
   */
  @ParametersFactory(argumentFormatting = "Lucene-Version:%1$s; Pattern: %2$s")
  public static Iterable<Object[]> testVersionsFactory() throws IllegalAccessException {
    return allVersion(INDEX_NAME, SUFFIX);
  }

  /**
   * Returns a {@link Lucene99Codec} whose KNN vectors format, for every field, is a {@link
   * Lucene99HnswScalarQuantizedVectorsFormat} built with the default max-connections and beam
   * width of {@link Lucene99HnswVectorsFormat}.
   */
  protected Codec getCodec() {
    return new Lucene99Codec() {
      @Override
      public KnnVectorsFormat getKnnVectorsFormatForField(String field) {
        return new Lucene99HnswScalarQuantizedVectorsFormat(
            Lucene99HnswVectorsFormat.DEFAULT_MAX_CONN,
            Lucene99HnswVectorsFormat.DEFAULT_BEAM_WIDTH);
      }
    };
  }

  /** Only versions on or after {@link #FIRST_INT7_HNSW_VERSION} are supported. */
  @Override
  protected boolean supportsVersion(Version version) {
    return version.onOrAfter(FIRST_INT7_HNSW_VERSION);
  }

  @Override
  void verifyUsesDefaultCodec(Directory dir, String name) throws IOException {
    // Intentionally empty: this test writes with the custom codec from getCodec(), not the
    // default codec, so there is nothing to verify here.
  }

  /**
   * Appends {@link #EXTRA_DOC_COUNT} documents to the existing index (with random flushes and an
   * optional force-merge), then checks that KNN search sees all documents and still ranks the
   * document with id {@code "0"} first.
   */
  public void testInt7HnswIndexAndSearch() throws Exception {
    IndexWriterConfig indexWriterConfig =
        newIndexWriterConfig(new MockAnalyzer(random()))
            .setOpenMode(IndexWriterConfig.OpenMode.APPEND)
            .setCodec(getCodec())
            .setMergePolicy(newLogMergePolicy());
    try (IndexWriter writer = new IndexWriter(directory, indexWriterConfig)) {
      // Append the extra docs; their ids continue after the DOC_COUNT ids used by createIndex.
      for (int i = 0; i < EXTRA_DOC_COUNT; i++) {
        writer.addDocument(knnDocument(i + DOC_COUNT));
        if (random().nextBoolean()) {
          writer.flush();
        }
      }
      if (random().nextBoolean()) {
        writer.forceMerge(1);
      }
      writer.commit();
      try (IndexReader reader = DirectoryReader.open(directory)) {
        IndexSearcher searcher = new IndexSearcher(reader);
        // k larger than the index: every document, old and new, must be returned.
        assertKNNSearch(searcher, KNN_VECTOR, 1000, DOC_COUNT + EXTRA_DOC_COUNT, "0");
        assertKNNSearch(searcher, KNN_VECTOR, 10, 10, "0");
      }
    }
    // Check the resulting index for consistency after the additional writes.
    TestUtil.checkIndex(directory);
  }

  /**
   * Writes {@link #DOC_COUNT} KNN documents into {@code dir} using the codec from {@link
   * #getCodec()}, then sanity-checks the freshly written index with two KNN searches.
   */
  @Override
  protected void createIndex(Directory dir) throws IOException {
    IndexWriterConfig conf =
        new IndexWriterConfig(new MockAnalyzer(random()))
            .setMaxBufferedDocs(10)
            .setCodec(getCodec())
            .setMergePolicy(NoMergePolicy.INSTANCE);
    try (IndexWriter writer = new IndexWriter(dir, conf)) {
      for (int i = 0; i < DOC_COUNT; i++) {
        writer.addDocument(knnDocument(i));
      }
      // NOTE(review): NoMergePolicy is configured above, so this forceMerge(1) presumably merges
      // nothing and the index keeps its flushed segments - confirm that this is intended.
      writer.forceMerge(1);
    }
    try (DirectoryReader reader = DirectoryReader.open(dir)) {
      IndexSearcher searcher = new IndexSearcher(reader);
      assertKNNSearch(searcher, KNN_VECTOR, 1000, DOC_COUNT, "0");
      assertKNNSearch(searcher, KNN_VECTOR, 10, 10, "0");
    }
  }

  /**
   * Builds a document with a stored {@code id} field and a vector equal to {@link #KNN_VECTOR}
   * except that its third component is increased by {@code 0.1f * id}.
   */
  private static Document knnDocument(int id) {
    Document doc = new Document();
    float[] vector = {KNN_VECTOR[0], KNN_VECTOR[1], KNN_VECTOR[2] + 0.1f * id};
    doc.add(new KnnFloatVectorField(KNN_VECTOR_FIELD, vector, KNN_VECTOR_FIELD_TYPE));
    doc.add(new StringField("id", Integer.toString(id), Field.Store.YES));
    return doc;
  }

  /**
   * Opens the existing index read-only and checks that KNN search returns all {@link #DOC_COUNT}
   * documents with id {@code "0"} ranked first.
   */
  public void testReadOldIndices() throws Exception {
    try (DirectoryReader reader = DirectoryReader.open(directory)) {
      IndexSearcher searcher = new IndexSearcher(reader);
      assertKNNSearch(searcher, KNN_VECTOR, 1000, DOC_COUNT, "0");
      assertKNNSearch(searcher, KNN_VECTOR, 10, 10, "0");
    }
  }
}
Binary file not shown.

0 comments on commit a5204d9

Please sign in to comment.