diff --git a/CHANGELOG.md b/CHANGELOG.md index 4e53ecc62..bf61d49e7 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -17,12 +17,12 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), * Add parent join support for lucene knn [#1182](https://github.com/opensearch-project/k-NN/pull/1182) ### Enhancements * Increase Lucene max dimension limit to 16,000 [#1346](https://github.com/opensearch-project/k-NN/pull/1346) +* Tuned default values for ef_search and ef_construction for better indexing and search performance for vector search [#1353](https://github.com/opensearch-project/k-NN/pull/1353) ### Bug Fixes * Fix use-after-free case on nmslib search path [#1305](https://github.com/opensearch-project/k-NN/pull/1305) * Allow nested knn field mapping when train model [#1318](https://github.com/opensearch-project/k-NN/pull/1318) * Properly designate model state for actively training models when nodes crash or leave cluster [#1317](https://github.com/opensearch-project/k-NN/pull/1317) ->>>>>>> main ### Infrastructure * Upgrade gradle to 8.4 [1289](https://github.com/opensearch-project/k-NN/pull/1289) ### Documentation diff --git a/qa/restart-upgrade/src/test/java/org/opensearch/knn/bwc/IndexingIT.java b/qa/restart-upgrade/src/test/java/org/opensearch/knn/bwc/IndexingIT.java index c8d0cac93..2df79a3a2 100644 --- a/qa/restart-upgrade/src/test/java/org/opensearch/knn/bwc/IndexingIT.java +++ b/qa/restart-upgrade/src/test/java/org/opensearch/knn/bwc/IndexingIT.java @@ -5,9 +5,12 @@ package org.opensearch.knn.bwc; +import org.junit.Assert; import org.opensearch.common.xcontent.XContentFactory; import org.opensearch.knn.index.SpaceType; +import java.util.Map; + import static org.opensearch.knn.TestUtils.KNN_ALGO_PARAM_EF_CONSTRUCTION_MIN_VALUE; import static org.opensearch.knn.TestUtils.KNN_ALGO_PARAM_M_MIN_VALUE; import static org.opensearch.knn.TestUtils.KNN_VECTOR; @@ -16,8 +19,13 @@ import static org.opensearch.knn.TestUtils.VECTOR_TYPE; import static org.opensearch.knn.common.KNNConstants.DIMENSION; import static org.opensearch.knn.common.KNNConstants.FAISS_NAME; +import static org.opensearch.knn.common.KNNConstants.KNN_ENGINE; import static org.opensearch.knn.common.KNNConstants.KNN_METHOD; import static org.opensearch.knn.common.KNNConstants.METHOD_HNSW; +import static org.opensearch.knn.common.KNNConstants.METHOD_PARAMETER_EF_CONSTRUCTION; +import static org.opensearch.knn.common.KNNConstants.METHOD_PARAMETER_EF_SEARCH; +import static org.opensearch.knn.common.KNNConstants.METHOD_PARAMETER_M; +import static org.opensearch.knn.common.KNNConstants.METHOD_PARAMETER_SPACE_TYPE; import static org.opensearch.knn.common.KNNConstants.NAME; import static org.opensearch.knn.common.KNNConstants.PARAMETERS; @@ -28,6 +36,7 @@ public class IndexingIT extends AbstractRestartUpgradeTestCase { private static final int K = 5; private static final int M = 50; private static final int EF_CONSTRUCTION = 1024; + private static final int EF_SEARCH = 200; private static final int NUM_DOCS = 10; private static int QUERY_COUNT = 0; @@ -78,20 +87,43 @@ public void testKNNIndexDefaultMethodFieldMapping() throws Exception { } // Custom Method Field Mapping - // space_type : "inner_product", engine : "faiss", m : 50, ef_construction : 1024 + // space_type : "inner_product", engine : "faiss", m : 50, ef_construction : 1024, ef_search : 200 public void testKNNIndexCustomMethodFieldMapping() throws Exception { if (isRunningAgainstOldCluster()) { createKnnIndex( testIndex, getKNNDefaultIndexSettings(), - createKNNIndexCustomMethodFieldMapping(TEST_FIELD, DIMENSIONS, SpaceType.INNER_PRODUCT, FAISS_NAME, M, EF_CONSTRUCTION) + createKNNIndexCustomMethodFieldMapping( + TEST_FIELD, + DIMENSIONS, + SpaceType.INNER_PRODUCT, + FAISS_NAME, + M, + EF_CONSTRUCTION, + EF_SEARCH + ) ); addKNNDocs(testIndex, TEST_FIELD, DIMENSIONS, DOC_ID, NUM_DOCS); } else { + validateCustomMethodFieldMappingAfterUpgrade(); validateKNNIndexingOnUpgrade(); } } + private void validateCustomMethodFieldMappingAfterUpgrade() throws Exception { + final Map indexMappings = getIndexMappingAsMap(testIndex); + final Map properties = (Map) indexMappings.get(PROPERTIES); + final Map knnMethod = ((Map) ((Map) properties.get(TEST_FIELD)).get(KNN_METHOD)); + final Map methodParameters = (Map) knnMethod.get(PARAMETERS); + + Assert.assertEquals(METHOD_HNSW, knnMethod.get(NAME)); + Assert.assertEquals(SpaceType.INNER_PRODUCT.getValue(), knnMethod.get(METHOD_PARAMETER_SPACE_TYPE)); + Assert.assertEquals(FAISS_NAME, knnMethod.get(KNN_ENGINE)); + Assert.assertEquals(EF_CONSTRUCTION, ((Integer) methodParameters.get(METHOD_PARAMETER_EF_CONSTRUCTION)).intValue()); + Assert.assertEquals(EF_SEARCH, ((Integer) methodParameters.get(METHOD_PARAMETER_EF_SEARCH)).intValue()); + Assert.assertEquals(M, ((Integer) methodParameters.get(METHOD_PARAMETER_M)).intValue()); + } + // test null parameters public void testNullParametersOnUpgrade() throws Exception { if (isRunningAgainstOldCluster()) { diff --git a/qa/rolling-upgrade/src/test/java/org/opensearch/knn/bwc/IndexingIT.java b/qa/rolling-upgrade/src/test/java/org/opensearch/knn/bwc/IndexingIT.java index b22abc69d..10df1a79b 100644 --- a/qa/rolling-upgrade/src/test/java/org/opensearch/knn/bwc/IndexingIT.java +++ b/qa/rolling-upgrade/src/test/java/org/opensearch/knn/bwc/IndexingIT.java @@ -13,6 +13,10 @@ public class IndexingIT extends AbstractRollingUpgradeTestCase { private static final int K = 5; private static final int NUM_DOCS = 10; + private static final String ALGO = "hnsw"; + + private static final String ENGINE = "faiss"; + public void testKNNDefaultIndexSettings() throws Exception { waitForClusterHealthGreen(NODES_BWC_CLUSTER); switch (getClusterType()) { @@ -46,6 +50,82 @@ public void testKNNDefaultIndexSettings() throws Exception { } } + public void testKNNIndexCreation_withLegacyMapper() throws Exception { + waitForClusterHealthGreen(NODES_BWC_CLUSTER); + final String firstMixRoundIndex = testIndex + "first-mix-round"; + final String otherMixRoundIndex = testIndex + "other-mix-round"; + final String upgradedIndex = testIndex + "upgraded"; + switch (getClusterType()) { + case OLD: + createKnnIndex(testIndex, getKNNDefaultIndexSettings(), createKnnIndexMapping(TEST_FIELD, DIMENSIONS)); + int docIdOld = 0; + addKNNDocs(testIndex, TEST_FIELD, DIMENSIONS, docIdOld, NUM_DOCS); + break; + case MIXED: + if (isFirstMixedRound()) { + docIdOld = 0; + createKnnIndex(firstMixRoundIndex, getKNNDefaultIndexSettings(), createKnnIndexMapping(TEST_FIELD, DIMENSIONS)); + addKNNDocs(firstMixRoundIndex, TEST_FIELD, DIMENSIONS, docIdOld, NUM_DOCS); + } else { + docIdOld = 0; + createKnnIndex(otherMixRoundIndex, getKNNDefaultIndexSettings(), createKnnIndexMapping(TEST_FIELD, DIMENSIONS)); + addKNNDocs(otherMixRoundIndex, TEST_FIELD, DIMENSIONS, docIdOld, NUM_DOCS); + } + break; + case UPGRADED: + docIdOld = 0; + createKnnIndex(upgradedIndex, getKNNDefaultIndexSettings(), createKnnIndexMapping(TEST_FIELD, DIMENSIONS)); + addKNNDocs(upgradedIndex, TEST_FIELD, DIMENSIONS, docIdOld, NUM_DOCS); + + deleteKNNIndex(testIndex); + deleteKNNIndex(firstMixRoundIndex); + deleteKNNIndex(otherMixRoundIndex); + deleteKNNIndex(upgradedIndex); + } + } + + public void testKNNIndexCreation_withMethodMapper() throws Exception { + waitForClusterHealthGreen(NODES_BWC_CLUSTER); + final String firstMixRoundIndex = testIndex + "first-mix-round"; + final String otherMixRoundIndex = testIndex + "other-mix-round"; + final String upgradedIndex = testIndex + "upgraded"; + switch (getClusterType()) { + case OLD: + createKnnIndex(testIndex, getKNNDefaultIndexSettings(), createKnnIndexMapping(TEST_FIELD, DIMENSIONS, ALGO, ENGINE)); + int docIdOld = 0; + addKNNDocs(testIndex, TEST_FIELD, DIMENSIONS, docIdOld, NUM_DOCS); + break; + case MIXED: + if (isFirstMixedRound()) { + docIdOld = 0; + createKnnIndex( + firstMixRoundIndex, + getKNNDefaultIndexSettings(), + createKnnIndexMapping(TEST_FIELD, DIMENSIONS, ALGO, ENGINE) + ); + addKNNDocs(firstMixRoundIndex, TEST_FIELD, DIMENSIONS, docIdOld, NUM_DOCS); + } else { + docIdOld = 0; + createKnnIndex( + otherMixRoundIndex, + getKNNDefaultIndexSettings(), + createKnnIndexMapping(TEST_FIELD, DIMENSIONS, ALGO, ENGINE) + ); + addKNNDocs(otherMixRoundIndex, TEST_FIELD, DIMENSIONS, docIdOld, NUM_DOCS); + } + break; + case UPGRADED: + docIdOld = 0; + createKnnIndex(upgradedIndex, getKNNDefaultIndexSettings(), createKnnIndexMapping(TEST_FIELD, DIMENSIONS, ALGO, ENGINE)); + addKNNDocs(upgradedIndex, TEST_FIELD, DIMENSIONS, docIdOld, NUM_DOCS); + + deleteKNNIndex(testIndex); + deleteKNNIndex(firstMixRoundIndex); + deleteKNNIndex(otherMixRoundIndex); + deleteKNNIndex(upgradedIndex); + } + } + // validation steps for indexing after upgrading each node from old version to new version public void validateKNNIndexingOnUpgrade(int totalDocsCount, int docId) throws Exception { validateKNNSearch(testIndex, TEST_FIELD, DIMENSIONS, totalDocsCount, K); diff --git a/src/main/java/org/opensearch/knn/index/KNNMethod.java b/src/main/java/org/opensearch/knn/index/KNNMethod.java index d46e8fccd..2d3672d87 100644 --- a/src/main/java/org/opensearch/knn/index/KNNMethod.java +++ b/src/main/java/org/opensearch/knn/index/KNNMethod.java @@ -63,7 +63,7 @@ public ValidationException validate(KNNMethodContext knnMethodContext) { ); } - ValidationException methodValidation = methodComponent.validate(knnMethodContext.getMethodComponent()); + ValidationException methodValidation = methodComponent.validate(knnMethodContext.getMethodComponentContext()); if (methodValidation != null) { errorMessages.addAll(methodValidation.validationErrors()); } @@ -84,7 +84,7 @@ public ValidationException validate(KNNMethodContext knnMethodContext) { * @return true if training is required; false otherwise */ public boolean isTrainingRequired(KNNMethodContext knnMethodContext) { - return methodComponent.isTrainingRequired(knnMethodContext.getMethodComponent()); + return methodComponent.isTrainingRequired(knnMethodContext.getMethodComponentContext()); } /** @@ -95,7 +95,7 @@ public boolean isTrainingRequired(KNNMethodContext knnMethodContext) { * @return estimate overhead in KB */ public int estimateOverheadInKB(KNNMethodContext knnMethodContext, int dimension) { - return methodComponent.estimateOverheadInKB(knnMethodContext.getMethodComponent(), dimension); + return methodComponent.estimateOverheadInKB(knnMethodContext.getMethodComponentContext(), dimension); } /** @@ -105,7 +105,7 @@ public int estimateOverheadInKB(KNNMethodContext knnMethodContext, int dimension * @return KNNMethod as a map */ public Map getAsMap(KNNMethodContext knnMethodContext) { - Map parameterMap = new HashMap<>(methodComponent.getAsMap(knnMethodContext.getMethodComponent())); + Map parameterMap = new HashMap<>(methodComponent.getAsMap(knnMethodContext.getMethodComponentContext())); parameterMap.put(KNNConstants.SPACE_TYPE, knnMethodContext.getSpaceType().getValue()); return parameterMap; } diff --git a/src/main/java/org/opensearch/knn/index/KNNMethodContext.java b/src/main/java/org/opensearch/knn/index/KNNMethodContext.java index 5f5a0f232..d4df713c2 100644 --- a/src/main/java/org/opensearch/knn/index/KNNMethodContext.java +++ b/src/main/java/org/opensearch/knn/index/KNNMethodContext.java @@ -63,7 +63,7 @@ public static synchronized KNNMethodContext getDefault() { @NonNull private final SpaceType spaceType; @NonNull - private final MethodComponentContext methodComponent; + private final MethodComponentContext methodComponentContext; /** * Constructor from stream. @@ -74,7 +74,7 @@ public static synchronized KNNMethodContext getDefault() { public KNNMethodContext(StreamInput in) throws IOException { this.knnEngine = KNNEngine.getEngine(in.readString()); this.spaceType = SpaceType.getSpace(in.readString()); - this.methodComponent = new MethodComponentContext(in); + this.methodComponentContext = new MethodComponentContext(in); } /** @@ -198,7 +198,7 @@ public static KNNMethodContext parse(Object in) { public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.field(KNN_ENGINE, knnEngine.getName()); builder.field(METHOD_PARAMETER_SPACE_TYPE, spaceType.getValue()); - builder = methodComponent.toXContent(builder, params); + builder = methodComponentContext.toXContent(builder, params); return builder; } @@ -211,20 +211,20 @@ public boolean equals(Object obj) { EqualsBuilder equalsBuilder = new EqualsBuilder(); equalsBuilder.append(knnEngine, other.knnEngine); equalsBuilder.append(spaceType, other.spaceType); - equalsBuilder.append(methodComponent, other.methodComponent); + equalsBuilder.append(methodComponentContext, other.methodComponentContext); return equalsBuilder.isEquals(); } @Override public int hashCode() { - return new HashCodeBuilder().append(knnEngine).append(spaceType).append(methodComponent).toHashCode(); + return new HashCodeBuilder().append(knnEngine).append(spaceType).append(methodComponentContext).toHashCode(); } @Override public void writeTo(StreamOutput out) throws IOException { out.writeString(knnEngine.getName()); out.writeString(spaceType.getValue()); - this.methodComponent.writeTo(out); + this.methodComponentContext.writeTo(out); } } diff --git a/src/main/java/org/opensearch/knn/index/KNNSettings.java b/src/main/java/org/opensearch/knn/index/KNNSettings.java index 14a69f317..53ee17150 100644 --- a/src/main/java/org/opensearch/knn/index/KNNSettings.java +++ b/src/main/java/org/opensearch/knn/index/KNNSettings.java @@ -8,6 +8,7 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.opensearch.OpenSearchParseException; +import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.core.action.ActionListener; import org.opensearch.action.admin.cluster.settings.ClusterUpdateSettingsRequest; import org.opensearch.action.admin.cluster.settings.ClusterUpdateSettingsResponse; @@ -21,6 +22,7 @@ import org.opensearch.index.IndexModule; import org.opensearch.knn.index.memory.NativeMemoryCacheManager; import org.opensearch.knn.index.memory.NativeMemoryCacheManagerDto; +import org.opensearch.knn.index.util.IndexHyperParametersUtil; import org.opensearch.monitor.jvm.JvmInfo; import org.opensearch.monitor.os.OsProbe; @@ -80,8 +82,8 @@ public class KNNSettings { */ public static final String INDEX_KNN_DEFAULT_SPACE_TYPE = "l2"; public static final Integer INDEX_KNN_DEFAULT_ALGO_PARAM_M = 16; - public static final Integer INDEX_KNN_DEFAULT_ALGO_PARAM_EF_SEARCH = 512; - public static final Integer INDEX_KNN_DEFAULT_ALGO_PARAM_EF_CONSTRUCTION = 512; + public static final Integer INDEX_KNN_DEFAULT_ALGO_PARAM_EF_SEARCH = 100; + public static final Integer INDEX_KNN_DEFAULT_ALGO_PARAM_EF_CONSTRUCTION = 100; public static final Integer KNN_DEFAULT_ALGO_PARAM_INDEX_THREAD_QTY = 1; public static final Integer KNN_DEFAULT_CIRCUIT_BREAKER_UNSET_PERCENTAGE = 75; public static final Integer KNN_DEFAULT_MODEL_CACHE_SIZE_LIMIT_PERCENTAGE = 10; // By default, set aside 10% of the JVM for the limit @@ -447,11 +449,12 @@ public void onFailure(Exception e) { * @return efSearch value */ public static int getEfSearchParam(String index) { - return KNNSettings.state().clusterService.state() - .getMetadata() - .index(index) - .getSettings() - .getAsInt(KNNSettings.KNN_ALGO_PARAM_EF_SEARCH, 512); + final IndexMetadata indexMetadata = KNNSettings.state().clusterService.state().getMetadata().index(index); + return indexMetadata.getSettings() + .getAsInt( + KNNSettings.KNN_ALGO_PARAM_EF_SEARCH, + IndexHyperParametersUtil.getHNSWEFSearchValue(indexMetadata.getCreationVersion()) + ); } public void setClusterService(ClusterService clusterService) { diff --git a/src/main/java/org/opensearch/knn/index/MethodComponent.java b/src/main/java/org/opensearch/knn/index/MethodComponent.java index c904f15c3..f2e2d878e 100644 --- a/src/main/java/org/opensearch/knn/index/MethodComponent.java +++ b/src/main/java/org/opensearch/knn/index/MethodComponent.java @@ -12,9 +12,11 @@ package org.opensearch.knn.index; import lombok.Getter; +import org.opensearch.Version; import org.opensearch.common.TriFunction; import org.opensearch.common.ValidationException; import org.opensearch.knn.common.KNNConstants; +import org.opensearch.knn.index.util.IndexHyperParametersUtil; import java.util.ArrayList; import java.util.HashMap; @@ -296,11 +298,24 @@ public static Map getParameterMapWithDefaultsAdded( ) { Map parametersWithDefaultsMap = new HashMap<>(); Map userProvidedParametersMap = methodComponentContext.getParameters(); + Version indexCreationVersion = methodComponentContext.getIndexVersion(); for (Parameter parameter : methodComponent.getParameters().values()) { if (methodComponentContext.getParameters().containsKey(parameter.getName())) { parametersWithDefaultsMap.put(parameter.getName(), userProvidedParametersMap.get(parameter.getName())); } else { - parametersWithDefaultsMap.put(parameter.getName(), parameter.getDefaultValue()); + // Picking the right values for the parameters whose values are different based on different index + // created version. + if (parameter.getName().equals(KNNConstants.METHOD_PARAMETER_EF_SEARCH)) { + parametersWithDefaultsMap.put(parameter.getName(), IndexHyperParametersUtil.getHNSWEFSearchValue(indexCreationVersion)); + } else if (parameter.getName().equals(KNNConstants.METHOD_PARAMETER_EF_CONSTRUCTION)) { + parametersWithDefaultsMap.put( + parameter.getName(), + IndexHyperParametersUtil.getHNSWEFConstructionValue(indexCreationVersion) + ); + } else { + parametersWithDefaultsMap.put(parameter.getName(), parameter.getDefaultValue()); + } + } } diff --git a/src/main/java/org/opensearch/knn/index/MethodComponentContext.java b/src/main/java/org/opensearch/knn/index/MethodComponentContext.java index cf10e9a64..66952f448 100644 --- a/src/main/java/org/opensearch/knn/index/MethodComponentContext.java +++ b/src/main/java/org/opensearch/knn/index/MethodComponentContext.java @@ -11,8 +11,10 @@ package org.opensearch.knn.index; -import lombok.AllArgsConstructor; import lombok.Getter; +import lombok.RequiredArgsConstructor; +import lombok.Setter; +import org.opensearch.Version; import org.opensearch.core.common.io.stream.StreamInput; import org.opensearch.core.common.io.stream.StreamOutput; import org.opensearch.core.common.io.stream.Writeable; @@ -36,13 +38,17 @@ * * Each component is composed of a name and a map of parameters. */ -@AllArgsConstructor +@RequiredArgsConstructor public class MethodComponentContext implements ToXContentFragment, Writeable { @Getter private final String name; private final Map parameters; + @Getter + @Setter + private Version indexVersion; + /** * Constructor from stream. * diff --git a/src/main/java/org/opensearch/knn/index/codec/BasePerFieldKnnVectorsFormat.java b/src/main/java/org/opensearch/knn/index/codec/BasePerFieldKnnVectorsFormat.java index c8eb22a97..b9e280e2e 100644 --- a/src/main/java/org/opensearch/knn/index/codec/BasePerFieldKnnVectorsFormat.java +++ b/src/main/java/org/opensearch/knn/index/codec/BasePerFieldKnnVectorsFormat.java @@ -47,7 +47,7 @@ public KnnVectorsFormat getKnnVectorsFormatForField(final String field) { String.format("Cannot read field type for field [%s] because mapper service is not available", field) ) ).fieldType(field); - var params = type.getKnnMethodContext().getMethodComponent().getParameters(); + var params = type.getKnnMethodContext().getMethodComponentContext().getParameters(); int maxConnections = getMaxConnections(params); int beamWidth = getBeamWidth(params); log.debug( diff --git a/src/main/java/org/opensearch/knn/index/mapper/KNNVectorFieldMapper.java b/src/main/java/org/opensearch/knn/index/mapper/KNNVectorFieldMapper.java index 346d4c238..5b427517b 100644 --- a/src/main/java/org/opensearch/knn/index/mapper/KNNVectorFieldMapper.java +++ b/src/main/java/org/opensearch/knn/index/mapper/KNNVectorFieldMapper.java @@ -7,6 +7,7 @@ import lombok.Getter; import lombok.extern.log4j.Log4j2; +import org.opensearch.Version; import org.opensearch.common.ValidationException; import org.opensearch.knn.common.KNNConstants; @@ -79,6 +80,10 @@ private static KNNVectorFieldMapper toType(FieldMapper in) { return (KNNVectorFieldMapper) in; } + // We store the version of the index with the mapper as different version of Opensearch has different default + // values of KNN engine Algorithms hyperparameters. + protected Version indexCreatedVersion; + /** * Builder for KNNVectorFieldMapper. This class defines the set of parameters that can be applied to the knn_vector * field type @@ -139,7 +144,7 @@ public static class Builder extends ParametrizedFieldMapper.Builder { b.startObject(n); v.toXContent(b, ToXContent.EMPTY_PARAMS); b.endObject(); - }), m -> m.getMethodComponent().getName()).setValidator(v -> { + }), m -> m.getMethodComponentContext().getName()).setValidator(v -> { if (v == null) return; ValidationException validationException = null; @@ -167,9 +172,12 @@ public static class Builder extends ParametrizedFieldMapper.Builder { protected ModelDao modelDao; - public Builder(String name, ModelDao modelDao) { + protected Version indexCreatedVersion; + + public Builder(String name, ModelDao modelDao, Version indexCreatedVersion) { super(name); this.modelDao = modelDao; + this.indexCreatedVersion = indexCreatedVersion; } /** @@ -181,11 +189,12 @@ public Builder(String name, ModelDao modelDao) { * @param m m value of field * @param efConstruction efConstruction value of field */ - public Builder(String name, String spaceType, String m, String efConstruction) { + public Builder(String name, String spaceType, String m, String efConstruction, Version indexCreatedVersion) { super(name); this.spaceType = spaceType; this.m = m; this.efConstruction = efConstruction; + this.indexCreatedVersion = indexCreatedVersion; } @Override @@ -221,6 +230,7 @@ public KNNVectorFieldMapper build(BuilderContext context) { final Map metaValue = meta.getValue(); if (knnMethodContext != null) { + knnMethodContext.getMethodComponentContext().setIndexVersion(indexCreatedVersion); final KNNVectorFieldType mappedFieldType = new KNNVectorFieldType( buildFullName(context), metaValue, @@ -278,7 +288,8 @@ public KNNVectorFieldMapper build(BuilderContext context) { stored.get(), hasDocValues.get(), modelDao, - modelIdAsString + modelIdAsString, + indexCreatedVersion ); } @@ -292,7 +303,7 @@ public KNNVectorFieldMapper build(BuilderContext context) { } if (this.efConstruction == null) { - this.efConstruction = LegacyFieldMapper.getEfConstruction(context.indexSettings()); + this.efConstruction = LegacyFieldMapper.getEfConstruction(context.indexSettings(), indexCreatedVersion); } // Validates and throws exception if index.knn is set to true in the index settings @@ -310,7 +321,8 @@ public KNNVectorFieldMapper build(BuilderContext context) { hasDocValues.get(), spaceType, m, - efConstruction + efConstruction, + indexCreatedVersion ); } @@ -346,7 +358,7 @@ public TypeParser(Supplier modelDaoSupplier) { @Override public Mapper.Builder parse(String name, Map node, ParserContext parserContext) throws MapperParsingException { - Builder builder = new KNNVectorFieldMapper.Builder(name, modelDaoSupplier.get()); + Builder builder = new KNNVectorFieldMapper.Builder(name, modelDaoSupplier.get(), parserContext.indexVersionCreated()); builder.parse(name, parserContext, node); // All ignoreMalformed, boolean stored, - boolean hasDocValues + boolean hasDocValues, + Version indexCreatedVersion ) { super(simpleName, mappedFieldType, multiFields, copyTo); this.ignoreMalformed = ignoreMalformed; @@ -469,6 +482,7 @@ public KNNVectorFieldMapper( this.dimension = mappedFieldType.getDimension(); this.vectorDataType = mappedFieldType.getVectorDataType(); updateEngineStats(); + this.indexCreatedVersion = indexCreatedVersion; } public KNNVectorFieldMapper clone() { @@ -610,7 +624,7 @@ protected boolean docValuesByDefault() { @Override public ParametrizedFieldMapper.Builder getMergeBuilder() { - return new KNNVectorFieldMapper.Builder(simpleName(), modelDao).init(this); + return new KNNVectorFieldMapper.Builder(simpleName(), modelDao, indexCreatedVersion).init(this); } @Override diff --git a/src/main/java/org/opensearch/knn/index/mapper/LegacyFieldMapper.java b/src/main/java/org/opensearch/knn/index/mapper/LegacyFieldMapper.java index 868aec3e6..d67ffc73c 100644 --- a/src/main/java/org/opensearch/knn/index/mapper/LegacyFieldMapper.java +++ b/src/main/java/org/opensearch/knn/index/mapper/LegacyFieldMapper.java @@ -7,10 +7,12 @@ import lombok.extern.log4j.Log4j2; import org.apache.lucene.document.FieldType; +import org.opensearch.Version; import org.opensearch.common.Explicit; import org.opensearch.common.settings.Settings; import org.opensearch.index.mapper.ParametrizedFieldMapper; import org.opensearch.knn.index.KNNSettings; +import org.opensearch.knn.index.util.IndexHyperParametersUtil; import org.opensearch.knn.index.util.KNNEngine; import static org.opensearch.knn.common.KNNConstants.DIMENSION; @@ -47,9 +49,10 @@ public class LegacyFieldMapper extends KNNVectorFieldMapper { boolean hasDocValues, String spaceType, String m, - String efConstruction + String efConstruction, + Version indexCreatedVersion ) { - super(simpleName, mappedFieldType, multiFields, copyTo, ignoreMalformed, stored, hasDocValues); + super(simpleName, mappedFieldType, multiFields, copyTo, ignoreMalformed, stored, hasDocValues, indexCreatedVersion); this.spaceType = spaceType; this.m = m; @@ -70,7 +73,9 @@ public class LegacyFieldMapper extends KNNVectorFieldMapper { @Override public ParametrizedFieldMapper.Builder getMergeBuilder() { - return new KNNVectorFieldMapper.Builder(simpleName(), this.spaceType, this.m, this.efConstruction).init(this); + return new KNNVectorFieldMapper.Builder(simpleName(), this.spaceType, this.m, this.efConstruction, this.indexCreatedVersion).init( + this + ); } static String getSpaceType(Settings indexSettings) { @@ -103,17 +108,19 @@ static String getM(Settings indexSettings) { return m; } - static String getEfConstruction(Settings indexSettings) { - String efConstruction = indexSettings.get(KNNSettings.INDEX_KNN_ALGO_PARAM_EF_CONSTRUCTION_SETTING.getKey()); + static String getEfConstruction(Settings indexSettings, Version indexVersion) { + final String efConstruction = indexSettings.get(KNNSettings.INDEX_KNN_ALGO_PARAM_EF_CONSTRUCTION_SETTING.getKey()); if (efConstruction == null) { + final String defaultEFConstructionValue = String.valueOf(IndexHyperParametersUtil.getHNSWEFConstructionValue(indexVersion)); log.info( String.format( - "[KNN] The setting \"%s\" was not set for the index. Likely caused by recent version upgrade. Setting the setting to the default value=%s", + "[KNN] The setting \"%s\" was not set for the index. Likely caused by recent version upgrade. " + + "Picking up default value for the index =%s", HNSW_ALGO_EF_CONSTRUCTION, - KNNSettings.INDEX_KNN_DEFAULT_ALGO_PARAM_EF_CONSTRUCTION + defaultEFConstructionValue ) ); - return String.valueOf(KNNSettings.INDEX_KNN_DEFAULT_ALGO_PARAM_EF_CONSTRUCTION); + return defaultEFConstructionValue; } return efConstruction; } diff --git a/src/main/java/org/opensearch/knn/index/mapper/LuceneFieldMapper.java b/src/main/java/org/opensearch/knn/index/mapper/LuceneFieldMapper.java index 173f057e6..831a23f4b 100644 --- a/src/main/java/org/opensearch/knn/index/mapper/LuceneFieldMapper.java +++ b/src/main/java/org/opensearch/knn/index/mapper/LuceneFieldMapper.java @@ -44,7 +44,8 @@ public class LuceneFieldMapper extends KNNVectorFieldMapper { input.getCopyTo(), input.getIgnoreMalformed(), input.isStored(), - input.isHasDocValues() + input.isHasDocValues(), + input.getKnnMethodContext().getMethodComponentContext().getIndexVersion() ); vectorDataType = input.getVectorDataType(); diff --git a/src/main/java/org/opensearch/knn/index/mapper/MethodFieldMapper.java b/src/main/java/org/opensearch/knn/index/mapper/MethodFieldMapper.java index 1db710bad..d2db7fb5a 100644 --- a/src/main/java/org/opensearch/knn/index/mapper/MethodFieldMapper.java +++ b/src/main/java/org/opensearch/knn/index/mapper/MethodFieldMapper.java @@ -34,7 +34,16 @@ public class MethodFieldMapper extends KNNVectorFieldMapper { KNNMethodContext knnMethodContext ) { - super(simpleName, mappedFieldType, multiFields, copyTo, ignoreMalformed, stored, hasDocValues); + super( + simpleName, + mappedFieldType, + multiFields, + copyTo, + ignoreMalformed, + stored, + hasDocValues, + knnMethodContext.getMethodComponentContext().getIndexVersion() + ); this.knnMethod = knnMethodContext; diff --git a/src/main/java/org/opensearch/knn/index/mapper/ModelFieldMapper.java b/src/main/java/org/opensearch/knn/index/mapper/ModelFieldMapper.java index 0fa116937..2f138dba6 100644 --- a/src/main/java/org/opensearch/knn/index/mapper/ModelFieldMapper.java +++ b/src/main/java/org/opensearch/knn/index/mapper/ModelFieldMapper.java @@ -6,6 +6,7 @@ package org.opensearch.knn.index.mapper; import org.apache.lucene.document.FieldType; +import org.opensearch.Version; import org.opensearch.common.Explicit; import org.opensearch.index.mapper.ParseContext; import org.opensearch.knn.indices.ModelDao; @@ -29,9 +30,10 @@ public class ModelFieldMapper extends KNNVectorFieldMapper { boolean stored, boolean hasDocValues, ModelDao modelDao, - String modelId + String modelId, + Version indexCreatedVersion ) { - super(simpleName, mappedFieldType, multiFields, copyTo, ignoreMalformed, stored, hasDocValues); + super(simpleName, mappedFieldType, multiFields, copyTo, ignoreMalformed, stored, hasDocValues, indexCreatedVersion); this.modelId = modelId; this.modelDao = modelDao; diff --git a/src/main/java/org/opensearch/knn/index/util/AbstractKNNLibrary.java b/src/main/java/org/opensearch/knn/index/util/AbstractKNNLibrary.java index c5a3d1d1e..f97d18810 100644 --- a/src/main/java/org/opensearch/knn/index/util/AbstractKNNLibrary.java +++ b/src/main/java/org/opensearch/knn/index/util/AbstractKNNLibrary.java @@ -35,22 +35,24 @@ public KNNMethod getMethod(String methodName) { @Override public ValidationException validateMethod(KNNMethodContext knnMethodContext) { - String methodName = knnMethodContext.getMethodComponent().getName(); + String methodName = knnMethodContext.getMethodComponentContext().getName(); return getMethod(methodName).validate(knnMethodContext); } @Override public boolean isTrainingRequired(KNNMethodContext knnMethodContext) { - String methodName = knnMethodContext.getMethodComponent().getName(); + String methodName = knnMethodContext.getMethodComponentContext().getName(); return getMethod(methodName).isTrainingRequired(knnMethodContext); } @Override public Map getMethodAsMap(KNNMethodContext knnMethodContext) { - KNNMethod knnMethod = methods.get(knnMethodContext.getMethodComponent().getName()); + KNNMethod knnMethod = methods.get(knnMethodContext.getMethodComponentContext().getName()); if (knnMethod == null) { - throw new IllegalArgumentException(String.format("Invalid method name: %s", knnMethodContext.getMethodComponent().getName())); + throw new IllegalArgumentException( + String.format("Invalid method name: %s", knnMethodContext.getMethodComponentContext().getName()) + ); } return knnMethod.getAsMap(knnMethodContext); diff --git a/src/main/java/org/opensearch/knn/index/util/IndexHyperParametersUtil.java b/src/main/java/org/opensearch/knn/index/util/IndexHyperParametersUtil.java new file mode 100644 index 000000000..af842788a --- /dev/null +++ b/src/main/java/org/opensearch/knn/index/util/IndexHyperParametersUtil.java @@ -0,0 +1,79 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + * + * Modifications Copyright OpenSearch Contributors. See + * GitHub history for details. + */ + +package org.opensearch.knn.index.util; + +import lombok.AccessLevel; +import lombok.NoArgsConstructor; +import lombok.NonNull; +import lombok.extern.log4j.Log4j2; +import org.opensearch.Version; +import org.opensearch.knn.index.KNNSettings; + +/** + * This class acts as an abstraction to get the default hyperparameter values for different parameters used in the + * Nearest Neighbor Algorithm across different version of Index. + */ +@Log4j2 +@NoArgsConstructor(access = AccessLevel.PRIVATE) +public class IndexHyperParametersUtil { + + private static final int INDEX_KNN_DEFAULT_ALGO_PARAM_EF_CONSTRUCTION_OLD_VALUE = 512; + private static final int INDEX_KNN_DEFAULT_ALGO_PARAM_EF_SEARCH_OLD_VALUE = 512; + + /** + * Returns the default value of EF Construction that should be used for the input index version. After version 2.12.0 + * of Opensearch we are have reduced the value of ef_construction in favor of better build times. + * + * @param indexVersion {@code Version} of the index with which it was created. + * @return default value of EF Construction that should be used for the input index version. + */ + public static int getHNSWEFConstructionValue(@NonNull final Version indexVersion) { + if (indexVersion.before(Version.V_2_12_0)) { + log.debug( + "Picking up old values of ef_construction : index version : {}, value: {}", + indexVersion, + INDEX_KNN_DEFAULT_ALGO_PARAM_EF_CONSTRUCTION_OLD_VALUE + ); + return INDEX_KNN_DEFAULT_ALGO_PARAM_EF_CONSTRUCTION_OLD_VALUE; + } + log.debug( + "Picking up new values of ef_construction : index version : {}, value: {}", + indexVersion, + KNNSettings.INDEX_KNN_DEFAULT_ALGO_PARAM_EF_CONSTRUCTION + ); + return KNNSettings.INDEX_KNN_DEFAULT_ALGO_PARAM_EF_CONSTRUCTION; + } + + /** + * Returns the default value of EF Search that should be used for the input index version. After version 2.12.0 + * of Opensearch we are have reduced the value of ef_search in favor of better latency. + * + * @param indexVersion {@code Version} of the index with which it was created. + * @return default value of EF Search that should be used for the input index version. + */ + public static int getHNSWEFSearchValue(@NonNull final Version indexVersion) { + if (indexVersion.before(Version.V_2_12_0)) { + log.debug( + "Picking up old values of ef_search : index version : {}, value: {}", + indexVersion, + INDEX_KNN_DEFAULT_ALGO_PARAM_EF_SEARCH_OLD_VALUE + ); + return INDEX_KNN_DEFAULT_ALGO_PARAM_EF_SEARCH_OLD_VALUE; + } + log.debug( + "Picking up new values of ef_search : index version : {}, value: {}", + indexVersion, + KNNSettings.INDEX_KNN_DEFAULT_ALGO_PARAM_EF_SEARCH + ); + return KNNSettings.INDEX_KNN_DEFAULT_ALGO_PARAM_EF_SEARCH; + } +} diff --git a/src/main/java/org/opensearch/knn/index/util/NativeLibrary.java b/src/main/java/org/opensearch/knn/index/util/NativeLibrary.java index 896c2b889..5e264ed12 100644 --- a/src/main/java/org/opensearch/knn/index/util/NativeLibrary.java +++ b/src/main/java/org/opensearch/knn/index/util/NativeLibrary.java @@ -62,7 +62,7 @@ public float score(float rawScore, SpaceType spaceType) { @Override public int estimateOverheadInKB(KNNMethodContext knnMethodContext, int dimension) { - String methodName = knnMethodContext.getMethodComponent().getName(); + String methodName = knnMethodContext.getMethodComponentContext().getName(); return getMethod(methodName).estimateOverheadInKB(knnMethodContext, dimension); } diff --git a/src/main/java/org/opensearch/knn/training/TrainingJob.java b/src/main/java/org/opensearch/knn/training/TrainingJob.java index 8a2af4319..7b5404f6c 100644 --- a/src/main/java/org/opensearch/knn/training/TrainingJob.java +++ b/src/main/java/org/opensearch/knn/training/TrainingJob.java @@ -174,7 +174,7 @@ public void run() { if (trainingDataAllocation.isClosed()) { throw new RuntimeException("Unable to load training data into memory: allocation is already closed"); } - + setVersionInKnnMethodContext(); Map trainParameters = model.getModelMetadata().getKnnEngine().getMethodAsMap(knnMethodContext); trainParameters.put( KNNConstants.INDEX_THREAD_QTY, @@ -192,7 +192,7 @@ public void run() { model.setModelBlob(modelBlob); modelMetadata.setState(ModelState.CREATED); } catch (Exception e) { - logger.error("Failed to run training job for model \"" + modelId + "\": " + e.getMessage()); + logger.error("Failed to run training job for model \"" + modelId + "\": ", e); modelMetadata.setState(ModelState.FAILED); modelMetadata.setError( "Failed to execute training. May be caused by an invalid method definition or " + "not enough memory to perform training." @@ -208,4 +208,10 @@ public void run() { nativeMemoryCacheManager.invalidate(modelAnonymousEntryContext.getKey()); } } + + private void setVersionInKnnMethodContext() { + // We are picking up the node version here. For more details why we did this please check below conversation + // Ref: https://github.com/opensearch-project/k-NN/pull/1353#discussion_r1434428542 + knnMethodContext.getMethodComponentContext().setIndexVersion(trainingDataEntryContext.getClusterService().localNode().getVersion()); + } } diff --git a/src/test/java/org/opensearch/knn/KNNTestCase.java b/src/test/java/org/opensearch/knn/KNNTestCase.java index 1b0dbefef..3f20981bd 100644 --- a/src/test/java/org/opensearch/knn/KNNTestCase.java +++ b/src/test/java/org/opensearch/knn/KNNTestCase.java @@ -39,6 +39,11 @@ public class KNNTestCase extends OpenSearchTestCase { public void setUp() throws Exception { super.setUp(); openMocks = MockitoAnnotations.openMocks(this); + // This is required to make sure that before every test we are initializing the KNNSettings. Not doing this + // leads to failures of unit tests cases when a unit test is run separately. Try running this test: + // ./gradlew ':test' --tests "org.opensearch.knn.training.TrainingJobTests.testRun_success" and see it fails + // but if run along with other tests this test passes. + initKNNSettings(); } @Override @@ -53,7 +58,14 @@ public void resetState() { for (KNNCounter knnCounter : KNNCounter.values()) { knnCounter.set(0L); } + initKNNSettings(); + // Clean up the cache + NativeMemoryCacheManager.getInstance().invalidateAll(); + NativeMemoryCacheManager.getInstance().close(); + } + + private void initKNNSettings() { Set> defaultClusterSettings = new HashSet<>(ClusterSettings.BUILT_IN_CLUSTER_SETTINGS); defaultClusterSettings.addAll( KNNSettings.state() @@ -64,10 +76,6 @@ public void resetState() { ); when(clusterService.getClusterSettings()).thenReturn(new ClusterSettings(Settings.EMPTY, defaultClusterSettings)); KNNSettings.state().setClusterService(clusterService); - - // Clean up the cache - NativeMemoryCacheManager.getInstance().invalidateAll(); - NativeMemoryCacheManager.getInstance().close(); } public Map xContentBuilderToMap(XContentBuilder xContentBuilder) { diff --git a/src/test/java/org/opensearch/knn/index/FaissIT.java b/src/test/java/org/opensearch/knn/index/FaissIT.java index 9a9f8779f..c2dd70d3f 100644 --- a/src/test/java/org/opensearch/knn/index/FaissIT.java +++ b/src/test/java/org/opensearch/knn/index/FaissIT.java @@ -267,6 +267,113 @@ public void testEndToEnd_whenMethodIsHNSWPQ_thenSucceed() { fail("Graphs are not getting evicted"); } + @SneakyThrows + public void testEndToEnd_whenMethodIsHNSWPQAndHyperParametersNotSet_thenSucceed() { + String indexName = "test-index"; + String fieldName = "test-field"; + String trainingIndexName = "training-index"; + String trainingFieldName = "training-field"; + + String modelId = "test-model"; + String modelDescription = "test model"; + + List mValues = ImmutableList.of(16, 32, 64, 128); + List pqMValues = ImmutableList.of(2, 4, 8); + + // training data needs to be at least equal to the number of centroids for PQ + // which is 2^8 = 256. 8 because thats the only valid code_size for HNSWPQ + int trainingDataCount = 256; + + SpaceType spaceType = SpaceType.L2; + + Integer dimension = testData.indexData.vectors[0].length; + + XContentBuilder xContentBuilder = XContentFactory.jsonBuilder() + .startObject() + .field(NAME, METHOD_HNSW) + .field(KNN_ENGINE, FAISS_NAME) + .startObject(PARAMETERS) + .field(KNNConstants.METHOD_PARAMETER_M, mValues.get(random().nextInt(mValues.size()))) + .startObject(METHOD_ENCODER_PARAMETER) + .field(NAME, ENCODER_PQ) + .startObject(PARAMETERS) + .field(ENCODER_PARAMETER_PQ_M, pqMValues.get(random().nextInt(pqMValues.size()))) + .endObject() + .endObject() + .endObject() + .endObject(); + Map in = xContentBuilderToMap(xContentBuilder); + + createBasicKnnIndex(trainingIndexName, trainingFieldName, dimension); + ingestDataAndTrainModel(modelId, trainingIndexName, trainingFieldName, dimension, modelDescription, in, trainingDataCount); + assertTrainingSucceeds(modelId, 360, 1000); + + // Create an index + XContentBuilder builder = XContentFactory.jsonBuilder() + .startObject() + .startObject("properties") + .startObject(fieldName) + .field("type", "knn_vector") + .field("model_id", modelId) + .endObject() + .endObject() + .endObject(); + + Map mappingMap = xContentBuilderToMap(builder); + String mapping = builder.toString(); + + createKnnIndex(indexName, mapping); + assertEquals(new TreeMap<>(mappingMap), new TreeMap<>(getIndexMappingAsMap(indexName))); + + // Index the test data + for (int i = 0; i < testData.indexData.docs.length; i++) { + addKnnDoc( + indexName, + Integer.toString(testData.indexData.docs[i]), + fieldName, + Floats.asList(testData.indexData.vectors[i]).toArray() + ); + } + + // Assert we have the right number of documents in the index + refreshAllNonSystemIndices(); + assertEquals(testData.indexData.docs.length, getDocCount(indexName)); + + int k = 10; + for (int i = 0; i < testData.queries.length; i++) { + Response response = searchKNNIndex(indexName, new KNNQueryBuilder(fieldName, testData.queries[i], k), k); + String responseBody = EntityUtils.toString(response.getEntity()); + List knnResults = parseSearchResponse(responseBody, fieldName); + assertEquals(k, knnResults.size()); + + List actualScores = parseSearchResponseScore(responseBody, fieldName); + for (int j = 0; j < k; j++) { + float[] primitiveArray = Floats.toArray(Arrays.stream(knnResults.get(j).getVector()).collect(Collectors.toList())); + assertEquals( + KNNEngine.FAISS.score(KNNScoringUtil.l2Squared(testData.queries[i], primitiveArray), spaceType), + actualScores.get(j), + 0.0001 + ); + } + } + + // Delete index + deleteKNNIndex(indexName); + deleteModel(modelId); + + // Search every 5 seconds 14 times to confirm graph gets evicted + int intervals = 14; + for (int i = 0; i < intervals; i++) { + if (getTotalGraphsInCache() == 0) { + return; + } + + Thread.sleep(5 * 1000); + } + + fail("Graphs are not getting evicted"); + } + public void testDocUpdate() throws IOException { String indexName = "test-index-1"; String fieldName = "test-field-1"; diff --git a/src/test/java/org/opensearch/knn/index/IndexUtilTests.java b/src/test/java/org/opensearch/knn/index/IndexUtilTests.java index dc9c980e0..c8b29b6ef 100644 --- a/src/test/java/org/opensearch/knn/index/IndexUtilTests.java +++ b/src/test/java/org/opensearch/knn/index/IndexUtilTests.java @@ -12,6 +12,7 @@ package org.opensearch.knn.index; import com.google.common.collect.ImmutableMap; +import org.opensearch.Version; import org.opensearch.cluster.ClusterState; import org.opensearch.cluster.metadata.IndexMetadata; import org.opensearch.cluster.metadata.MappingMetadata; @@ -59,6 +60,7 @@ public void testGetLoadParameters() { Settings settings = Settings.builder().loadFromMap(indexSettings).build(); IndexMetadata indexMetadata = mock(IndexMetadata.class); when(indexMetadata.getSettings()).thenReturn(settings); + when(indexMetadata.getCreationVersion()).thenReturn(Version.CURRENT); Metadata metadata = mock(Metadata.class); when(metadata.index(anyString())).thenReturn(indexMetadata); ClusterState clusterState = mock(ClusterState.class); diff --git a/src/test/java/org/opensearch/knn/index/KNNMethodContextTests.java b/src/test/java/org/opensearch/knn/index/KNNMethodContextTests.java index 0b12980ab..1330e8da0 100644 --- a/src/test/java/org/opensearch/knn/index/KNNMethodContextTests.java +++ b/src/test/java/org/opensearch/knn/index/KNNMethodContextTests.java @@ -66,7 +66,7 @@ public void testStreams() throws IOException { public void testGetMethodComponent() { MethodComponentContext methodComponent = new MethodComponentContext("test-method", Collections.emptyMap()); KNNMethodContext knnMethodContext = new KNNMethodContext(KNNEngine.DEFAULT, SpaceType.DEFAULT, methodComponent); - assertEquals(methodComponent, knnMethodContext.getMethodComponent()); + assertEquals(methodComponent, knnMethodContext.getMethodComponentContext()); } /** @@ -275,7 +275,7 @@ public void testParse_nullParameters() throws IOException { .endObject(); Map in = xContentBuilderToMap(xContentBuilder); KNNMethodContext knnMethodContext = KNNMethodContext.parse(in); - assertTrue(knnMethodContext.getMethodComponent().getParameters().isEmpty()); + assertTrue(knnMethodContext.getMethodComponentContext().getParameters().isEmpty()); } /** @@ -291,8 +291,8 @@ public void testParse_valid() throws IOException { assertEquals(KNNEngine.DEFAULT, knnMethodContext.getKnnEngine()); assertEquals(SpaceType.DEFAULT, knnMethodContext.getSpaceType()); - assertEquals(methodName, knnMethodContext.getMethodComponent().getName()); - assertTrue(knnMethodContext.getMethodComponent().getParameters().isEmpty()); + assertEquals(methodName, knnMethodContext.getMethodComponentContext().getName()); + assertTrue(knnMethodContext.getMethodComponentContext().getParameters().isEmpty()); // Method with parameters String methodParameterKey1 = "p-1"; @@ -311,8 +311,8 @@ public void testParse_valid() throws IOException { in = xContentBuilderToMap(xContentBuilder); knnMethodContext = KNNMethodContext.parse(in); - assertEquals(methodParameterValue1, knnMethodContext.getMethodComponent().getParameters().get(methodParameterKey1)); - assertEquals(methodParameterValue2, knnMethodContext.getMethodComponent().getParameters().get(methodParameterKey2)); + assertEquals(methodParameterValue1, knnMethodContext.getMethodComponentContext().getParameters().get(methodParameterKey1)); + assertEquals(methodParameterValue2, knnMethodContext.getMethodComponentContext().getParameters().get(methodParameterKey2)); // Method with parameter that is a method context paramet @@ -330,12 +330,12 @@ public void testParse_valid() throws IOException { in = xContentBuilderToMap(xContentBuilder); knnMethodContext = KNNMethodContext.parse(in); - assertTrue(knnMethodContext.getMethodComponent().getParameters().get(methodParameterKey1) instanceof MethodComponentContext); + assertTrue(knnMethodContext.getMethodComponentContext().getParameters().get(methodParameterKey1) instanceof MethodComponentContext); assertEquals( methodParameterValue1, - ((MethodComponentContext) knnMethodContext.getMethodComponent().getParameters().get(methodParameterKey1)).getName() + ((MethodComponentContext) knnMethodContext.getMethodComponentContext().getParameters().get(methodParameterKey1)).getName() ); - assertEquals(methodParameterValue2, knnMethodContext.getMethodComponent().getParameters().get(methodParameterKey2)); + assertEquals(methodParameterValue2, knnMethodContext.getMethodComponentContext().getParameters().get(methodParameterKey2)); } /** diff --git a/src/test/java/org/opensearch/knn/index/KNNSettingsTests.java b/src/test/java/org/opensearch/knn/index/KNNSettingsTests.java index 07b2aa20a..6d6f59556 100644 --- a/src/test/java/org/opensearch/knn/index/KNNSettingsTests.java +++ b/src/test/java/org/opensearch/knn/index/KNNSettingsTests.java @@ -132,6 +132,41 @@ public void testFilteredSearchAdvanceSetting_whenValuesProvidedByUsers_thenValid assertWarnings(); } + @SneakyThrows + public void testGetEfSearch_whenNoValuesProvidedByUsers_thenDefaultSettingsUsed() { + Node mockNode = createMockNode(Collections.emptyMap()); + mockNode.start(); + ClusterService clusterService = mockNode.injector().getInstance(ClusterService.class); + mockNode.client().admin().cluster().state(new ClusterStateRequest()).actionGet(); + mockNode.client().admin().indices().create(new CreateIndexRequest(INDEX_NAME)).actionGet(); + KNNSettings.state().setClusterService(clusterService); + + Integer efSearchValue = KNNSettings.getEfSearchParam(INDEX_NAME); + mockNode.close(); + assertEquals(KNNSettings.INDEX_KNN_DEFAULT_ALGO_PARAM_EF_SEARCH, efSearchValue); + assertWarnings(); + } + + @SneakyThrows + public void testGetEfSearch_whenEFSearchValueSetByUser_thenReturnValue() { + int userProvidedEfSearch = 300; + Node mockNode = createMockNode(Collections.emptyMap()); + mockNode.start(); + ClusterService clusterService = mockNode.injector().getInstance(ClusterService.class); + mockNode.client().admin().cluster().state(new ClusterStateRequest()).actionGet(); + final Settings settings = Settings.builder() + .put(KNNSettings.KNN_ALGO_PARAM_EF_SEARCH, userProvidedEfSearch) + .put(KNNSettings.KNN_INDEX, true) + .build(); + mockNode.client().admin().indices().create(new CreateIndexRequest(INDEX_NAME, settings)).actionGet(); + KNNSettings.state().setClusterService(clusterService); + + int efSearchValue = KNNSettings.getEfSearchParam(INDEX_NAME); + mockNode.close(); + assertEquals(userProvidedEfSearch, efSearchValue); + assertWarnings(); + } + private Node createMockNode(Map configSettings) throws IOException { Path configDir = createTempDir(); File configFile = configDir.resolve("opensearch.yml").toFile(); diff --git a/src/test/java/org/opensearch/knn/index/codec/KNN80Codec/KNN80DocValuesConsumerTests.java b/src/test/java/org/opensearch/knn/index/codec/KNN80Codec/KNN80DocValuesConsumerTests.java index 242aeaa17..eeca1e5ed 100644 --- a/src/test/java/org/opensearch/knn/index/codec/KNN80Codec/KNN80DocValuesConsumerTests.java +++ b/src/test/java/org/opensearch/knn/index/codec/KNN80Codec/KNN80DocValuesConsumerTests.java @@ -18,6 +18,7 @@ import org.apache.lucene.store.IOContext; import org.junit.AfterClass; import org.junit.BeforeClass; +import org.opensearch.Version; import org.opensearch.cluster.service.ClusterService; import org.opensearch.common.settings.ClusterSettings; import org.opensearch.common.settings.Settings; @@ -283,6 +284,7 @@ public void testAddKNNBinaryField_fromScratch_faissCurrent() throws IOException spaceType, new MethodComponentContext(METHOD_HNSW, ImmutableMap.of(METHOD_PARAMETER_M, 16, METHOD_PARAMETER_EF_CONSTRUCTION, 512)) ); + knnMethodContext.getMethodComponentContext().setIndexVersion(Version.CURRENT); String parameterString = XContentFactory.jsonBuilder().map(knnEngine.getMethodAsMap(knnMethodContext)).toString(); diff --git a/src/test/java/org/opensearch/knn/index/mapper/KNNVectorFieldMapperTests.java b/src/test/java/org/opensearch/knn/index/mapper/KNNVectorFieldMapperTests.java index 4ed231063..4a5db8c8f 100644 --- a/src/test/java/org/opensearch/knn/index/mapper/KNNVectorFieldMapperTests.java +++ b/src/test/java/org/opensearch/knn/index/mapper/KNNVectorFieldMapperTests.java @@ -93,7 +93,7 @@ public class KNNVectorFieldMapperTests extends KNNTestCase { public void testBuilder_getParameters() { String fieldName = "test-field-name"; ModelDao modelDao = mock(ModelDao.class); - KNNVectorFieldMapper.Builder builder = new KNNVectorFieldMapper.Builder(fieldName, modelDao); + KNNVectorFieldMapper.Builder builder = new KNNVectorFieldMapper.Builder(fieldName, modelDao, CURRENT); assertEquals(7, builder.getParameters().size()); List actualParams = builder.getParameters().stream().map(a -> a.name).collect(Collectors.toList()); @@ -104,7 +104,7 @@ public void testBuilder_getParameters() { public void testBuilder_build_fromKnnMethodContext() { // Check that knnMethodContext takes precedent over both model and legacy ModelDao modelDao = mock(ModelDao.class); - KNNVectorFieldMapper.Builder builder = new KNNVectorFieldMapper.Builder("test-field-name-1", modelDao); + KNNVectorFieldMapper.Builder builder = new KNNVectorFieldMapper.Builder("test-field-name-1", modelDao, CURRENT); SpaceType spaceType = SpaceType.COSINESIMIL; int m = 17; @@ -141,7 +141,7 @@ public void testBuilder_build_fromKnnMethodContext() { public void testBuilder_build_fromModel() { // Check that modelContext takes precedent over legacy ModelDao modelDao = mock(ModelDao.class); - KNNVectorFieldMapper.Builder builder = new KNNVectorFieldMapper.Builder("test-field-name-1", modelDao); + KNNVectorFieldMapper.Builder builder = new KNNVectorFieldMapper.Builder("test-field-name-1", modelDao, CURRENT); SpaceType spaceType = SpaceType.COSINESIMIL; int m = 17; @@ -179,7 +179,7 @@ public void testBuilder_build_fromModel() { public void testBuilder_build_fromLegacy() { // Check legacy is picked up if model context and method context are not set ModelDao modelDao = mock(ModelDao.class); - KNNVectorFieldMapper.Builder builder = new KNNVectorFieldMapper.Builder("test-field-name-1", modelDao); + KNNVectorFieldMapper.Builder builder = new KNNVectorFieldMapper.Builder("test-field-name-1", modelDao, CURRENT); SpaceType spaceType = SpaceType.COSINESIMIL; int m = 17; @@ -237,10 +237,10 @@ public void testBuilder_parse_fromKnnMethodContext_luceneEngine() throws IOExcep Mapper.BuilderContext builderContext = new Mapper.BuilderContext(settings, new ContentPath()); builder.build(builderContext); - assertEquals(METHOD_HNSW, builder.knnMethodContext.get().getMethodComponent().getName()); + assertEquals(METHOD_HNSW, builder.knnMethodContext.get().getMethodComponentContext().getName()); assertEquals( efConstruction, - builder.knnMethodContext.get().getMethodComponent().getParameters().get(METHOD_PARAMETER_EF_CONSTRUCTION) + builder.knnMethodContext.get().getMethodComponentContext().getParameters().get(METHOD_PARAMETER_EF_CONSTRUCTION) ); assertTrue(KNNEngine.LUCENE.isInitialized()); @@ -260,8 +260,8 @@ public void testBuilder_parse_fromKnnMethodContext_luceneEngine() throws IOExcep buildParserContext(indexName, settings) ); - assertEquals(METHOD_HNSW, builder.knnMethodContext.get().getMethodComponent().getName()); - assertTrue(builderEmptyParams.knnMethodContext.get().getMethodComponent().getParameters().isEmpty()); + assertEquals(METHOD_HNSW, builder.knnMethodContext.get().getMethodComponentContext().getName()); + assertTrue(builderEmptyParams.knnMethodContext.get().getMethodComponentContext().getParameters().isEmpty()); XContentBuilder xContentBuilderUnsupportedParam = XContentFactory.jsonBuilder() .startObject() @@ -485,10 +485,10 @@ public void testTypeParser_parse_fromKnnMethodContext() throws IOException { buildParserContext(indexName, settings) ); - assertEquals(METHOD_HNSW, builder.knnMethodContext.get().getMethodComponent().getName()); + assertEquals(METHOD_HNSW, builder.knnMethodContext.get().getMethodComponentContext().getName()); assertEquals( efConstruction, - builder.knnMethodContext.get().getMethodComponent().getParameters().get(METHOD_PARAMETER_EF_CONSTRUCTION) + builder.knnMethodContext.get().getMethodComponentContext().getParameters().get(METHOD_PARAMETER_EF_CONSTRUCTION) ); // Test invalid parameter diff --git a/src/test/java/org/opensearch/knn/index/util/FaissTests.java b/src/test/java/org/opensearch/knn/index/util/FaissTests.java index 99a153780..0e7bc6482 100644 --- a/src/test/java/org/opensearch/knn/index/util/FaissTests.java +++ b/src/test/java/org/opensearch/knn/index/util/FaissTests.java @@ -5,6 +5,7 @@ package org.opensearch.knn.index.util; +import org.opensearch.Version; import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.common.xcontent.XContentFactory; import org.opensearch.knn.KNNTestCase; @@ -48,6 +49,7 @@ public void testGetMethodAsMap_whenMethodIsHNSWFlat_thenCreateCorrectIndexDescri .endObject(); Map in = xContentBuilderToMap(xContentBuilder); KNNMethodContext knnMethodContext = KNNMethodContext.parse(in); + knnMethodContext.getMethodComponentContext().setIndexVersion(Version.CURRENT); Map map = Faiss.INSTANCE.getMethodAsMap(knnMethodContext); @@ -76,6 +78,7 @@ public void testGetMethodAsMap_whenMethodIsHNSWPQ_thenCreateCorrectIndexDescript .endObject(); Map in = xContentBuilderToMap(xContentBuilder); KNNMethodContext knnMethodContext = KNNMethodContext.parse(in); + knnMethodContext.getMethodComponentContext().setIndexVersion(Version.CURRENT); Map map = Faiss.INSTANCE.getMethodAsMap(knnMethodContext); diff --git a/src/test/java/org/opensearch/knn/index/util/IndexHyperParametersUtilTests.java b/src/test/java/org/opensearch/knn/index/util/IndexHyperParametersUtilTests.java new file mode 100644 index 000000000..508b8765c --- /dev/null +++ b/src/test/java/org/opensearch/knn/index/util/IndexHyperParametersUtilTests.java @@ -0,0 +1,44 @@ +/* + * SPDX-License-Identifier: Apache-2.0 + * + * The OpenSearch Contributors require contributions made to + * this file be licensed under the Apache-2.0 license or a + * compatible open source license. + * + * Modifications Copyright OpenSearch Contributors. See + * GitHub history for details. + */ + +package org.opensearch.knn.index.util; + +import junit.framework.TestCase; +import org.junit.Assert; +import org.opensearch.Version; +import org.opensearch.knn.index.KNNSettings; + +public class IndexHyperParametersUtilTests extends TestCase { + + public void testLombokNonNull() { + Assert.assertThrows(NullPointerException.class, () -> IndexHyperParametersUtil.getHNSWEFConstructionValue(null)); + Assert.assertThrows(NullPointerException.class, () -> IndexHyperParametersUtil.getHNSWEFSearchValue(null)); + } + + public void testGetHNSWEFConstructionValue_withDifferentValues_thenSuccess() { + Assert.assertEquals(512, IndexHyperParametersUtil.getHNSWEFConstructionValue(Version.V_2_11_0)); + Assert.assertEquals(512, IndexHyperParametersUtil.getHNSWEFConstructionValue(Version.V_2_3_0)); + Assert.assertEquals( + KNNSettings.INDEX_KNN_DEFAULT_ALGO_PARAM_EF_CONSTRUCTION.intValue(), + IndexHyperParametersUtil.getHNSWEFConstructionValue(Version.CURRENT) + ); + + } + + public void testGetHNSWEFSearchValue_withDifferentValues_thenSuccess() { + Assert.assertEquals(512, IndexHyperParametersUtil.getHNSWEFConstructionValue(Version.V_2_11_0)); + Assert.assertEquals(512, IndexHyperParametersUtil.getHNSWEFConstructionValue(Version.V_2_3_0)); + Assert.assertEquals( + KNNSettings.INDEX_KNN_DEFAULT_ALGO_PARAM_EF_SEARCH.intValue(), + IndexHyperParametersUtil.getHNSWEFConstructionValue(Version.CURRENT) + ); + } +} diff --git a/src/test/java/org/opensearch/knn/jni/JNIServiceTests.java b/src/test/java/org/opensearch/knn/jni/JNIServiceTests.java index d1a5be741..185f2953d 100644 --- a/src/test/java/org/opensearch/knn/jni/JNIServiceTests.java +++ b/src/test/java/org/opensearch/knn/jni/JNIServiceTests.java @@ -14,6 +14,7 @@ import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableMap; import org.junit.BeforeClass; +import org.opensearch.Version; import org.opensearch.common.xcontent.XContentFactory; import org.opensearch.core.xcontent.XContentBuilder; import org.opensearch.knn.KNNTestCase; @@ -840,6 +841,7 @@ public void testTrain_whenConfigurationIsHNSWPQ_thenSucceed() throws IOException .endObject(); Map in = xContentBuilderToMap(xContentBuilder); KNNMethodContext knnMethodContext = KNNMethodContext.parse(in); + knnMethodContext.getMethodComponentContext().setIndexVersion(Version.CURRENT); Map parameters = KNNEngine.FAISS.getMethodAsMap(knnMethodContext); byte[] faissIndex = JNIService.trainIndex(parameters, 128, trainPointer, FAISS_NAME); diff --git a/src/test/java/org/opensearch/knn/training/TrainingJobTests.java b/src/test/java/org/opensearch/knn/training/TrainingJobTests.java index daf9645fa..6b07b2dd2 100644 --- a/src/test/java/org/opensearch/knn/training/TrainingJobTests.java +++ b/src/test/java/org/opensearch/knn/training/TrainingJobTests.java @@ -12,8 +12,9 @@ package org.opensearch.knn.training; import com.google.common.collect.ImmutableMap; +import org.opensearch.Version; +import org.opensearch.cluster.node.DiscoveryNode; import org.opensearch.knn.KNNTestCase; -import org.opensearch.knn.jni.JNIService; import org.opensearch.knn.index.KNNMethodContext; import org.opensearch.knn.index.MethodComponentContext; import org.opensearch.knn.index.SpaceType; @@ -24,6 +25,7 @@ import org.opensearch.knn.indices.Model; import org.opensearch.knn.indices.ModelMetadata; import org.opensearch.knn.indices.ModelState; +import org.opensearch.knn.jni.JNIService; import java.io.File; import java.io.IOException; @@ -39,6 +41,16 @@ public class TrainingJobTests extends KNNTestCase { + private final String trainingIndexName = "trainingindexname"; + + @Override + public void setUp() throws Exception { + super.setUp(); + DiscoveryNode mockedDiscoveryNode = mock(DiscoveryNode.class); + when(clusterService.localNode()).thenReturn(mockedDiscoveryNode); + when(mockedDiscoveryNode.getVersion()).thenReturn(Version.CURRENT); + } + public void testGetModelId() { String modelId = "test-model-id"; KNNMethodContext knnMethodContext = mock(KNNMethodContext.class); @@ -149,6 +161,8 @@ public void testRun_success() throws IOException, ExecutionException { NativeMemoryEntryContext.TrainingDataEntryContext.class ); when(trainingDataEntryContext.getKey()).thenReturn(tdataKey); + when(trainingDataEntryContext.getTrainIndexName()).thenReturn(trainingIndexName); + when(trainingDataEntryContext.getClusterService()).thenReturn(clusterService); when(nativeMemoryCacheManager.get(trainingDataEntryContext, false)).thenReturn(nativeMemoryAllocation); doAnswer(invocationOnMock -> { diff --git a/src/testFixtures/java/org/opensearch/knn/KNNRestTestCase.java b/src/testFixtures/java/org/opensearch/knn/KNNRestTestCase.java index 93f6885ce..d68d2b8d8 100644 --- a/src/testFixtures/java/org/opensearch/knn/KNNRestTestCase.java +++ b/src/testFixtures/java/org/opensearch/knn/KNNRestTestCase.java @@ -50,6 +50,7 @@ import javax.management.remote.JMXServiceURL; import java.io.IOException; +import java.io.InputStream; import java.net.URL; import java.nio.file.Files; import java.nio.file.Path; @@ -72,6 +73,7 @@ import static org.opensearch.knn.common.KNNConstants.KNN_ENGINE; import static org.opensearch.knn.common.KNNConstants.KNN_METHOD; import static org.opensearch.knn.common.KNNConstants.METHOD_ENCODER_PARAMETER; +import static org.opensearch.knn.common.KNNConstants.METHOD_PARAMETER_EF_SEARCH; import static org.opensearch.knn.common.KNNConstants.METHOD_PARAMETER_NLIST; import static org.opensearch.knn.common.KNNConstants.METHOD_PARAMETER_SPACE_TYPE; import static org.opensearch.knn.common.KNNConstants.MODEL_BLOB_PARAMETER; @@ -726,11 +728,26 @@ protected int getTotalGraphsInCache() throws Exception { * Get specific Index setting value from response */ protected String getIndexSettingByName(String indexName, String settingName) throws IOException { - @SuppressWarnings("unchecked") - Map settings = (Map) ((Map) getIndexSettings(indexName).get(indexName)).get( - "settings" - ); - return (String) settings.get(settingName); + return getIndexSettingByName(indexName, settingName, false); + } + + protected String getIndexSettingByName(String indexName, String settingName, boolean includeDefaults) throws IOException { + Request request = new Request("GET", "/" + indexName + "/_settings"); + if (includeDefaults) { + request.addParameter("include_defaults", "true"); + } + request.addParameter("flat_settings", "true"); + Response response = client().performRequest(request); + try (InputStream is = response.getEntity().getContent()) { + Map settings = (Map) XContentHelper.convertToMap(MediaTypeRegistry.JSON.xContent(), is, true) + .get(indexName); + Map defaultSettings = new HashMap<>(); + if (includeDefaults) { + defaultSettings = (Map) settings.get("defaults"); + } + Map userSettings = (Map) settings.get("settings"); + return (String) (userSettings.get(settingName) == null ? defaultSettings.get(settingName) : userSettings.get(settingName)); + } } protected void createModelSystemIndex() throws IOException { @@ -1100,6 +1117,37 @@ public String createKNNIndexCustomMethodFieldMapping( .toString(); } + public String createKNNIndexCustomMethodFieldMapping( + String fieldName, + Integer dimensions, + SpaceType spaceType, + String engine, + Integer m, + Integer ef_construction, + Integer ef_search + ) throws IOException { + return XContentFactory.jsonBuilder() + .startObject() + .startObject(PROPERTIES) + .startObject(fieldName) + .field(VECTOR_TYPE, KNN_VECTOR) + .field(DIMENSION, dimensions.toString()) + .startObject(KNN_METHOD) + .field(NAME, METHOD_HNSW) + .field(METHOD_PARAMETER_SPACE_TYPE, spaceType.getValue()) + .field(KNN_ENGINE, engine) + .startObject(PARAMETERS) + .field(METHOD_PARAMETER_EF_CONSTRUCTION, ef_construction) + .field(METHOD_PARAMETER_M, m) + .field(METHOD_PARAMETER_EF_SEARCH, ef_search) + .endObject() + .endObject() + .endObject() + .endObject() + .endObject() + .toString(); + } + // Default KNN script score settings protected Settings createKNNDefaultScriptScoreSettings() { return Settings.builder().put(NUMBER_OF_SHARDS, 1).put(NUMBER_OF_REPLICAS, 0).put(INDEX_KNN, false).build();