Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Removes Array based vector serialization #2587

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
* Update package name to fix compilation issue [#2513](https://github.com/opensearch-project/k-NN/pull/2513)
* Update gradle to 8.13 to fix command exec on java 21 [#2571](https://github.com/opensearch-project/k-NN/pull/2571)
* Add fix for nmslib pragma on arm [#2574](https://github.com/opensearch-project/k-NN/pull/2574)
* Remove array-based vector serialization [#2587](https://github.com/opensearch-project/k-NN/pull/2587)
### Refactoring
* Small Refactor Post Lucene 10.0.1 upgrade [#2541](https://github.com/opensearch-project/k-NN/pull/2541)
* Refactor codec to leverage backwards_codecs [#2546](https://github.com/opensearch-project/k-NN/pull/2546)
Expand Down
4 changes: 2 additions & 2 deletions src/main/java/org/opensearch/knn/index/VectorDataType.java
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
import org.apache.lucene.document.KnnFloatVectorField;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.util.BytesRef;
import org.opensearch.knn.index.codec.util.KNNVectorAsCollectionOfFloatsSerializer;
import org.opensearch.knn.index.codec.util.KNNVectorSerializer;
import org.opensearch.knn.index.codec.util.KNNVectorSerializerFactory;
import org.opensearch.knn.index.memory.NativeMemoryAllocation;
import org.opensearch.knn.jni.JNICommons;
import org.opensearch.knn.training.BinaryTrainingDataConsumer;
Expand Down Expand Up @@ -105,7 +105,7 @@ public FieldType createKnnVectorFieldType(int dimension, KNNVectorSimilarityFunc

@Override
public float[] getVectorFromBytesRef(BytesRef binaryValue) {
final KNNVectorSerializer vectorSerializer = KNNVectorSerializerFactory.getSerializerByBytesRef(binaryValue);
final KNNVectorSerializer vectorSerializer = KNNVectorAsCollectionOfFloatsSerializer.INSTANCE;
return vectorSerializer.byteToFloatArray(binaryValue);
}

Expand Down
4 changes: 2 additions & 2 deletions src/main/java/org/opensearch/knn/index/VectorField.java
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,15 @@
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexableFieldType;
import org.apache.lucene.util.BytesRef;
import org.opensearch.knn.index.codec.util.KNNVectorAsCollectionOfFloatsSerializer;
import org.opensearch.knn.index.codec.util.KNNVectorSerializer;
import org.opensearch.knn.index.codec.util.KNNVectorSerializerFactory;

public class VectorField extends Field {

public VectorField(String name, float[] value, IndexableFieldType type) {
super(name, new BytesRef(), type);
try {
final KNNVectorSerializer vectorSerializer = KNNVectorSerializerFactory.getDefaultSerializer();
final KNNVectorSerializer vectorSerializer = KNNVectorAsCollectionOfFloatsSerializer.INSTANCE;
final byte[] floatToByte = vectorSerializer.floatToByteArray(value);
this.setBytesValue(floatToByte);
} catch (Exception e) {
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
public class KNNVectorAsCollectionOfFloatsSerializer implements KNNVectorSerializer {
private static final int BYTES_IN_FLOAT = 4;

public static final KNNVectorAsCollectionOfFloatsSerializer INSTANCE = new KNNVectorAsCollectionOfFloatsSerializer();

@Override
public byte[] floatToByteArray(float[] input) {
final ByteBuffer bb = ByteBuffer.allocate(input.length * BYTES_IN_FLOAT).order(ByteOrder.BIG_ENDIAN);
Expand Down

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
import org.opensearch.knn.index.KnnCircuitBreakerException;
import org.opensearch.knn.index.SpaceType;
import org.opensearch.knn.index.VectorDataType;
import org.opensearch.knn.index.codec.util.KNNVectorSerializerFactory;
import org.opensearch.knn.index.codec.util.KNNVectorAsCollectionOfFloatsSerializer;
import org.opensearch.knn.index.engine.KNNEngine;
import org.opensearch.knn.index.engine.KNNMethodContext;
import org.opensearch.knn.index.engine.MethodComponentContext;
Expand Down Expand Up @@ -74,7 +74,7 @@ public static StoredField createStoredFieldForByteVector(String name, byte[] vec
* @param vector vector to be added to stored field
*/
public static StoredField createStoredFieldForFloatVector(String name, float[] vector) {
return new StoredField(name, KNNVectorSerializerFactory.getDefaultSerializer().floatToByteArray(vector));
return new StoredField(name, KNNVectorAsCollectionOfFloatsSerializer.INSTANCE.floatToByteArray(vector));
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.opensearch.knn.index.VectorDataType;
import org.opensearch.knn.index.codec.util.KNNVectorAsCollectionOfFloatsSerializer;
import org.opensearch.knn.index.codec.util.KNNVectorSerializer;
import org.opensearch.knn.index.codec.util.KNNVectorSerializerFactory;

import java.io.IOException;

Expand Down Expand Up @@ -121,7 +121,7 @@ private <T> T extractFromKnnVectorValues(
}

private float[] getFloatVectorFromByteRef(final BytesRef bytesRef) {
final KNNVectorSerializer vectorSerializer = KNNVectorSerializerFactory.getSerializerByBytesRef(bytesRef);
final KNNVectorSerializer vectorSerializer = KNNVectorAsCollectionOfFloatsSerializer.INSTANCE;
return vectorSerializer.byteToFloatArray(bytesRef);
}
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
import org.apache.lucene.store.Directory;
import org.opensearch.index.fielddata.ScriptDocValues;
import org.junit.Before;
import org.opensearch.knn.index.codec.util.KNNVectorSerializerFactory;
import org.opensearch.knn.index.codec.util.KNNVectorAsCollectionOfFloatsSerializer;

import java.io.IOException;

Expand All @@ -43,7 +43,7 @@ private void createKNNVectorDocument(Directory directory) throws IOException {
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
IndexWriter writer = new IndexWriter(directory, conf);
Document knnDocument = new Document();
byte[] vectorBinary = KNNVectorSerializerFactory.getDefaultSerializer().floatToByteArray(new float[] { 1.0f, 2.0f });
byte[] vectorBinary = KNNVectorAsCollectionOfFloatsSerializer.INSTANCE.floatToByteArray(new float[] { 1.0f, 2.0f });
knnDocument.add(new BinaryDocValuesField(MOCK_INDEX_FIELD_NAME, new BytesRef(vectorBinary)));
knnDocument.add(new NumericDocValuesField(MOCK_NUMERIC_INDEX_FIELD_NAME, 1000));
writer.addDocument(knnDocument);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
import org.junit.Before;
import org.junit.After;
import org.junit.Test;
import org.opensearch.knn.index.codec.util.KNNVectorSerializerFactory;
import org.opensearch.knn.index.codec.util.KNNVectorAsCollectionOfFloatsSerializer;

import java.io.IOException;

Expand Down Expand Up @@ -167,7 +167,7 @@ private void createKNNVectorDocument(Directory directory, Class<?> valuesClass)
Field field;

if (BinaryDocValues.class.equals(valuesClass)) {
byte[] vectorBinary = KNNVectorSerializerFactory.getDefaultSerializer().floatToByteArray(SAMPLE_VECTOR_DATA);
byte[] vectorBinary = KNNVectorAsCollectionOfFloatsSerializer.INSTANCE.floatToByteArray(SAMPLE_VECTOR_DATA);
field = new BinaryDocValuesField(MOCK_INDEX_FIELD_NAME, new BytesRef(vectorBinary));
} else if (ByteVectorValues.class.equals(valuesClass)) {
field = new KnnByteVectorField(MOCK_INDEX_FIELD_NAME, SAMPLE_BYTE_VECTOR_DATA);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import org.apache.lucene.util.BytesRef;
import org.junit.Assert;
import org.opensearch.knn.KNNTestCase;
import org.opensearch.knn.index.codec.util.KNNVectorSerializerFactory;
import org.opensearch.knn.index.codec.util.KNNVectorAsCollectionOfFloatsSerializer;

import java.io.IOException;

Expand Down Expand Up @@ -82,7 +82,7 @@ private void createKNNFloatVectorDocument(Directory directory) throws IOExceptio
IndexWriterConfig conf = newIndexWriterConfig(new MockAnalyzer(random()));
IndexWriter writer = new IndexWriter(directory, conf);
Document knnDocument = new Document();
BytesRef bytesRef = new BytesRef(KNNVectorSerializerFactory.getDefaultSerializer().floatToByteArray(SAMPLE_FLOAT_VECTOR_DATA));
BytesRef bytesRef = new BytesRef(KNNVectorAsCollectionOfFloatsSerializer.INSTANCE.floatToByteArray(SAMPLE_FLOAT_VECTOR_DATA));
knnDocument.add(new BinaryDocValuesField(MOCK_FLOAT_INDEX_FIELD_NAME, bytesRef));
writer.addDocument(knnDocument);
writer.commit();
Expand Down
Loading
Loading