Skip to content

Commit

Permalink
[2.5] Support Functions (#365)
Browse files Browse the repository at this point in the history
* init 2.5

Signed-off-by: ryjiang <[email protected]>

* update milvus version

Signed-off-by: ryjiang <[email protected]>

* update proto

Signed-off-by: ryjiang <[email protected]>

* [2.5] support functions (#364)

* WIP: functions

Signed-off-by: ryjiang <[email protected]>

* WIP: functions

Signed-off-by: ryjiang <[email protected]>

* WIP

Signed-off-by: ryjiang <[email protected]>

* WIP

Signed-off-by: ryjiang <[email protected]>

* suppport functions part2

Signed-off-by: ryjiang <[email protected]>

* functions part3

Signed-off-by: ryjiang <[email protected]>

* finish functions

Signed-off-by: ryjiang <[email protected]>

* update test

Signed-off-by: ryjiang <[email protected]>

---------

Signed-off-by: ryjiang <[email protected]>

---------

Signed-off-by: ryjiang <[email protected]>
  • Loading branch information
shanghaikid authored Oct 24, 2024
1 parent 88c2ee0 commit ab7357d
Show file tree
Hide file tree
Showing 18 changed files with 548 additions and 160 deletions.
4 changes: 2 additions & 2 deletions .github/workflows/check.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,12 @@ on:
pull_request:
branches:
- main
- 2.4
- 2.5
types: [opened, synchronize]
push:
branches:
- main
- 2.4
- 2.5
jobs:
publish:
runs-on: ubuntu-latest
Expand Down
2 changes: 1 addition & 1 deletion .gitmodules
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
[submodule "proto"]
path = proto
url = https://github.com/milvus-io/milvus-proto.git
branch = 2.4
branch = master
7 changes: 7 additions & 0 deletions milvus/const/milvus.ts
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,8 @@ export enum MetricType {
SUBSTRUCTURE = 'SUBSTRUCTURE',
// SUPERSTRUCTURE superstructure
SUPERSTRUCTURE = 'SUPERSTRUCTURE',
// BM 25
BM25 = 'BM25',
}

// Index types
Expand Down Expand Up @@ -279,6 +281,11 @@ export enum DataType {
SparseFloatVector = 104,
}

export enum FunctionType {
Unknown = 0,
BM25 = 1,
}

export const VectorDataTypes = [
DataType.BinaryVector,
DataType.FloatVector,
Expand Down
14 changes: 2 additions & 12 deletions milvus/grpc/BaseClient.ts
Original file line number Diff line number Diff line change
Expand Up @@ -67,15 +67,13 @@ export class BaseClient {
protected schemaProto: Root;
// The Milvus protobuf.
protected milvusProto: Root;
// The milvus collection schema Type
protected collectionSchemaType: Type;
// The milvus field schema Type
protected fieldSchemaType: Type;

// milvus proto
protected readonly protoInternalPath = {
serviceName: 'milvus.proto.milvus.MilvusService',
collectionSchema: 'milvus.proto.schema.CollectionSchema',
fieldSchema: 'milvus.proto.schema.FieldSchema',
functionSchema: 'milvus.proto.schema.FunctionSchema',
};

/**
Expand Down Expand Up @@ -136,14 +134,6 @@ export class BaseClient {
this.schemaProto = protobuf.loadSync(this.protoFilePath.schema);
this.milvusProto = protobuf.loadSync(this.protoFilePath.milvus);

// Get the CollectionSchemaType and FieldSchemaType from the schemaProto object.
this.collectionSchemaType = this.schemaProto.lookupType(
this.protoInternalPath.collectionSchema
);
this.fieldSchemaType = this.schemaProto.lookupType(
this.protoInternalPath.fieldSchema
);

// options
this.channelOptions = {
// Milvus default max_receive_message_length is 100MB, but Milvus support change max_receive_message_length .
Expand Down
19 changes: 16 additions & 3 deletions milvus/grpc/Collection.ts
Original file line number Diff line number Diff line change
Expand Up @@ -145,15 +145,28 @@ export class Collection extends Database {
validatePartitionNumbers(num_partitions);
}

// Get the CollectionSchemaType and FieldSchemaType from the schemaProto object.
const schemaTypes = {
collectionSchemaType: this.schemaProto.lookupType(
this.protoInternalPath.collectionSchema
),
fieldSchemaType: this.schemaProto.lookupType(
this.protoInternalPath.fieldSchema
),
functionSchemaType: this.schemaProto.lookupType(
this.protoInternalPath.functionSchema
),
};

// Create the payload object with the collection_name, description, and fields.
// it should follow CollectionSchema in schema.proto
const payload = formatCollectionSchema(data, this.fieldSchemaType);
const payload = formatCollectionSchema(data, schemaTypes);

// Create the collectionParams object from the payload.
const collectionSchema = this.collectionSchemaType.create(payload);
const collectionSchema = schemaTypes.collectionSchemaType.create(payload);

// Encode the collectionParams object to bytes.
const schemaBytes = this.collectionSchemaType
const schemaBytes = schemaTypes.collectionSchemaType
.encode(collectionSchema)
.finish();

Expand Down
2 changes: 2 additions & 0 deletions milvus/grpc/Data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -142,9 +142,11 @@ export class Data extends Collection {

// Tip: The field data sequence needs to be set same as `collectionInfo.schema.fields`.
// If primarykey is set `autoid = true`, you cannot insert the data.
// and if function field is set, you need to ignore the field value in the data.
const fieldMap = new Map<string, Field>(
collectionInfo.schema.fields
.filter(v => !v.is_primary_key || !v.autoID)
.filter(v => !v.is_function_output)
.map(v => [
v.name,
{
Expand Down
33 changes: 26 additions & 7 deletions milvus/types/Collection.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import {
LoadState,
DataTypeMap,
ShowCollectionsType,
FunctionType,
} from '../';

// returned from milvus
Expand All @@ -21,17 +22,19 @@ export interface FieldSchema {
index_params: KeyValuePair[];
fieldID: string | number;
name: string;
is_primary_key?: boolean;
is_primary_key: boolean;
description: string;
data_type: keyof typeof DataType;
autoID: boolean;
state: string;
element_type?: keyof typeof DataType;
default_value?: number | string;
dataType: DataType;
is_partition_key?: boolean;
is_dynamic?: boolean;
is_clustering_key?: boolean;
is_partition_key: boolean;
is_dynamic: boolean;
is_clustering_key: boolean;
is_function_output: boolean;
nullable: boolean;
}

export interface CollectionData {
Expand All @@ -56,7 +59,7 @@ export interface ReplicaInfo {
node_ids: string[];
}

export type TypeParam = string | number;
export type TypeParam = string | number | Record<string, any>;
export type TypeParamKey = 'dim' | 'max_length' | 'max_capacity';

// create collection
Expand All @@ -67,6 +70,7 @@ export interface FieldType {
element_type?: DataType | keyof typeof DataTypeMap;
is_primary_key?: boolean;
is_partition_key?: boolean;
is_function_output?: boolean;
is_clustering_key?: boolean;
type_params?: {
[key: string]: TypeParam;
Expand All @@ -76,6 +80,9 @@ export interface FieldType {
max_capacity?: TypeParam;
max_length?: TypeParam;
default_value?: number | string;
enable_match?: boolean;
tokenizer_params?: Record<string, any>;
enable_tokenizer?: boolean;
}

export interface ShowCollectionsReq extends GrpcTimeOut {
Expand All @@ -86,6 +93,15 @@ export interface ShowCollectionsReq extends GrpcTimeOut {

export type Properties = Record<string, string | number | boolean>;

export type Function = {
name: string;
description?: string;
type: FunctionType;
input_field_names: string[];
output_field_names?: string[];
params: Record<string, any>;
};

export interface BaseCreateCollectionReq extends GrpcTimeOut {
// collection name
collection_name: string; // required, collection name
Expand All @@ -102,8 +118,9 @@ export interface BaseCreateCollectionReq extends GrpcTimeOut {
clustring_key_field?: string; // optional, clustring key field
enable_dynamic_field?: boolean; // optional, enable dynamic field, default is false
enableDynamicField?: boolean; // optional, alias of enable_dynamic_field
properties?: Properties;
db_name?: string;
properties?: Properties; // optional, collection properties
db_name?: string; // optional, db name
functions?: Function[]; // optionals, doc-in/doc-out functions
}

export interface CreateCollectionWithFieldsReq extends BaseCreateCollectionReq {
Expand Down Expand Up @@ -187,6 +204,7 @@ export interface CollectionSchema {
enable_dynamic_field: boolean;
autoID: boolean;
fields: FieldSchema[];
functions: Function[];
}

export interface DescribeCollectionResponse extends TimeStamp {
Expand All @@ -205,6 +223,7 @@ export interface DescribeCollectionResponse extends TimeStamp {
shards_num: number;
num_partitions?: string; // int64
db_name: string;
functions: Function[];
}

export interface GetCompactionPlansResponse extends resStatusResponse {
Expand Down
17 changes: 11 additions & 6 deletions milvus/types/Data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,7 @@ export interface SearchParam {
group_by_field?: string; // group by field
}

// old search api parameter type
// old search api parameter type, deprecated
export interface SearchReq extends collectionNameReq {
anns_field?: string; // your vector field name
partition_names?: string[]; // partition names
Expand All @@ -307,13 +307,18 @@ export interface SearchIteratorReq
limit: number;
}

export type SearchTextType = string | string[];
export type SearchVectorType = VectorTypes | VectorTypes[];
export type SearchDataType = SearchVectorType | SearchTextType;
export type SearchMultipleDataType = VectorTypes[] | SearchTextType[];

// simplified search api parameter type
export interface SearchSimpleReq extends collectionNameReq {
partition_names?: string[]; // partition names
anns_field?: string; // your vector field name
data?: VectorTypes[] | VectorTypes; // vector to search
vector?: VectorTypes; // alias for data
vectors?: VectorTypes[]; // alias for data
anns_field?: string; // your vector field name,rquired if you are searching on multiple vector fields collection
data?: SearchDataType; // vector or text to search
vector?: VectorTypes; // alias for data, deprecated
vectors?: VectorTypes[]; // alias for data, deprecated
output_fields?: string[];
limit?: number; // how many results you want
topk?: number; // limit alias
Expand All @@ -333,7 +338,7 @@ export type HybridSearchSingleReq = Pick<
SearchParam,
'anns_field' | 'ignore_growing' | 'group_by_field'
> & {
data: VectorTypes[] | VectorTypes; // vector to search
data: SearchDataType; // vector to search
expr?: string; // filter expression
params?: keyValueObj; // extra search parameters
transformers?: OutputTransformers; // provide custom data transformer for specific data type like bf16 or f16 vectors
Expand Down
61 changes: 35 additions & 26 deletions milvus/utils/Bytes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,13 @@ import {
BinaryVector,
SparseFloatVector,
DataType,
VectorTypes,
SearchMultipleDataType,
Float16Vector,
SparseVectorCSR,
SparseVectorCOO,
BFloat16Vector,
SparseVectorArray,
FieldSchema,
} from '..';

/**
Expand Down Expand Up @@ -250,41 +251,49 @@ export const bytesToSparseRow = (bufferData: Buffer): SparseFloatVector => {
* This function builds a placeholder group in bytes format for Milvus.
*
* @param {Root} milvusProto - The root object of the Milvus protocol.
* @param {VectorTypes[]} vectors - An array of search vectors.
* @param {SearchMultipleDataType[]} data - An array of search vectors.
* @param {DataType} vectorDataType - The data type of the vectors.
*
* @returns {Uint8Array} The placeholder group in bytes format.
*/
export const buildPlaceholderGroupBytes = (
milvusProto: Root,
vectors: VectorTypes[],
vectorDataType: DataType
data: SearchMultipleDataType,
field: FieldSchema
) => {
const { dataType, is_function_output } = field;
// create placeholder_group value
let bytes;
// parse vectors to bytes
switch (vectorDataType) {
case DataType.FloatVector:
bytes = vectors.map(v => f32ArrayToF32Bytes(v as FloatVector));
break;
case DataType.BinaryVector:
bytes = vectors.map(v => f32ArrayToBinaryBytes(v as BinaryVector));
break;
case DataType.BFloat16Vector:
bytes = vectors.map(v =>
Array.isArray(v) ? f32ArrayToBf16Bytes(v as BFloat16Vector) : v
);
break;
case DataType.Float16Vector:
bytes = vectors.map(v =>
Array.isArray(v) ? f32ArrayToF16Bytes(v as Float16Vector) : v
);
break;
case DataType.SparseFloatVector:
bytes = vectors.map(v => sparseToBytes(v as SparseFloatVector));

break;
if (is_function_output) {
// parse text to bytes
bytes = data.map(d => new TextEncoder().encode(String(d)));
} else {
// parse vectors to bytes
switch (dataType) {
case DataType.FloatVector:
bytes = data.map(v => f32ArrayToF32Bytes(v as FloatVector));
break;
case DataType.BinaryVector:
bytes = data.map(v => f32ArrayToBinaryBytes(v as BinaryVector));
break;
case DataType.BFloat16Vector:
bytes = data.map(v =>
Array.isArray(v) ? f32ArrayToBf16Bytes(v as BFloat16Vector) : v
);
break;
case DataType.Float16Vector:
bytes = data.map(v =>
Array.isArray(v) ? f32ArrayToF16Bytes(v as Float16Vector) : v
);
break;
case DataType.SparseFloatVector:
bytes = data.map(v => sparseToBytes(v as SparseFloatVector));

break;
}
}

// create placeholder_group
const PlaceholderGroup = milvusProto.lookupType(
'milvus.proto.common.PlaceholderGroup'
Expand All @@ -295,7 +304,7 @@ export const buildPlaceholderGroupBytes = (
placeholders: [
{
tag: '$0',
type: vectorDataType,
type: is_function_output ? DataType.VarChar : dataType,
values: bytes,
},
],
Expand Down
Loading

0 comments on commit ab7357d

Please sign in to comment.