Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[2.5] support functions #364

Merged
merged 8 commits into from
Oct 24, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions milvus/const/milvus.ts
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,8 @@ export enum MetricType {
SUBSTRUCTURE = 'SUBSTRUCTURE',
// SUPERSTRUCTURE superstructure
SUPERSTRUCTURE = 'SUPERSTRUCTURE',
// BM 25
BM25 = 'BM25',
}

// Index types
Expand Down Expand Up @@ -279,6 +281,11 @@ export enum DataType {
SparseFloatVector = 104,
}

export enum FunctionType {
Unknown = 0,
BM25 = 1,
}

export const VectorDataTypes = [
DataType.BinaryVector,
DataType.FloatVector,
Expand Down
14 changes: 2 additions & 12 deletions milvus/grpc/BaseClient.ts
Original file line number Diff line number Diff line change
Expand Up @@ -67,15 +67,13 @@ export class BaseClient {
protected schemaProto: Root;
// The Milvus protobuf.
protected milvusProto: Root;
// The milvus collection schema Type
protected collectionSchemaType: Type;
// The milvus field schema Type
protected fieldSchemaType: Type;

// milvus proto
protected readonly protoInternalPath = {
serviceName: 'milvus.proto.milvus.MilvusService',
collectionSchema: 'milvus.proto.schema.CollectionSchema',
fieldSchema: 'milvus.proto.schema.FieldSchema',
functionSchema: 'milvus.proto.schema.FunctionSchema',
};

/**
Expand Down Expand Up @@ -136,14 +134,6 @@ export class BaseClient {
this.schemaProto = protobuf.loadSync(this.protoFilePath.schema);
this.milvusProto = protobuf.loadSync(this.protoFilePath.milvus);

// Get the CollectionSchemaType and FieldSchemaType from the schemaProto object.
this.collectionSchemaType = this.schemaProto.lookupType(
this.protoInternalPath.collectionSchema
);
this.fieldSchemaType = this.schemaProto.lookupType(
this.protoInternalPath.fieldSchema
);

// options
this.channelOptions = {
// Milvus default max_receive_message_length is 100MB, but Milvus support change max_receive_message_length .
Expand Down
19 changes: 16 additions & 3 deletions milvus/grpc/Collection.ts
Original file line number Diff line number Diff line change
Expand Up @@ -145,15 +145,28 @@ export class Collection extends Database {
validatePartitionNumbers(num_partitions);
}

// Get the CollectionSchemaType and FieldSchemaType from the schemaProto object.
const schemaTypes = {
collectionSchemaType: this.schemaProto.lookupType(
this.protoInternalPath.collectionSchema
),
fieldSchemaType: this.schemaProto.lookupType(
this.protoInternalPath.fieldSchema
),
functionSchemaType: this.schemaProto.lookupType(
this.protoInternalPath.functionSchema
),
};

// Create the payload object with the collection_name, description, and fields.
// it should follow CollectionSchema in schema.proto
const payload = formatCollectionSchema(data, this.fieldSchemaType);
const payload = formatCollectionSchema(data, schemaTypes);

// Create the collectionParams object from the payload.
const collectionSchema = this.collectionSchemaType.create(payload);
const collectionSchema = schemaTypes.collectionSchemaType.create(payload);

// Encode the collectionParams object to bytes.
const schemaBytes = this.collectionSchemaType
const schemaBytes = schemaTypes.collectionSchemaType
.encode(collectionSchema)
.finish();

Expand Down
2 changes: 2 additions & 0 deletions milvus/grpc/Data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -141,9 +141,11 @@ export class Data extends Collection {

// Tip: The field data sequence needs to be set same as `collectionInfo.schema.fields`.
// If primarykey is set `autoid = true`, you cannot insert the data.
// and if function field is set, you need to ignore the field value in the data.
const fieldMap = new Map<string, Field>(
collectionInfo.schema.fields
.filter(v => !v.is_primary_key || !v.autoID)
.filter(v => !v.is_function_output)
.map(v => [
v.name,
{
Expand Down
33 changes: 26 additions & 7 deletions milvus/types/Collection.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ import {
LoadState,
DataTypeMap,
ShowCollectionsType,
FunctionType,
} from '../';

// returned from milvus
Expand All @@ -21,17 +22,19 @@ export interface FieldSchema {
index_params: KeyValuePair[];
fieldID: string | number;
name: string;
is_primary_key?: boolean;
is_primary_key: boolean;
description: string;
data_type: keyof typeof DataType;
autoID: boolean;
state: string;
element_type?: keyof typeof DataType;
default_value?: number | string;
dataType: DataType;
is_partition_key?: boolean;
is_dynamic?: boolean;
is_clustering_key?: boolean;
is_partition_key: boolean;
is_dynamic: boolean;
is_clustering_key: boolean;
is_function_output: boolean;
nullable: boolean;
}

export interface CollectionData {
Expand All @@ -56,7 +59,7 @@ export interface ReplicaInfo {
node_ids: string[];
}

export type TypeParam = string | number;
export type TypeParam = string | number | Record<string, any>;
export type TypeParamKey = 'dim' | 'max_length' | 'max_capacity';

// create collection
Expand All @@ -67,6 +70,7 @@ export interface FieldType {
element_type?: DataType | keyof typeof DataTypeMap;
is_primary_key?: boolean;
is_partition_key?: boolean;
is_function_output?: boolean;
type_params?: {
[key: string]: TypeParam;
};
Expand All @@ -75,6 +79,9 @@ export interface FieldType {
max_capacity?: TypeParam;
max_length?: TypeParam;
default_value?: number | string;
enable_match?: boolean;
tokenizer_params?: Record<string, any>;
enable_tokenizer?: boolean;
}

export interface ShowCollectionsReq extends GrpcTimeOut {
Expand All @@ -85,6 +92,15 @@ export interface ShowCollectionsReq extends GrpcTimeOut {

export type Properties = Record<string, string | number | boolean>;

export type Function = {
name: string;
description?: string;
type: FunctionType;
input_field_names: string[];
output_field_names?: string[];
params: Record<string, any>;
};

export interface BaseCreateCollectionReq extends GrpcTimeOut {
// collection name
collection_name: string; // required, collection name
Expand All @@ -100,8 +116,9 @@ export interface BaseCreateCollectionReq extends GrpcTimeOut {
partition_key_field?: string; // optional, partition key field
enable_dynamic_field?: boolean; // optional, enable dynamic field, default is false
enableDynamicField?: boolean; // optional, alias of enable_dynamic_field
properties?: Properties;
db_name?: string;
properties?: Properties; // optional, collection properties
db_name?: string; // optional, db name
functions?: Function[]; // optionals, doc-in/doc-out functions
}

export interface CreateCollectionWithFieldsReq extends BaseCreateCollectionReq {
Expand Down Expand Up @@ -185,6 +202,7 @@ export interface CollectionSchema {
enable_dynamic_field: boolean;
autoID: boolean;
fields: FieldSchema[];
functions: Function[];
}

export interface DescribeCollectionResponse extends TimeStamp {
Expand All @@ -203,6 +221,7 @@ export interface DescribeCollectionResponse extends TimeStamp {
shards_num: number;
num_partitions?: string; // int64
db_name: string;
functions: Function[];
}

export interface GetCompactionPlansResponse extends resStatusResponse {
Expand Down
17 changes: 11 additions & 6 deletions milvus/types/Data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,7 @@ export interface SearchParam {
group_by_field?: string; // group by field
}

// old search api parameter type
// old search api parameter type, deprecated
export interface SearchReq extends collectionNameReq {
anns_field?: string; // your vector field name
partition_names?: string[]; // partition names
Expand All @@ -307,13 +307,18 @@ export interface SearchIteratorReq
limit: number;
}

export type SearchTextType = string | string[];
export type SearchVectorType = VectorTypes | VectorTypes[];
export type SearchDataType = SearchVectorType | SearchTextType;
export type SearchMultipleDataType = VectorTypes[] | SearchTextType[];

// simplified search api parameter type
export interface SearchSimpleReq extends collectionNameReq {
partition_names?: string[]; // partition names
anns_field?: string; // your vector field name
data?: VectorTypes[] | VectorTypes; // vector to search
vector?: VectorTypes; // alias for data
vectors?: VectorTypes[]; // alias for data
anns_field?: string; // your vector field name,rquired if you are searching on multiple vector fields collection
data?: SearchDataType; // vector or text to search
vector?: VectorTypes; // alias for data, deprecated
vectors?: VectorTypes[]; // alias for data, deprecated
output_fields?: string[];
limit?: number; // how many results you want
topk?: number; // limit alias
Expand All @@ -333,7 +338,7 @@ export type HybridSearchSingleReq = Pick<
SearchParam,
'anns_field' | 'ignore_growing' | 'group_by_field'
> & {
data: VectorTypes[] | VectorTypes; // vector to search
data: SearchDataType; // vector to search
expr?: string; // filter expression
params?: keyValueObj; // extra search parameters
transformers?: OutputTransformers; // provide custom data transformer for specific data type like bf16 or f16 vectors
Expand Down
61 changes: 35 additions & 26 deletions milvus/utils/Bytes.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,13 @@ import {
BinaryVector,
SparseFloatVector,
DataType,
VectorTypes,
SearchMultipleDataType,
Float16Vector,
SparseVectorCSR,
SparseVectorCOO,
BFloat16Vector,
SparseVectorArray,
FieldSchema,
} from '..';

/**
Expand Down Expand Up @@ -250,41 +251,49 @@ export const bytesToSparseRow = (bufferData: Buffer): SparseFloatVector => {
* This function builds a placeholder group in bytes format for Milvus.
*
* @param {Root} milvusProto - The root object of the Milvus protocol.
* @param {VectorTypes[]} vectors - An array of search vectors.
* @param {SearchMultipleDataType[]} data - An array of search vectors.
* @param {DataType} vectorDataType - The data type of the vectors.
*
* @returns {Uint8Array} The placeholder group in bytes format.
*/
export const buildPlaceholderGroupBytes = (
milvusProto: Root,
vectors: VectorTypes[],
vectorDataType: DataType
data: SearchMultipleDataType,
field: FieldSchema
) => {
const { dataType, is_function_output } = field;
// create placeholder_group value
let bytes;
// parse vectors to bytes
switch (vectorDataType) {
case DataType.FloatVector:
bytes = vectors.map(v => f32ArrayToF32Bytes(v as FloatVector));
break;
case DataType.BinaryVector:
bytes = vectors.map(v => f32ArrayToBinaryBytes(v as BinaryVector));
break;
case DataType.BFloat16Vector:
bytes = vectors.map(v =>
Array.isArray(v) ? f32ArrayToBf16Bytes(v as BFloat16Vector) : v
);
break;
case DataType.Float16Vector:
bytes = vectors.map(v =>
Array.isArray(v) ? f32ArrayToF16Bytes(v as Float16Vector) : v
);
break;
case DataType.SparseFloatVector:
bytes = vectors.map(v => sparseToBytes(v as SparseFloatVector));

break;
if (is_function_output) {
// parse text to bytes
bytes = data.map(d => new TextEncoder().encode(String(d)));
} else {
// parse vectors to bytes
switch (dataType) {
case DataType.FloatVector:
bytes = data.map(v => f32ArrayToF32Bytes(v as FloatVector));
break;
case DataType.BinaryVector:
bytes = data.map(v => f32ArrayToBinaryBytes(v as BinaryVector));
break;
case DataType.BFloat16Vector:
bytes = data.map(v =>
Array.isArray(v) ? f32ArrayToBf16Bytes(v as BFloat16Vector) : v
);
break;
case DataType.Float16Vector:
bytes = data.map(v =>
Array.isArray(v) ? f32ArrayToF16Bytes(v as Float16Vector) : v
);
break;
case DataType.SparseFloatVector:
bytes = data.map(v => sparseToBytes(v as SparseFloatVector));

break;
}
}

// create placeholder_group
const PlaceholderGroup = milvusProto.lookupType(
'milvus.proto.common.PlaceholderGroup'
Expand All @@ -295,7 +304,7 @@ export const buildPlaceholderGroupBytes = (
placeholders: [
{
tag: '$0',
type: vectorDataType,
type: is_function_output ? DataType.VarChar : dataType,
values: bytes,
},
],
Expand Down
Loading
Loading