Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge branch 'main' of github.com:milvus-io/milvus-sdk-node into bitmap
Browse files Browse the repository at this point in the history
Signed-off-by: ryjiang <[email protected]>
shanghaikid committed Nov 18, 2024

Verified

This commit was signed with the committer’s verified signature.
shanghaikid ryjiang
2 parents 713811a + 7c44b88 commit ec5070c
Showing 7 changed files with 305 additions and 53 deletions.
2 changes: 1 addition & 1 deletion milvus/types/Collection.ts
Original file line number Diff line number Diff line change
@@ -83,7 +83,7 @@ export interface FieldType {
nullable?: boolean;
enable_match?: boolean;
tokenizer_params?: Record<string, any>;
enable_tokenizer?: boolean;
enable_analyzer?: boolean;
}

export interface ShowCollectionsReq extends GrpcTimeOut {
89 changes: 48 additions & 41 deletions milvus/utils/Format.ts
Original file line number Diff line number Diff line change
@@ -189,11 +189,11 @@ export const formatAddress = (address: string) => {
};

/**
* Assigns properties with keys `dim` or `max_length` to the `type_params` object of a `FieldType` object.
* If the property exists in the `field` object, it is converted to a string and then deleted from the `field` object.
* If the property already exists in the `type_params` object, it is also converted to a string.
* Assigns specified properties from the `field` object to `type_params` within the `FieldType` object.
* Converts properties to strings, serializing objects as JSON strings if needed, then removes them from `field`.
*
* @param field The `FieldType` object to modify.
* @param field - The `FieldType` object to modify.
* @param typeParamKeys - Keys to assign to `type_params` if present in `field`.
* @returns The modified `FieldType` object.
*/
export const assignTypeParams = (
@@ -203,31 +203,30 @@ export const assignTypeParams = (
'max_length',
'max_capacity',
'enable_match',
'enable_tokenizer',
'tokenizer_params',
'enable_analyzer',
'analyzer_params',
]
) => {
let newField = cloneObj<FieldType>(field);
): FieldType => {
const newField = cloneObj<FieldType>(field);

// Initialize `type_params` if undefined
newField.type_params ??= {};

typeParamKeys.forEach(key => {
if (newField.hasOwnProperty(key)) {
// if the property exists in the field object, assign it to the type_params object
newField.type_params = newField.type_params || {};
newField.type_params[key] =
typeof newField[key as keyof FieldType] !== 'object'
? String(newField[key as keyof FieldType] ?? '')
: (newField[key as keyof FieldType] as TypeParam);
// delete the property from the field object
if (key in newField) {
const value = newField[key as keyof FieldType];
// Convert the value to a string, JSON-stringify if it’s an object
newField.type_params![key] =
typeof value === 'object' ? JSON.stringify(value) : String(value ?? '');
delete newField[key as keyof FieldType];
}

if (newField.type_params && newField.type_params[key]) {
// if the property already exists in the type_params object, convert it to a string,
newField.type_params[key] =
typeof newField.type_params[key] !== 'object'
? String(newField.type_params[key])
: newField.type_params[key];
}
});

// delete type_params if it's empty
if (!Object.keys(newField.type_params).length) {
delete newField.type_params;
}

return newField;
};

@@ -328,7 +327,27 @@ export const formatCollectionSchema = (
fields = (data as CreateCollectionWithSchemaReq).schema;
}

const payload = {
let payload = {} as any;

const functionOutputFields: string[] = [];

// if functions is set, parse its params to key-value pairs, and delete inputs and outputs
if (functions) {
payload.functions = functions.map((func: any) => {
const { input_field_names, output_field_names, ...rest } = func;

functionOutputFields.push(...output_field_names);

return schemaTypes.functionSchemaType.create({
...rest,
inputFieldNames: input_field_names,
outputFieldNames: output_field_names,
params: parseToKeyValue(func.params, true),
});
});
}

payload = {
name: collection_name,
description: description || '',
enableDynamicField: !!enableDynamicField || !!enable_dynamic_field,
@@ -352,7 +371,8 @@ export const formatCollectionSchema = (
isPrimaryKey: !!is_primary_key,
isPartitionKey:
!!is_partition_key || field.name === partition_key_field,
isFunctionOutput: !!is_function_output,
isFunctionOutput:
!!is_function_output || functionOutputFields.includes(field.name),
isClusteringKey:
!!field.is_clustering_key || field.name === clustring_key_field,
};
@@ -372,21 +392,8 @@ export const formatCollectionSchema = (
}
return schemaTypes.fieldSchemaType.create(createObj);
}),
functions: [],
} as any;

// if functions is set, parse its params to key-value pairs, and delete inputs and outputs
if (functions) {
payload.functions = functions.map((func: any) => {
const { input_field_names, output_field_names, ...rest } = func;
return schemaTypes.functionSchemaType.create({
...rest,
inputFieldNames: input_field_names,
outputFieldNames: output_field_names,
params: parseToKeyValue(func.params, true),
});
});
}
...payload,
};

return payload;
};
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "@zilliz/milvus2-sdk-node",
"author": "[email protected]",
"milvusVersion": "master-20241113-af433ffd-amd64",
"milvusVersion": "master-20241118-12ed40e1-amd64",
"version": "2.4.9",
"main": "dist/milvus",
"files": [
205 changes: 205 additions & 0 deletions test/grpc/FullTextSearch.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
import {
MilvusClient,
DataType,
ErrorCode,
MetricType,
ConsistencyLevelEnum,
IndexType,
} from '../../milvus';
import {
IP,
genCollectionParams,
GENERATE_NAME,
generateInsertData,
dynamicFields,
} from '../tools';

// Client shared by every test in this suite.
const milvusClient = new MilvusClient({ address: IP, logLevel: 'info' });
// Collection name produced by the shared test helper.
const COLLECTION = GENERATE_NAME();
// Dedicated database the suite creates in `beforeAll` and drops in `afterAll`.
const dbParam = { db_name: 'FullTextSearch' };
const numPartitions = 3;

// VarChar field with the analyzer and text match switched on; the analyzer
// is configured with the `jieba` tokenizer.
const textField = {
  name: 'text',
  description: 'text field',
  data_type: DataType.VarChar,
  max_length: 200,
  is_partition_key: false,
  enable_analyzer: true,
  enable_match: true,
  analyzer_params: { tokenizer: 'jieba' },
};

// Collection definition: one 4-dimensional float vector, manual ids,
// partition key enabled, dynamic fields enabled, plus the text field above.
const createCollectionParams = genCollectionParams({
  collectionName: COLLECTION,
  dim: [4],
  vectorType: [DataType.FloatVector],
  autoID: false,
  partitionKeyEnabled: true,
  numPartitions,
  enableDynamic: true,
  fields: [textField],
});

/**
 * Integration suite for text match on an analyzer-enabled VarChar field.
 *
 * The tests are order dependent: the collection is created first, then data
 * is inserted, the vector field is indexed and the collection loaded, and
 * only then are query/search requests with a `TEXT_MATCH` filter issued.
 */
describe(`Full text search API`, () => {
  beforeAll(async () => {
    // create db and use db
    await milvusClient.createDatabase(dbParam);
    await milvusClient.use(dbParam);
  });

  afterAll(async () => {
    await milvusClient.dropCollection({
      collection_name: COLLECTION,
    });
    await milvusClient.dropDatabase(dbParam);
  });

  it(`Create collection with analyzer-enabled text field should success`, async () => {
    const create = await milvusClient.createCollection(createCollectionParams);

    expect(create.error_code).toEqual(ErrorCode.SUCCESS);

    // Named `collectionInfo` rather than `describe` so Jest's global
    // `describe` is not shadowed inside this test.
    const collectionInfo = await milvusClient.describeCollection({
      collection_name: COLLECTION,
    });
    // every requested field should be present in the created schema
    expect(collectionInfo.schema.fields.length).toEqual(
      createCollectionParams.fields.length
    );

    // find varchar field
    const text = collectionInfo.schema.fields.find(
      field => field.name === 'text'
    );

    const enableMatch = text?.type_params?.find(
      param => param.key === 'enable_match'
    );

    const enableAnalyzer = text?.type_params?.find(
      param => param.key === 'enable_analyzer'
    );

    const analyzerParams = text?.type_params?.find(
      param => param.key === 'analyzer_params'
    );

    // type params are asserted in their string form; `analyzer_params` is
    // asserted as a JSON string that parses back to the original object
    expect(enableMatch?.value).toEqual('true');
    expect(enableAnalyzer?.value).toEqual('true');
    expect(JSON.parse(analyzerParams?.value as string)).toEqual({
      tokenizer: 'jieba',
    });
  });

  it(`Insert data should success`, async () => {
    const data = generateInsertData(
      [...createCollectionParams.fields, ...dynamicFields],
      10
    );

    const insert = await milvusClient.insert({
      collection_name: COLLECTION,
      fields_data: data,
    });

    expect(insert.status.error_code).toEqual(ErrorCode.SUCCESS);
  });

  it(`Create index and load collection should success`, async () => {
    const createIndex = await milvusClient.createIndex({
      collection_name: COLLECTION,
      index_name: 't2',
      field_name: 'vector',
      index_type: IndexType.AUTOINDEX,
      metric_type: MetricType.COSINE,
    });

    expect(createIndex.error_code).toEqual(ErrorCode.SUCCESS);

    // load
    const load = await milvusClient.loadCollection({
      collection_name: COLLECTION,
    });

    expect(load.error_code).toEqual(ErrorCode.SUCCESS);
  });

  it(`query with TEXT_MATCH filter should success`, async () => {
    // NOTE(review): both `expr` and `filter` are passed; which one the client
    // sends is not visible from this file - confirm and drop the unused one.
    const query = await milvusClient.query({
      collection_name: COLLECTION,
      limit: 10,
      expr: 'id > 0',
      output_fields: ['text'],
      filter: "TEXT_MATCH(text, 'apple')",
      consistency_level: ConsistencyLevelEnum.Strong,
    });

    expect(query.status.error_code).toEqual(ErrorCode.SUCCESS);
    // every returned text value should be 'apple'
    // (passes vacuously when no row is returned)
    query.data.forEach(item => {
      expect(item.text).toEqual('apple');
    });
  });

  it(`search with TEXT_MATCH filter should success`, async () => {
    // search nq = 1
    const search = await milvusClient.search({
      collection_name: COLLECTION,
      limit: 10,
      data: [1, 2, 3, 4],
      output_fields: ['text'],
      filter: "TEXT_MATCH(text, 'apple')",
      params: { drop_ratio_search: 0.6 },
      consistency_level: ConsistencyLevelEnum.Strong,
    });

    expect(search.status.error_code).toEqual(ErrorCode.SUCCESS);
    // fail with a clear assertion instead of a TypeError on an empty result
    expect(search.results.length).toBeGreaterThan(0);
    // expect text value to be 'apple'
    expect(search.results[0].text).toEqual('apple');

    // nq > 1
    const search2 = await milvusClient.search({
      collection_name: COLLECTION,
      limit: 10,
      data: [
        [1, 2, 3, 4],
        [5, 6, 7, 8],
      ],
      output_fields: ['*'],
      filter: "TEXT_MATCH(text, 'apple')",
      params: { drop_ratio_search: 0.6 },
      consistency_level: ConsistencyLevelEnum.Strong,
    });

    expect(search2.status.error_code).toEqual(ErrorCode.SUCCESS);
    expect(search2.results.length).toBeGreaterThan(0);
    // expect text value to be 'apple'
    expect(search2.results[0][0].text).toEqual('apple');

    // multiple search
    const search3 = await milvusClient.search({
      collection_name: COLLECTION,
      limit: 10,
      data: [
        {
          data: [1, 2, 3, 4],
          anns_field: 'vector',
          params: { nprobe: 2 },
        },
        {
          data: [5, 6, 7, 8],
          anns_field: 'vector',
        },
      ],
      filter: "TEXT_MATCH(text, 'apple')",
      output_fields: ['text'],
      consistency_level: ConsistencyLevelEnum.Strong,
    });

    expect(search3.status.error_code).toEqual(ErrorCode.SUCCESS);
    expect(search3.results.length).toBeGreaterThan(0);
    // expect text value to be 'apple'
    expect(search3.results[0].text).toEqual('apple');
  });
});
4 changes: 2 additions & 2 deletions test/grpc/Functions.spec.ts
Original file line number Diff line number Diff line change
@@ -14,7 +14,7 @@ import {
dynamicFields,
} from '../tools';

const milvusClient = new MilvusClient({ address: IP, logLevel: 'info' });
const milvusClient = new MilvusClient({ address: IP, logLevel: 'debug' });
const COLLECTION = GENERATE_NAME();
const dbParam = {
db_name: 'Functions',
@@ -37,7 +37,7 @@ const createCollectionParams = genCollectionParams({
data_type: DataType.VarChar,
max_length: 20,
is_partition_key: false,
enable_tokenizer: true,
enable_analyzer: true,
},
{
name: 'sparse',
2 changes: 1 addition & 1 deletion test/tools/collection.ts
Original file line number Diff line number Diff line change
@@ -132,7 +132,7 @@ export const genCollectionParams = (data: {
default_value: DEFAULT_STRING_VALUE,
max_length: MAX_LENGTH,
is_partition_key: partitionKeyEnabled,
enable_tokenizer: true,
enable_analyzer: true,
},
{
name: 'json',
54 changes: 47 additions & 7 deletions test/utils/Format.spec.ts
Original file line number Diff line number Diff line change
@@ -29,6 +29,7 @@ import {
formatSearchData,
buildSearchRequest,
FieldSchema,
CreateCollectionReq,
buildSearchParams,
SearchSimpleReq,
} from '../../milvus';
@@ -178,15 +179,15 @@ describe('utils/format', () => {
expect(methodName).toBe('123');
});

it('should assign properties with keys `dim` or `max_length` to the `type_params`, `enable_match`, `tokenizer_params`, `enable_tokenizer` object and delete them from the `field` object', () => {
it('should assign properties with keys `dim` or `max_length` to the `type_params`, `enable_match`, `analyzer_params`, `enable_analyzer` object and delete them from the `field` object', () => {
const field = {
name: 'vector',
data_type: 'BinaryVector',
dim: 128,
max_length: 100,
enable_match: true,
tokenizer_params: { key: 'value' },
enable_tokenizer: true,
analyzer_params: { key: 'value' },
enable_analyzer: true,
} as FieldType;
const expectedOutput = {
name: 'vector',
@@ -195,8 +196,8 @@ describe('utils/format', () => {
dim: '128',
max_length: '100',
enable_match: 'true',
tokenizer_params: { key: 'value' },
enable_tokenizer: 'true',
analyzer_params: JSON.stringify({ key: 'value' }),
enable_analyzer: 'true',
},
};
expect(assignTypeParams(field)).toEqual(expectedOutput);
@@ -291,8 +292,23 @@ describe('utils/format', () => {
max_capacity: 64,
element_type: DataType.Int64,
},
{
name: 'sparse',
data_type: DataType.SparseFloatVector,
description: 'sparse field',
},
],
} as any;
functions: [
{
name: 'bm25f1',
description: 'bm25 function',
type: 1,
input_field_names: ['testField1'],
output_field_names: ['sparse'],
params: { a: 1 },
},
],
} as CreateCollectionReq;

const schemaProtoPath = path.resolve(
__dirname,
@@ -346,18 +362,42 @@ describe('utils/format', () => {
isPrimaryKey: false,
isPartitionKey: false,
isFunctionOutput: false,
isClusteringKey: false,
elementType: 5,
element_type: 5,
},
{
typeParams: [],
indexParams: [],
name: 'sparse',
description: 'sparse field',
data_type: 104,
dataType: 104,
isPrimaryKey: false,
isPartitionKey: false,
isFunctionOutput: true,
isClusteringKey: false,
},
],
functions: [],
functions: [
{
inputFieldNames: ['testField1'],
inputFieldIds: [],
outputFieldNames: ['sparse'],
outputFieldIds: [],
params: [{ key: 'a', value: '1' }],
name: 'bm25f1',
description: 'bm25 function',
type: 1,
},
],
};

const payload = formatCollectionSchema(data, {
fieldSchemaType,
functionSchemaType,
});

expect(payload).toEqual(expectedResult);
});

0 comments on commit ec5070c

Please sign in to comment.