Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[2.5] support full text search #374

Merged
merged 5 commits into from
Nov 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion milvus/types/Collection.ts
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@ export interface FieldType {
nullable?: boolean;
enable_match?: boolean;
tokenizer_params?: Record<string, any>;
enable_tokenizer?: boolean;
enable_analyzer?: boolean;
}

export interface ShowCollectionsReq extends GrpcTimeOut {
Expand Down
89 changes: 48 additions & 41 deletions milvus/utils/Format.ts
Original file line number Diff line number Diff line change
Expand Up @@ -189,11 +189,11 @@ export const formatAddress = (address: string) => {
};

/**
* Assigns properties with keys `dim` or `max_length` to the `type_params` object of a `FieldType` object.
* If the property exists in the `field` object, it is converted to a string and then deleted from the `field` object.
* If the property already exists in the `type_params` object, it is also converted to a string.
* Assigns specified properties from the `field` object to `type_params` within the `FieldType` object.
* Converts properties to strings, serializing objects as JSON strings if needed, then removes them from `field`.
*
* @param field The `FieldType` object to modify.
* @param field - The `FieldType` object to modify.
* @param typeParamKeys - Keys to assign to `type_params` if present in `field`.
* @returns The modified `FieldType` object.
*/
export const assignTypeParams = (
Expand All @@ -203,31 +203,30 @@ export const assignTypeParams = (
'max_length',
'max_capacity',
'enable_match',
'enable_tokenizer',
'tokenizer_params',
'enable_analyzer',
'analyzer_params',
]
) => {
let newField = cloneObj<FieldType>(field);
): FieldType => {
const newField = cloneObj<FieldType>(field);

// Initialize `type_params` if undefined
newField.type_params ??= {};

typeParamKeys.forEach(key => {
if (newField.hasOwnProperty(key)) {
// if the property exists in the field object, assign it to the type_params object
newField.type_params = newField.type_params || {};
newField.type_params[key] =
typeof newField[key as keyof FieldType] !== 'object'
? String(newField[key as keyof FieldType] ?? '')
: (newField[key as keyof FieldType] as TypeParam);
// delete the property from the field object
if (key in newField) {
const value = newField[key as keyof FieldType];
// Convert the value to a string, JSON-stringify if it’s an object
newField.type_params![key] =
typeof value === 'object' ? JSON.stringify(value) : String(value ?? '');
delete newField[key as keyof FieldType];
}

if (newField.type_params && newField.type_params[key]) {
// if the property already exists in the type_params object, convert it to a string,
newField.type_params[key] =
typeof newField.type_params[key] !== 'object'
? String(newField.type_params[key])
: newField.type_params[key];
}
});

// delete type_params if it's empty
if (!Object.keys(newField.type_params).length) {
delete newField.type_params;
}

return newField;
};

Expand Down Expand Up @@ -328,7 +327,27 @@ export const formatCollectionSchema = (
fields = (data as CreateCollectionWithSchemaReq).schema;
}

const payload = {
let payload = {} as any;

const functionOutputFields: string[] = [];

// if functions is set, parse its params to key-value pairs, and delete inputs and outputs
if (functions) {
payload.functions = functions.map((func: any) => {
const { input_field_names, output_field_names, ...rest } = func;

functionOutputFields.push(...output_field_names);

return schemaTypes.functionSchemaType.create({
...rest,
inputFieldNames: input_field_names,
outputFieldNames: output_field_names,
params: parseToKeyValue(func.params, true),
});
});
}

payload = {
name: collection_name,
description: description || '',
enableDynamicField: !!enableDynamicField || !!enable_dynamic_field,
Expand All @@ -352,7 +371,8 @@ export const formatCollectionSchema = (
isPrimaryKey: !!is_primary_key,
isPartitionKey:
!!is_partition_key || field.name === partition_key_field,
isFunctionOutput: !!is_function_output,
isFunctionOutput:
!!is_function_output || functionOutputFields.includes(field.name),
isClusteringKey:
!!field.is_clustering_key || field.name === clustring_key_field,
};
Expand All @@ -372,21 +392,8 @@ export const formatCollectionSchema = (
}
return schemaTypes.fieldSchemaType.create(createObj);
}),
functions: [],
} as any;

// if functions is set, parse its params to key-value pairs, and delete inputs and outputs
if (functions) {
payload.functions = functions.map((func: any) => {
const { input_field_names, output_field_names, ...rest } = func;
return schemaTypes.functionSchemaType.create({
...rest,
inputFieldNames: input_field_names,
outputFieldNames: output_field_names,
params: parseToKeyValue(func.params, true),
});
});
}
...payload,
};

return payload;
};
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "@zilliz/milvus2-sdk-node",
"author": "[email protected]",
"milvusVersion": "master-20241105-bd04cac4-amd64",
"milvusVersion": "master-20241118-12ed40e1-amd64",
"version": "2.4.9",
"main": "dist/milvus",
"files": [
Expand Down
205 changes: 205 additions & 0 deletions test/grpc/FullTextSearch.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,205 @@
import {
MilvusClient,
DataType,
ErrorCode,
MetricType,
ConsistencyLevelEnum,
IndexType,
} from '../../milvus';
import {
IP,
genCollectionParams,
GENERATE_NAME,
generateInsertData,
dynamicFields,
} from '../tools';

// Client shared by every case in this spec.
const milvusClient = new MilvusClient({ address: IP, logLevel: 'info' });
// Collection name produced by the GENERATE_NAME test helper.
const COLLECTION = GENERATE_NAME();
// Database that the suite creates, switches to, and finally drops.
const dbParam = { db_name: 'FullTextSearch' };
const numPartitions = 3;

// Extra VarChar field: analyzer and text match are switched on, and the
// analyzer is configured with the `jieba` tokenizer.
const analyzedTextField = {
  name: 'text',
  description: 'text field',
  data_type: DataType.VarChar,
  max_length: 200,
  is_partition_key: false,
  enable_analyzer: true,
  enable_match: true,
  analyzer_params: { tokenizer: 'jieba' },
};

// Collection definition used by the suite: one 4-dim float vector,
// manual ids, partition key + dynamic fields enabled, plus the text field.
const createCollectionParams = genCollectionParams({
  collectionName: COLLECTION,
  dim: [4],
  vectorType: [DataType.FloatVector],
  autoID: false,
  partitionKeyEnabled: true,
  numPartitions,
  enableDynamic: true,
  fields: [analyzedTextField],
});

describe(`Full text search API`, () => {
  // Values reused by the query and search cases below.
  const textMatchFilter = "TEXT_MATCH(text, 'apple')";
  const strongConsistency = ConsistencyLevelEnum.Strong;

  // The suite works inside its own database: create it, then switch to it.
  beforeAll(async () => {
    await milvusClient.createDatabase(dbParam);
    await milvusClient.use(dbParam);
  });

  // Clean up the collection first, then the database that held it.
  afterAll(async () => {
    await milvusClient.dropCollection({ collection_name: COLLECTION });
    await milvusClient.dropDatabase(dbParam);
  });

  it(`Create schema with function collection should success`, async () => {
    const created = await milvusClient.createCollection(createCollectionParams);
    expect(created.error_code).toEqual(ErrorCode.SUCCESS);

    const collectionInfo = await milvusClient.describeCollection({
      collection_name: COLLECTION,
    });

    // the described schema must report as many fields as were requested
    expect(collectionInfo.schema.fields.length).toEqual(
      createCollectionParams.fields.length
    );

    // locate the varchar field and read back its type params by key
    const textField = collectionInfo.schema.fields.find(
      candidate => candidate.name === 'text'
    );
    const typeParamValue = (key: string) =>
      textField?.type_params?.find(entry => entry.key === key)?.value;

    const enableMatchValue = typeParamValue('enable_match');
    const enableAnalyzerValue = typeParamValue('enable_analyzer');
    const analyzerParamsValue = typeParamValue('analyzer_params');

    // booleans come back as strings, analyzer params as a JSON string
    expect(enableMatchValue).toEqual('true');
    expect(enableAnalyzerValue).toEqual('true');
    expect(JSON.parse(analyzerParamsValue as any)).toEqual({
      tokenizer: 'jieba',
    });
  });

  it(`Insert data with function field should success`, async () => {
    // 10 rows covering the schema fields plus the dynamic ones
    const rows = generateInsertData(
      [...createCollectionParams.fields, ...dynamicFields],
      10
    );

    const inserted = await milvusClient.insert({
      collection_name: COLLECTION,
      fields_data: rows,
    });

    expect(inserted.status.error_code).toEqual(ErrorCode.SUCCESS);
  });

  it(`Create index on function output field should success`, async () => {
    // index the vector field, then load the collection
    const indexed = await milvusClient.createIndex({
      collection_name: COLLECTION,
      index_name: 't2',
      field_name: 'vector',
      index_type: IndexType.AUTOINDEX,
      metric_type: MetricType.COSINE,
    });
    expect(indexed.error_code).toEqual(ErrorCode.SUCCESS);

    const loaded = await milvusClient.loadCollection({
      collection_name: COLLECTION,
    });
    expect(loaded.error_code).toEqual(ErrorCode.SUCCESS);
  });

  it(`query with function output field should success`, async () => {
    // NOTE(review): both `expr` and `filter` are supplied here; confirm
    // which of the two the client actually sends.
    const queried = await milvusClient.query({
      collection_name: COLLECTION,
      limit: 10,
      expr: 'id > 0',
      output_fields: ['text'],
      filter: textMatchFilter,
      consistency_level: strongConsistency,
    });

    expect(queried.status.error_code).toEqual(ErrorCode.SUCCESS);

    // each returned row is expected to carry the text 'apple'
    for (const row of queried.data) {
      expect(row.text).toEqual('apple');
    }
  });

  it(`search with text should success`, async () => {
    // single query vector (nq = 1)
    const singleVector = await milvusClient.search({
      collection_name: COLLECTION,
      limit: 10,
      data: [1, 2, 3, 4],
      output_fields: ['text'],
      filter: textMatchFilter,
      params: { drop_ratio_search: 0.6 },
      consistency_level: strongConsistency,
    });

    expect(singleVector.status.error_code).toEqual(ErrorCode.SUCCESS);
    // the first hit is expected to carry the text 'apple'
    expect(singleVector.results[0].text).toEqual('apple');

    // two query vectors (nq > 1), all output fields
    const multiVector = await milvusClient.search({
      collection_name: COLLECTION,
      limit: 10,
      data: [
        [1, 2, 3, 4],
        [5, 6, 7, 8],
      ],
      output_fields: ['*'],
      filter: textMatchFilter,
      params: { drop_ratio_search: 0.6 },
      consistency_level: strongConsistency,
    });

    expect(multiVector.status.error_code).toEqual(ErrorCode.SUCCESS);
    // first hit of the first result set is expected to carry 'apple'
    expect(multiVector.results[0][0].text).toEqual('apple');

    // two search requests against the same vector field
    const multiRequest = await milvusClient.search({
      collection_name: COLLECTION,
      limit: 10,
      data: [
        {
          data: [1, 2, 3, 4],
          anns_field: 'vector',
          params: { nprobe: 2 },
        },
        {
          data: [5, 6, 7, 8],
          anns_field: 'vector',
        },
      ],
      filter: textMatchFilter,
      output_fields: ['text'],
      consistency_level: strongConsistency,
    });

    expect(multiRequest.status.error_code).toEqual(ErrorCode.SUCCESS);
    // the first hit is expected to carry the text 'apple'
    expect(multiRequest.results[0].text).toEqual('apple');
  });
});
4 changes: 2 additions & 2 deletions test/grpc/Functions.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ import {
dynamicFields,
} from '../tools';

const milvusClient = new MilvusClient({ address: IP, logLevel: 'info' });
const milvusClient = new MilvusClient({ address: IP, logLevel: 'debug' });
const COLLECTION = GENERATE_NAME();
const dbParam = {
db_name: 'Functions',
Expand All @@ -37,7 +37,7 @@ const createCollectionParams = genCollectionParams({
data_type: DataType.VarChar,
max_length: 20,
is_partition_key: false,
enable_tokenizer: true,
enable_analyzer: true,
},
{
name: 'sparse',
Expand Down
2 changes: 1 addition & 1 deletion test/tools/collection.ts
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,7 @@ export const genCollectionParams = (data: {
default_value: DEFAULT_STRING_VALUE,
max_length: MAX_LENGTH,
is_partition_key: partitionKeyEnabled,
enable_tokenizer: true,
enable_analyzer: true,
},
{
name: 'json',
Expand Down
Loading
Loading