From b0d7dc53a59ca836b269812198e8294120b3eeb5 Mon Sep 17 00:00:00 2001 From: ryjiang Date: Tue, 26 Nov 2024 11:09:09 +0800 Subject: [PATCH] rename text file name Signed-off-by: ryjiang --- test/grpc/FullTextSearch.spec.ts | 160 ++++++++++++------ ...Functions.spec.ts => KeywordMatch.spec.ts} | 160 ++++++------------ 2 files changed, 160 insertions(+), 160 deletions(-) rename test/grpc/{Functions.spec.ts => KeywordMatch.spec.ts} (51%) diff --git a/test/grpc/FullTextSearch.spec.ts b/test/grpc/FullTextSearch.spec.ts index dbf8af3f..bae5f76a 100644 --- a/test/grpc/FullTextSearch.spec.ts +++ b/test/grpc/FullTextSearch.spec.ts @@ -4,7 +4,7 @@ import { ErrorCode, MetricType, ConsistencyLevelEnum, - IndexType, + FunctionType, } from '../../milvus'; import { IP, @@ -14,7 +14,7 @@ import { dynamicFields, } from '../tools'; -const milvusClient = new MilvusClient({ address: IP, logLevel: 'info' }); +const milvusClient = new MilvusClient({ address: IP, logLevel: 'debug' }); const COLLECTION = GENERATE_NAME(); const dbParam = { db_name: 'FullTextSearch', @@ -35,16 +35,43 @@ const createCollectionParams = genCollectionParams({ name: 'text', description: 'text field', data_type: DataType.VarChar, - max_length: 200, + max_length: 20, is_partition_key: false, enable_analyzer: true, - enable_match: true, - analyzer_params: { tokenizer: 'jieba' }, + }, + { + name: 'sparse', + description: 'sparse field', + data_type: DataType.SparseFloatVector, + is_function_output: true, + }, + { + name: 'sparse2', + description: 'sparse field2', + data_type: DataType.SparseFloatVector, + }, + ], + functions: [ + { + name: 'bm25f1', + description: 'bm25 function', + type: FunctionType.BM25, + input_field_names: ['text'], + output_field_names: ['sparse'], + params: {}, + }, + { + name: 'bm25f2', + description: 'bm25 function', + type: FunctionType.BM25, + input_field_names: ['text'], + output_field_names: ['sparse2'], + params: {}, }, ], }); -describe(`Full text search API`, () => { +describe(`FulltextSearch API`, () => { beforeAll(async () => { // create db and use db await milvusClient.createDatabase(dbParam); @@ -66,37 +93,43 @@ describe(`Full text search API`, () => { const describe = await milvusClient.describeCollection({ collection_name: COLLECTION, }); - // expect the 'vector' field to be created + // expect the 'sparse' field to be created expect(describe.schema.fields.length).toEqual( createCollectionParams.fields.length ); - - // find varchar field - const text = describe.schema.fields.find(field => field.name === 'text'); - - const enableMatch = text?.type_params?.find( - param => param.key === 'enable_match' + // extract the 'sparse' field + const sparse = describe.schema.fields.find( + field => field.is_function_output ); - const enableAnalyzer = text?.type_params?.find( - param => param.key === 'enable_analyzer' - ); + // expect the 'sparse' field's name to be 'sparse' + expect(sparse!.name).toEqual('sparse'); - const analyzerParams = text?.type_params?.find( - param => param.key === 'analyzer_params' - ); + // expect functions are in the schema + expect(describe.schema.functions.length).toEqual(2); + expect(describe.schema.functions[0].name).toEqual('bm25f1'); + expect(describe.schema.functions[0].input_field_names).toEqual(['text']); + expect(describe.schema.functions[0].output_field_names).toEqual(['sparse']); + expect(describe.schema.functions[0].type).toEqual('BM25'); + expect(describe.schema.functions[1].name).toEqual('bm25f2'); + expect(describe.schema.functions[1].input_field_names).toEqual(['text']); + expect(describe.schema.functions[1].output_field_names).toEqual([ + 'sparse2', + ]); + expect(describe.schema.functions[1].type).toEqual('BM25'); - expect(enableMatch?.value).toEqual('true'); - expect(enableAnalyzer?.value).toEqual('true'); - expect(JSON.parse(analyzerParams?.value as any)).toEqual({ - tokenizer: 'jieba', - }); + // find the `sparse2` field + const sparse2 = describe.schema.fields.find( + field => field.name === 'sparse2' + ); + // its function output should be true + expect(sparse2!.is_function_output).toEqual(true); }); it(`Insert data with function field should success`, async () => { const data = generateInsertData( [...createCollectionParams.fields, ...dynamicFields], - 500 + 10 ); const insert = await milvusClient.insert({ @@ -108,15 +141,37 @@ describe(`Full text search API`, () => { }); it(`Create index on function output field should success`, async () => { - const createIndex = await milvusClient.createIndex({ + // create index + const createVectorIndex = await milvusClient.createIndex({ collection_name: COLLECTION, - index_name: 't2', + index_name: 't', field_name: 'vector', - index_type: IndexType.AUTOINDEX, + index_type: 'HNSW', metric_type: MetricType.COSINE, + params: { M: 4, efConstruction: 8 }, + }); + + const createIndex = await milvusClient.createIndex({ + collection_name: COLLECTION, + index_name: 't2', + field_name: 'sparse', + index_type: 'SPARSE_INVERTED_INDEX', + metric_type: 'BM25', + params: { drop_ratio_build: 0.3, bm25_k1: 1.25, bm25_b: 0.8 }, + }); + + const createIndex2 = await milvusClient.createIndex({ + collection_name: COLLECTION, + index_name: 't3', + field_name: 'sparse2', + index_type: 'SPARSE_INVERTED_INDEX', + metric_type: 'BM25', + params: { drop_ratio_build: 0.3, bm25_k1: 1.25, bm25_b: 0.8 }, }); + expect(createVectorIndex.error_code).toEqual(ErrorCode.SUCCESS); expect(createIndex.error_code).toEqual(ErrorCode.SUCCESS); + expect(createIndex2.error_code).toEqual(ErrorCode.SUCCESS); // load const load = await milvusClient.loadCollection({ @@ -132,51 +187,52 @@ describe(`Full text search API`, () => { collection_name: COLLECTION, limit: 10, expr: 'id > 0', - output_fields: ['text'], - filter: "TEXT_MATCH(text, 'apple')", + output_fields: ['vector', 'id', 'text', 'sparse', 'sparse2'], consistency_level: ConsistencyLevelEnum.Strong, }); - expect(query.status.error_code).toEqual(ErrorCode.SUCCESS); - // every text value should be 'apple' - query.data.forEach(item => { - expect(item.text).toEqual('apple'); + expect(query.status.error_code).toEqual(ErrorCode.UnexpectedError); + expect(query.status.reason).toEqual( + 'not allowed to retrieve raw data of field sparse' + ); + + const query2 = await milvusClient.query({ + collection_name: COLLECTION, + limit: 10, + expr: 'id > 0', + output_fields: ['vector', 'id', 'text', '$meta'], + consistency_level: ConsistencyLevelEnum.Strong, }); + + expect(query2.data.length).toEqual(10); }); - it(`search with text should success`, async () => { + it(`search with varchar should success`, async () => { // search nq = 1 const search = await milvusClient.search({ collection_name: COLLECTION, limit: 10, - data: [1, 2, 3, 4], - output_fields: ['text'], - filter: "TEXT_MATCH(text, 'apple')", + data: 'apple', + anns_field: 'sparse', + output_fields: ['*'], params: { drop_ratio_search: 0.6 }, consistency_level: ConsistencyLevelEnum.Strong, }); expect(search.status.error_code).toEqual(ErrorCode.SUCCESS); - // expect text value to be 'apple' - expect(search.results[0].text).toEqual('apple'); // nq > 1 const search2 = await milvusClient.search({ collection_name: COLLECTION, limit: 10, - data: [ - [1, 2, 3, 4], - [5, 6, 7, 8], - ], + data: ['apple', 'banana'], + anns_field: 'sparse', output_fields: ['*'], - filter: "TEXT_MATCH(text, 'apple')", params: { drop_ratio_search: 0.6 }, consistency_level: ConsistencyLevelEnum.Strong, }); expect(search2.status.error_code).toEqual(ErrorCode.SUCCESS); - // expect text value to be 'apple' - expect(search2.results[0][0].text).toEqual('apple'); // multiple search const search3 = await milvusClient.search({ @@ -184,22 +240,18 @@ describe(`Full text search API`, () => { limit: 10, data: [ { - data: [1, 2, 3, 4], - anns_field: 'vector', + data: 'apple', + anns_field: 'sparse', params: { nprobe: 2 }, }, { - data: [5, 6, 7, 8], + data: [1, 2, 3, 4], anns_field: 'vector', }, ], - filter: "TEXT_MATCH(text, 'apple')", - output_fields: ['text'], consistency_level: ConsistencyLevelEnum.Strong, }); expect(search3.status.error_code).toEqual(ErrorCode.SUCCESS); - // expect text value to be 'apple' - expect(search3.results[0].text).toEqual('apple'); }); }); diff --git a/test/grpc/Functions.spec.ts b/test/grpc/KeywordMatch.spec.ts similarity index 51% rename from test/grpc/Functions.spec.ts rename to test/grpc/KeywordMatch.spec.ts index 1ad6b988..c18d8896 100644 --- a/test/grpc/Functions.spec.ts +++ b/test/grpc/KeywordMatch.spec.ts @@ -4,7 +4,7 @@ import { ErrorCode, MetricType, ConsistencyLevelEnum, - FunctionType, + IndexType, } from '../../milvus'; import { IP, @@ -17,7 +17,7 @@ import { const milvusClient = new MilvusClient({ address: IP, logLevel: 'info' }); const COLLECTION = GENERATE_NAME(); const dbParam = { - db_name: 'Functions', + db_name: 'KeywordMatch', }; const numPartitions = 3; @@ -35,43 +35,16 @@ const createCollectionParams = genCollectionParams({ name: 'text', description: 'text field', data_type: DataType.VarChar, - max_length: 20, + max_length: 200, is_partition_key: false, enable_analyzer: true, - }, - { - name: 'sparse', - description: 'sparse field', - data_type: DataType.SparseFloatVector, - is_function_output: true, - }, - { - name: 'sparse2', - description: 'sparse field2', - data_type: DataType.SparseFloatVector, - }, - ], - functions: [ - { - name: 'bm25f1', - description: 'bm25 function', - type: FunctionType.BM25, - input_field_names: ['text'], - output_field_names: ['sparse'], - params: {}, - }, - { - name: 'bm25f2', - description: 'bm25 function', - type: FunctionType.BM25, - input_field_names: ['text'], - output_field_names: ['sparse2'], - params: {}, + enable_match: true, + analyzer_params: { tokenizer: 'jieba' }, }, ], }); -describe(`Functions schema API`, () => { +describe(`Keyword match API`, () => { beforeAll(async () => { // create db and use db await milvusClient.createDatabase(dbParam); @@ -93,43 +66,37 @@ describe(`Functions schema API`, () => { const describe = await milvusClient.describeCollection({ collection_name: COLLECTION, }); - // expect the 'sparse' field to be created + // expect the 'vector' field to be created expect(describe.schema.fields.length).toEqual( createCollectionParams.fields.length ); - // extract the 'sparse' field - const sparse = describe.schema.fields.find( - field => field.is_function_output - ); - // expect the 'sparse' field's name to be 'sparse' - expect(sparse!.name).toEqual('sparse'); + // find varchar field + const text = describe.schema.fields.find(field => field.name === 'text'); - // expect functions are in the schema - expect(describe.schema.functions.length).toEqual(2); - expect(describe.schema.functions[0].name).toEqual('bm25f1'); - expect(describe.schema.functions[0].input_field_names).toEqual(['text']); - expect(describe.schema.functions[0].output_field_names).toEqual(['sparse']); - expect(describe.schema.functions[0].type).toEqual('BM25'); - expect(describe.schema.functions[1].name).toEqual('bm25f2'); - expect(describe.schema.functions[1].input_field_names).toEqual(['text']); - expect(describe.schema.functions[1].output_field_names).toEqual([ - 'sparse2', - ]); - expect(describe.schema.functions[1].type).toEqual('BM25'); + const enableMatch = text?.type_params?.find( + param => param.key === 'enable_match' + ); + + const enableAnalyzer = text?.type_params?.find( + param => param.key === 'enable_analyzer' + ); - // find the `sparse2` field - const sparse2 = describe.schema.fields.find( - field => field.name === 'sparse2' + const analyzerParams = text?.type_params?.find( + param => param.key === 'analyzer_params' ); - // its function output should be true - expect(sparse2!.is_function_output).toEqual(true); + + expect(enableMatch?.value).toEqual('true'); + expect(enableAnalyzer?.value).toEqual('true'); + expect(JSON.parse(analyzerParams?.value as any)).toEqual({ + tokenizer: 'jieba', + }); }); it(`Insert data with function field should success`, async () => { const data = generateInsertData( [...createCollectionParams.fields, ...dynamicFields], - 10 + 500 ); const insert = await milvusClient.insert({ @@ -141,37 +108,15 @@ describe(`Functions schema API`, () => { }); it(`Create index on function output field should success`, async () => { - // create index - const createVectorIndex = await milvusClient.createIndex({ - collection_name: COLLECTION, - index_name: 't', - field_name: 'vector', - index_type: 'HNSW', - metric_type: MetricType.COSINE, - params: { M: 4, efConstruction: 8 }, - }); - const createIndex = await milvusClient.createIndex({ collection_name: COLLECTION, index_name: 't2', - field_name: 'sparse', - index_type: 'SPARSE_INVERTED_INDEX', - metric_type: 'BM25', - params: { drop_ratio_build: 0.3, bm25_k1: 1.25, bm25_b: 0.8 }, - }); - - const createIndex2 = await milvusClient.createIndex({ - collection_name: COLLECTION, - index_name: 't3', - field_name: 'sparse2', - index_type: 'SPARSE_INVERTED_INDEX', - metric_type: 'BM25', - params: { drop_ratio_build: 0.3, bm25_k1: 1.25, bm25_b: 0.8 }, + field_name: 'vector', + index_type: IndexType.AUTOINDEX, + metric_type: MetricType.COSINE, }); - expect(createVectorIndex.error_code).toEqual(ErrorCode.SUCCESS); expect(createIndex.error_code).toEqual(ErrorCode.SUCCESS); - expect(createIndex2.error_code).toEqual(ErrorCode.SUCCESS); // load const load = await milvusClient.loadCollection({ @@ -187,52 +132,51 @@ describe(`Functions schema API`, () => { collection_name: COLLECTION, limit: 10, expr: 'id > 0', - output_fields: ['vector', 'id', 'text', 'sparse', 'sparse2'], + output_fields: ['text'], + filter: "TEXT_MATCH(text, 'apple')", consistency_level: ConsistencyLevelEnum.Strong, }); - expect(query.status.error_code).toEqual(ErrorCode.UnexpectedError); - expect(query.status.reason).toEqual( - 'not allowed to retrieve raw data of field sparse' - ); - - const query2 = await milvusClient.query({ - collection_name: COLLECTION, - limit: 10, - expr: 'id > 0', - output_fields: ['vector', 'id', 'text'], - consistency_level: ConsistencyLevelEnum.Strong, + expect(query.status.error_code).toEqual(ErrorCode.SUCCESS); + // every text value should be 'apple' + query.data.forEach(item => { + expect(item.text).toEqual('apple'); }); - - expect(query2.data.length).toEqual(10); }); - it(`search with varchar should success`, async () => { + it(`search with text should success`, async () => { // search nq = 1 const search = await milvusClient.search({ collection_name: COLLECTION, limit: 10, - data: 'apple', - anns_field: 'sparse', - output_fields: ['*'], + data: [1, 2, 3, 4], + output_fields: ['text'], + filter: "TEXT_MATCH(text, 'apple')", params: { drop_ratio_search: 0.6 }, consistency_level: ConsistencyLevelEnum.Strong, }); expect(search.status.error_code).toEqual(ErrorCode.SUCCESS); + // expect text value to be 'apple' + expect(search.results[0].text).toEqual('apple'); // nq > 1 const search2 = await milvusClient.search({ collection_name: COLLECTION, limit: 10, - data: ['apple', 'banana'], - anns_field: 'sparse', + data: [ + [1, 2, 3, 4], + [5, 6, 7, 8], + ], output_fields: ['*'], + filter: "TEXT_MATCH(text, 'apple')", params: { drop_ratio_search: 0.6 }, consistency_level: ConsistencyLevelEnum.Strong, }); expect(search2.status.error_code).toEqual(ErrorCode.SUCCESS); + // expect text value to be 'apple' + expect(search2.results[0][0].text).toEqual('apple'); // multiple search const search3 = await milvusClient.search({ @@ -240,18 +184,22 @@ describe(`Functions schema API`, () => { limit: 10, data: [ { - data: 'apple', - anns_field: 'sparse', + data: [1, 2, 3, 4], + anns_field: 'vector', params: { nprobe: 2 }, }, { - data: [1, 2, 3, 4], + data: [5, 6, 7, 8], anns_field: 'vector', }, ], + filter: "TEXT_MATCH(text, 'apple')", + output_fields: ['text'], consistency_level: ConsistencyLevelEnum.Strong, }); expect(search3.status.error_code).toEqual(ErrorCode.SUCCESS); + // expect text value to be 'apple' + expect(search3.results[0].text).toEqual('apple'); }); });