Skip to content

Commit

Permalink
feat(opensearch): implement filtering by metadata attributes + integr…
Browse files Browse the repository at this point in the history
…ation test
  • Loading branch information
igorshapiro committed Apr 13, 2023
1 parent fa8006b commit 37f1b8f
Show file tree
Hide file tree
Showing 5 changed files with 104 additions and 41 deletions.
2 changes: 1 addition & 1 deletion examples/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -53,4 +53,4 @@
"tsx": "^3.12.3",
"typescript": "^4.9.5"
}
}
}
2 changes: 1 addition & 1 deletion langchain/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -803,4 +803,4 @@
},
"./package.json": "./package.json"
}
}
}
41 changes: 38 additions & 3 deletions langchain/src/vectorstores/opensearch.ts
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
/* eslint-disable no-instanceof/no-instanceof */
import { Embeddings } from "embeddings/base.js";
import { Client, RequestParams, errors } from "@opensearch-project/opensearch";
import { v4 as uuid } from "uuid";
Expand Down Expand Up @@ -87,14 +88,21 @@ export class OpenSearchVectorStore extends VectorStore {
async similaritySearchVectorWithScore(
query: number[],
k: number,
_filter?: object | undefined
filter?: object | undefined
): Promise<[Document, number][]> {
const search: RequestParams.Search = {
index: this.indexName,
body: {
query: {
knn: {
embedding: { vector: query, k },
bool: {
filter: { bool: { must: this.buildMetadataTerms(filter) } },
must: [
{
knn: {
embedding: { vector: query, k },
},
},
],
},
},
size: k,
Expand Down Expand Up @@ -154,6 +162,15 @@ export class OpenSearchVectorStore extends VectorStore {
},
},
mappings: {
dynamic_templates: [
{
// map all metadata properties to be keyword
"metadata.*": {
match_mapping_type: "*",
mapping: { type: "keyword" },
},
},
],
properties: {
text: { type: "text" },
metadata: { type: "object" },
Expand All @@ -177,6 +194,17 @@ export class OpenSearchVectorStore extends VectorStore {
await this.client.indices.create({ index: this.indexName, body });
}

/**
 * Translates a flat metadata filter into OpenSearch `term` clauses.
 *
 * Every own enumerable property `key: value` of `filter` becomes
 * `{ term: { "metadata.<key>": value } }`.
 *
 * Properties whose value is `undefined` are skipped: JSON serialization
 * drops undefined values, so such an entry would otherwise be sent as an
 * empty `term` clause.
 *
 * @param filter - Metadata key/value pairs to match exactly; `null` or
 *   `undefined` means "no filtering".
 * @returns One `term` clause per defined filter property; an empty array
 *   when there is nothing to filter on.
 */
private buildMetadataTerms(
  filter?: object
): { term: Record<string, unknown> }[] {
  if (filter == null) return [];
  return Object.entries(filter)
    .filter(([, value]) => value !== undefined)
    .map(([key, value]) => ({ term: { [`metadata.${key}`]: value } }));
}

async doesIndexExist(): Promise<boolean> {
try {
await this.client.cat.indices({ index: this.indexName });
Expand All @@ -188,4 +216,11 @@ export class OpenSearchVectorStore extends VectorStore {
throw err;
}
}

/**
 * Deletes the index named `this.indexName`, but only when
 * `doesIndexExist()` reports that it is present; otherwise does nothing.
 */
async deleteIfExists(): Promise<void> {
  if (await this.doesIndexExist()) {
    await this.client.indices.delete({ index: this.indexName });
  }
}
}
42 changes: 42 additions & 0 deletions langchain/src/vectorstores/tests/opensearch.int.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
/* eslint-disable no-process-env */
import { test, expect } from "@jest/globals";
import { Client } from "@opensearch-project/opensearch";
import { OpenAIEmbeddings } from "../../embeddings/index.js";
import { OpenSearchVectorStore } from "../opensearch.js";
import { Document } from "../../document.js";

// Integration test: talks to the OpenSearch node at OPENSEARCH_URL, recreates
// the `test_index` index, and checks both unfiltered and metadata-filtered
// similarity search.
test("OpenSearchVectorStore integration", async () => {
  // Fail with a clear message instead of passing `undefined` to the client.
  const node = process.env.OPENSEARCH_URL;
  if (!node) {
    throw new Error("OPENSEARCH_URL must be set to run this integration test");
  }
  const client = new Client({ nodes: [node] });

  const indexName = "test_index";

  const embeddings = new OpenAIEmbeddings(undefined, {
    baseOptions: { temperature: 0 },
  });
  const store = new OpenSearchVectorStore(embeddings, { client, indexName });
  // Start from a clean index so documents from earlier runs cannot leak in.
  await store.deleteIfExists();

  expect(store).toBeDefined();

  await store.addDocuments([
    { pageContent: "hello", metadata: { a: 2 } },
    { pageContent: "car", metadata: { a: 1 } },
    { pageContent: "adjective", metadata: { a: 1 } },
    { pageContent: "hi", metadata: { a: 1 } },
  ]);

  // Unfiltered search: the closest document to "hello!" is "hello".
  const results1 = await store.similaritySearch("hello!", 1);

  expect(results1).toHaveLength(1);
  expect(results1).toEqual([
    new Document({ metadata: { a: 2 }, pageContent: "hello" }),
  ]);

  // Filtered search: "hello" carries a = 2, so it must be excluded.
  const results2 = await store.similaritySearchWithScore("hello!", 1, {
    a: 1,
  });

  expect(results2).toHaveLength(1);
  // Check the filter itself, not just the count: every hit must carry the
  // requested metadata value.
  expect(results2.map(([doc]) => doc.metadata)).toEqual([{ a: 1 }]);
});
58 changes: 22 additions & 36 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -12950,6 +12950,7 @@ __metadata:
resolution: "examples@workspace:examples"
dependencies:
"@getmetal/metal-sdk": ^1.0.12
"@opensearch-project/opensearch": ^2.2.0
"@pinecone-database/pinecone": ^0.0.12
"@prisma/client": ^4.11.0
"@supabase/supabase-js": ^2.10.0
Expand Down Expand Up @@ -14437,7 +14438,25 @@ __metadata:
languageName: node
linkType: hard

"html-entities@npm:^2.3.2":
"html-encoding-sniffer@npm:^2.0.1":
version: 2.0.1
resolution: "html-encoding-sniffer@npm:2.0.1"
dependencies:
whatwg-encoding: ^1.0.5
checksum: bf30cce461015ed7e365736fcd6a3063c7bc016a91f74398ef6158886970a96333938f7c02417ab3c12aa82e3e53b40822145facccb9ddfbcdc15a879ae4d7ba
languageName: node
linkType: hard

"html-encoding-sniffer@npm:^3.0.0":
version: 3.0.0
resolution: "html-encoding-sniffer@npm:3.0.0"
dependencies:
whatwg-encoding: ^2.0.0
checksum: 8d806aa00487e279e5ccb573366a951a9f68f65c90298eac9c3a2b440a7ffe46615aff2995a2f61c6746c639234e6179a97e18ca5ccbbf93d3725ef2099a4502
languageName: node
linkType: hard

"html-entities@npm:^2.1.0, html-entities@npm:^2.3.2":
version: 2.3.3
resolution: "html-entities@npm:2.3.3"
checksum: 92521501da8aa5f66fee27f0f022d6e9ceae62667dae93aa6a2f636afa71ad530b7fb24a18d4d6c124c9885970cac5f8a52dbf1731741161002816ae43f98196
Expand Down Expand Up @@ -17026,39 +17045,6 @@ __metadata:
languageName: node
linkType: hard

"langchain-examples@workspace:examples":
version: 0.0.0-use.local
resolution: "langchain-examples@workspace:examples"
dependencies:
"@dqbd/tiktoken": ^1.0.2
"@getmetal/metal-sdk": ^1.0.12
"@opensearch-project/opensearch": ^2.2.0
"@pinecone-database/pinecone": ^0.0.10
"@prisma/client": ^4.11.0
"@supabase/supabase-js": ^2.10.0
"@tsconfig/recommended": ^1.0.2
"@types/js-yaml": ^4
"@typescript-eslint/eslint-plugin": ^5.51.0
"@typescript-eslint/parser": ^5.51.0
chromadb: ^1.3.0
dotenv: ^16.0.3
eslint: ^8.33.0
eslint-config-airbnb-base: ^15.0.0
eslint-config-prettier: ^8.6.0
eslint-plugin-import: ^2.27.5
eslint-plugin-prettier: ^4.2.1
js-yaml: ^4.1.0
langchain: "workspace:*"
prettier: ^2.8.3
prisma: ^4.11.0
sqlite3: ^5.1.4
tsx: ^3.12.3
typeorm: ^0.3.12
typescript: ^4.9.5
zod: ^3.21.4
languageName: unknown
linkType: soft

"langchain@workspace:*, langchain@workspace:langchain":
version: 0.0.0-use.local
resolution: "langchain@workspace:langchain"
Expand All @@ -17071,7 +17057,7 @@ __metadata:
"@huggingface/inference": ^1.5.1
"@jest/globals": ^29.5.0
"@opensearch-project/opensearch": ^2.2.0
"@pinecone-database/pinecone": ^0.0.10
"@pinecone-database/pinecone": ^0.0.12
"@supabase/supabase-js": ^2.10.0
"@tsconfig/recommended": ^1.0.2
"@types/d3-dsv": ^2
Expand Down Expand Up @@ -17131,7 +17117,7 @@ __metadata:
"@getmetal/metal-sdk": "*"
"@huggingface/inference": ^1.5.1
"@opensearch-project/opensearch": "*"
"@pinecone-database/pinecone": ^0.0.10
"@pinecone-database/pinecone": "*"
"@supabase/supabase-js": ^2.10.0
"@zilliz/milvus2-sdk-node": ^2.2.0
cheerio: ^1.0.0-rc.12
Expand Down

0 comments on commit 37f1b8f

Please sign in to comment.