pinecone-retrieval

StarlightSearch · May 8, 2024 · 500bb47 · 500bb47
1 parent a16c9b1
commit 500bb47
Show file tree

Hide file tree

Showing 5 changed files with 35 additions and 2 deletions.
diff --git a/Cargo.lock b/Cargo.lock
diff --git a/Cargo.toml b/Cargo.toml
@@ -1,7 +1,7 @@
 [package]
 name = "embed_anything"
 
-version = "0.1.14"
+version = "0.1.15"
 edition = "2021"
 
 [dependencies]

diff --git a/README.md b/README.md
@@ -12,6 +12,7 @@
 [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1CowJrqZxDDYJzkclI-rbHaZHgL9C6K3p?usp=sharing)
 [![license]( https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
 [![license]( https://img.shields.io/badge/Package-PYPI-blue.svg)](https://pypi.org/project/embed-anything/)
+[![Discord](https://img.shields.io/discord/1223707915827937321?style=flat&logo=discord&link=https%3A%2F%2Fdiscord.gg%2FHGxDZxNt9G)](https://discord.gg/juETVTMdZu)
 
 EmbedAnything is a powerful Python library designed to streamline the creation and management of embedding pipelines. Whether you're working with text, images, audio, or any other type of data, EmbedAnything makes it easy to generate embeddings from multiple sources and store them efficiently in a vector database.
 

diff --git a/Vector_database_files/test_paper.pdf b/Vector_database_files/test_paper.pdf
diff --git a/retrieval.py b/retrieval.py
@@ -0,0 +1,32 @@
+import embed_anything
+from openai import OpenAI
+
+import os
+import time
+from pinecone import Pinecone
+import numpy as np
+
+
+
+# Embed the sample PDF with the OpenAI embedder.
+# The path is assembled with os.path.join: written as a single literal with a
+# backslash, the "\t" in it is parsed as a TAB escape and the file is not found.
+# NOTE(review): embed_directory is handed a single file here - confirm that the
+# library accepts a file path for this call.
+data = embed_anything.embed_directory(
+    os.path.join("Vector_database_files", "test_paper.pdf"), embeder="OpenAI"
+)
+# Collect every item's embedding; a distinct loop name avoids shadowing `data`.
+embeddings = np.array([item.embedding for item in data])
+
+print(len(data))
+query = embed_anything.embed_query(["what is AI?"], embeder="OpenAI")
+
+# Take the API key from the environment so it is never committed; when
+# PINECONE_API_KEY is unset this falls back to the original empty string.
+pc = Pinecone(api_key=os.environ.get("PINECONE_API_KEY", ""))
+index = pc.Index("anything")
+
+# One-time ingestion step, kept disabled: uncomment to upsert each embedded
+# item into the index with its position as the id and its text as metadata.
+# for i in range(len(data)):
+#     index.upsert(
+#         vectors=[{"id": str(i), "values": data[i].embedding, "metadata": {"text": data[i].text}}]
+#     )
+
+
+
+def retrieval(query, top_k=2):
+    """Query the module-level Pinecone ``index`` with an embedded query.
+
+    Args:
+        query: Input passed straight to ``embed_anything.embed_query``
+            (callers in this file pass a list of strings).
+        top_k: Number of matches to request from the index. Defaults to 2,
+            the value that was previously hard-coded.
+
+    Returns:
+        The response of ``index.query`` for the first returned embedding.
+    """
+    query_embedding = embed_anything.embed_query(query, embeder="OpenAI")
+    # Only the first returned embedding is used for the search.
+    return index.query(vector=query_embedding[0].embedding, top_k=top_k)
+
+
+# Smoke-test the retrieval path with a sample question and show the response.
+matches = retrieval(["what is AI?"])
+print(matches)