Commit

RAG-pipeline
sonam-pankaj95 committed May 15, 2024
1 parent b9b518f commit cad4205
Showing 3 changed files with 259 additions and 8 deletions.
4 changes: 3 additions & 1 deletion README.md
@@ -9,7 +9,8 @@
</p>

[![Downloads](https://static.pepy.tech/badge/embed-anything)](https://pepy.tech/project/embed-anything)
[![Multimodal Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1CowJrqZxDDYJzkclI-rbHaZHgL9C6K3p?usp=sharing)
[![RAG Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://drive.google.com/file/d/1GCSJ7kMwJWT86khVN9JrP_I-v0ipeWvP/view?usp=sharing)
[![License](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
[![PyPI](https://img.shields.io/badge/Package-PYPI-blue.svg)](https://pypi.org/project/embed-anything/)
[![Discord](https://img.shields.io/discord/1213966302046064711?style=flat&logo=discord&link=https%3A%2F%2Fdiscord.gg%2FHGxDZxNt9G)](https://discord.gg/juETVTMdZu)
@@ -32,6 +33,7 @@ We will soon post on how we use Candle to increase the performance and decrease

## Examples
1. Image Search: [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1CowJrqZxDDYJzkclI-rbHaZHgL9C6K3p?usp=sharing)
2. RAG with Pinecone: [![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://drive.google.com/file/d/1GCSJ7kMwJWT86khVN9JrP_I-v0ipeWvP/view?usp=sharing)

[Watch the demo](https://youtu.be/HLXIuznnXcI)

223 changes: 223 additions & 0 deletions examples/RAG.ipynb
@@ -0,0 +1,223 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [],
"source": [
"import embed_anything\n",
"import os\n",
"import time\n",
"from pinecone import Pinecone\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"os.environ['OPENAI_API_KEY'] = \"YOUR KEY HERE\""
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"outputs": [],
"source": [
"# Embed every file in test_files/ with the OpenAI embedder\n",
"data = embed_anything.embed_directory(\"test_files\", embeder=\"OpenAI\")\n",
"# Collect the embedding vectors into a NumPy array (use a fresh loop variable instead of shadowing `data`)\n",
"embeddings = np.array([d.embedding for d in data])\n"
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {},
"outputs": [],
"source": [
"pc = Pinecone(api_key=\"YOUR_API_KEY\")\n",
"index = pc.Index(\"anything\")"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [],
"source": [
"for i in range(len(data)):\n",
" index.upsert(\n",
" vectors=[{\"id\": str(i), \"values\": data[i].embedding, \"metadata\": {\"text\": data[i].text}}]\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {},
"outputs": [],
"source": [
"query = \"What is AI?\"\n",
"query_embedding = embed_anything.embed_query([query], embeder=\"OpenAI\")"
]
},
{
"cell_type": "code",
"execution_count": 62,
"metadata": {},
"outputs": [],
"source": [
"# First pass: return the raw Pinecone response for the top-5 matches\n",
"# (superseded by the version in the next cell, which returns just the match ids)\n",
"def retrieval(query):\n",
"    query_embedding = embed_anything.embed_query([query], embeder=\"OpenAI\")\n",
"    return index.query(vector=query_embedding[0].embedding, top_k=5)\n"
]
},
{
"cell_type": "code",
"execution_count": 175,
"metadata": {},
"outputs": [],
"source": [
"def retrieval(query):\n",
"    # Embed the query with the same OpenAI embedder used for the documents\n",
"    query_embedding = embed_anything.embed_query(query, embeder=\"OpenAI\")\n",
"    # Fetch the two nearest neighbours and return their vector ids as integers\n",
"    context = index.query(vector=query_embedding[0].embedding, top_k=2)\n",
"    return [int(match['id']) for match in context.matches]\n"
]
},
{
"cell_type": "code",
"execution_count": 176,
"metadata": {},
"outputs": [],
"source": [
"indices = retrieval([\"what is AI?\"])"
]
},
{
"cell_type": "code",
"execution_count": 177,
"metadata": {},
"outputs": [],
"source": [
"def get_text(indices):\n",
" return [index.fetch([str(e)])['vectors'][str(e)]['metadata']['text'] for e in indices]\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 178,
"metadata": {},
"outputs": [],
"source": [
"# Build the prompt: the question followed by up to three retrieved passages\n",
"# (fetch the texts once rather than calling get_text inside the loop)\n",
"texts = get_text(indices)\n",
"content = query + \" \"\n",
"for text in texts[:3]:\n",
"    content += text + \" \""
]
},
{
"cell_type": "code",
"execution_count": 179,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'What is AI? Artificial Intelligence helps in the analysis and decoding of neural activity data. AI, in conjunction with Brain-Computer Interfaces, receives ongoing updates of internal parameters, including data on pulse durations, amplitudes, stimulation/recording densities, and electrical properties of neural tissues. Once equipped with this information, AI algorithms swiftly discern valuable insights and logical patterns within the data, enabling them to generate the desired functional results in real time. One of the applications that has resulted from this is the ability to control a computer cursor. This can help people with disabilities to operate a personal computer to get various tasks done. Another crucial application has been in neuroprosthetics and limb rehabilitation. What does this imply in the context of healthcare, equality, and our humanity itself? All this has been possible mainly due to the advancements in Artificial Intelligence technology that have propelled the development of Brain-computer Interfaces. Most of the BCI applications in the past decade have been possible with AI assistance (Zhang, et al. , 2020) [3] These applications help improve the quality of paralyzed patient’s lives, help people with disabilities with their everyday activities. They have also shown a promising impact in the rehabilitation of people with neurodevelopment disorders (Papanastasiou et al. , 2017) [4] . Without AI, it was very hard to make any sense of the data (EEG, fMRI) that was gathered from BCI. '"
]
},
"execution_count": 179,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"content"
]
},
{
"cell_type": "code",
"execution_count": 180,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{\n",
" \"AI\": {\n",
" \"Definition\": \"Artificial intelligence (AI) is a technology that enables machines to learn from experience, adapt to new inputs, and perform human-like tasks.\",\n",
" \"Applications\": [\n",
" \"Analysis and decoding of neural activity data\",\n",
" \"Brain-Computer Interfaces updates\",\n",
" \"Control of computer cursor for people with disabilities\",\n",
" \"Neuroprosthetics and limb rehabilitation\"\n",
" ],\n",
" \"Implications\": {\n",
" \"Healthcare\": \"AI in conjunction with Brain-Computer Interfaces has significant implications for healthcare, enabling improved quality of life for paralyzed patients and assisting people with disabilities in their daily activities.\",\n",
" \"Equality\": \"By providing assistive technologies like AI-powered Brain-Computer Interfaces, it helps in promoting equality by giving individuals with disabilities the tools to independently perform tasks that they might otherwise find challenging.\",\n",
" \"Humanity\": \"The development of AI technology in the context of healthcare and disability support showcases the positive impact of leveraging technology to enhance human capabilities, thereby improving the overall quality of life and well-being.\"\n",
" },\n",
" \"Contributions\": {\n",
" \"BCI Applications\": \"Many advancements in Brain-Computer Interfaces have been made possible with AI assistance, enabling the extraction of valuable insights and logical patterns from complex neural data that would have been challenging to decipher without AI.\"\n",
" },\n",
" \"References\": [\n",
" \"Zhang, et al. , 2020\",\n",
" \"Papanastasiou et al. , 2017\"\n",
" ]\n",
" }\n",
"}\n"
]
}
],
"source": [
"from openai import OpenAI\n",
"client = OpenAI()\n",
"\n",
"\n",
"\n",
"response = client.chat.completions.create(\n",
" model=\"gpt-3.5-turbo-0125\",\n",
" response_format={ \"type\": \"json_object\" },\n",
" messages=[\n",
" {\"role\": \"system\", \"content\": \"You are a helpful assistant designed to output JSON.\"},\n",
" {\"role\": \"user\", \"content\": content}\n",
" ]\n",
")\n",
"print(response.choices[0].message.content)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
40 changes: 33 additions & 7 deletions retrieval.py
@@ -17,16 +17,42 @@
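# Connect to Pinecone; the "anything" index must already exist with a dimension matching the OpenAI embeddings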
pc = Pinecone(api_key="")
index = pc.Index("anything")

# Upsert one vector per embedded chunk, keeping the source text as metadata for retrieval later
for i in range(len(data)):
    index.upsert(
        vectors=[{"id": str(i), "values": data[i].embedding, "metadata": {"text": data[i].text}}]
    )



def retrieval(query):
    # Embed the query with the same OpenAI embedder used for the documents
    query_embedding = embed_anything.embed_query(query, embeder="OpenAI")
    # Fetch the two nearest neighbours and return their vector ids as integers
    context = index.query(vector=query_embedding[0].embedding, top_k=2)
    return [int(match['id']) for match in context.matches]


query = "what is AI?"
indices = retrieval([query])


def get_text(indices):
    # Look up the stored text metadata for each retrieved vector id
    return [index.fetch([str(e)])['vectors'][str(e)]['metadata']['text'] for e in indices]



# Build the prompt: the question followed by up to three retrieved passages
texts = get_text(indices)
content = query + " "
for text in texts[:3]:
    content += text + " "



client = OpenAI()

# Send the question plus retrieved context to the chat model, requesting a JSON-formatted answer
response = client.chat.completions.create(
    model="gpt-3.5-turbo-0125",
    response_format={"type": "json_object"},
    messages=[
        {"role": "system", "content": "You are a helpful assistant designed to output JSON."},
        {"role": "user", "content": content}
    ]
)
print(response.choices[0].message.content)
