From c25ea0ecca7b793438aad026ffac82fbe10e60ec Mon Sep 17 00:00:00 2001 From: rcarrata Date: Wed, 6 Nov 2024 19:33:47 +0100 Subject: [PATCH] added rag --- lab-materials/04-rag/5.1-ingest-rag.ipynb | 1578 +++++++++++++++++++++ lab-materials/04-rag/5.2-query-rag.ipynb | 280 ++++ lab-materials/04-rag/5.3-simple-rag.ipynb | 371 +++++ 3 files changed, 2229 insertions(+) create mode 100644 lab-materials/04-rag/5.1-ingest-rag.ipynb create mode 100644 lab-materials/04-rag/5.2-query-rag.ipynb create mode 100644 lab-materials/04-rag/5.3-simple-rag.ipynb diff --git a/lab-materials/04-rag/5.1-ingest-rag.ipynb b/lab-materials/04-rag/5.1-ingest-rag.ipynb new file mode 100644 index 0000000..5936710 --- /dev/null +++ b/lab-materials/04-rag/5.1-ingest-rag.ipynb @@ -0,0 +1,1578 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "3420575b-4d00-458b-aa0e-7030008ccd53", + "metadata": {}, + "source": [ + "## Creating an index and populating it with documents using Milvus and Nomic AI Embeddings\n", + "\n", + "Ingest PDF documents, then web pages content into a Milvus VectorStore." 
+ ] + }, + { + "cell_type": "markdown", + "id": "8308b229-b520-4e82-a783-eb921bb955e7", + "metadata": {}, + "source": [ + "### Needed packages and imports" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "id": "91e41b41-f60a-4b0f-91a1-cd273b60f21b", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "#!pip install einops==0.7.0 langchain==0.1.9 pypdf==4.0.2 pymilvus==2.3.6 sentence-transformers==2.4.0\n", + "#!pip install -q einops==0.7.0 langchain==0.1.9 pymilvus==2.3.6" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "600cd763-6ecc-4c77-89c0-47108c31c44e", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import requests\n", + "import os\n", + "from langchain.document_loaders import PyPDFDirectoryLoader, WebBaseLoader\n", + "from langchain.text_splitter import RecursiveCharacterTextSplitter\n", + "from langchain.embeddings.huggingface import HuggingFaceEmbeddings\n", + "from langchain_community.vectorstores import Milvus" + ] + }, + { + "cell_type": "markdown", + "id": "1a82063d-6153-4812-8977-042241736b53", + "metadata": {}, + "source": [ + "### Base parameters, the Milvus connection info" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "417ed4a4-9418-4f48-bebd-ef0ea11ae434", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "MILVUS_HOST = \"vectordb-milvus.milvus.svc.cluster.local\"\n", + "MILVUS_PORT = 19530\n", + "MILVUS_USERNAME = os.getenv('MILVUS_USERNAME')\n", + "MILVUS_PASSWORD = os.getenv('MILVUS_PASSWORD')\n", + "MILVUS_COLLECTION = \"collection_nomicai_embeddings\"" + ] + }, + { + "cell_type": "markdown", + "id": "f68f6785-480e-4519-be4f-8e1738dba4ca", + "metadata": {}, + "source": [ + "## Initial index creation and document ingestion" + ] + }, + { + "cell_type": "markdown", + "id": "f8cff5f7-c509-48db-90b5-e15815b8b530", + "metadata": {}, + "source": [ + "#### Download and load pdfs" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + 
"id": "bc4fe0db-f494-4cbd-9e97-8b6359a78cb7", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "product_version=\"2.13\"\n", + "documents = [\n", + " \"release_notes\",\n", + " \"introduction_to_red_hat_openshift_ai\",\n", + " \"getting_started_with_red_hat_openshift_ai_self-managed\",\n", + " \"openshift_ai_tutorial_-_fraud_detection_example\",\n", + " \"developing_a_model\",\n", + " \"integrating_data_from_amazon_s3\",\n", + " \"working_on_data_science_projects\",\n", + " \"serving_models\",\n", + " \"monitoring_data_science_models\",\n", + " \"managing_users\",\n", + " \"managing_resources\",\n", + " \"installing_and_uninstalling_openshift_ai_self-managed\",\n", + " \"installing_and_uninstalling_openshift_ai_self-managed_in_a_disconnected_environment\",\n", + " \"upgrading_openshift_ai_self-managed\",\n", + " \"upgrading_openshift_ai_self-managed_in_a_disconnected_environment\", \n", + "]\n", + "\n", + "pdfs = [f\"https://access.redhat.com/documentation/en-us/red_hat_openshift_ai_self-managed/{product_version}/pdf/{doc}/red_hat_openshift_ai_self-managed-{product_version}-{doc}-en-us.pdf\" for doc in documents]\n", + "pdfs_to_urls = {f\"red_hat_openshift_ai_self-managed-{product_version}-{doc}-en-us\": f\"https://access.redhat.com/documentation/en-us/red_hat_openshift_ai_self-managed/{product_version}/html-single/{doc}/index\" for doc in documents}" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "3eea5acc-49df-41c9-a01a-0cdbca96e8e2", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Skipped https://access.redhat.com/documentation/en-us/red_hat_openshift_ai_self-managed/2.13/pdf/developing_a_model/red_hat_openshift_ai_self-managed-2.13-developing_a_model-en-us.pdf\n", + "Skipped 
https://access.redhat.com/documentation/en-us/red_hat_openshift_ai_self-managed/2.13/pdf/integrating_data_from_amazon_s3/red_hat_openshift_ai_self-managed-2.13-integrating_data_from_amazon_s3-en-us.pdf\n", + "Skipped https://access.redhat.com/documentation/en-us/red_hat_openshift_ai_self-managed/2.13/pdf/monitoring_data_science_models/red_hat_openshift_ai_self-managed-2.13-monitoring_data_science_models-en-us.pdf\n" + ] + } + ], + "source": [ + "docs_dir = f\"rhoai-doc-{product_version}\"\n", + "\n", + "if not os.path.exists(docs_dir):\n", + " os.mkdir(docs_dir)\n", + "\n", + "for pdf in pdfs:\n", + " try:\n", + " response = requests.get(pdf)\n", + " except:\n", + " print(f\"Skipped {pdf}\")\n", + " continue\n", + " if response.status_code!=200:\n", + " print(f\"Skipped {pdf}\")\n", + " continue \n", + " with open(f\"{docs_dir}/{pdf.split('/')[-1]}\", 'wb') as f:\n", + " f.write(response.content)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "af4074d4-eff4-45b2-902d-ec8c075a83ef", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "pdf_folder_path = f\"./rhoai-doc-{product_version}\"\n", + "\n", + "pdf_loader = PyPDFDirectoryLoader(pdf_folder_path)\n", + "pdf_docs = pdf_loader.load()" + ] + }, + { + "cell_type": "markdown", + "id": "cde7ed3a-0530-47a1-95c2-22db6c782a95", + "metadata": {}, + "source": [ + "#### Inject metadata" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "702230f6-e6d3-44c7-a643-4996387606ff", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "\n", + "for doc in pdf_docs:\n", + " doc.metadata[\"source\"] = pdfs_to_urls[Path(doc.metadata[\"source\"]).stem]" + ] + }, + { + "cell_type": "markdown", + "id": "dd511d44-2d92-47a0-9163-b25576c9557b", + "metadata": {}, + "source": [ + "#### Load websites" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "8aebf003-d7ec-43ba-8e04-1931bcff2866", + "metadata": { + "tags": [] + }, + 
"outputs": [], + "source": [ + "websites = [\n", + " \"https://ai-on-openshift.io/getting-started/openshift/\",\n", + " \"https://ai-on-openshift.io/getting-started/opendatahub/\",\n", + " \"https://ai-on-openshift.io/getting-started/openshift-ai/\",\n", + " \"https://ai-on-openshift.io/odh-rhoai/configuration/\",\n", + " \"https://ai-on-openshift.io/odh-rhoai/custom-notebooks/\",\n", + " \"https://ai-on-openshift.io/odh-rhoai/nvidia-gpus/\",\n", + " \"https://ai-on-openshift.io/odh-rhoai/custom-runtime-triton/\",\n", + " \"https://ai-on-openshift.io/odh-rhoai/openshift-group-management/\",\n", + " \"https://ai-on-openshift.io/tools-and-applications/minio/minio/\",\n", + " \"https://access.redhat.com/articles/7047935\",\n", + " \"https://access.redhat.com/articles/rhoai-supported-configs\",\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "99f41110-8ca7-4d90-93b2-3b5021c894b8", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "website_loader = WebBaseLoader(websites)\n", + "website_docs = website_loader.load()" + ] + }, + { + "cell_type": "markdown", + "id": "157ddd29-54b3-474a-9b10-2d274bc3254f", + "metadata": {}, + "source": [ + "#### Merge both types of docs" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "8d361094-8b43-4351-8495-37628c35c42d", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "docs = pdf_docs + website_docs" + ] + }, + { + "cell_type": "markdown", + "id": "4198fe0a-38bf-4cd4-af7d-35b41c645edd", + "metadata": {}, + "source": [ + "#### Split documents into chunks with some overlap" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "edba4a08-2194-4df1-9091-6f2b596757a1", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "Document(page_content='Red Hat OpenShift AI Self-Managed\\n2.13\\nOpenShift AI tutorial - Fraud detection\\nexample\\nUse OpenShift AI to train an example model in JupyterLab, deploy the 
model, and\\nrefine the model by using automated pipelines\\nLast Updated: 2024-09-20', metadata={'source': 'https://access.redhat.com/documentation/en-us/red_hat_openshift_ai_self-managed/2.13/html-single/openshift_ai_tutorial_-_fraud_detection_example/index', 'page': 0})" + ] + }, + "execution_count": 11, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "text_splitter = RecursiveCharacterTextSplitter(chunk_size=1024,\n", + " chunk_overlap=40)\n", + "all_splits = text_splitter.split_documents(docs)\n", + "all_splits[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "79c2659f-4716-4c0a-a037-45af3560ab89", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "#!pip install sentence-transformers\n", + "#!pip show sentence-transformers\n", + "#!pip uninstall -y sentence-transformers\n", + "#!pip install sentence-transformers" + ] + }, + { + "cell_type": "markdown", + "id": "7ae7eae2-c670-4eb5-803b-b4d591fa83db", + "metadata": {}, + "source": [ + "#### Create the index and ingest the documents" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "bbb6a3e3-5ccd-441e-b80d-427555d9e9f6", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "You try to use a model that was created with version 2.4.0.dev0, however, your version is 2.4.0. This might cause unexpected behavior or errors. 
In that case, try to update to the latest version.\n", + "\n", + "\n", + "\n", + "\n" + ] + } + ], + "source": [ + "# If you don't want to use a GPU, you can remove the 'device': 'cuda' argument\n", + "model_kwargs = {'trust_remote_code': True, 'device': 'cuda'}\n", + "embeddings = HuggingFaceEmbeddings(\n", + " model_name=\"nomic-ai/nomic-embed-text-v1\",\n", + " model_kwargs=model_kwargs,\n", + " show_progress=True\n", + ")\n", + "\n", + "\n", + "db = Milvus(\n", + " embedding_function=embeddings,\n", + " connection_args={\"host\": MILVUS_HOST, \"port\": MILVUS_PORT, \"user\": MILVUS_USERNAME, \"password\": MILVUS_PASSWORD},\n", + " collection_name=MILVUS_COLLECTION,\n", + " metadata_field=\"metadata\",\n", + " text_field=\"page_content\",\n", + " auto_id=True,\n", + " drop_old=True\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "b6bf425b-dffd-4f42-9537-49d41383182d", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "de0956f111864f95a1c17a1b0e7d6b9e", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Batches: 0%| | 0/35 [00:00=0.8.0, but you have requests-toolbelt 1.0.0 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.2\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.2.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.2\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: 
\u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" + ] + } + ], + "source": [ + "!pip install -q einops==0.7.0 langchain==0.1.9 pymilvus==2.3.6 sentence-transformers==2.4.0\n", + "!pip install -q --upgrade transformers\n", + "#!pip install einops==0.7.0 langchain==0.1.9 pymilvus==2.3.6 transformers\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "1c53e798", + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "from langchain.embeddings.huggingface import HuggingFaceEmbeddings\n", + "from langchain_community.vectorstores import Milvus" + ] + }, + { + "cell_type": "markdown", + "id": "f0b8ecae", + "metadata": { + "tags": [] + }, + "source": [ + "### Base parameters, the Milvus connection info" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "9376e567", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "MILVUS_HOST = \"vectordb-milvus.milvus.svc.cluster.local\"\n", + "MILVUS_PORT = 19530\n", + "MILVUS_USERNAME = os.getenv('MILVUS_USERNAME')\n", + "MILVUS_PASSWORD = os.getenv('MILVUS_PASSWORD')\n", + "MILVUS_COLLECTION = \"collection_nomicai_embeddings\"" + ] + }, + { + "cell_type": "markdown", + "id": "56d51868", + "metadata": {}, + "source": [ + "### Initialize the connection" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "bbb6a3e3-5ccd-441e-b80d-427555d9e9f6", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "You try to use a model that was created with version 2.4.0.dev0, however, your version is 2.4.0. This might cause unexpected behavior or errors. 
In that case, try to update to the latest version.\n", + "\n", + "\n", + "\n", + "\n" + ] + } + ], + "source": [ + "# If you don't want to use a GPU, you can remove the 'device': 'cuda' argument\n", + "model_kwargs = {'trust_remote_code': True, 'device': 'cuda'}\n", + "embeddings = HuggingFaceEmbeddings(\n", + " model_name=\"nomic-ai/nomic-embed-text-v1\",\n", + " model_kwargs=model_kwargs,\n", + " show_progress=True\n", + ")\n", + "\n", + "store = Milvus(\n", + " embedding_function=embeddings,\n", + " connection_args={\"host\": MILVUS_HOST, \"port\": MILVUS_PORT, \"user\": MILVUS_USERNAME, \"password\": MILVUS_PASSWORD},\n", + " collection_name=MILVUS_COLLECTION,\n", + " metadata_field=\"metadata\",\n", + " text_field=\"page_content\",\n", + " drop_old=False\n", + " )" + ] + }, + { + "cell_type": "markdown", + "id": "9856851c", + "metadata": {}, + "source": [ + "### Make a query to the index to verify sources" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "9621e231-3541-40bc-85ef-8aa3b2ba2331", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "da82af613881491fa0b11c75381d6c14", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Batches: 0%| | 0/1 [00:00 \u001b[0m\u001b[32;49m24.2\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n" + ] + } + ], + "source": [ + "!pip install -q einops==0.7.0 langchain==0.1.9 pymilvus==2.3.6\n", + "#!pip install -q einops==0.7.0 langchain==0.1.9 pymilvus==2.3.6 openai==1.13.3\n", + "#!pip install einops==0.7.0 langchain==0.1.9 pymilvus==2.3.6 sentence-transformers==2.4.0 openai==1.13.3\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "83e11d23-c0ad-4875-b67f-149fc8b14725", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "import os\n", + "from 
langchain.callbacks.base import BaseCallbackHandler\n", + "from langchain.chains import RetrievalQA\n", + "from langchain.embeddings.huggingface import HuggingFaceEmbeddings\n", + "from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n", + "from langchain_community.llms import VLLMOpenAI\n", + "from langchain.prompts import PromptTemplate\n", + "from langchain_community.vectorstores import Milvus" + ] + }, + { + "cell_type": "markdown", + "id": "9cd4537b", + "metadata": {}, + "source": [ + "#### Base parameters, Inference server and Milvus info" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "51baf1a6-4111-4b40-b43a-833438bdc222", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "# Replace values according to your Milvus deployment\n", + "INFERENCE_SERVER_URL = \"https://mistral-7b-instruct-v0-3-maas-apicast-production.apps.prod.rhoai.rh-aiservices-bu.com:443/v1\"\n", + "MODEL_NAME = \"mistral-7b-instruct\"\n", + "API_KEY= os.getenv('API_KEY')\n", + "#API_KEY= \"Empty\"\n", + "MAX_TOKENS=1024\n", + "TOP_P=0.95\n", + "TEMPERATURE=0.01\n", + "PRESENCE_PENALTY=1.03\n", + "MILVUS_HOST = \"vectordb-milvus.milvus.svc.cluster.local\"\n", + "MILVUS_PORT = 19530\n", + "MILVUS_USERNAME = os.getenv('MILVUS_USERNAME')\n", + "MILVUS_PASSWORD = os.getenv('MILVUS_PASSWORD')\n", + "MILVUS_COLLECTION = \"collection_nomicai_embeddings\"" + ] + }, + { + "cell_type": "markdown", + "id": "fe4c1b1a", + "metadata": {}, + "source": [ + "#### Initialize the connection" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "bbb6a3e3-5ccd-441e-b80d-427555d9e9f6", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "You try to use a model that was created with version 2.4.0.dev0, however, your version is 2.4.0. This might cause unexpected behavior or errors. 
In that case, try to update to the latest version.\n", + "\n", + "\n", + "\n", + "\n" + ] + } + ], + "source": [ + "model_kwargs = {'trust_remote_code': True}\n", + "embeddings = HuggingFaceEmbeddings(\n", + " model_name=\"nomic-ai/nomic-embed-text-v1\",\n", + " model_kwargs=model_kwargs,\n", + " show_progress=False\n", + ")\n", + "\n", + "store = Milvus(\n", + " embedding_function=embeddings,\n", + " connection_args={\"host\": MILVUS_HOST, \"port\": MILVUS_PORT, \"user\": MILVUS_USERNAME, \"password\": MILVUS_PASSWORD},\n", + " collection_name=MILVUS_COLLECTION,\n", + " metadata_field=\"metadata\",\n", + " text_field=\"page_content\",\n", + " drop_old=False\n", + " )" + ] + }, + { + "cell_type": "markdown", + "id": "b72a3a2b", + "metadata": {}, + "source": [ + "#### Initialize query chain" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "ed8fd396-0798-45c5-8969-6b6ede134c77", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "template=\"\"\"[INST] <>\n", + "You are a helpful, respectful and honest assistant named HatBot answering questions.\n", + "You will be given a question you need to answer, and a context to provide you with information. You must answer the question based as much as possible on this context.\n", + "Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\n", + "\n", + "If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. 
If you don't know the answer to a question, please don't share false information.\n", + "<>\n", + "\n", + "Context: \n", + "{context}\n", + "\n", + "Question: {question} [/INST]\n", + "\"\"\"\n", + "\n", + "QA_CHAIN_PROMPT = PromptTemplate.from_template(template)\n", + "\n", + "llm = ChatOpenAI(\n", + " openai_api_key=API_KEY,\n", + " openai_api_base=INFERENCE_SERVER_URL,\n", + " model_name=MODEL_NAME,\n", + " max_tokens=MAX_TOKENS,\n", + " top_p=TOP_P,\n", + " temperature=TEMPERATURE,\n", + " presence_penalty=PRESENCE_PENALTY,\n", + " streaming=True,\n", + " verbose=False,\n", + " callbacks=[StreamingStdOutCallbackHandler()]\n", + ")\n", + "\n", + "qa_chain = RetrievalQA.from_chain_type(\n", + " llm,\n", + " retriever=store.as_retriever(\n", + " search_type=\"similarity\",\n", + " search_kwargs={\"k\": 4}\n", + " ),\n", + " chain_type_kwargs={\"prompt\": QA_CHAIN_PROMPT},\n", + " return_source_documents=True\n", + " )\n", + "\n", + "os.environ[\"TOKENIZERS_PARALLELISM\"] = \"false\"" + ] + }, + { + "cell_type": "markdown", + "id": "7db84a77-ead5-43d8-9372-1e58e64830d2", + "metadata": {}, + "source": [ + "### Query the LLM without RAG" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "16a30bbb-47c5-45ee-a1fc-84c5f65fc516", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "An accelerator profile is a setting in some software applications or devices that can enhance the performance of the system. The specific steps to use an accelerator profile may vary depending on the software or device you're using. Here's a general idea of how you might use one:\n", + "\n", + "1. Locate the settings or preferences menu within your software or device. This is usually found in the main menu or under the gear icon.\n", + "\n", + "2. Look for a section labeled \"Performance,\" \"Performance Settings,\" or something similar.\n", + "\n", + "3. 
In this section, you should find options for different performance profiles. These might be labeled as \"Balanced,\" \"High Performance,\" \"Ultra,\" etc.\n", + "\n", + "4. Select the \"High Performance\" or \"Ultra\" option to enable the accelerator profile. This will typically increase the speed and responsiveness of your software or device, but may also consume more resources and potentially drain battery life faster.\n", + "\n", + "5. Save your changes and test the performance of your software or device to see if it meets your needs. If not, you can always switch back to a different profile or adjust other settings as needed." + ] + } + ], + "source": [ + "from langchain.chains import ConversationChain\n", + "from langchain.memory import ConversationBufferMemory\n", + "from langchain_community.llms import VLLMOpenAI\n", + "from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler\n", + "from langchain.prompts import PromptTemplate\n", + "\n", + "template=\"\"\"[INST] <>\n", + "You are a helpful, respectful and honest assistant. Always be as helpful as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\n", + "\n", + "If a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. 
If you don't know the answer to a question, please don't share false information.\n", + "<>\n", + "\n", + "Current conversation:\n", + "{history}\n", + "Human: {input}\n", + "AI:\n", + "[/INST]\n", + "\"\"\"\n", + "PROMPT = PromptTemplate(input_variables=[\"history\", \"input\"], template=template)\n", + "\n", + "memory=ConversationBufferMemory()\n", + "\n", + "conversation = ConversationChain(llm=llm,\n", + " prompt=PROMPT,\n", + " verbose=False,\n", + " memory=memory\n", + " )\n", + "\n", + "question = \"How can I use an accelerator profile?\"\n", + "conversation.predict(input=question);" + ] + }, + { + "cell_type": "markdown", + "id": "3a45ad23", + "metadata": {}, + "source": [ + "### Query example with RAG" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "105d2fd1-f36c-409d-8e52-ec6d23a56ad1", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "To use an accelerator profile in OpenShift AI, you need to follow these steps:\n", + "\n", + "1. First, ensure that your OpenShift instance contains an associated accelerator. If it's a new accelerator, you'll need to configure an accelerator profile for the accelerator in context. You can create an accelerator profile from the \"Settings\" page on the OpenShift AI dashboard, under the \"Accelerator profiles\" section.\n", + "\n", + "2. If you have upgraded your OpenShift AI to version 2.13 or later and your instance already has an accelerator, its accelerator profile will be preserved after the upgrade. No additional action is required for existing accelerators.\n", + "\n", + "3. For Intel Gaudi AI accelerators, you'll need to install the necessary dependencies and the version of the HabanaAI Operator that matches the Habana version of the HabanaAI workbench image in your deployment. 
You can find more information about this process in the resources provided: \"HabanaAI Operator v1.10 for OpenShift\" and \"HabanaAI Operator v1.13 for OpenShift\".\n", + "\n", + "4. Once you have the necessary prerequisites in place, you can enable and use the Intel Gaudi AI accelerators in your OpenShift AI environment. The specific steps for this may vary depending on whether you are using the accelerators on-premises or with AWS DL1 compute nodes on an AWS instance." + ] + } + ], + "source": [ + "question = \"How can I use an accelerator profile?\"\n", + "result = qa_chain.invoke({\"query\": question})\n" + ] + }, + { + "cell_type": "markdown", + "id": "97d75d0c", + "metadata": {}, + "source": [ + "#### Retrieve source" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "acda357e-558a-4879-8ad8-21f0567f2f2e", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "https://access.redhat.com/documentation/en-us/red_hat_openshift_ai_self-managed/2.13/html-single/managing_resources/index\n", + "https://access.redhat.com/documentation/en-us/red_hat_openshift_ai_self-managed/2.13/html-single/upgrading_openshift_ai_self-managed_in_a_disconnected_environment/index\n", + "https://access.redhat.com/documentation/en-us/red_hat_openshift_ai_self-managed/2.13/html-single/installing_and_uninstalling_openshift_ai_self-managed_in_a_disconnected_environment/index\n" + ] + } + ], + "source": [ + "def remove_duplicates(input_list):\n", + " unique_list = []\n", + " for item in input_list:\n", + " if item.metadata['source'] not in unique_list:\n", + " unique_list.append(item.metadata['source'])\n", + " return unique_list\n", + "\n", + "results = remove_duplicates(result['source_documents'])\n", + "\n", + "for s in results:\n", + " print(s)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3.9", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": 
{ + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.18" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}