From f76363aed87c0baf1cdc0b9df54d50c5e20bd53d Mon Sep 17 00:00:00 2001 From: Marvin2798 Date: Tue, 21 Jan 2025 21:03:14 +0100 Subject: [PATCH] Created using Colab --- lab-langchain-evaluation.ipynb | 4111 ++++++++++++++++++++------------ 1 file changed, 2623 insertions(+), 1488 deletions(-) diff --git a/lab-langchain-evaluation.ipynb b/lab-langchain-evaluation.ipynb index 3759203..c11ff03 100644 --- a/lab-langchain-evaluation.ipynb +++ b/lab-langchain-evaluation.ipynb @@ -1,1492 +1,2627 @@ { - "cells": [ - { - "cell_type": "markdown", - "id": "1b00dae2-c271-4a69-a579-742e084a9058", - "metadata": {}, - "source": [ - "# Lab | Langchain Evaluation\n", - "\n", - "## Intro\n", - "\n", - "Pick different sets of data and re-run this notebook. The point is for you to understand all steps involve and the many different ways one can and should evaluate LLM applications.\n", - "\n", - "What did you learn? - Let's discuss that in class" - ] - }, - { - "cell_type": "markdown", - "id": "52824b89-532a-4e54-87e9-1410813cd39e", - "metadata": {}, - "source": [ - "## LangChain: Evaluation\n", - "\n", - "### Outline:\n", - "\n", - "* Example generation\n", - "* Manual evaluation (and debuging)\n", - "* LLM-assisted evaluation" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "id": "b7ed03ed-1322-49e3-b2a2-33e94fb592ef", - "metadata": { - "height": 98, - "tags": [] - }, - "outputs": [], - "source": [ - "from dotenv import load_dotenv, find_dotenv\n", - "import os\n", - "_ = load_dotenv(find_dotenv())\n", - "\n", - "OPENAI_API_KEY = os.getenv('OPENAI_API_KEY') " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "ce74685f-50e9-48cf-8e2c-fac05e8e3ffd", - "metadata": {}, - "outputs": [], - "source": [] - }, - { - "cell_type": "markdown", - "id": "e7e29d2c-ba67-4cba-8ded-375fe040b9ba", - "metadata": {}, - "source": [ - "### Example 1" - ] - }, - { - "cell_type": "markdown", - "id": "28008949", - "metadata": {}, - "source": [ - "#### Create our QandA 
application" - ] - }, - { - "cell_type": "code", - "execution_count": 57, - "id": "974acf8e-8f88-42de-88f8-40a82cb58e8b", - "metadata": { - "height": 115, - "tags": [] - }, - "outputs": [], - "source": [ - "from langchain.chains import RetrievalQA\n", - "from langchain_openai import ChatOpenAI\n", - "from langchain.llms import OpenAI\n", - "from langchain_huggingface import HuggingFaceEmbeddings\n", - "from langchain.document_loaders import CSVLoader, TextLoader\n", - "from langchain.indexes import VectorstoreIndexCreator\n", - "from langchain.vectorstores import DocArrayInMemorySearch\n", - "from langchain.chains import LLMChain\n" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "id": "9ec1106d", - "metadata": { - "height": 64, - "tags": [] - }, - "outputs": [], - "source": [ - "file = '../data/OutdoorClothingCatalog_1000.csv'\n", - "loader = CSVLoader(file_path=file)\n", - "data = loader.load()" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "id": "550eb642-c223-4d78-8f92-0f265ef78b86", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# !pip install --upgrade --force-reinstall sentence-transformers" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "id": "b31c218f", - "metadata": { - "height": 64, - "tags": [] - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/opt/anaconda3/lib/python3.11/site-packages/langchain_core/_api/deprecation.py:139: LangChainDeprecationWarning: The class `HuggingFaceEmbeddings` was deprecated in LangChain 0.2.2 and will be removed in 0.3.0. An updated version of the class exists in the langchain-huggingface package and should be used instead. 
To use it run `pip install -U langchain-huggingface` and import as `from langchain_huggingface import HuggingFaceEmbeddings`.\n", - " warn_deprecated(\n", - "/opt/anaconda3/lib/python3.11/site-packages/sentence_transformers/cross_encoder/CrossEncoder.py:11: TqdmExperimentalWarning: Using `tqdm.autonotebook.tqdm` in notebook mode. Use `tqdm.tqdm` instead to force console mode (e.g. in jupyter console)\n", - " from tqdm.autonotebook import tqdm, trange\n" - ] - } - ], - "source": [ - "index = VectorstoreIndexCreator(\n", - " vectorstore_cls=DocArrayInMemorySearch,\n", - " embedding=HuggingFaceEmbeddings(model_name=\"all-MiniLM-L6-v2\", model_kwargs = {'device': 'cpu'})\n", - ").from_loaders([loader])" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "a2006054", - "metadata": { - "height": 183, - "tags": [] - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/opt/anaconda3/lib/python3.11/site-packages/langchain_core/_api/deprecation.py:139: LangChainDeprecationWarning: The class `ChatOpenAI` was deprecated in LangChain 0.0.10 and will be removed in 0.3.0. An updated version of the class exists in the langchain-openai package and should be used instead. 
To use it run `pip install -U langchain-openai` and import as `from langchain_openai import ChatOpenAI`.\n", - " warn_deprecated(\n" - ] - } - ], - "source": [ - "llm = ChatOpenAI(temperature = 0.0)\n", - "qa = RetrievalQA.from_chain_type(\n", - " llm=llm, \n", - " chain_type=\"stuff\", \n", - " retriever=index.vectorstore.as_retriever(), \n", - " verbose=True,\n", - " chain_type_kwargs = {\n", - " \"document_separator\": \"<<<<>>>>>\"\n", - " }\n", - ")" - ] - }, - { - "cell_type": "markdown", - "id": "791ebd73", - "metadata": {}, - "source": [ - "#### Coming up with test datapoints" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "id": "fb04a0f9", - "metadata": { - "height": 30, - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "Document(page_content=\": 10\\nname: Cozy Comfort Pullover Set, Stripe\\ndescription: Perfect for lounging, this striped knit set lives up to its name. We used ultrasoft fabric and an easy design that's as comfortable at bedtime as it is when we have to make a quick run out.\\n\\nSize & Fit\\n- Pants are Favorite Fit: Sits lower on the waist.\\n- Relaxed Fit: Our most generous fit sits farthest from the body.\\n\\nFabric & Care\\n- In the softest blend of 63% polyester, 35% rayon and 2% spandex.\\n\\nAdditional Features\\n- Relaxed fit top with raglan sleeves and rounded hem.\\n- Pull-on pants have a wide elastic waistband and drawstring, side pockets and a modern slim leg.\\n\\nImported.\", metadata={'source': '../data/OutdoorClothingCatalog_1000.csv', 'row': 10})" - ] - }, - "execution_count": 15, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "data[10]" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "id": "fe4a88c2", - "metadata": { - "height": 30, - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "Document(page_content=': 11\\nname: Ultra-Lofty 850 Stretch Down Hooded Jacket\\ndescription: This technical stretch down jacket from our 
DownTek collection is sure to keep you warm and comfortable with its full-stretch construction providing exceptional range of motion. With a slightly fitted style that falls at the hip and best with a midweight layer, this jacket is suitable for light activity up to 20° and moderate activity up to -30°. The soft and durable 100% polyester shell offers complete windproof protection and is insulated with warm, lofty goose down. Other features include welded baffles for a no-stitch construction and excellent stretch, an adjustable hood, an interior media port and mesh stash pocket and a hem drawcord. Machine wash and dry. Imported.', metadata={'source': '../data/OutdoorClothingCatalog_1000.csv', 'row': 11})" - ] - }, - "execution_count": 20, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "data[11]" - ] - }, - { - "cell_type": "markdown", - "id": "8d548aef", - "metadata": {}, - "source": [ - "#### Hard-coded examples" - ] - }, - { - "cell_type": "code", - "execution_count": 29, - "id": "106fbd91-f7bc-4d6b-b090-54b6a485ce39", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "from langchain.prompts import PromptTemplate" - ] - }, - { - "cell_type": "code", - "execution_count": 55, - "id": "0c5d5af6-36db-4421-b635-46384e677847", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "answer='Yes, the Cozy Comfort Pullover Set is available in black, grey, and navy blue.'\n" - ] - } - ], - "source": [ - "from langchain.prompts import PromptTemplate\n", - "from langchain.schema import BaseOutputParser\n", - "from pydantic import BaseModel, Field\n", - "\n", - "examples = [\n", - " {\n", - " \"query\": \"Do the Cozy Comfort Pullover Set\\\n", - " have side pockets?\",\n", - " \"answer\": \"Yes\"\n", - " },\n", - " {\n", - " \"query\": \"What collection is the Ultra-Lofty \\\n", - " 850 Stretch Down Hooded Jacket from?\",\n", - " \"answer\": \"The DownTek collection\"\n", 
- " }\n", - "]\n", - "\n", - "# Define the prompt template\n", - "prompt_template = PromptTemplate(\n", - " input_variables=[\"query\"],\n", - " template=\"Examples:\\n\"\n", - " \"1. Query: Do the Cozy Comfort Pullover Set have side pockets?\\n\"\n", - " \" Answer: Yes\\n\"\n", - " \"2. Query: What collection is the Ultra-Lofty 850 Stretch Down Hooded Jacket from?\\n\"\n", - " \" Answer: The DownTek collection\\n\"\n", - " \"Query: {query}\\n\"\n", - " \"Answer:\"\n", - ")\n", - "\n", - "# Define the output model\n", - "class Answer(BaseModel):\n", - " answer: str = Field(description=\"The answer to the query\")\n", - "\n", - "# Create the output parser\n", - "class AnswerOutputParser(BaseOutputParser):\n", - " def parse(self, text: str) -> Answer:\n", - " # Split the response to get the answer\n", - " answer = text.strip().split(\"Answer:\")[-1].strip()\n", - " return Answer(answer=answer)\n", - "\n", - "# Initialize the LLM\n", - "# llm = OpenAI()\n", - "llm = ChatOpenAI()\n", - "\n", - "# Create the LLMChain\n", - "llm_chain = LLMChain(\n", - " llm=llm,\n", - " prompt=prompt_template,\n", - " output_parser=AnswerOutputParser()\n", - ")\n", - "\n", - "# Example query\n", - "query = \"Is the Cozy Comfort Pullover Set available in different colors?\"\n", - "\n", - "# Run the chain\n", - "result = llm_chain.run({\"query\": query})\n", - "\n", - "# Print the result\n", - "print(result)\n" - ] - }, - { - "cell_type": "markdown", - "id": "c7ce3e4f", - "metadata": {}, - "source": [ - "#### LLM-Generated examples" - ] - }, - { - "cell_type": "code", - "execution_count": 57, - "id": "d44f8376", - "metadata": { - "height": 64, - "tags": [] - }, - "outputs": [], - "source": [ - "from langchain.evaluation.qa import QAGenerateChain" - ] - }, - { - "cell_type": "code", - "execution_count": 59, - "id": "34e87816", - "metadata": { - "height": 47, - "tags": [] - }, - "outputs": [], - "source": [ - "example_gen_chain = QAGenerateChain.from_llm(ChatOpenAI())" - ] - }, - { - 
"cell_type": "code", - "execution_count": 75, - "id": "acb34772-368f-4b5e-b4bd-da9b637cc7e8", - "metadata": {}, - "outputs": [], - "source": [ - "llm_chain = LLMChain(llm=llm, prompt=prompt_template)" - ] - }, - { - "cell_type": "code", - "execution_count": 77, - "id": "62abae09", - "metadata": { - "height": 64, - "tags": [] - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/opt/anaconda3/lib/python3.11/site-packages/langchain/chains/llm.py:368: UserWarning: The apply_and_parse method is deprecated, instead pass an output parser directly to LLMChain.\n", - " warnings.warn(\n" - ] - } - ], - "source": [ - "new_examples = example_gen_chain.apply_and_parse(\n", - " [{\"doc\": t} for t in data[:5]]\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 79, - "id": "97ab28b5", - "metadata": { - "height": 30, - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "{'qa_pairs': {'query': \"According to the document, what is the approximate weight of the Women's Campside Oxfords per pair?\",\n", - " 'answer': \"The approximate weight of the Women's Campside Oxfords per pair is 1 lb. 1 oz.\"}}" - ] - }, - "execution_count": 79, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "new_examples[0]" - ] - }, - { - "cell_type": "code", - "execution_count": 34, - "id": "0ebe4228", - "metadata": { - "height": 30, - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "Document(page_content=\": 0\\nname: Women's Campside Oxfords\\ndescription: This ultracomfortable lace-to-toe Oxford boasts a super-soft canvas, thick cushioning, and quality construction for a broken-in feel from the first time you put them on. \\n\\nSize & Fit: Order regular shoe size. For half sizes not offered, order up to next whole size. \\n\\nSpecs: Approx. weight: 1 lb.1 oz. per pair. \\n\\nConstruction: Soft canvas material for a broken-in feel and look. 
Comfortable EVA innersole with Cleansport NXT® antimicrobial odor control. Vintage hunt, fish and camping motif on innersole. Moderate arch contour of innersole. EVA foam midsole for cushioning and support. Chain-tread-inspired molded rubber outsole with modified chain-tread pattern. Imported. \\n\\nQuestions? Please contact us for any inquiries.\", metadata={'source': '../data/OutdoorClothingCatalog_1000.csv', 'row': 0})" - ] - }, - "execution_count": 34, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "data[0]" - ] - }, - { - "cell_type": "code", - "execution_count": 65, - "id": "7693fe86-feeb-4d73-b400-e66e79315274", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "[{'query': \"According to the document, what is the approximate weight of the Women's Campside Oxfords per pair?\",\n", - " 'answer': \"The approximate weight of the Women's Campside Oxfords per pair is 1 lb. 1 oz.\"},\n", - " {'query': 'What are the dimensions of the small and medium sizes of the Recycled Waterhog Dog Mat, Chevron Weave?',\n", - " 'answer': 'The small size of the Recycled Waterhog Dog Mat, Chevron Weave has dimensions of 18\" x 28\", while the medium size has dimensions of 22.5\" x 34.5\".'},\n", - " {'query': \"What are some key features of the Infant and Toddler Girls' Coastal Chill Swimsuit, Two-Piece as described in the document?\",\n", - " 'answer': \"Some key features of the Infant and Toddler Girls' Coastal Chill Swimsuit, Two-Piece include bright colors, ruffles, exclusive whimsical prints, four-way-stretch and chlorine-resistant fabric, UPF 50+ rated fabric for sun protection, crossover no-slip straps, fully lined bottom for secure fit and maximum coverage. 
It is recommended to machine wash and line dry for best results.\"},\n", - " {'query': 'What is the fabric composition of the Refresh Swimwear V-Neck Tankini Contrasts top?',\n", - " 'answer': 'The body of the tankini top is made of 82% recycled nylon and 18% Lycra® spandex, while the lining is made of 90% recycled nylon and 10% Lycra® spandex.'},\n", - " {'query': 'What technology does the EcoFlex 3L Storm Pants feature that makes them more breathable than previous versions?',\n", - " 'answer': 'The EcoFlex 3L Storm Pants feature TEK O2 technology, which offers the most breathability ever tested by the company.'}]" - ] - }, - "execution_count": 65, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "d_flattened = [data['qa_pairs'] for data in new_examples]\n", - "d_flattened" - ] - }, - { - "cell_type": "markdown", - "id": "faf25f2f", - "metadata": {}, - "source": [ - "#### Combine examples" - ] - }, - { - "cell_type": "code", - "execution_count": 71, - "id": "ada2a3fc", - "metadata": { - "height": 30, - "tags": [] - }, - "outputs": [], - "source": [ - "# examples += new_example\n", - "examples += d_flattened" - ] - }, - { - "cell_type": "code", - "execution_count": 81, - "id": "2184b9d7-22ab-43a5-9ba5-b27fef024874", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "{'query': 'Do the Cozy Comfort Pullover Set have side pockets?',\n", - " 'answer': 'Yes'}" - ] - }, - "execution_count": 81, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "examples[0]" - ] - }, - { - "cell_type": "code", - "execution_count": 73, - "id": "9cdf5cf5", - "metadata": { - "height": 30, - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "\n", - "\u001b[1m> Entering new RetrievalQA chain...\u001b[0m\n", - "\n", - "\u001b[1m> Finished chain.\u001b[0m\n" - ] - }, - { - "data": { - "text/plain": [ - "{'query': 'Do the Cozy Comfort Pullover Set have 
side pockets?',\n", - " 'result': 'Yes, the Cozy Comfort Pullover Set has side pockets.'}" - ] - }, - "execution_count": 73, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "qa.invoke(examples[0][\"query\"])" - ] - }, - { - "cell_type": "markdown", - "id": "63f3cb08", - "metadata": {}, - "source": [ - "### Manual Evaluation - Fun part" - ] - }, - { - "cell_type": "code", - "execution_count": 84, - "id": "fcaf622e", - "metadata": { - "height": 47, - "tags": [] - }, - "outputs": [], - "source": [ - "import langchain\n", - "langchain.debug = True" - ] - }, - { - "cell_type": "code", - "execution_count": 86, - "id": "8a142638", - "metadata": { - "height": 30, - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[chain:RetrievalQA] Entering Chain run with input:\n", - "\u001b[0m{\n", - " \"query\": \"Do the Cozy Comfort Pullover Set have side pockets?\"\n", - "}\n", - "\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[chain:RetrievalQA > chain:StuffDocumentsChain] Entering Chain run with input:\n", - "\u001b[0m[inputs]\n", - "\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[chain:RetrievalQA > chain:StuffDocumentsChain > chain:LLMChain] Entering Chain run with input:\n", - "\u001b[0m{\n", - " \"question\": \"Do the Cozy Comfort Pullover Set have side pockets?\",\n", - " \"context\": \": 73\\nname: Cozy Cuddles Knit Pullover Set\\ndescription: Perfect for lounging, this knit set lives up to its name. We used ultrasoft fabric and an easy design that's as comfortable at bedtime as it is when we have to make a quick run out. \\n\\nSize & Fit \\nPants are Favorite Fit: Sits lower on the waist. \\nRelaxed Fit: Our most generous fit sits farthest from the body. \\n\\nFabric & Care \\nIn the softest blend of 63% polyester, 35% rayon and 2% spandex.\\n\\nAdditional Features \\nRelaxed fit top with raglan sleeves and rounded hem. 
\\nPull-on pants have a wide elastic waistband and drawstring, side pockets and a modern slim leg. \\nImported.<<<<>>>>>Berber fleece-lined pockets keep hands warm. One pocket has a hidden security pocket tucked inside. Imported.<<<<>>>>>: 10\\nname: Cozy Comfort Pullover Set, Stripe\\ndescription: Perfect for lounging, this striped knit set lives up to its name. We used ultrasoft fabric and an easy design that's as comfortable at bedtime as it is when we have to make a quick run out.\\n\\nSize & Fit\\n- Pants are Favorite Fit: Sits lower on the waist.\\n- Relaxed Fit: Our most generous fit sits farthest from the body.\\n\\nFabric & Care\\n- In the softest blend of 63% polyester, 35% rayon and 2% spandex.\\n\\nAdditional Features\\n- Relaxed fit top with raglan sleeves and rounded hem.\\n- Pull-on pants have a wide elastic waistband and drawstring, side pockets and a modern slim leg.\\n\\nImported.<<<<>>>>>Additional Features: Bonded construction insulates for extra warmth and won't stretch out of shape. Classic shirt jac styling, with collar and modified hem for extra coverage. Full snap placket for easy on/off. Chest pockets with snap closures. Pencil pocket on chest. Underarm gussets enhance mobility. Berber fleece-lined pockets keep hands warm. One pocket has a hidden security pocket tucked inside. Imported\"\n", - "}\n", - "\u001b[32;1m\u001b[1;3m[llm/start]\u001b[0m \u001b[1m[chain:RetrievalQA > chain:StuffDocumentsChain > chain:LLMChain > llm:ChatOpenAI] Entering LLM run with input:\n", - "\u001b[0m{\n", - " \"prompts\": [\n", - " \"System: Use the following pieces of context to answer the user's question. \\nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\\n----------------\\n: 73\\nname: Cozy Cuddles Knit Pullover Set\\ndescription: Perfect for lounging, this knit set lives up to its name. We used ultrasoft fabric and an easy design that's as comfortable at bedtime as it is when we have to make a quick run out. 
\\n\\nSize & Fit \\nPants are Favorite Fit: Sits lower on the waist. \\nRelaxed Fit: Our most generous fit sits farthest from the body. \\n\\nFabric & Care \\nIn the softest blend of 63% polyester, 35% rayon and 2% spandex.\\n\\nAdditional Features \\nRelaxed fit top with raglan sleeves and rounded hem. \\nPull-on pants have a wide elastic waistband and drawstring, side pockets and a modern slim leg. \\nImported.<<<<>>>>>Berber fleece-lined pockets keep hands warm. One pocket has a hidden security pocket tucked inside. Imported.<<<<>>>>>: 10\\nname: Cozy Comfort Pullover Set, Stripe\\ndescription: Perfect for lounging, this striped knit set lives up to its name. We used ultrasoft fabric and an easy design that's as comfortable at bedtime as it is when we have to make a quick run out.\\n\\nSize & Fit\\n- Pants are Favorite Fit: Sits lower on the waist.\\n- Relaxed Fit: Our most generous fit sits farthest from the body.\\n\\nFabric & Care\\n- In the softest blend of 63% polyester, 35% rayon and 2% spandex.\\n\\nAdditional Features\\n- Relaxed fit top with raglan sleeves and rounded hem.\\n- Pull-on pants have a wide elastic waistband and drawstring, side pockets and a modern slim leg.\\n\\nImported.<<<<>>>>>Additional Features: Bonded construction insulates for extra warmth and won't stretch out of shape. Classic shirt jac styling, with collar and modified hem for extra coverage. Full snap placket for easy on/off. Chest pockets with snap closures. Pencil pocket on chest. Underarm gussets enhance mobility. Berber fleece-lined pockets keep hands warm. One pocket has a hidden security pocket tucked inside. 
Imported\\nHuman: Do the Cozy Comfort Pullover Set have side pockets?\"\n", - " ]\n", - "}\n", - "\u001b[36;1m\u001b[1;3m[llm/end]\u001b[0m \u001b[1m[chain:RetrievalQA > chain:StuffDocumentsChain > chain:LLMChain > llm:ChatOpenAI] [1.27s] Exiting LLM run with output:\n", - "\u001b[0m{\n", - " \"generations\": [\n", - " [\n", - " {\n", - " \"text\": \"Yes, the Cozy Comfort Pullover Set has side pockets.\",\n", - " \"generation_info\": {\n", - " \"finish_reason\": \"stop\",\n", - " \"logprobs\": null\n", - " },\n", - " \"type\": \"ChatGeneration\",\n", - " \"message\": {\n", - " \"lc\": 1,\n", - " \"type\": \"constructor\",\n", - " \"id\": [\n", - " \"langchain\",\n", - " \"schema\",\n", - " \"messages\",\n", - " \"AIMessage\"\n", - " ],\n", - " \"kwargs\": {\n", - " \"content\": \"Yes, the Cozy Comfort Pullover Set has side pockets.\",\n", - " \"response_metadata\": {\n", - " \"token_usage\": {\n", - " \"completion_tokens\": 13,\n", - " \"prompt_tokens\": 504,\n", - " \"total_tokens\": 517\n", - " },\n", - " \"model_name\": \"gpt-3.5-turbo\",\n", - " \"system_fingerprint\": null,\n", - " \"finish_reason\": \"stop\",\n", - " \"logprobs\": null\n", - " },\n", - " \"type\": \"ai\",\n", - " \"id\": \"run-a161a758-1627-4ee4-a205-3a12f334bd82-0\",\n", - " \"tool_calls\": [],\n", - " \"invalid_tool_calls\": []\n", - " }\n", - " }\n", - " }\n", - " ]\n", - " ],\n", - " \"llm_output\": {\n", - " \"token_usage\": {\n", - " \"completion_tokens\": 13,\n", - " \"prompt_tokens\": 504,\n", - " \"total_tokens\": 517\n", - " },\n", - " \"model_name\": \"gpt-3.5-turbo\",\n", - " \"system_fingerprint\": null\n", - " },\n", - " \"run\": null\n", - "}\n", - "\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[chain:RetrievalQA > chain:StuffDocumentsChain > chain:LLMChain] [1.27s] Exiting Chain run with output:\n", - "\u001b[0m{\n", - " \"text\": \"Yes, the Cozy Comfort Pullover Set has side pockets.\"\n", - "}\n", - "\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m 
\u001b[1m[chain:RetrievalQA > chain:StuffDocumentsChain] [1.27s] Exiting Chain run with output:\n", - "\u001b[0m{\n", - " \"output_text\": \"Yes, the Cozy Comfort Pullover Set has side pockets.\"\n", - "}\n", - "\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[chain:RetrievalQA] [1.30s] Exiting Chain run with output:\n", - "\u001b[0m{\n", - " \"result\": \"Yes, the Cozy Comfort Pullover Set has side pockets.\"\n", - "}\n" - ] - }, - { - "data": { - "text/plain": [ - "{'query': 'Do the Cozy Comfort Pullover Set have side pockets?',\n", - " 'result': 'Yes, the Cozy Comfort Pullover Set has side pockets.'}" - ] - }, - "execution_count": 86, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "qa.invoke(examples[0][\"query\"])" - ] - }, - { - "cell_type": "code", - "execution_count": 88, - "id": "b3d6bef0", - "metadata": { - "height": 47, - "tags": [] - }, - "outputs": [], - "source": [ - "# Turn off the debug mode\n", - "langchain.debug = False" - ] - }, - { - "cell_type": "markdown", - "id": "d5bdbdce", - "metadata": {}, - "source": [ - "### LLM assisted evaluation" - ] - }, - { - "cell_type": "code", - "execution_count": 146, - "id": "a54769b0-3daf-4cac-b259-89a10dd9b5a2", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "examples += d_flattened" - ] - }, - { - "cell_type": "code", - "execution_count": 90, - "id": "8ea95385-1b4c-440a-9fea-8500b4cc2154", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "[{'query': 'Do the Cozy Comfort Pullover Set have side pockets?',\n", - " 'answer': 'Yes'},\n", - " {'query': 'What collection is the Ultra-Lofty 850 Stretch Down Hooded Jacket from?',\n", - " 'answer': 'The DownTek collection'},\n", - " {'query': \"According to the document, what is the approximate weight of the Women's Campside Oxfords per pair?\",\n", - " 'answer': \"The approximate weight of the Women's Campside Oxfords per pair is 1 lb. 
1 oz.\"},\n", - " {'query': 'What are the dimensions of the small and medium sizes of the Recycled Waterhog Dog Mat, Chevron Weave?',\n", - " 'answer': 'The small size of the Recycled Waterhog Dog Mat, Chevron Weave has dimensions of 18\" x 28\", while the medium size has dimensions of 22.5\" x 34.5\".'},\n", - " {'query': \"What are some key features of the Infant and Toddler Girls' Coastal Chill Swimsuit, Two-Piece as described in the document?\",\n", - " 'answer': \"Some key features of the Infant and Toddler Girls' Coastal Chill Swimsuit, Two-Piece include bright colors, ruffles, exclusive whimsical prints, four-way-stretch and chlorine-resistant fabric, UPF 50+ rated fabric for sun protection, crossover no-slip straps, fully lined bottom for secure fit and maximum coverage. It is recommended to machine wash and line dry for best results.\"},\n", - " {'query': 'What is the fabric composition of the Refresh Swimwear V-Neck Tankini Contrasts top?',\n", - " 'answer': 'The body of the tankini top is made of 82% recycled nylon and 18% Lycra® spandex, while the lining is made of 90% recycled nylon and 10% Lycra® spandex.'},\n", - " {'query': 'What technology does the EcoFlex 3L Storm Pants feature that makes them more breathable than previous versions?',\n", - " 'answer': 'The EcoFlex 3L Storm Pants feature TEK O2 technology, which offers the most breathability ever tested by the company.'}]" - ] - }, - "execution_count": 90, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "examples" - ] - }, - { - "cell_type": "code", - "execution_count": 92, - "id": "a4dca05a", - "metadata": { - "height": 30, - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\n", - "\n", - "\u001b[1m> Entering new RetrievalQA chain...\u001b[0m\n", - "\n", - "\n", - "\u001b[1m> Entering new RetrievalQA chain...\u001b[0m\n", - "\n", - "\n", - "\u001b[1m> Entering new RetrievalQA chain...\u001b[0m\n", - "\n", - "\n", - 
"\u001b[1m> Entering new RetrievalQA chain...\u001b[0m\n", - "\n", - "\n", - "\u001b[1m> Entering new RetrievalQA chain...\u001b[0m\n", - "\n", - "\n", - "\u001b[1m> Entering new RetrievalQA chain...\u001b[0m\n", - "\n", - "\n", - "\u001b[1m> Entering new RetrievalQA chain...\u001b[0m\n", - "\n", - "\u001b[1m> Finished chain.\u001b[0m\n", - "\n", - "\u001b[1m> Finished chain.\u001b[0m\n", - "\n", - "\u001b[1m> Finished chain.\u001b[0m\n", - "\n", - "\u001b[1m> Finished chain.\u001b[0m\n", - "\n", - "\u001b[1m> Finished chain.\u001b[0m\n", - "\n", - "\u001b[1m> Finished chain.\u001b[0m\n", - "\n", - "\u001b[1m> Finished chain.\u001b[0m\n" - ] - } - ], - "source": [ - "predictions = qa.batch(examples)" - ] - }, - { - "cell_type": "code", - "execution_count": 94, - "id": "ae7e8b4e-4468-4048-8544-c9936704ea93", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "[{'query': 'Do the Cozy Comfort Pullover Set have side pockets?',\n", - " 'answer': 'Yes',\n", - " 'result': 'Yes, the Cozy Comfort Pullover Set does have side pockets.'},\n", - " {'query': 'What collection is the Ultra-Lofty 850 Stretch Down Hooded Jacket from?',\n", - " 'answer': 'The DownTek collection',\n", - " 'result': 'The Ultra-Lofty 850 Stretch Down Hooded Jacket is from the DownTek collection.'},\n", - " {'query': \"According to the document, what is the approximate weight of the Women's Campside Oxfords per pair?\",\n", - " 'answer': \"The approximate weight of the Women's Campside Oxfords per pair is 1 lb. 1 oz.\",\n", - " 'result': \"The approximate weight of the Women's Campside Oxfords per pair is 1 lb. 
1 oz.\"},\n", - " {'query': 'What are the dimensions of the small and medium sizes of the Recycled Waterhog Dog Mat, Chevron Weave?',\n", - " 'answer': 'The small size of the Recycled Waterhog Dog Mat, Chevron Weave has dimensions of 18\" x 28\", while the medium size has dimensions of 22.5\" x 34.5\".',\n", - " 'result': 'The dimensions of the small size of the Recycled Waterhog Dog Mat, Chevron Weave are 18\" x 28\", and the dimensions of the medium size are 22.5\" x 34.5\".'},\n", - " {'query': \"What are some key features of the Infant and Toddler Girls' Coastal Chill Swimsuit, Two-Piece as described in the document?\",\n", - " 'answer': \"Some key features of the Infant and Toddler Girls' Coastal Chill Swimsuit, Two-Piece include bright colors, ruffles, exclusive whimsical prints, four-way-stretch and chlorine-resistant fabric, UPF 50+ rated fabric for sun protection, crossover no-slip straps, fully lined bottom for secure fit and maximum coverage. It is recommended to machine wash and line dry for best results.\",\n", - " 'result': \"Some key features of the Infant and Toddler Girls' Coastal Chill Swimsuit, Two-Piece are:\\n- Bright colors, ruffles, and exclusive whimsical prints\\n- Four-way-stretch and chlorine-resistant fabric\\n- UPF 50+ rated fabric for high sun protection\\n- Crossover no-slip straps and fully lined bottom for a secure fit and coverage\\n- Machine washable and line dry for best results\"},\n", - " {'query': 'What is the fabric composition of the Refresh Swimwear V-Neck Tankini Contrasts top?',\n", - " 'answer': 'The body of the tankini top is made of 82% recycled nylon and 18% Lycra® spandex, while the lining is made of 90% recycled nylon and 10% Lycra® spandex.',\n", - " 'result': 'The fabric composition of the Refresh Swimwear V-Neck Tankini Contrasts top is as follows:\\n- Body: 82% recycled nylon, 18% Lycra® spandex\\n- Lining: 90% recycled nylon, 10% Lycra® spandex'},\n", - " {'query': 'What technology does the EcoFlex 3L Storm 
Pants feature that makes them more breathable than previous versions?',\n", - " 'answer': 'The EcoFlex 3L Storm Pants feature TEK O2 technology, which offers the most breathability ever tested by the company.',\n", - " 'result': 'The EcoFlex 3L Storm Pants feature TEK O2 technology, which offers the most breathability tested by the brand.'}]" - ] - }, - "execution_count": 94, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "predictions" - ] - }, - { - "cell_type": "code", - "execution_count": 96, - "id": "6012a3e0", - "metadata": { - "height": 30, - "tags": [] - }, - "outputs": [], - "source": [ - "from langchain.evaluation.qa import QAEvalChain" - ] - }, - { - "cell_type": "code", - "execution_count": 98, - "id": "724b1c0b", - "metadata": { - "height": 47, - "tags": [] - }, - "outputs": [], - "source": [ - "llm = ChatOpenAI(temperature=0)\n", - "eval_chain = QAEvalChain.from_llm(llm)" - ] - }, - { - "cell_type": "code", - "execution_count": 100, - "id": "8b46ae55", - "metadata": { - "height": 47, - "tags": [] - }, - "outputs": [], - "source": [ - "graded_outputs = eval_chain.evaluate(examples, predictions)" - ] - }, - { - "cell_type": "code", - "execution_count": 102, - "id": "dc42eb35-c2d7-4581-8004-d315ade63eef", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "[{'results': 'CORRECT'},\n", - " {'results': 'CORRECT'},\n", - " {'results': 'CORRECT'},\n", - " {'results': 'CORRECT'},\n", - " {'results': 'CORRECT'},\n", - " {'results': 'CORRECT'},\n", - " {'results': 'CORRECT'}]" - ] - }, - "execution_count": 102, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "graded_outputs" - ] - }, - { - "cell_type": "code", - "execution_count": 104, - "id": "3437cfbe", - "metadata": { - "height": 149, - "tags": [] - }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Example 0:\n", - "Question: Do the Cozy Comfort Pullover Set have side pockets?\n", - 
"Real Answer: Yes\n", - "Predicted Answer: Yes, the Cozy Comfort Pullover Set does have side pockets.\n", - "\n", - "Example 1:\n", - "Question: What collection is the Ultra-Lofty 850 Stretch Down Hooded Jacket from?\n", - "Real Answer: The DownTek collection\n", - "Predicted Answer: The Ultra-Lofty 850 Stretch Down Hooded Jacket is from the DownTek collection.\n", - "\n", - "Example 2:\n", - "Question: According to the document, what is the approximate weight of the Women's Campside Oxfords per pair?\n", - "Real Answer: The approximate weight of the Women's Campside Oxfords per pair is 1 lb. 1 oz.\n", - "Predicted Answer: The approximate weight of the Women's Campside Oxfords per pair is 1 lb. 1 oz.\n", - "\n", - "Example 3:\n", - "Question: What are the dimensions of the small and medium sizes of the Recycled Waterhog Dog Mat, Chevron Weave?\n", - "Real Answer: The small size of the Recycled Waterhog Dog Mat, Chevron Weave has dimensions of 18\" x 28\", while the medium size has dimensions of 22.5\" x 34.5\".\n", - "Predicted Answer: The dimensions of the small size of the Recycled Waterhog Dog Mat, Chevron Weave are 18\" x 28\", and the dimensions of the medium size are 22.5\" x 34.5\".\n", - "\n", - "Example 4:\n", - "Question: What are some key features of the Infant and Toddler Girls' Coastal Chill Swimsuit, Two-Piece as described in the document?\n", - "Real Answer: Some key features of the Infant and Toddler Girls' Coastal Chill Swimsuit, Two-Piece include bright colors, ruffles, exclusive whimsical prints, four-way-stretch and chlorine-resistant fabric, UPF 50+ rated fabric for sun protection, crossover no-slip straps, fully lined bottom for secure fit and maximum coverage. 
It is recommended to machine wash and line dry for best results.\n", - "Predicted Answer: Some key features of the Infant and Toddler Girls' Coastal Chill Swimsuit, Two-Piece are:\n", - "- Bright colors, ruffles, and exclusive whimsical prints\n", - "- Four-way-stretch and chlorine-resistant fabric\n", - "- UPF 50+ rated fabric for high sun protection\n", - "- Crossover no-slip straps and fully lined bottom for a secure fit and coverage\n", - "- Machine washable and line dry for best results\n", - "\n", - "Example 5:\n", - "Question: What is the fabric composition of the Refresh Swimwear V-Neck Tankini Contrasts top?\n", - "Real Answer: The body of the tankini top is made of 82% recycled nylon and 18% Lycra® spandex, while the lining is made of 90% recycled nylon and 10% Lycra® spandex.\n", - "Predicted Answer: The fabric composition of the Refresh Swimwear V-Neck Tankini Contrasts top is as follows:\n", - "- Body: 82% recycled nylon, 18% Lycra® spandex\n", - "- Lining: 90% recycled nylon, 10% Lycra® spandex\n", - "\n", - "Example 6:\n", - "Question: What technology does the EcoFlex 3L Storm Pants feature that makes them more breathable than previous versions?\n", - "Real Answer: The EcoFlex 3L Storm Pants feature TEK O2 technology, which offers the most breathability ever tested by the company.\n", - "Predicted Answer: The EcoFlex 3L Storm Pants feature TEK O2 technology, which offers the most breathability tested by the brand.\n", - "\n" - ] - } - ], - "source": [ - "for i, eg in enumerate(examples):\n", - " print(f\"Example {i}:\")\n", - " print(\"Question: \" + predictions[i]['query'])\n", - " print(\"Real Answer: \" + predictions[i]['answer'])\n", - " print(\"Predicted Answer: \" + predictions[i]['result'])\n", - " # print(\"Predicted Grade: \" + graded_outputs[i]['text'])\n", - " print()" - ] - }, - { - "cell_type": "markdown", - "id": "721d127a-a9e3-465d-a8ae-0e2c4b4a2659", - "metadata": {}, - "source": [ - "### Example 2\n", - "One can also easily evaluate 
your QA chains with the metrics offered in ragas" - ] - }, - { - "cell_type": "code", - "execution_count": 24, - "id": "a5ef0493-34ff-4801-b405-69c76ce86c38", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/opt/anaconda3/lib/python3.11/site-packages/langchain/indexes/vectorstore.py:129: UserWarning: Using InMemoryVectorStore as the default vectorstore.This memory store won't persist data. You should explicitlyspecify a vectorstore when using VectorstoreIndexCreator\n", - " warnings.warn(\n" - ] - } - ], - "source": [ - "from langchain_huggingface import HuggingFaceEmbeddings\n", - "loader = TextLoader(\"../data/nyc_text.txt\")\n", - "index = VectorstoreIndexCreator(embedding=HuggingFaceEmbeddings(model_name=\"all-MiniLM-L6-v2\", model_kwargs = {'device': 'mps'})).from_loaders([loader])\n", - "\n", - "\n", - "llm = ChatOpenAI(temperature= 0)\n", - "qa_chain = RetrievalQA.from_chain_type(\n", - " llm,\n", - " retriever=index.vectorstore.as_retriever(),\n", - " return_source_documents=True,\n", - ")" - ] - }, - { - "cell_type": "code", - "execution_count": 26, - "id": "f0449cae-de25-4ef6-ae64-78ccf5e06a5a", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", - "To disable this warning, you can either:\n", - "\t- Avoid using `tokenizers` before the fork if possible\n", - "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n" - ] - }, - { - "data": { - "text/plain": [ - "'New York City was originally named New Amsterdam by Dutch colonists in 1626. When the city came under British control in 1664, it was renamed New York after King Charles II of England granted the lands to his brother, the Duke of York. 
The city has been continuously named New York since November 1674.'" - ] - }, - "execution_count": 26, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "# testing it out\n", - "\n", - "question = \"How did New York City get its name?\"\n", - "result = qa_chain.invoke({\"query\": question})\n", - "result[\"result\"]" - ] - }, - { - "cell_type": "code", - "execution_count": 28, - "id": "9e846b3d-f79f-46eb-8075-c816268c0500", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "{'query': 'How did New York City get its name?',\n", - " 'result': 'New York City was originally named New Amsterdam by Dutch colonists in 1626. When the city came under British control in 1664, it was renamed New York after King Charles II of England granted the lands to his brother, the Duke of York. The city has been continuously named New York since November 1674.',\n", - " 'source_documents': [Document(page_content='The city and its metropolitan area constitute the premier gateway for legal immigration to the United States. As many as 800 languages are spoken in New York, making it the most linguistically diverse city in the world. New York City is home to more than 3.2 million residents born outside the U.S., the largest foreign-born population of any city in the world as of 2016.New York City traces its origins to a trading post founded on the southern tip of Manhattan Island by Dutch colonists in approximately 1624. The settlement was named New Amsterdam (Dutch: Nieuw Amsterdam) in 1626 and was chartered as a city in 1653. The city came under British control in 1664 and was renamed New York after King Charles II of England granted the lands to his brother, the Duke of York. The city was regained by the Dutch in July 1673 and was renamed New Orange for one year and three months; the city has been continuously named New York since November 1674. 
New York City was the capital of the United States', metadata={'source': '../data/nyc_text.txt'}),\n", - " Document(page_content='New York City has been a metropolitan municipality with a Strong mayor–council form of government since its consolidation in 1898. In New York City, the city government is responsible for public education, correctional institutions, public safety, recreational facilities, sanitation, water supply, and welfare services.', metadata={'source': '../data/nyc_text.txt'}),\n", - " Document(page_content=\"Despite New York's heavy reliance on its vast public transit system, streets are a defining feature of the city. The Commissioners' Plan of 1811 greatly influenced the city's physical development. Several of the city's streets and avenues, including Broadway, Wall Street, Madison Avenue, and Seventh Avenue are also used as metonyms for national industries there: the theater, finance, advertising, and fashion organizations, respectively.\", metadata={'source': '../data/nyc_text.txt'}),\n", - " Document(page_content=\"In the pre-Columbian era, the area of present-day New York City was inhabited by Algonquian Native Americans, including the Lenape. Their homeland, known as Lenapehoking, included the present-day areas of Staten Island, Manhattan, the Bronx, the western portion of Long Island (including the areas that would later become the boroughs of Brooklyn and Queens), and the Lower Hudson Valley.The first documented visit into New York Harbor by a European was in 1524 by Italian Giovanni da Verrazzano, an explorer from Florence in the service of the French crown. He claimed the area for France and named it Nouvelle Angoulême (New Angoulême). A Spanish expedition, led by the Portuguese captain Estêvão Gomes sailing for Emperor Charles V, arrived in New York Harbor in January 1525 and charted the mouth of the Hudson River, which he named Río de San Antonio ('Saint Anthony's River'). 
The Padrón Real of 1527, the first scientific map to show the East Coast of North America continuously, was informed\", metadata={'source': '../data/nyc_text.txt'})]}" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "result" - ] - }, - { - "cell_type": "markdown", - "id": "069d9da8-a593-4fc6-9d4b-fea2af6bdfd0", - "metadata": {}, - "source": [ - "Now in order to evaluate the qa system we generated a few relevant questions. We've generated a few question for you but feel free to add any you want." - ] - }, - { - "cell_type": "code", - "execution_count": 31, - "id": "a2e2cade-0005-41c1-b775-c6a7175bcf3b", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "eval_questions = [\n", - " \"What is the population of New York City as of 2020?\",\n", - " \"Which borough of New York City has the highest population?\",\n", - " \"What is the economic significance of New York City?\",\n", - " \"How did New York City get its name?\",\n", - " \"What is the significance of the Statue of Liberty in New York City?\",\n", - "]\n", - "\n", - "eval_answers = [\n", - " \"8,804,190\",\n", - " \"Brooklyn\",\n", - " \"New York City's economic significance is vast, as it serves as the global financial capital, housing Wall Street and major financial institutions. Its diverse economy spans technology, media, healthcare, education, and more, making it resilient to economic fluctuations. NYC is a hub for international business, attracting global companies, and boasts a large, skilled labor force. Its real estate market, tourism, cultural industries, and educational institutions further fuel its economic prowess. The city's transportation network and global influence amplify its impact on the world stage, solidifying its status as a vital economic player and cultural epicenter.\",\n", - " \"New York City got its name when it came under British control in 1664. 
King Charles II of England granted the lands to his brother, the Duke of York, who named the city New York in his own honor.\",\n", - " \"The Statue of Liberty in New York City holds great significance as a symbol of the United States and its ideals of liberty and peace. It greeted millions of immigrants who arrived in the U.S. by ship in the late 19th and early 20th centuries, representing hope and freedom for those seeking a better life. It has since become an iconic landmark and a global symbol of cultural diversity and freedom.\",\n", - "]\n", - "\n", - "examples = [\n", - " {\"query\": q, \"ground_truths\": [eval_answers[i]]}\n", - " for i, q in enumerate(eval_questions)\n", - "]" - ] - }, - { - "cell_type": "code", - "execution_count": 33, - "id": "aac9358e-f8bc-4992-aea3-c83160ff0ab0", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "[{'query': 'What is the population of New York City as of 2020?',\n", - " 'ground_truths': ['8,804,190']},\n", - " {'query': 'Which borough of New York City has the highest population?',\n", - " 'ground_truths': ['Brooklyn']},\n", - " {'query': 'What is the economic significance of New York City?',\n", - " 'ground_truths': [\"New York City's economic significance is vast, as it serves as the global financial capital, housing Wall Street and major financial institutions. Its diverse economy spans technology, media, healthcare, education, and more, making it resilient to economic fluctuations. NYC is a hub for international business, attracting global companies, and boasts a large, skilled labor force. Its real estate market, tourism, cultural industries, and educational institutions further fuel its economic prowess. 
The city's transportation network and global influence amplify its impact on the world stage, solidifying its status as a vital economic player and cultural epicenter.\"]},\n", - " {'query': 'How did New York City get its name?',\n", - " 'ground_truths': ['New York City got its name when it came under British control in 1664. King Charles II of England granted the lands to his brother, the Duke of York, who named the city New York in his own honor.']},\n", - " {'query': 'What is the significance of the Statue of Liberty in New York City?',\n", - " 'ground_truths': ['The Statue of Liberty in New York City holds great significance as a symbol of the United States and its ideals of liberty and peace. It greeted millions of immigrants who arrived in the U.S. by ship in the late 19th and early 20th centuries, representing hope and freedom for those seeking a better life. It has since become an iconic landmark and a global symbol of cultural diversity and freedom.']}]" - ] - }, - "execution_count": 33, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "examples" - ] - }, - { - "cell_type": "markdown", - "id": "6a21efe8-7c30-449a-9b8e-5b79778e305b", - "metadata": {}, - "source": [ - "#### Introducing RagasEvaluatorChain" - ] - }, - { - "cell_type": "markdown", - "id": "139c2214-a6eb-4d4f-9403-7e1574b97a36", - "metadata": {}, - "source": [ - "`RagasEvaluatorChain` creates a wrapper around the metrics ragas provides (documented [here](https://github.com/explodinggradients/ragas/blob/main/docs/metrics.md)), making it easier to run these evaluation with langchain and langsmith.\n", - "\n", - "The evaluator chain has the following APIs\n", - "\n", - "- `__call__()`: call the `RagasEvaluatorChain` directly on the result of a QA chain.\n", - "- `evaluate()`: evaluate on a list of examples (with the input queries) and predictions (outputs from the QA chain). 
\n", - "- `evaluate_run()`: method implemented that is called by langsmith evaluators to evaluate langsmith datasets.\n", - "\n", - "lets see each of them in action to learn more." - ] - }, - { - "cell_type": "code", - "execution_count": 37, - "id": "022c8aae-fe5f-4274-b638-9209151b9491", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "'Manhattan (New York County) has the highest population density of any borough in New York City.'" - ] - }, - "execution_count": 37, - "metadata": {}, - "output_type": "execute_result" + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "view-in-github", + "colab_type": "text" + }, + "source": [ + "\"Open" + ] + }, + { + "cell_type": "markdown", + "id": "1b00dae2-c271-4a69-a579-742e084a9058", + "metadata": { + "id": "1b00dae2-c271-4a69-a579-742e084a9058" + }, + "source": [ + "# Lab | Langchain Evaluation\n", + "\n", + "## Intro\n", + "\n", + "Pick different sets of data and re-run this notebook. The point is for you to understand all the steps involved and the many different ways one can and should evaluate LLM applications.\n", + "\n", + "What did you learn? - Let's discuss that in class" + ] + }, + { + "cell_type": "markdown", + "id": "52824b89-532a-4e54-87e9-1410813cd39e", + "metadata": { + "id": "52824b89-532a-4e54-87e9-1410813cd39e" + }, + "source": [ + "## LangChain: Evaluation\n", + "\n", + "### Outline:\n", + "\n", + "* Example generation\n", + "* Manual evaluation (and debugging)\n", + "* LLM-assisted evaluation" + ] + }, + { + "cell_type": "code", + "source": [ + "! 
pip install --upgrade langchain" + ], + "metadata": { + "id": "28Fu48fmBZFD", + "outputId": "099db7c6-343d-4e60-af66-d4ead637e3b4", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "id": "28Fu48fmBZFD", + "execution_count": 1, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: langchain in /usr/local/lib/python3.11/dist-packages (0.3.15)\n", + "Requirement already satisfied: PyYAML>=5.3 in /usr/local/lib/python3.11/dist-packages (from langchain) (6.0.2)\n", + "Requirement already satisfied: SQLAlchemy<3,>=1.4 in /usr/local/lib/python3.11/dist-packages (from langchain) (2.0.37)\n", + "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /usr/local/lib/python3.11/dist-packages (from langchain) (3.11.11)\n", + "Requirement already satisfied: langchain-core<0.4.0,>=0.3.31 in /usr/local/lib/python3.11/dist-packages (from langchain) (0.3.31)\n", + "Requirement already satisfied: langchain-text-splitters<0.4.0,>=0.3.3 in /usr/local/lib/python3.11/dist-packages (from langchain) (0.3.5)\n", + "Requirement already satisfied: langsmith<0.4,>=0.1.17 in /usr/local/lib/python3.11/dist-packages (from langchain) (0.2.10)\n", + "Requirement already satisfied: numpy<2,>=1.22.4 in /usr/local/lib/python3.11/dist-packages (from langchain) (1.26.4)\n", + "Requirement already satisfied: pydantic<3.0.0,>=2.7.4 in /usr/local/lib/python3.11/dist-packages (from langchain) (2.10.5)\n", + "Requirement already satisfied: requests<3,>=2 in /usr/local/lib/python3.11/dist-packages (from langchain) (2.32.3)\n", + "Requirement already satisfied: tenacity!=8.4.0,<10,>=8.1.0 in /usr/local/lib/python3.11/dist-packages (from langchain) (9.0.0)\n", + "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (2.4.4)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.11/dist-packages (from 
aiohttp<4.0.0,>=3.8.3->langchain) (1.3.2)\n", + "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (24.3.0)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.5.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (6.1.0)\n", + "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (0.2.1)\n", + "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.18.3)\n", + "Requirement already satisfied: jsonpatch<2.0,>=1.33 in /usr/local/lib/python3.11/dist-packages (from langchain-core<0.4.0,>=0.3.31->langchain) (1.33)\n", + "Requirement already satisfied: packaging<25,>=23.2 in /usr/local/lib/python3.11/dist-packages (from langchain-core<0.4.0,>=0.3.31->langchain) (24.2)\n", + "Requirement already satisfied: typing-extensions>=4.7 in /usr/local/lib/python3.11/dist-packages (from langchain-core<0.4.0,>=0.3.31->langchain) (4.12.2)\n", + "Requirement already satisfied: httpx<1,>=0.23.0 in /usr/local/lib/python3.11/dist-packages (from langsmith<0.4,>=0.1.17->langchain) (0.28.1)\n", + "Requirement already satisfied: orjson<4.0.0,>=3.9.14 in /usr/local/lib/python3.11/dist-packages (from langsmith<0.4,>=0.1.17->langchain) (3.10.14)\n", + "Requirement already satisfied: requests-toolbelt<2.0.0,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from langsmith<0.4,>=0.1.17->langchain) (1.0.0)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic<3.0.0,>=2.7.4->langchain) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from 
pydantic<3.0.0,>=2.7.4->langchain) (2.27.2)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests<3,>=2->langchain) (3.4.1)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests<3,>=2->langchain) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests<3,>=2->langchain) (2.3.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests<3,>=2->langchain) (2024.12.14)\n", + "Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.11/dist-packages (from SQLAlchemy<3,>=1.4->langchain) (3.1.1)\n", + "Requirement already satisfied: anyio in /usr/local/lib/python3.11/dist-packages (from httpx<1,>=0.23.0->langsmith<0.4,>=0.1.17->langchain) (3.7.1)\n", + "Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.11/dist-packages (from httpx<1,>=0.23.0->langsmith<0.4,>=0.1.17->langchain) (1.0.7)\n", + "Requirement already satisfied: h11<0.15,>=0.13 in /usr/local/lib/python3.11/dist-packages (from httpcore==1.*->httpx<1,>=0.23.0->langsmith<0.4,>=0.1.17->langchain) (0.14.0)\n", + "Requirement already satisfied: jsonpointer>=1.9 in /usr/local/lib/python3.11/dist-packages (from jsonpatch<2.0,>=1.33->langchain-core<0.4.0,>=0.3.31->langchain) (3.0.0)\n", + "Requirement already satisfied: sniffio>=1.1 in /usr/local/lib/python3.11/dist-packages (from anyio->httpx<1,>=0.23.0->langsmith<0.4,>=0.1.17->langchain) (1.3.1)\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "!pip install python-dotenv" + ], + "metadata": { + "id": "BJWQ1R35BpUR", + "outputId": "461e563d-9ee2-4011-bcea-2a453cbc0757", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "id": "BJWQ1R35BpUR", + "execution_count": 2, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already 
satisfied: python-dotenv in /usr/local/lib/python3.11/dist-packages (1.0.1)\n" + ] + } + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "b7ed03ed-1322-49e3-b2a2-33e94fb592ef", + "metadata": { + "height": 98, + "tags": [], + "id": "b7ed03ed-1322-49e3-b2a2-33e94fb592ef" + }, + "outputs": [], + "source": [ + "from dotenv import load_dotenv, find_dotenv\n", + "import os\n", + "_ = load_dotenv(find_dotenv())\n", + "\n", + "OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')" + ] + }, + { + "cell_type": "markdown", + "id": "e7e29d2c-ba67-4cba-8ded-375fe040b9ba", + "metadata": { + "id": "e7e29d2c-ba67-4cba-8ded-375fe040b9ba" + }, + "source": [ + "### Example 1" + ] + }, + { + "cell_type": "markdown", + "id": "28008949", + "metadata": { + "id": "28008949" + }, + "source": [ + "#### Create our QandA application" + ] + }, + { + "cell_type": "code", + "source": [ + "!pip install langchain_openai" + ], + "metadata": { + "collapsed": true, + "id": "uUcb9QwuCGc2", + "outputId": "cef7ce8d-27ff-4d48-892c-9a27b5fe0d0f", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "id": "uUcb9QwuCGc2", + "execution_count": 4, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: langchain_openai in /usr/local/lib/python3.11/dist-packages (0.3.1)\n", + "Requirement already satisfied: langchain-core<0.4.0,>=0.3.30 in /usr/local/lib/python3.11/dist-packages (from langchain_openai) (0.3.31)\n", + "Requirement already satisfied: openai<2.0.0,>=1.58.1 in /usr/local/lib/python3.11/dist-packages (from langchain_openai) (1.59.6)\n", + "Requirement already satisfied: tiktoken<1,>=0.7 in /usr/local/lib/python3.11/dist-packages (from langchain_openai) (0.8.0)\n", + "Requirement already satisfied: PyYAML>=5.3 in /usr/local/lib/python3.11/dist-packages (from langchain-core<0.4.0,>=0.3.30->langchain_openai) (6.0.2)\n", + "Requirement already satisfied: jsonpatch<2.0,>=1.33 in /usr/local/lib/python3.11/dist-packages (from 
langchain-core<0.4.0,>=0.3.30->langchain_openai) (1.33)\n", + "Requirement already satisfied: langsmith<0.4,>=0.1.125 in /usr/local/lib/python3.11/dist-packages (from langchain-core<0.4.0,>=0.3.30->langchain_openai) (0.2.10)\n", + "Requirement already satisfied: packaging<25,>=23.2 in /usr/local/lib/python3.11/dist-packages (from langchain-core<0.4.0,>=0.3.30->langchain_openai) (24.2)\n", + "Requirement already satisfied: pydantic<3.0.0,>=2.5.2 in /usr/local/lib/python3.11/dist-packages (from langchain-core<0.4.0,>=0.3.30->langchain_openai) (2.10.5)\n", + "Requirement already satisfied: tenacity!=8.4.0,<10.0.0,>=8.1.0 in /usr/local/lib/python3.11/dist-packages (from langchain-core<0.4.0,>=0.3.30->langchain_openai) (9.0.0)\n", + "Requirement already satisfied: typing-extensions>=4.7 in /usr/local/lib/python3.11/dist-packages (from langchain-core<0.4.0,>=0.3.30->langchain_openai) (4.12.2)\n", + "Requirement already satisfied: anyio<5,>=3.5.0 in /usr/local/lib/python3.11/dist-packages (from openai<2.0.0,>=1.58.1->langchain_openai) (3.7.1)\n", + "Requirement already satisfied: distro<2,>=1.7.0 in /usr/local/lib/python3.11/dist-packages (from openai<2.0.0,>=1.58.1->langchain_openai) (1.9.0)\n", + "Requirement already satisfied: httpx<1,>=0.23.0 in /usr/local/lib/python3.11/dist-packages (from openai<2.0.0,>=1.58.1->langchain_openai) (0.28.1)\n", + "Requirement already satisfied: jiter<1,>=0.4.0 in /usr/local/lib/python3.11/dist-packages (from openai<2.0.0,>=1.58.1->langchain_openai) (0.8.2)\n", + "Requirement already satisfied: sniffio in /usr/local/lib/python3.11/dist-packages (from openai<2.0.0,>=1.58.1->langchain_openai) (1.3.1)\n", + "Requirement already satisfied: tqdm>4 in /usr/local/lib/python3.11/dist-packages (from openai<2.0.0,>=1.58.1->langchain_openai) (4.67.1)\n", + "Requirement already satisfied: regex>=2022.1.18 in /usr/local/lib/python3.11/dist-packages (from tiktoken<1,>=0.7->langchain_openai) (2024.11.6)\n", + "Requirement already satisfied: 
requests>=2.26.0 in /usr/local/lib/python3.11/dist-packages (from tiktoken<1,>=0.7->langchain_openai) (2.32.3)\n", + "Requirement already satisfied: idna>=2.8 in /usr/local/lib/python3.11/dist-packages (from anyio<5,>=3.5.0->openai<2.0.0,>=1.58.1->langchain_openai) (3.10)\n", + "Requirement already satisfied: certifi in /usr/local/lib/python3.11/dist-packages (from httpx<1,>=0.23.0->openai<2.0.0,>=1.58.1->langchain_openai) (2024.12.14)\n", + "Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.11/dist-packages (from httpx<1,>=0.23.0->openai<2.0.0,>=1.58.1->langchain_openai) (1.0.7)\n", + "Requirement already satisfied: h11<0.15,>=0.13 in /usr/local/lib/python3.11/dist-packages (from httpcore==1.*->httpx<1,>=0.23.0->openai<2.0.0,>=1.58.1->langchain_openai) (0.14.0)\n", + "Requirement already satisfied: jsonpointer>=1.9 in /usr/local/lib/python3.11/dist-packages (from jsonpatch<2.0,>=1.33->langchain-core<0.4.0,>=0.3.30->langchain_openai) (3.0.0)\n", + "Requirement already satisfied: orjson<4.0.0,>=3.9.14 in /usr/local/lib/python3.11/dist-packages (from langsmith<0.4,>=0.1.125->langchain-core<0.4.0,>=0.3.30->langchain_openai) (3.10.14)\n", + "Requirement already satisfied: requests-toolbelt<2.0.0,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from langsmith<0.4,>=0.1.125->langchain-core<0.4.0,>=0.3.30->langchain_openai) (1.0.0)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic<3.0.0,>=2.5.2->langchain-core<0.4.0,>=0.3.30->langchain_openai) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic<3.0.0,>=2.5.2->langchain-core<0.4.0,>=0.3.30->langchain_openai) (2.27.2)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests>=2.26.0->tiktoken<1,>=0.7->langchain_openai) (3.4.1)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in 
/usr/local/lib/python3.11/dist-packages (from requests>=2.26.0->tiktoken<1,>=0.7->langchain_openai) (2.3.0)\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "!pip install langchain-community" + ], + "metadata": { + "collapsed": true, + "id": "EV1l-8VHCpOz", + "outputId": "88acf9dc-d1ad-48c1-9863-c0deddc7360b", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "id": "EV1l-8VHCpOz", + "execution_count": 6, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting langchain-community\n", + " Downloading langchain_community-0.3.15-py3-none-any.whl.metadata (2.9 kB)\n", + "Requirement already satisfied: PyYAML>=5.3 in /usr/local/lib/python3.11/dist-packages (from langchain-community) (6.0.2)\n", + "Requirement already satisfied: SQLAlchemy<3,>=1.4 in /usr/local/lib/python3.11/dist-packages (from langchain-community) (2.0.37)\n", + "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /usr/local/lib/python3.11/dist-packages (from langchain-community) (3.11.11)\n", + "Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)\n", + " Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)\n", + "Collecting httpx-sse<0.5.0,>=0.4.0 (from langchain-community)\n", + " Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)\n", + "Requirement already satisfied: langchain<0.4.0,>=0.3.15 in /usr/local/lib/python3.11/dist-packages (from langchain-community) (0.3.15)\n", + "Requirement already satisfied: langchain-core<0.4.0,>=0.3.31 in /usr/local/lib/python3.11/dist-packages (from langchain-community) (0.3.31)\n", + "Requirement already satisfied: langsmith<0.4,>=0.1.125 in /usr/local/lib/python3.11/dist-packages (from langchain-community) (0.2.10)\n", + "Requirement already satisfied: numpy<2,>=1.22.4 in /usr/local/lib/python3.11/dist-packages (from langchain-community) (1.26.4)\n", + "Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)\n", + " Downloading 
pydantic_settings-2.7.1-py3-none-any.whl.metadata (3.5 kB)\n", + "Requirement already satisfied: requests<3,>=2 in /usr/local/lib/python3.11/dist-packages (from langchain-community) (2.32.3)\n", + "Requirement already satisfied: tenacity!=8.4.0,<10,>=8.1.0 in /usr/local/lib/python3.11/dist-packages (from langchain-community) (9.0.0)\n", + "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (2.4.4)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (1.3.2)\n", + "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (24.3.0)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (1.5.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (6.1.0)\n", + "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (0.2.1)\n", + "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain-community) (1.18.3)\n", + "Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)\n", + " Downloading marshmallow-3.25.1-py3-none-any.whl.metadata (7.3 kB)\n", + "Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)\n", + " Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)\n", + "Requirement already satisfied: langchain-text-splitters<0.4.0,>=0.3.3 in /usr/local/lib/python3.11/dist-packages (from langchain<0.4.0,>=0.3.15->langchain-community) (0.3.5)\n", + "Requirement already 
satisfied: pydantic<3.0.0,>=2.7.4 in /usr/local/lib/python3.11/dist-packages (from langchain<0.4.0,>=0.3.15->langchain-community) (2.10.5)\n", + "Requirement already satisfied: jsonpatch<2.0,>=1.33 in /usr/local/lib/python3.11/dist-packages (from langchain-core<0.4.0,>=0.3.31->langchain-community) (1.33)\n", + "Requirement already satisfied: packaging<25,>=23.2 in /usr/local/lib/python3.11/dist-packages (from langchain-core<0.4.0,>=0.3.31->langchain-community) (24.2)\n", + "Requirement already satisfied: typing-extensions>=4.7 in /usr/local/lib/python3.11/dist-packages (from langchain-core<0.4.0,>=0.3.31->langchain-community) (4.12.2)\n", + "Requirement already satisfied: httpx<1,>=0.23.0 in /usr/local/lib/python3.11/dist-packages (from langsmith<0.4,>=0.1.125->langchain-community) (0.28.1)\n", + "Requirement already satisfied: orjson<4.0.0,>=3.9.14 in /usr/local/lib/python3.11/dist-packages (from langsmith<0.4,>=0.1.125->langchain-community) (3.10.14)\n", + "Requirement already satisfied: requests-toolbelt<2.0.0,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from langsmith<0.4,>=0.1.125->langchain-community) (1.0.0)\n", + "Requirement already satisfied: python-dotenv>=0.21.0 in /usr/local/lib/python3.11/dist-packages (from pydantic-settings<3.0.0,>=2.4.0->langchain-community) (1.0.1)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests<3,>=2->langchain-community) (3.4.1)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests<3,>=2->langchain-community) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests<3,>=2->langchain-community) (2.3.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests<3,>=2->langchain-community) (2024.12.14)\n", + "Requirement already satisfied: greenlet!=0.4.17 in 
/usr/local/lib/python3.11/dist-packages (from SQLAlchemy<3,>=1.4->langchain-community) (3.1.1)\n", + "Requirement already satisfied: anyio in /usr/local/lib/python3.11/dist-packages (from httpx<1,>=0.23.0->langsmith<0.4,>=0.1.125->langchain-community) (3.7.1)\n", + "Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.11/dist-packages (from httpx<1,>=0.23.0->langsmith<0.4,>=0.1.125->langchain-community) (1.0.7)\n", + "Requirement already satisfied: h11<0.15,>=0.13 in /usr/local/lib/python3.11/dist-packages (from httpcore==1.*->httpx<1,>=0.23.0->langsmith<0.4,>=0.1.125->langchain-community) (0.14.0)\n", + "Requirement already satisfied: jsonpointer>=1.9 in /usr/local/lib/python3.11/dist-packages (from jsonpatch<2.0,>=1.33->langchain-core<0.4.0,>=0.3.31->langchain-community) (3.0.0)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic<3.0.0,>=2.7.4->langchain<0.4.0,>=0.3.15->langchain-community) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic<3.0.0,>=2.7.4->langchain<0.4.0,>=0.3.15->langchain-community) (2.27.2)\n", + "Collecting mypy-extensions>=0.3.0 (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain-community)\n", + " Downloading mypy_extensions-1.0.0-py3-none-any.whl.metadata (1.1 kB)\n", + "Requirement already satisfied: sniffio>=1.1 in /usr/local/lib/python3.11/dist-packages (from anyio->httpx<1,>=0.23.0->langsmith<0.4,>=0.1.125->langchain-community) (1.3.1)\n", + "Downloading langchain_community-0.3.15-py3-none-any.whl (2.5 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m2.5/2.5 MB\u001b[0m \u001b[31m27.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading dataclasses_json-0.6.7-py3-none-any.whl (28 kB)\n", + "Downloading httpx_sse-0.4.0-py3-none-any.whl (7.8 kB)\n", + "Downloading 
pydantic_settings-2.7.1-py3-none-any.whl (29 kB)\n", + "Downloading marshmallow-3.25.1-py3-none-any.whl (49 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m49.6/49.6 kB\u001b[0m \u001b[31m2.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading typing_inspect-0.9.0-py3-none-any.whl (8.8 kB)\n", + "Downloading mypy_extensions-1.0.0-py3-none-any.whl (4.7 kB)\n", + "Installing collected packages: mypy-extensions, marshmallow, httpx-sse, typing-inspect, pydantic-settings, dataclasses-json, langchain-community\n", + "Successfully installed dataclasses-json-0.6.7 httpx-sse-0.4.0 langchain-community-0.3.15 marshmallow-3.25.1 mypy-extensions-1.0.0 pydantic-settings-2.7.1 typing-inspect-0.9.0\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "!pip install langchain" + ], + "metadata": { + "collapsed": true, + "id": "32oopmfQC5IB", + "outputId": "3fc7a2ba-ba72-499a-b41a-06fd78de23bc", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "id": "32oopmfQC5IB", + "execution_count": 8, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Requirement already satisfied: langchain in /usr/local/lib/python3.11/dist-packages (0.3.15)\n", + "Requirement already satisfied: PyYAML>=5.3 in /usr/local/lib/python3.11/dist-packages (from langchain) (6.0.2)\n", + "Requirement already satisfied: SQLAlchemy<3,>=1.4 in /usr/local/lib/python3.11/dist-packages (from langchain) (2.0.37)\n", + "Requirement already satisfied: aiohttp<4.0.0,>=3.8.3 in /usr/local/lib/python3.11/dist-packages (from langchain) (3.11.11)\n", + "Requirement already satisfied: langchain-core<0.4.0,>=0.3.31 in /usr/local/lib/python3.11/dist-packages (from langchain) (0.3.31)\n", + "Requirement already satisfied: langchain-text-splitters<0.4.0,>=0.3.3 in /usr/local/lib/python3.11/dist-packages (from langchain) (0.3.5)\n", + "Requirement already satisfied: langsmith<0.4,>=0.1.17 in 
/usr/local/lib/python3.11/dist-packages (from langchain) (0.2.10)\n", + "Requirement already satisfied: numpy<2,>=1.22.4 in /usr/local/lib/python3.11/dist-packages (from langchain) (1.26.4)\n", + "Requirement already satisfied: pydantic<3.0.0,>=2.7.4 in /usr/local/lib/python3.11/dist-packages (from langchain) (2.10.5)\n", + "Requirement already satisfied: requests<3,>=2 in /usr/local/lib/python3.11/dist-packages (from langchain) (2.32.3)\n", + "Requirement already satisfied: tenacity!=8.4.0,<10,>=8.1.0 in /usr/local/lib/python3.11/dist-packages (from langchain) (9.0.0)\n", + "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (2.4.4)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.3.2)\n", + "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (24.3.0)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.5.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (6.1.0)\n", + "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (0.2.1)\n", + "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp<4.0.0,>=3.8.3->langchain) (1.18.3)\n", + "Requirement already satisfied: jsonpatch<2.0,>=1.33 in /usr/local/lib/python3.11/dist-packages (from langchain-core<0.4.0,>=0.3.31->langchain) (1.33)\n", + "Requirement already satisfied: packaging<25,>=23.2 in /usr/local/lib/python3.11/dist-packages (from langchain-core<0.4.0,>=0.3.31->langchain) (24.2)\n", + "Requirement already satisfied: typing-extensions>=4.7 in 
/usr/local/lib/python3.11/dist-packages (from langchain-core<0.4.0,>=0.3.31->langchain) (4.12.2)\n", + "Requirement already satisfied: httpx<1,>=0.23.0 in /usr/local/lib/python3.11/dist-packages (from langsmith<0.4,>=0.1.17->langchain) (0.28.1)\n", + "Requirement already satisfied: orjson<4.0.0,>=3.9.14 in /usr/local/lib/python3.11/dist-packages (from langsmith<0.4,>=0.1.17->langchain) (3.10.14)\n", + "Requirement already satisfied: requests-toolbelt<2.0.0,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from langsmith<0.4,>=0.1.17->langchain) (1.0.0)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic<3.0.0,>=2.7.4->langchain) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic<3.0.0,>=2.7.4->langchain) (2.27.2)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests<3,>=2->langchain) (3.4.1)\n", + "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests<3,>=2->langchain) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests<3,>=2->langchain) (2.3.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests<3,>=2->langchain) (2024.12.14)\n", + "Requirement already satisfied: greenlet!=0.4.17 in /usr/local/lib/python3.11/dist-packages (from SQLAlchemy<3,>=1.4->langchain) (3.1.1)\n", + "Requirement already satisfied: anyio in /usr/local/lib/python3.11/dist-packages (from httpx<1,>=0.23.0->langsmith<0.4,>=0.1.17->langchain) (3.7.1)\n", + "Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.11/dist-packages (from httpx<1,>=0.23.0->langsmith<0.4,>=0.1.17->langchain) (1.0.7)\n", + "Requirement already satisfied: h11<0.15,>=0.13 in /usr/local/lib/python3.11/dist-packages (from 
httpcore==1.*->httpx<1,>=0.23.0->langsmith<0.4,>=0.1.17->langchain) (0.14.0)\n", + "Requirement already satisfied: jsonpointer>=1.9 in /usr/local/lib/python3.11/dist-packages (from jsonpatch<2.0,>=1.33->langchain-core<0.4.0,>=0.3.31->langchain) (3.0.0)\n", + "Requirement already satisfied: sniffio>=1.1 in /usr/local/lib/python3.11/dist-packages (from anyio->httpx<1,>=0.23.0->langsmith<0.4,>=0.1.17->langchain) (1.3.1)\n" + ] + } + ] + }, + { + "cell_type": "code", + "source": [ + "!pip install langchain_huggingface" + ], + "metadata": { + "collapsed": true, + "id": "wrJrc1sdDDMF", + "outputId": "23d480ac-780e-4534-9b98-294f5292205d", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "id": "wrJrc1sdDDMF", + "execution_count": 10, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting langchain_huggingface\n", + " Downloading langchain_huggingface-0.1.2-py3-none-any.whl.metadata (1.3 kB)\n", + "Requirement already satisfied: huggingface-hub>=0.23.0 in /usr/local/lib/python3.11/dist-packages (from langchain_huggingface) (0.27.1)\n", + "Requirement already satisfied: langchain-core<0.4.0,>=0.3.15 in /usr/local/lib/python3.11/dist-packages (from langchain_huggingface) (0.3.31)\n", + "Requirement already satisfied: sentence-transformers>=2.6.0 in /usr/local/lib/python3.11/dist-packages (from langchain_huggingface) (3.3.1)\n", + "Requirement already satisfied: tokenizers>=0.19.1 in /usr/local/lib/python3.11/dist-packages (from langchain_huggingface) (0.21.0)\n", + "Requirement already satisfied: transformers>=4.39.0 in /usr/local/lib/python3.11/dist-packages (from langchain_huggingface) (4.47.1)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from huggingface-hub>=0.23.0->langchain_huggingface) (3.16.1)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub>=0.23.0->langchain_huggingface) (2024.10.0)\n", + 
"Requirement already satisfied: packaging>=20.9 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub>=0.23.0->langchain_huggingface) (24.2)\n", + "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub>=0.23.0->langchain_huggingface) (6.0.2)\n", + "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from huggingface-hub>=0.23.0->langchain_huggingface) (2.32.3)\n", + "Requirement already satisfied: tqdm>=4.42.1 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub>=0.23.0->langchain_huggingface) (4.67.1)\n", + "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub>=0.23.0->langchain_huggingface) (4.12.2)\n", + "Requirement already satisfied: jsonpatch<2.0,>=1.33 in /usr/local/lib/python3.11/dist-packages (from langchain-core<0.4.0,>=0.3.15->langchain_huggingface) (1.33)\n", + "Requirement already satisfied: langsmith<0.4,>=0.1.125 in /usr/local/lib/python3.11/dist-packages (from langchain-core<0.4.0,>=0.3.15->langchain_huggingface) (0.2.10)\n", + "Requirement already satisfied: pydantic<3.0.0,>=2.5.2 in /usr/local/lib/python3.11/dist-packages (from langchain-core<0.4.0,>=0.3.15->langchain_huggingface) (2.10.5)\n", + "Requirement already satisfied: tenacity!=8.4.0,<10.0.0,>=8.1.0 in /usr/local/lib/python3.11/dist-packages (from langchain-core<0.4.0,>=0.3.15->langchain_huggingface) (9.0.0)\n", + "Requirement already satisfied: torch>=1.11.0 in /usr/local/lib/python3.11/dist-packages (from sentence-transformers>=2.6.0->langchain_huggingface) (2.5.1+cu121)\n", + "Requirement already satisfied: scikit-learn in /usr/local/lib/python3.11/dist-packages (from sentence-transformers>=2.6.0->langchain_huggingface) (1.6.0)\n", + "Requirement already satisfied: scipy in /usr/local/lib/python3.11/dist-packages (from sentence-transformers>=2.6.0->langchain_huggingface) (1.13.1)\n", + "Requirement already 
satisfied: Pillow in /usr/local/lib/python3.11/dist-packages (from sentence-transformers>=2.6.0->langchain_huggingface) (11.1.0)\n", + "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.11/dist-packages (from transformers>=4.39.0->langchain_huggingface) (1.26.4)\n", + "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.11/dist-packages (from transformers>=4.39.0->langchain_huggingface) (2024.11.6)\n", + "Requirement already satisfied: safetensors>=0.4.1 in /usr/local/lib/python3.11/dist-packages (from transformers>=4.39.0->langchain_huggingface) (0.5.2)\n", + "Requirement already satisfied: jsonpointer>=1.9 in /usr/local/lib/python3.11/dist-packages (from jsonpatch<2.0,>=1.33->langchain-core<0.4.0,>=0.3.15->langchain_huggingface) (3.0.0)\n", + "Requirement already satisfied: httpx<1,>=0.23.0 in /usr/local/lib/python3.11/dist-packages (from langsmith<0.4,>=0.1.125->langchain-core<0.4.0,>=0.3.15->langchain_huggingface) (0.28.1)\n", + "Requirement already satisfied: orjson<4.0.0,>=3.9.14 in /usr/local/lib/python3.11/dist-packages (from langsmith<0.4,>=0.1.125->langchain-core<0.4.0,>=0.3.15->langchain_huggingface) (3.10.14)\n", + "Requirement already satisfied: requests-toolbelt<2.0.0,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from langsmith<0.4,>=0.1.125->langchain-core<0.4.0,>=0.3.15->langchain_huggingface) (1.0.0)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic<3.0.0,>=2.5.2->langchain-core<0.4.0,>=0.3.15->langchain_huggingface) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic<3.0.0,>=2.5.2->langchain-core<0.4.0,>=0.3.15->langchain_huggingface) (2.27.2)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub>=0.23.0->langchain_huggingface) (3.4.1)\n", + "Requirement already satisfied: 
idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub>=0.23.0->langchain_huggingface) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub>=0.23.0->langchain_huggingface) (2.3.0)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->huggingface-hub>=0.23.0->langchain_huggingface) (2024.12.14)\n", + "Requirement already satisfied: networkx in /usr/local/lib/python3.11/dist-packages (from torch>=1.11.0->sentence-transformers>=2.6.0->langchain_huggingface) (3.4.2)\n", + "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from torch>=1.11.0->sentence-transformers>=2.6.0->langchain_huggingface) (3.1.5)\n", + "Requirement already satisfied: nvidia-cuda-nvrtc-cu12==12.1.105 in /usr/local/lib/python3.11/dist-packages (from torch>=1.11.0->sentence-transformers>=2.6.0->langchain_huggingface) (12.1.105)\n", + "Requirement already satisfied: nvidia-cuda-runtime-cu12==12.1.105 in /usr/local/lib/python3.11/dist-packages (from torch>=1.11.0->sentence-transformers>=2.6.0->langchain_huggingface) (12.1.105)\n", + "Requirement already satisfied: nvidia-cuda-cupti-cu12==12.1.105 in /usr/local/lib/python3.11/dist-packages (from torch>=1.11.0->sentence-transformers>=2.6.0->langchain_huggingface) (12.1.105)\n", + "Requirement already satisfied: nvidia-cudnn-cu12==9.1.0.70 in /usr/local/lib/python3.11/dist-packages (from torch>=1.11.0->sentence-transformers>=2.6.0->langchain_huggingface) (9.1.0.70)\n", + "Requirement already satisfied: nvidia-cublas-cu12==12.1.3.1 in /usr/local/lib/python3.11/dist-packages (from torch>=1.11.0->sentence-transformers>=2.6.0->langchain_huggingface) (12.1.3.1)\n", + "Requirement already satisfied: nvidia-cufft-cu12==11.0.2.54 in /usr/local/lib/python3.11/dist-packages (from torch>=1.11.0->sentence-transformers>=2.6.0->langchain_huggingface) 
(11.0.2.54)\n", + "Requirement already satisfied: nvidia-curand-cu12==10.3.2.106 in /usr/local/lib/python3.11/dist-packages (from torch>=1.11.0->sentence-transformers>=2.6.0->langchain_huggingface) (10.3.2.106)\n", + "Requirement already satisfied: nvidia-cusolver-cu12==11.4.5.107 in /usr/local/lib/python3.11/dist-packages (from torch>=1.11.0->sentence-transformers>=2.6.0->langchain_huggingface) (11.4.5.107)\n", + "Requirement already satisfied: nvidia-cusparse-cu12==12.1.0.106 in /usr/local/lib/python3.11/dist-packages (from torch>=1.11.0->sentence-transformers>=2.6.0->langchain_huggingface) (12.1.0.106)\n", + "Requirement already satisfied: nvidia-nccl-cu12==2.21.5 in /usr/local/lib/python3.11/dist-packages (from torch>=1.11.0->sentence-transformers>=2.6.0->langchain_huggingface) (2.21.5)\n", + "Requirement already satisfied: nvidia-nvtx-cu12==12.1.105 in /usr/local/lib/python3.11/dist-packages (from torch>=1.11.0->sentence-transformers>=2.6.0->langchain_huggingface) (12.1.105)\n", + "Requirement already satisfied: triton==3.1.0 in /usr/local/lib/python3.11/dist-packages (from torch>=1.11.0->sentence-transformers>=2.6.0->langchain_huggingface) (3.1.0)\n", + "Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.11/dist-packages (from torch>=1.11.0->sentence-transformers>=2.6.0->langchain_huggingface) (1.13.1)\n", + "Requirement already satisfied: nvidia-nvjitlink-cu12 in /usr/local/lib/python3.11/dist-packages (from nvidia-cusolver-cu12==11.4.5.107->torch>=1.11.0->sentence-transformers>=2.6.0->langchain_huggingface) (12.6.85)\n", + "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy==1.13.1->torch>=1.11.0->sentence-transformers>=2.6.0->langchain_huggingface) (1.3.0)\n", + "Requirement already satisfied: joblib>=1.2.0 in /usr/local/lib/python3.11/dist-packages (from scikit-learn->sentence-transformers>=2.6.0->langchain_huggingface) (1.4.2)\n", + "Requirement already satisfied: 
threadpoolctl>=3.1.0 in /usr/local/lib/python3.11/dist-packages (from scikit-learn->sentence-transformers>=2.6.0->langchain_huggingface) (3.5.0)\n", + "Requirement already satisfied: anyio in /usr/local/lib/python3.11/dist-packages (from httpx<1,>=0.23.0->langsmith<0.4,>=0.1.125->langchain-core<0.4.0,>=0.3.15->langchain_huggingface) (3.7.1)\n", + "Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.11/dist-packages (from httpx<1,>=0.23.0->langsmith<0.4,>=0.1.125->langchain-core<0.4.0,>=0.3.15->langchain_huggingface) (1.0.7)\n", + "Requirement already satisfied: h11<0.15,>=0.13 in /usr/local/lib/python3.11/dist-packages (from httpcore==1.*->httpx<1,>=0.23.0->langsmith<0.4,>=0.1.125->langchain-core<0.4.0,>=0.3.15->langchain_huggingface) (0.14.0)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->torch>=1.11.0->sentence-transformers>=2.6.0->langchain_huggingface) (3.0.2)\n", + "Requirement already satisfied: sniffio>=1.1 in /usr/local/lib/python3.11/dist-packages (from anyio->httpx<1,>=0.23.0->langsmith<0.4,>=0.1.125->langchain-core<0.4.0,>=0.3.15->langchain_huggingface) (1.3.1)\n", + "Downloading langchain_huggingface-0.1.2-py3-none-any.whl (21 kB)\n", + "Installing collected packages: langchain_huggingface\n", + "Successfully installed langchain_huggingface-0.1.2\n" + ] + } + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "974acf8e-8f88-42de-88f8-40a82cb58e8b", + "metadata": { + "height": 115, + "tags": [], + "id": "974acf8e-8f88-42de-88f8-40a82cb58e8b" + }, + "outputs": [], + "source": [ + "from langchain.chains import RetrievalQA\n", + "from langchain_openai import ChatOpenAI\n", + "from langchain.llms import OpenAI\n", + "from langchain_huggingface import HuggingFaceEmbeddings\n", + "from langchain.document_loaders import CSVLoader, TextLoader\n", + "from langchain.indexes import VectorstoreIndexCreator\n", + "from langchain.vectorstores import 
DocArrayInMemorySearch\n", + "from langchain.chains import LLMChain\n" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "9ec1106d", + "metadata": { + "height": 64, + "tags": [], + "id": "9ec1106d" + }, + "outputs": [], + "source": [ + "file = '/content/OutdoorClothingCatalog_1000.csv'\n", + "loader = CSVLoader(file_path=file)\n", + "data = loader.load()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "550eb642-c223-4d78-8f92-0f265ef78b86", + "metadata": { + "tags": [], + "id": "550eb642-c223-4d78-8f92-0f265ef78b86" + }, + "outputs": [], + "source": [ + "# !pip install --upgrade --force-reinstall sentence-transformers" + ] + }, + { + "cell_type": "code", + "source": [ + "! pip install docarray" + ], + "metadata": { + "collapsed": true, + "id": "RMG3MFJxIuG4", + "outputId": "369ca486-f8f2-445b-85b2-29653d465138", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "id": "RMG3MFJxIuG4", + "execution_count": 17, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting docarray\n", + " Downloading docarray-0.40.0-py3-none-any.whl.metadata (36 kB)\n", + "Requirement already satisfied: numpy>=1.17.3 in /usr/local/lib/python3.11/dist-packages (from docarray) (1.26.4)\n", + "Requirement already satisfied: orjson>=3.8.2 in /usr/local/lib/python3.11/dist-packages (from docarray) (3.10.14)\n", + "Requirement already satisfied: pydantic>=1.10.8 in /usr/local/lib/python3.11/dist-packages (from docarray) (2.10.5)\n", + "Requirement already satisfied: rich>=13.1.0 in /usr/local/lib/python3.11/dist-packages (from docarray) (13.9.4)\n", + "Collecting types-requests>=2.28.11.6 (from docarray)\n", + " Downloading types_requests-2.32.0.20241016-py3-none-any.whl.metadata (1.9 kB)\n", + "Requirement already satisfied: typing-inspect>=0.8.0 in /usr/local/lib/python3.11/dist-packages (from docarray) (0.9.0)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in 
/usr/local/lib/python3.11/dist-packages (from pydantic>=1.10.8->docarray) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic>=1.10.8->docarray) (2.27.2)\n", + "Requirement already satisfied: typing-extensions>=4.12.2 in /usr/local/lib/python3.11/dist-packages (from pydantic>=1.10.8->docarray) (4.12.2)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /usr/local/lib/python3.11/dist-packages (from rich>=13.1.0->docarray) (3.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /usr/local/lib/python3.11/dist-packages (from rich>=13.1.0->docarray) (2.18.0)\n", + "Requirement already satisfied: urllib3>=2 in /usr/local/lib/python3.11/dist-packages (from types-requests>=2.28.11.6->docarray) (2.3.0)\n", + "Requirement already satisfied: mypy-extensions>=0.3.0 in /usr/local/lib/python3.11/dist-packages (from typing-inspect>=0.8.0->docarray) (1.0.0)\n", + "Requirement already satisfied: mdurl~=0.1 in /usr/local/lib/python3.11/dist-packages (from markdown-it-py>=2.2.0->rich>=13.1.0->docarray) (0.1.2)\n", + "Downloading docarray-0.40.0-py3-none-any.whl (270 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m270.2/270.2 kB\u001b[0m \u001b[31m6.3 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading types_requests-2.32.0.20241016-py3-none-any.whl (15 kB)\n", + "Installing collected packages: types-requests, docarray\n", + "Successfully installed docarray-0.40.0 types-requests-2.32.0.20241016\n" + ] + } + ] + }, + { + "cell_type": "code", + "execution_count": 18, + "id": "b31c218f", + "metadata": { + "height": 64, + "tags": [], + "id": "b31c218f", + "outputId": "b50e0c87-cd66-4a08-86a7-a548b14fb5c8", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.11/dist-packages/pydantic/_migration.py:283: 
UserWarning: `pydantic.error_wrappers:ValidationError` has been moved to `pydantic:ValidationError`.\n", + " warnings.warn(f'`{import_path}` has been moved to `{new_location}`.')\n" + ] + } + ], + "source": [ + "index = VectorstoreIndexCreator(\n", + " vectorstore_cls=DocArrayInMemorySearch,\n", + " embedding=HuggingFaceEmbeddings(model_name=\"all-MiniLM-L6-v2\", model_kwargs = {'device': 'cpu'})\n", + ").from_loaders([loader])" + ] + }, + { + "cell_type": "code", + "source": [ + "import os\n", + "from google.colab import userdata\n", + "OPENAI_API_KEY = userdata.get('OPENAI_API_KEY')\n", + "os.environ['OPENAI_API_KEY'] = OPENAI_API_KEY" + ], + "metadata": { + "id": "EpexoM6qJqlw" + }, + "id": "EpexoM6qJqlw", + "execution_count": 20, + "outputs": [] + }, + { + "cell_type": "code", + "execution_count": 21, + "id": "a2006054", + "metadata": { + "height": 183, + "tags": [], + "id": "a2006054" + }, + "outputs": [], + "source": [ + "llm = ChatOpenAI(temperature = 0.0)\n", + "qa = RetrievalQA.from_chain_type(\n", + " llm=llm,\n", + " chain_type=\"stuff\",\n", + " retriever=index.vectorstore.as_retriever(),\n", + " verbose=True,\n", + " chain_type_kwargs = {\n", + " \"document_separator\": \"<<<<>>>>>\"\n", + " }\n", + ")" + ] + }, + { + "cell_type": "markdown", + "id": "791ebd73", + "metadata": { + "id": "791ebd73" + }, + "source": [ + "#### Coming up with test datapoints" + ] + }, + { + "cell_type": "code", + "execution_count": 22, + "id": "fb04a0f9", + "metadata": { + "height": 30, + "tags": [], + "id": "fb04a0f9", + "outputId": "98010988-d638-4fa6-a693-5d4764cef506", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "Document(metadata={'source': '/content/OutdoorClothingCatalog_1000.csv', 'row': 10}, page_content=\": 10\\nname: Cozy Comfort Pullover Set, Stripe\\ndescription: Perfect for lounging, this striped knit set lives up to its name. 
We used ultrasoft fabric and an easy design that's as comfortable at bedtime as it is when we have to make a quick run out.\\n\\nSize & Fit\\n- Pants are Favorite Fit: Sits lower on the waist.\\n- Relaxed Fit: Our most generous fit sits farthest from the body.\\n\\nFabric & Care\\n- In the softest blend of 63% polyester, 35% rayon and 2% spandex.\\n\\nAdditional Features\\n- Relaxed fit top with raglan sleeves and rounded hem.\\n- Pull-on pants have a wide elastic waistband and drawstring, side pockets and a modern slim leg.\\n\\nImported.\")" + ] + }, + "metadata": {}, + "execution_count": 22 + } + ], + "source": [ + "data[10]" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "id": "fe4a88c2", + "metadata": { + "height": 30, + "tags": [], + "id": "fe4a88c2", + "outputId": "084b8245-4489-401d-85f5-232c3374e8a3", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "Document(metadata={'source': '/content/OutdoorClothingCatalog_1000.csv', 'row': 11}, page_content=': 11\\nname: Ultra-Lofty 850 Stretch Down Hooded Jacket\\ndescription: This technical stretch down jacket from our DownTek collection is sure to keep you warm and comfortable with its full-stretch construction providing exceptional range of motion. With a slightly fitted style that falls at the hip and best with a midweight layer, this jacket is suitable for light activity up to 20° and moderate activity up to -30°. The soft and durable 100% polyester shell offers complete windproof protection and is insulated with warm, lofty goose down. Other features include welded baffles for a no-stitch construction and excellent stretch, an adjustable hood, an interior media port and mesh stash pocket and a hem drawcord. Machine wash and dry. 
Imported.')" + ] + }, + "metadata": {}, + "execution_count": 23 + } + ], + "source": [ + "data[11]" + ] + }, + { + "cell_type": "markdown", + "id": "8d548aef", + "metadata": { + "id": "8d548aef" + }, + "source": [ + "#### Hard-coded examples" + ] + }, + { + "cell_type": "code", + "execution_count": 24, + "id": "106fbd91-f7bc-4d6b-b090-54b6a485ce39", + "metadata": { + "tags": [], + "id": "106fbd91-f7bc-4d6b-b090-54b6a485ce39" + }, + "outputs": [], + "source": [ + "from langchain.prompts import PromptTemplate" + ] + }, + { + "cell_type": "code", + "execution_count": 25, + "id": "0c5d5af6-36db-4421-b635-46384e677847", + "metadata": { + "tags": [], + "id": "0c5d5af6-36db-4421-b635-46384e677847", + "outputId": "c1eaf907-4b58-4034-cde8-68f21688739f", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + ":46: LangChainDeprecationWarning: The class `LLMChain` was deprecated in LangChain 0.1.17 and will be removed in 1.0. Use :meth:`~RunnableSequence, e.g., `prompt | llm`` instead.\n", + " llm_chain = LLMChain(\n", + ":56: LangChainDeprecationWarning: The method `Chain.run` was deprecated in langchain 0.1.0 and will be removed in 1.0. 
Use :meth:`~invoke` instead.\n", + " result = llm_chain.run({\"query\": query})\n" + ] + }, + { + "output_type": "stream", + "name": "stdout", + "text": [ + "answer='Yes, it is available in three different colors: grey, navy, and black.'\n" + ] + } + ], + "source": [ + "from langchain.prompts import PromptTemplate\n", + "from langchain.schema import BaseOutputParser\n", + "from pydantic import BaseModel, Field\n", + "\n", + "examples = [\n", + " {\n", + " \"query\": \"Do the Cozy Comfort Pullover Set\\\n", + " have side pockets?\",\n", + " \"answer\": \"Yes\"\n", + " },\n", + " {\n", + " \"query\": \"What collection is the Ultra-Lofty \\\n", + " 850 Stretch Down Hooded Jacket from?\",\n", + " \"answer\": \"The DownTek collection\"\n", + " }\n", + "]\n", + "\n", + "# Define the prompt template\n", + "prompt_template = PromptTemplate(\n", + " input_variables=[\"query\"],\n", + " template=\"Examples:\\n\"\n", + " \"1. Query: Do the Cozy Comfort Pullover Set have side pockets?\\n\"\n", + " \" Answer: Yes\\n\"\n", + " \"2. 
Query: What collection is the Ultra-Lofty 850 Stretch Down Hooded Jacket from?\\n\"\n", + " \" Answer: The DownTek collection\\n\"\n", + " \"Query: {query}\\n\"\n", + " \"Answer:\"\n", + ")\n", + "\n", + "# Define the output model\n", + "class Answer(BaseModel):\n", + " answer: str = Field(description=\"The answer to the query\")\n", + "\n", + "# Create the output parser\n", + "class AnswerOutputParser(BaseOutputParser):\n", + " def parse(self, text: str) -> Answer:\n", + " # Split the response to get the answer\n", + " answer = text.strip().split(\"Answer:\")[-1].strip()\n", + " return Answer(answer=answer)\n", + "\n", + "# Initialize the LLM\n", + "# llm = OpenAI()\n", + "llm = ChatOpenAI()\n", + "\n", + "# Create the LLMChain\n", + "llm_chain = LLMChain(\n", + " llm=llm,\n", + " prompt=prompt_template,\n", + " output_parser=AnswerOutputParser()\n", + ")\n", + "\n", + "# Example query\n", + "query = \"Is the Cozy Comfort Pullover Set available in different colors?\"\n", + "\n", + "# Run the chain\n", + "result = llm_chain.run({\"query\": query})\n", + "\n", + "# Print the result\n", + "print(result)\n" + ] + }, + { + "cell_type": "markdown", + "id": "c7ce3e4f", + "metadata": { + "id": "c7ce3e4f" + }, + "source": [ + "#### LLM-Generated examples" + ] + }, + { + "cell_type": "code", + "execution_count": 26, + "id": "d44f8376", + "metadata": { + "height": 64, + "tags": [], + "id": "d44f8376" + }, + "outputs": [], + "source": [ + "from langchain.evaluation.qa import QAGenerateChain" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "id": "34e87816", + "metadata": { + "height": 47, + "tags": [], + "id": "34e87816" + }, + "outputs": [], + "source": [ + "example_gen_chain = QAGenerateChain.from_llm(ChatOpenAI())" + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "id": "acb34772-368f-4b5e-b4bd-da9b637cc7e8", + "metadata": { + "id": "acb34772-368f-4b5e-b4bd-da9b637cc7e8" + }, + "outputs": [], + "source": [ + "llm_chain = LLMChain(llm=llm, 
prompt=prompt_template)" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "id": "62abae09", + "metadata": { + "height": 64, + "tags": [], + "id": "62abae09", + "outputId": "5a6d6c9d-c5db-4041-867a-6238ea380270", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.11/dist-packages/langchain/chains/llm.py:369: UserWarning: The apply_and_parse method is deprecated, instead pass an output parser directly to LLMChain.\n", + " warnings.warn(\n" + ] + } + ], + "source": [ + "new_examples = example_gen_chain.apply_and_parse(\n", + " [{\"doc\": t} for t in data[:5]]\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 30, + "id": "97ab28b5", + "metadata": { + "height": 30, + "tags": [], + "id": "97ab28b5", + "outputId": "0434c4ec-f554-4692-fda1-980a7f579aaa", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "{'qa_pairs': {'query': \"What is the weight of each pair of Women's Campside Oxfords?\",\n", + " 'answer': 'The approximate weight of each pair is 1 lb. 1 oz.'}}" + ] + }, + "metadata": {}, + "execution_count": 30 + } + ], + "source": [ + "new_examples[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "id": "0ebe4228", + "metadata": { + "height": 30, + "tags": [], + "id": "0ebe4228", + "outputId": "0286f126-c2d0-40b8-858b-abaeae2db88d", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "Document(metadata={'source': '/content/OutdoorClothingCatalog_1000.csv', 'row': 0}, page_content=\": 0\\nname: Women's Campside Oxfords\\ndescription: This ultracomfortable lace-to-toe Oxford boasts a super-soft canvas, thick cushioning, and quality construction for a broken-in feel from the first time you put them on. 
\\n\\nSize & Fit: Order regular shoe size. For half sizes not offered, order up to next whole size. \\n\\nSpecs: Approx. weight: 1 lb.1 oz. per pair. \\n\\nConstruction: Soft canvas material for a broken-in feel and look. Comfortable EVA innersole with Cleansport NXT® antimicrobial odor control. Vintage hunt, fish and camping motif on innersole. Moderate arch contour of innersole. EVA foam midsole for cushioning and support. Chain-tread-inspired molded rubber outsole with modified chain-tread pattern. Imported. \\n\\nQuestions? Please contact us for any inquiries.\")" + ] + }, + "metadata": {}, + "execution_count": 31 + } + ], + "source": [ + "data[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 32, + "id": "7693fe86-feeb-4d73-b400-e66e79315274", + "metadata": { + "tags": [], + "id": "7693fe86-feeb-4d73-b400-e66e79315274", + "outputId": "adf49421-34e7-4c96-a153-cdd29ec2ccb8", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[{'query': \"What is the weight of each pair of Women's Campside Oxfords?\",\n", + " 'answer': 'The approximate weight of each pair is 1 lb. 1 oz.'},\n", + " {'query': 'What are some key features of the Recycled Waterhog Dog Mat, Chevron Weave?',\n", + " 'answer': 'Some key features of the Recycled Waterhog Dog Mat, Chevron Weave include its rugged construction from recycled plastic materials, its ability to help keep dirt and water off floors, its 24 oz. 
polyester fabric made from 94% recycled materials, rubber backing, exclusive design, thick and thin fibers for scraping dirt and absorbing water, quick drying, resistance to fading, rotting, mildew, and shedding, and the option to use it indoors or outdoors.'},\n", + " {'query': \"What features does the Infant and Toddler Girls' Coastal Chill Swimsuit, Two-Piece offer in terms of sun protection and fit?\",\n", + " 'answer': 'The swimsuit is made of four-way-stretch and chlorine-resistant fabric, has UPF 50+ rated fabric for sun protection, crossover no-slip straps for a secure fit, and a fully lined bottom for maximum coverage.'},\n", + " {'query': 'What is the fabric composition of the Refresh Swimwear, V-Neck Tankini Contrasts top?',\n", + " 'answer': 'The body of the swimtop is made from 82% recycled nylon and 18% Lycra® spandex, while the lining is made from 90% recycled nylon and 10% Lycra® spandex.'},\n", + " {'query': 'What new technology is featured in the EcoFlex 3L Storm Pants that enhances breathability?',\n", + " 'answer': 'The new TEK O2 technology is featured in the EcoFlex 3L Storm Pants, providing enhanced breathability.'}]" + ] + }, + "metadata": {}, + "execution_count": 32 + } + ], + "source": [ + "d_flattened = [data['qa_pairs'] for data in new_examples]\n", + "d_flattened" + ] + }, + { + "cell_type": "markdown", + "id": "faf25f2f", + "metadata": { + "id": "faf25f2f" + }, + "source": [ + "#### Combine examples" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "id": "ada2a3fc", + "metadata": { + "height": 30, + "tags": [], + "id": "ada2a3fc" + }, + "outputs": [], + "source": [ + "# examples += new_example\n", + "examples += d_flattened" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "id": "2184b9d7-22ab-43a5-9ba5-b27fef024874", + "metadata": { + "tags": [], + "id": "2184b9d7-22ab-43a5-9ba5-b27fef024874", + "outputId": "a4647fb7-5158-4e45-e94d-3267c644adde", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, 
+ "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "{'query': 'Do the Cozy Comfort Pullover Set have side pockets?',\n", + " 'answer': 'Yes'}" + ] + }, + "metadata": {}, + "execution_count": 34 + } + ], + "source": [ + "examples[0]" + ] + }, + { + "cell_type": "code", + "execution_count": 35, + "id": "9cdf5cf5", + "metadata": { + "height": 30, + "tags": [], + "id": "9cdf5cf5", + "outputId": "33f77086-ab62-44a1-e928-7171bd71211e", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new RetrievalQA chain...\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "{'query': 'Do the Cozy Comfort Pullover Set have side pockets?',\n", + " 'result': 'Yes, the Cozy Comfort Pullover Set does have side pockets.'}" + ] + }, + "metadata": {}, + "execution_count": 35 + } + ], + "source": [ + "qa.invoke(examples[0][\"query\"])" + ] + }, + { + "cell_type": "markdown", + "id": "63f3cb08", + "metadata": { + "id": "63f3cb08" + }, + "source": [ + "### Manual Evaluation - Fun part" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "id": "fcaf622e", + "metadata": { + "height": 47, + "tags": [], + "id": "fcaf622e" + }, + "outputs": [], + "source": [ + "import langchain\n", + "langchain.debug = True" + ] + }, + { + "cell_type": "code", + "execution_count": 37, + "id": "8a142638", + "metadata": { + "height": 30, + "tags": [], + "id": "8a142638", + "outputId": "333f13bc-aaa0-4bcb-f31c-22b695c64b2c", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[chain:RetrievalQA] Entering Chain run with input:\n", + "\u001b[0m{\n", + " \"query\": \"Do the Cozy Comfort Pullover Set have side 
pockets?\"\n", + "}\n", + "\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[chain:RetrievalQA > chain:StuffDocumentsChain] Entering Chain run with input:\n", + "\u001b[0m[inputs]\n", + "\u001b[32;1m\u001b[1;3m[chain/start]\u001b[0m \u001b[1m[chain:RetrievalQA > chain:StuffDocumentsChain > chain:LLMChain] Entering Chain run with input:\n", + "\u001b[0m{\n", + " \"question\": \"Do the Cozy Comfort Pullover Set have side pockets?\",\n", + " \"context\": \": 73\\nname: Cozy Cuddles Knit Pullover Set\\ndescription: Perfect for lounging, this knit set lives up to its name. We used ultrasoft fabric and an easy design that's as comfortable at bedtime as it is when we have to make a quick run out. \\n\\nSize & Fit \\nPants are Favorite Fit: Sits lower on the waist. \\nRelaxed Fit: Our most generous fit sits farthest from the body. \\n\\nFabric & Care \\nIn the softest blend of 63% polyester, 35% rayon and 2% spandex.\\n\\nAdditional Features \\nRelaxed fit top with raglan sleeves and rounded hem. \\nPull-on pants have a wide elastic waistband and drawstring, side pockets and a modern slim leg. \\nImported.<<<<>>>>>Berber fleece-lined pockets keep hands warm. One pocket has a hidden security pocket tucked inside. Imported.<<<<>>>>>: 10\\nname: Cozy Comfort Pullover Set, Stripe\\ndescription: Perfect for lounging, this striped knit set lives up to its name. 
We used ultrasoft fabric and an easy design that's as comfortable at bedtime as it is when we have to make a quick run out.\\n\\nSize & Fit\\n- Pants are Favorite Fit: Sits lower on the waist.\\n- Relaxed Fit: Our most generous fit sits farthest from the body.\\n\\nFabric & Care\\n- In the softest blend of 63% polyester, 35% rayon and 2% spandex.\\n\\nAdditional Features\\n- Relaxed fit top with raglan sleeves and rounded hem.\\n- Pull-on pants have a wide elastic waistband and drawstring, side pockets and a modern slim leg.\\n\\nImported.<<<<>>>>>Additional Features: Bonded construction insulates for extra warmth and won't stretch out of shape. Classic shirt jac styling, with collar and modified hem for extra coverage. Full snap placket for easy on/off. Chest pockets with snap closures. Pencil pocket on chest. Underarm gussets enhance mobility. Berber fleece-lined pockets keep hands warm. One pocket has a hidden security pocket tucked inside. Imported\"\n", + "}\n", + "\u001b[32;1m\u001b[1;3m[llm/start]\u001b[0m \u001b[1m[chain:RetrievalQA > chain:StuffDocumentsChain > chain:LLMChain > llm:ChatOpenAI] Entering LLM run with input:\n", + "\u001b[0m{\n", + " \"prompts\": [\n", + " \"System: Use the following pieces of context to answer the user's question. \\nIf you don't know the answer, just say that you don't know, don't try to make up an answer.\\n----------------\\n: 73\\nname: Cozy Cuddles Knit Pullover Set\\ndescription: Perfect for lounging, this knit set lives up to its name. We used ultrasoft fabric and an easy design that's as comfortable at bedtime as it is when we have to make a quick run out. \\n\\nSize & Fit \\nPants are Favorite Fit: Sits lower on the waist. \\nRelaxed Fit: Our most generous fit sits farthest from the body. \\n\\nFabric & Care \\nIn the softest blend of 63% polyester, 35% rayon and 2% spandex.\\n\\nAdditional Features \\nRelaxed fit top with raglan sleeves and rounded hem. 
\\nPull-on pants have a wide elastic waistband and drawstring, side pockets and a modern slim leg. \\nImported.<<<<>>>>>Berber fleece-lined pockets keep hands warm. One pocket has a hidden security pocket tucked inside. Imported.<<<<>>>>>: 10\\nname: Cozy Comfort Pullover Set, Stripe\\ndescription: Perfect for lounging, this striped knit set lives up to its name. We used ultrasoft fabric and an easy design that's as comfortable at bedtime as it is when we have to make a quick run out.\\n\\nSize & Fit\\n- Pants are Favorite Fit: Sits lower on the waist.\\n- Relaxed Fit: Our most generous fit sits farthest from the body.\\n\\nFabric & Care\\n- In the softest blend of 63% polyester, 35% rayon and 2% spandex.\\n\\nAdditional Features\\n- Relaxed fit top with raglan sleeves and rounded hem.\\n- Pull-on pants have a wide elastic waistband and drawstring, side pockets and a modern slim leg.\\n\\nImported.<<<<>>>>>Additional Features: Bonded construction insulates for extra warmth and won't stretch out of shape. Classic shirt jac styling, with collar and modified hem for extra coverage. Full snap placket for easy on/off. Chest pockets with snap closures. Pencil pocket on chest. Underarm gussets enhance mobility. Berber fleece-lined pockets keep hands warm. One pocket has a hidden security pocket tucked inside. 
Imported\\nHuman: Do the Cozy Comfort Pullover Set have side pockets?\"\n", + " ]\n", + "}\n", + "\u001b[36;1m\u001b[1;3m[llm/end]\u001b[0m \u001b[1m[chain:RetrievalQA > chain:StuffDocumentsChain > chain:LLMChain > llm:ChatOpenAI] [516ms] Exiting LLM run with output:\n", + "\u001b[0m{\n", + " \"generations\": [\n", + " [\n", + " {\n", + " \"text\": \"Yes, the Cozy Comfort Pullover Set does have side pockets.\",\n", + " \"generation_info\": {\n", + " \"finish_reason\": \"stop\",\n", + " \"logprobs\": null\n", + " },\n", + " \"type\": \"ChatGeneration\",\n", + " \"message\": {\n", + " \"lc\": 1,\n", + " \"type\": \"constructor\",\n", + " \"id\": [\n", + " \"langchain\",\n", + " \"schema\",\n", + " \"messages\",\n", + " \"AIMessage\"\n", + " ],\n", + " \"kwargs\": {\n", + " \"content\": \"Yes, the Cozy Comfort Pullover Set does have side pockets.\",\n", + " \"additional_kwargs\": {\n", + " \"refusal\": null\n", + " },\n", + " \"response_metadata\": {\n", + " \"token_usage\": {\n", + " \"completion_tokens\": 15,\n", + " \"prompt_tokens\": 504,\n", + " \"total_tokens\": 519,\n", + " \"completion_tokens_details\": {\n", + " \"accepted_prediction_tokens\": 0,\n", + " \"audio_tokens\": 0,\n", + " \"reasoning_tokens\": 0,\n", + " \"rejected_prediction_tokens\": 0\n", + " },\n", + " \"prompt_tokens_details\": {\n", + " \"audio_tokens\": 0,\n", + " \"cached_tokens\": 0\n", + " }\n", + " },\n", + " \"model_name\": \"gpt-3.5-turbo-0125\",\n", + " \"system_fingerprint\": null,\n", + " \"finish_reason\": \"stop\",\n", + " \"logprobs\": null\n", + " },\n", + " \"type\": \"ai\",\n", + " \"id\": \"run-213c3d08-dc35-4349-ba03-40b889ad601d-0\",\n", + " \"usage_metadata\": {\n", + " \"input_tokens\": 504,\n", + " \"output_tokens\": 15,\n", + " \"total_tokens\": 519,\n", + " \"input_token_details\": {\n", + " \"audio\": 0,\n", + " \"cache_read\": 0\n", + " },\n", + " \"output_token_details\": {\n", + " \"audio\": 0,\n", + " \"reasoning\": 0\n", + " }\n", + " },\n", + " \"tool_calls\": 
[],\n", + " \"invalid_tool_calls\": []\n", + " }\n", + " }\n", + " }\n", + " ]\n", + " ],\n", + " \"llm_output\": {\n", + " \"token_usage\": {\n", + " \"completion_tokens\": 15,\n", + " \"prompt_tokens\": 504,\n", + " \"total_tokens\": 519,\n", + " \"completion_tokens_details\": {\n", + " \"accepted_prediction_tokens\": 0,\n", + " \"audio_tokens\": 0,\n", + " \"reasoning_tokens\": 0,\n", + " \"rejected_prediction_tokens\": 0\n", + " },\n", + " \"prompt_tokens_details\": {\n", + " \"audio_tokens\": 0,\n", + " \"cached_tokens\": 0\n", + " }\n", + " },\n", + " \"model_name\": \"gpt-3.5-turbo-0125\",\n", + " \"system_fingerprint\": null\n", + " },\n", + " \"run\": null,\n", + " \"type\": \"LLMResult\"\n", + "}\n", + "\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[chain:RetrievalQA > chain:StuffDocumentsChain > chain:LLMChain] [518ms] Exiting Chain run with output:\n", + "\u001b[0m{\n", + " \"text\": \"Yes, the Cozy Comfort Pullover Set does have side pockets.\"\n", + "}\n", + "\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[chain:RetrievalQA > chain:StuffDocumentsChain] [520ms] Exiting Chain run with output:\n", + "\u001b[0m{\n", + " \"output_text\": \"Yes, the Cozy Comfort Pullover Set does have side pockets.\"\n", + "}\n", + "\u001b[36;1m\u001b[1;3m[chain/end]\u001b[0m \u001b[1m[chain:RetrievalQA] [549ms] Exiting Chain run with output:\n", + "\u001b[0m{\n", + " \"result\": \"Yes, the Cozy Comfort Pullover Set does have side pockets.\"\n", + "}\n" + ] + }, + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "{'query': 'Do the Cozy Comfort Pullover Set have side pockets?',\n", + " 'result': 'Yes, the Cozy Comfort Pullover Set does have side pockets.'}" + ] + }, + "metadata": {}, + "execution_count": 37 + } + ], + "source": [ + "qa.invoke(examples[0][\"query\"])" + ] + }, + { + "cell_type": "code", + "execution_count": 38, + "id": "b3d6bef0", + "metadata": { + "height": 47, + "tags": [], + "id": "b3d6bef0" + }, + "outputs": [], + 
"source": [ + "# Turn off the debug mode\n", + "langchain.debug = False" + ] + }, + { + "cell_type": "markdown", + "id": "d5bdbdce", + "metadata": { + "id": "d5bdbdce" + }, + "source": [ + "### LLM assisted evaluation" + ] + }, + { + "cell_type": "code", + "execution_count": 39, + "id": "a54769b0-3daf-4cac-b259-89a10dd9b5a2", + "metadata": { + "tags": [], + "id": "a54769b0-3daf-4cac-b259-89a10dd9b5a2" + }, + "outputs": [], + "source": [ + "examples += d_flattened" + ] + }, + { + "cell_type": "code", + "execution_count": 40, + "id": "8ea95385-1b4c-440a-9fea-8500b4cc2154", + "metadata": { + "tags": [], + "id": "8ea95385-1b4c-440a-9fea-8500b4cc2154", + "outputId": "1690bc20-016a-4fe4-e369-737376ed0af8", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[{'query': 'Do the Cozy Comfort Pullover Set have side pockets?',\n", + " 'answer': 'Yes'},\n", + " {'query': 'What collection is the Ultra-Lofty 850 Stretch Down Hooded Jacket from?',\n", + " 'answer': 'The DownTek collection'},\n", + " {'query': \"What is the weight of each pair of Women's Campside Oxfords?\",\n", + " 'answer': 'The approximate weight of each pair is 1 lb. 1 oz.'},\n", + " {'query': 'What are some key features of the Recycled Waterhog Dog Mat, Chevron Weave?',\n", + " 'answer': 'Some key features of the Recycled Waterhog Dog Mat, Chevron Weave include its rugged construction from recycled plastic materials, its ability to help keep dirt and water off floors, its 24 oz. 
polyester fabric made from 94% recycled materials, rubber backing, exclusive design, thick and thin fibers for scraping dirt and absorbing water, quick drying, resistance to fading, rotting, mildew, and shedding, and the option to use it indoors or outdoors.'},\n", + " {'query': \"What features does the Infant and Toddler Girls' Coastal Chill Swimsuit, Two-Piece offer in terms of sun protection and fit?\",\n", + " 'answer': 'The swimsuit is made of four-way-stretch and chlorine-resistant fabric, has UPF 50+ rated fabric for sun protection, crossover no-slip straps for a secure fit, and a fully lined bottom for maximum coverage.'},\n", + " {'query': 'What is the fabric composition of the Refresh Swimwear, V-Neck Tankini Contrasts top?',\n", + " 'answer': 'The body of the swimtop is made from 82% recycled nylon and 18% Lycra® spandex, while the lining is made from 90% recycled nylon and 10% Lycra® spandex.'},\n", + " {'query': 'What new technology is featured in the EcoFlex 3L Storm Pants that enhances breathability?',\n", + " 'answer': 'The new TEK O2 technology is featured in the EcoFlex 3L Storm Pants, providing enhanced breathability.'},\n", + " {'query': \"What is the weight of each pair of Women's Campside Oxfords?\",\n", + " 'answer': 'The approximate weight of each pair is 1 lb. 1 oz.'},\n", + " {'query': 'What are some key features of the Recycled Waterhog Dog Mat, Chevron Weave?',\n", + " 'answer': 'Some key features of the Recycled Waterhog Dog Mat, Chevron Weave include its rugged construction from recycled plastic materials, its ability to help keep dirt and water off floors, its 24 oz. 
polyester fabric made from 94% recycled materials, rubber backing, exclusive design, thick and thin fibers for scraping dirt and absorbing water, quick drying, resistance to fading, rotting, mildew, and shedding, and the option to use it indoors or outdoors.'},\n", + " {'query': \"What features does the Infant and Toddler Girls' Coastal Chill Swimsuit, Two-Piece offer in terms of sun protection and fit?\",\n", + " 'answer': 'The swimsuit is made of four-way-stretch and chlorine-resistant fabric, has UPF 50+ rated fabric for sun protection, crossover no-slip straps for a secure fit, and a fully lined bottom for maximum coverage.'},\n", + " {'query': 'What is the fabric composition of the Refresh Swimwear, V-Neck Tankini Contrasts top?',\n", + " 'answer': 'The body of the swimtop is made from 82% recycled nylon and 18% Lycra® spandex, while the lining is made from 90% recycled nylon and 10% Lycra® spandex.'},\n", + " {'query': 'What new technology is featured in the EcoFlex 3L Storm Pants that enhances breathability?',\n", + " 'answer': 'The new TEK O2 technology is featured in the EcoFlex 3L Storm Pants, providing enhanced breathability.'}]" + ] + }, + "metadata": {}, + "execution_count": 40 + } + ], + "source": [ + "examples" + ] + }, + { + "cell_type": "code", + "execution_count": 41, + "id": "a4dca05a", + "metadata": { + "height": 30, + "tags": [], + "id": "a4dca05a", + "outputId": "2718b9b4-67a6-4489-e0b8-2334bb5be0c6", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "\n", + "\n", + "\u001b[1m> Entering new RetrievalQA chain...\u001b[0m\n", + "\n", + "\n", + "\u001b[1m> Entering new RetrievalQA chain...\u001b[0m\n", + "\n", + "\n", + "\u001b[1m> Entering new RetrievalQA chain...\u001b[0m\n", + "\n", + "\n", + "\u001b[1m> Entering new RetrievalQA chain...\u001b[0m\n", + "\n", + "\n", + "\u001b[1m> Entering new RetrievalQA chain...\u001b[0m\n", + "\n", + "\n", + 
"\u001b[1m> Entering new RetrievalQA chain...\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n", + "\n", + "\n", + "\u001b[1m> Entering new RetrievalQA chain...\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n", + "\n", + "\n", + "\u001b[1m> Entering new RetrievalQA chain...\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n", + "\n", + "\n", + "\u001b[1m> Entering new RetrievalQA chain...\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n", + "\n", + "\n", + "\u001b[1m> Entering new RetrievalQA chain...\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n", + "\n", + "\n", + "\u001b[1m> Entering new RetrievalQA chain...\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n", + "\n", + "\n", + "\u001b[1m> Entering new RetrievalQA chain...\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n", + "\n", + "\u001b[1m> Finished chain.\u001b[0m\n" + ] + } + ], + "source": [ + "predictions = qa.batch(examples)" + ] + }, + { + "cell_type": "code", + "execution_count": 42, + "id": "ae7e8b4e-4468-4048-8544-c9936704ea93", + "metadata": { + "tags": [], + "id": "ae7e8b4e-4468-4048-8544-c9936704ea93", + "outputId": "eb7e0bef-131e-48ed-c67e-efad63d2fcfa", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[{'query': 'Do the Cozy Comfort Pullover Set have side pockets?',\n", + " 'answer': 'Yes',\n", + " 'result': 'Yes, the Cozy Comfort Pullover Set does have side pockets.'},\n", + " {'query': 'What collection is the Ultra-Lofty 850 Stretch Down Hooded Jacket from?',\n", + " 'answer': 'The DownTek collection',\n", + " 'result': 'The Ultra-Lofty 850 Stretch Down Hooded Jacket is from the DownTek collection.'},\n", 
+ " {'query': \"What is the weight of each pair of Women's Campside Oxfords?\",\n", + " 'answer': 'The approximate weight of each pair is 1 lb. 1 oz.',\n", + " 'result': \"The weight of each pair of Women's Campside Oxfords is approximately 1 lb. 1 oz.\"},\n", + " {'query': 'What are some key features of the Recycled Waterhog Dog Mat, Chevron Weave?',\n", + " 'answer': 'Some key features of the Recycled Waterhog Dog Mat, Chevron Weave include its rugged construction from recycled plastic materials, its ability to help keep dirt and water off floors, its 24 oz. polyester fabric made from 94% recycled materials, rubber backing, exclusive design, thick and thin fibers for scraping dirt and absorbing water, quick drying, resistance to fading, rotting, mildew, and shedding, and the option to use it indoors or outdoors.',\n", + " 'result': 'Some key features of the Recycled Waterhog Dog Mat, Chevron Weave are:\\n- Constructed from recycled plastic materials\\n- Helps keep dirt and water off floors\\n- Made from 24 oz. polyester fabric with 94% recycled materials\\n- Features thick and thin fibers for scraping dirt and absorbing water\\n- Dries quickly and resists fading, rotting, mildew, and shedding\\n- Can be used indoors or outdoors\\n- Made in the USA'},\n", + " {'query': \"What features does the Infant and Toddler Girls' Coastal Chill Swimsuit, Two-Piece offer in terms of sun protection and fit?\",\n", + " 'answer': 'The swimsuit is made of four-way-stretch and chlorine-resistant fabric, has UPF 50+ rated fabric for sun protection, crossover no-slip straps for a secure fit, and a fully lined bottom for maximum coverage.',\n", + " 'result': \"The Infant and Toddler Girls' Coastal Chill Swimsuit, Two-Piece offers UPF 50+ rated fabric, which provides the highest rated sun protection possible by blocking 98% of the sun's harmful rays. 
The swimsuit also has crossover no-slip straps and a fully lined bottom to ensure a secure fit and maximum coverage.\"},\n", + " {'query': 'What is the fabric composition of the Refresh Swimwear, V-Neck Tankini Contrasts top?',\n", + " 'answer': 'The body of the swimtop is made from 82% recycled nylon and 18% Lycra® spandex, while the lining is made from 90% recycled nylon and 10% Lycra® spandex.',\n", + " 'result': 'The fabric composition of the Refresh Swimwear, V-Neck Tankini Contrasts top is as follows:\\n- Body: 82% recycled nylon, 18% Lycra® spandex\\n- Lining: 90% recycled nylon, 10% Lycra® spandex'},\n", + " {'query': 'What new technology is featured in the EcoFlex 3L Storm Pants that enhances breathability?',\n", + " 'answer': 'The new TEK O2 technology is featured in the EcoFlex 3L Storm Pants, providing enhanced breathability.',\n", + " 'result': 'The new technology featured in the EcoFlex 3L Storm Pants that enhances breathability is the TEK O2 technology.'},\n", + " {'query': \"What is the weight of each pair of Women's Campside Oxfords?\",\n", + " 'answer': 'The approximate weight of each pair is 1 lb. 1 oz.',\n", + " 'result': \"The weight of each pair of Women's Campside Oxfords is approximately 1 lb. 1 oz.\"},\n", + " {'query': 'What are some key features of the Recycled Waterhog Dog Mat, Chevron Weave?',\n", + " 'answer': 'Some key features of the Recycled Waterhog Dog Mat, Chevron Weave include its rugged construction from recycled plastic materials, its ability to help keep dirt and water off floors, its 24 oz. 
polyester fabric made from 94% recycled materials, rubber backing, exclusive design, thick and thin fibers for scraping dirt and absorbing water, quick drying, resistance to fading, rotting, mildew, and shedding, and the option to use it indoors or outdoors.',\n", + " 'result': 'Some key features of the Recycled Waterhog Dog Mat, Chevron Weave are:\\n- Constructed from recycled plastic materials\\n- Helps keep dirt and water off floors\\n- Made from 24 oz. polyester fabric (94% recycled materials)\\n- Rubber backing for durability\\n- Features thick and thin fibers for scraping dirt and absorbing water\\n- Dries quickly and resists fading, rotting, mildew, and shedding\\n- Suitable for indoor or outdoor use\\n- Made in the USA'},\n", + " {'query': \"What features does the Infant and Toddler Girls' Coastal Chill Swimsuit, Two-Piece offer in terms of sun protection and fit?\",\n", + " 'answer': 'The swimsuit is made of four-way-stretch and chlorine-resistant fabric, has UPF 50+ rated fabric for sun protection, crossover no-slip straps for a secure fit, and a fully lined bottom for maximum coverage.',\n", + " 'result': \"The Infant and Toddler Girls' Coastal Chill Swimsuit, Two-Piece offers UPF 50+ rated fabric, which provides the highest rated sun protection possible by blocking 98% of the sun's harmful rays. 
The swimsuit also has crossover no-slip straps and a fully lined bottom to ensure a secure fit and maximum coverage.\"},\n", + " {'query': 'What is the fabric composition of the Refresh Swimwear, V-Neck Tankini Contrasts top?',\n", + " 'answer': 'The body of the swimtop is made from 82% recycled nylon and 18% Lycra® spandex, while the lining is made from 90% recycled nylon and 10% Lycra® spandex.',\n", + " 'result': 'The fabric composition of the Refresh Swimwear, V-Neck Tankini Contrasts top is as follows:\\n- Body: 82% recycled nylon, 18% Lycra® spandex\\n- Lining: 90% recycled nylon, 10% Lycra® spandex'},\n", + " {'query': 'What new technology is featured in the EcoFlex 3L Storm Pants that enhances breathability?',\n", + " 'answer': 'The new TEK O2 technology is featured in the EcoFlex 3L Storm Pants, providing enhanced breathability.',\n", + " 'result': 'The new technology featured in the EcoFlex 3L Storm Pants that enhances breathability is the TEK O2 technology.'}]" + ] + }, + "metadata": {}, + "execution_count": 42 + } + ], + "source": [ + "predictions" + ] + }, + { + "cell_type": "code", + "execution_count": 43, + "id": "6012a3e0", + "metadata": { + "height": 30, + "tags": [], + "id": "6012a3e0" + }, + "outputs": [], + "source": [ + "from langchain.evaluation.qa import QAEvalChain" + ] + }, + { + "cell_type": "code", + "execution_count": 44, + "id": "724b1c0b", + "metadata": { + "height": 47, + "tags": [], + "id": "724b1c0b" + }, + "outputs": [], + "source": [ + "llm = ChatOpenAI(temperature=0)\n", + "eval_chain = QAEvalChain.from_llm(llm)" + ] + }, + { + "cell_type": "code", + "execution_count": 45, + "id": "8b46ae55", + "metadata": { + "height": 47, + "tags": [], + "id": "8b46ae55" + }, + "outputs": [], + "source": [ + "graded_outputs = eval_chain.evaluate(examples, predictions)" + ] + }, + { + "cell_type": "code", + "execution_count": 46, + "id": "dc42eb35-c2d7-4581-8004-d315ade63eef", + "metadata": { + "tags": [], + "id": 
"dc42eb35-c2d7-4581-8004-d315ade63eef", + "outputId": "cfdeda66-66b6-4b9f-8974-767fc56f4f07", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[{'results': 'CORRECT'},\n", + " {'results': 'CORRECT'},\n", + " {'results': 'CORRECT'},\n", + " {'results': 'CORRECT'},\n", + " {'results': 'CORRECT'},\n", + " {'results': 'CORRECT'},\n", + " {'results': 'CORRECT'},\n", + " {'results': 'CORRECT'},\n", + " {'results': 'CORRECT'},\n", + " {'results': 'CORRECT'},\n", + " {'results': 'CORRECT'},\n", + " {'results': 'CORRECT'}]" + ] + }, + "metadata": {}, + "execution_count": 46 + } + ], + "source": [ + "graded_outputs" + ] + }, + { + "cell_type": "code", + "execution_count": 47, + "id": "3437cfbe", + "metadata": { + "height": 149, + "tags": [], + "id": "3437cfbe", + "outputId": "adbbaf0a-15c7-4b33-ad2f-c64cc01d5af3", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Example 0:\n", + "Question: Do the Cozy Comfort Pullover Set have side pockets?\n", + "Real Answer: Yes\n", + "Predicted Answer: Yes, the Cozy Comfort Pullover Set does have side pockets.\n", + "\n", + "Example 1:\n", + "Question: What collection is the Ultra-Lofty 850 Stretch Down Hooded Jacket from?\n", + "Real Answer: The DownTek collection\n", + "Predicted Answer: The Ultra-Lofty 850 Stretch Down Hooded Jacket is from the DownTek collection.\n", + "\n", + "Example 2:\n", + "Question: What is the weight of each pair of Women's Campside Oxfords?\n", + "Real Answer: The approximate weight of each pair is 1 lb. 1 oz.\n", + "Predicted Answer: The weight of each pair of Women's Campside Oxfords is approximately 1 lb. 
1 oz.\n", + "\n", + "Example 3:\n", + "Question: What are some key features of the Recycled Waterhog Dog Mat, Chevron Weave?\n", + "Real Answer: Some key features of the Recycled Waterhog Dog Mat, Chevron Weave include its rugged construction from recycled plastic materials, its ability to help keep dirt and water off floors, its 24 oz. polyester fabric made from 94% recycled materials, rubber backing, exclusive design, thick and thin fibers for scraping dirt and absorbing water, quick drying, resistance to fading, rotting, mildew, and shedding, and the option to use it indoors or outdoors.\n", + "Predicted Answer: Some key features of the Recycled Waterhog Dog Mat, Chevron Weave are:\n", + "- Constructed from recycled plastic materials\n", + "- Helps keep dirt and water off floors\n", + "- Made from 24 oz. polyester fabric with 94% recycled materials\n", + "- Features thick and thin fibers for scraping dirt and absorbing water\n", + "- Dries quickly and resists fading, rotting, mildew, and shedding\n", + "- Can be used indoors or outdoors\n", + "- Made in the USA\n", + "\n", + "Example 4:\n", + "Question: What features does the Infant and Toddler Girls' Coastal Chill Swimsuit, Two-Piece offer in terms of sun protection and fit?\n", + "Real Answer: The swimsuit is made of four-way-stretch and chlorine-resistant fabric, has UPF 50+ rated fabric for sun protection, crossover no-slip straps for a secure fit, and a fully lined bottom for maximum coverage.\n", + "Predicted Answer: The Infant and Toddler Girls' Coastal Chill Swimsuit, Two-Piece offers UPF 50+ rated fabric, which provides the highest rated sun protection possible by blocking 98% of the sun's harmful rays. 
The swimsuit also has crossover no-slip straps and a fully lined bottom to ensure a secure fit and maximum coverage.\n", + "\n", + "Example 5:\n", + "Question: What is the fabric composition of the Refresh Swimwear, V-Neck Tankini Contrasts top?\n", + "Real Answer: The body of the swimtop is made from 82% recycled nylon and 18% Lycra® spandex, while the lining is made from 90% recycled nylon and 10% Lycra® spandex.\n", + "Predicted Answer: The fabric composition of the Refresh Swimwear, V-Neck Tankini Contrasts top is as follows:\n", + "- Body: 82% recycled nylon, 18% Lycra® spandex\n", + "- Lining: 90% recycled nylon, 10% Lycra® spandex\n", + "\n", + "Example 6:\n", + "Question: What new technology is featured in the EcoFlex 3L Storm Pants that enhances breathability?\n", + "Real Answer: The new TEK O2 technology is featured in the EcoFlex 3L Storm Pants, providing enhanced breathability.\n", + "Predicted Answer: The new technology featured in the EcoFlex 3L Storm Pants that enhances breathability is the TEK O2 technology.\n", + "\n", + "Example 7:\n", + "Question: What is the weight of each pair of Women's Campside Oxfords?\n", + "Real Answer: The approximate weight of each pair is 1 lb. 1 oz.\n", + "Predicted Answer: The weight of each pair of Women's Campside Oxfords is approximately 1 lb. 1 oz.\n", + "\n", + "Example 8:\n", + "Question: What are some key features of the Recycled Waterhog Dog Mat, Chevron Weave?\n", + "Real Answer: Some key features of the Recycled Waterhog Dog Mat, Chevron Weave include its rugged construction from recycled plastic materials, its ability to help keep dirt and water off floors, its 24 oz. 
polyester fabric made from 94% recycled materials, rubber backing, exclusive design, thick and thin fibers for scraping dirt and absorbing water, quick drying, resistance to fading, rotting, mildew, and shedding, and the option to use it indoors or outdoors.\n", + "Predicted Answer: Some key features of the Recycled Waterhog Dog Mat, Chevron Weave are:\n", + "- Constructed from recycled plastic materials\n", + "- Helps keep dirt and water off floors\n", + "- Made from 24 oz. polyester fabric (94% recycled materials)\n", + "- Rubber backing for durability\n", + "- Features thick and thin fibers for scraping dirt and absorbing water\n", + "- Dries quickly and resists fading, rotting, mildew, and shedding\n", + "- Suitable for indoor or outdoor use\n", + "- Made in the USA\n", + "\n", + "Example 9:\n", + "Question: What features does the Infant and Toddler Girls' Coastal Chill Swimsuit, Two-Piece offer in terms of sun protection and fit?\n", + "Real Answer: The swimsuit is made of four-way-stretch and chlorine-resistant fabric, has UPF 50+ rated fabric for sun protection, crossover no-slip straps for a secure fit, and a fully lined bottom for maximum coverage.\n", + "Predicted Answer: The Infant and Toddler Girls' Coastal Chill Swimsuit, Two-Piece offers UPF 50+ rated fabric, which provides the highest rated sun protection possible by blocking 98% of the sun's harmful rays. 
The swimsuit also has crossover no-slip straps and a fully lined bottom to ensure a secure fit and maximum coverage.\n", + "\n", + "Example 10:\n", + "Question: What is the fabric composition of the Refresh Swimwear, V-Neck Tankini Contrasts top?\n", + "Real Answer: The body of the swimtop is made from 82% recycled nylon and 18% Lycra® spandex, while the lining is made from 90% recycled nylon and 10% Lycra® spandex.\n", + "Predicted Answer: The fabric composition of the Refresh Swimwear, V-Neck Tankini Contrasts top is as follows:\n", + "- Body: 82% recycled nylon, 18% Lycra® spandex\n", + "- Lining: 90% recycled nylon, 10% Lycra® spandex\n", + "\n", + "Example 11:\n", + "Question: What new technology is featured in the EcoFlex 3L Storm Pants that enhances breathability?\n", + "Real Answer: The new TEK O2 technology is featured in the EcoFlex 3L Storm Pants, providing enhanced breathability.\n", + "Predicted Answer: The new technology featured in the EcoFlex 3L Storm Pants that enhances breathability is the TEK O2 technology.\n", + "\n" + ] + } + ], + "source": [ + "for i, eg in enumerate(examples):\n", + " print(f\"Example {i}:\")\n", + " print(\"Question: \" + predictions[i]['query'])\n", + " print(\"Real Answer: \" + predictions[i]['answer'])\n", + " print(\"Predicted Answer: \" + predictions[i]['result'])\n", + " # print(\"Predicted Grade: \" + graded_outputs[i]['text'])\n", + " print()" + ] + }, + { + "cell_type": "markdown", + "id": "721d127a-a9e3-465d-a8ae-0e2c4b4a2659", + "metadata": { + "id": "721d127a-a9e3-465d-a8ae-0e2c4b4a2659" + }, + "source": [ + "### Example 2\n", + "One can also easily evaluate your QA chains with the metrics offered in ragas" + ] + }, + { + "cell_type": "code", + "execution_count": 49, + "id": "a5ef0493-34ff-4801-b405-69c76ce86c38", + "metadata": { + "tags": [], + "id": "a5ef0493-34ff-4801-b405-69c76ce86c38", + "outputId": "c5ef3f10-e184-46b3-814e-009bdc87f181", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 373 
+ } + }, + "outputs": [ + { + "output_type": "error", + "ename": "RuntimeError", + "evalue": "PyTorch is not linked with support for mps devices", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mRuntimeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mfrom\u001b[0m \u001b[0mlangchain_huggingface\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mHuggingFaceEmbeddings\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mloader\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mTextLoader\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"/content/OutdoorClothingCatalog_1000.csv\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mindex\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mVectorstoreIndexCreator\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0membedding\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mHuggingFaceEmbeddings\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmodel_name\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"all-MiniLM-L6-v2\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mmodel_kwargs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0;34m'device'\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0;34m'mps'\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfrom_loaders\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mloader\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.11/dist-packages/langchain_huggingface/embeddings/huggingface.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, **kwargs)\u001b[0m\n\u001b[1;32m 
57\u001b[0m ) from exc\n\u001b[1;32m 58\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 59\u001b[0;31m self._client = sentence_transformers.SentenceTransformer(\n\u001b[0m\u001b[1;32m 60\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodel_name\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcache_folder\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcache_folder\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mmodel_kwargs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 61\u001b[0m )\n", + "\u001b[0;32m/usr/local/lib/python3.11/dist-packages/sentence_transformers/SentenceTransformer.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, model_name_or_path, modules, device, prompts, default_prompt_name, similarity_fn_name, cache_folder, trust_remote_code, revision, local_files_only, token, use_auth_token, truncate_dim, model_kwargs, tokenizer_kwargs, config_kwargs, model_card_data, backend)\u001b[0m\n\u001b[1;32m 345\u001b[0m \u001b[0;32mpass\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 346\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 347\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdevice\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 348\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mis_hpu_graph_enabled\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mFalse\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 349\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36mto\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 1338\u001b[0m \u001b[0;32mraise\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1339\u001b[0m 
\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1340\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_apply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mconvert\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1341\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1342\u001b[0m def register_full_backward_pre_hook(\n", + "\u001b[0;32m/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_apply\u001b[0;34m(self, fn, recurse)\u001b[0m\n\u001b[1;32m 898\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mrecurse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 899\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mmodule\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mchildren\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 900\u001b[0;31m \u001b[0mmodule\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_apply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 901\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 902\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mcompute_should_use_set_data\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtensor\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtensor_applied\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_apply\u001b[0;34m(self, fn, recurse)\u001b[0m\n\u001b[1;32m 898\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mrecurse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 899\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mmodule\u001b[0m \u001b[0;32min\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mchildren\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 900\u001b[0;31m \u001b[0mmodule\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_apply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 901\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 902\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mcompute_should_use_set_data\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtensor\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtensor_applied\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_apply\u001b[0;34m(self, fn, recurse)\u001b[0m\n\u001b[1;32m 898\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mrecurse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 899\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mmodule\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mchildren\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 900\u001b[0;31m \u001b[0mmodule\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_apply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 901\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 902\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mcompute_should_use_set_data\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtensor\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtensor_applied\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + 
"\u001b[0;32m/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_apply\u001b[0;34m(self, fn, recurse)\u001b[0m\n\u001b[1;32m 898\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mrecurse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 899\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mmodule\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mchildren\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 900\u001b[0;31m \u001b[0mmodule\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_apply\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfn\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 901\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 902\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mcompute_should_use_set_data\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mtensor\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtensor_applied\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36m_apply\u001b[0;34m(self, fn, recurse)\u001b[0m\n\u001b[1;32m 925\u001b[0m \u001b[0;31m# `with torch.no_grad():`\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 926\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mtorch\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mno_grad\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 927\u001b[0;31m \u001b[0mparam_applied\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mparam\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 928\u001b[0m \u001b[0mp_should_use_set_data\u001b[0m 
\u001b[0;34m=\u001b[0m \u001b[0mcompute_should_use_set_data\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mparam\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mparam_applied\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 929\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.11/dist-packages/torch/nn/modules/module.py\u001b[0m in \u001b[0;36mconvert\u001b[0;34m(t)\u001b[0m\n\u001b[1;32m 1324\u001b[0m \u001b[0mmemory_format\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mconvert_to_format\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1325\u001b[0m )\n\u001b[0;32m-> 1326\u001b[0;31m return t.to(\n\u001b[0m\u001b[1;32m 1327\u001b[0m \u001b[0mdevice\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1328\u001b[0m \u001b[0mdtype\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mis_floating_point\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mt\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mis_complex\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32melse\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mRuntimeError\u001b[0m: PyTorch is not linked with support for mps devices" + ] + } + ], + "source": [ + "from langchain_huggingface import HuggingFaceEmbeddings\n", + "loader = TextLoader(\"/content/OutdoorClothingCatalog_1000.csv\")\n", + "index = VectorstoreIndexCreator(embedding=HuggingFaceEmbeddings(model_name=\"all-MiniLM-L6-v2\", model_kwargs = {'device': 'mps'})).from_loaders([loader])\n", + "\n", + "\n", + "llm = ChatOpenAI(temperature= 0)\n", + "qa_chain = RetrievalQA.from_chain_type(\n", + " llm,\n", + " retriever=index.vectorstore.as_retriever(),\n", + " return_source_documents=True,\n", + ")" + ] + }, + { + "cell_type": "code", + "source": [ + "from 
langchain_huggingface import HuggingFaceEmbeddings\n", + "loader = TextLoader(\"/content/OutdoorClothingCatalog_1000.csv\")\n", + "index = VectorstoreIndexCreator(embedding=HuggingFaceEmbeddings(model_name=\"all-MiniLM-L6-v2\", model_kwargs = {'device': 'cpu'})).from_loaders([loader]) # Changed device to 'cpu'\n", + "\n", + "\n", + "llm = ChatOpenAI(temperature= 0)\n", + "qa_chain = RetrievalQA.from_chain_type(\n", + " llm,\n", + " retriever=index.vectorstore.as_retriever(),\n", + " return_source_documents=True,\n", + ")" + ], + "metadata": { + "id": "Uye8c-BbMIgk", + "outputId": "e57b9123-7834-42ac-bc3d-2e7654f9f133", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "id": "Uye8c-BbMIgk", + "execution_count": 50, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.11/dist-packages/langchain/indexes/vectorstore.py:128: UserWarning: Using InMemoryVectorStore as the default vectorstore.This memory store won't persist data. You should explicitlyspecify a vectorstore when using VectorstoreIndexCreator\n", + " warnings.warn(\n" + ] + } + ] + }, + { + "cell_type": "code", + "execution_count": 51, + "id": "f0449cae-de25-4ef6-ae64-78ccf5e06a5a", + "metadata": { + "tags": [], + "id": "f0449cae-de25-4ef6-ae64-78ccf5e06a5a", + "outputId": "4ae83d57-568b-4f86-d528-de929f227842", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 70 + } + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'New York City was named after the Duke of York, who later became King James II of England. King Charles II of England granted the land to his brother, the Duke of York, in the 17th century. 
The city was originally called New Amsterdam when it was a Dutch colony, but it was renamed New York when the English took control of the area.'" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 51 + } + ], + "source": [ + "# testing it out\n", + "\n", + "question = \"How did New York City get its name?\"\n", + "result = qa_chain.invoke({\"query\": question})\n", + "result[\"result\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 52, + "id": "9e846b3d-f79f-46eb-8075-c816268c0500", + "metadata": { + "tags": [], + "id": "9e846b3d-f79f-46eb-8075-c816268c0500", + "outputId": "ec609944-d859-4e4e-aa21-879124693c7c", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "{'query': 'How did New York City get its name?',\n", + " 'result': 'New York City was named after the Duke of York, who later became King James II of England. King Charles II of England granted the land to his brother, the Duke of York, in the 17th century. The city was originally called New Amsterdam when it was a Dutch colony, but it was renamed New York when the English took control of the area.',\n", + " 'source_documents': [Document(id='2664d412-10e6-4e3d-90f0-589f4e4737a9', metadata={'source': '/content/OutdoorClothingCatalog_1000.csv'}, page_content='sold and thousands of 5-star reviews, our extra large duffle bag is the one customers trust to go the distance. 
\"'),\n", + " Document(id='f17b4231-f93e-43ba-8ff6-a2ea6ac8f378', metadata={'source': '/content/OutdoorClothingCatalog_1000.csv'}, page_content='585,Women\\'s Mountain Peak Full-Zip Jacket,\"We\\'ve taken our classic anorak design and given it a modern twist with a full-zip style and heritage-inspired details.'),\n", + " Document(id='06975cb2-0aa8-4f12-8cd0-5eb09e72d35d', metadata={'source': '/content/OutdoorClothingCatalog_1000.csv'}, page_content='Specs: Dimensions: 2\"\"H x 3½\"\"W.\\n\\nWhy We Love It: We\\'re dedicated to supporting organizations that help people get outside and we think we\\'ve found our perfect match. The National Park Foundation, the official charitable partner of the National Park Service, works to protect an amazing network of more than 400 national park sites, many of which you\\'ll find just a short trip away.\\n\\nFabric & Care: Machine wash and dry.\\n\\nAdditional Features: Simply iron on to a backpack or jacket; or sew on fabric surface for extra durability. Get your 2019 National Park annual pass with us. Learn more at bbarnstormer.com/nationalparks. Made in the USA.'),\n", + " Document(id='1359cc47-4bb9-487d-8846-8d489f13b6b9', metadata={'source': '/content/OutdoorClothingCatalog_1000.csv'}, page_content='USA. Assembly instructions are included.\"')]}" + ] + }, + "metadata": {}, + "execution_count": 52 + } + ], + "source": [ + "result" + ] + }, + { + "cell_type": "markdown", + "id": "069d9da8-a593-4fc6-9d4b-fea2af6bdfd0", + "metadata": { + "id": "069d9da8-a593-4fc6-9d4b-fea2af6bdfd0" + }, + "source": [ + "Now, to evaluate the QA system, we need a few relevant questions. We've generated a few questions for you, but feel free to add any you want."
+ ] + }, + { + "cell_type": "code", + "execution_count": 53, + "id": "a2e2cade-0005-41c1-b775-c6a7175bcf3b", + "metadata": { + "tags": [], + "id": "a2e2cade-0005-41c1-b775-c6a7175bcf3b" + }, + "outputs": [], + "source": [ + "eval_questions = [\n", + " \"What is the population of New York City as of 2020?\",\n", + " \"Which borough of New York City has the highest population?\",\n", + " \"What is the economic significance of New York City?\",\n", + " \"How did New York City get its name?\",\n", + " \"What is the significance of the Statue of Liberty in New York City?\",\n", + "]\n", + "\n", + "eval_answers = [\n", + " \"8,804,190\",\n", + " \"Brooklyn\",\n", + " \"New York City's economic significance is vast, as it serves as the global financial capital, housing Wall Street and major financial institutions. Its diverse economy spans technology, media, healthcare, education, and more, making it resilient to economic fluctuations. NYC is a hub for international business, attracting global companies, and boasts a large, skilled labor force. Its real estate market, tourism, cultural industries, and educational institutions further fuel its economic prowess. The city's transportation network and global influence amplify its impact on the world stage, solidifying its status as a vital economic player and cultural epicenter.\",\n", + " \"New York City got its name when it came under British control in 1664. King Charles II of England granted the lands to his brother, the Duke of York, who named the city New York in his own honor.\",\n", + " \"The Statue of Liberty in New York City holds great significance as a symbol of the United States and its ideals of liberty and peace. It greeted millions of immigrants who arrived in the U.S. by ship in the late 19th and early 20th centuries, representing hope and freedom for those seeking a better life. 
It has since become an iconic landmark and a global symbol of cultural diversity and freedom.\",\n", + "]\n", + "\n", + "examples = [\n", + " {\"query\": q, \"ground_truths\": [eval_answers[i]]}\n", + " for i, q in enumerate(eval_questions)\n", + "]" + ] + }, + { + "cell_type": "code", + "execution_count": 54, + "id": "aac9358e-f8bc-4992-aea3-c83160ff0ab0", + "metadata": { + "tags": [], + "id": "aac9358e-f8bc-4992-aea3-c83160ff0ab0", + "outputId": "74832767-f35b-4166-ac9e-e35d1bf027a6", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "[{'query': 'What is the population of New York City as of 2020?',\n", + " 'ground_truths': ['8,804,190']},\n", + " {'query': 'Which borough of New York City has the highest population?',\n", + " 'ground_truths': ['Brooklyn']},\n", + " {'query': 'What is the economic significance of New York City?',\n", + " 'ground_truths': [\"New York City's economic significance is vast, as it serves as the global financial capital, housing Wall Street and major financial institutions. Its diverse economy spans technology, media, healthcare, education, and more, making it resilient to economic fluctuations. NYC is a hub for international business, attracting global companies, and boasts a large, skilled labor force. Its real estate market, tourism, cultural industries, and educational institutions further fuel its economic prowess. The city's transportation network and global influence amplify its impact on the world stage, solidifying its status as a vital economic player and cultural epicenter.\"]},\n", + " {'query': 'How did New York City get its name?',\n", + " 'ground_truths': ['New York City got its name when it came under British control in 1664. 
 King Charles II of England granted the lands to his brother, the Duke of York, who named the city New York in his own honor.']},\n", + " {'query': 'What is the significance of the Statue of Liberty in New York City?',\n", + " 'ground_truths': ['The Statue of Liberty in New York City holds great significance as a symbol of the United States and its ideals of liberty and peace. It greeted millions of immigrants who arrived in the U.S. by ship in the late 19th and early 20th centuries, representing hope and freedom for those seeking a better life. It has since become an iconic landmark and a global symbol of cultural diversity and freedom.']}]" + ] + }, + "metadata": {}, + "execution_count": 54 + } + ], + "source": [ + "examples" + ] + }, + { + "cell_type": "markdown", + "id": "6a21efe8-7c30-449a-9b8e-5b79778e305b", + "metadata": { + "id": "6a21efe8-7c30-449a-9b8e-5b79778e305b" + }, + "source": [ + "#### Introducing RagasEvaluatorChain" + ] + }, + { + "cell_type": "markdown", + "id": "139c2214-a6eb-4d4f-9403-7e1574b97a36", + "metadata": { + "id": "139c2214-a6eb-4d4f-9403-7e1574b97a36" + }, + "source": [ + "`RagasEvaluatorChain` creates a wrapper around the metrics ragas provides (documented [here](https://github.com/explodinggradients/ragas/blob/main/docs/metrics.md)), making it easier to run these evaluations with langchain and langsmith.\n", + "\n", + "The evaluator chain has the following APIs:\n", + "\n", + "- `__call__()`: call the `RagasEvaluatorChain` directly on the result of a QA chain.\n", + "- `evaluate()`: evaluate on a list of examples (with the input queries) and predictions (outputs from the QA chain).\n", + "- `evaluate_run()`: the method that langsmith evaluators call to evaluate langsmith datasets.\n", + "\n", + "Let's see each of them in action to learn more."
+ ] + }, + { + "cell_type": "code", + "execution_count": 55, + "id": "022c8aae-fe5f-4274-b638-9209151b9491", + "metadata": { + "tags": [], + "id": "022c8aae-fe5f-4274-b638-9209151b9491", + "outputId": "68312a21-ecc3-42d5-d56f-ee53912e0e86", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 36 + } + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "'The borough of Brooklyn in New York City has the highest population.'" + ], + "application/vnd.google.colaboratory.intrinsic+json": { + "type": "string" + } + }, + "metadata": {}, + "execution_count": 55 + } + ], + "source": [ + "result = qa_chain.invoke({\"query\": eval_questions[1]})\n", + "result[\"result\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 56, + "id": "eae31c80-42c9-4b1f-95b3-c05ceadb103f", + "metadata": { + "tags": [], + "id": "eae31c80-42c9-4b1f-95b3-c05ceadb103f" + }, + "outputs": [], + "source": [ + "key_mapping = {\n", + " \"query\": \"question\",\n", + " \"result\": \"answer\",\n", + " \"source_documents\": \"contexts\"\n", + "}\n", + "\n", + "result_updated = {}\n", + "for old_key, new_key in key_mapping.items():\n", + " if old_key in result:\n", + " result_updated[new_key] = result[old_key]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 57, + "id": "ecd4dd9f-16d7-43d4-ac8e-6c5aa5e3f7b0", + "metadata": { + "tags": [], + "id": "ecd4dd9f-16d7-43d4-ac8e-6c5aa5e3f7b0", + "outputId": "bafc6c15-9ba7-4c10-a898-3df6928e0a7b", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "{'question': 'Which borough of New York City has the highest population?',\n", + " 'answer': 'The borough of Brooklyn in New York City has the highest population.',\n", + " 'contexts': [Document(id='f17b4231-f93e-43ba-8ff6-a2ea6ac8f378', metadata={'source': '/content/OutdoorClothingCatalog_1000.csv'}, page_content='585,Women\\'s Mountain Peak Full-Zip 
Jacket,\"We\\'ve taken our classic anorak design and given it a modern twist with a full-zip style and heritage-inspired details.'),\n", + " Document(id='f6c05378-731c-482c-b357-ecd9e4bd8d85', metadata={'source': '/content/OutdoorClothingCatalog_1000.csv'}, page_content='Specs\\nCapacity: 2 people plus gear. \\nTent area: 37.5 sq. ft.\\nTent floor dimensions: 7\\'6\"\" x 5\\'. \\nPeak height: 47\"\". \\nMinimum weight: 5 lb. 6 oz. \\nPacked size: 22\"\" x 8\"\".'),\n", + " Document(id='2664d412-10e6-4e3d-90f0-589f4e4737a9', metadata={'source': '/content/OutdoorClothingCatalog_1000.csv'}, page_content='sold and thousands of 5-star reviews, our extra large duffle bag is the one customers trust to go the distance. \"'),\n", + " Document(id='c5e91c56-7ab5-4d9d-95e3-d7ceb15cc40d', metadata={'source': '/content/OutdoorClothingCatalog_1000.csv'}, page_content='Regular: \\nDimensions: 34\"\" outside diam. x 10\"\"H. \\nWeight: 10 lb.\\n\\nExtra-Large: \\nDimensions: 40\"\" outside diam. x 12\"\"H. \\nWeight: 17 lb.\\n\\nDesigned for: Ages 5 and up or adult/tandem sledding.\\n\\nVideo: Inflation Instructions\\n\\nAdditional Features: \\nSturdy base coasts on packed or powdered snow. \\nRugged shell helps protect inner tube from punctures and abrasion. \\nFor solo sledding, we recommend the regular size for smaller children. \\nSturdy handles and built-in tow strap make it easy to carry and pull back up the hill. 
\\nImported.')]}" + ] + }, + "metadata": {}, + "execution_count": 57 + } + ], + "source": [ + "result_updated" + ] + }, + { + "cell_type": "code", + "execution_count": 58, + "id": "00d5cc30-d25e-41bd-8695-9246b73938bc", + "metadata": { + "tags": [], + "id": "00d5cc30-d25e-41bd-8695-9246b73938bc" + }, + "outputs": [], + "source": [ + "# !pip install --no-cache-dir recordclass" + ] + }, + { + "cell_type": "code", + "execution_count": 59, + "id": "775d55d9-437d-40c4-bb5f-87c7b65fa567", + "metadata": { + "tags": [], + "id": "775d55d9-437d-40c4-bb5f-87c7b65fa567" + }, + "outputs": [], + "source": [ + "# !pip install ragas==0.1.9" + ] + }, + { + "cell_type": "code", + "source": [ + "!pip install ragas==0.1.9" + ], + "metadata": { + "collapsed": true, + "id": "fvpCTEmuM1Pe", + "outputId": "7fc2f4b7-53e2-400e-c0b2-b5aa1094a40a", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "id": "fvpCTEmuM1Pe", + "execution_count": 61, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Collecting ragas==0.1.9\n", + " Downloading ragas-0.1.9-py3-none-any.whl.metadata (5.2 kB)\n", + "Requirement already satisfied: numpy in /usr/local/lib/python3.11/dist-packages (from ragas==0.1.9) (1.26.4)\n", + "Collecting datasets (from ragas==0.1.9)\n", + " Downloading datasets-3.2.0-py3-none-any.whl.metadata (20 kB)\n", + "Requirement already satisfied: tiktoken in /usr/local/lib/python3.11/dist-packages (from ragas==0.1.9) (0.8.0)\n", + "Requirement already satisfied: langchain in /usr/local/lib/python3.11/dist-packages (from ragas==0.1.9) (0.3.15)\n", + "Requirement already satisfied: langchain-core in /usr/local/lib/python3.11/dist-packages (from ragas==0.1.9) (0.3.31)\n", + "Requirement already satisfied: langchain-community in /usr/local/lib/python3.11/dist-packages (from ragas==0.1.9) (0.3.15)\n", + "Requirement already satisfied: langchain-openai in /usr/local/lib/python3.11/dist-packages (from ragas==0.1.9) (0.3.1)\n", + "Requirement already 
satisfied: openai>1 in /usr/local/lib/python3.11/dist-packages (from ragas==0.1.9) (1.59.6)\n", + "Collecting pysbd>=0.3.4 (from ragas==0.1.9)\n", + " Downloading pysbd-0.3.4-py3-none-any.whl.metadata (6.1 kB)\n", + "Requirement already satisfied: nest-asyncio in /usr/local/lib/python3.11/dist-packages (from ragas==0.1.9) (1.6.0)\n", + "Collecting appdirs (from ragas==0.1.9)\n", + " Downloading appdirs-1.4.4-py2.py3-none-any.whl.metadata (9.0 kB)\n", + "Requirement already satisfied: anyio<5,>=3.5.0 in /usr/local/lib/python3.11/dist-packages (from openai>1->ragas==0.1.9) (3.7.1)\n", + "Requirement already satisfied: distro<2,>=1.7.0 in /usr/local/lib/python3.11/dist-packages (from openai>1->ragas==0.1.9) (1.9.0)\n", + "Requirement already satisfied: httpx<1,>=0.23.0 in /usr/local/lib/python3.11/dist-packages (from openai>1->ragas==0.1.9) (0.28.1)\n", + "Requirement already satisfied: jiter<1,>=0.4.0 in /usr/local/lib/python3.11/dist-packages (from openai>1->ragas==0.1.9) (0.8.2)\n", + "Requirement already satisfied: pydantic<3,>=1.9.0 in /usr/local/lib/python3.11/dist-packages (from openai>1->ragas==0.1.9) (2.10.5)\n", + "Requirement already satisfied: sniffio in /usr/local/lib/python3.11/dist-packages (from openai>1->ragas==0.1.9) (1.3.1)\n", + "Requirement already satisfied: tqdm>4 in /usr/local/lib/python3.11/dist-packages (from openai>1->ragas==0.1.9) (4.67.1)\n", + "Requirement already satisfied: typing-extensions<5,>=4.11 in /usr/local/lib/python3.11/dist-packages (from openai>1->ragas==0.1.9) (4.12.2)\n", + "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from datasets->ragas==0.1.9) (3.16.1)\n", + "Requirement already satisfied: pyarrow>=15.0.0 in /usr/local/lib/python3.11/dist-packages (from datasets->ragas==0.1.9) (17.0.0)\n", + "Collecting dill<0.3.9,>=0.3.0 (from datasets->ragas==0.1.9)\n", + " Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)\n", + "Requirement already satisfied: pandas in 
/usr/local/lib/python3.11/dist-packages (from datasets->ragas==0.1.9) (2.2.2)\n", + "Requirement already satisfied: requests>=2.32.2 in /usr/local/lib/python3.11/dist-packages (from datasets->ragas==0.1.9) (2.32.3)\n", + "Collecting xxhash (from datasets->ragas==0.1.9)\n", + " Downloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)\n", + "Collecting multiprocess<0.70.17 (from datasets->ragas==0.1.9)\n", + " Downloading multiprocess-0.70.16-py311-none-any.whl.metadata (7.2 kB)\n", + "Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets->ragas==0.1.9)\n", + " Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)\n", + "Requirement already satisfied: aiohttp in /usr/local/lib/python3.11/dist-packages (from datasets->ragas==0.1.9) (3.11.11)\n", + "Requirement already satisfied: huggingface-hub>=0.23.0 in /usr/local/lib/python3.11/dist-packages (from datasets->ragas==0.1.9) (0.27.1)\n", + "Requirement already satisfied: packaging in /usr/local/lib/python3.11/dist-packages (from datasets->ragas==0.1.9) (24.2)\n", + "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.11/dist-packages (from datasets->ragas==0.1.9) (6.0.2)\n", + "Requirement already satisfied: SQLAlchemy<3,>=1.4 in /usr/local/lib/python3.11/dist-packages (from langchain->ragas==0.1.9) (2.0.37)\n", + "Requirement already satisfied: langchain-text-splitters<0.4.0,>=0.3.3 in /usr/local/lib/python3.11/dist-packages (from langchain->ragas==0.1.9) (0.3.5)\n", + "Requirement already satisfied: langsmith<0.4,>=0.1.17 in /usr/local/lib/python3.11/dist-packages (from langchain->ragas==0.1.9) (0.2.10)\n", + "Requirement already satisfied: tenacity!=8.4.0,<10,>=8.1.0 in /usr/local/lib/python3.11/dist-packages (from langchain->ragas==0.1.9) (9.0.0)\n", + "Requirement already satisfied: jsonpatch<2.0,>=1.33 in /usr/local/lib/python3.11/dist-packages (from langchain-core->ragas==0.1.9) (1.33)\n", + "Requirement 
already satisfied: dataclasses-json<0.7,>=0.5.7 in /usr/local/lib/python3.11/dist-packages (from langchain-community->ragas==0.1.9) (0.6.7)\n", + "Requirement already satisfied: httpx-sse<0.5.0,>=0.4.0 in /usr/local/lib/python3.11/dist-packages (from langchain-community->ragas==0.1.9) (0.4.0)\n", + "Requirement already satisfied: pydantic-settings<3.0.0,>=2.4.0 in /usr/local/lib/python3.11/dist-packages (from langchain-community->ragas==0.1.9) (2.7.1)\n", + "Requirement already satisfied: regex>=2022.1.18 in /usr/local/lib/python3.11/dist-packages (from tiktoken->ragas==0.1.9) (2024.11.6)\n", + "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets->ragas==0.1.9) (2.4.4)\n", + "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets->ragas==0.1.9) (1.3.2)\n", + "Requirement already satisfied: attrs>=17.3.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets->ragas==0.1.9) (24.3.0)\n", + "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets->ragas==0.1.9) (1.5.0)\n", + "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets->ragas==0.1.9) (6.1.0)\n", + "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets->ragas==0.1.9) (0.2.1)\n", + "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.11/dist-packages (from aiohttp->datasets->ragas==0.1.9) (1.18.3)\n", + "Requirement already satisfied: idna>=2.8 in /usr/local/lib/python3.11/dist-packages (from anyio<5,>=3.5.0->openai>1->ragas==0.1.9) (3.10)\n", + "Requirement already satisfied: marshmallow<4.0.0,>=3.18.0 in /usr/local/lib/python3.11/dist-packages (from dataclasses-json<0.7,>=0.5.7->langchain-community->ragas==0.1.9) (3.25.1)\n", + "Requirement already satisfied: 
typing-inspect<1,>=0.4.0 in /usr/local/lib/python3.11/dist-packages (from dataclasses-json<0.7,>=0.5.7->langchain-community->ragas==0.1.9) (0.9.0)\n", + "Requirement already satisfied: certifi in /usr/local/lib/python3.11/dist-packages (from httpx<1,>=0.23.0->openai>1->ragas==0.1.9) (2024.12.14)\n", + "Requirement already satisfied: httpcore==1.* in /usr/local/lib/python3.11/dist-packages (from httpx<1,>=0.23.0->openai>1->ragas==0.1.9) (1.0.7)\n", + "Requirement already satisfied: h11<0.15,>=0.13 in /usr/local/lib/python3.11/dist-packages (from httpcore==1.*->httpx<1,>=0.23.0->openai>1->ragas==0.1.9) (0.14.0)\n", + "Requirement already satisfied: jsonpointer>=1.9 in /usr/local/lib/python3.11/dist-packages (from jsonpatch<2.0,>=1.33->langchain-core->ragas==0.1.9) (3.0.0)\n", + "Requirement already satisfied: orjson<4.0.0,>=3.9.14 in /usr/local/lib/python3.11/dist-packages (from langsmith<0.4,>=0.1.17->langchain->ragas==0.1.9) (3.10.14)\n", + "Requirement already satisfied: requests-toolbelt<2.0.0,>=1.0.0 in /usr/local/lib/python3.11/dist-packages (from langsmith<0.4,>=0.1.17->langchain->ragas==0.1.9) (1.0.0)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /usr/local/lib/python3.11/dist-packages (from pydantic<3,>=1.9.0->openai>1->ragas==0.1.9) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.27.2 in /usr/local/lib/python3.11/dist-packages (from pydantic<3,>=1.9.0->openai>1->ragas==0.1.9) (2.27.2)\n", + "Requirement already satisfied: python-dotenv>=0.21.0 in /usr/local/lib/python3.11/dist-packages (from pydantic-settings<3.0.0,>=2.4.0->langchain-community->ragas==0.1.9) (1.0.1)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests>=2.32.2->datasets->ragas==0.1.9) (3.4.1)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests>=2.32.2->datasets->ragas==0.1.9) (2.3.0)\n", + "Requirement already satisfied: 
greenlet!=0.4.17 in /usr/local/lib/python3.11/dist-packages (from SQLAlchemy<3,>=1.4->langchain->ragas==0.1.9) (3.1.1)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /usr/local/lib/python3.11/dist-packages (from pandas->datasets->ragas==0.1.9) (2.8.2)\n", + "Requirement already satisfied: pytz>=2020.1 in /usr/local/lib/python3.11/dist-packages (from pandas->datasets->ragas==0.1.9) (2024.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.11/dist-packages (from pandas->datasets->ragas==0.1.9) (2024.2)\n", + "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.11/dist-packages (from python-dateutil>=2.8.2->pandas->datasets->ragas==0.1.9) (1.17.0)\n", + "Requirement already satisfied: mypy-extensions>=0.3.0 in /usr/local/lib/python3.11/dist-packages (from typing-inspect<1,>=0.4.0->dataclasses-json<0.7,>=0.5.7->langchain-community->ragas==0.1.9) (1.0.0)\n", + "Downloading ragas-0.1.9-py3-none-any.whl (86 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m86.1/86.1 kB\u001b[0m \u001b[31m3.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading pysbd-0.3.4-py3-none-any.whl (71 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m71.1/71.1 kB\u001b[0m \u001b[31m3.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading appdirs-1.4.4-py2.py3-none-any.whl (9.6 kB)\n", + "Downloading datasets-3.2.0-py3-none-any.whl (480 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m480.6/480.6 kB\u001b[0m \u001b[31m11.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m7.5 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading fsspec-2024.9.0-py3-none-any.whl (179 kB)\n", 
+ "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m179.3/179.3 kB\u001b[0m \u001b[31m11.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading multiprocess-0.70.16-py311-none-any.whl (143 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m143.5/143.5 kB\u001b[0m \u001b[31m9.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hDownloading xxhash-3.5.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.8/194.8 kB\u001b[0m \u001b[31m12.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hInstalling collected packages: appdirs, xxhash, pysbd, fsspec, dill, multiprocess, datasets, ragas\n", + " Attempting uninstall: fsspec\n", + " Found existing installation: fsspec 2024.10.0\n", + " Uninstalling fsspec-2024.10.0:\n", + " Successfully uninstalled fsspec-2024.10.0\n", + "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. 
This behaviour is the source of the following dependency conflicts.\n", + "gcsfs 2024.10.0 requires fsspec==2024.10.0, but you have fsspec 2024.9.0 which is incompatible.\u001b[0m\u001b[31m\n", + "\u001b[0mSuccessfully installed appdirs-1.4.4 datasets-3.2.0 dill-0.3.8 fsspec-2024.9.0 multiprocess-0.70.16 pysbd-0.3.4 ragas-0.1.9 xxhash-3.5.0\n" + ] + } + ] + }, + { + "cell_type": "code", + "execution_count": 62, + "id": "29b14c07-bf6c-4e86-ad1a-2a6c4d1a509d", + "metadata": { + "tags": [], + "id": "29b14c07-bf6c-4e86-ad1a-2a6c4d1a509d", + "outputId": "11d6c31e-5bf9-4ed9-bcfe-a7ac37bd26cf", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stderr", + "text": [ + "/usr/local/lib/python3.11/dist-packages/ragas/metrics/__init__.py:1: LangChainDeprecationWarning: As of langchain-core 0.3.0, LangChain uses pydantic v2 internally. The langchain_core.pydantic_v1 module was a compatibility shim for pydantic v1, and should no longer be used. Please update the code to import from Pydantic directly.\n", + "\n", + "For example, replace imports like: `from langchain_core.pydantic_v1 import BaseModel`\n", + "with: `from pydantic import BaseModel`\n", + "or the v1 compatibility namespace if you are working in a code base that has not been fully upgraded to pydantic 2 yet. \tfrom pydantic.v1 import BaseModel\n", + "\n", + " from ragas.metrics._answer_correctness import AnswerCorrectness, answer_correctness\n", + "/usr/local/lib/python3.11/dist-packages/ragas/metrics/__init__.py:4: LangChainDeprecationWarning: As of langchain-core 0.3.0, LangChain uses pydantic v2 internally. The langchain.pydantic_v1 module was a compatibility shim for pydantic v1, and should no longer be used. 
Please update the code to import from Pydantic directly.\n", + "\n", + "For example, replace imports like: `from langchain.pydantic_v1 import BaseModel`\n", + "with: `from pydantic import BaseModel`\n", + "or the v1 compatibility namespace if you are working in a code base that has not been fully upgraded to pydantic 2 yet. \tfrom pydantic.v1 import BaseModel\n", + "\n", + " from ragas.metrics._context_entities_recall import (\n" + ] + } + ], + "source": [ + "from ragas.integrations.langchain import EvaluatorChain\n", + "# from ragas import evaluate\n", + "from ragas.metrics import (\n", + " faithfulness,\n", + " answer_relevancy,\n", + " context_relevancy,\n", + " context_recall,\n", + ")\n", + "\n", + "# create evaluation chains\n", + "faithfulness_chain = EvaluatorChain(metric=faithfulness)\n", + "answer_rel_chain = EvaluatorChain(metric=answer_relevancy)\n", + "context_rel_chain = EvaluatorChain(metric=context_relevancy)\n", + "context_recall_chain = EvaluatorChain(metric=context_recall)" + ] + }, + { + "cell_type": "markdown", + "id": "41a8b636-d738-41bd-ac68-21cce2c4b720", + "metadata": { + "id": "41a8b636-d738-41bd-ac68-21cce2c4b720" + }, + "source": [ + "1. `__call__()`\n", + "\n", + "Directly run the evaluation chain with the results from the QA chain. Do note that metrics like context_relevancy and faithfulness require the `source_documents` to be present." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 63, + "id": "7815671c-1fc8-46ba-8356-4a0bd5558530", + "metadata": { + "id": "7815671c-1fc8-46ba-8356-4a0bd5558530", + "outputId": "982abdcd-f27d-4e24-fcec-16e19526f17e", + "colab": { + "base_uri": "https://localhost:8080/" + } + }, + "outputs": [ + { + "output_type": "execute_result", + "data": { + "text/plain": [ + "{'query': 'Which borough of New York City has the highest population?',\n", + " 'result': 'The borough of Brooklyn in New York City has the highest population.',\n", + " 'source_documents': [Document(id='f17b4231-f93e-43ba-8ff6-a2ea6ac8f378', metadata={'source': '/content/OutdoorClothingCatalog_1000.csv'}, page_content='585,Women\\'s Mountain Peak Full-Zip Jacket,\"We\\'ve taken our classic anorak design and given it a modern twist with a full-zip style and heritage-inspired details.'),\n", + " Document(id='f6c05378-731c-482c-b357-ecd9e4bd8d85', metadata={'source': '/content/OutdoorClothingCatalog_1000.csv'}, page_content='Specs\\nCapacity: 2 people plus gear. \\nTent area: 37.5 sq. ft.\\nTent floor dimensions: 7\\'6\"\" x 5\\'. \\nPeak height: 47\"\". \\nMinimum weight: 5 lb. 6 oz. \\nPacked size: 22\"\" x 8\"\".'),\n", + " Document(id='2664d412-10e6-4e3d-90f0-589f4e4737a9', metadata={'source': '/content/OutdoorClothingCatalog_1000.csv'}, page_content='sold and thousands of 5-star reviews, our extra large duffle bag is the one customers trust to go the distance. \"'),\n", + " Document(id='c5e91c56-7ab5-4d9d-95e3-d7ceb15cc40d', metadata={'source': '/content/OutdoorClothingCatalog_1000.csv'}, page_content='Regular: \\nDimensions: 34\"\" outside diam. x 10\"\"H. \\nWeight: 10 lb.\\n\\nExtra-Large: \\nDimensions: 40\"\" outside diam. x 12\"\"H. \\nWeight: 17 lb.\\n\\nDesigned for: Ages 5 and up or adult/tandem sledding.\\n\\nVideo: Inflation Instructions\\n\\nAdditional Features: \\nSturdy base coasts on packed or powdered snow. \\nRugged shell helps protect inner tube from punctures and abrasion. 
\\nFor solo sledding, we recommend the regular size for smaller children. \\nSturdy handles and built-in tow strap make it easy to carry and pull back up the hill. \\nImported.')]}"
+            ]
+          },
+          "metadata": {},
+          "execution_count": 63
+        }
+      ],
+      "source": [
+        "# Recheck the result that we are going to validate.\n",
+        "result"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "8cfdd07c-956c-458f-8844-e056f29e3c83",
+      "metadata": {
+        "id": "8cfdd07c-956c-458f-8844-e056f29e3c83"
+      },
+      "source": [
+        "**Faithfulness**"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "304f5f0f-a237-4584-becb-a35607caf26b",
+      "metadata": {
+        "collapsed": true,
+        "id": "304f5f0f-a237-4584-becb-a35607caf26b",
+        "outputId": "567ba50f-f360-4cc9-dac0-1d437a6c7dab",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 158
+        }
+      },
+      "outputs": [],
+      "source": [
+        "# Extract the page_content from the Document objects in source_documents\n",
+        "# (done here, before the first evaluation, so the notebook runs top to bottom).\n",
+        "result_updated['contexts'] = [doc.page_content for doc in result['source_documents']]\n",
+        "\n",
+        "eval_result = faithfulness_chain(result_updated)\n",
+        "# EvaluatorChain returns the score under the metric's own name (\"faithfulness\"),\n",
+        "# not \"faithfulness_score\" -- the old key raised KeyError.\n",
+        "eval_result[\"faithfulness\"]"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "8fbd78a9-d7c5-42a0-8705-4c544ec6408e",
+      "metadata": {
+        "id": "8fbd78a9-d7c5-42a0-8705-4c544ec6408e"
+      },
+      "source": [
+        "A high faithfulness score means that there is exact consistency between the source documents and the answer.\n",
+        "\n",
+        "You can check lower faithfulness scores by changing the result (answer from LLM) or source_documents to something else."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "15f42b97-a84f-4015-9da5-fa3b0b703c24",
+      "metadata": {
+        "collapsed": true,
+        "id": "15f42b97-a84f-4015-9da5-fa3b0b703c24",
+        "outputId": "c534cd4b-d727-4a6b-916c-eb2408e40a7c",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 339
+        }
+      },
+      "outputs": [],
+      "source": [
+        "# The evaluator requires the keys question / answer / contexts, so start from\n",
+        "# result_updated (which has them) rather than the raw QA-chain `result`.\n",
+        "fake_result = result_updated.copy()\n",
+        "fake_result[\"answer\"] = \"we are the champions\"\n",
+        "eval_result = faithfulness_chain(fake_result)\n",
+        "eval_result[\"faithfulness\"]"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "eval_result = faithfulness_chain(result_updated) # Re-compute eval_result here\n",
+        "faithfulness_score_str = str(eval_result[\"faithfulness\"])"
+      ],
+      "metadata": {
+        "id": "0KxFKcvPYaid",
+        "outputId": "5556a2a7-4b81-4255-f5b5-de4bd7bf5cdc",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 176
+        }
+      },
+      "id": "0KxFKcvPYaid",
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "id": "02348380-159a-4578-a95e-493cdcfcebe7",
+      "metadata": {
+        "id": "02348380-159a-4578-a95e-493cdcfcebe7"
+      },
+      "source": [
+        "**Context Recall**"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": null,
+      "id": "0fd2fb9e-19d1-4cf6-9773-897546c2d6bb",
+      "metadata": {
+        "id": "0fd2fb9e-19d1-4cf6-9773-897546c2d6bb",
+        "outputId": "00164b77-f5c7-4988-ea76-6134d5fdeed4",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 304
+        }
+      },
+      "outputs": [],
+      "source": [
+        "# context_recall requires question / contexts / ground_truth.\n",
+        "# GROUND_TRUTH is the reference answer for the query stored in `result`;\n",
+        "# a ground_truth already present in result_updated takes precedence.\n",
+        "GROUND_TRUTH = \"Brooklyn\"\n",
+        "recall_inputs = {\"ground_truth\": GROUND_TRUTH, **result_updated}\n",
+        "\n",
+        "eval_result = context_recall_chain(recall_inputs)\n",
+        "eval_result[\"context_recall\"]"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "id": "118cba80-c2a3-408e-9d8d-857521cbe723",
+      "metadata": {
+        "id": "118cba80-c2a3-408e-9d8d-857521cbe723"
+      },
+      "source": [
+        "A high context recall score means that the ground truth is present in the source documents.\n",
+        "\n",
+        "You can check lower context recall scores by changing the source_documents to something else."
+      ]
+    },
+    {
+      "cell_type": "code",
+      "execution_count": 77,
+      "id": "f6610c5f-5f8e-406c-885c-093012a5dc44",
+      "metadata": {
+        "id": "f6610c5f-5f8e-406c-885c-093012a5dc44",
+        "outputId": "facdd5de-d891-45d8-c0e3-cd129781da1b",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 339
+        }
+      },
+      "outputs": [
+        {
+          "output_type": "error",
+          "ename": "ValueError",
+          "evalue": "Missing some input keys: {'ground_truth', 'question', 'contexts'}",
+          "traceback": [
+            "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+            "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)",
+            "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mfake_result\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mfake_result\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"source_documents\"\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mDocument\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpage_content\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m\"I love christmas\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m \u001b[0meval_result\u001b[0m \u001b[0;34m=\u001b[0m
\u001b[0mcontext_recall_chain\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfake_result\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 5\u001b[0m \u001b[0meval_result\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m\"context_recall_score\"\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.11/dist-packages/langchain_core/_api/deprecation.py\u001b[0m in \u001b[0;36mwarning_emitting_wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 180\u001b[0m \u001b[0mwarned\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 181\u001b[0m \u001b[0memit_warning\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 182\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mwrapped\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 183\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 184\u001b[0m \u001b[0;32masync\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mawarning_emitting_wrapper\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mAny\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mAny\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0mAny\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.11/dist-packages/langchain/chains/base.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, inputs, return_only_outputs, callbacks, tags, metadata, run_name, include_run_info)\u001b[0m\n\u001b[1;32m 387\u001b[0m }\n\u001b[1;32m 388\u001b[0m 
\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 389\u001b[0;31m return self.invoke(\n\u001b[0m\u001b[1;32m 390\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 391\u001b[0m \u001b[0mcast\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mRunnableConfig\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m{\u001b[0m\u001b[0mk\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mv\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mk\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mv\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mconfig\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mitems\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mv\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m}\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.11/dist-packages/langchain/chains/base.py\u001b[0m in \u001b[0;36minvoke\u001b[0;34m(self, input, config, **kwargs)\u001b[0m\n\u001b[1;32m 168\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mBaseException\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 169\u001b[0m \u001b[0mrun_manager\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mon_chain_error\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0me\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 170\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0me\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 171\u001b[0m \u001b[0mrun_manager\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mon_chain_end\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moutputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 172\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", + 
"\u001b[0;32m/usr/local/lib/python3.11/dist-packages/langchain/chains/base.py\u001b[0m in \u001b[0;36minvoke\u001b[0;34m(self, input, config, **kwargs)\u001b[0m\n\u001b[1;32m 156\u001b[0m )\n\u001b[1;32m 157\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 158\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_validate_inputs\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 159\u001b[0m outputs = (\n\u001b[1;32m 160\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_call\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mrun_manager\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mrun_manager\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.11/dist-packages/langchain/chains/base.py\u001b[0m in \u001b[0;36m_validate_inputs\u001b[0;34m(self, inputs)\u001b[0m\n\u001b[1;32m 288\u001b[0m \u001b[0mmissing_keys\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0minput_keys\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mdifference\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 289\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mmissing_keys\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 290\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf\"Missing some input keys: {missing_keys}\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 291\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 292\u001b[0m \u001b[0;32mdef\u001b[0m 
\u001b[0m_validate_outputs\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0moutputs\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mDict\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mstr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mAny\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mValueError\u001b[0m: Missing some input keys: {'ground_truth', 'question', 'contexts'}" + ] + } + ], + "source": [ + "from langchain.schema import Document\n", + "fake_result = result.copy()\n", + "fake_result[\"source_documents\"] = [Document(page_content=\"I love christmas\")]\n", + "eval_result = context_recall_chain(fake_result)\n", + "eval_result[\"context_recall_score\"]" + ] + }, + { + "cell_type": "markdown", + "id": "3dc06403-c514-47fd-ac82-d95ece8d2f06", + "metadata": { + "id": "3dc06403-c514-47fd-ac82-d95ece8d2f06" + }, + "source": [ + "2. `evaluate()`\n", + "\n", + "Evaluate a list of inputs/queries and the outputs/predictions from the QA chain." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 79, + "id": "f6b23708-b94d-4649-acd9-0e268f72f94a", + "metadata": { + "id": "f6b23708-b94d-4649-acd9-0e268f72f94a", + "outputId": "30f56162-3ca9-4cca-8ffc-e24a4e3755ba", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 332 + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "evaluating...\n" + ] + }, + { + "output_type": "error", + "ename": "AttributeError", + "evalue": "'EvaluatorChain' object has no attribute 'evaluate'", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;31m# evaluate\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"evaluating...\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 6\u001b[0;31m \u001b[0mr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfaithfulness_chain\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mevaluate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mexamples\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpredictions\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 7\u001b[0m \u001b[0mr\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.11/dist-packages/pydantic/main.py\u001b[0m in \u001b[0;36m__getattr__\u001b[0;34m(self, item)\u001b[0m\n\u001b[1;32m 889\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 890\u001b[0m \u001b[0;31m# this is the current error\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 891\u001b[0;31m \u001b[0;32mraise\u001b[0m 
\u001b[0mAttributeError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf'{type(self).__name__!r} object has no attribute {item!r}'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 892\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 893\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__setattr__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mAny\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mAttributeError\u001b[0m: 'EvaluatorChain' object has no attribute 'evaluate'" + ] + } + ], + "source": [ + "# run the queries as a batch for efficiency\n", + "predictions = qa_chain.batch(examples)\n", + "\n", + "# evaluate\n", + "print(\"evaluating...\")\n", + "r = faithfulness_chain.evaluate(examples, predictions)\n", + "r" + ] + }, + { + "cell_type": "code", + "execution_count": 76, + "id": "d661f6b9-04c7-40b7-874b-25f53cfab9d9", + "metadata": { + "id": "d661f6b9-04c7-40b7-874b-25f53cfab9d9", + "outputId": "d8d25b00-ae0b-4d11-d4c2-eddbb63f694a", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 332 + } + }, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "evaluating...\n" + ] + }, + { + "output_type": "error", + "ename": "AttributeError", + "evalue": "'EvaluatorChain' object has no attribute 'evaluate'", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# evaluate context 
recall\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"evaluating...\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 3\u001b[0;31m \u001b[0mr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcontext_recall_chain\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mevaluate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mexamples\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpredictions\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 4\u001b[0m \u001b[0mr\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;32m/usr/local/lib/python3.11/dist-packages/pydantic/main.py\u001b[0m in \u001b[0;36m__getattr__\u001b[0;34m(self, item)\u001b[0m\n\u001b[1;32m 889\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 890\u001b[0m \u001b[0;31m# this is the current error\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 891\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mAttributeError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34mf'{type(self).__name__!r} object has no attribute {item!r}'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 892\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 893\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__setattr__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mname\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mstr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mAny\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m->\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31mAttributeError\u001b[0m: 'EvaluatorChain' object has no attribute 'evaluate'" + ] + } + ], + "source": [ + 
"# evaluate context recall\n", + "print(\"evaluating...\")\n", + "r = context_recall_chain.evaluate(examples, predictions)\n", + "r" + ] } - ], - "source": [ - "result = qa_chain.invoke({\"query\": eval_questions[1]})\n", - "result[\"result\"]" - ] - }, - { - "cell_type": "code", - "execution_count": 51, - "id": "eae31c80-42c9-4b1f-95b3-c05ceadb103f", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "key_mapping = {\n", - " \"query\": \"question\",\n", - " \"result\": \"answer\",\n", - " \"source_documents\": \"contexts\"\n", - "}\n", - "\n", - "result_updated = {}\n", - "for old_key, new_key in key_mapping.items():\n", - " if old_key in result:\n", - " result_updated[new_key] = result[old_key]\n" - ] - }, - { - "cell_type": "code", - "execution_count": 53, - "id": "ecd4dd9f-16d7-43d4-ac8e-6c5aa5e3f7b0", - "metadata": { - "tags": [] - }, - "outputs": [ - { - "data": { - "text/plain": [ - "{'question': 'Which borough of New York City has the highest population?',\n", - " 'answer': 'Manhattan (New York County) has the highest population density of any borough in New York City.',\n", - " 'contexts': [Document(page_content=\"New York City is the most populous city in the United States, with 8,804,190 residents incorporating more immigration into the city than outmigration since the 2010 United States census. More than twice as many people live in New York City as compared to Los Angeles, the second-most populous U.S. city; and New York has more than three times the population of Chicago, the third-most populous U.S. city. New York City gained more residents between 2010 and 2020 (629,000) than any other U.S. city, and a greater amount than the total sum of the gains over the same decade of the next four largest U.S. cities, Los Angeles, Chicago, Houston, and Phoenix, Arizona combined. New York City's population is about 44% of New York State's population, and about 39% of the population of the New York metropolitan area. 
The majority of New York City residents in 2020 (5,141,538, or 58.4%) were living on Long Island, in Brooklyn, or in Queens. The New York City metropolitan statistical area, has the\", metadata={'source': '../data/nyc_text.txt'}),\n", - " Document(page_content=\"New York, often called New York City or NYC, is the most populous city in the United States. With a 2020 population of 8,804,190 distributed over 300.46 square miles (778.2 km2), New York City is the most densely populated major city in the United States and more than twice as populous as Los Angeles, the nation's second-largest city. New York City is located at the southern tip of New York State. It constitutes the geographical and demographic center of both the Northeast megalopolis and the New York metropolitan area, the largest metropolitan area in the U.S. by both population and urban area. With over 20.1 million people in its metropolitan statistical area and 23.5 million in its combined statistical area as of 2020, New York is one of the world's most populous megacities, and over 58 million people live within 250 mi (400 km) of the city. New York City is a global cultural, financial, entertainment, and media center with a significant influence on commerce, health care and life\", metadata={'source': '../data/nyc_text.txt'}),\n", - " Document(page_content=\"Manhattan (New York County) is the geographically smallest and most densely populated borough. It is home to Central Park and most of the city's skyscrapers, and is sometimes locally known as The City. Manhattan's population density of 72,033 people per square mile (27,812/km2) in 2015 makes it the highest of any county in the United States and higher than the density of any individual American city.Manhattan is the cultural, administrative, and financial center of New York City and contains the headquarters of many major multinational corporations, the United Nations headquarters, Wall Street, and a number of important universities. 
The borough of Manhattan is often described as the financial and cultural center of the world.Most of the borough is situated on Manhattan Island, at the mouth of the Hudson River and the East River, and its southern tip, at the confluence of the two rivers, represents the birthplace of New York City itself. Several small islands also compose part of the\", metadata={'source': '../data/nyc_text.txt'}),\n", - " Document(page_content=\"=== Population density ===\\n\\nIn 2020, the city had an estimated population density of 29,302.37 inhabitants per square mile (11,313.71/km2), rendering it the nation's most densely populated of all larger municipalities (those with more than 100,000 residents), with several small cities (of fewer than 100,000) in adjacent Hudson County, New Jersey having greater density, as per the 2010 census. Geographically co-extensive with New York County, the borough of Manhattan's 2017 population density of 72,918 inhabitants per square mile (28,154/km2) makes it the highest of any county in the United States and higher than the density of any individual American city. 
The next three densest counties in the United States, placing second through fourth, are also New York boroughs: Brooklyn, the Bronx, and Queens respectively.\\n\\n\\n=== Race and ethnicity ===\", metadata={'source': '../data/nyc_text.txt'})]}" - ] - }, - "execution_count": 53, - "metadata": {}, - "output_type": "execute_result" + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.8" + }, + "colab": { + "provenance": [], + "include_colab_link": true } - ], - "source": [ - "result_updated" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "id": "00d5cc30-d25e-41bd-8695-9246b73938bc", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# !pip install --no-cache-dir recordclass" - ] - }, - { - "cell_type": "code", - "execution_count": 158, - "id": "775d55d9-437d-40c4-bb5f-87c7b65fa567", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "# !pip install ragas==0.1.9" - ] - }, - { - "cell_type": "code", - "execution_count": 13, - "id": "29b14c07-bf6c-4e86-ad1a-2a6c4d1a509d", - "metadata": { - "tags": [] - }, - "outputs": [], - "source": [ - "from ragas.integrations.langchain import EvaluatorChain \n", - "# from ragas import evaluate\n", - "from ragas.metrics import (\n", - " faithfulness,\n", - " answer_relevancy,\n", - " context_relevancy,\n", - " context_recall,\n", - ")\n", - "\n", - "# create evaluation chains\n", - "faithfulness_chain = EvaluatorChain(metric=faithfulness)\n", - "answer_rel_chain = EvaluatorChain(metric=answer_relevancy)\n", - "context_rel_chain = EvaluatorChain(metric=context_relevancy)\n", - "context_recall_chain = EvaluatorChain(metric=context_recall)" - ] - }, - { - "cell_type": 
"markdown", - "id": "41a8b636-d738-41bd-ac68-21cce2c4b720", - "metadata": {}, - "source": [ - "1. `__call__()`\n", - "\n", - "Directly run the evaluation chain with the results from the QA chain. Do note that metrics like context_relevancy and faithfulness require the `source_documents` to be present." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "7815671c-1fc8-46ba-8356-4a0bd5558530", - "metadata": {}, - "outputs": [], - "source": [ - "# Recheck the result that we are going to validate.\n", - "result" - ] - }, - { - "cell_type": "markdown", - "id": "8cfdd07c-956c-458f-8844-e056f29e3c83", - "metadata": {}, - "source": [ - "**Faithfulness**" - ] - }, - { - "cell_type": "code", - "execution_count": 87, - "id": "304f5f0f-a237-4584-becb-a35607caf26b", - "metadata": {}, - "outputs": [], - "source": [ - "eval_result = faithfulness_chain(result_updated)\n", - "eval_result[\"faithfulness_score\"]" - ] - }, - { - "cell_type": "markdown", - "id": "8fbd78a9-d7c5-42a0-8705-4c544ec6408e", - "metadata": {}, - "source": [ - "High faithfulness_score means that there are exact consistency between the source documents and the answer.\n", - "\n", - "You can check lower faithfulness scores by changing the result (answer from LLM) or source_documents to something else." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "15f42b97-a84f-4015-9da5-fa3b0b703c24", - "metadata": {}, - "outputs": [], - "source": [ - "fake_result = result.copy()\n", - "fake_result[\"result\"] = \"we are the champions\"\n", - "eval_result = faithfulness_chain(fake_result)\n", - "eval_result[\"faithfulness_score\"]" - ] - }, - { - "cell_type": "markdown", - "id": "02348380-159a-4578-a95e-493cdcfcebe7", - "metadata": {}, - "source": [ - "**Context Relevancy**" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "0fd2fb9e-19d1-4cf6-9773-897546c2d6bb", - "metadata": {}, - "outputs": [], - "source": [ - "eval_result = context_recall_chain(result)\n", - "eval_result[\"context_recall_score\"]" - ] - }, - { - "cell_type": "markdown", - "id": "118cba80-c2a3-408e-9d8d-857521cbe723", - "metadata": {}, - "source": [ - "High context_recall_score means that the ground truth is present in the source documents.\n", - "\n", - "You can check lower context recall scores by changing the source_documents to something else." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f6610c5f-5f8e-406c-885c-093012a5dc44", - "metadata": {}, - "outputs": [], - "source": [ - "from langchain.schema import Document\n", - "fake_result = result.copy()\n", - "fake_result[\"source_documents\"] = [Document(page_content=\"I love christmas\")]\n", - "eval_result = context_recall_chain(fake_result)\n", - "eval_result[\"context_recall_score\"]" - ] - }, - { - "cell_type": "markdown", - "id": "3dc06403-c514-47fd-ac82-d95ece8d2f06", - "metadata": {}, - "source": [ - "2. `evaluate()`\n", - "\n", - "Evaluate a list of inputs/queries and the outputs/predictions from the QA chain." 
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "f6b23708-b94d-4649-acd9-0e268f72f94a", - "metadata": {}, - "outputs": [], - "source": [ - "# run the queries as a batch for efficiency\n", - "predictions = qa_chain.batch(examples)\n", - "\n", - "# evaluate\n", - "print(\"evaluating...\")\n", - "r = faithfulness_chain.evaluate(examples, predictions)\n", - "r" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "d661f6b9-04c7-40b7-874b-25f53cfab9d9", - "metadata": {}, - "outputs": [], - "source": [ - "# evaluate context recall\n", - "print(\"evaluating...\")\n", - "r = context_recall_chain.evaluate(examples, predictions)\n", - "r" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3 (ipykernel)", - "language": "python", - "name": "python3" }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.8" - } - }, - "nbformat": 4, - "nbformat_minor": 5 -} + "nbformat": 4, + "nbformat_minor": 5 +} \ No newline at end of file