add eval functions nb

alan-turing-institute · Sep 4, 2023 · 9c09f0c · 9c09f0c
1 parent b478920
commit 9c09f0c
Showing 1 changed file with 285 additions and 0 deletions.
diff --git a/models/llama-index-hack/llama2_eval.ipynb b/models/llama-index-hack/llama2_eval.ipynb
@@ -0,0 +1,285 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from llama_index.llms import LlamaCPP\n",
+    "from llama_index.llms.llama_utils import messages_to_prompt, completion_to_prompt\n",
+    "from llama_index import Document\n",
+    "from llama_index import VectorStoreIndex\n",
+    "from llama_index import LLMPredictor, PromptHelper, ServiceContext\n",
+    "\n",
+    "import pandas as pd"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Llama 2 chat model loaded from a local GGML file through LlamaCPP\n",
+    "llm = LlamaCPP(\n",
+    "    # --- weights and backend ---\n",
+    "    model_path=\"../../../llama/llama-2-7b-chat/ggml-model-q4_0.bin\",\n",
+    "    # forwarded to __init__(); n_gpu_layers has to be at least 1 for the GPU to be used\n",
+    "    model_kwargs=dict(n_gpu_layers=1),\n",
+    "    # --- generation ---\n",
+    "    temperature=0.1,\n",
+    "    max_new_tokens=256,\n",
+    "    # forwarded to __call__(); no extra options are set\n",
+    "    generate_kwargs=dict(),\n",
+    "    # Llama 2 offers 4096 tokens of context; stay below that to keep some headroom\n",
+    "    context_window=3900,\n",
+    "    # --- prompt formatting: turn inputs into the Llama 2 format ---\n",
+    "    messages_to_prompt=messages_to_prompt,\n",
+    "    completion_to_prompt=completion_to_prompt,\n",
+    "    verbose=True,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def load_documents(csv_paths=None):\n",
+    "    \"\"\"Read the scraped CSV files and wrap each non-null ``body`` value in a ``Document``.\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    csv_paths : iterable of str, optional\n",
+    "        CSV files to read, in order; each needs a ``body`` column.\n",
+    "        When omitted, the wiki, handbook and turing.ac.uk scrapes are loaded.\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    list of Document\n",
+    "        One document per non-null ``body`` value, in file order then row order.\n",
+    "    \"\"\"\n",
+    "    if csv_paths is None:\n",
+    "        csv_paths = (\n",
+    "            \"../../data/turing_internal/wiki-scraped.csv\",\n",
+    "            \"../../data/public/handbook-scraped.csv\",\n",
+    "            \"../../data/public/turingacuk-no-boilerplate.csv\",\n",
+    "        )\n",
+    "\n",
+    "    documents = []\n",
+    "    for csv_path in csv_paths:\n",
+    "        scraped = pd.read_csv(csv_path)\n",
+    "        # Rows without body text are dropped. `subset` is given as a list so the\n",
+    "        # call does not depend on pandas accepting a bare column label.\n",
+    "        bodies = scraped.dropna(subset=[\"body\"])[\"body\"].values\n",
+    "        # str() guards against non-string cells before they reach Document\n",
+    "        documents.extend(Document(text=str(body)) for body in bodies)\n",
+    "\n",
+    "    return documents"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "documents = load_documents()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "len(documents)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "test_docs=documents[10:20]\n",
+    "test_docs"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from llama_index.node_parser import SimpleNodeParser\n",
+    "from llama_index.node_parser.extractors import (\n",
+    "    MetadataExtractor,\n",
+    "    # SummaryExtractor,\n",
+    "    # QuestionsAnsweredExtractor,\n",
+    "    # TitleExtractor,\n",
+    "    KeywordExtractor,\n",
+    "    # EntityExtractor,\n",
+    "    MetadataFeatureExtractor,\n",
+    ")\n",
+    "from llama_index.text_splitter import TokenTextSplitter\n",
+    "\n",
+    "text_splitter = TokenTextSplitter(separator=\" \", chunk_size=512, chunk_overlap=128)\n",
+    "\n",
+    "class CustomExtractor(MetadataFeatureExtractor):\n",
+    "    \"\"\"Merge each node's title and keywords into a single ``custom`` metadata entry.\"\"\"\n",
+    "\n",
+    "    def extract(self, nodes):\n",
+    "        \"\"\"Return one ``{\"custom\": ...}`` dict per node, in input order.\n",
+    "\n",
+    "        The value is the node's ``document_title`` and ``excerpt_keywords`` metadata\n",
+    "        joined by a newline. A node missing either key raises ``KeyError``.\n",
+    "        \"\"\"\n",
+    "        combined = []\n",
+    "        for node in nodes:\n",
+    "            meta = node.metadata\n",
+    "            merged = meta[\"document_title\"] + \"\\n\" + meta[\"excerpt_keywords\"]\n",
+    "            combined.append({\"custom\": merged})\n",
+    "        return combined\n",
+    "    \n",
+    "# Only keyword extraction is enabled. Other extractors that can be added to the list:\n",
+    "#   TitleExtractor(nodes=5, llm=llm)\n",
+    "#   QuestionsAnsweredExtractor(questions=3, llm=llm)\n",
+    "#   EntityExtractor(prediction_threshold=0.5)\n",
+    "#   SummaryExtractor(summaries=[\"prev\", \"self\"], llm=llm)\n",
+    "#   CustomExtractor()\n",
+    "metadata_extractor = MetadataExtractor(\n",
+    "    extractors=[KeywordExtractor(llm=llm, keywords=3)],\n",
+    ")\n",
+    "\n",
+    "# Node parser combining the token splitter with the metadata extractor\n",
+    "node_parser = SimpleNodeParser.from_defaults(\n",
+    "    metadata_extractor=metadata_extractor,\n",
+    "    text_splitter=text_splitter,\n",
+    ")\n",
+    "\n",
+    "# Parse the sample documents into nodes\n",
+    "nodes = node_parser.get_nodes_from_documents(test_docs)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def create_service_context(\n",
+    "        model, \n",
+    "        max_input_size=1024,\n",
+    "        num_output=128,\n",
+    "        chunk_size_lim=512,\n",
+    "        overlap_ratio=0.1\n",
+    "    ):\n",
+    "    \"\"\"Build a ``ServiceContext`` for ``model`` that uses a local embedding model.\n",
+    "\n",
+    "    Parameters\n",
+    "    ----------\n",
+    "    model\n",
+    "        LLM instance; it is wrapped in an ``LLMPredictor``.\n",
+    "    max_input_size : int\n",
+    "        Passed to ``PromptHelper`` as ``context_window``.\n",
+    "    num_output : int\n",
+    "        Passed to ``PromptHelper`` as ``num_output``.\n",
+    "    chunk_size_lim : int\n",
+    "        Passed to ``PromptHelper`` as ``chunk_size_limit``.\n",
+    "    overlap_ratio : float\n",
+    "        Passed to ``PromptHelper`` as ``chunk_overlap_ratio``.\n",
+    "\n",
+    "    Returns\n",
+    "    -------\n",
+    "    ServiceContext\n",
+    "        Created with ``ServiceContext.from_defaults`` and ``embed_model=\"local\"``.\n",
+    "    \"\"\"\n",
+    "    llm_predictor = LLMPredictor(llm=model)\n",
+    "    # Keyword arguments make the mapping from our parameter names to the\n",
+    "    # helper's explicit instead of relying on positional order.\n",
+    "    prompt_helper = PromptHelper(\n",
+    "        context_window=max_input_size,\n",
+    "        num_output=num_output,\n",
+    "        chunk_overlap_ratio=overlap_ratio,\n",
+    "        chunk_size_limit=chunk_size_lim,\n",
+    "    )\n",
+    "    return ServiceContext.from_defaults(\n",
+    "        llm_predictor=llm_predictor,\n",
+    "        prompt_helper=prompt_helper,\n",
+    "        embed_model=\"local\",\n",
+    "    )"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "service_context = create_service_context(llm)\n",
+    "index = VectorStoreIndex(nodes, service_context=service_context)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "query_engine = index.as_query_engine()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from llama_index.evaluation import ResponseEvaluator, QueryResponseEvaluator\n",
+    "\n",
+    "source_evaluator = ResponseEvaluator(service_context=service_context)\n",
+    "query_evaluator = QueryResponseEvaluator(service_context=service_context)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "query=\"Who is Ryan Chan?\"\n",
+    "response = query_engine.query(query)\n",
+    "print(response.response)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "print(source_evaluator.evaluate(response))\n",
+    "print(query_evaluator.evaluate(query, response))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "len(nodes)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from llama_index.evaluation import DatasetGenerator\n",
+    "\n",
+    "data_generator = DatasetGenerator(nodes, service_context=service_context, num_questions_per_chunk=3)\n",
+    "eval_questions = data_generator.generate_questions_from_nodes()\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Display the questions produced by the dataset generator\n",
+    "eval_questions"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "llama",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.11.4"
+  },
+  "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}