Merge pull request #66 from alan-turing-institute/llama2
Implement llama2 + llama_index
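
For context, this change wires a local quantised Llama 2 model into a llama_index retrieval pipeline. A minimal sketch of that wiring, assuming the 0.8-era llama_index API and the model path used in the notebook below (the sample document text is illustrative):

```python
from llama_index.llms import LlamaCPP
from llama_index.llms.llama_utils import messages_to_prompt, completion_to_prompt
from llama_index import Document, ServiceContext, VectorStoreIndex

# Load a local quantised Llama 2 chat model via llama-cpp-python.
llm = LlamaCPP(
    model_path="../../../llama/llama-2-7b-chat/gguf-model-q4_0.gguf",  # path as in the notebook
    temperature=0.1,
    max_new_tokens=256,
    context_window=3900,  # below llama2's 4096-token limit for headroom
    model_kwargs={"n_gpu_layers": 1},  # >= 1 offloads layers to the GPU
    # rewrite chat messages / completions into Llama 2's prompt format
    messages_to_prompt=messages_to_prompt,
    completion_to_prompt=completion_to_prompt,
)

# Embed documents with a local model (no OpenAI key), build an index, query it.
service_context = ServiceContext.from_defaults(llm=llm, embed_model="local")
index = VectorStoreIndex.from_documents(
    [Document(text="REG is the Research Engineering Group at the Turing.")],  # illustrative
    service_context=service_context,
)
print(index.as_query_engine().query("Who are REG?"))
```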
Showing 10 changed files with 9,283 additions and 3,444 deletions.
models/llama-index-hack/llama2_github_collaborators.ipynb: 693 additions & 0 deletions (large diff not rendered by default)
@@ -0,0 +1,277 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"from llama_index.llms import LlamaCPP\n",
"from llama_index.llms.llama_utils import messages_to_prompt, completion_to_prompt\n",
"from llama_index import Document\n",
"from llama_index import VectorStoreIndex, StorageContext, load_index_from_storage\n",
"from llama_index import LLMPredictor, PromptHelper, ServiceContext\n",
"\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"llama.cpp: loading model from ../../../llama/llama-2-7b-chat/ggml-model-q4_0.bin\n",
"llama_model_load_internal: format = ggjt v3 (latest)\n",
"llama_model_load_internal: n_vocab = 32000\n",
"llama_model_load_internal: n_ctx = 3900\n",
"llama_model_load_internal: n_embd = 4096\n",
"llama_model_load_internal: n_mult = 256\n",
"llama_model_load_internal: n_head = 32\n",
"llama_model_load_internal: n_head_kv = 32\n",
"llama_model_load_internal: n_layer = 32\n",
"llama_model_load_internal: n_rot = 128\n",
"llama_model_load_internal: n_gqa = 1\n",
"llama_model_load_internal: rnorm_eps = 1.0e-06\n",
"llama_model_load_internal: n_ff = 11008\n",
"llama_model_load_internal: freq_base = 10000.0\n",
"llama_model_load_internal: freq_scale = 1\n",
"llama_model_load_internal: ftype = 2 (mostly Q4_0)\n",
"llama_model_load_internal: model size = 7B\n",
"llama_model_load_internal: ggml ctx size = 0.08 MB\n",
"llama_model_load_internal: mem required = 4160.96 MB (+ 1950.00 MB per state)\n",
"llama_new_context_with_model: kv self size = 1950.00 MB\n",
"AVX = 0 | AVX2 = 0 | AVX512 = 0 | AVX512_VBMI = 0 | AVX512_VNNI = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 | \n"
]
}
],
"source": [
"llm = LlamaCPP(\n",
"    model_path=\"../../../llama/llama-2-7b-chat/gguf-model-q4_0.gguf\",\n",
"    temperature=0.1,\n",
"    max_new_tokens=256,\n",
"    # llama2 has a context window of 4096 tokens, but we set it lower to allow for some wiggle room\n",
"    context_window=3900,\n",
"    # kwargs to pass to __call__()\n",
"    generate_kwargs={},\n",
"    # kwargs to pass to __init__()\n",
"    # set to at least 1 to use GPU\n",
"    model_kwargs={\"n_gpu_layers\": 1},\n",
"    # transform inputs into Llama2 format\n",
"    messages_to_prompt=messages_to_prompt,\n",
"    completion_to_prompt=completion_to_prompt,\n",
"    verbose=True,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [],
"source": [
"def load_documents():\n",
"    # each row of the handbook Q&A jsonl becomes one llama_index Document\n",
"    df = pd.read_json(\"../../data/handbook_qa/data/qAndA.jsonl\", lines=True)\n",
"    df[\"prompt\"] = df[\"prompt\"] + \"?\"\n",
"    documents = [Document(text=str(i)) for i in df.values]\n",
"\n",
"    return documents"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [],
"source": [
"documents = load_documents()"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'[\\'What are some of the shortcodes in the handbook theme ?\\'\\n \"Some of the shortcodes in the handbook theme include figure, gist, highlight, and KaTeX, as well as custom ones specific to the theme such as hints, expand, and tabs. Shortcodes are templates that can be parametrized and can be included in the content section of a Markdown file to enable more complex features than Markdown\\'s simple syntax allows.\"]'"
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"documents[10].text"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {},
"outputs": [],
"source": [
"def create_service_context(\n",
"    model,\n",
"    max_input_size=1024,\n",
"    num_output=128,\n",
"    chunk_size_lim=256,\n",
"    overlap_ratio=0.1\n",
"    ):\n",
"    # wrap the LLM and configure prompt packing for the context limits above\n",
"    llm_predictor=LLMPredictor(llm=model)\n",
"    prompt_helper=PromptHelper(max_input_size, num_output, overlap_ratio, chunk_size_lim)\n",
"    # embed_model=\"local\" uses a local embedding model instead of OpenAI\n",
"    service_context = ServiceContext.from_defaults(llm_predictor=llm_predictor, prompt_helper=prompt_helper, embed_model=\"local\")\n",
"    return service_context"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/Users/rwood/Library/Caches/pypoetry/virtualenvs/reginald-slack-ZVq5BSHv-py3.11/lib/python3.11/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
}
],
"source": [
"service_context = create_service_context(llm)\n",
"index = VectorStoreIndex.from_documents(documents, service_context=service_context)"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {},
"outputs": [],
"source": [
"query_engine = index.as_query_engine()"
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Based on the provided context information, I understand that REG refers to the Research and Engineering Group (REG) within the Turing Institute.\n",
"According to the context, the REG newsletter is sent out monthly around a week before the monthly team meeting, and the all-REG meeting is also held monthly where new joiners are welcomed and updates from around REG or the Turing are presented, followed by a discussion on a topic of interest for the wider team.\n",
"Therefore, based on the information provided, REG appears to be a group within the Turing Institute that focuses on research and engineering activities, and has regular meetings to share updates and discuss topics of interest.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"llama_print_timings: load time = 5081.84 ms\n",
"llama_print_timings: sample time = 104.11 ms / 148 runs ( 0.70 ms per token, 1421.53 tokens per second)\n",
"llama_print_timings: prompt eval time = 5081.79 ms / 238 tokens ( 21.35 ms per token, 46.83 tokens per second)\n",
"llama_print_timings: eval time = 7902.76 ms / 147 runs ( 53.76 ms per token, 18.60 tokens per second)\n",
"llama_print_timings: total time = 13274.83 ms\n"
]
}
],
"source": [
"response = query_engine.query(\"Who are REG\")\n",
"print(response.response)"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['What is the REG newsletter and how often is it sent out?'\n",
" 'The REG newsletter is a monthly email containing short project updates and other news/updates from around the team and the institute. It is sent around a week before the monthly team meeting and comes to the Hut23 mailing list.']\n"
]
}
],
"source": [
"print(response.source_nodes[0].node.text)"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Llama.generate: prefix-match hit\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
" Thank you for asking! The Alan Turing Institute is based in London, UK.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"llama_print_timings: load time = 5081.84 ms\n",
"llama_print_timings: sample time = 13.43 ms / 19 runs ( 0.71 ms per token, 1415.27 tokens per second)\n",
"llama_print_timings: prompt eval time = 4391.42 ms / 212 tokens ( 20.71 ms per token, 48.28 tokens per second)\n",
"llama_print_timings: eval time = 957.65 ms / 18 runs ( 53.20 ms per token, 18.80 tokens per second)\n",
"llama_print_timings: total time = 5386.18 ms\n"
]
}
],
"source": [
"response = query_engine.query(\"Where is the Alan Turing Institute based?\")\n",
"print(response.response)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "llama",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}
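
Note that the notebook imports StorageContext and load_index_from_storage but never exercises them. A hedged sketch of how the index built above could be persisted and reloaded with that API (the persist directory name is illustrative):

```python
# Persist the freshly built index to disk so it need not be re-embedded each run.
index.storage_context.persist(persist_dir="./handbook_index")  # illustrative path

# In a later session: reload from disk, passing the same service_context
# so queries keep using the local llama2 LLM and local embeddings.
storage_context = StorageContext.from_defaults(persist_dir="./handbook_index")
index = load_index_from_storage(storage_context, service_context=service_context)
print(index.as_query_engine().query("Who are REG?"))
```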