Refactor codebase towards pip format (#9)
* Refactor codebase towards pip format

* Add termcolor dependency
virajmalia authored May 20, 2024
1 parent 72afbdc commit 665b8de
Showing 7 changed files with 136 additions and 87 deletions.
8 changes: 2 additions & 6 deletions .github/workflows/CI.yml
@@ -31,17 +31,13 @@ jobs:
       - name: Install dependencies
         run: |
           ./setup.sh cpu
-          pip install flake8 huggingface-hub>=0.17.1
+          pip install flake8
       - name: Lint with flake8
         run: |
           # stop the build if there are Python syntax errors or undefined names
           flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
           # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
           flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
-      - name: Download model file
-        run: |
-          mkdir -p ./models/
-          huggingface-cli download PawanKrd/Llama-3-8B-Instruct-GGUF llama-3-8b-instruct.Q3_K_M.gguf --local-dir ./models/
       - name: Test run
         run: |
-          ./llm.py ./models/ "Which is the best Llama model to use?"
+          ./src/llama4u.py -q "Which is the best Llama model to use?"
25 changes: 22 additions & 3 deletions README.md
@@ -1,19 +1,38 @@
 # The Llama4U Project
 
 ## Vision
-Develop a free and open source, fully-featured AI assistant that matches in performance with industry leading paid solutions.
+Develop a free and open source, fully-featured AI solution with agents.
 
 ## Current motivations for feature-set
 - Perplexity AI
 - ChatGPT/GPT4o
 
 ## Rule
-- Only free frameworks allowed - no use of APIs that have usage limitations or require keys to be registered with an online account.
+- APIs that have usage limitations or require keys to be registered with an online account are not permitted to be added to this project.
 
 ## System requirements
 - Nvidia GPU (>=8G VRAM)
 - Ubuntu 22.04
 - Works on WSL2 with nvidia CUDA
 
 ## Steps to run
 1. `./setup.sh`
-2. `./llm.py <local_model_dir>`
+2. `./src/llama4u.py`
+
+Default model: https://huggingface.co/PawanKrd/Meta-Llama-3-8B-Instruct-GGUF/blob/main/llama-3-8b-instruct.Q3_K_M.gguf
+
+Full CLI: `./src/llama4u.py -r <repo_id> -f <filename> -q <query>`
+
+## Description
+Llama4U is an AI assistant developed using [LlamaCPP][1], [LangChain][2] and [Llama3][3]. A completely free AI solution that can be hosted locally, while providing online capabilities in a responsible and user-controllable way.
+
+## Credits
+- Meta, for the open source Llama models
+- HuggingFace community
+- LlamaCPP and llama-cpp-python communities
+- LangChain community
+
+
+[1]: https://github.com/abetlen/llama-cpp-python
+[2]: https://python.langchain.com/v0.1/docs/get_started/introduction/
+[3]: https://huggingface.co/blog/llama3
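A concrete run of the full CLI shown above, filling the placeholders with the default repo and filename that `src/llama4u.py` falls back to, would presumably look like:

`./src/llama4u.py -r PawanKrd/Meta-Llama-3-8B-Instruct-GGUF -f llama-3-8b-instruct.Q3_K_M.gguf -q "Which is the best Llama model to use?"`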
71 changes: 0 additions & 71 deletions llm.py

This file was deleted.

4 changes: 3 additions & 1 deletion requirements.txt
@@ -1,10 +1,12 @@
 # LLM essentials
 huggingface_hub
 llama-cpp-python -C cmake.args="-DLLAMA_CUDA=on"
-tensorflow[and-cuda]
 
 # Web Retriever
 langchain
 langchain-community
 langchain-chroma
 duckduckgo_search
+
+# Other
+termcolor
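The `-C` on the llama-cpp-python line is pip's short form of `--config-settings`, so the CUDA flag is forwarded to the package's CMake build at install time (per-requirement config settings assume a reasonably recent pip, 23.1+). Run standalone, the equivalent command would be:

`pip install llama-cpp-python -C cmake.args="-DLLAMA_CUDA=on"`

A CPU-only install would simply drop the flag.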
6 changes: 0 additions & 6 deletions setup.sh
@@ -18,9 +18,3 @@ fi
 # project dependencies
 sudo apt install python3-pip
 pip install -r requirements.txt
-
-if [ "$1" != "cpu" ]; then
-    # GPU device verification
-    # If not using GPU, comment or ignore this check and also remove tensorflow from requirements.txt
-    python3 -c "import tensorflow as tf; print(tf.config.list_physical_devices('GPU'))" 2> /dev/null
-fi
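With the TensorFlow check deleted, GPU verification could instead lean on llama-cpp-python itself. A minimal sketch, assuming the installed build exposes `llama_supports_gpu_offload` (present in recent llama-cpp-python releases; not part of this commit):

from llama_cpp import llama_supports_gpu_offload

# True only if the llama-cpp-python wheel was built with GPU offload (e.g. CUDA)
print("GPU offload available:", llama_supports_gpu_offload())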
4 changes: 4 additions & 0 deletions src/__init__.py
@@ -0,0 +1,4 @@
""" __init__.py for Llama4U """
from . import llama4u

__all__ = ["llama4u"]
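With the package `__init__.py` in place, the module should also be importable as a package rather than only run as a script. A hypothetical usage sketch from the repository root (names taken from src/llama4u.py; constructing Llama4U downloads the default model on first use):

from src.llama4u import Llama4U

bot = Llama4U(None, None)  # None/None falls back to the default repo and filename
bot.single_query("Which is the best Llama model to use?")  # prints the reply, then exits the process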
105 changes: 105 additions & 0 deletions src/llama4u.py
@@ -0,0 +1,105 @@
+#!/usr/bin/env python3
+import sys
+import argparse
+from math import exp
+from statistics import median
+from os import devnull
+from contextlib import contextmanager, redirect_stderr
+from termcolor import colored
+from huggingface_hub import hf_hub_download
+from llama_cpp import Llama
+
+class Llama4U():
+    """ Llama4U """
+    def __init__(self,
+                 hf_repo_id,
+                 model_filename
+                 ):
+        if hf_repo_id is None:
+            hf_repo_id = "PawanKrd/Meta-Llama-3-8B-Instruct-GGUF"
+        if model_filename is None:
+            model_filename = "llama-3-8b-instruct.Q3_K_M.gguf"
+        # Download from the HF Hub (cached locally after the first run)
+        model_path = hf_hub_download(repo_id=hf_repo_id, filename=model_filename)
+
+        # Instantiate model from downloaded file
+        self.llm = Llama(
+            n_gpu_layers=-1,  # offload all layers to the GPU
+            max_new_tokens=2048,
+            model_path=model_path,
+            logits_all=True,  # required to report per-token logprobs
+        )
+
+    def start_chat_session(self):
+        """ Chat session loop """
+        my_messages = [
+            {"role": "system",
+             "content": "A chat between a curious user and an artificial intelligence assistant. \
+                The assistant gives helpful, and polite answers to the user's questions."},
+        ]
+
+        for _ in range(50):
+
+            # User's turn
+            print(colored('You: =====', 'yellow'))
+            user_prompt = input()
+            if user_prompt.lower() in ["exit", "quit", "bye"]:
+                print(colored('Assistant(Median Prob:1.0): =====', 'yellow'))
+                print("Chat session ended. Goodbye!")
+                break
+            my_messages.append({"role": "user", "content": user_prompt})
+
+            # AI's turn
+            response = self.llm.create_chat_completion(messages=my_messages,
+                                                       logprobs=True,
+                                                       top_logprobs=1,
+                                                       )
+            logprobs = response["choices"][0]["logprobs"]["token_logprobs"]
+            # Convert logprobs to probabilities
+            probabilities = [exp(logprob) for logprob in logprobs]
+            print(colored(f'Assistant(Median Prob:{median(probabilities)}): =====', 'yellow'))
+            print(response["choices"][0]["message"]["content"])
+
+    def single_query(self, query):
+        """ Single Query Mode """
+        response = self.llm.create_chat_completion([{"role": "user", "content": query}],
+                                                   logprobs=True,
+                                                   top_logprobs=1,
+                                                   )
+        if response:
+            logprobs = response["choices"][0]["logprobs"]["token_logprobs"]
+            # Convert logprobs to probabilities
+            probabilities = [exp(logprob) for logprob in logprobs]
+            print(f'Assistant(Median Prob:{median(probabilities)}): =====')
+            print(response["choices"][0]["message"]["content"])
+            sys.exit(0)
+        else:
+            print("Query failed")
+            sys.exit(1)
+
+@contextmanager
+def suppress_stderr():
+    """A context manager that redirects stderr to devnull"""
+    with open(devnull, 'w', encoding='utf-8') as fnull:
+        with redirect_stderr(fnull) as err:
+            yield err
+
+def parse_arguments():
+    """ parse input arguments """
+    parser = argparse.ArgumentParser(description='Llama4U Input Parser')
+    parser.add_argument('-r', '--repo_id', type=str, required=False, help='Repository ID')
+    parser.add_argument('-f', '--filename', type=str, required=False, help='Filename')
+    parser.add_argument('-q', '--query', type=str, required=False, help='Single Query')
+    return parser.parse_args()
+
+if __name__ == '__main__':
+    args = parse_arguments()
+    repo_id = args.repo_id
+    filename = args.filename
+
+    # llama.cpp prints loading noise to stderr; hide it during model setup
+    with suppress_stderr():
+        llama4u = Llama4U(repo_id, filename)
+
+    if args.query:
+        llama4u.single_query(args.query)
+    else:
+        llama4u.start_chat_session()
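The `Median Prob` value printed above each reply is derived from the per-token logprobs: each logprob is mapped back to a probability with `exp`, and the median serves as a rough confidence score for the whole reply. A toy illustration with made-up values:

from math import exp
from statistics import median

# Hypothetical token logprobs, as found in
# response["choices"][0]["logprobs"]["token_logprobs"]
token_logprobs = [-0.05, -0.61, -0.11]

probabilities = [exp(lp) for lp in token_logprobs]  # [0.951, 0.543, 0.896]
print(median(probabilities))                        # 0.8958... -> the reported "Median Prob"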
