Only require autoawq for x86_64, closes #6
davidmezzetti committed Aug 8, 2024
1 parent 8710aaf commit 1db6edf
Showing 3 changed files with 14 additions and 3 deletions.
README.md (5 changes: 4 additions & 1 deletion)
@@ -106,11 +106,14 @@ The RAG application has a number of environment variables that can be set to con
 | Variable    | Description                               | Default Value                        |
 |:----------- |:---------------------------------------- |:----------------------------------- |
 | TITLE       | Sets the main title of the application   | 🚀 RAG with txtai                    |
-| LLM         | Sets the LLM                              | [Mistral-7B-OpenOrca-AWQ](https://huggingface.co/TheBloke/Mistral-7B-OpenOrca-AWQ) |
+| LLM         | Sets the LLM                              | x86-64: [Mistral-7B-OpenOrca-AWQ](https://huggingface.co/TheBloke/Mistral-7B-OpenOrca-AWQ) |
+|             |                                           | arm64: [Mistral-7B-OpenOrca-GGUF](https://huggingface.co/TheBloke/Mistral-7B-OpenOrca-GGUF) |
 | EMBEDDINGS  | Sets the embeddings database path         | [neuml/txtai-wikipedia-slim](https://huggingface.co/NeuML/txtai-wikipedia-slim) |
 | DATA        | Optionally sets the input data directory  | None                                 |
 | TOPICSBATCH | Optionally batches topic LLM queries      | None                                 |
+
+*Note: AWQ models are only supported on `x86-64` machines*
 
 See the following examples for setting this configuration with the Docker container. When running within a Python virtual environment, simply set these as environment variables.
 
 Run with Llama 3.1 8B.
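Outside Docker, the override is just an environment variable. As a minimal sketch, an arm64 user running in a Python virtual environment could pin the GGUF default before launching the app (the model path is the one used in rag.py below):

```python
import os

# AWQ kernels are x86-64 only, so arm64 hosts point LLM at the GGUF build.
os.environ["LLM"] = "TheBloke/Mistral-7B-OpenOrca-GGUF/mistral-7b-openorca.Q4_K_M.gguf"
```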
rag.py (10 changes: 9 additions & 1 deletion)

@@ -3,6 +3,7 @@
"""

import os
import platform
import re

from glob import glob
@@ -286,7 +287,14 @@ def __init__(self):
"""

# Load LLM
self.llm = LLM(os.environ.get("LLM", "TheBloke/Mistral-7B-OpenOrca-AWQ"))
self.llm = LLM(
os.environ.get(
"LLM",
"TheBloke/Mistral-7B-OpenOrca-AWQ"
if platform.machine() in ("x86_64", "AMD")
else "TheBloke/Mistral-7B-OpenOrca-GGUF/mistral-7b-openorca.Q4_K_M.gguf",
)
)

# Load embeddings
self.embeddings = self.load()
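For reference, `platform.machine()` reports the raw architecture string ("x86_64" on Linux and Intel macOS, "AMD64" on Windows, "arm64"/"aarch64" on ARM hosts), which is what the default above keys off. A standalone sketch of the same selection logic (the helper name is illustrative, not part of the codebase):

```python
import platform

def default_llm():
    """AWQ default on x86-64 machines, GGUF everywhere else."""
    if platform.machine() in ("x86_64", "AMD64"):
        return "TheBloke/Mistral-7B-OpenOrca-AWQ"
    return "TheBloke/Mistral-7B-OpenOrca-GGUF/mistral-7b-openorca.Q4_K_M.gguf"

print(default_llm())
```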
requirements.txt (2 changes: 1 addition & 1 deletion)

@@ -1,4 +1,4 @@
-autoawq
+autoawq; platform_machine == "x86_64" or platform_machine == "AMD64"
 matplotlib
 streamlit
 txtai[graph,pipeline-data,pipeline-llm]
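The suffix on the autoawq line is a standard PEP 508 environment marker: pip evaluates it against the installing machine and skips the package when it is false. One way to check the marker locally, assuming the packaging library is available:

```python
from packaging.markers import Marker

# pip performs this same evaluation at install time.
marker = Marker('platform_machine == "x86_64" or platform_machine == "AMD64"')
print(marker.evaluate())  # True on x86-64 hosts, False on arm64
```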
