feat: llama 3.3 70b (#448)
Signed-off-by: Sertac Ozercan <[email protected]>
sozercan authored Dec 7, 2024
1 parent d07f20c commit 8ad7653
Showing 6 changed files with 30 additions and 27 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/patch-models.yaml
@@ -19,7 +19,7 @@ jobs:
     matrix:
       images:
         - ghcr.io/sozercan/llama3.1:8b
-        - ghcr.io/sozercan/llama3.1:70b
+        - ghcr.io/sozercan/llama3.3:70b
         - ghcr.io/sozercan/llama3.2:1b
         - ghcr.io/sozercan/llama3.2:3b
         - ghcr.io/sozercan/mixtral:8x7b
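
A quick local check that the renamed tag resolves once CI has published it (hypothetical one-liner; the tag is taken from the matrix above):

    docker pull ghcr.io/sozercan/llama3.3:70b
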
15 changes: 8 additions & 7 deletions .github/workflows/update-models-self.yaml
@@ -3,8 +3,13 @@ name: update-models-self
 on:
   workflow_dispatch:
     inputs:
+      models:
+        description: "models to update"
+        required: true
+        default: '["llama-3.3-70b-instruct", "mixtral-8x7b-instruct", "codestral-22b", "qwq-32b-preview"]'
+        type: string
       staging:
-        description: 'push to test registry'
+        description: "push to test registry"
         required: false
         default: false
         type: boolean
@@ -15,15 +20,11 @@ permissions:
   id-token: write
 
 jobs:
-  update-models-self:
+  update-models-self:
     strategy:
       fail-fast: false
       matrix:
-        model:
-          - llama-3.1-70b-instruct
-          - mixtral-8x7b-instruct
-          - codestral-22b
-          - qwq-32b-preview
+        model: ${{ fromJSON(github.event.inputs.models) }}
     runs-on: self-hosted
     timeout-minutes: 360
     steps:
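
With the matrix now read from a workflow_dispatch input, the model list can be overridden per run instead of editing the workflow file. A minimal dispatch sketch using the GitHub CLI; the `sozercan/aikit` repository path is an assumption here, and any model name from the default list above works:

    # Rebuild only the new 70B model on the self-hosted runner.
    # The JSON string input is expanded into the matrix by fromJSON().
    gh workflow run update-models-self.yaml \
      --repo sozercan/aikit \
      -f models='["llama-3.3-70b-instruct"]'
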
22 changes: 12 additions & 10 deletions .github/workflows/update-models.yaml
@@ -8,6 +8,16 @@ on:
       required: false
       default: false
       type: boolean
+      models:
+        description: "models to update"
+        required: true
+        default: '["llama-3.2-1b-instruct", "llama-3.2-3b-instruct", "llama-3.1-8b-instruct", "phi-3.5-3.8b-instruct", "gemma-2-2b-instruct", "flux-1-dev"]'
+        type: string
+      runtime:
+        description: "runtime to build"
+        required: true
+        default: '["cuda", "applesilicon"]'
+        type: string
 
 permissions:
   contents: write
@@ -19,16 +29,8 @@ jobs:
     strategy:
       fail-fast: false
       matrix:
-        model:
-          - llama-3.2-1b-instruct
-          - llama-3.2-3b-instruct
-          - llama-3.1-8b-instruct
-          - phi-3.5-3.8b-instruct
-          - gemma-2-2b-instruct
-          - flux-1-dev
-        runtime:
-          - cuda
-          - applesilicon
+        model: ${{ fromJSON(github.event.inputs.models) }}
+        runtime: ${{ fromJSON(github.event.inputs.runtime) }}
         exclude:
           - model: flux-1-dev # requires cuda runtime
             runtime: applesilicon
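
The same pattern is applied here, with the runtime axis parameterized as well. The existing exclude rule still filters the expanded matrix, so flux-1-dev never pairs with the applesilicon runtime. A hedged dispatch sketch (same GitHub CLI and repository assumptions as above):

    # Build one model against the CUDA runtime only; passing
    # runtime='["applesilicon"]' for flux-1-dev would yield an empty matrix.
    gh workflow run update-models.yaml \
      --repo sozercan/aikit \
      -f models='["flux-1-dev"]' \
      -f runtime='["cuda"]'
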
4 changes: 2 additions & 2 deletions README.md
@@ -91,7 +91,7 @@ If it doesn't include a specific model, you can always [create your own images](
 | πŸ¦™ Llama 3.2 | Instruct | 1B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama3.2:1b` | `llama-3.2-1b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
 | πŸ¦™ Llama 3.2 | Instruct | 3B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama3.2:3b` | `llama-3.2-3b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
 | πŸ¦™ Llama 3.1 | Instruct | 8B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama3.1:8b` | `llama-3.1-8b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
-| πŸ¦™ Llama 3.1 | Instruct | 70B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama3.1:70b` | `llama-3.1-70b-instruct` | [Llama](https://ai.meta.com/llama/license/) | |
+| πŸ¦™ Llama 3.3 | Instruct | 70B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama3.3:70b` | `llama-3.3-70b-instruct` | [Llama](https://ai.meta.com/llama/license/) | |
 | Ⓜ️ Mixtral | Instruct | 8x7B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/mixtral:8x7b` | `mixtral-8x7b-instruct` | [Apache](https://choosealicense.com/licenses/apache-2.0/) |
 | πŸ…ΏοΈ Phi 3.5 | Instruct | 3.8B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/phi3.5:3.8b` | `phi-3.5-3.8b-instruct` | [MIT](https://huggingface.co/microsoft/Phi-3.5-mini-instruct/resolve/main/LICENSE) |
 | πŸ”‘ Gemma 2 | Instruct | 2B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/gemma2:2b` | `gemma-2-2b-instruct` | [Gemma](https://ai.google.dev/gemma/terms) |
@@ -111,7 +111,7 @@ If it doesn't include a specific model, you can always [create your own images](
 | πŸ¦™ Llama 3.2 | Instruct | 1B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/llama3.2:1b` | `llama-3.2-1b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
 | πŸ¦™ Llama 3.2 | Instruct | 3B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/llama3.2:3b` | `llama-3.2-3b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
 | πŸ¦™ Llama 3.1 | Instruct | 8B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/llama3.1:8b` | `llama-3.1-8b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
-| πŸ¦™ Llama 3.1 | Instruct | 70B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/llama3.1:70b` | `llama-3.1-70b-instruct` | [Llama](https://ai.meta.com/llama/license/) | |
+| πŸ¦™ Llama 3.3 | Instruct | 70B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/llama3.3:70b` | `llama-3.3-70b-instruct` | [Llama](https://ai.meta.com/llama/license/) | |
 | Ⓜ️ Mixtral | Instruct | 8x7B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/mixtral:8x7b` | `mixtral-8x7b-instruct` | [Apache](https://choosealicense.com/licenses/apache-2.0/) |
 | πŸ…ΏοΈ Phi 3.5 | Instruct | 3.8B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/phi3.5:3.8b` | `phi-3.5-3.8b-instruct` | [MIT](https://huggingface.co/microsoft/Phi-3.5-mini-instruct/resolve/main/LICENSE) |
 | πŸ”‘ Gemma 2 | Instruct | 2B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/gemma2:2b` | `gemma-2-2b-instruct` | [Gemma](https://ai.google.dev/gemma/terms) |
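
The model name column is what API clients pass once a container is running. A short usage sketch, assuming AIKit's OpenAI-compatible API is served on port 8080 as its docs describe:

    # Start the CPU image, then send a chat completion request.
    docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama3.3:70b

    curl http://localhost:8080/v1/chat/completions \
      -H "Content-Type: application/json" \
      -d '{"model": "llama-3.3-70b-instruct", "messages": [{"role": "user", "content": "Hello"}]}'
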
10 changes: 5 additions & 5 deletions
@@ -3,9 +3,9 @@ apiVersion: v1alpha1
 debug: true
 runtime: cuda
 models:
-  - name: llama-3.1-70b-instruct
-    source: https://huggingface.co/MaziyarPanahi/Meta-Llama-3.1-70B-Instruct-GGUF/resolve/main/Meta-Llama-3.1-70B-Instruct.Q4_K_M.gguf
-    sha256: "3f16ab17da4521fe3ed7c5d7beed960d3fe7b5b64421ee9650aa53d6b649ccab"
+  - name: llama-3.3-70b-instruct
+    source: https://huggingface.co/MaziyarPanahi/Llama-3.3-70B-Instruct-GGUF/resolve/main/Llama-3.3-70B-Instruct.Q4_K_M.gguf
+    sha256: "4f3b04ecae278bdb0fd545b47c210bc5edf823e5ebf7d41e0b526c81d54b1ff3"
     promptTemplates:
       - name: chatMsg
         template: |
@@ -54,7 +54,7 @@ models:
       - name: completion
         {{.Input}}
 config: |
-  - name: llama-3.1-70b-instruct
+  - name: llama-3.3-70b-instruct
     backend: llama
     function:
       disable_no_action: true
@@ -63,7 +63,7 @@ config: |
       response_regex:
         - <function=(?P<name>\w+)>(?P<arguments>.*)</function>
     parameters:
-      model: Meta-Llama-3.1-70B-Instruct.Q4_K_M.gguf
+      model: Llama-3.3-70B-Instruct.Q4_K_M.gguf
     context_size: 8192
     f16: true
     template:
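
Because the model definition pins a SHA-256 for the upstream GGUF, the artifact can be verified independently before a build. A sketch reusing the URL and digest from the hunk above:

    # Download the quantized weights and check them against the pinned digest.
    curl -LO "https://huggingface.co/MaziyarPanahi/Llama-3.3-70B-Instruct-GGUF/resolve/main/Llama-3.3-70B-Instruct.Q4_K_M.gguf"
    echo "4f3b04ecae278bdb0fd545b47c210bc5edf823e5ebf7d41e0b526c81d54b1ff3  Llama-3.3-70B-Instruct.Q4_K_M.gguf" | sha256sum -c -
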
4 changes: 2 additions & 2 deletions website/docs/premade-models.md
@@ -18,7 +18,7 @@ Depending on your CPU capabilities, AIKit will automatically select the most opt
 | πŸ¦™ Llama 3.2 | Instruct | 1B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama3.2:1b` | `llama-3.2-1b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
 | πŸ¦™ Llama 3.2 | Instruct | 3B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama3.2:3b` | `llama-3.2-3b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
 | πŸ¦™ Llama 3.1 | Instruct | 8B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama3.1:8b` | `llama-3.1-8b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
-| πŸ¦™ Llama 3.1 | Instruct | 70B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama3.1:70b` | `llama-3.1-70b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
+| πŸ¦™ Llama 3.3 | Instruct | 70B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama3.3:70b` | `llama-3.3-70b-instruct` | [Llama](https://ai.meta.com/llama/license/) | |
 | Ⓜ️ Mixtral | Instruct | 8x7B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/mixtral:8x7b` | `mixtral-8x7b-instruct` | [Apache](https://choosealicense.com/licenses/apache-2.0/) |
 | πŸ…ΏοΈ Phi 3.5 | Instruct | 3.8B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/phi3.5:3.8b` | `phi-3.5-3.8b-instruct` | [MIT](https://huggingface.co/microsoft/Phi-3.5-mini-instruct/resolve/main/LICENSE) |
 | πŸ”‘ Gemma 2 | Instruct | 2B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/gemma2:2b` | `gemma-2-2b-instruct` | [Gemma](https://ai.google.dev/gemma/terms) |
@@ -32,7 +32,7 @@ Depending on your CPU capabilities, AIKit will automatically select the most opt
 | πŸ¦™ Llama 3.2 | Instruct | 1B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/llama3.2:1b` | `llama-3.2-1b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
 | πŸ¦™ Llama 3.2 | Instruct | 3B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/llama3.2:3b` | `llama-3.2-3b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
 | πŸ¦™ Llama 3.1 | Instruct | 8B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/llama3.1:8b` | `llama-3.1-8b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
-| πŸ¦™ Llama 3.1 | Instruct | 70B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/llama3.1:70b` | `llama-3.1-70b-instruct` | [Llama](https://ai.meta.com/llama/license/) | |
+| πŸ¦™ Llama 3.3 | Instruct | 70B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/llama3.3:70b` | `llama-3.3-70b-instruct` | [Llama](https://ai.meta.com/llama/license/) | |
 | Ⓜ️ Mixtral | Instruct | 8x7B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/mixtral:8x7b` | `mixtral-8x7b-instruct` | [Apache](https://choosealicense.com/licenses/apache-2.0/) |
 | πŸ…ΏοΈ Phi 3.5 | Instruct | 3.8B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/phi3.5:3.8b` | `phi-3.5-3.8b-instruct` | [MIT](https://huggingface.co/microsoft/Phi-3.5-mini-instruct/resolve/main/LICENSE) |
 | πŸ”‘ Gemma 2 | Instruct | 2B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/gemma2:2b` | `gemma-2-2b-instruct` | [Gemma](https://ai.google.dev/gemma/terms) |
