diff --git a/.github/workflows/patch-models.yaml b/.github/workflows/patch-models.yaml
index dda4df02..91c22050 100644
--- a/.github/workflows/patch-models.yaml
+++ b/.github/workflows/patch-models.yaml
@@ -24,7 +24,7 @@ jobs:
           - ghcr.io/sozercan/llama3:70b
           - ghcr.io/sozercan/mixtral:8x7b
           - ghcr.io/sozercan/phi3:3.8b
-          - ghcr.io/sozercan/gemma1.1:2b
+          - ghcr.io/sozercan/gemma2:2b
           - ghcr.io/sozercan/codestral0.1:22b
     steps:
       - uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
diff --git a/.github/workflows/update-models.yaml b/.github/workflows/update-models.yaml
index 1ce57fcf..647f9df2 100644
--- a/.github/workflows/update-models.yaml
+++ b/.github/workflows/update-models.yaml
@@ -22,7 +22,7 @@ jobs:
         model:
           - llama-3.1-8b-instruct
          - phi-3-3.8b
-          - gemma-2b-instruct
+          - gemma-2-2b-instruct
     runs-on: ubuntu-latest
     timeout-minutes: 360
     steps:
diff --git a/README.md b/README.md
index a8decfcc..3c2d31c4 100644
--- a/README.md
+++ b/README.md
@@ -87,7 +87,7 @@ If it doesn't include a specific model, you can always [create your own images](
 | 🦙 Llama 3.1 | Instruct | 70B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama3.1:70b` | `llama-3.1-70b-instruct` | [Llama](https://ai.meta.com/llama/license/) | |
 | Ⓜ️ Mixtral | Instruct | 8x7B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/mixtral:8x7b` | `mixtral-8x7b-instruct` | [Apache](https://choosealicense.com/licenses/apache-2.0/) |
 | 🅿️ Phi 3 | Instruct | 3.8B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/phi3:3.8b` | `phi-3-3.8b` | [MIT](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/resolve/main/LICENSE) |
-| 🔡 Gemma 1.1 | Instruct | 2B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/gemma:2b` | `gemma-2b-instruct` | [Gemma](https://ai.google.dev/gemma/terms) |
+| 🔡 Gemma 2 | Instruct | 2B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/gemma2:2b` | `gemma-2-2b-instruct` | [Gemma](https://ai.google.dev/gemma/terms) |
 | ⌨️ Codestral 0.1 | Code | 22B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/codestral:22b` | `codestral-22b` | [MNLP](https://mistral.ai/licenses/MNPL-0.1.md) |
 
 ### NVIDIA CUDA
@@ -102,7 +102,7 @@ If it doesn't include a specific model, you can always [create your own images](
 | 🦙 Llama 3.1 | Instruct | 70B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/llama3.1:70b` | `llama-3.1-70b-instruct` | [Llama](https://ai.meta.com/llama/license/) | |
 | Ⓜ️ Mixtral | Instruct | 8x7B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/mixtral:8x7b` | `mixtral-8x7b-instruct` | [Apache](https://choosealicense.com/licenses/apache-2.0/) |
 | 🅿️ Phi 3 | Instruct | 3.8B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/phi3:3.8b` | `phi-3-3.8b` | [MIT](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/resolve/main/LICENSE) |
-| 🔡 Gemma 1.1 | Instruct | 2B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/gemma:2b` | `gemma-2b-instruct` | [Gemma](https://ai.google.dev/gemma/terms) |
+| 🔡 Gemma 2 | Instruct | 2B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/gemma2:2b` | `gemma-2-2b-instruct` | [Gemma](https://ai.google.dev/gemma/terms) |
 | ⌨️ Codestral 0.1 | Code | 22B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/codestral:22b` | `codestral-22b` | [MNLP](https://mistral.ai/licenses/MNPL-0.1.md) |
 
 ## What's next?
diff --git a/models/gemma-2b-instruct.yaml b/models/gemma-2-2b-instruct.yaml
similarity index 72%
rename from models/gemma-2b-instruct.yaml
rename to models/gemma-2-2b-instruct.yaml
index 34e41759..2e3720a4 100644
--- a/models/gemma-2b-instruct.yaml
+++ b/models/gemma-2-2b-instruct.yaml
@@ -3,9 +3,9 @@ apiVersion: v1alpha1
 debug: true
 runtime: cuda
 models:
-  - name: gemma-2b-instruct
-    source: https://huggingface.co/lmstudio-community/gemma-1.1-2b-it-GGUF/resolve/main/gemma-1.1-2b-it-Q4_K_M.gguf
-    sha256: cc2118e1d780fa33582738d8c99223d62c8734b06ef65076c01618d484d081d4
+  - name: gemma-2-2b-instruct
+    source: https://huggingface.co/lmstudio-community/gemma-2-2b-it-GGUF/resolve/main/gemma-2-2b-it-Q4_K_M.gguf
+    sha256: e0aee85060f168f0f2d8473d7ea41ce2f3230c1bc1374847505ea599288a7787
     promptTemplates:
       - name: chatMsg
         template: |
@@ -21,10 +21,10 @@ models:
         template: |
           {{ .Input }}
 config: |
-  - name: gemma-2b-instruct
+  - name: gemma-2-2b-instruct
     backend: llama
     parameters:
-      model: gemma-1.1-2b-it-Q4_K_M.gguf
+      model: gemma-2-2b-it-Q4_K_M.gguf
     context_size: 8192
     template:
       chat_message: chatMsg
@@ -35,6 +35,5 @@ config: |
     - \"<start_of_turn>\"
     - \"<end_of_turn>\"
     - \"<|im_end|>\"
-    gpu_layers: 35
     f16: true
     mmap: true
diff --git a/scripts/parse-models.sh b/scripts/parse-models.sh
index 7f65eab7..d2dc146a 100755
--- a/scripts/parse-models.sh
+++ b/scripts/parse-models.sh
@@ -17,7 +17,7 @@ extract_model_type() {
 }
 
 # Run and display results for each example
-for MODEL in "llama-2-7b-chat" "llama-2-13b-chat" "llama-3-8b-instruct" "llama-3.1-8b-instruct" "phi-3-3.8b" "gemma-2b-instruct" "codestral-22b" "llama-3-70b-instruct" "llama-3.1-70b-instruct" "mixtral-8x7b-instruct"; do
+for MODEL in "llama-2-7b-chat" "llama-2-13b-chat" "llama-3-8b-instruct" "llama-3.1-8b-instruct" "phi-3-3.8b" "gemma-2b-instruct" "gemma-2-2b-instruct" "codestral-22b" "llama-3-70b-instruct" "llama-3.1-70b-instruct" "mixtral-8x7b-instruct"; do
   echo "Model: $MODEL"
   echo "  Name: $(extract_model_name $MODEL)"
   echo "  Size: $(extract_model_size $MODEL)"
diff --git a/website/docs/premade-models.md b/website/docs/premade-models.md
index 0419b465..3c09c24d 100644
--- a/website/docs/premade-models.md
+++ b/website/docs/premade-models.md
@@ -14,7 +14,7 @@ If it doesn't include a specific model, you can always [create your own images](
 | 🦙 Llama 3.1 | Instruct | 70B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama3.1:70b` | `llama-3.1-70b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
 | Ⓜ️ Mixtral | Instruct | 8x7B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/mixtral:8x7b` | `mixtral-8x7b-instruct` | [Apache](https://choosealicense.com/licenses/apache-2.0/) |
 | 🅿️ Phi 3 | Instruct | 3.8B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/phi3:3.8b` | `phi-3-3.8b` | [MIT](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/resolve/main/LICENSE) |
-| 🔡 Gemma 1.1 | Instruct | 2B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/gemma:2b` | `gemma-2b-instruct` | [Gemma](https://ai.google.dev/gemma/terms) |
+| 🔡 Gemma 2 | Instruct | 2B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/gemma2:2b` | `gemma-2-2b-instruct` | [Gemma](https://ai.google.dev/gemma/terms) |
 | ⌨️ Codestral 0.1 | Code | 22B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/codestral:22b` | `codestral-22b` | [MNLP](https://mistral.ai/licenses/MNPL-0.1.md) |
 
 ## NVIDIA CUDA
@@ -25,7 +25,7 @@ If it doesn't include a specific model, you can always [create your own images](
 | 🦙 Llama 3.1 | Instruct | 70B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/llama3.1:70b` | `llama-3.1-70b-instruct` | [Llama](https://ai.meta.com/llama/license/) | |
 | Ⓜ️ Mixtral | Instruct | 8x7B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/mixtral:8x7b` | `mixtral-8x7b-instruct` | [Apache](https://choosealicense.com/licenses/apache-2.0/) |
 | 🅿️ Phi 3 | Instruct | 3.8B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/phi3:3.8b` | `phi-3-3.8b` | [MIT](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/resolve/main/LICENSE) |
-| 🔡 Gemma 1.1 | Instruct | 2B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/gemma:2b` | `gemma-2b-instruct` | [Gemma](https://ai.google.dev/gemma/terms) |
+| 🔡 Gemma 2 | Instruct | 2B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/gemma2:2b` | `gemma-2-2b-instruct` | [Gemma](https://ai.google.dev/gemma/terms) |
 | ⌨️ Codestral 0.1 | Code | 22B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/codestral:22b` | `codestral-22b` | [MNLP](https://mistral.ai/licenses/MNPL-0.1.md) |
 
 :::note
@@ -44,22 +44,25 @@ If you need to use these specific models, you can always [create your own images
 
 ### CPU
 
-| Model | Optimization | Parameters | Command | License |
-| --------- | ------------ | ---------- | ------------------------------------------------------------- | ----------------------------------------------------------------------------------- |
-| 🐬 Orca 2 | | 13B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/orca2:13b` | [Microsoft Research](https://huggingface.co/microsoft/Orca-2-13b/blob/main/LICENSE) |
-| 🅿️ Phi 2 | Instruct | 2.7B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/phi2:2.7b` | [MIT](https://huggingface.co/microsoft/phi-2/resolve/main/LICENSE) |
-| 🦙 Llama 3 | Instruct | 8B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama3:8b` | `llama-3-8b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
-| 🦙 Llama 3 | Instruct | 70B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama3:70b` | `llama-3-70b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
-| 🦙 Llama 2 | Chat | 7B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama2:7b` | `llama-2-7b-chat` | [Llama](https://ai.meta.com/llama/license/) |
-| 🦙 Llama 2 | Chat | 13B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama2:13b` | `llama-2-13b-chat` | [Llama](https://ai.meta.com/llama/license/) |
+| Model | Optimization | Parameters | Command | License |
+| ----------- | ------------ | ---------- | ------------------------------------------------------------- | ----------------------------------------------------------------------------------- |
+| 🐬 Orca 2 | | 13B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/orca2:13b` | [Microsoft Research](https://huggingface.co/microsoft/Orca-2-13b/blob/main/LICENSE) |
+| 🅿️ Phi 2 | Instruct | 2.7B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/phi2:2.7b` | [MIT](https://huggingface.co/microsoft/phi-2/resolve/main/LICENSE) |
+| 🦙 Llama 3 | Instruct | 8B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama3:8b` | `llama-3-8b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
+| 🦙 Llama 3 | Instruct | 70B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama3:70b` | `llama-3-70b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
+| 🦙 Llama 2 | Chat | 7B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama2:7b` | `llama-2-7b-chat` | [Llama](https://ai.meta.com/llama/license/) |
+| 🦙 Llama 2 | Chat | 13B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama2:13b` | `llama-2-13b-chat` | [Llama](https://ai.meta.com/llama/license/) |
+| 🔡 Gemma 1.1 | Instruct | 2B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/gemma:2b` | `gemma-2b-instruct` | [Gemma](https://ai.google.dev/gemma/terms) |
+
 
 ### NVIDIA CUDA
 
-| Model | Optimization | Parameters | Command | License |
-| --------- | ------------ | ---------- | ---------------------------------------------------------------------------- | ----------------------------------------------------------------------------------- |
-| 🐬 Orca 2 | | 13B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/orca2:13b-cuda` | [Microsoft Research](https://huggingface.co/microsoft/Orca-2-13b/blob/main/LICENSE) |
-| 🅿️ Phi 2 | Instruct | 2.7B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/phi2:2.7b-cuda` | [MIT](https://huggingface.co/microsoft/phi-2/resolve/main/LICENSE) |
-| 🦙 Llama 3 | Instruct | 8B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/llama3:8b` | `llama-3-8b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
-| 🦙 Llama 3 | Instruct | 70B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/llama3:70b` | `llama-3-70b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
-| 🦙 Llama 2 | Chat | 7B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/llama2:7b` | `llama-2-7b-chat` | [Llama](https://ai.meta.com/llama/license/) |
-| 🦙 Llama 2 | Chat | 13B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/llama2:13b` | `llama-2-13b-chat` | [Llama](https://ai.meta.com/llama/license/) |
+| Model | Optimization | Parameters | Command | License |
+| ----------- | ------------ | ---------- | ---------------------------------------------------------------------------- | ----------------------------------------------------------------------------------- |
+| 🐬 Orca 2 | | 13B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/orca2:13b-cuda` | [Microsoft Research](https://huggingface.co/microsoft/Orca-2-13b/blob/main/LICENSE) |
+| 🅿️ Phi 2 | Instruct | 2.7B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/phi2:2.7b-cuda` | [MIT](https://huggingface.co/microsoft/phi-2/resolve/main/LICENSE) |
+| 🦙 Llama 3 | Instruct | 8B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/llama3:8b` | `llama-3-8b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
+| 🦙 Llama 3 | Instruct | 70B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/llama3:70b` | `llama-3-70b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
+| 🦙 Llama 2 | Chat | 7B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/llama2:7b` | `llama-2-7b-chat` | [Llama](https://ai.meta.com/llama/license/) |
+| 🦙 Llama 2 | Chat | 13B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/llama2:13b` | `llama-2-13b-chat` | [Llama](https://ai.meta.com/llama/license/) |
+| 🔡 Gemma 1.1 | Instruct | 2B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/gemma:2b` | `gemma-2b-instruct` | [Gemma](https://ai.google.dev/gemma/terms) |
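
To sanity-check this bump locally, a sketch like the one below should suffice. It assumes the rebuilt `gemma2:2b` image exposes the same OpenAI-compatible endpoint on port 8080 that the README tables above document for the other images; the download URL, checksum, and model name are taken verbatim from the model YAML in this diff.

```bash
# Sketch of a local smoke test for this change (assumes the gemma2:2b image
# serves the OpenAI-compatible API on port 8080, as the README rows indicate).

# 1. Confirm the new GGUF matches the sha256 pinned in models/gemma-2-2b-instruct.yaml.
curl -LO "https://huggingface.co/lmstudio-community/gemma-2-2b-it-GGUF/resolve/main/gemma-2-2b-it-Q4_K_M.gguf"
echo "e0aee85060f168f0f2d8473d7ea41ce2f3230c1bc1374847505ea599288a7787  gemma-2-2b-it-Q4_K_M.gguf" | sha256sum -c -

# 2. Run the updated image and query it under the renamed model identifier.
docker run -d --rm -p 8080:8080 ghcr.io/sozercan/gemma2:2b
curl http://localhost:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "gemma-2-2b-instruct", "messages": [{"role": "user", "content": "Hello!"}]}'
```

Note that the diff also drops `gpu_layers: 35` from the config block, presumably leaving GPU offloading to the backend's default rather than pinning a layer count for the smaller model.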