diff --git a/.github/workflows/patch-models.yaml b/.github/workflows/patch-models.yaml
index dda4df02..91c22050 100644
--- a/.github/workflows/patch-models.yaml
+++ b/.github/workflows/patch-models.yaml
@@ -24,7 +24,7 @@ jobs:
           - ghcr.io/sozercan/llama3:70b
           - ghcr.io/sozercan/mixtral:8x7b
           - ghcr.io/sozercan/phi3:3.8b
-          - ghcr.io/sozercan/gemma1.1:2b
+          - ghcr.io/sozercan/gemma2:2b
           - ghcr.io/sozercan/codestral0.1:22b
     steps:
       - uses: jlumbroso/free-disk-space@54081f138730dfa15788a46383842cd2f914a1be # v1.3.1
diff --git a/.github/workflows/update-models.yaml b/.github/workflows/update-models.yaml
index 1ce57fcf..647f9df2 100644
--- a/.github/workflows/update-models.yaml
+++ b/.github/workflows/update-models.yaml
@@ -22,7 +22,7 @@ jobs:
         model:
           - llama-3.1-8b-instruct
          - phi-3-3.8b
-          - gemma-2b-instruct
+          - gemma-2-2b-instruct
     runs-on: ubuntu-latest
     timeout-minutes: 360
     steps:
diff --git a/README.md b/README.md
index a8decfcc..3c2d31c4 100644
--- a/README.md
+++ b/README.md
@@ -87,7 +87,7 @@ If it doesn't include a specific model, you can always [create your own images](
 | 🦙 Llama 3.1 | Instruct | 70B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama3.1:70b` | `llama-3.1-70b-instruct` | [Llama](https://ai.meta.com/llama/license/) | |
 | Ⓜ️ Mixtral | Instruct | 8x7B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/mixtral:8x7b` | `mixtral-8x7b-instruct` | [Apache](https://choosealicense.com/licenses/apache-2.0/) |
 | 🅿️ Phi 3 | Instruct | 3.8B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/phi3:3.8b` | `phi-3-3.8b` | [MIT](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/resolve/main/LICENSE) |
-| 🔡 Gemma 1.1 | Instruct | 2B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/gemma:2b` | `gemma-2b-instruct` | [Gemma](https://ai.google.dev/gemma/terms) |
+| 🔡 Gemma 2 | Instruct | 2B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/gemma2:2b` | `gemma-2-2b-instruct` | [Gemma](https://ai.google.dev/gemma/terms) |
 | ⌨️ Codestral 0.1 | Code | 22B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/codestral:22b` | `codestral-22b` | [MNLP](https://mistral.ai/licenses/MNPL-0.1.md) |
 
 ### NVIDIA CUDA
@@ -102,7 +102,7 @@ If it doesn't include a specific model, you can always [create your own images](
 | 🦙 Llama 3.1 | Instruct | 70B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/llama3.1:70b` | `llama-3.1-70b-instruct` | [Llama](https://ai.meta.com/llama/license/) | |
 | Ⓜ️ Mixtral | Instruct | 8x7B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/mixtral:8x7b` | `mixtral-8x7b-instruct` | [Apache](https://choosealicense.com/licenses/apache-2.0/) |
 | 🅿️ Phi 3 | Instruct | 3.8B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/phi3:3.8b` | `phi-3-3.8b` | [MIT](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/resolve/main/LICENSE) |
-| 🔡 Gemma 1.1 | Instruct | 2B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/gemma:2b` | `gemma-2b-instruct` | [Gemma](https://ai.google.dev/gemma/terms) |
+| 🔡 Gemma 2 | Instruct | 2B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/gemma2:2b` | `gemma-2-2b-instruct` | [Gemma](https://ai.google.dev/gemma/terms) |
 | ⌨️ Codestral 0.1 | Code | 22B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/codestral:22b` | `codestral-22b` | [MNLP](https://mistral.ai/licenses/MNPL-0.1.md) |
 
 ## What's next?
diff --git a/models/gemma-2b-instruct.yaml b/models/gemma-2-2b-instruct.yaml
similarity index 72%
rename from models/gemma-2b-instruct.yaml
rename to models/gemma-2-2b-instruct.yaml
index 34e41759..2e3720a4 100644
--- a/models/gemma-2b-instruct.yaml
+++ b/models/gemma-2-2b-instruct.yaml
@@ -3,9 +3,9 @@ apiVersion: v1alpha1
 debug: true
 runtime: cuda
 models:
-  - name: gemma-2b-instruct
-    source: https://huggingface.co/lmstudio-community/gemma-1.1-2b-it-GGUF/resolve/main/gemma-1.1-2b-it-Q4_K_M.gguf
-    sha256: cc2118e1d780fa33582738d8c99223d62c8734b06ef65076c01618d484d081d4
+  - name: gemma-2-2b-instruct
+    source: https://huggingface.co/lmstudio-community/gemma-2-2b-it-GGUF/resolve/main/gemma-2-2b-it-Q4_K_M.gguf
+    sha256: e0aee85060f168f0f2d8473d7ea41ce2f3230c1bc1374847505ea599288a7787
     promptTemplates:
       - name: chatMsg
         template: |
@@ -21,10 +21,10 @@ models:
         template: |
           {{ .Input }}
 config: |
-  - name: gemma-2b-instruct
+  - name: gemma-2-2b-instruct
     backend: llama
     parameters:
-      model: gemma-1.1-2b-it-Q4_K_M.gguf
+      model: gemma-2-2b-it-Q4_K_M.gguf
     context_size: 8192
     template:
       chat_message: chatMsg
@@ -35,6 +35,5 @@ config: |
     - \"<start_of_turn>\"
     - \"<end_of_turn>\"
     - \"<|im_end|>\"
-    gpu_layers: 35
     f16: true
     mmap: true
diff --git a/scripts/parse-models.sh b/scripts/parse-models.sh
index 7f65eab7..d2dc146a 100755
--- a/scripts/parse-models.sh
+++ b/scripts/parse-models.sh
@@ -17,7 +17,7 @@ extract_model_type() {
 }
 
 # Run and display results for each example
-for MODEL in "llama-2-7b-chat" "llama-2-13b-chat" "llama-3-8b-instruct" "llama-3.1-8b-instruct" "phi-3-3.8b" "gemma-2b-instruct" "codestral-22b" "llama-3-70b-instruct" "llama-3.1-70b-instruct" "mixtral-8x7b-instruct"; do
+for MODEL in "llama-2-7b-chat" "llama-2-13b-chat" "llama-3-8b-instruct" "llama-3.1-8b-instruct" "phi-3-3.8b" "gemma-2b-instruct" "gemma-2-2b-instruct" "codestral-22b" "llama-3-70b-instruct" "llama-3.1-70b-instruct" "mixtral-8x7b-instruct"; do
   echo "Model: $MODEL"
   echo "  Name: $(extract_model_name $MODEL)"
   echo "  Size: $(extract_model_size $MODEL)"
diff --git a/website/docs/premade-models.md b/website/docs/premade-models.md
index 0419b465..3c09c24d 100644
--- a/website/docs/premade-models.md
+++ b/website/docs/premade-models.md
@@ -14,7 +14,7 @@ If it doesn't include a specific model, you can always [create your own images](
 | 🦙 Llama 3.1 | Instruct | 70B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama3.1:70b` | `llama-3.1-70b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
 | Ⓜ️ Mixtral | Instruct | 8x7B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/mixtral:8x7b` | `mixtral-8x7b-instruct` | [Apache](https://choosealicense.com/licenses/apache-2.0/) |
 | 🅿️ Phi 3 | Instruct | 3.8B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/phi3:3.8b` | `phi-3-3.8b` | [MIT](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/resolve/main/LICENSE) |
-| 🔡 Gemma 1.1 | Instruct | 2B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/gemma:2b` | `gemma-2b-instruct` | [Gemma](https://ai.google.dev/gemma/terms) |
+| 🔡 Gemma 2 | Instruct | 2B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/gemma2:2b` | `gemma-2-2b-instruct` | [Gemma](https://ai.google.dev/gemma/terms) |
 | ⌨️ Codestral 0.1 | Code | 22B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/codestral:22b` | `codestral-22b` | [MNLP](https://mistral.ai/licenses/MNPL-0.1.md) |
 
 ## NVIDIA CUDA
@@ -25,7 +25,7 @@ If it doesn't include a specific model, you can always [create your own images](
 | 🦙 Llama 3.1 | Instruct | 70B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/llama3.1:70b` | `llama-3.1-70b-instruct` | [Llama](https://ai.meta.com/llama/license/) | |
 | Ⓜ️ Mixtral | Instruct | 8x7B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/mixtral:8x7b` | `mixtral-8x7b-instruct` | [Apache](https://choosealicense.com/licenses/apache-2.0/) |
 | 🅿️ Phi 3 | Instruct | 3.8B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/phi3:3.8b` | `phi-3-3.8b` | [MIT](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct/resolve/main/LICENSE) |
-| 🔡 Gemma 1.1 | Instruct | 2B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/gemma:2b` | `gemma-2b-instruct` | [Gemma](https://ai.google.dev/gemma/terms) |
+| 🔡 Gemma 2 | Instruct | 2B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/gemma2:2b` | `gemma-2-2b-instruct` | [Gemma](https://ai.google.dev/gemma/terms) |
 | ⌨️ Codestral 0.1 | Code | 22B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/codestral:22b` | `codestral-22b` | [MNLP](https://mistral.ai/licenses/MNPL-0.1.md) |
 
 :::note
@@ -44,22 +44,25 @@ If you need to use these specific models, you can always [create your own images
 
 ### CPU
 
-| Model | Optimization | Parameters | Command | License |
-| --------- | ------------ | ---------- | ------------------------------------------------------------- | ----------------------------------------------------------------------------------- |
-| 🐬 Orca 2 | | 13B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/orca2:13b` | [Microsoft Research](https://huggingface.co/microsoft/Orca-2-13b/blob/main/LICENSE) |
-| 🅿️ Phi 2 | Instruct | 2.7B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/phi2:2.7b` | [MIT](https://huggingface.co/microsoft/phi-2/resolve/main/LICENSE) |
-| 🦙 Llama 3 | Instruct | 8B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama3:8b` | `llama-3-8b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
-| 🦙 Llama 3 | Instruct | 70B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama3:70b` | `llama-3-70b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
-| 🦙 Llama 2 | Chat | 7B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama2:7b` | `llama-2-7b-chat` | [Llama](https://ai.meta.com/llama/license/) |
-| 🦙 Llama 2 | Chat | 13B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama2:13b` | `llama-2-13b-chat` | [Llama](https://ai.meta.com/llama/license/) |
+| Model | Optimization | Parameters | Command | License |
+| ----------- | ------------ | ---------- | ------------------------------------------------------------- | ----------------------------------------------------------------------------------- |
+| 🐬 Orca 2 | | 13B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/orca2:13b` | [Microsoft Research](https://huggingface.co/microsoft/Orca-2-13b/blob/main/LICENSE) |
+| 🅿️ Phi 2 | Instruct | 2.7B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/phi2:2.7b` | [MIT](https://huggingface.co/microsoft/phi-2/resolve/main/LICENSE) |
+| 🦙 Llama 3 | Instruct | 8B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama3:8b` | `llama-3-8b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
+| 🦙 Llama 3 | Instruct | 70B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama3:70b` | `llama-3-70b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
+| 🦙 Llama 2 | Chat | 7B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama2:7b` | `llama-2-7b-chat` | [Llama](https://ai.meta.com/llama/license/) |
+| 🦙 Llama 2 | Chat | 13B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama2:13b` | `llama-2-13b-chat` | [Llama](https://ai.meta.com/llama/license/) |
+| 🔡 Gemma 1.1 | Instruct | 2B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/gemma:2b` | `gemma-2b-instruct` | [Gemma](https://ai.google.dev/gemma/terms) |
+
 
 ### NVIDIA CUDA
 
-| Model | Optimization | Parameters | Command | License |
-| --------- | ------------ | ---------- | ---------------------------------------------------------------------------- | ----------------------------------------------------------------------------------- |
-| 🐬 Orca 2 | | 13B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/orca2:13b-cuda` | [Microsoft Research](https://huggingface.co/microsoft/Orca-2-13b/blob/main/LICENSE) |
-| 🅿️ Phi 2 | Instruct | 2.7B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/phi2:2.7b-cuda` | [MIT](https://huggingface.co/microsoft/phi-2/resolve/main/LICENSE) |
-| 🦙 Llama 3 | Instruct | 8B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/llama3:8b` | `llama-3-8b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
-| 🦙 Llama 3 | Instruct | 70B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/llama3:70b` | `llama-3-70b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
-| 🦙 Llama 2 | Chat | 7B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/llama2:7b` | `llama-2-7b-chat` | [Llama](https://ai.meta.com/llama/license/) |
-| 🦙 Llama 2 | Chat | 13B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/llama2:13b` | `llama-2-13b-chat` | [Llama](https://ai.meta.com/llama/license/) |
+| Model | Optimization | Parameters | Command | License |
+| ----------- | ------------ | ---------- | ---------------------------------------------------------------------------- | ----------------------------------------------------------------------------------- |
+| 🐬 Orca 2 | | 13B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/orca2:13b-cuda` | [Microsoft Research](https://huggingface.co/microsoft/Orca-2-13b/blob/main/LICENSE) |
+| 🅿️ Phi 2 | Instruct | 2.7B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/phi2:2.7b-cuda` | [MIT](https://huggingface.co/microsoft/phi-2/resolve/main/LICENSE) |
+| 🦙 Llama 3 | Instruct | 8B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/llama3:8b` | `llama-3-8b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
+| 🦙 Llama 3 | Instruct | 70B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/llama3:70b` | `llama-3-70b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
+| 🦙 Llama 2 | Chat | 7B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/llama2:7b` | `llama-2-7b-chat` | [Llama](https://ai.meta.com/llama/license/) |
+| 🦙 Llama 2 | Chat | 13B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/llama2:13b` | `llama-2-13b-chat` | [Llama](https://ai.meta.com/llama/license/) |
+| 🔡 Gemma 1.1 | Instruct | 2B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/gemma:2b` | `gemma-2b-instruct` | [Gemma](https://ai.google.dev/gemma/terms) |
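
To sanity-check this bump locally, a sketch like the one below should suffice. It assumes the rebuilt `gemma2:2b` image exposes the same OpenAI-compatible endpoint on port 8080 that the README tables above document for the other images; the download URL, checksum, and model name are taken verbatim from the model YAML in this diff.

```bash
# Sketch of a local smoke test for this change (assumes the gemma2:2b image
# serves the OpenAI-compatible API on port 8080, as the README rows indicate).

# 1. Confirm the new GGUF matches the sha256 pinned in models/gemma-2-2b-instruct.yaml.
curl -LO "https://huggingface.co/lmstudio-community/gemma-2-2b-it-GGUF/resolve/main/gemma-2-2b-it-Q4_K_M.gguf"
echo "e0aee85060f168f0f2d8473d7ea41ce2f3230c1bc1374847505ea599288a7787  gemma-2-2b-it-Q4_K_M.gguf" | sha256sum -c -

# 2. Run the updated image and query it under the renamed model identifier.
docker run -d --rm -p 8080:8080 ghcr.io/sozercan/gemma2:2b
curl http://localhost:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "gemma-2-2b-instruct", "messages": [{"role": "user", "content": "Hello!"}]}'
```

Note that the diff also drops `gpu_layers: 35` from the config block, presumably leaving GPU offloading to the backend's default rather than pinning a layer count for the smaller model.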