feat: apple silicon support #433

Merged · 3 commits · Nov 25, 2024

Changes from all commits
1 change: 1 addition & 0 deletions .github/dependabot.yaml
@@ -18,6 +18,7 @@ updates:
directory: /
schedule:
interval: "weekly"

- package-ecosystem: docker
directory: /charts/aikit
schedule:
2 changes: 1 addition & 1 deletion .github/workflows/lint.yaml
@@ -46,4 +46,4 @@ jobs:
- name: lint
uses: golangci/golangci-lint-action@971e284b6050e8a5849b72094c50ab08da042db8 # v6.1.1
with:
version: v1.60.3
version: v1.62.0
32 changes: 27 additions & 5 deletions .github/workflows/release-base.yaml
@@ -14,6 +14,12 @@ jobs:
release-base:
runs-on: ubuntu-latest
timeout-minutes: 360
strategy:
fail-fast: true
matrix:
runtime:
- base
- applesilicon
steps:
- name: Harden Runner
uses: step-security/harden-runner@0080882f6c36860b6ba35c610c98ce87d4e2f26f # v2.10.2
@@ -38,18 +38,34 @@
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}

- name: Set runtime variables for matrix
run: |
if [ ${{ matrix.runtime }} == "base" ]; then
export PLATFORMS="linux/amd64,linux/arm64"
export FILE="Dockerfile.base"
export TAG="ghcr.io/sozercan/base:latest"
export CACHE_FROM="type=gha,scope=base"
export CACHE_TO="type=gha,scope=base,mode=max"
elif [ ${{ matrix.runtime }} == "applesilicon" ]; then
export PLATFORMS="linux/arm64"
export FILE="Dockerfile.base-applesilicon"
export TAG="ghcr.io/sozercan/applesilicon/base:latest"
export CACHE_FROM="type=gha,scope=base-applesilicon"
export CACHE_TO="type=gha,scope=base-applesilicon,mode=max"
fi

- name: Build and push
uses: docker/build-push-action@4f58ea79222b3b9dc2c8bbdd6debcef730109a75 # v6.9.0
id: build-and-push
with:
push: true
tags: ghcr.io/sozercan/base:latest
cache-from: type=gha,scope=base
cache-to: type=gha,scope=base,mode=max
sbom: true
provenance: true
platforms: linux/amd64,linux/arm64
file: Dockerfile.base
tags: ${{ env.TAG }}
cache-from: ${{ env.CACHE_FROM }}
cache-to: ${{ env.CACHE_TO }}
platforms: ${{ env.PLATFORMS }}
file: ${{ env.FILE }}

- name: Sign the images with GitHub OIDC Token
env:
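A note on the `Set runtime variables for matrix` step: a plain `export` is scoped to that step's shell, so for the `${{ env.TAG }}`-style references in the following `Build and push` step to resolve, the values have to be written to `$GITHUB_ENV`. A minimal sketch of that pattern (values copied from the matrix above; the exact mechanism in the merged workflow may differ):

```bash
# Sketch: persist per-runtime build variables for later steps via $GITHUB_ENV.
if [ "${{ matrix.runtime }}" == "base" ]; then
  {
    echo "PLATFORMS=linux/amd64,linux/arm64"
    echo "FILE=Dockerfile.base"
    echo "TAG=ghcr.io/sozercan/base:latest"
    echo "CACHE_FROM=type=gha,scope=base"
    echo "CACHE_TO=type=gha,scope=base,mode=max"
  } >> "$GITHUB_ENV"
elif [ "${{ matrix.runtime }}" == "applesilicon" ]; then
  {
    echo "PLATFORMS=linux/arm64"
    echo "FILE=Dockerfile.base-applesilicon"
    echo "TAG=ghcr.io/sozercan/applesilicon/base:latest"
    echo "CACHE_FROM=type=gha,scope=base-applesilicon"
    echo "CACHE_TO=type=gha,scope=base-applesilicon,mode=max"
  } >> "$GITHUB_ENV"
fi
```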
70 changes: 70 additions & 0 deletions .github/workflows/test-podman-applesilicon.yaml
@@ -0,0 +1,70 @@
name: podman-test-gpu

on:
workflow_dispatch:

permissions: read-all

jobs:
test:
runs-on: self-hosted
timeout-minutes: 240
steps:
- name: cleanup workspace
run: |
rm -rf ./* || true
rm -rf ./.??* || true
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2

# use default docker driver builder with containerd image store for local aikit image
# these must be setup before running this test
- run: docker buildx use default

- name: build aikit
run: |
docker buildx build . -t aikit:test \
--load --provenance=false --progress plain

- name: build test model
run: |
docker buildx build . -t testmodel:test \
-f test/aikitfile-llama.yaml \
--load --provenance=false --progress plain \
--build-arg="runtime=applesilicon"

- name: list images
run: docker images

- name: run test model
run: podman run --name testmodel -d --rm --device /dev/dri -p 8080:8080 --pull always testmodel:test

- name: run test (gguf)
run: |
result=$(curl --fail --retry 10 --retry-all-errors http://127.0.0.1:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "llama-3.2-1b-instruct",
"messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]
}')
echo $result

choices=$(echo "$result" | jq '.choices')
if [ -z "$choices" ]; then
exit 1
fi

- name: save logs
if: always()
run: podman logs testmodel > /tmp/podman-gpu.log

- run: podman stop testmodel
if: always()

- run: podman system prune -a -f --volumes || true
if: always()

- name: publish test artifacts
if: always()
uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3
with:
name: test-podman-gpu
path: |
/tmp/*.log
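One subtlety in the `run test (gguf)` step: when `.choices` is absent, `jq '.choices'` prints `null`, which is a non-empty string, so the `-z` check does not fail the job. A stricter check (a sketch, not part of this PR) could use `jq -e`, which exits non-zero when the expression evaluates to `false` or `null`:

```bash
# Sketch: fail the step unless the response contains a non-empty .choices array.
result=$(curl --fail --retry 10 --retry-all-errors http://127.0.0.1:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "llama-3.2-1b-instruct", "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]}')
echo "$result"
echo "$result" | jq -e '.choices | length > 0' > /dev/null
```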
45 changes: 34 additions & 11 deletions .github/workflows/update-models.yaml
@@ -4,7 +4,7 @@ on:
workflow_dispatch:
inputs:
staging:
description: 'push to test registry'
description: "push to test registry"
required: false
default: false
type: boolean
@@ -15,17 +15,23 @@ permissions:
id-token: write

jobs:
update-models:
update-models:
strategy:
fail-fast: false
matrix:
model:
- llama-3.2-1b-instruct
- llama-3.2-3b-instruct
- llama-3.1-8b-instruct
- phi-3.5-3.8b-instruct
- gemma-2-2b-instruct
- flux-1-dev
- llama-3.2-1b-instruct
- llama-3.2-3b-instruct
- llama-3.1-8b-instruct
- phi-3.5-3.8b-instruct
- gemma-2-2b-instruct
- flux-1-dev
runtime:
- cuda
- applesilicon
exclude:
- model: flux-1-dev # requires cuda runtime
runtime: applesilicon
runs-on: ubuntu-latest
timeout-minutes: 360
steps:
@@ -96,23 +102,40 @@ jobs:
fi

if ${{ inputs.staging }}; then
export REGISTRY=ghcr.io/sozercan/test
if [ ${{ matrix.runtime }} == "applesilicon" ]; then
export REGISTRY=ghcr.io/sozercan/test/applesilicon
else
export REGISTRY=ghcr.io/sozercan/test
fi
else
export REGISTRY=ghcr.io/sozercan
if [ ${{ matrix.runtime }} == "applesilicon" ]; then
export REGISTRY=ghcr.io/sozercan/applesilicon
else
export REGISTRY=ghcr.io/sozercan
fi
fi

export PLATFORMS="linux/amd64,linux/arm64"
if [ ${{ matrix.model }} == "flux-1-dev" ]; then
export PLATFORMS="linux/amd64"
fi
if [ ${{ matrix.runtime }} == "applesilicon" ]; then
export PLATFORMS="linux/arm64"
fi

if [ ${{ matrix.runtime }} == "applesilicon" ]; then
export BUILD_ARGS="--build-arg runtime=applesilicon"
else
export BUILD_ARGS=""
fi

docker buildx build . \
-t ${REGISTRY}/${MODEL_NAME}:${MODEL_SIZE} \
-t ${REGISTRY}/${MODEL_NAME}:${MODEL_SIZE}${MODEL_TYPE} \
-f models/${{ matrix.model }}.yaml \
--push --progress plain \
--sbom=true --provenance=true \
--platform ${PLATFORMS}
--platform ${PLATFORMS} ${BUILD_ARGS}
echo "DIGEST=$(cosign triangulate ${REGISTRY}/${MODEL_NAME}:${MODEL_SIZE} --type digest)" >> $GITHUB_ENV

- name: Sign the images with GitHub OIDC Token
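For the `applesilicon` runtime, the script above resolves to roughly the following build (illustrative only: `MODEL_NAME`, `MODEL_SIZE`, and `MODEL_TYPE` are set in a part of the script not shown in this diff, so the tag below is inferred from the README table rather than the workflow itself):

```bash
# Sketch: effective non-staging build for llama-3.2-1b-instruct with runtime=applesilicon.
docker buildx build . \
  -t ghcr.io/sozercan/applesilicon/llama3.2:1b \
  -f models/llama-3.2-1b-instruct.yaml \
  --push --progress plain \
  --sbom=true --provenance=true \
  --platform linux/arm64 \
  --build-arg runtime=applesilicon
```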
6 changes: 5 additions & 1 deletion .pre-commit-config.yaml
@@ -4,7 +4,7 @@ repos:
hooks:
- id: gitleaks
- repo: https://github.com/golangci/golangci-lint
rev: v1.61.0
rev: v1.62.0
hooks:
- id: golangci-lint
- repo: https://github.com/jumanjihouse/pre-commit-hooks
@@ -16,3 +16,7 @@ repos:
hooks:
- id: end-of-file-fixer
- id: trailing-whitespace
- repo: https://github.com/crate-ci/typos
rev: v1.27.3
hooks:
- id: typos
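The new `typos` hook runs alongside the existing hooks; a local invocation (a sketch, assuming pre-commit is installed) looks like:

```bash
# Sketch: run only the newly added typos hook across the repository.
pre-commit run typos --all-files
```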
2 changes: 1 addition & 1 deletion Dockerfile
@@ -2,7 +2,7 @@ FROM golang:1.23-bookworm@sha256:3f3b9daa3de608f3e869cd2ff8baf21555cf0fca9fd3425
ARG LDFLAGS
COPY . /go/src/github.com/sozercan/aikit
WORKDIR /go/src/github.com/sozercan/aikit
RUN CGO_ENABLED=0 go build -o /aikit -ldflags "${LDFLAGS} -extldflags '-static'" ./cmd/frontend
RUN CGO_ENABLED=0 go build -o /aikit -ldflags "${LDFLAGS} -w -s -extldflags '-static'" ./cmd/frontend

FROM scratch
COPY --from=builder /aikit /bin/aikit
12 changes: 12 additions & 0 deletions Dockerfile.base-applesilicon
@@ -0,0 +1,12 @@
ARG MESA_VERSION=23.3.5-102

FROM fedora:39@sha256:d63d63fe593749a5e8dbc8152427d40bbe0ece53d884e00e5f3b44859efa5077
USER 0

# Install the patched mesa-krunkit drivers
RUN dnf -y install dnf-plugins-core && \
dnf -y copr enable slp/mesa-krunkit && \
dnf -y install \
mesa-vulkan-drivers-${MESA_VERSION} \
vulkan-loader-devel vulkan-tools vulkan-loader && \
dnf clean all
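Per the `release-base.yaml` change above, this image is built for `linux/arm64` only and pushed as `ghcr.io/sozercan/applesilicon/base:latest`. A local build would look roughly like this (a sketch; tag and platform are taken from the workflow matrix):

```bash
# Sketch: build the Apple Silicon base image locally (arm64 only).
docker buildx build . \
  -f Dockerfile.base-applesilicon \
  -t ghcr.io/sozercan/applesilicon/base:latest \
  --platform linux/arm64 \
  --load
```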
25 changes: 18 additions & 7 deletions Makefile
@@ -1,13 +1,15 @@
VERSION := v0.14.0

REGISTRY ?= ghcr.io/sozercan
KIND_VERSION ?= 0.23.0
KUBERNETES_VERSION ?= 1.30.1
HELM_VERSION ?= 3.15.1
KIND_VERSION ?= 0.25.0
KUBERNETES_VERSION ?= 1.31.2
HELM_VERSION ?= 3.16.3
TAG ?= test
OUTPUT_TYPE ?= type=docker
TEST_IMAGE_NAME ?= testmodel
TEST_FILE ?= test/aikitfile-llama.yaml
RUNTIME ?= ""
PLATFORMS ?= linux/amd64,linux/arm64

GIT_COMMIT := $(shell git rev-list --abbrev-commit --tags --max-count=1)
GIT_TAG := $(shell git describe --abbrev=0 --tags ${GIT_COMMIT} 2>/dev/null || true)
@@ -19,17 +21,22 @@ lint:

.PHONY: build-aikit
build-aikit:
docker buildx build . -t ${REGISTRY}/aikit:${TAG} --output=${OUTPUT_TYPE} --build-arg LDFLAGS=${LDFLAGS} \
docker buildx build . -t ${REGISTRY}/aikit:${TAG} --output=${OUTPUT_TYPE} \
--build-arg LDFLAGS=${LDFLAGS} \
--progress=plain

.PHONY: build-test-model
build-test-model:
docker buildx build . -t ${REGISTRY}/${TEST_IMAGE_NAME}:${TAG} -f ${TEST_FILE} --output=${OUTPUT_TYPE} \
--progress=plain --provenance=false
docker buildx build . -t ${REGISTRY}/${TEST_IMAGE_NAME}:${TAG} -f ${TEST_FILE} \
--progress=plain --provenance=false \
--output=${OUTPUT_TYPE} \
--build-arg runtime=${RUNTIME} \
--platform ${PLATFORMS}

.PHONY: build-distroless-base
push-distroless-base:
docker buildx build . -t sozercan/aikit-base:latest -f Dockerfile.base --platform linux/amd64,linux/arm64 \
docker buildx build . -t sozercan/aikit-base:latest -f Dockerfile.base \
--platform linux/amd64,linux/arm64 \
--sbom=true --push

.PHONY: run-test-model
@@ -40,6 +47,10 @@ run-test-model:
run-test-model-gpu:
docker run --rm -p 8080:8080 --gpus all ${REGISTRY}/${TEST_IMAGE_NAME}:${TAG}

.PHONY: run-test-model-applesilicon
run-test-model-applesilicon:
podman run --rm -p 8080:8080 --device /dev/dri ${REGISTRY}/${TEST_IMAGE_NAME}:${TAG}

.PHONY: test
test:
go test -v ./... -race -coverprofile=coverage.txt -covermode=atomic
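With the new `RUNTIME` and `PLATFORMS` variables and the `run-test-model-applesilicon` target, a local Apple Silicon test run could look like this (a sketch; assumes Podman Desktop with GPU passthrough is configured and that the built image is visible to podman):

```bash
# Sketch: build the test model for the Apple Silicon runtime, then run it with podman.
make build-test-model RUNTIME=applesilicon PLATFORMS=linux/arm64
make run-test-model-applesilicon
```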
19 changes: 19 additions & 0 deletions README.md
@@ -81,6 +81,10 @@ If it doesn't include a specific model, you can always [create your own images](

## CPU

> [!NOTE]
> AIKit supports both AMD64 and ARM64 CPUs. You can run the same command on either architecture, and Docker will automatically pull the correct image for your CPU.
> Depending on your CPU's capabilities, AIKit automatically selects the most optimized instruction set available.

| Model | Optimization | Parameters | Command | Model Name | License |
| --------------- | ------------ | ---------- | ---------------------------------------------------------------- | ------------------------ | ---------------------------------------------------------------------------------- |
| 🦙 Llama 3.2 | Instruct | 1B | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama3.2:1b` | `llama-3.2-1b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
@@ -110,6 +114,21 @@ If it doesn't include a specific model, you can always [create your own images](
| ⌨️ Codestral 0.1 | Code | 22B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/codestral:22b` | `codestral-22b` | [MNLP](https://mistral.ai/licenses/MNPL-0.1.md) |
| 📸 Flux 1 Dev | Text to image | 12B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/flux1:dev` | `flux-1-dev` | [FLUX.1 [dev] Non-Commercial License](https://github.com/black-forest-labs/flux/blob/main/model_licenses/LICENSE-FLUX1-dev) |

### Apple Silicon (experimental)

> [!NOTE]
> To enable GPU acceleration on Apple Silicon, please see [Podman Desktop documentation](https://podman-desktop.io/docs/podman/gpu).
> The Apple Silicon runtime is _experimental_ and may change in the future. It targets Apple Silicon exclusively and will not work as expected on other architectures, including Intel-based Macs.
> Only `gguf` models are supported on Apple Silicon.

| Model | Optimization | Parameters | Command | Model Name | License |
| ----------- | ------------ | ---------- | --------------------------------------------------------------------------------------------- | ----------------------- | ---------------------------------------------------------------------------------- |
| 🦙 Llama 3.2 | Instruct | 1B | `podman run -d --rm --device /dev/dri -p 8080:8080 ghcr.io/sozercan/applesilicon/llama3.2:1b` | `llama-3.2-1b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
| 🦙 Llama 3.2 | Instruct | 3B | `podman run -d --rm --device /dev/dri -p 8080:8080 ghcr.io/sozercan/applesilicon/llama3.2:3b` | `llama-3.2-3b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
| 🦙 Llama 3.1 | Instruct | 8B | `podman run -d --rm --device /dev/dri -p 8080:8080 ghcr.io/sozercan/applesilicon/llama3.1:8b` | `llama-3.1-8b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
| 🅿️ Phi 3.5 | Instruct | 3.8B | `podman run -d --rm --device /dev/dri -p 8080:8080 ghcr.io/sozercan/applesilicon/phi3.5:3.8b` | `phi-3.5-3.8b-instruct` | [MIT](https://huggingface.co/microsoft/Phi-3.5-mini-instruct/resolve/main/LICENSE) |
| 🔡 Gemma 2 | Instruct | 2B | `podman run -d --rm --device /dev/dri -p 8080:8080 ghcr.io/sozercan/applesilicon/gemma2:2b` | `gemma-2-2b-instruct` | [Gemma](https://ai.google.dev/gemma/terms) |
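Once a model container is running, it exposes the same OpenAI-compatible API as the other runtimes; for example (illustrative, using the Llama 3.2 1B image above):

```bash
# Sketch: query the running Apple Silicon model via the chat completions endpoint.
curl http://localhost:8080/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d '{"model": "llama-3.2-1b-instruct", "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]}'
```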

## What's next?

👉 For more information and how to fine tune models or create your own images, please see [AIKit website](https://sozercan.github.io/aikit/)!