diff --git a/.github/dependabot.yaml b/.github/dependabot.yaml
index cec13bc7..503880a2 100644
--- a/.github/dependabot.yaml
+++ b/.github/dependabot.yaml
@@ -18,6 +18,7 @@ updates:
     directory: /
     schedule:
       interval: "weekly"
+
   - package-ecosystem: docker
     directory: /charts/aikit
     schedule:
diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml
index 7a84cd89..005c10d8 100644
--- a/.github/workflows/lint.yaml
+++ b/.github/workflows/lint.yaml
@@ -46,4 +46,4 @@ jobs:
       - name: lint
         uses: golangci/golangci-lint-action@971e284b6050e8a5849b72094c50ab08da042db8 # v6.1.1
         with:
-          version: v1.60.3
+          version: v1.62.0
diff --git a/.github/workflows/release-base.yaml b/.github/workflows/release-base.yaml
index 3b40e6cf..a0da4170 100644
--- a/.github/workflows/release-base.yaml
+++ b/.github/workflows/release-base.yaml
@@ -14,6 +14,12 @@ jobs:
   release-base:
     runs-on: ubuntu-latest
     timeout-minutes: 360
+    strategy:
+      fail-fast: true
+      matrix:
+        runtime:
+          - base
+          - applesilicon
     steps:
       - name: Harden Runner
         uses: step-security/harden-runner@0080882f6c36860b6ba35c610c98ce87d4e2f26f # v2.10.2
@@ -38,18 +44,34 @@ jobs:
           username: ${{ github.actor }}
           password: ${{ secrets.GITHUB_TOKEN }}

+      - name: Set runtime variables for matrix
+        run: |
+          if [ ${{ matrix.runtime }} == "base" ]; then
+            echo "PLATFORMS=linux/amd64,linux/arm64" >> "$GITHUB_ENV"
+            echo "FILE=Dockerfile.base" >> "$GITHUB_ENV"
+            echo "TAG=ghcr.io/sozercan/base:latest" >> "$GITHUB_ENV"
+            echo "CACHE_FROM=type=gha,scope=base" >> "$GITHUB_ENV"
+            echo "CACHE_TO=type=gha,scope=base,mode=max" >> "$GITHUB_ENV"
+          elif [ ${{ matrix.runtime }} == "applesilicon" ]; then
+            echo "PLATFORMS=linux/arm64" >> "$GITHUB_ENV"
+            echo "FILE=Dockerfile.base-applesilicon" >> "$GITHUB_ENV"
+            echo "TAG=ghcr.io/sozercan/applesilicon/base:latest" >> "$GITHUB_ENV"
+            echo "CACHE_FROM=type=gha,scope=base-applesilicon" >> "$GITHUB_ENV"
+            echo "CACHE_TO=type=gha,scope=base-applesilicon,mode=max" >> "$GITHUB_ENV"
+          fi
+
       - name: Build and push
         uses: docker/build-push-action@4f58ea79222b3b9dc2c8bbdd6debcef730109a75 # v6.9.0
         id: build-and-push
         with:
           push: true
-          tags: ghcr.io/sozercan/base:latest
-          cache-from: type=gha,scope=base
-          cache-to: type=gha,scope=base,mode=max
           sbom: true
           provenance: true
-          platforms: linux/amd64,linux/arm64
-          file: Dockerfile.base
+          tags: ${{ env.TAG }}
+          cache-from: ${{ env.CACHE_FROM }}
+          cache-to: ${{ env.CACHE_TO }}
+          platforms: ${{ env.PLATFORMS }}
+          file: ${{ env.FILE }}

       - name: Sign the images with GitHub OIDC Token
         env:
diff --git a/.github/workflows/test-podman-applesilicon.yaml b/.github/workflows/test-podman-applesilicon.yaml
new file mode 100644
index 00000000..5916c6db
--- /dev/null
+++ b/.github/workflows/test-podman-applesilicon.yaml
@@ -0,0 +1,70 @@
+name: podman-test-gpu
+
+on:
+  workflow_dispatch:
+
+permissions: read-all
+
+jobs:
+  test:
+    runs-on: self-hosted
+    timeout-minutes: 240
+    steps:
+      - name: cleanup workspace
+        run: |
+          rm -rf ./* || true
+          rm -rf ./.??* || true
+      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
+
+      # use default docker driver builder with containerd image store for local aikit image
+      # these must be set up before running this test
+      - run: docker buildx use default
+
+      - name: build aikit
+        run: |
+          docker buildx build . -t aikit:test \
+            --load --provenance=false --progress plain
+
+      - name: build test model
+        run: |
+          docker buildx build . -t testmodel:test \
+            -f test/aikitfile-llama.yaml \
+            --load --provenance=false --progress plain \
+            --build-arg="runtime=applesilicon"
+
+      - name: list images
+        run: docker images
+
+      - name: run test model
+        run: podman run --name testmodel -d --rm --device /dev/dri -p 8080:8080 --pull always testmodel:test
+
+      - name: run test (gguf)
+        run: |
+          result=$(curl --fail --retry 10 --retry-all-errors http://127.0.0.1:8080/v1/chat/completions -H "Content-Type: application/json" -d '{
+            "model": "llama-3.2-1b-instruct",
+            "messages": [{"role": "user", "content": "explain kubernetes in a sentence"}]
+          }')
+          echo $result
+
+          choices=$(echo "$result" | jq '.choices')
+          if [ -z "$choices" ]; then
+            exit 1
+          fi
+
+      - name: save logs
+        if: always()
+        run: podman logs testmodel > /tmp/podman-gpu.log
+
+      - run: podman stop testmodel
+        if: always()
+
+      - run: podman system prune -a -f --volumes || true
+        if: always()
+
+      - name: publish test artifacts
+        if: always()
+        uses: actions/upload-artifact@b4b15b8c7c6ac21ea08fcf65892d2ee8f75cf882 # v4.4.3
+        with:
+          name: test-podman-gpu
+          path: |
+            /tmp/*.log
diff --git a/.github/workflows/update-models.yaml b/.github/workflows/update-models.yaml
index e9f806d5..50d415ac 100644
--- a/.github/workflows/update-models.yaml
+++ b/.github/workflows/update-models.yaml
@@ -4,7 +4,7 @@ on:
   workflow_dispatch:
     inputs:
       staging:
-        description: 'push to test registry'
+        description: "push to test registry"
         required: false
         default: false
         type: boolean
@@ -15,17 +15,23 @@ permissions:
   id-token: write

 jobs:
-  update-models:
+  update-models:
     strategy:
       fail-fast: false
       matrix:
         model:
-        - llama-3.2-1b-instruct
-        - llama-3.2-3b-instruct
-        - llama-3.1-8b-instruct
-        - phi-3.5-3.8b-instruct
-        - gemma-2-2b-instruct
-        - flux-1-dev
+          - llama-3.2-1b-instruct
+          - llama-3.2-3b-instruct
+          - llama-3.1-8b-instruct
+          - phi-3.5-3.8b-instruct
+          - gemma-2-2b-instruct
+          - flux-1-dev
+        runtime:
+          - cuda
+          - applesilicon
+        exclude:
+          - model: flux-1-dev # requires cuda runtime
+            runtime: applesilicon
     runs-on: ubuntu-latest
     timeout-minutes: 360
     steps:
@@ -96,15 +102,32 @@ jobs:
           fi

           if ${{ inputs.staging }}; then
-            export REGISTRY=ghcr.io/sozercan/test
+            if [ ${{ matrix.runtime }} == "applesilicon" ]; then
+              export REGISTRY=ghcr.io/sozercan/test/applesilicon
+            else
+              export REGISTRY=ghcr.io/sozercan/test
+            fi
           else
-            export REGISTRY=ghcr.io/sozercan
+            if [ ${{ matrix.runtime }} == "applesilicon" ]; then
+              export REGISTRY=ghcr.io/sozercan/applesilicon
+            else
+              export REGISTRY=ghcr.io/sozercan
+            fi
           fi

           export PLATFORMS="linux/amd64,linux/arm64"
           if [ ${{ matrix.model }} == "flux-1-dev" ]; then
             export PLATFORMS="linux/amd64"
           fi
+          if [ ${{ matrix.runtime }} == "applesilicon" ]; then
+            export PLATFORMS="linux/arm64"
+          fi
+
+          if [ ${{ matrix.runtime }} == "applesilicon" ]; then
+            export BUILD_ARGS="--build-arg runtime=applesilicon"
+          else
+            export BUILD_ARGS=""
+          fi

           docker buildx build . \
             -t ${REGISTRY}/${MODEL_NAME}:${MODEL_SIZE} \
@@ -112,7 +135,7 @@ jobs:
             -f models/${{ matrix.model }}.yaml \
             --push --progress plain \
             --sbom=true --provenance=true \
-            --platform ${PLATFORMS}
+            --platform ${PLATFORMS} ${BUILD_ARGS}
           echo "DIGEST=$(cosign triangulate ${REGISTRY}/${MODEL_NAME}:${MODEL_SIZE} --type digest)" >> $GITHUB_ENV

       - name: Sign the images with GitHub OIDC Token
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index c3126a58..4f90fe2a 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -4,7 +4,7 @@ repos:
   hooks:
   - id: gitleaks
 - repo: https://github.com/golangci/golangci-lint
-  rev: v1.61.0
+  rev: v1.62.0
   hooks:
   - id: golangci-lint
 - repo: https://github.com/jumanjihouse/pre-commit-hooks
@@ -16,3 +16,7 @@ repos:
   hooks:
   - id: end-of-file-fixer
   - id: trailing-whitespace
+- repo: https://github.com/crate-ci/typos
+  rev: v1.27.3
+  hooks:
+  - id: typos
diff --git a/Dockerfile b/Dockerfile
index 439d297a..280cfdb0 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -2,7 +2,7 @@ FROM golang:1.23-bookworm@sha256:3f3b9daa3de608f3e869cd2ff8baf21555cf0fca9fd3425
 ARG LDFLAGS
 COPY . /go/src/github.com/sozercan/aikit
 WORKDIR /go/src/github.com/sozercan/aikit
-RUN CGO_ENABLED=0 go build -o /aikit -ldflags "${LDFLAGS} -extldflags '-static'" ./cmd/frontend
+RUN CGO_ENABLED=0 go build -o /aikit -ldflags "${LDFLAGS} -w -s -extldflags '-static'" ./cmd/frontend

 FROM scratch
 COPY --from=builder /aikit /bin/aikit
diff --git a/Dockerfile.base-applesilicon b/Dockerfile.base-applesilicon
new file mode 100644
index 00000000..fcc101ae
--- /dev/null
+++ b/Dockerfile.base-applesilicon
@@ -0,0 +1,15 @@
+ARG MESA_VERSION=23.3.5-102
+
+FROM fedora:39@sha256:d63d63fe593749a5e8dbc8152427d40bbe0ece53d884e00e5f3b44859efa5077
+USER 0
+
+# an ARG declared before FROM goes out of scope after it; redeclare so the RUN below can use it
+ARG MESA_VERSION
+
+# Install the patched mesa-krunkit drivers
+RUN dnf -y install dnf-plugins-core && \
+    dnf -y copr enable slp/mesa-krunkit && \
+    dnf -y install \
+        mesa-vulkan-drivers-${MESA_VERSION} \
+        vulkan-loader-devel vulkan-tools vulkan-loader && \
+    dnf clean all
diff --git a/Makefile b/Makefile
index 8ceacfa3..497c6140 100644
--- a/Makefile
+++ b/Makefile
@@ -1,13 +1,15 @@
 VERSION := v0.14.0

 REGISTRY ?= ghcr.io/sozercan
-KIND_VERSION ?= 0.23.0
-KUBERNETES_VERSION ?= 1.30.1
-HELM_VERSION ?= 3.15.1
+KIND_VERSION ?= 0.25.0
+KUBERNETES_VERSION ?= 1.31.2
+HELM_VERSION ?= 3.16.3
 TAG ?= test
 OUTPUT_TYPE ?= type=docker
 TEST_IMAGE_NAME ?= testmodel
 TEST_FILE ?= test/aikitfile-llama.yaml
+RUNTIME ?= ""
+PLATFORMS ?= linux/amd64,linux/arm64

 GIT_COMMIT := $(shell git rev-list --abbrev-commit --tags --max-count=1)
 GIT_TAG := $(shell git describe --abbrev=0 --tags ${GIT_COMMIT} 2>/dev/null || true)
@@ -19,17 +21,22 @@
 lint:

 .PHONY: build-aikit
 build-aikit:
-	docker buildx build . -t ${REGISTRY}/aikit:${TAG} --output=${OUTPUT_TYPE} --build-arg LDFLAGS=${LDFLAGS} \
+	docker buildx build . -t ${REGISTRY}/aikit:${TAG} --output=${OUTPUT_TYPE} \
+		--build-arg LDFLAGS=${LDFLAGS} \
 		--progress=plain

 .PHONY: build-test-model
 build-test-model:
-	docker buildx build . -t ${REGISTRY}/${TEST_IMAGE_NAME}:${TAG} -f ${TEST_FILE} --output=${OUTPUT_TYPE} \
-		--progress=plain --provenance=false
+	docker buildx build . -t ${REGISTRY}/${TEST_IMAGE_NAME}:${TAG} -f ${TEST_FILE} \
+		--progress=plain --provenance=false \
+		--output=${OUTPUT_TYPE} \
+		--build-arg runtime=${RUNTIME} \
+		--platform ${PLATFORMS}

 .PHONY: build-distroless-base
 push-distroless-base:
-	docker buildx build . -t sozercan/aikit-base:latest -f Dockerfile.base --platform linux/amd64,linux/arm64 \
+	docker buildx build . -t sozercan/aikit-base:latest -f Dockerfile.base \
+		--platform linux/amd64,linux/arm64 \
 		--sbom=true --push

 .PHONY: run-test-model
@@ -40,6 +47,10 @@ run-test-model:
 run-test-model-gpu:
 	docker run --rm -p 8080:8080 --gpus all ${REGISTRY}/${TEST_IMAGE_NAME}:${TAG}

+.PHONY: run-test-model-applesilicon
+run-test-model-applesilicon:
+	podman run --rm -p 8080:8080 --device /dev/dri ${REGISTRY}/${TEST_IMAGE_NAME}:${TAG}
+
 .PHONY: test
 test:
 	go test -v ./... -race -coverprofile=coverage.txt -covermode=atomic
diff --git a/README.md b/README.md
index 36521cb0..9ca50ec8 100644
--- a/README.md
+++ b/README.md
@@ -81,6 +81,10 @@ If it doesn't include a specific model, you can always [create your own images](

 ## CPU

+> [!NOTE]
+> AIKit supports both AMD64 and ARM64 CPUs. You can run the same command on either architecture, and Docker will automatically pull the correct image for your CPU.
+> Depending on your CPU capabilities, AIKit will automatically select the most optimized instruction set.
+
 | Model           | Optimization | Parameters | Command                                                        | Model Name              | License                                     |
 | --------------- | ------------ | ---------- | -------------------------------------------------------------- | ----------------------- | ------------------------------------------- |
 | 🦙 Llama 3.2     | Instruct     | 1B         | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama3.2:1b` | `llama-3.2-1b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
@@ -110,6 +114,21 @@ If it doesn't include a specific model, you can always [create your own images](
 | ⌨️ Codestral 0.1 | Code          | 22B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/codestral:22b` | `codestral-22b` | [MNPL](https://mistral.ai/licenses/MNPL-0.1.md) |
 | 📸 Flux 1 Dev    | Text to image | 12B | `docker run -d --rm --gpus all -p 8080:8080 ghcr.io/sozercan/flux1:dev`     | `flux-1-dev`    | [FLUX.1 [dev] Non-Commercial License](https://github.com/black-forest-labs/flux/blob/main/model_licenses/LICENSE-FLUX1-dev) |

+### Apple Silicon (experimental)
+
+> [!NOTE]
+> To enable GPU acceleration on Apple Silicon, please see the [Podman Desktop documentation](https://podman-desktop.io/docs/podman/gpu).
+> Apple Silicon is an _experimental_ runtime that may change in the future. It is specific to Apple Silicon and will not work as expected on other architectures, including Intel Macs.
+> Only `gguf` models are supported on Apple Silicon.
+
+| Model       | Optimization | Parameters | Command                                                                                        | Model Name              | License                                                                            |
+| ----------- | ------------ | ---------- | ---------------------------------------------------------------------------------------------- | ----------------------- | ---------------------------------------------------------------------------------- |
+| 🦙 Llama 3.2 | Instruct     | 1B         | `podman run -d --rm --device /dev/dri -p 8080:8080 ghcr.io/sozercan/applesilicon/llama3.2:1b`  | `llama-3.2-1b-instruct` | [Llama](https://ai.meta.com/llama/license/)                                        |
+| 🦙 Llama 3.2 | Instruct     | 3B         | `podman run -d --rm --device /dev/dri -p 8080:8080 ghcr.io/sozercan/applesilicon/llama3.2:3b`  | `llama-3.2-3b-instruct` | [Llama](https://ai.meta.com/llama/license/)                                        |
+| 🦙 Llama 3.1 | Instruct     | 8B         | `podman run -d --rm --device /dev/dri -p 8080:8080 ghcr.io/sozercan/applesilicon/llama3.1:8b`  | `llama-3.1-8b-instruct` | [Llama](https://ai.meta.com/llama/license/)                                        |
+| 🅿️ Phi 3.5   | Instruct     | 3.8B       | `podman run -d --rm --device /dev/dri -p 8080:8080 ghcr.io/sozercan/applesilicon/phi3.5:3.8b`  | `phi-3.5-3.8b-instruct` | [MIT](https://huggingface.co/microsoft/Phi-3.5-mini-instruct/resolve/main/LICENSE) |
+| 🔡 Gemma 2   | Instruct     | 2B         | `podman run -d --rm --device /dev/dri -p 8080:8080 ghcr.io/sozercan/applesilicon/gemma2:2b`    | `gemma-2-2b-instruct`   | [Gemma](https://ai.google.dev/gemma/terms)                                         |
+
 ## What's next?

 👉 For more information and how to fine tune models or create your own images, please see [AIKit website](https://sozercan.github.io/aikit/)!
diff --git a/pkg/aikit2llb/inference/convert.go b/pkg/aikit2llb/inference/convert.go
index 7348f987..374a91e6 100644
--- a/pkg/aikit2llb/inference/convert.go
+++ b/pkg/aikit2llb/inference/convert.go
@@ -14,14 +14,18 @@ import (
 const (
 	distrolessBase = "ghcr.io/sozercan/base:latest"
 	localAIRepo    = "https://github.com/mudler/LocalAI"
-	localAIVersion = "v2.22.1"
+	localAIVersion = "v2.23.0"
 	cudaVersion    = "12-5"
 )

 // Aikit2LLB converts an InferenceConfig to an LLB state.
 func Aikit2LLB(c *config.InferenceConfig, platform *specs.Platform) (llb.State, *specs.Image, error) {
-	var merge llb.State
-	state := llb.Image(utils.UbuntuBase, llb.Platform(*platform))
+	var merge, state llb.State
+	if c.Runtime == utils.RuntimeAppleSilicon {
+		state = llb.Image(utils.AppleSiliconBase, llb.Platform(*platform))
+	} else {
+		state = llb.Image(utils.UbuntuBase, llb.Platform(*platform))
+	}
 	base := getBaseImage(c, platform)

 	var err error
@@ -30,7 +34,7 @@ func Aikit2LLB(c *config.InferenceConfig, platform *specs.Platform) (llb.State,
 		return state, nil, err
 	}

-	state, merge, err = addLocalAI(state, merge, *platform)
+	state, merge, err = addLocalAI(c, state, merge, *platform)
 	if err != nil {
 		return state, nil, err
 	}
@@ -63,6 +67,9 @@ func getBaseImage(c *config.InferenceConfig, platform *specs.Platform) llb.State
 	if len(c.Backends) > 0 {
 		return llb.Image(utils.UbuntuBase, llb.Platform(*platform))
 	}
+	if c.Runtime == utils.RuntimeAppleSilicon {
+		return llb.Image(utils.AppleSiliconBase, llb.Platform(*platform))
+	}
 	return llb.Image(distrolessBase, llb.Platform(*platform))
 }

@@ -148,16 +155,21 @@ func installCuda(c *config.InferenceConfig, s llb.State, merge llb.State) (llb.S
 }

 // addLocalAI adds the LocalAI binary to the image.
-func addLocalAI(s llb.State, merge llb.State, platform specs.Platform) (llb.State, llb.State, error) {
-	binaryNames := map[string]string{
-		utils.PlatformAMD64: "local-ai-Linux-x86_64",
-		utils.PlatformARM64: "local-ai-Linux-arm64",
-	}
-	binaryName, exists := binaryNames[platform.Architecture]
-	if !exists {
-		return s, merge, fmt.Errorf("unsupported architecture %s", platform.Architecture)
+func addLocalAI(c *config.InferenceConfig, s llb.State, merge llb.State, platform specs.Platform) (llb.State, llb.State, error) {
+	var localAIURL string
+	if c.Runtime == utils.RuntimeAppleSilicon {
+		localAIURL = fmt.Sprintf("https://sertacstoragevs.blob.core.windows.net/localai/%[1]s/vulkan/local-ai", localAIVersion)
+	} else {
+		binaryNames := map[string]string{
+			utils.PlatformAMD64: "local-ai-Linux-x86_64",
+			utils.PlatformARM64: "local-ai-Linux-arm64",
+		}
+		binaryName, exists := binaryNames[platform.Architecture]
+		if !exists {
+			return s, merge, fmt.Errorf("unsupported architecture %s", platform.Architecture)
+		}
+		localAIURL = fmt.Sprintf("https://github.com/mudler/LocalAI/releases/download/%[1]s/%[2]s", localAIVersion, binaryName)
 	}
-	localAIURL := fmt.Sprintf("https://github.com/mudler/LocalAI/releases/download/%[1]s/%[2]s", localAIVersion, binaryName)

 	savedState := s
diff --git a/pkg/build/args.go b/pkg/build/args.go
index a2e44035..2d089844 100644
--- a/pkg/build/args.go
+++ b/pkg/build/args.go
@@ -20,6 +20,12 @@ func parseBuildArgs(opts map[string]string, inferenceCfg *config.InferenceConfig
 	modelArg := getBuildArg(opts, "model")
 	runtimeArg := getBuildArg(opts, "runtime")

+	// Set the runtime if provided
+	if runtimeArg != "" {
+		inferenceCfg.Runtime = runtimeArg
+	}
+
+	// Set the model if provided
 	if modelArg != "" {
 		var modelName, modelSource string
 		var err error
@@ -50,7 +56,6 @@ func parseBuildArgs(opts map[string]string, inferenceCfg *config.InferenceConfig
 	}

 	// Set the inference configuration
-	inferenceCfg.Runtime = runtimeArg
 	inferenceCfg.Models = []config.Model{
 		{
 			Name: modelName,
diff --git a/pkg/build/build.go b/pkg/build/build.go
index d690a4dc..03d3d0e1 100644
--- a/pkg/build/build.go
+++ b/pkg/build/build.go
@@ -123,6 +123,14 @@ func buildInference(ctx context.Context, c client.Client, cfg *config.InferenceC
 		targetPlatforms = []*specs.Platform{&defaultBuildPlatform}
 	}

+	if cfg.Runtime == utils.RuntimeAppleSilicon {
+		for _, tp := range targetPlatforms {
+			if tp.Architecture != utils.PlatformARM64 {
+				return nil, errors.New("apple silicon runtime only supports arm64 platform")
+			}
+		}
+	}
+
 	isMultiPlatform := len(targetPlatforms) > 1
 	exportPlatforms := &exptypes.Platforms{
 		Platforms: make([]exptypes.Platform, len(targetPlatforms)),
@@ -452,6 +460,10 @@ func validateInferenceConfig(c *config.InferenceConfig) error {
 		return errors.New("exllama, mamba, and diffusers backends only supports nvidia cuda runtime. please add 'runtime: cuda' to your aikitfile.yaml")
 	}

+	if c.Runtime == utils.RuntimeAppleSilicon && len(c.Backends) > 0 {
+		return errors.New("apple silicon runtime only supports the default llama-cpp backend")
+	}
+
 	backends := []string{utils.BackendExllamaV2, utils.BackendStableDiffusion, utils.BackendMamba, utils.BackendDiffusers}
 	for _, b := range c.Backends {
 		if !slices.Contains(backends, b) {
@@ -459,7 +471,7 @@ func validateInferenceConfig(c *config.InferenceConfig) error {
 		}
 	}

-	runtimes := []string{"", utils.RuntimeNVIDIA}
+	runtimes := []string{"", utils.RuntimeNVIDIA, utils.RuntimeAppleSilicon}
 	if !slices.Contains(runtimes, c.Runtime) {
 		return errors.Errorf("runtime %s is not supported", c.Runtime)
 	}
diff --git a/pkg/utils/const.go b/pkg/utils/const.go
index 0ebd3bea..f8ece0c1 100644
--- a/pkg/utils/const.go
+++ b/pkg/utils/const.go
@@ -1,7 +1,8 @@
 package utils

 const (
-	RuntimeNVIDIA = "cuda"
+	RuntimeNVIDIA       = "cuda"
+	RuntimeAppleSilicon = "applesilicon" // experimental apple silicon runtime with vulkan arm64 support

 	BackendStableDiffusion = "stablediffusion"
 	BackendExllamaV2       = "exllama2"
@@ -14,8 +15,9 @@ const (

 	APIv1alpha1 = "v1alpha1"

-	UbuntuBase = "docker.io/library/ubuntu:22.04"
-	CudaDevel  = "nvcr.io/nvidia/cuda:12.3.2-devel-ubuntu22.04"
+	UbuntuBase       = "docker.io/library/ubuntu:22.04"
+	AppleSiliconBase = "ghcr.io/sozercan/applesilicon/base:latest"
+	CudaDevel        = "nvcr.io/nvidia/cuda:12.3.2-devel-ubuntu22.04"

 	PlatformLinux = "linux"
 	PlatformAMD64 = "amd64"
diff --git a/website/docs/premade-models.md b/website/docs/premade-models.md
index b89d6575..6bd3b29d 100644
--- a/website/docs/premade-models.md
+++ b/website/docs/premade-models.md
@@ -8,6 +8,11 @@ If it doesn't include a specific model, you can always [create your own images](

 ## CPU

+:::note
+AIKit supports both AMD64 and ARM64 CPUs. You can run the same command on either architecture, and Docker will automatically pull the correct image for your CPU.
+Depending on your CPU capabilities, AIKit will automatically select the most optimized instruction set.
+:::
+
 | Model           | Optimization | Parameters | Command                                                        | Model Name              | License                                     |
 | --------------- | ------------ | ---------- | -------------------------------------------------------------- | ----------------------- | ------------------------------------------- |
 | 🦙 Llama 3.2     | Instruct     | 1B         | `docker run -d --rm -p 8080:8080 ghcr.io/sozercan/llama3.2:1b` | `llama-3.2-1b-instruct` | [Llama](https://ai.meta.com/llama/license/) |
@@ -41,6 +46,24 @@ If not being offloaded to GPU VRAM, minimum of 8GB of RAM is required for 7B mod
 All pre-made models include CUDA v12 libraries. They are used with [NVIDIA GPU acceleration](gpu.md). If a supported NVIDIA GPU is not found in your system, AIKit will automatically fallback to CPU with the most optimized runtime (`avx2`, `avx`, or `fallback`).
 :::

+## Apple Silicon (experimental)
+
+:::note
+To enable GPU acceleration on Apple Silicon, please see the [Podman Desktop documentation](https://podman-desktop.io/docs/podman/gpu).
+
+Apple Silicon is an _experimental_ runtime that may change in the future. It is specific to Apple Silicon and will not work as expected on other architectures, including Intel Macs.
+
+Only `gguf` models are supported on Apple Silicon.
+::: + +| Model | Optimization | Parameters | Command | Model Name | License | +| ----------- | ------------ | ---------- | --------------------------------------------------------------------------------------------- | ----------------------- | ---------------------------------------------------------------------------------- | +| 🦙 Llama 3.2 | Instruct | 1B | `podman run -d --rm --device /dev/dri -p 8080:8080 ghcr.io/sozercan/applesilicon/llama3.2:1b` | `llama-3.2-1b-instruct` | [Llama](https://ai.meta.com/llama/license/) | +| 🦙 Llama 3.2 | Instruct | 3B | `podman run -d --rm --device /dev/dri -p 8080:8080 ghcr.io/sozercan/applesilicon/llama3.2:3b` | `llama-3.2-3b-instruct` | [Llama](https://ai.meta.com/llama/license/) | +| 🦙 Llama 3.1 | Instruct | 8B | `podman run -d --rm --device /dev/dri -p 8080:8080 ghcr.io/sozercan/applesilicon/llama3.1:8b` | `llama-3.1-8b-instruct` | [Llama](https://ai.meta.com/llama/license/) | +| 🅿️ Phi 3.5 | Instruct | 3.8B | `podman run -d --rm --device /dev/dri -p 8080:8080 ghcr.io/sozercan/applesilicon/phi3.5:3.8b` | `phi-3.5-3.8b-instruct` | [MIT](https://huggingface.co/microsoft/Phi-3.5-mini-instruct/resolve/main/LICENSE) | +| 🔡 Gemma 2 | Instruct | 2B | `podman run -d --rm --device /dev/dri -p 8080:8080 ghcr.io/sozercan/applesilicon/gemma2:2b` | `gemma-2-2b-instruct` | [Gemma](https://ai.google.dev/gemma/terms) | + ## Deprecated Models The following pre-made models are deprecated and no longer updated. Images will continue to be pullable, if needed.