Skip to content

Commit

Permalink
Merge pull request #1 from LLukas22/ci
Browse files Browse the repository at this point in the history
Check entire project
  • Loading branch information
LLukas22 authored Mar 28, 2024
2 parents 3207f84 + c1da26c commit cd52555
Show file tree
Hide file tree
Showing 4 changed files with 228 additions and 1 deletion.
69 changes: 69 additions & 0 deletions .github/workflows/build_cpu.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
# Workflow summary: checks out the repository, sets up Docker Buildx, derives
# image tags/labels with the metadata action, then builds `Dockerfile` for
# linux/amd64. The registry login step is skipped on pull requests and the
# build step's `push` input evaluates to false on pull requests.
# NOTE(review): in the final step, `build-args: |` is immediately followed by
# `tags:` and `labels:`; nesting is not visible in this view, so presumably
# `build-args` is an empty block and `tags`/`labels` are sibling inputs - confirm.
name: Build and push CPU docker image to registry

# Triggers: manual dispatch, pushes to `main`, pushes of tags matching `v*`,
# and pull requests targeting `main`.
on:
workflow_dispatch:
push:
branches:
- 'main'
tags:
- 'v*'
pull_request:
branches:
- 'main'

jobs:
build-and-push-image:
runs-on: [ubuntu-latest]
permissions:
contents: write
packages: write
# This is used to complete the identity challenge
# with sigstore/fulcio when running outside of PRs.
id-token: write
security-events: write
steps:
- name: Checkout repository
uses: actions/checkout@v3

# NOTE(review): the action reference below reads `[email protected]`, i.e. the
# `<action>@<version>` part looks mangled; restore the intended action name
# and version before relying on this file.
- name: Initialize Docker Buildx
uses: docker/[email protected]
with:
install: true

# NOTE(review): same mangled `[email protected]` reference as above. Presumably this
# step is what provides `env.GITHUB_SHA_SHORT` used by the tag list further
# down - confirm against the action's documentation.
- name: Inject slug/short variables
uses: rlespinasse/[email protected]

# Authenticates against ghcr.io with the workflow token; skipped for pull requests.
- name: Login to GitHub Container Registry
if: github.event_name != 'pull_request'
uses: docker/login-action@v2
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}

# Produces the tag and label lists consumed by the build step through
# `steps.meta-cpu.outputs`. Tag rules: `cpu-<version>` and `cpu-<major>.<minor>`
# from semver tags, `cpu-latest` only when the ref is the default branch, and
# `cpu-sha-<short sha>` always. The automatic `latest` tag is disabled.
# NOTE(review): the action reference below is also mangled (`[email protected]`).
- name: Extract metadata (tags, labels) for Docker
id: meta-cpu
uses: docker/[email protected]
with:
images: |
ghcr.io/LLukas22/mistral.rs
flavor: |
latest=false
tags: |
type=semver,pattern=cpu-{{version}}
type=semver,pattern=cpu-{{major}}.{{minor}}
type=raw,value=cpu-latest,enable=${{ github.ref == format('refs/heads/{0}', github.event.repository.default_branch) }}
type=raw,value=cpu-sha-${{ env.GITHUB_SHA_SHORT }}
- name: Build and push Docker image
id: build-and-push-cpu
uses: docker/build-push-action@v4
with:
context: .
file: Dockerfile
push: ${{ github.event_name != 'pull_request' }}
platforms: 'linux/amd64'
build-args: |
tags: ${{ steps.meta-cpu.outputs.tags }}
labels: ${{ steps.meta-cpu.outputs.labels }}

2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ on:
branches:
- master
pull_request:
workflow_dispatch:

name: Continuous integration

Expand All @@ -26,7 +27,6 @@ jobs:
- uses: actions-rs/cargo@v1
with:
command: check
args: --examples

test:
name: Test Suite
Expand Down
35 changes: 35 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# ---- Build stage -----------------------------------------------------------
# Compiles the Cargo workspace in release mode.
# The base is pinned to the Debian bookworm variant of the Rust image so the
# binary is linked against the same distribution release as the
# debian:bookworm-slim runtime stage below. The floating `latest` tag can move
# to a newer Debian release and yield a binary the runtime image cannot load.
FROM rust:1-bookworm AS builder

WORKDIR /mistralrs

# Workspace manifest and lockfile.
COPY Cargo.toml Cargo.lock ./

# Workspace member crates.
COPY mistralrs mistralrs
COPY mistralrs-core mistralrs-core
COPY mistralrs-lora mistralrs-lora
COPY mistralrs-pyo3 mistralrs-pyo3
COPY mistralrs-server mistralrs-server

RUN cargo build --release

# ---- Runtime base ----------------------------------------------------------
# Slim Debian image carrying only the runtime environment and system packages.
FROM debian:bookworm-slim AS base

# NOTE: PORT is exported for consumers of the environment, but the ENTRYPOINT
# below is exec-form (no shell expansion) and passes the literal port 80.
ENV HUGGINGFACE_HUB_CACHE=/data \
    PORT=80 \
    MKL_ENABLE_INSTRUCTIONS=AVX512_E4 \
    RAYON_NUM_THREADS=8 \
    LD_LIBRARY_PATH=/usr/local/lib

# update + install + list cleanup stay in one layer so the apt index is never
# stale and never persisted in the image.
RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        libomp-dev \
        libssl-dev \
        pkg-config \
    && rm -rf /var/lib/apt/lists/*

# ---- Final image -----------------------------------------------------------
FROM base

# Set the executable bit while copying (`--chmod` needs BuildKit) instead of a
# follow-up `RUN chmod`, which would store the binary a second time in a new layer.
COPY --from=builder --chmod=0755 /mistralrs/target/release/mistralrs-server /usr/local/bin/mistralrs-server

# Documents the port passed to the server by the ENTRYPOINT; does not publish it.
EXPOSE 80

ENTRYPOINT ["mistralrs-server", "--port", "80", "--token-source", "env:HUGGING_FACE_HUB_TOKEN"]
123 changes: 123 additions & 0 deletions Dockerfile-cuda
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
# Common builder base: CUDA 12.2 development image plus the system packages
# needed by the later download and build steps.
FROM nvidia/cuda:12.2.0-devel-ubuntu22.04 AS base-builder

# SCCACHE selects the sccache release downloaded later in this stage;
# RUSTC_WRAPPER points at the path that release is extracted to;
# PATH is extended with the cargo bin directory under /root.
ENV SCCACHE=0.5.4 \
    RUSTC_WRAPPER=/usr/local/bin/sccache \
    PATH="/root/.cargo/bin:${PATH}"

# Index refresh, install and list cleanup are kept in a single layer.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        curl \
        libssl-dev \
        pkg-config \
    && rm -rf /var/lib/apt/lists/*

# Download and configure sccache

Check warning on line 13 in Dockerfile-cuda

View workflow job for this annotation

GitHub Actions / Typos

"Donwload" should be "Download".
# Run RUN instructions under bash with pipefail: with the default /bin/sh a
# failed `curl` on the left side of a pipe is masked by the exit status of the
# command on the right (tar / bash), and the build would carry on with a
# missing or truncated download. This setting also applies to the RUN
# instructions that follow in this stage; they use plain POSIX test syntax.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Fetch the sccache release selected by $SCCACHE and extract only the
# `sccache` binary into /usr/local/bin.
RUN curl -fsSL https://github.com/mozilla/sccache/releases/download/v$SCCACHE/sccache-v$SCCACHE-x86_64-unknown-linux-musl.tar.gz | tar -xzv --strip-components=1 -C /usr/local/bin sccache-v$SCCACHE-x86_64-unknown-linux-musl/sccache && \
    chmod +x /usr/local/bin/sccache

# Install the Rust toolchain non-interactively via rustup. The transfer is
# restricted to HTTPS with TLS >= 1.2 so a redirect cannot downgrade it.
RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | bash -s -- -y

# cargo-chef provides the `cargo chef prepare` / `cargo chef cook` commands
# used by the later stages; --locked installs with its pinned dependency versions.
RUN cargo install cargo-chef --locked

# Planner stage: runs `cargo chef prepare` over the workspace and writes
# /usr/src/recipe.json, which the builder stage copies out.
FROM base-builder AS planner

WORKDIR /usr/src

# NOTE(review): the CPU Dockerfile in this repository copies the
# mistralrs, mistralrs-core, mistralrs-lora, mistralrs-pyo3 and
# mistralrs-server crates, whereas this stage copies backends/core/router.
# Confirm these directories exist in the build context.
COPY Cargo.toml Cargo.lock ./
COPY backends backends
COPY core core
COPY router router

RUN cargo chef prepare --recipe-path recipe.json

# Builder stage: prunes the static cuBLAS archive for the requested compute
# capability, pre-builds the dependency graph from the planner's recipe, and
# finally copies the sources. The http-builder and grpc-builder stages extend
# this stage and run the actual `cargo build`.
FROM base-builder AS builder

# Target CUDA compute capability as an integer (e.g. 75, 80, 86, 90); default 80.
ARG CUDA_COMPUTE_CAP=80
# NOTE(review): GIT_SHA and DOCKER_LABEL are not referenced by any instruction
# in this stage; presumably they are read from the environment during the
# cargo build - confirm.
ARG GIT_SHA
ARG DOCKER_LABEL

# sccache specific variables
ARG ACTIONS_CACHE_URL
ARG ACTIONS_RUNTIME_TOKEN
ARG SCCACHE_GHA_ENABLED

WORKDIR /usr/src

# Rewrite libcublas_static.a in place with nvprune, passing the code targets
# that match CUDA_COMPUTE_CAP:
#   75..79 -> sm_<cap>
#   80..89 -> sm_80 and sm_<cap>
#   90     -> sm_90
# Any other value prints an error and fails the build (exit 1).
RUN if [ ${CUDA_COMPUTE_CAP} -ge 75 -a ${CUDA_COMPUTE_CAP} -lt 80 ]; \
then \
nvprune --generate-code code=sm_${CUDA_COMPUTE_CAP} /usr/local/cuda/lib64/libcublas_static.a -o /usr/local/cuda/lib64/libcublas_static.a; \
elif [ ${CUDA_COMPUTE_CAP} -ge 80 -a ${CUDA_COMPUTE_CAP} -lt 90 ]; \
then \
nvprune --generate-code code=sm_80 --generate-code code=sm_${CUDA_COMPUTE_CAP} /usr/local/cuda/lib64/libcublas_static.a -o /usr/local/cuda/lib64/libcublas_static.a; \
elif [ ${CUDA_COMPUTE_CAP} -eq 90 ]; \
then \
nvprune --generate-code code=sm_90 /usr/local/cuda/lib64/libcublas_static.a -o /usr/local/cuda/lib64/libcublas_static.a; \
else \
echo "cuda compute cap ${CUDA_COMPUTE_CAP} is not supported"; exit 1; \
fi;

COPY --from=planner /usr/src/recipe.json recipe.json

# Build dependencies only, from the recipe. Compute capabilities 75..79 select
# the `candle-cuda-turing` feature; every other value selects `candle-cuda`.
# Each branch then runs `sccache -s` (presumably to print cache statistics).
RUN if [ ${CUDA_COMPUTE_CAP} -ge 75 -a ${CUDA_COMPUTE_CAP} -lt 80 ]; \
then \
cargo chef cook --release --features candle-cuda-turing --features static-linking --no-default-features --recipe-path recipe.json && sccache -s; \
else \
cargo chef cook --release --features candle-cuda --features static-linking --no-default-features --recipe-path recipe.json && sccache -s; \
fi;

# Sources are copied only after the dependency build above, so that layer does
# not depend on these COPY instructions.
# NOTE(review): these paths differ from the mistralrs* crates copied by the CPU
# Dockerfile in this repository - confirm they exist in the build context.
COPY backends backends
COPY core core
COPY router router
COPY Cargo.toml ./
COPY Cargo.lock ./

# HTTP build stage: compiles the `text-embeddings-router` binary with the
# `http` feature on top of the prepared builder stage.
FROM builder AS http-builder

# Compute capabilities 75..79 build with `candle-cuda-turing`, everything else
# with `candle-cuda`; `sccache -s` runs only after a successful build.
RUN if [ ${CUDA_COMPUTE_CAP} -ge 75 -a ${CUDA_COMPUTE_CAP} -lt 80 ]; then \
        CUDA_FEATURE=candle-cuda-turing; \
    else \
        CUDA_FEATURE=candle-cuda; \
    fi; \
    cargo build --release --bin text-embeddings-router -F "${CUDA_FEATURE}" -F static-linking -F http --no-default-features && sccache -s

# gRPC build stage: installs protoc, adds the proto definitions and compiles
# the `text-embeddings-router` binary with the `grpc` feature.
FROM builder AS grpc-builder

# unzip is needed to unpack the protoc release archive below.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        unzip \
    && rm -rf /var/lib/apt/lists/*

# Download protoc 21.12, unpack only bin/protoc and the include/ tree into
# /usr/local, then delete the archive within the same layer.
RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip \
    && curl -OL https://github.com/protocolbuffers/protobuf/releases/download/v21.12/${PROTOC_ZIP} \
    && unzip -o ${PROTOC_ZIP} -d /usr/local bin/protoc \
    && unzip -o ${PROTOC_ZIP} -d /usr/local 'include/*' \
    && rm -f ${PROTOC_ZIP}

COPY proto proto

# Compute capabilities 75..79 build with `candle-cuda-turing`, everything else
# with `candle-cuda`; `sccache -s` runs only after a successful build.
RUN if [ ${CUDA_COMPUTE_CAP} -ge 75 -a ${CUDA_COMPUTE_CAP} -lt 80 ]; then \
        CUDA_FEATURE=candle-cuda-turing; \
    else \
        CUDA_FEATURE=candle-cuda; \
    fi; \
    cargo build --release --bin text-embeddings-router -F "${CUDA_FEATURE}" -F static-linking -F grpc --no-default-features && sccache -s

# Runtime base shared by both final images: CUDA 12.2 base image plus the
# default environment.
FROM nvidia/cuda:12.2.0-base-ubuntu22.04 AS base

# Build-time default for USE_FLASH_ATTENTION; override with --build-arg.
ARG DEFAULT_USE_FLASH_ATTENTION=True

ENV HUGGINGFACE_HUB_CACHE=/data \
    PORT=80 \
    USE_FLASH_ATTENTION=$DEFAULT_USE_FLASH_ATTENTION

# gRPC image: ships the binary produced by the grpc-builder stage.
# NOTE(review): the binary name `text-embeddings-router` differs from the
# `mistralrs-server` binary shipped by the CPU Dockerfile in this repository -
# confirm this file has been adapted to this project.
FROM base AS grpc

COPY --from=grpc-builder /usr/src/target/release/text-embeddings-router /usr/local/bin/text-embeddings-router

ENTRYPOINT ["text-embeddings-router"]
CMD ["--json-output"]

# Last (unnamed) stage: ships the binary produced by the http-builder stage.
# Being the final stage in the file, it is what a build without --target produces.
FROM base

COPY --from=http-builder /usr/src/target/release/text-embeddings-router /usr/local/bin/text-embeddings-router

ENTRYPOINT ["text-embeddings-router"]
CMD ["--json-output"]

0 comments on commit cd52555

Please sign in to comment.