Skip to content

Commit

Permalink
fix: fix cuda-all image (huggingface#239)
Browse files Browse the repository at this point in the history
  • Loading branch information
OlivierDehaene authored Apr 15, 2024
1 parent a7309fb commit 8b8d3e4
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 11 deletions.
20 changes: 10 additions & 10 deletions Dockerfile-cuda-all
Original file line number Diff line number Diff line change
Expand Up @@ -48,9 +48,9 @@ FROM builder as builder-75

RUN if [ $VERTEX = "true" ]; \
then \
CUDA_COMPUTE_CAP=75 cargo chef cook --release --features google --features candle-cuda-turing --no-default-features --recipe-path recipe.json && sccache -s; \
CUDA_COMPUTE_CAP=75 cargo chef cook --release --features google --features candle-cuda-turing --features http --no-default-features --recipe-path recipe.json && sccache -s; \
else \
CUDA_COMPUTE_CAP=75 cargo chef cook --release --features candle-cuda-turing --no-default-features --recipe-path recipe.json && sccache -s; \
CUDA_COMPUTE_CAP=75 cargo chef cook --release --features candle-cuda-turing --no-default-features --features http --recipe-path recipe.json && sccache -s; \
fi;

COPY backends backends
Expand All @@ -70,9 +70,9 @@ FROM builder as builder-80

RUN if [ $VERTEX = "true" ]; \
then \
CUDA_COMPUTE_CAP=80 cargo chef cook --release --features google --features candle-cuda-turing --no-default-features --recipe-path recipe.json && sccache -s; \
CUDA_COMPUTE_CAP=80 cargo chef cook --release --features google --features candle-cuda --features http --no-default-features --recipe-path recipe.json && sccache -s; \
else \
CUDA_COMPUTE_CAP=80 cargo chef cook --release --features candle-cuda-turing --no-default-features --recipe-path recipe.json && sccache -s; \
CUDA_COMPUTE_CAP=80 cargo chef cook --release --features candle-cuda --no-default-features --features http --recipe-path recipe.json && sccache -s; \
fi;

COPY backends backends
Expand All @@ -83,18 +83,18 @@ COPY Cargo.lock ./

RUN if [ $VERTEX = "true" ]; \
then \
CUDA_COMPUTE_CAP=80 cargo build --release --bin text-embeddings-router -F candle-cuda-turing -F http -F google --no-default-features && sccache -s; \
CUDA_COMPUTE_CAP=80 cargo build --release --bin text-embeddings-router -F candle-cuda -F http -F google --no-default-features && sccache -s; \
else \
CUDA_COMPUTE_CAP=80 cargo build --release --bin text-embeddings-router -F candle-cuda-turing -F http --no-default-features && sccache -s; \
CUDA_COMPUTE_CAP=80 cargo build --release --bin text-embeddings-router -F candle-cuda -F http --no-default-features && sccache -s; \
fi;

FROM builder as builder-90

RUN if [ $VERTEX = "true" ]; \
then \
CUDA_COMPUTE_CAP=90 cargo chef cook --release --features google --features candle-cuda-turing --no-default-features --recipe-path recipe.json && sccache -s; \
CUDA_COMPUTE_CAP=90 cargo chef cook --release --features google --features candle-cuda --features http --no-default-features --recipe-path recipe.json && sccache -s; \
else \
CUDA_COMPUTE_CAP=90 cargo chef cook --release --features candle-cuda-turing --no-default-features --recipe-path recipe.json && sccache -s; \
CUDA_COMPUTE_CAP=90 cargo chef cook --release --features candle-cuda --features http --no-default-features --recipe-path recipe.json && sccache -s; \
fi;

COPY backends backends
Expand All @@ -105,9 +105,9 @@ COPY Cargo.lock ./

RUN if [ $VERTEX = "true" ]; \
then \
CUDA_COMPUTE_CAP=90 cargo build --release --bin text-embeddings-router -F candle-cuda-turing -F http -F google --no-default-features && sccache -s; \
CUDA_COMPUTE_CAP=90 cargo build --release --bin text-embeddings-router -F candle-cuda -F http -F google --no-default-features && sccache -s; \
else \
CUDA_COMPUTE_CAP=90 cargo build --release --bin text-embeddings-router -F candle-cuda-turing -F http --no-default-features && sccache -s; \
CUDA_COMPUTE_CAP=90 cargo build --release --bin text-embeddings-router -F candle-cuda -F http --no-default-features && sccache -s; \
fi;

FROM nvidia/cuda:12.2.0-runtime-ubuntu22.04 as base
Expand Down
2 changes: 1 addition & 1 deletion backends/candle/src/flash_attn.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ pub(crate) fn flash_attn_varlen(
return attention;
}
#[cfg(not(feature = "flash-attn"))]
candle::bail!("Flash attention is not installed. Use `flash-attn-v1` feature.")
candle::bail!("Flash attention is not installed. Use `flash-attn` feature.")
}
candle::bail!(
"GPU with CUDA capability {} is not supported",
Expand Down

0 comments on commit 8b8d3e4

Please sign in to comment.