Skip to content

Commit

Permalink
feat(onnx): add onnx runtime for better CPU perf (huggingface#328)
Browse files Browse the repository at this point in the history
  • Loading branch information
OlivierDehaene authored and pi314ever committed Aug 26, 2024
1 parent 037d65b commit 6f3c453
Show file tree
Hide file tree
Showing 15 changed files with 640 additions and 221 deletions.
83 changes: 83 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
members = [
"backends",
"backends/candle",
"backends/ort",
"backends/core",
"backends/python",
"backends/grpc-client",
Expand Down
38 changes: 4 additions & 34 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -28,22 +28,9 @@ ARG ACTIONS_CACHE_URL
ARG ACTIONS_RUNTIME_TOKEN
ARG SCCACHE_GHA_ENABLED

RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB \
| gpg --dearmor | tee /usr/share/keyrings/oneapi-archive-keyring.gpg > /dev/null && \
echo "deb [signed-by=/usr/share/keyrings/oneapi-archive-keyring.gpg] https://apt.repos.intel.com/oneapi all main" | \
tee /etc/apt/sources.list.d/oneAPI.list

RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
intel-oneapi-mkl-devel=2024.0.0-49656 \
build-essential \
&& rm -rf /var/lib/apt/lists/*

RUN echo "int mkl_serv_intel_cpu_true() {return 1;}" > fakeintel.c && \
gcc -shared -fPIC -o libfakeintel.so fakeintel.c

COPY --from=planner /usr/src/recipe.json recipe.json

RUN cargo chef cook --release --features candle --features mkl-dynamic --no-default-features --recipe-path recipe.json && sccache -s
RUN cargo chef cook --release --features ort --no-default-features --recipe-path recipe.json && sccache -s

COPY backends backends
COPY core core
Expand All @@ -53,7 +40,7 @@ COPY Cargo.lock ./

FROM builder as http-builder

RUN cargo build --release --bin text-embeddings-router -F candle -F mkl-dynamic -F http --no-default-features && sccache -s
RUN cargo build --release --bin text-embeddings-router -F ort -F http --no-default-features && sccache -s

FROM builder as grpc-builder

Expand All @@ -65,35 +52,18 @@ RUN PROTOC_ZIP=protoc-21.12-linux-x86_64.zip && \

COPY proto proto

RUN cargo build --release --bin text-embeddings-router -F grpc -F candle -F mkl-dynamic --no-default-features && sccache -s
RUN cargo build --release --bin text-embeddings-router -F grpc -F ort --no-default-features && sccache -s

FROM debian:bookworm-slim as base

ENV HUGGINGFACE_HUB_CACHE=/data \
PORT=80 \
MKL_ENABLE_INSTRUCTIONS=AVX512_E4 \
RAYON_NUM_THREADS=8 \
LD_PRELOAD=/usr/local/libfakeintel.so \
LD_LIBRARY_PATH=/usr/local/lib
PORT=80

RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
libomp-dev \
ca-certificates \
libssl-dev \
curl \
&& rm -rf /var/lib/apt/lists/*

# Copy a lot of the Intel shared objects because of the mkl_serv_intel_cpu_true patch...
COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_intel_lp64.so.2 /usr/local/lib/libmkl_intel_lp64.so.2
COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_intel_thread.so.2 /usr/local/lib/libmkl_intel_thread.so.2
COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_core.so.2 /usr/local/lib/libmkl_core.so.2
COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_vml_def.so.2 /usr/local/lib/libmkl_vml_def.so.2
COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_def.so.2 /usr/local/lib/libmkl_def.so.2
COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_vml_avx2.so.2 /usr/local/lib/libmkl_vml_avx2.so.2
COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_vml_avx512.so.2 /usr/local/lib/libmkl_vml_avx512.so.2
COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_avx2.so.2 /usr/local/lib/libmkl_avx2.so.2
COPY --from=builder /opt/intel/oneapi/mkl/latest/lib/intel64/libmkl_avx512.so.2 /usr/local/lib/libmkl_avx512.so.2
COPY --from=builder /usr/src/libfakeintel.so /usr/local/libfakeintel.so

FROM base as grpc

Expand Down
92 changes: 0 additions & 92 deletions Dockerfile-arm64

This file was deleted.

8 changes: 6 additions & 2 deletions backends/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,16 +7,20 @@ homepage.workspace = true

[dependencies]
clap = { workspace = true, optional = true }
hf-hub = { workspace = true }
serde_json = { workspace = true }
text-embeddings-backend-core = { path = "core" }
text-embeddings-backend-python = { path = "python", optional = true }
text-embeddings-backend-candle = { path = "candle", optional = true }
tokio = { version = "^1.25", features = ["sync"] }
tracing = "^0.1"
text-embeddings-backend-ort = { path = "ort", optional = true }
tokio = { workspace = true }
tracing = { workspace = true }
rand = "^0.8"

[features]
clap = ["dep:clap", "text-embeddings-backend-core/clap"]
python = ["dep:text-embeddings-backend-python"]
ort = ["dep:text-embeddings-backend-ort"]
candle = ["dep:text-embeddings-backend-candle"]
cuda = ["text-embeddings-backend-candle?/cuda"]
metal = ["text-embeddings-backend-candle?/metal"]
Expand Down
17 changes: 17 additions & 0 deletions backends/ort/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Manifest for the `text-embeddings-backend-ort` crate, the embeddings backend built on the `ort` crate.
[package]
name = "text-embeddings-backend-ort"
# Version, edition, authors and homepage are inherited from the workspace manifest.
version.workspace = true
edition.workspace = true
authors.workspace = true
homepage.workspace = true

[dependencies]
# Entries marked `workspace = true` take their version requirement from the workspace manifest.
anyhow = { workspace = true }
nohash-hasher = { workspace = true }
# NOTE(review): presumably must stay compatible with the ndarray version that ort's `ndarray` feature expects — confirm before bumping.
ndarray = "0.15.6"
# Pre-release of `ort` with its default features turned off; only the features listed here are enabled.
# NOTE(review): `download-binaries` presumably fetches prebuilt ONNX Runtime libraries at build time, and
# `onednn` presumably enables the oneDNN execution provider — confirm against the ort crate documentation.
ort = { version = "2.0.0-rc.2", default-features = false, features = ["download-binaries", "half", "onednn", "ndarray"] }
# Sibling crate referenced by relative path rather than through a registry.
text-embeddings-backend-core = { path = "../core" }
tracing = { workspace = true }
thiserror = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
Loading

0 comments on commit 6f3c453

Please sign in to comment.