Skip to content

Commit

Permalink
Merge pull request #1041 from roboflow/fix/reduce-gpu-image-size
Browse files Browse the repository at this point in the history
Fix the multistage build for smaller image
  • Loading branch information
PawelPeczek-Roboflow authored Feb 21, 2025
2 parents 326999f + 7991f36 commit 6dc0e7b
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 6 deletions.
29 changes: 24 additions & 5 deletions docker/dockerfiles/Dockerfile.onnx.gpu
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
FROM nvcr.io/nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04 as base
FROM nvcr.io/nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04 as builder

WORKDIR /app

Expand Down Expand Up @@ -49,13 +49,32 @@ RUN python3 -m pip install \
&& rm -rf ~/.cache/pip

# Install setup.py requirements for flash_attn
RUN python3 -m pip install packaging==24.1 && rm -rf ~/.cache/pip
RUN python3 -m pip install packaging==24.1 && rm -rf ~/.cache/pip

# Install flash_attn required for Paligemma and Florence2
RUN python3 -m pip install -r requirements.pali.flash_attn.txt --no-dependencies --no-build-isolation && rm -rf ~/.cache/pip

FROM scratch
COPY --from=base / /
# Start runtime stage
# This stage is based on the CUDA 11.8 / cuDNN 8 "runtime" image variant, whereas the
# builder stage above is based on the "devel" variant of the same CUDA/cuDNN/Ubuntu release.
FROM nvcr.io/nvidia/cuda:11.8.0-cudnn8-runtime-ubuntu22.04 as runtime

WORKDIR /app

# Copy Python and installed packages from builder
# Only the /usr/local/lib/python3.10 tree and /usr/local/bin are taken from the builder
# stage by these two instructions.
# NOTE(review): presumably this is where the builder's pip installs land, and the Python
# interpreter itself is provided by the apt packages installed below (python3-pip) rather
# than by these copies — confirm.
COPY --from=builder /usr/local/lib/python3.10 /usr/local/lib/python3.10
COPY --from=builder /usr/local/bin /usr/local/bin

# Install runtime dependencies
# The apt package lists are wiped before `apt-get update` and again after the install, all
# within this single RUN, so the lists are not left behind in the resulting layer.
# DEBIAN_FRONTEND=noninteractive is set inline, so it applies to this install command only
# and is not persisted in the image environment.
# NOTE(review): the list includes build tooling (git, rustc, cargo) and -dev packages
# (libopencv-dev, libgdal-dev); presumably later steps of this stage still need them —
# confirm whether any of them could live only in the builder stage.
RUN rm -rf /var/lib/apt/lists/* && apt-get clean && apt-get update -y && DEBIAN_FRONTEND=noninteractive apt-get install -y \
libxext6 \
libopencv-dev \
uvicorn \
python3-pip \
git \
libgdal-dev \
wget \
rustc \
cargo \
&& rm -rf /var/lib/apt/lists/*

WORKDIR /build
COPY . .
Expand Down Expand Up @@ -86,4 +105,4 @@ ENV ENABLE_WORKFLOWS_PROFILING=True
ENV ENABLE_PROMETHEUS=True
ENV STREAM_API_PRELOADED_PROCESSES=2

ENTRYPOINT uvicorn gpu_http:app --workers $NUM_WORKERS --host $HOST --port $PORT
ENTRYPOINT uvicorn gpu_http:app --workers $NUM_WORKERS --host $HOST --port $PORT
2 changes: 1 addition & 1 deletion inference/core/version.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = "0.39.0rc2"
__version__ = "0.39.0"


if __name__ == "__main__":
Expand Down

0 comments on commit 6dc0e7b

Please sign in to comment.