Merge pull request #6 from mobiusml/dstack_deploy

Add Configuration Files for dstack Deploy
mobiusml · Dec 3, 2024 · b943bbc · b943bbc
2 parents a337473 + 3f339f5
commit b943bbc
Show file tree

Hide file tree

Showing 8 changed files with 161 additions and 105 deletions.
diff --git a/aana_chat_with_video/configs/deployments.py b/aana_chat_with_video/configs/deployments.py
@@ -1,6 +1,7 @@
 from aana.core.models.sampling import SamplingParams
 from aana.core.models.types import Dtype
 from aana.deployments.vad_deployment import VadConfig, VadDeployment
+from aana.deployments.hf_blip2_deployment import HFBlip2Config, HFBlip2Deployment
 from aana.deployments.vllm_deployment import VLLMConfig, VLLMDeployment
 from aana.deployments.whisper_deployment import (
     WhisperComputeType,
@@ -40,19 +41,15 @@
     },
     {
         "name": "captioning_deployment",
-        "instance": VLLMDeployment.options(
+        "instance": HFBlip2Deployment.options(
             num_replicas=1,
+            max_ongoing_requests=1000,
             ray_actor_options={"num_gpus": 0.25},
-            user_config=VLLMConfig(
-                model="Qwen/Qwen2-VL-2B-Instruct",
-                dtype=Dtype.AUTO,
-                gpu_memory_reserved=12000,
-                max_model_len=32768,
-                enforce_eager=True,
-                default_sampling_params=SamplingParams(
-                    temperature=0.0, top_p=1.0, top_k=-1, max_tokens=512
-                ),
-                engine_args={"trust_remote_code": True},
+            user_config=HFBlip2Config(
+                model="Salesforce/blip2-opt-2.7b",
+                dtype=Dtype.FLOAT16,
+                batch_size=2,
+                num_processing_threads=2,
             ).model_dump(mode="json"),
         ),
     },

diff --git a/aana_chat_with_video/configs/settings.py b/aana_chat_with_video/configs/settings.py
@@ -5,7 +5,7 @@ class Settings(AanaSettings):
     """A pydantic model for App settings."""
 
     asr_model_name: str = "whisper_medium"
-    captioning_model_name: str = "qwen2-vl-2b-instruct"
+    captioning_model_name: str = "hf_blip2_opt_2_7b"
     max_video_len: int = 60 * 20  # 20 minutes
 
 

diff --git a/aana_chat_with_video/endpoints/index_video.py b/aana_chat_with_video/endpoints/index_video.py
@@ -162,19 +162,13 @@ async def run(  # noqa: C901
 
                 timestamps.extend(frames_dict["timestamps"])
                 frame_ids.extend(frames_dict["frame_ids"])
-                chat_prompt = "Describe the content of the following image in a single sentence:"
-                dialogs = [
-                    ImageChatDialog.from_prompt(prompt=chat_prompt, images=[frame]) for frame in frames_dict["frames"]
-                ]
-
-                # Collect the tasks to run concurrently and wait for them to finish
-                tasks = [self.captioning_handle.chat(dialog) for dialog in dialogs]
-                captioning_output = await asyncio.gather(*tasks)
-                captioning_output = [caption["message"].content for caption in captioning_output]
-                captions.extend(captioning_output)
+                captioning_output = await self.captioning_handle.generate_batch(
+                    images=frames_dict["frames"]
+                )
+                captions.extend(captioning_output["captions"])
 
                 yield {
-                    "captions": captioning_output,
+                    "captions": captioning_output["captions"],
                     "timestamps": frames_dict["timestamps"],
                 }
 

diff --git a/app.dstack.yaml b/app.dstack.yaml
@@ -0,0 +1,50 @@
+type: service  # dstack service configuration for the aana_chat_with_video app
+
+name: aana-chat-with-video
+
+image: nvidia/cuda:12.3.2-cudnn9-devel-ubuntu22.04
+
+env:
+  - NUM_WORKERS=5
+  - TASK_QUEUE__EXECUTION_TIMEOUT=10000
+  - TASK_QUEUE__NUM_WORKERS=5
+  - TMP_DATA_DIR=/demo_data/aana  # all data dirs below live under the /demo_data volume mount
+  - IMAGE_DIR=/demo_data/aana/images
+  - VIDEO_DIR=/demo_data/aana/videos
+  - AUDIO_DIR=/demo_data/aana/audios
+  - MODEL_DIR=/demo_data/aana/models
+  - DB_CONFIG__DATASTORE_TYPE=sqlite
+  - DB_CONFIG__DATASTORE_CONFIG__PATH=/demo_data/aana.db
+
+commands:
+  - apt-get update
+  - apt-get install -y libgl1 libglib2.0-0 ffmpeg python3 python3-dev git nvtop htop sqlite3 cron
+  - curl -sSL https://install.python-poetry.org | python3 -
+  - export PATH=$PATH:/root/.local/bin
+  - sh install.sh
+  - mkdir -p /demo_data/hf_cache  # -p also creates /demo_data itself if it is missing
+  - service cron start  # apt only installs cron; the daemon must be running or the cleanup jobs below never fire
+  - (crontab -l 2>/dev/null; echo "0 3 * * * find $TMP_DATA_DIR/videos/* -type f -atime +3 -exec rm -f {} \;") | sort -u | crontab -
+  - (crontab -l 2>/dev/null; echo "0 3 * * * find $TMP_DATA_DIR/audios/* -type f -atime +3 -exec rm -f {} \;") | sort -u | crontab -
+  - (crontab -l 2>/dev/null; echo "0 3 * * * find $TMP_DATA_DIR/images/* -type f -atime +3 -exec rm -f {} \;") | sort -u | crontab -
+  - HF_HUB_CACHE="/demo_data/hf_cache" CUDA_VISIBLE_DEVICES="0" poetry run aana deploy aana_chat_with_video.app:aana_app
+
+port: 8000
+
+replicas: 1
+
+auth: False  # disables dstack's authorization on the service endpoint
+
+spot_policy: on-demand
+
+max_price: 0.5
+
+volumes:
+  - name: demo-data
+    path: /demo_data
+
+resources:
+  gpu: 48GB..
+  cpu: 8..
+  memory: 50GB..
+  disk: 50GB..
diff --git a/install.sh b/install.sh
@@ -0,0 +1,3 @@
+#!/bin/sh
+poetry install  # install the project and the dependencies declared in pyproject.toml / poetry.lock
+poetry run pip install flash-attn --no-build-isolation # temporary fix for flash-attn bug in vLLM; installed via pip inside the Poetry environment, built against the packages already installed there
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -7,8 +7,9 @@ readme = "README.md"
 
 [tool.poetry.dependencies]
 python = "^3.10"
-aana = {git = "https://github.com/mobiusml/aana_sdk.git", rev = "91de5b5"}
+aana = "0.2.3"
 vllm = "0.6.3.post1"
+transformers = {git = "https://github.com/huggingface/transformers.git", rev = "0b5b5e6"}
 
 [tool.poetry.group.dev.dependencies]
 ipykernel = "^6.29.4"

diff --git a/volume.dstack.yaml b/volume.dstack.yaml
@@ -0,0 +1,7 @@
+type: volume  # dstack volume configuration
+name: demo-data  # same name that app.dstack.yaml lists under `volumes:` and mounts at /demo_data
+
+backend: runpod
+region: EU-SE-1
+
+size: 100GB