Skip to content

Commit

Permalink
Merge pull request #6 from mobiusml/dstack_deploy
Browse files Browse the repository at this point in the history
Add Configuration Files for dstack Deploy
  • Loading branch information
movchan74 authored Dec 3, 2024
2 parents a337473 + 3f339f5 commit b943bbc
Show file tree
Hide file tree
Showing 8 changed files with 161 additions and 105 deletions.
19 changes: 8 additions & 11 deletions aana_chat_with_video/configs/deployments.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
from aana.core.models.sampling import SamplingParams
from aana.core.models.types import Dtype
from aana.deployments.vad_deployment import VadConfig, VadDeployment
from aana.deployments.hf_blip2_deployment import HFBlip2Config, HFBlip2Deployment
from aana.deployments.vllm_deployment import VLLMConfig, VLLMDeployment
from aana.deployments.whisper_deployment import (
WhisperComputeType,
Expand Down Expand Up @@ -40,19 +41,15 @@
},
{
"name": "captioning_deployment",
"instance": VLLMDeployment.options(
"instance": HFBlip2Deployment.options(
num_replicas=1,
max_ongoing_requests=1000,
ray_actor_options={"num_gpus": 0.25},
user_config=VLLMConfig(
model="Qwen/Qwen2-VL-2B-Instruct",
dtype=Dtype.AUTO,
gpu_memory_reserved=12000,
max_model_len=32768,
enforce_eager=True,
default_sampling_params=SamplingParams(
temperature=0.0, top_p=1.0, top_k=-1, max_tokens=512
),
engine_args={"trust_remote_code": True},
user_config=HFBlip2Config(
model="Salesforce/blip2-opt-2.7b",
dtype=Dtype.FLOAT16,
batch_size=2,
num_processing_threads=2,
).model_dump(mode="json"),
),
},
Expand Down
2 changes: 1 addition & 1 deletion aana_chat_with_video/configs/settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ class Settings(AanaSettings):
"""A pydantic model for App settings."""

asr_model_name: str = "whisper_medium"
captioning_model_name: str = "qwen2-vl-2b-instruct"
captioning_model_name: str = "hf_blip2_opt_2_7b"
max_video_len: int = 60 * 20 # 20 minutes


Expand Down
16 changes: 5 additions & 11 deletions aana_chat_with_video/endpoints/index_video.py
Original file line number Diff line number Diff line change
Expand Up @@ -162,19 +162,13 @@ async def run( # noqa: C901

timestamps.extend(frames_dict["timestamps"])
frame_ids.extend(frames_dict["frame_ids"])
chat_prompt = "Describe the content of the following image in a single sentence:"
dialogs = [
ImageChatDialog.from_prompt(prompt=chat_prompt, images=[frame]) for frame in frames_dict["frames"]
]

# Collect the tasks to run concurrently and wait for them to finish
tasks = [self.captioning_handle.chat(dialog) for dialog in dialogs]
captioning_output = await asyncio.gather(*tasks)
captioning_output = [caption["message"].content for caption in captioning_output]
captions.extend(captioning_output)
captioning_output = await self.captioning_handle.generate_batch(
images=frames_dict["frames"]
)
captions.extend(captioning_output["captions"])

yield {
"captions": captioning_output,
"captions": captioning_output["captions"],
"timestamps": frames_dict["timestamps"],
}

Expand Down
50 changes: 50 additions & 0 deletions app.dstack.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
# dstack service definition that deploys the Aana "chat with video" app.
type: service

name: aana-chat-with-video

image: nvidia/cuda:12.3.2-cudnn9-devel-ubuntu22.04

# Application settings. Every data path points below /demo_data, which is the
# mount point of the volume declared under `volumes`.
env:
  - NUM_WORKERS=5
  - TASK_QUEUE__EXECUTION_TIMEOUT=10000
  - TASK_QUEUE__NUM_WORKERS=5
  - TMP_DATA_DIR=/demo_data/aana
  - IMAGE_DIR=/demo_data/aana/images
  - VIDEO_DIR=/demo_data/aana/videos
  - AUDIO_DIR=/demo_data/aana/audios
  - MODEL_DIR=/demo_data/aana/models
  - DB_CONFIG__DATASTORE_TYPE=sqlite
  - DB_CONFIG__DATASTORE_CONFIG__PATH=/demo_data/aana.db

commands:
  # System packages, Poetry, and the project's Python dependencies.
  - apt-get update
  - apt-get install -y libgl1 libglib2.0-0 ffmpeg python3 python3-dev git nvtop htop sqlite3 cron
  - curl -sSL https://install.python-poetry.org | python3 -
  - export PATH=$PATH:/root/.local/bin
  - sh install.sh
  - mkdir -p /demo_data
  - mkdir -p /demo_data/hf_cache
  # Register daily 03:00 jobs that delete media files not accessed for more
  # than 3 days. `sort -u` keeps the crontab free of duplicate entries when
  # these commands are executed again.
  - (crontab -l 2>/dev/null; echo "0 3 * * * find $TMP_DATA_DIR/videos/* -type f -atime +3 -exec rm -f {} \;") | sort -u | crontab -
  - (crontab -l 2>/dev/null; echo "0 3 * * * find $TMP_DATA_DIR/audios/* -type f -atime +3 -exec rm -f {} \;") | sort -u | crontab -
  - (crontab -l 2>/dev/null; echo "0 3 * * * find $TMP_DATA_DIR/images/* -type f -atime +3 -exec rm -f {} \;") | sort -u | crontab -
  # Installing the cron package does not launch its daemon inside a container
  # (there is no init system), so start it explicitly; without this the
  # cleanup jobs registered above would never run.
  - service cron start
  # Start the app, keeping the Hugging Face cache on the persistent volume.
  - HF_HUB_CACHE="/demo_data/hf_cache" CUDA_VISIBLE_DEVICES="0" poetry run aana deploy aana_chat_with_video.app:aana_app

port: 8000

replicas: 1

auth: False

spot_policy: on-demand

max_price: 0.5

# Must match the `name` of the volume defined in volume.dstack.yaml.
volumes:
  - name: demo-data
    path: /demo_data

# NOTE(review): a trailing `..` is presumably dstack's open-ended range
# ("at least this much") — confirm against the dstack resources reference.
resources:
  gpu: 48GB..
  cpu: 8..
  memory: 50GB..
  disk: 50GB..
3 changes: 3 additions & 0 deletions install.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
#!/bin/sh
# Install the project's dependencies with Poetry, then add flash-attn on top.
# Abort on the first failing command so that a failed `poetry install` is not
# ignored while the script carries on to the next step.
set -e

poetry install
poetry run pip install flash-attn --no-build-isolation # temporary fix for flash-attn bug in vLLM
166 changes: 85 additions & 81 deletions poetry.lock

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@ readme = "README.md"

[tool.poetry.dependencies]
python = "^3.10"
aana = {git = "https://github.com/mobiusml/aana_sdk.git", rev = "91de5b5"}
aana = "0.2.3"
vllm = "0.6.3.post1"
transformers = {git = "https://github.com/huggingface/transformers.git", rev = "0b5b5e6"}

[tool.poetry.group.dev.dependencies]
ipykernel = "^6.29.4"
Expand Down
7 changes: 7 additions & 0 deletions volume.dstack.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# dstack volume definition. The service configuration (app.dstack.yaml)
# references this volume by the name "demo-data" and mounts it at /demo_data.
type: volume
name: demo-data

# Backend and region in which the volume is created.
backend: runpod
region: EU-SE-1

# Requested capacity of the volume.
size: 100GB

0 comments on commit b943bbc

Please sign in to comment.