vllm-project · Player256 · Dec 16, 2024 · Jan 4, 2025 · Jan 4, 2025 · Jan 4, 2025
diff --git a/examples/offline_inference/vision_language.py b/examples/offline_inference/vision_language.py
@@ -413,6 +413,21 @@ def run_nvlm_d(question: str, modality: str):
     stop_token_ids = None
     return llm, prompt, stop_token_ids
 
+# Ovis
+def run_ovis(question: str, modality: str):
+    """Build the Ovis1.6 engine, single-image prompt and stop token ids."""
+    assert modality == "image"
+    # NOTE(review): confirm ids 1 and 107 are right for a Llama3.2 tokenizer.
+    stop_token_ids = [1, 107]
+    prompt = f"<image>\n{question}"
+    engine = LLM(
+        model="AIDC-AI/Ovis1.6-Llama3.2-3B",
+        dtype="float16",
+        max_model_len=8192,
+        trust_remote_code=True,
+    )
+
+    return engine, prompt, stop_token_ids
 
 # PaliGemma
 def run_paligemma(question: str, modality: str):
@@ -586,9 +601,10 @@ def run_qwen2_5_vl(question: str, modality: str):
     "mllama": run_mllama,
     "molmo": run_molmo,
     "NVLM_D": run_nvlm_d,
+    "ovis": run_ovis,
     "paligemma": run_paligemma,
     "paligemma2": run_paligemma2,
     "phi3_v": run_phi3v,
     "pixtral_hf": run_pixtral_hf,
     "qwen_vl": run_qwen_vl,
     "qwen2_vl": run_qwen2_vl,

diff --git a/examples/offline_inference/vision_language_multi_image.py b/examples/offline_inference/vision_language_multi_image.py
@@ -233,6 +233,26 @@ def load_nvlm_d(question: str, image_urls: List[str]):
         chat_template=None,
     )
 
+def load_ovis(question: str, image_urls: List[str]) -> ModelRequestData:
+    """Build the Ovis1.6 multi-image request for ``question``."""
+    llm = LLM(
+        model="AIDC-AI/Ovis1.6-Gemma2-9B",
+        max_model_len=8192,
+        dtype="bfloat16",
+        trust_remote_code=True,
+        # Allow every supplied image to be attached to a single prompt.
+        limit_mm_per_prompt={"image": len(image_urls)},
+    )
+    # One "<image>" placeholder per image so the prompt matches image_data.
+    prompt = "\n".join(["<image>"] * len(image_urls)) + f"\n{question}"
+    return ModelRequestData(
+        llm=llm,
+        prompt=prompt,
+        # NOTE(review): presumably Gemma2 special-token ids; verify.
+        stop_token_ids=[1, 107],
+        image_data=[fetch_image(url) for url in image_urls],
+        chat_template=None,
+    )
 
 def load_pixtral_hf(question: str, image_urls: List[str]) -> ModelRequestData:
     model_name = "mistral-community/pixtral-12b"
@@ -458,6 +478,7 @@ def load_qwen2_5_vl(question, image_urls: List[str]) -> ModelRequestData:
     "internvl_chat": load_internvl,
     "mllama": load_mllama,
     "NVLM_D": load_nvlm_d,
+    "ovis": load_ovis,
     "phi3_v": load_phi3v,
     "pixtral_hf": load_pixtral_hf,
     "qwen_vl_chat": load_qwen_vl_chat,

@@ -270,6 +270,7 @@ def check_available_online(
                                         trust_remote_code=True),
     "NVLM_D": _HfExamplesInfo("nvidia/NVLM-D-72B",
                               trust_remote_code=True),
+    "Ovis": _HfExamplesInfo("AIDC-AI/Ovis1.6-Llama3.2-3B",trust_remote_code=True),  # noqa: E501
     "PaliGemmaForConditionalGeneration": _HfExamplesInfo("google/paligemma-3b-mix-224",  # noqa: E501
                                                          extras={"v2": "google/paligemma2-3b-ft-docci-448"}),  # noqa: E501
     "Phi3VForCausalLM": _HfExamplesInfo("microsoft/Phi-3-vision-128k-instruct",