From 3b13a40d3bbab9a2154d8f8d266f89f9f7ab86d4 Mon Sep 17 00:00:00 2001
From: Vladimir Zlobin
Date: Fri, 31 Jan 2025 14:36:16 +0400
Subject: [PATCH] Allow overriding eos_token_id (#1654)

Phi3_V's eos_token_id has different values in GenerationConfig and
Tokenizer. Allowing the token_id to be overridden is required to align
with the samples from the model cards. My patches to the original
models were ignored:
1. https://huggingface.co/microsoft/Phi-3-vision-128k-instruct/discussions/68
2. https://huggingface.co/microsoft/Phi-3.5-vision-instruct/discussions/35
---
 src/cpp/src/generation_config.cpp       | 9 +--------
 tests/python_tests/test_vlm_pipeline.py | 6 ++++--
 2 files changed, 5 insertions(+), 10 deletions(-)

diff --git a/src/cpp/src/generation_config.cpp b/src/cpp/src/generation_config.cpp
index 3914e217c4..f9352da101 100644
--- a/src/cpp/src/generation_config.cpp
+++ b/src/cpp/src/generation_config.cpp
@@ -96,14 +96,7 @@ GenerationConfig::GenerationConfig(const std::filesystem::path& json_path) {
 }
 
 void GenerationConfig::set_eos_token_id(size_t tokenizer_eos_token_id) {
-    if (eos_token_id < 0) {
-        eos_token_id = tokenizer_eos_token_id;
-    } else {
-        OPENVINO_ASSERT(eos_token_id == tokenizer_eos_token_id,
-            "EOS token ID is different in generation config (", eos_token_id, ") and tokenizer (",
-            tokenizer_eos_token_id, ")");
-    }
-    // Merge user defined stop tokens with model EOS token
+    eos_token_id = tokenizer_eos_token_id;
     stop_token_ids.insert(eos_token_id);
 }
 
diff --git a/tests/python_tests/test_vlm_pipeline.py b/tests/python_tests/test_vlm_pipeline.py
index 3c188b26b2..ae0d073ddb 100644
--- a/tests/python_tests/test_vlm_pipeline.py
+++ b/tests/python_tests/test_vlm_pipeline.py
@@ -57,14 +57,16 @@ def streamer(word: str) -> bool:
         return False
 
     models_path = get_ov_model(model_id, cache)
-    generation_config = GenerationConfig(max_new_tokens=30)
+    ov_pipe = VLMPipeline(models_path, "CPU")
+    generation_config = ov_pipe.get_generation_config()
+    generation_config.max_new_tokens = 30
+    generation_config.set_eos_token_id(ov_pipe.get_tokenizer().get_eos_token_id())
 
     for links in image_links_for_testing:
         images = []
         for link in links:
             images.append(get_image_by_link(link))
 
-        ov_pipe = VLMPipeline(models_path, "CPU")
         ov_pipe.start_chat()
         result_from_streamer = []
 
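
Usage sketch (illustration, not part of the patch): a minimal example of
the override flow this change enables, assuming the openvino_genai
Python API exercised in the test above; models_path is a hypothetical
path to a locally exported Phi3_V model.

    import openvino_genai

    # Hypothetical path to a locally exported Phi3_V model.
    models_path = "Phi-3-vision-128k-instruct-ov"

    pipe = openvino_genai.VLMPipeline(models_path, "CPU")
    generation_config = pipe.get_generation_config()
    generation_config.max_new_tokens = 30

    # With this patch, set_eos_token_id() adopts the tokenizer's
    # eos_token_id unconditionally instead of asserting that it matches
    # the value loaded from generation_config.json, so Phi3_V's
    # mismatched IDs no longer raise an error.
    generation_config.set_eos_token_id(pipe.get_tokenizer().get_eos_token_id())

The updated config can then be passed to the pipeline's generate() call,
as the modified test does.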