From 3b13a40d3bbab9a2154d8f8d266f89f9f7ab86d4 Mon Sep 17 00:00:00 2001
From: Vladimir Zlobin
Date: Fri, 31 Jan 2025 14:36:16 +0400
Subject: [PATCH] Allow overriding eos_token_id (#1654)

Phi3_V's eos_token_id has different values in GenerationConfig and
Tokenizer. Allowing the token_id to be overridden is required to align
with the samples from the model cards. My patches to the original
models were ignored:
1. https://huggingface.co/microsoft/Phi-3-vision-128k-instruct/discussions/68
2. https://huggingface.co/microsoft/Phi-3.5-vision-instruct/discussions/35
---
 src/cpp/src/generation_config.cpp       | 9 +--------
 tests/python_tests/test_vlm_pipeline.py | 6 ++++--
 2 files changed, 5 insertions(+), 10 deletions(-)

diff --git a/src/cpp/src/generation_config.cpp b/src/cpp/src/generation_config.cpp
index 3914e217c4..f9352da101 100644
--- a/src/cpp/src/generation_config.cpp
+++ b/src/cpp/src/generation_config.cpp
@@ -96,14 +96,7 @@ GenerationConfig::GenerationConfig(const std::filesystem::path& json_path) {
 }
 
 void GenerationConfig::set_eos_token_id(size_t tokenizer_eos_token_id) {
-    if (eos_token_id < 0) {
-        eos_token_id = tokenizer_eos_token_id;
-    } else {
-        OPENVINO_ASSERT(eos_token_id == tokenizer_eos_token_id,
-            "EOS token ID is different in generation config (", eos_token_id, ") and tokenizer (",
-            tokenizer_eos_token_id, ")");
-    }
-    // Merge user defined stop tokens with model EOS token
+    eos_token_id = tokenizer_eos_token_id;
     stop_token_ids.insert(eos_token_id);
 }
 
diff --git a/tests/python_tests/test_vlm_pipeline.py b/tests/python_tests/test_vlm_pipeline.py
index 3c188b26b2..ae0d073ddb 100644
--- a/tests/python_tests/test_vlm_pipeline.py
+++ b/tests/python_tests/test_vlm_pipeline.py
@@ -57,14 +57,16 @@ def streamer(word: str) -> bool:
         return False
 
     models_path = get_ov_model(model_id, cache)
-    generation_config = GenerationConfig(max_new_tokens=30)
+    ov_pipe = VLMPipeline(models_path, "CPU")
+    generation_config = ov_pipe.get_generation_config()
+    generation_config.max_new_tokens = 30
+    generation_config.set_eos_token_id(ov_pipe.get_tokenizer().get_eos_token_id())
 
     for links in image_links_for_testing:
         images = []
         for link in links:
             images.append(get_image_by_link(link))
 
-        ov_pipe = VLMPipeline(models_path, "CPU")
         ov_pipe.start_chat()
         result_from_streamer = []
 
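
Usage sketch (illustration, not part of the patch): a minimal example of
the override flow this change enables, assuming the openvino_genai
Python API exercised in the test above; models_path is a hypothetical
path to a locally exported Phi3_V model.

    import openvino_genai

    # Hypothetical path to a locally exported Phi3_V model.
    models_path = "Phi-3-vision-128k-instruct-ov"

    pipe = openvino_genai.VLMPipeline(models_path, "CPU")
    generation_config = pipe.get_generation_config()
    generation_config.max_new_tokens = 30

    # With this patch, set_eos_token_id() adopts the tokenizer's
    # eos_token_id unconditionally instead of asserting that it matches
    # the value loaded from generation_config.json, so Phi3_V's
    # mismatched IDs no longer raise an error.
    generation_config.set_eos_token_id(pipe.get_tokenizer().get_eos_token_id())

The updated config can then be passed to the pipeline's generate() call,
as the modified test does.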