diff --git a/haystack/utils/openai_utils.py b/haystack/utils/openai_utils.py
index c77e0c65cd..8a894103aa 100644
--- a/haystack/utils/openai_utils.py
+++ b/haystack/utils/openai_utils.py
@@ -65,34 +65,38 @@ def _openai_text_completion_tokenization_details(model_name: str):
     :param model_name: Name of the OpenAI model.
     """
     tokenizer_name = "gpt2"
-    max_tokens_limit = 2049  # Based on this ref: https://platform.openai.com/docs/models/gpt-3
+    max_tokens_limit = 4096  # It is the minimum max_tokens_limit value based on this ref: https://platform.openai.com/docs/models/overview
     try:
         model_tokenizer = tiktoken.encoding_name_for_model(model_name)
     except KeyError:
         model_tokenizer = None
 
     if model_tokenizer:
-        # Based on OpenAI models page, 'davinci' considers have 2049 tokens,
-        ## therefore, it is better to add `text-davinci` instead to the condition.
-        ## Ref: https://platform.openai.com/docs/models/gpt-3-5
-        ##      https://platform.openai.com/docs/models/gpt-3
-        if "text-davinci" in model_name:
-            max_tokens_limit = 4097
-            tokenizer_name = model_tokenizer
-        elif model_name.startswith("gpt-3.5-turbo-16k") or model_name.startswith("gpt-35-turbo-16k"):
-            max_tokens_limit = 16384
-            tokenizer_name = model_tokenizer
-        elif model_name.startswith("gpt-3.5-turbo-1106") or model_name.startswith("gpt-35-turbo-1106"):
+        # Based on OpenAI models page, the following are the max_tokens_limit values for the corresponding models
+        ## Ref: https://platform.openai.com/docs/models/overview
+        if "davinci" in model_name:
             max_tokens_limit = 16384
             tokenizer_name = model_tokenizer
-        elif model_name.startswith("gpt-3"):
+        elif (
+            model_name.startswith("gpt-3.5-turbo-instruct")
+            or model_name.startswith("gpt-35-turbo-instruct")
+            or model_name.startswith("gpt-3.5-turbo-0613")
+            or model_name.startswith("gpt-35-turbo-0613")
+        ):
             max_tokens_limit = 4096
             tokenizer_name = model_tokenizer
+        elif model_name.startswith("gpt-3.5-turbo") or model_name.startswith("gpt-35-turbo"):
+            max_tokens_limit = 16384
+            tokenizer_name = model_tokenizer
         # Ref: https://platform.openai.com/docs/models/gpt-4
         elif model_name.startswith("gpt-4-32k"):
             max_tokens_limit = 32768  # tokens
             tokenizer_name = model_tokenizer
-        elif model_name.startswith("gpt-4-1106-preview"):
+        elif (
+            model_name.startswith("gpt-4-1106")
+            or model_name.startswith("gpt-4-turbo-preview")
+            or model_name.startswith("gpt-4-0125-preview")
+        ):
             max_tokens_limit = 128000  # tokens
             tokenizer_name = model_tokenizer
         elif model_name.startswith("gpt-4"):
diff --git a/test/prompt/invocation_layer/test_chatgpt.py b/test/prompt/invocation_layer/test_chatgpt.py
index c1b816d493..b7db4dbf9a 100644
--- a/test/prompt/invocation_layer/test_chatgpt.py
+++ b/test/prompt/invocation_layer/test_chatgpt.py
@@ -48,12 +48,12 @@ def test_chatgpt_token_limit_warning_single_prompt(mock_openai_tokenizer, caplog
         model_name_or_path="gpt-3.5-turbo",
         api_key="fake_api_key",
         api_base="https://fake_api_base.com",
-        max_length=4090,
+        max_length=16379,
     )
     with caplog.at_level(logging.WARNING):
         _ = invocation_layer._ensure_token_limit(prompt="This is a test for a mock openai tokenizer.")
         assert "The prompt has been truncated from" in caplog.text
-        assert "and answer length (4090 tokens) fit within the max token limit (4096 tokens)." in caplog.text
+        assert "and answer length (16379 tokens) fit within the max token limit (16384 tokens)." in caplog.text
 
 
 @pytest.mark.unit
@@ -70,7 +70,7 @@ def test_chatgpt_token_limit_warning_with_messages(mock_openai_tokenizer, caplog
         model_name_or_path="gpt-3.5-turbo",
         api_key="fake_api_key",
         api_base="https://fake_api_base.com",
-        max_length=4060,
+        max_length=16379,
     )
     with pytest.raises(ValueError):
         _ = invocation_layer._ensure_token_limit(prompt=messages)
diff --git a/test/prompt/invocation_layer/test_openai.py b/test/prompt/invocation_layer/test_openai.py
index 63a47b31ad..8acb8fc778 100644
--- a/test/prompt/invocation_layer/test_openai.py
+++ b/test/prompt/invocation_layer/test_openai.py
@@ -41,12 +41,12 @@ def test_custom_api_base(mock_open_ai_request, load_openai_tokenizer):
 @pytest.mark.unit
 def test_openai_token_limit_warning(mock_openai_tokenizer, caplog):
     invocation_layer = OpenAIInvocationLayer(
-        model_name_or_path="text-ada-001", api_key="fake_api_key", api_base="https://fake_api_base.com", max_length=2045
+        model_name_or_path="davinci-002", api_key="fake_api_key", api_base="https://fake_api_base.com", max_length=16379
     )
     with caplog.at_level(logging.WARNING):
         _ = invocation_layer._ensure_token_limit(prompt="This is a test for a mock openai tokenizer.")
         assert "The prompt has been truncated from" in caplog.text
-        assert "and answer length (2045 tokens) fit within the max token limit (2049 tokens)." in caplog.text
+        assert "and answer length (16379 tokens) fit within the max token limit (16384 tokens)." in caplog.text
 
 
 @pytest.mark.unit
@@ -54,9 +54,12 @@ def test_openai_token_limit_warning(mock_openai_tokenizer, caplog):
     "model_name,max_tokens_limit",
     [
         ("gpt-3.5-turbo-instruct", 4096),
-        ("gpt-3.5-turbo", 4096),
-        ("gpt-3.5-turbo-16k", 16384),
+        ("gpt-3.5-turbo-0613", 4096),
+        ("gpt-3.5-turbo", 16384),
         ("gpt-4-32k", 32768),
+        ("gpt-4-1106", 128000),
+        ("gpt-4-turbo-preview", 128000),
+        ("gpt-4-0125-preview", 128000),
         ("gpt-4", 8192),
     ],
 )
@@ -77,9 +80,12 @@ def test_openai_token_limit_warning_not_triggered(caplog, mock_openai_tokenizer,
     "model_name,max_tokens_limit",
     [
         ("gpt-3.5-turbo-instruct", 4096),
-        ("gpt-3.5-turbo", 4096),
-        ("gpt-3.5-turbo-16k", 16384),
+        ("gpt-3.5-turbo-0613", 4096),
+        ("gpt-3.5-turbo", 16384),
         ("gpt-4-32k", 32768),
+        ("gpt-4-1106", 128000),
+        ("gpt-4-turbo-preview", 128000),
+        ("gpt-4-0125-preview", 128000),
         ("gpt-4", 8192),
     ],
 )
diff --git a/test/utils/test_openai_utils.py b/test/utils/test_openai_utils.py
index 3380a09eff..a78b5592cf 100644
--- a/test/utils/test_openai_utils.py
+++ b/test/utils/test_openai_utils.py
@@ -17,11 +17,11 @@ def test_openai_text_completion_tokenization_details_gpt_default():
     tokenizer_name, max_tokens_limit = _openai_text_completion_tokenization_details(model_name="not-recognized-name")
     assert tokenizer_name == "gpt2"
-    assert max_tokens_limit == 2049
+    assert max_tokens_limit == 4096
 
 
 @pytest.mark.unit
-def test_openai_text_completion_tokenization_details_gpt_davinci():
+def test_openai_text_completion_tokenization_details_gpt_3_5_turbo_instruct():
     tokenizer_name, max_tokens_limit = _openai_text_completion_tokenization_details(model_name="gpt-3.5-turbo-instruct")
     assert tokenizer_name == "cl100k_base"
     assert max_tokens_limit == 4096
@@ -31,14 +31,14 @@ def test_openai_text_completion_tokenization_details_gpt3_5_azure():
     tokenizer_name, max_tokens_limit = _openai_text_completion_tokenization_details(model_name="gpt-35-turbo")
     assert tokenizer_name == "cl100k_base"
-    assert max_tokens_limit == 4096
+    assert max_tokens_limit == 16384
 
 
 @pytest.mark.unit
 def test_openai_text_completion_tokenization_details_gpt3_5():
     tokenizer_name, max_tokens_limit = _openai_text_completion_tokenization_details(model_name="gpt-3.5-turbo")
     assert tokenizer_name == "cl100k_base"
-    assert max_tokens_limit == 4096
+    assert max_tokens_limit == 16384
 
 
 @pytest.mark.unit
@@ -62,6 +62,27 @@ def test_openai_text_completion_tokenization_details_gpt_4_32k():
     assert max_tokens_limit == 32768
 
 
+@pytest.mark.unit
+def test_openai_text_completion_tokenization_details_gpt_4_1106():
+    tokenizer_name, max_tokens_limit = _openai_text_completion_tokenization_details(model_name="gpt-4-1106")
+    assert tokenizer_name == "cl100k_base"
+    assert max_tokens_limit == 128000
+
+
+@pytest.mark.unit
+def test_openai_text_completion_tokenization_details_gpt_4_turbo_preview():
+    tokenizer_name, max_tokens_limit = _openai_text_completion_tokenization_details(model_name="gpt-4-turbo-preview")
+    assert tokenizer_name == "cl100k_base"
+    assert max_tokens_limit == 128000
+
+
+@pytest.mark.unit
+def test_openai_text_completion_tokenization_details_gpt_4_0125_preview():
+    tokenizer_name, max_tokens_limit = _openai_text_completion_tokenization_details(model_name="gpt-4-0125-preview")
+    assert tokenizer_name == "cl100k_base"
+    assert max_tokens_limit == 128000
+
+
 @pytest.mark.unit
 @patch("haystack.utils.openai_utils.requests")
 def test_openai_request_retries_generic_error(mock_requests):
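
For reference, a minimal usage sketch of the updated helper, not part of the patch itself: it assumes a Haystack install containing this change and a tiktoken version whose model map recognizes these model names. The expected values mirror the updated tests above.

    from haystack.utils.openai_utils import _openai_text_completion_tokenization_details

    for model in ("gpt-3.5-turbo", "gpt-4-turbo-preview", "not-recognized-name"):
        tokenizer_name, max_tokens_limit = _openai_text_completion_tokenization_details(model_name=model)
        print(model, tokenizer_name, max_tokens_limit)
        # gpt-3.5-turbo       -> cl100k_base, 16384
        # gpt-4-turbo-preview -> cl100k_base, 128000
        # not-recognized-name -> gpt2, 4096 (the new default for unknown models)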