Added back changes in openai_utils.py and tests are now working
Tommaso Radicioni authored and anakin87 committed Mar 6, 2024
1 parent 7be8f1e · commit 937c6df
Showing 4 changed files with 58 additions and 27 deletions.
32 changes: 18 additions & 14 deletions haystack/utils/openai_utils.py
@@ -65,34 +65,38 @@ def _openai_text_completion_tokenization_details(model_name: str):
     :param model_name: Name of the OpenAI model.
     """
     tokenizer_name = "gpt2"
-    max_tokens_limit = 2049  # Based on this ref: https://platform.openai.com/docs/models/gpt-3
+    max_tokens_limit = 4096  # It is the minimum max_tokens_limit value based on this ref: https://platform.openai.com/docs/models/overview
     try:
         model_tokenizer = tiktoken.encoding_name_for_model(model_name)
     except KeyError:
         model_tokenizer = None
 
     if model_tokenizer:
-        # Based on OpenAI models page, 'davinci' considers have 2049 tokens,
-        ## therefore, it is better to add `text-davinci` instead to the condition.
-        ## Ref: https://platform.openai.com/docs/models/gpt-3-5
-        ##      https://platform.openai.com/docs/models/gpt-3
-        if "text-davinci" in model_name:
-            max_tokens_limit = 4097
-            tokenizer_name = model_tokenizer
-        elif model_name.startswith("gpt-3.5-turbo-16k") or model_name.startswith("gpt-35-turbo-16k"):
-            max_tokens_limit = 16384
-            tokenizer_name = model_tokenizer
-        elif model_name.startswith("gpt-3.5-turbo-1106") or model_name.startswith("gpt-35-turbo-1106"):
+        # Based on OpenAI models page, the following are the max_tokens_limit values for the corresponding models
+        ## Ref: https://platform.openai.com/docs/models/overview
+        if "davinci" in model_name:
             max_tokens_limit = 16384
             tokenizer_name = model_tokenizer
-        elif model_name.startswith("gpt-3"):
+        elif (
+            model_name.startswith("gpt-3.5-turbo-instruct")
+            or model_name.startswith("gpt-35-turbo-instruct")
+            or model_name.startswith("gpt-3.5-turbo-0613")
+            or model_name.startswith("gpt-35-turbo-0613")
+        ):
             max_tokens_limit = 4096
             tokenizer_name = model_tokenizer
+        elif model_name.startswith("gpt-3.5-turbo") or model_name.startswith("gpt-35-turbo"):
+            max_tokens_limit = 16384
+            tokenizer_name = model_tokenizer
         # Ref: https://platform.openai.com/docs/models/gpt-4
         elif model_name.startswith("gpt-4-32k"):
             max_tokens_limit = 32768  # tokens
             tokenizer_name = model_tokenizer
-        elif model_name.startswith("gpt-4-1106-preview"):
+        elif (
+            model_name.startswith("gpt-4-1106")
+            or model_name.startswith("gpt-4-turbo-preview")
+            or model_name.startswith("gpt-4-0125-preview")
+        ):
             max_tokens_limit = 128000  # tokens
             tokenizer_name = model_tokenizer
         elif model_name.startswith("gpt-4"):
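Branch order in the updated function matters: the more specific prefixes ("gpt-3.5-turbo-instruct", "gpt-4-32k", "gpt-4-1106") must be tested before the broader "gpt-3.5-turbo" and "gpt-4" catch-alls that would otherwise shadow them. A standalone Python sketch of the new resolution rules (simplified: it skips the tiktoken tokenizer lookup, so it is illustrative rather than the committed code):

def resolve_max_tokens_sketch(model_name: str) -> int:
    # Mirrors the branch order of the updated _openai_text_completion_tokenization_details.
    if "davinci" in model_name:
        return 16384
    if model_name.startswith(("gpt-3.5-turbo-instruct", "gpt-35-turbo-instruct",
                              "gpt-3.5-turbo-0613", "gpt-35-turbo-0613")):
        return 4096
    if model_name.startswith(("gpt-3.5-turbo", "gpt-35-turbo")):
        return 16384
    if model_name.startswith("gpt-4-32k"):
        return 32768
    if model_name.startswith(("gpt-4-1106", "gpt-4-turbo-preview", "gpt-4-0125-preview")):
        return 128000
    if model_name.startswith("gpt-4"):
        return 8192
    return 4096  # new default for unrecognized models (previously 2049)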
6 changes: 3 additions & 3 deletions test/prompt/invocation_layer/test_chatgpt.py
@@ -48,12 +48,12 @@ def test_chatgpt_token_limit_warning_single_prompt(mock_openai_tokenizer, caplog
         model_name_or_path="gpt-3.5-turbo",
         api_key="fake_api_key",
         api_base="https://fake_api_base.com",
-        max_length=4090,
+        max_length=16379,
     )
     with caplog.at_level(logging.WARNING):
         _ = invocation_layer._ensure_token_limit(prompt="This is a test for a mock openai tokenizer.")
         assert "The prompt has been truncated from" in caplog.text
-        assert "and answer length (4090 tokens) fit within the max token limit (4096 tokens)." in caplog.text
+        assert "and answer length (16379 tokens) fit within the max token limit (16384 tokens)." in caplog.text
 
 
 @pytest.mark.unit
@@ -70,7 +70,7 @@ def test_chatgpt_token_limit_warning_with_messages(mock_openai_tokenizer, caplog
         model_name_or_path="gpt-3.5-turbo",
         api_key="fake_api_key",
         api_base="https://fake_api_base.com",
-        max_length=4060,
+        max_length=16379,
     )
     with pytest.raises(ValueError):
         _ = invocation_layer._ensure_token_limit(prompt=messages)
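The new max_length values track the semantics of _ensure_token_limit: the prompt is truncated so that the prompt tokens plus the reserved answer length still fit within the model's max token limit, which for gpt-3.5-turbo is now 16384 rather than 4096. A minimal sketch of that arithmetic (hypothetical helper, not the invocation layer's actual code):

def prompt_tokens_to_keep(prompt_tokens: int, answer_length: int, max_tokens_limit: int) -> int:
    # With answer_length=16379 against gpt-3.5-turbo's 16384-token limit,
    # only 16384 - 16379 = 5 prompt tokens survive, so the test prompt above
    # is truncated and the warning is emitted.
    return min(prompt_tokens, max_tokens_limit - answer_length)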
18 changes: 12 additions & 6 deletions test/prompt/invocation_layer/test_openai.py
@@ -41,22 +41,25 @@ def test_custom_api_base(mock_open_ai_request, load_openai_tokenizer):
 @pytest.mark.unit
 def test_openai_token_limit_warning(mock_openai_tokenizer, caplog):
     invocation_layer = OpenAIInvocationLayer(
-        model_name_or_path="text-ada-001", api_key="fake_api_key", api_base="https://fake_api_base.com", max_length=2045
+        model_name_or_path="davinci-002", api_key="fake_api_key", api_base="https://fake_api_base.com", max_length=16379
     )
     with caplog.at_level(logging.WARNING):
         _ = invocation_layer._ensure_token_limit(prompt="This is a test for a mock openai tokenizer.")
         assert "The prompt has been truncated from" in caplog.text
-        assert "and answer length (2045 tokens) fit within the max token limit (2049 tokens)." in caplog.text
+        assert "and answer length (16379 tokens) fit within the max token limit (16384 tokens)." in caplog.text
 
 
 @pytest.mark.unit
 @pytest.mark.parametrize(
     "model_name,max_tokens_limit",
     [
         ("gpt-3.5-turbo-instruct", 4096),
-        ("gpt-3.5-turbo", 4096),
-        ("gpt-3.5-turbo-16k", 16384),
+        ("gpt-3.5-turbo-0613", 4096),
+        ("gpt-3.5-turbo", 16384),
         ("gpt-4-32k", 32768),
+        ("gpt-4-1106", 128000),
+        ("gpt-4-turbo-preview", 128000),
+        ("gpt-4-0125-preview", 128000),
         ("gpt-4", 8192),
     ],
 )
@@ -77,9 +80,12 @@ def test_openai_token_limit_warning_not_triggered(caplog, mock_openai_tokenizer,
     "model_name,max_tokens_limit",
     [
         ("gpt-3.5-turbo-instruct", 4096),
-        ("gpt-3.5-turbo", 4096),
-        ("gpt-3.5-turbo-16k", 16384),
+        ("gpt-3.5-turbo-0613", 4096),
+        ("gpt-3.5-turbo", 16384),
         ("gpt-4-32k", 32768),
+        ("gpt-4-1106", 128000),
+        ("gpt-4-turbo-preview", 128000),
+        ("gpt-4-0125-preview", 128000),
         ("gpt-4", 8192),
     ],
 )
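The parametrized test bodies are collapsed in the hunks above; each case can be exercised directly against the utility, for example (usage sketch, assuming tiktoken recognizes these model names so the prefix branches apply):

from haystack.utils.openai_utils import _openai_text_completion_tokenization_details

for model_name, expected_limit in [("gpt-3.5-turbo-0613", 4096), ("gpt-4-0125-preview", 128000)]:
    tokenizer_name, max_tokens_limit = _openai_text_completion_tokenization_details(model_name=model_name)
    assert max_tokens_limit == expected_limit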
29 changes: 25 additions & 4 deletions test/utils/test_openai_utils.py
@@ -17,11 +17,11 @@
 def test_openai_text_completion_tokenization_details_gpt_default():
     tokenizer_name, max_tokens_limit = _openai_text_completion_tokenization_details(model_name="not-recognized-name")
     assert tokenizer_name == "gpt2"
-    assert max_tokens_limit == 2049
+    assert max_tokens_limit == 4096
 
 
 @pytest.mark.unit
-def test_openai_text_completion_tokenization_details_gpt_davinci():
+def test_openai_text_completion_tokenization_details_gpt_3_5_turbo_instruct():
     tokenizer_name, max_tokens_limit = _openai_text_completion_tokenization_details(model_name="gpt-3.5-turbo-instruct")
     assert tokenizer_name == "cl100k_base"
     assert max_tokens_limit == 4096
@@ -31,14 +31,14 @@ def test_openai_text_completion_tokenization_details_gpt_davinci():
 def test_openai_text_completion_tokenization_details_gpt3_5_azure():
     tokenizer_name, max_tokens_limit = _openai_text_completion_tokenization_details(model_name="gpt-35-turbo")
     assert tokenizer_name == "cl100k_base"
-    assert max_tokens_limit == 4096
+    assert max_tokens_limit == 16384
 
 
 @pytest.mark.unit
 def test_openai_text_completion_tokenization_details_gpt3_5():
     tokenizer_name, max_tokens_limit = _openai_text_completion_tokenization_details(model_name="gpt-3.5-turbo")
     assert tokenizer_name == "cl100k_base"
-    assert max_tokens_limit == 4096
+    assert max_tokens_limit == 16384
 
 
 @pytest.mark.unit
@@ -62,6 +62,27 @@ def test_openai_text_completion_tokenization_details_gpt_4_32k():
     assert max_tokens_limit == 32768
 
 
+@pytest.mark.unit
+def test_openai_text_completion_tokenization_details_gpt_4_1106():
+    tokenizer_name, max_tokens_limit = _openai_text_completion_tokenization_details(model_name="gpt-4-1106")
+    assert tokenizer_name == "cl100k_base"
+    assert max_tokens_limit == 128000
+
+
+@pytest.mark.unit
+def test_openai_text_completion_tokenization_details_gpt_4_turbo_preview():
+    tokenizer_name, max_tokens_limit = _openai_text_completion_tokenization_details(model_name="gpt-4-turbo-preview")
+    assert tokenizer_name == "cl100k_base"
+    assert max_tokens_limit == 128000
+
+
+@pytest.mark.unit
+def test_openai_text_completion_tokenization_details_gpt_4_0125_preview():
+    tokenizer_name, max_tokens_limit = _openai_text_completion_tokenization_details(model_name="gpt-4-0125-preview")
+    assert tokenizer_name == "cl100k_base"
+    assert max_tokens_limit == 128000
+
+
 @pytest.mark.unit
 @patch("haystack.utils.openai_utils.requests")
 def test_openai_request_retries_generic_error(mock_requests):
