Skip to content

Commit

Permalink
bumped version and changed default model to gpt-4 turbo
Browse files Browse the repository at this point in the history
  • Loading branch information
elias-jhsph committed Mar 9, 2024
1 parent 291f9ee commit 4532186
Show file tree
Hide file tree
Showing 9 changed files with 321 additions and 147 deletions.
8 changes: 4 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "jarvis_conversationalist"
version = "0.4.6"
version = "0.5.0"
authors = [{name="Elias Weston-Farber", email="[email protected]"}]
description = "A voice assistant for the command line"
readme = "README.md"
Expand All @@ -9,8 +9,8 @@ classifiers = [
"License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
"Operating System :: OS Independent",
]
dependencies = ["torch>2.0.0", "openai==1.2.3", "openai-whisper",
"chromadb<=0.4.15","soundfile==0.12.1","sounddevice","pydub==0.25.1", "pyannote.audio==3.1.0", "faiss-cpu",
dependencies = ["torch>2.0.0", "openai==1.13.3", "openai-whisper",
"chromadb","soundfile==0.12.1","sounddevice","pydub==0.25.1", "pyannote.audio==3.1.0", "faiss-cpu",
"gtts==2.4.0","spacy==3.7.2","beautifulsoup4==4.12.2","googlesearch-python==1.2.3",
"tiktoken==0.5.1","geocoder==1.38.1","scrapy==2.11.0", "mycroft-mimic3-tts[all]; sys_platform == 'linux'",
"pysqlite3-binary; sys_platform == 'linux'"]
Expand All @@ -24,7 +24,7 @@ where = ["src"]
[project.urls]
url = "https://github.com/elias-jhsph/jarvis-conversationalist"
[options]
python_requires = ">=3.11"
python_requires = ">=3.11.8"
[build-system]
requires = ["setuptools>=61.0", "wheel"]

Expand Down
4 changes: 2 additions & 2 deletions src/jarvis_conversationalist/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@
if sys.platform == 'linux':
__import__('pysqlite3')
sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
__version__ = '0.4.6'
# Path: src/jarvis_conversationalist/conversationalist.py
__version__ = '0.5.0'
# Path: src/jarvis_conversationalist/conversationalist.py
35 changes: 29 additions & 6 deletions src/jarvis_conversationalist/assistant_history.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,25 @@ def strip_entry(entry: dict or list):
return {"role": entry["role"], "content": entry["content"]}


def denormalize(entry: dict or list):
    """
    Recursively convert any list- or dict-valued fields of a conversation entry to strings.

    Chroma metadata fields must be scalar, so nested structures (e.g. tool-call
    payloads) are flattened to their ``str()`` representation. Scalars and other
    values are passed through unchanged. A list input is processed element-wise.

    :param entry: A conversation entry, or a list of such entries.
    :type entry: dict or list
    :return: The entry (or list of entries) with every list/dict value replaced
        by its string representation.
    :rtype: dict or list
    """
    if isinstance(entry, list):
        # Process each element independently and preserve ordering.
        return [denormalize(el) for el in entry]
    # Stringify only nested containers; leave scalar values untouched.
    return {k: str(v) if isinstance(v, (list, dict)) else v for k, v in entry.items()}


class AssistantHistory:
"""
A class to manage the Assistant's conversation history, including storing, reducing,
Expand Down Expand Up @@ -305,6 +324,10 @@ def add_context(self, context: list) -> None:
ids = []
seed = str(uuid.uuid4())
for i in range(len(context)):
for k in context[i].keys():
test = context[i][k]
if isinstance(test, list) or isinstance(test, dict):
context[i][k] = str(test)
context[i]['id'] = self.create_id(seed)
ids.append(context[i]['id'])
if first_id is None:
Expand Down Expand Up @@ -397,9 +420,9 @@ def reduce_context(self) -> None:
self.update_ltm()
self.to_summarize = []

def gather_context(self, query: str or list, minimum_recent_history_length: int = 2, max_tokens: int = None,
def gather_context(self, query: str or list, minimum_recent_history_length: int = 5, max_tokens: int = None,
only_summaries: bool = False, only_necessary_fields: bool = True,
distance_cut_off: float = None, query_n_max: int = 3, verbose: bool = False) -> List[dict]:
distance_cut_off: float = None, query_n_max: int = 6, verbose: bool = False) -> List[dict]:
"""
Gathers relevant context for a given query from the chat assistant's history.
Expand Down Expand Up @@ -563,7 +586,7 @@ def gather_context(self, query: str or list, minimum_recent_history_length: int
if only_necessary_fields:
context_list = [strip_entry(entry) for entry in context_list]

self.last_context = system_message + context_list
self.last_context = denormalize(system_message + context_list)

if verbose:
from pprint import pprint
Expand Down Expand Up @@ -606,7 +629,7 @@ def get_history_from_id_and_earlier(self, hid=None, n_results=10):
entry["content"] = results["documents"][pos]
entry["id"] = tid
output.append(entry)
return output
return denormalize(output)

def get_history_from_last_batch(self):
"""
Expand Down Expand Up @@ -678,7 +701,7 @@ def get_batches(self, bids):
time_stamp['content'] = stamp
time_stamp['role'] = 'system'
time_stamp = [time_stamp]
return time_stamp + output
return time_stamp + denormalize(output)

def get_summary_from_id_and_earlier(self, id=None, n_results=10):
"""
Expand Down Expand Up @@ -708,7 +731,7 @@ def get_summary_from_id_and_earlier(self, id=None, n_results=10):
output.append(entry)
if results["ids"].count(str(tid)) > 1:
warnings.warn("Chat assistant: More than one summary found for id {}".format(id))
return output
return denormalize(output)

def truncate_input_context(self, context):
"""
Expand Down
12 changes: 6 additions & 6 deletions src/jarvis_conversationalist/openai_functions/internet_helper.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,13 +10,13 @@
client = OpenAI()
_utils._logs.logger.setLevel("CRITICAL")

basic_model = "gpt-3.5-turbo-16k"
advanced_model = "gpt-4"
basic_model = "gpt-3.5-turbo-1106"
advanced_model = "gpt-4-turbo-preview"
enc = encoding_for_model(advanced_model)
temperature = 0.6


def search(search_term: str, num_results: int = 10, advanced: bool = False) -> dict:
def search(search_term: str, num_results: int = 15, advanced: bool = False) -> dict:
"""
Searches for a term using either Google Custom Search API or a free alternative.
Expand Down Expand Up @@ -173,7 +173,7 @@ def synthesize_information(summaries: list, query: str) -> str:
response = client.chat.completions.create(model=advanced_model,
messages=[{"role": "system", "content": f"Given the following summaries about '{query}', please synthesize "
f"a coherent and comprehensive response:\n{summaries_text}\n"}],
max_tokens=500,
max_tokens=1000,
n=1,
temperature=temperature)
synthesized_info = response.choices[0].message.content
Expand Down Expand Up @@ -201,7 +201,7 @@ def truncate_content(content: str, max_tokens: int = 3500) -> str:
return content


def search_helper(query: str, result_number: int = 6, skip: threading.Event = None) -> dict:
def search_helper(query: str, result_number: int = 15, skip: threading.Event = None) -> dict:
"""
Helper function for search.
Expand Down Expand Up @@ -333,7 +333,7 @@ def generate_final_prompt(simplified_output: dict, max_tokens: int = 1800) -> st
return prompt


def create_internet_context(query: str, result_number: int = 10,
def create_internet_context(query: str, result_number: int = 15,
max_tokens: int = 1800, skip: threading.Event = None) -> tuple:
"""
Creates the internet context for the chatbot.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
client = OpenAI()

temperature = 0.6
basic_model = "gpt-3.5-turbo-16k"
basic_model = "gpt-3.5-turbo-1106"


def geocoder_api(query):
Expand All @@ -25,13 +25,13 @@ def summarize(query:str, content: str) -> str:
:rtype: str
"""
response = client.chat.completions.create(model=basic_model,
messages=[{'role': 'system', 'content': f'There was a search for the following weather:\n"{query}"\nPlease '
f'provide a concise summary of the following content while keeping '
f'mind what will best respond to the query:\n{content}\n'}],
max_tokens=400,
n=1,
stop=None,
temperature=temperature)
messages=[{'role': 'system', 'content': f'There was a search for the following weather:\n"{query}"\nPlease '
f'provide a concise summary of the following content while keeping '
f'mind what will best respond to the query:\n{content}\n'}],
max_tokens=600,
n=1,
stop=None,
temperature=temperature)
summary = response.choices[0].message.content
return summary

Expand Down
Loading

0 comments on commit 4532186

Please sign in to comment.