From e4f531fbed9e13d98ef71ce8156f8ac6e29e9e2f Mon Sep 17 00:00:00 2001
From: Xuhui Zhou <zhouxuhui2018@gmail.com>
Date: Tue, 16 Jan 2024 07:47:47 +0000
Subject: [PATCH 01/15] Add new model version for gpt-3.5-turbo-finetuned

---
 sotopia/generation_utils/generate.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sotopia/generation_utils/generate.py b/sotopia/generation_utils/generate.py
index fe48972c2..9d5fa0beb 100644
--- a/sotopia/generation_utils/generate.py
+++ b/sotopia/generation_utils/generate.py
@@ -50,6 +50,7 @@
     "togethercomputer/llama-2-70b-chat",
     "togethercomputer/mpt-30b-chat",
     "gpt-3.5-turbo",
+    "gpt-3.5-turbo-finetuned",
     "text-davinci-003",
     "gpt-4",
     "gpt-4-turbo",
@@ -294,6 +295,7 @@ def _return_fixed_model_version(
 ) -> str:
     return {
         "gpt-3.5-turbo": "gpt-3.5-turbo-0613",
+        "gpt-3.5-turbo-finetuned": "ft:gpt-3.5-turbo-0613:academicscmu::8hK7f26o",
         "gpt-4": "gpt-4-0613",
         "gpt-4-turbo": "gpt-4-1106-preview",
     }[model_name]

From b8f2a2bfeeb3937f4b5d8ff37cffb7a5b06803be Mon Sep 17 00:00:00 2001
From: XuhuiZhou <zhouxuhui2018@gmail.com>
Date: Mon, 29 Jan 2024 10:04:34 -0500
Subject: [PATCH 02/15] Create Craigslist bargains scenarios

---
 examples/generate_scenarios.py | 71 +++++++++++++++++++++++++++++++---
 1 file changed, 65 insertions(+), 6 deletions(-)

diff --git a/examples/generate_scenarios.py b/examples/generate_scenarios.py
index f376d713b..d6efb33ba 100644
--- a/examples/generate_scenarios.py
+++ b/examples/generate_scenarios.py
@@ -1,5 +1,7 @@
 import ast
 import asyncio
+import json
+import random
 from typing import Any, cast
 
 import pandas as pd
@@ -37,11 +39,15 @@ def add_env_profiles(
 def check_existing_envs(
     env_profile: dict[str, Any], existing_envs: pd.DataFrame
 ) -> bool:
-    if (
-        env_profile["scenario"] in existing_envs["scenario"].to_list()
-        and str(env_profile["agent_goals"])
-        in existing_envs["agent_goals"].to_list()
-    ):
+    try:
+        if (
+            env_profile["scenario"] in existing_envs["scenario"].to_list()
+            and str(env_profile["agent_goals"])
+            in existing_envs["agent_goals"].to_list()
+        ):
+            return False
+    except KeyError:
+        print(env_profile)
         return False
     return True
 
@@ -50,7 +56,7 @@ def generate_newenv_profile(
     num: int,
     gen_model: LLM_Name = "gpt-4-turbo",
     temperature: float = 0.5,
-    type: str = "mutual_friend",
+    type: str = "craigslist_bargains",
 ) -> pd.DataFrame:
     env_profile_list = []  # type: ignore
     existing_envs = pd.read_csv(
@@ -70,6 +76,22 @@ def generate_newenv_profile(
             }
             if check_existing_envs(env_profile, existing_envs):
                 env_profile_list.append(env_profile)
+    elif type == "craigslist_bargains":
+        while len(env_profile_list) < num:
+            scenario, social_goals = asyncio.run(
+                generate_craigslist_bargains_envs()
+            )
+            env_profile = {
+                "codename": f"craigslist_bargains_{len(env_profile_list)+10}",
+                "scenario": scenario,
+                "agent_goals": social_goals,
+                "relationship": RelationshipType.stranger,
+                "age_constraint": "[(18, 80), (18, 80)]",
+                "occupation_constraint": None,
+                "source": "craigslist_bargains",
+            }
+            if check_existing_envs(env_profile, existing_envs):
+                env_profile_list.append(env_profile)
     else:
         raise NotImplementedError("Only mutual_friend is supported for now")
     return pd.DataFrame(env_profile_list)
@@ -116,5 +138,42 @@ def auto_generate_scenarios(
     Migrator().run()
 
 
+@app.command()
+def upload_env_profiles(
+    filepath: str = "./data/all_environment_profile.json",
+) -> None:
+    """
+    Function to upload environment profiles from csv file
+    """
+    env_profile_list = []  # type: ignore
+    existing_envs = pd.read_csv(
+        "./data/env_profiles_v1.csv"
+    )  # TODO: find a better way to deal with this
+    current_envs = json.load(open(filepath, "r"))
+    for key in current_envs:
+        env_profile = current_envs[key]
+        if env_profile and check_existing_envs(env_profile, existing_envs):
+            del env_profile["pk"]
+            env_profile_list.append(env_profile)
+    # randomly sample 210 envs
+    env_profile_list = random.sample(env_profile_list, 240)
+    env_profiles = add_env_profiles(env_profile_list)
+    print("New env profiles added to database:")
+    print(len(env_profiles))
+
+    count = 0
+    for env_profile in env_profiles:
+        assert env_profile.pk is not None
+        try:
+            _sample_env_agent_combo_and_push_to_db(env_profile.pk)
+            count += 1
+        except:
+            EnvironmentProfile.delete(env_profile.pk)
+            pass
+    print(f"New env-agent combo added to database: {count}")
+
+    Migrator().run()
+
+
 if __name__ == "__main__":
     app()

From bd50e024b11044205fc6a547148fb822670e6d12 Mon Sep 17 00:00:00 2001
From: Xuhui Zhou <zhouxuhui2018@gmail.com>
Date: Mon, 29 Jan 2024 16:24:16 +0000
Subject: [PATCH 03/15] clean envs

---
 examples/generate_scenarios.py | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/examples/generate_scenarios.py b/examples/generate_scenarios.py
index f376d713b..dfbf17cd6 100644
--- a/examples/generate_scenarios.py
+++ b/examples/generate_scenarios.py
@@ -7,7 +7,7 @@
 from experiment_eval import _sample_env_agent_combo_and_push_to_db
 from redis_om import Migrator
 
-from sotopia.database import EnvironmentProfile
+from sotopia.database import EnvironmentProfile, EnvAgentComboStorage
 from sotopia.database.persistent_profile import RelationshipType
 from sotopia.generation_utils import (
     LLM_Name,
@@ -116,5 +116,18 @@ def auto_generate_scenarios(
     Migrator().run()
 
 
+@app.command()
+def clean_env_wo_combos() -> None:
+    """
+    Function to clean up env-agent combos in the database
+    """
+    env_agent_combos = list(EnvAgentComboStorage.all_pks())
+    envs_id_in_combos = set([EnvAgentComboStorage.get(env_agent_combo).env_id for env_agent_combo in env_agent_combos])
+    envs = list(EnvironmentProfile.all_pks())
+    for env in envs:
+        if env not in envs_id_in_combos:
+            EnvironmentProfile.delete(env)
+        
+
 if __name__ == "__main__":
     app()

From 52b079701d75d48f024692051d48fabc953342ef Mon Sep 17 00:00:00 2001
From: XuhuiZhou <zhouxuhui2018@gmail.com>
Date: Tue, 30 Jan 2024 11:18:50 -0500
Subject: [PATCH 04/15] update scripts

---
 .gitignore                           |  4 +---
 scripts/evaluate_finetuned_full.sh   | 12 ++++++++++++
 sotopia/generation_utils/generate.py |  2 +-
 3 files changed, 14 insertions(+), 4 deletions(-)
 create mode 100644 scripts/evaluate_finetuned_full.sh

diff --git a/.gitignore b/.gitignore
index 80953edfb..94134e147 100644
--- a/.gitignore
+++ b/.gitignore
@@ -140,6 +140,4 @@ deprecated/*
 *.csv
 
 #backup
-backup/*
-
-scripts/*
+backup/*
\ No newline at end of file
diff --git a/scripts/evaluate_finetuned_full.sh b/scripts/evaluate_finetuned_full.sh
new file mode 100644
index 000000000..94697b1ac
--- /dev/null
+++ b/scripts/evaluate_finetuned_full.sh
@@ -0,0 +1,12 @@
+python examples/experiment_eval.py \
+ --gin_file sotopia_conf/generation_utils_conf/generate.gin \
+ --gin_file sotopia_conf/server_conf/server.gin \
+ --gin_file sotopia_conf/run_async_server_in_batch.gin \
+ '--gin.ENV_IDS=[]' \
+ '--gin.SCRIPT_MODEL="gpt-3.5-turbo-finetuned"' \
+ '--gin.BATCH_SIZE=5' \
+ '--gin.TAG="finetuned_eval_full"' \
+ '--gin.TAG_TO_CHECK_EXISTING_EPISODES="finetuned_eval_full"' \
+ '--gin.PUSH_TO_DB=True' \
+ '--gin.VERBOSE=False' \
+ '--gin.LITE=True' \
\ No newline at end of file
diff --git a/sotopia/generation_utils/generate.py b/sotopia/generation_utils/generate.py
index 9d5fa0beb..c3b2836ad 100644
--- a/sotopia/generation_utils/generate.py
+++ b/sotopia/generation_utils/generate.py
@@ -295,7 +295,7 @@ def _return_fixed_model_version(
 ) -> str:
     return {
         "gpt-3.5-turbo": "gpt-3.5-turbo-0613",
-        "gpt-3.5-turbo-finetuned": "ft:gpt-3.5-turbo-0613:academicscmu::8hK7f26o",
+        "gpt-3.5-turbo-finetuned": "ft:gpt-3.5-turbo-0613:academicscmu::8mbqt3SF",
         "gpt-4": "gpt-4-0613",
         "gpt-4-turbo": "gpt-4-1106-preview",
     }[model_name]

From 02a2d842bbe85448852762f692b8df3fb0611810 Mon Sep 17 00:00:00 2001
From: XuhuiZhou <zhouxuhui2018@gmail.com>
Date: Tue, 30 Jan 2024 11:19:43 -0500
Subject: [PATCH 05/15] fix styles

---
 .gitignore                         |  2 +-
 examples/generate_scenarios.py     | 12 +++++++++---
 scripts/evaluate_finetuned_full.sh |  2 +-
 3 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/.gitignore b/.gitignore
index 94134e147..b85ee3e7c 100644
--- a/.gitignore
+++ b/.gitignore
@@ -140,4 +140,4 @@ deprecated/*
 *.csv
 
 #backup
-backup/*
\ No newline at end of file
+backup/*
diff --git a/examples/generate_scenarios.py b/examples/generate_scenarios.py
index d60751596..ee09aa563 100644
--- a/examples/generate_scenarios.py
+++ b/examples/generate_scenarios.py
@@ -9,7 +9,7 @@
 from experiment_eval import _sample_env_agent_combo_and_push_to_db
 from redis_om import Migrator
 
-from sotopia.database import EnvironmentProfile, EnvAgentComboStorage
+from sotopia.database import EnvAgentComboStorage, EnvironmentProfile
 from sotopia.database.persistent_profile import RelationshipType
 from sotopia.generation_utils import (
     LLM_Name,
@@ -144,12 +144,18 @@ def clean_env_wo_combos() -> None:
     Function to clean up env-agent combos in the database
     """
     env_agent_combos = list(EnvAgentComboStorage.all_pks())
-    envs_id_in_combos = set([EnvAgentComboStorage.get(env_agent_combo).env_id for env_agent_combo in env_agent_combos])
+    envs_id_in_combos = set(
+        [
+            EnvAgentComboStorage.get(env_agent_combo).env_id
+            for env_agent_combo in env_agent_combos
+        ]
+    )
     envs = list(EnvironmentProfile.all_pks())
     for env in envs:
         if env not in envs_id_in_combos:
             EnvironmentProfile.delete(env)
-        
+
+
 @app.command()
 def upload_env_profiles(
     filepath: str = "./data/all_environment_profile.json",
diff --git a/scripts/evaluate_finetuned_full.sh b/scripts/evaluate_finetuned_full.sh
index 94697b1ac..6d42a819e 100644
--- a/scripts/evaluate_finetuned_full.sh
+++ b/scripts/evaluate_finetuned_full.sh
@@ -9,4 +9,4 @@ python examples/experiment_eval.py \
  '--gin.TAG_TO_CHECK_EXISTING_EPISODES="finetuned_eval_full"' \
  '--gin.PUSH_TO_DB=True' \
  '--gin.VERBOSE=False' \
- '--gin.LITE=True' \
\ No newline at end of file
+ '--gin.LITE=True' \

From aab0c6a9e2dc8cd222d5cba56f0d48a3055a090e Mon Sep 17 00:00:00 2001
From: XuhuiZhou <zhouxuhui2018@gmail.com>
Date: Tue, 30 Jan 2024 11:37:18 -0500
Subject: [PATCH 06/15] fix mypy

---
 examples/generate_scenarios.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/examples/generate_scenarios.py b/examples/generate_scenarios.py
index ee09aa563..8ae76bb06 100644
--- a/examples/generate_scenarios.py
+++ b/examples/generate_scenarios.py
@@ -163,7 +163,7 @@ def upload_env_profiles(
     """
     Function to upload environment profiles from csv file
     """
-    env_profile_list = []  # type: ignore
+    env_profile_list = []
     existing_envs = pd.read_csv(
         "./data/env_profiles_v1.csv"
     )  # TODO: find a better way to deal with this

From 5ddb6c5c1fa3eade951335b33bd20d27a1793437 Mon Sep 17 00:00:00 2001
From: XuhuiZhou <zhouxuhui2018@gmail.com>
Date: Tue, 30 Jan 2024 11:42:42 -0500
Subject: [PATCH 07/15] add scripts

---
 {exp_scripts => scripts}/exp_instruction.md               | 0
 {exp_scripts => scripts}/fix_missing_episodes_with_tag.sh | 0
 {exp_scripts => scripts}/run_all.sh                       | 0
 {exp_scripts => scripts}/run_interaction.sh               | 0
 {exp_scripts => scripts}/run_script_full.sh               | 0
 5 files changed, 0 insertions(+), 0 deletions(-)
 rename {exp_scripts => scripts}/exp_instruction.md (100%)
 rename {exp_scripts => scripts}/fix_missing_episodes_with_tag.sh (100%)
 rename {exp_scripts => scripts}/run_all.sh (100%)
 rename {exp_scripts => scripts}/run_interaction.sh (100%)
 rename {exp_scripts => scripts}/run_script_full.sh (100%)

diff --git a/exp_scripts/exp_instruction.md b/scripts/exp_instruction.md
similarity index 100%
rename from exp_scripts/exp_instruction.md
rename to scripts/exp_instruction.md
diff --git a/exp_scripts/fix_missing_episodes_with_tag.sh b/scripts/fix_missing_episodes_with_tag.sh
similarity index 100%
rename from exp_scripts/fix_missing_episodes_with_tag.sh
rename to scripts/fix_missing_episodes_with_tag.sh
diff --git a/exp_scripts/run_all.sh b/scripts/run_all.sh
similarity index 100%
rename from exp_scripts/run_all.sh
rename to scripts/run_all.sh
diff --git a/exp_scripts/run_interaction.sh b/scripts/run_interaction.sh
similarity index 100%
rename from exp_scripts/run_interaction.sh
rename to scripts/run_interaction.sh
diff --git a/exp_scripts/run_script_full.sh b/scripts/run_script_full.sh
similarity index 100%
rename from exp_scripts/run_script_full.sh
rename to scripts/run_script_full.sh

From df1f9d529b0823c125a14a10ab2a525771da56c0 Mon Sep 17 00:00:00 2001
From: XuhuiZhou <zhouxuhui2018@gmail.com>
Date: Tue, 30 Jan 2024 11:45:31 -0500
Subject: [PATCH 08/15] add instruction

---
 scripts/exp_instruction.md | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/scripts/exp_instruction.md b/scripts/exp_instruction.md
index 1ce481333..c587d4b09 100644
--- a/scripts/exp_instruction.md
+++ b/scripts/exp_instruction.md
@@ -1,5 +1,14 @@
+# Agent vs Storyteller Scripts
+
+### Basic Scripts
 Here are some of the script for running {gpt-3.5-turbo, mixtral-7b-moe} under {normal interaction, omniscient interaction, script generation} mode in {normal, lite} setting.
 If you need to run all interaction mode, you can use `run_all.sh`, the usage is `Usage: ./run_all.sh <model_name> <tag_base> <lite>`. For example, `./run_all.sh gpt-3.5-turbo exp0128 True`. You may find model_name in `LLM_Name`, and currently we are using `mistralai/Mixtral-8x7B-Instruct-v0.1` and `gpt-3.5-turbo`.
 If you want to run mode separately, you can use `run_interaction.sh` or `run_script_full.sh`.
 After running the above script, you may specify tags and fix those error episodes using `./fix_missing_episodes_with_tag.sh`.
 Current `fix_missing_episodes_with_tag.py` first detects erroneous episodes, delete them and regenerate them.
+
+### Fine-tuning
+
+* `evaluate_finetuned_full.sh`: evaluate the fine-tuned model (gpt-3.5 finetuned on the full dataset) on the sotopia lite setting.
+
+

From ef59d916ba5dccc83327d1b59f094f3d3dc1a3b5 Mon Sep 17 00:00:00 2001
From: XuhuiZhou <zhouxuhui2018@gmail.com>
Date: Tue, 30 Jan 2024 11:47:02 -0500
Subject: [PATCH 09/15] improve format

---
 scripts/exp_instruction.md | 2 --
 1 file changed, 2 deletions(-)

diff --git a/scripts/exp_instruction.md b/scripts/exp_instruction.md
index c587d4b09..075e19485 100644
--- a/scripts/exp_instruction.md
+++ b/scripts/exp_instruction.md
@@ -10,5 +10,3 @@ Current `fix_missing_episodes_with_tag.py` first detects erroneous episodes, del
 ### Fine-tuning
 
 * `evaluate_finetuned_full.sh`: evaluate the fine-tuned model (gpt-3.5 finetuned on the full dataset) on the sotopia lite setting.
-
-

From 5f342a96d1991f85fc6bdd3fb566c77e0dd43afb Mon Sep 17 00:00:00 2001
From: XuhuiZhou <zhouxuhui2018@gmail.com>
Date: Wed, 31 Jan 2024 14:07:44 -0500
Subject: [PATCH 10/15] update scripts

---
 scripts/evaluate_finetuned_full.sh | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/scripts/evaluate_finetuned_full.sh b/scripts/evaluate_finetuned_full.sh
index 6d42a819e..b0934b3a1 100644
--- a/scripts/evaluate_finetuned_full.sh
+++ b/scripts/evaluate_finetuned_full.sh
@@ -4,9 +4,9 @@ python examples/experiment_eval.py \
  --gin_file sotopia_conf/run_async_server_in_batch.gin \
  '--gin.ENV_IDS=[]' \
  '--gin.SCRIPT_MODEL="gpt-3.5-turbo-finetuned"' \
- '--gin.BATCH_SIZE=5' \
- '--gin.TAG="finetuned_eval_full"' \
- '--gin.TAG_TO_CHECK_EXISTING_EPISODES="finetuned_eval_full"' \
+ '--gin.BATCH_SIZE=1' \
+ '--gin.TAG="finetuned_eval_full_sotopia_normal"' \
+ '--gin.TAG_TO_CHECK_EXISTING_EPISODES="finetuned_eval_full_sotopia_normal"' \
  '--gin.PUSH_TO_DB=True' \
  '--gin.VERBOSE=False' \
  '--gin.LITE=True' \

From f186c0c350a8f2bf445c77165b1f718575b98124 Mon Sep 17 00:00:00 2001
From: XuhuiZhou <zhouxuhui2018@gmail.com>
Date: Thu, 1 Feb 2024 21:43:12 -0500
Subject: [PATCH 11/15] fix model import error

---
 scripts/evaluate_finetuned_full.sh   | 13 ++++++++-----
 sotopia/generation_utils/generate.py |  7 +++----
 2 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/scripts/evaluate_finetuned_full.sh b/scripts/evaluate_finetuned_full.sh
index b0934b3a1..613dff369 100644
--- a/scripts/evaluate_finetuned_full.sh
+++ b/scripts/evaluate_finetuned_full.sh
@@ -1,12 +1,15 @@
+MODEL_NAME=gpt-3.5-turbo-finetuned
+
 python examples/experiment_eval.py \
  --gin_file sotopia_conf/generation_utils_conf/generate.gin \
  --gin_file sotopia_conf/server_conf/server.gin \
  --gin_file sotopia_conf/run_async_server_in_batch.gin \
  '--gin.ENV_IDS=[]' \
- '--gin.SCRIPT_MODEL="gpt-3.5-turbo-finetuned"' \
- '--gin.BATCH_SIZE=1' \
- '--gin.TAG="finetuned_eval_full_sotopia_normal"' \
- '--gin.TAG_TO_CHECK_EXISTING_EPISODES="finetuned_eval_full_sotopia_normal"' \
+ "--gin.AGENT1_MODEL=\"${MODEL_NAME}\"" \
+ "--gin.AGENT2_MODEL=\"${MODEL_NAME}\"" \
+ '--gin.BATCH_SIZE=5' \
+ '--gin.TAG="finetuned_gpt3.5"' \
+ '--gin.TAG_TO_CHECK_EXISTING_EPISODES="finetuned_gpt3.5"' \
  '--gin.PUSH_TO_DB=True' \
  '--gin.VERBOSE=False' \
- '--gin.LITE=True' \
+ '--gin.LITE=False' \
diff --git a/sotopia/generation_utils/generate.py b/sotopia/generation_utils/generate.py
index 9add102b9..18257db77 100644
--- a/sotopia/generation_utils/generate.py
+++ b/sotopia/generation_utils/generate.py
@@ -292,11 +292,11 @@ def _type(self) -> str:
 
 
 def _return_fixed_model_version(
-    model_name: Literal["gpt-3.5-turbo", "gpt-4", "gpt-4-turbo"]
+    model_name: Literal["gpt-3.5-turbo", "gpt-4", "gpt-4-turbo", "gpt-3.5-turbo-finetuned"]
 ) -> str:
     return {
         "gpt-3.5-turbo": "gpt-3.5-turbo-0613",
-        "gpt-3.5-turbo-finetuned": "ft:gpt-3.5-turbo-0613:academicscmu::8mbqt3SF",
+        "gpt-3.5-turbo-finetuned": "ft:gpt-3.5-turbo-0613:academicscmu::8nY2zgdt",
         "gpt-4": "gpt-4-0613",
         "gpt-4-turbo": "gpt-4-1106-preview",
     }[model_name]
@@ -315,7 +315,7 @@ def obtain_chain(
     Using langchain to sample profiles for participants
     """
     match model_name:
-        case "gpt-3.5-turbo" | "gpt-4" | "gpt-4-turbo":
+        case "gpt-3.5-turbo" | "gpt-4" | "gpt-4-turbo" | "gpt-3.5-turbo-finetuned":
             human_message_prompt = HumanMessagePromptTemplate(
                 prompt=PromptTemplate(
                     template=template,
@@ -783,7 +783,6 @@ async def agenerate_action(
                 Your action should follow the given format:
                 {format_instructions}
             """
-
         return await agenerate(
             model_name=model_name,
             template=template,

From 740c34ae9d59eba14d686c123a7c9160931724b3 Mon Sep 17 00:00:00 2001
From: XuhuiZhou <zhouxuhui2018@gmail.com>
Date: Thu, 1 Feb 2024 21:43:33 -0500
Subject: [PATCH 12/15] fix format

---
 sotopia/generation_utils/generate.py | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/sotopia/generation_utils/generate.py b/sotopia/generation_utils/generate.py
index 18257db77..1ed00a8d5 100644
--- a/sotopia/generation_utils/generate.py
+++ b/sotopia/generation_utils/generate.py
@@ -292,7 +292,9 @@ def _type(self) -> str:
 
 
 def _return_fixed_model_version(
-    model_name: Literal["gpt-3.5-turbo", "gpt-4", "gpt-4-turbo", "gpt-3.5-turbo-finetuned"]
+    model_name: Literal[
+        "gpt-3.5-turbo", "gpt-4", "gpt-4-turbo", "gpt-3.5-turbo-finetuned"
+    ]
 ) -> str:
     return {
         "gpt-3.5-turbo": "gpt-3.5-turbo-0613",

From ae4de7d46ff0bddec6aa6d197935cc3efbf616a6 Mon Sep 17 00:00:00 2001
From: XuhuiZhou <zhouxuhui2018@gmail.com>
Date: Thu, 1 Feb 2024 21:49:48 -0500
Subject: [PATCH 13/15] get rid of magic number

---
 examples/generate_scenarios.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/examples/generate_scenarios.py b/examples/generate_scenarios.py
index 8ae76bb06..56246d21d 100644
--- a/examples/generate_scenarios.py
+++ b/examples/generate_scenarios.py
@@ -161,7 +161,8 @@ def upload_env_profiles(
     filepath: str = "./data/all_environment_profile.json",
 ) -> None:
     """
-    Function to upload environment profiles from csv file
+    Function to upload environment profiles from json file
+    The json file format is a direct dump from the database
     """
     env_profile_list = []
     existing_envs = pd.read_csv(
@@ -173,8 +174,6 @@ def upload_env_profiles(
         if env_profile and check_existing_envs(env_profile, existing_envs):
             del env_profile["pk"]
             env_profile_list.append(env_profile)
-    # randomly sample 210 envs
-    env_profile_list = random.sample(env_profile_list, 240)
     env_profiles = add_env_profiles(env_profile_list)
     print("New env profiles added to database:")
     print(len(env_profiles))

From 63235f5053e4900ef23d90bedea0472c549d4aba Mon Sep 17 00:00:00 2001
From: XuhuiZhou <zhouxuhui2018@gmail.com>
Date: Sun, 4 Feb 2024 14:52:48 -0500
Subject: [PATCH 14/15] update model

---
 scripts/evaluate_finetuned_MF.sh     | 16 ++++++++++++++++
 sotopia/generation_utils/generate.py |  8 ++++----
 2 files changed, 20 insertions(+), 4 deletions(-)
 create mode 100644 scripts/evaluate_finetuned_MF.sh

diff --git a/scripts/evaluate_finetuned_MF.sh b/scripts/evaluate_finetuned_MF.sh
new file mode 100644
index 000000000..7464c4c95
--- /dev/null
+++ b/scripts/evaluate_finetuned_MF.sh
@@ -0,0 +1,16 @@
+MODEL_NAME_1=gpt-3.5-turbo-ft-MF
+MODEL_NAME_2=gpt-3.5-turbo
+
+python examples/experiment_eval.py \
+ --gin_file sotopia_conf/generation_utils_conf/generate.gin \
+ --gin_file sotopia_conf/server_conf/server.gin \
+ --gin_file sotopia_conf/run_async_server_in_batch.gin \
+ "--gin.ENV_IDS=['01H7VFHPKA2GGPPNVJWV967HZC', '01H7VFHPHWA2CYG7BC82NS4XH1', '01H7VFHPH567HKQRE0C745KH9C', '01H7VFHPMS6AJY0PFGGCFFK5GX', '01H7VFHPJKR16MD1KC71V4ZRCF', '01H7VFHPQ1712DHGTMPQFTXH02', '01H7VFHPP9SPQ8W6583JFZ7HZC', '01H7VFHPM3NVVKSGCCB4S10465', '01H7VFHPGABSWQXTACCC8C3X2F', '01H7VFHPNHZ2YYRHP0GXARD550']" \
+ "--gin.AGENT1_MODEL=\"${MODEL_NAME_1}\"" \
+ "--gin.AGENT2_MODEL=\"${MODEL_NAME_2}\"" \
+ '--gin.BATCH_SIZE=1' \
+ '--gin.TAG="finetuned_gpt3.5_gpt3.5ft_MF"' \
+ '--gin.TAG_TO_CHECK_EXISTING_EPISODES="finetuned_gpt3.5_gpt3.5ft_MF"' \
+ '--gin.PUSH_TO_DB=True' \
+ '--gin.VERBOSE=False' \
+ '--gin.LITE=False' \
\ No newline at end of file
diff --git a/sotopia/generation_utils/generate.py b/sotopia/generation_utils/generate.py
index 1ed00a8d5..4e94d5240 100644
--- a/sotopia/generation_utils/generate.py
+++ b/sotopia/generation_utils/generate.py
@@ -51,6 +51,7 @@
     "togethercomputer/mpt-30b-chat",
     "gpt-3.5-turbo",
     "gpt-3.5-turbo-finetuned",
+    "gpt-3.5-turbo-ft-MF",
     "text-davinci-003",
     "gpt-4",
     "gpt-4-turbo",
@@ -292,13 +293,12 @@ def _type(self) -> str:
 
 
 def _return_fixed_model_version(
-    model_name: Literal[
-        "gpt-3.5-turbo", "gpt-4", "gpt-4-turbo", "gpt-3.5-turbo-finetuned"
-    ]
+    model_name: LLM_Name
 ) -> str:
     return {
         "gpt-3.5-turbo": "gpt-3.5-turbo-0613",
         "gpt-3.5-turbo-finetuned": "ft:gpt-3.5-turbo-0613:academicscmu::8nY2zgdt",
+        "gpt-3.5-turbo-ft-MF": "ft:gpt-3.5-turbo-0613:academicscmu::8nuER4bO",
         "gpt-4": "gpt-4-0613",
         "gpt-4-turbo": "gpt-4-1106-preview",
     }[model_name]
@@ -317,7 +317,7 @@ def obtain_chain(
     Using langchain to sample profiles for participants
     """
     match model_name:
-        case "gpt-3.5-turbo" | "gpt-4" | "gpt-4-turbo" | "gpt-3.5-turbo-finetuned":
+        case "gpt-3.5-turbo" | "gpt-4" | "gpt-4-turbo" | "gpt-3.5-turbo-finetuned" | "gpt-3.5-turbo-ft-MF":
             human_message_prompt = HumanMessagePromptTemplate(
                 prompt=PromptTemplate(
                     template=template,

From 4a3541082a37db1140e5ab24e7a9e1cda5db6ffc Mon Sep 17 00:00:00 2001
From: XuhuiZhou <zhouxuhui2018@gmail.com>
Date: Sun, 4 Feb 2024 14:58:24 -0500
Subject: [PATCH 15/15] fix format

---
 scripts/evaluate_finetuned_MF.sh     | 2 +-
 sotopia/generation_utils/generate.py | 4 +---
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/scripts/evaluate_finetuned_MF.sh b/scripts/evaluate_finetuned_MF.sh
index 7464c4c95..fbc1474f9 100644
--- a/scripts/evaluate_finetuned_MF.sh
+++ b/scripts/evaluate_finetuned_MF.sh
@@ -13,4 +13,4 @@ python examples/experiment_eval.py \
  '--gin.TAG_TO_CHECK_EXISTING_EPISODES="finetuned_gpt3.5_gpt3.5ft_MF"' \
  '--gin.PUSH_TO_DB=True' \
  '--gin.VERBOSE=False' \
- '--gin.LITE=False' \
\ No newline at end of file
+ '--gin.LITE=False' \
diff --git a/sotopia/generation_utils/generate.py b/sotopia/generation_utils/generate.py
index 4e94d5240..26267c759 100644
--- a/sotopia/generation_utils/generate.py
+++ b/sotopia/generation_utils/generate.py
@@ -292,9 +292,7 @@ def _type(self) -> str:
         return "str"
 
 
-def _return_fixed_model_version(
-    model_name: LLM_Name
-) -> str:
+def _return_fixed_model_version(model_name: LLM_Name) -> str:
     return {
         "gpt-3.5-turbo": "gpt-3.5-turbo-0613",
         "gpt-3.5-turbo-finetuned": "ft:gpt-3.5-turbo-0613:academicscmu::8nY2zgdt",