From e4f531fbed9e13d98ef71ce8156f8ac6e29e9e2f Mon Sep 17 00:00:00 2001 From: Xuhui Zhou Date: Tue, 16 Jan 2024 07:47:47 +0000 Subject: [PATCH 01/15] Add new model version for gpt-3.5-turbo-finetuned --- sotopia/generation_utils/generate.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sotopia/generation_utils/generate.py b/sotopia/generation_utils/generate.py index fe48972c2..9d5fa0beb 100644 --- a/sotopia/generation_utils/generate.py +++ b/sotopia/generation_utils/generate.py @@ -50,6 +50,7 @@ "togethercomputer/llama-2-70b-chat", "togethercomputer/mpt-30b-chat", "gpt-3.5-turbo", + "gpt-3.5-turbo-finetuned", "text-davinci-003", "gpt-4", "gpt-4-turbo", @@ -294,6 +295,7 @@ def _return_fixed_model_version( ) -> str: return { "gpt-3.5-turbo": "gpt-3.5-turbo-0613", + "gpt-3.5-turbo-finetuned": "ft:gpt-3.5-turbo-0613:academicscmu::8hK7f26o", "gpt-4": "gpt-4-0613", "gpt-4-turbo": "gpt-4-1106-preview", }[model_name] From b8f2a2bfeeb3937f4b5d8ff37cffb7a5b06803be Mon Sep 17 00:00:00 2001 From: XuhuiZhou Date: Mon, 29 Jan 2024 10:04:34 -0500 Subject: [PATCH 02/15] create scenarios craig list --- examples/generate_scenarios.py | 71 +++++++++++++++++++++++++++++++--- 1 file changed, 65 insertions(+), 6 deletions(-) diff --git a/examples/generate_scenarios.py b/examples/generate_scenarios.py index f376d713b..d6efb33ba 100644 --- a/examples/generate_scenarios.py +++ b/examples/generate_scenarios.py @@ -1,5 +1,7 @@ import ast import asyncio +import json +import random from typing import Any, cast import pandas as pd @@ -37,11 +39,15 @@ def add_env_profiles( def check_existing_envs( env_profile: dict[str, Any], existing_envs: pd.DataFrame ) -> bool: - if ( - env_profile["scenario"] in existing_envs["scenario"].to_list() - and str(env_profile["agent_goals"]) - in existing_envs["agent_goals"].to_list() - ): + try: + if ( + env_profile["scenario"] in existing_envs["scenario"].to_list() + and str(env_profile["agent_goals"]) + in existing_envs["agent_goals"].to_list() + ): + return False + except KeyError: + print(env_profile) return False return True @@ -50,7 +56,7 @@ def generate_newenv_profile( num: int, gen_model: LLM_Name = "gpt-4-turbo", temperature: float = 0.5, - type: str = "mutual_friend", + type: str = "craigslist_bargains", ) -> pd.DataFrame: env_profile_list = [] # type: ignore existing_envs = pd.read_csv( @@ -70,6 +76,22 @@ def generate_newenv_profile( } if check_existing_envs(env_profile, existing_envs): env_profile_list.append(env_profile) + elif type == "craigslist_bargains": + while len(env_profile_list) < num: + scenario, social_goals = asyncio.run( + generate_craigslist_bargains_envs() + ) + env_profile = { + "codename": f"craigslist_bargains_{len(env_profile_list)+10}", + "scenario": scenario, + "agent_goals": social_goals, + "relationship": RelationshipType.stranger, + "age_constraint": "[(18, 80), (18, 80)]", + "occupation_constraint": None, + "source": "craigslist_bargains", + } + if check_existing_envs(env_profile, existing_envs): + env_profile_list.append(env_profile) else: raise NotImplementedError("Only mutual_friend is supported for now") return pd.DataFrame(env_profile_list) @@ -116,5 +138,42 @@ def auto_generate_scenarios( Migrator().run() +@app.command() +def upload_env_profiles( + filepath: str = "./data/all_environment_profile.json", +) -> None: + """ + Function to upload environment profiles from csv file + """ + env_profile_list = [] # type: ignore + existing_envs = pd.read_csv( + "./data/env_profiles_v1.csv" + ) # TODO: find a better way to deal with this + current_envs = json.load(open(filepath, "r")) + for key in current_envs: + env_profile = current_envs[key] + if env_profile and check_existing_envs(env_profile, existing_envs): + del env_profile["pk"] + env_profile_list.append(env_profile) + # randomly sample 210 envs + env_profile_list = random.sample(env_profile_list, 240) + env_profiles = add_env_profiles(env_profile_list) + print("New env profiles added to database:") + print(len(env_profiles)) + + count = 0 + for env_profile in env_profiles: + assert env_profile.pk is not None + try: + _sample_env_agent_combo_and_push_to_db(env_profile.pk) + count += 1 + except: + EnvironmentProfile.delete(env_profile.pk) + pass + print(f"New env-agent combo added to database: {count}") + + Migrator().run() + + if __name__ == "__main__": app() From bd50e024b11044205fc6a547148fb822670e6d12 Mon Sep 17 00:00:00 2001 From: Xuhui Zhou Date: Mon, 29 Jan 2024 16:24:16 +0000 Subject: [PATCH 03/15] clean envs --- examples/generate_scenarios.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/examples/generate_scenarios.py b/examples/generate_scenarios.py index f376d713b..dfbf17cd6 100644 --- a/examples/generate_scenarios.py +++ b/examples/generate_scenarios.py @@ -7,7 +7,7 @@ from experiment_eval import _sample_env_agent_combo_and_push_to_db from redis_om import Migrator -from sotopia.database import EnvironmentProfile +from sotopia.database import EnvironmentProfile, EnvAgentComboStorage from sotopia.database.persistent_profile import RelationshipType from sotopia.generation_utils import ( LLM_Name, @@ -116,5 +116,18 @@ def auto_generate_scenarios( Migrator().run() +@app.command() +def clean_env_wo_combos() -> None: + """ + Function to clean up env-agent combos in the database + """ + env_agent_combos = list(EnvAgentComboStorage.all_pks()) + envs_id_in_combos = set([EnvAgentComboStorage.get(env_agent_combo).env_id for env_agent_combo in env_agent_combos]) + envs = list(EnvironmentProfile.all_pks()) + for env in envs: + if env not in envs_id_in_combos: + EnvironmentProfile.delete(env) + + if __name__ == "__main__": app() From 52b079701d75d48f024692051d48fabc953342ef Mon Sep 17 00:00:00 2001 From: XuhuiZhou Date: Tue, 30 Jan 2024 11:18:50 -0500 Subject: [PATCH 04/15] update scripts --- .gitignore | 4 +--- scripts/evaluate_finetuned_full.sh | 12 ++++++++++++ sotopia/generation_utils/generate.py | 2 +- 3 files changed, 14 insertions(+), 4 deletions(-) create mode 100644 scripts/evaluate_finetuned_full.sh diff --git a/.gitignore b/.gitignore index 80953edfb..94134e147 100644 --- a/.gitignore +++ b/.gitignore @@ -140,6 +140,4 @@ deprecated/* *.csv #backup -backup/* - -scripts/* +backup/* \ No newline at end of file diff --git a/scripts/evaluate_finetuned_full.sh b/scripts/evaluate_finetuned_full.sh new file mode 100644 index 000000000..94697b1ac --- /dev/null +++ b/scripts/evaluate_finetuned_full.sh @@ -0,0 +1,12 @@ +python examples/experiment_eval.py \ + --gin_file sotopia_conf/generation_utils_conf/generate.gin \ + --gin_file sotopia_conf/server_conf/server.gin \ + --gin_file sotopia_conf/run_async_server_in_batch.gin \ + '--gin.ENV_IDS=[]' \ + '--gin.SCRIPT_MODEL="gpt-3.5-turbo-finetuned"' \ + '--gin.BATCH_SIZE=5' \ + '--gin.TAG="finetuned_eval_full"' \ + '--gin.TAG_TO_CHECK_EXISTING_EPISODES="finetuned_eval_full"' \ + '--gin.PUSH_TO_DB=True' \ + '--gin.VERBOSE=False' \ + '--gin.LITE=True' \ \ No newline at end of file diff --git a/sotopia/generation_utils/generate.py b/sotopia/generation_utils/generate.py index 9d5fa0beb..c3b2836ad 100644 --- a/sotopia/generation_utils/generate.py +++ b/sotopia/generation_utils/generate.py @@ -295,7 +295,7 @@ def _return_fixed_model_version( ) -> str: return { "gpt-3.5-turbo": "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-finetuned": "ft:gpt-3.5-turbo-0613:academicscmu::8hK7f26o", + "gpt-3.5-turbo-finetuned": "ft:gpt-3.5-turbo-0613:academicscmu::8mbqt3SF", "gpt-4": "gpt-4-0613", "gpt-4-turbo": "gpt-4-1106-preview", }[model_name] From 02a2d842bbe85448852762f692b8df3fb0611810 Mon Sep 17 00:00:00 2001 From: XuhuiZhou Date: Tue, 30 Jan 2024 11:19:43 -0500 Subject: [PATCH 05/15] fix styles --- .gitignore | 2 +- examples/generate_scenarios.py | 12 +++++++++--- scripts/evaluate_finetuned_full.sh | 2 +- 3 files changed, 11 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index 94134e147..b85ee3e7c 100644 --- a/.gitignore +++ b/.gitignore @@ -140,4 +140,4 @@ deprecated/* *.csv #backup -backup/* \ No newline at end of file +backup/* diff --git a/examples/generate_scenarios.py b/examples/generate_scenarios.py index d60751596..ee09aa563 100644 --- a/examples/generate_scenarios.py +++ b/examples/generate_scenarios.py @@ -9,7 +9,7 @@ from experiment_eval import _sample_env_agent_combo_and_push_to_db from redis_om import Migrator -from sotopia.database import EnvironmentProfile, EnvAgentComboStorage +from sotopia.database import EnvAgentComboStorage, EnvironmentProfile from sotopia.database.persistent_profile import RelationshipType from sotopia.generation_utils import ( LLM_Name, @@ -144,12 +144,18 @@ def clean_env_wo_combos() -> None: Function to clean up env-agent combos in the database """ env_agent_combos = list(EnvAgentComboStorage.all_pks()) - envs_id_in_combos = set([EnvAgentComboStorage.get(env_agent_combo).env_id for env_agent_combo in env_agent_combos]) + envs_id_in_combos = set( + [ + EnvAgentComboStorage.get(env_agent_combo).env_id + for env_agent_combo in env_agent_combos + ] + ) envs = list(EnvironmentProfile.all_pks()) for env in envs: if env not in envs_id_in_combos: EnvironmentProfile.delete(env) - + + @app.command() def upload_env_profiles( filepath: str = "./data/all_environment_profile.json", diff --git a/scripts/evaluate_finetuned_full.sh b/scripts/evaluate_finetuned_full.sh index 94697b1ac..6d42a819e 100644 --- a/scripts/evaluate_finetuned_full.sh +++ b/scripts/evaluate_finetuned_full.sh @@ -9,4 +9,4 @@ python examples/experiment_eval.py \ '--gin.TAG_TO_CHECK_EXISTING_EPISODES="finetuned_eval_full"' \ '--gin.PUSH_TO_DB=True' \ '--gin.VERBOSE=False' \ - '--gin.LITE=True' \ \ No newline at end of file + '--gin.LITE=True' \ From aab0c6a9e2dc8cd222d5cba56f0d48a3055a090e Mon Sep 17 00:00:00 2001 From: XuhuiZhou Date: Tue, 30 Jan 2024 11:37:18 -0500 Subject: [PATCH 06/15] fix mypy --- examples/generate_scenarios.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/generate_scenarios.py b/examples/generate_scenarios.py index ee09aa563..8ae76bb06 100644 --- a/examples/generate_scenarios.py +++ b/examples/generate_scenarios.py @@ -163,7 +163,7 @@ def upload_env_profiles( """ Function to upload environment profiles from csv file """ - env_profile_list = [] # type: ignore + env_profile_list = [] existing_envs = pd.read_csv( "./data/env_profiles_v1.csv" ) # TODO: find a better way to deal with this From 5ddb6c5c1fa3eade951335b33bd20d27a1793437 Mon Sep 17 00:00:00 2001 From: XuhuiZhou Date: Tue, 30 Jan 2024 11:42:42 -0500 Subject: [PATCH 07/15] add scripts --- {exp_scripts => scripts}/exp_instruction.md | 0 {exp_scripts => scripts}/fix_missing_episodes_with_tag.sh | 0 {exp_scripts => scripts}/run_all.sh | 0 {exp_scripts => scripts}/run_interaction.sh | 0 {exp_scripts => scripts}/run_script_full.sh | 0 5 files changed, 0 insertions(+), 0 deletions(-) rename {exp_scripts => scripts}/exp_instruction.md (100%) rename {exp_scripts => scripts}/fix_missing_episodes_with_tag.sh (100%) rename {exp_scripts => scripts}/run_all.sh (100%) rename {exp_scripts => scripts}/run_interaction.sh (100%) rename {exp_scripts => scripts}/run_script_full.sh (100%) diff --git a/exp_scripts/exp_instruction.md b/scripts/exp_instruction.md similarity index 100% rename from exp_scripts/exp_instruction.md rename to scripts/exp_instruction.md diff --git a/exp_scripts/fix_missing_episodes_with_tag.sh b/scripts/fix_missing_episodes_with_tag.sh similarity index 100% rename from exp_scripts/fix_missing_episodes_with_tag.sh rename to scripts/fix_missing_episodes_with_tag.sh diff --git a/exp_scripts/run_all.sh b/scripts/run_all.sh similarity index 100% rename from exp_scripts/run_all.sh rename to scripts/run_all.sh diff --git a/exp_scripts/run_interaction.sh b/scripts/run_interaction.sh similarity index 100% rename from exp_scripts/run_interaction.sh rename to scripts/run_interaction.sh diff --git a/exp_scripts/run_script_full.sh b/scripts/run_script_full.sh similarity index 100% rename from exp_scripts/run_script_full.sh rename to scripts/run_script_full.sh From df1f9d529b0823c125a14a10ab2a525771da56c0 Mon Sep 17 00:00:00 2001 From: XuhuiZhou Date: Tue, 30 Jan 2024 11:45:31 -0500 Subject: [PATCH 08/15] add instruction --- scripts/exp_instruction.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/scripts/exp_instruction.md b/scripts/exp_instruction.md index 1ce481333..c587d4b09 100644 --- a/scripts/exp_instruction.md +++ b/scripts/exp_instruction.md @@ -1,5 +1,14 @@ +# Agent vs Storyteller Scripts + +### Basic Scripts Here are some of the script for running {gpt-3.5-turbo, mixtral-7b-moe} under {normal interaction, omniscient interaction, script generation} mode in {normal, lite} setting. If you need to run all interaction mode, you can use `run_all.sh`, the usage is `Usage: ./run_all.sh `. For example, `./run_all.sh gpt-3.5-turbo exp0128 True`. You may find model_name in `LLM_Name`, and currently we are using `mistralai/Mixtral-8x7B-Instruct-v0.1` and `gpt-3.5-turbo`. If you want to run mode separately, you can use `run_interaction.sh` or `run_script_full.sh`. After running the above script, you may specify tags and fix those error episodes using `./fix_missing_episodes_with_tag.sh`. Current `fix_missing_episodes_with_tag.py` first detects erroneous episodes, delete them and regenerate them. + +### Fine-tuning + +* `evaluate_finetuned_full.sh`: evaluate the fine-tuned model (gpt-3.5 finetuned on the full dataset) on the sotopia lite setting. + + From ef59d916ba5dccc83327d1b59f094f3d3dc1a3b5 Mon Sep 17 00:00:00 2001 From: XuhuiZhou Date: Tue, 30 Jan 2024 11:47:02 -0500 Subject: [PATCH 09/15] improve format --- scripts/exp_instruction.md | 2 -- 1 file changed, 2 deletions(-) diff --git a/scripts/exp_instruction.md b/scripts/exp_instruction.md index c587d4b09..075e19485 100644 --- a/scripts/exp_instruction.md +++ b/scripts/exp_instruction.md @@ -10,5 +10,3 @@ Current `fix_missing_episodes_with_tag.py` first detects erroneous episodes, del ### Fine-tuning * `evaluate_finetuned_full.sh`: evaluate the fine-tuned model (gpt-3.5 finetuned on the full dataset) on the sotopia lite setting. - - From 5f342a96d1991f85fc6bdd3fb566c77e0dd43afb Mon Sep 17 00:00:00 2001 From: XuhuiZhou Date: Wed, 31 Jan 2024 14:07:44 -0500 Subject: [PATCH 10/15] update scripts --- scripts/evaluate_finetuned_full.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/scripts/evaluate_finetuned_full.sh b/scripts/evaluate_finetuned_full.sh index 6d42a819e..b0934b3a1 100644 --- a/scripts/evaluate_finetuned_full.sh +++ b/scripts/evaluate_finetuned_full.sh @@ -4,9 +4,9 @@ python examples/experiment_eval.py \ --gin_file sotopia_conf/run_async_server_in_batch.gin \ '--gin.ENV_IDS=[]' \ '--gin.SCRIPT_MODEL="gpt-3.5-turbo-finetuned"' \ - '--gin.BATCH_SIZE=5' \ - '--gin.TAG="finetuned_eval_full"' \ - '--gin.TAG_TO_CHECK_EXISTING_EPISODES="finetuned_eval_full"' \ + '--gin.BATCH_SIZE=1' \ + '--gin.TAG="finetuned_eval_full_sotopia_normal"' \ + '--gin.TAG_TO_CHECK_EXISTING_EPISODES="finetuned_eval_full_sotopia_normal"' \ '--gin.PUSH_TO_DB=True' \ '--gin.VERBOSE=False' \ '--gin.LITE=True' \ From f186c0c350a8f2bf445c77165b1f718575b98124 Mon Sep 17 00:00:00 2001 From: XuhuiZhou Date: Thu, 1 Feb 2024 21:43:12 -0500 Subject: [PATCH 11/15] fix model import error --- scripts/evaluate_finetuned_full.sh | 13 ++++++++----- sotopia/generation_utils/generate.py | 7 +++---- 2 files changed, 11 insertions(+), 9 deletions(-) diff --git a/scripts/evaluate_finetuned_full.sh b/scripts/evaluate_finetuned_full.sh index b0934b3a1..613dff369 100644 --- a/scripts/evaluate_finetuned_full.sh +++ b/scripts/evaluate_finetuned_full.sh @@ -1,12 +1,15 @@ +MODEL_NAME=gpt-3.5-turbo-finetuned + python examples/experiment_eval.py \ --gin_file sotopia_conf/generation_utils_conf/generate.gin \ --gin_file sotopia_conf/server_conf/server.gin \ --gin_file sotopia_conf/run_async_server_in_batch.gin \ '--gin.ENV_IDS=[]' \ - '--gin.SCRIPT_MODEL="gpt-3.5-turbo-finetuned"' \ - '--gin.BATCH_SIZE=1' \ - '--gin.TAG="finetuned_eval_full_sotopia_normal"' \ - '--gin.TAG_TO_CHECK_EXISTING_EPISODES="finetuned_eval_full_sotopia_normal"' \ + "--gin.AGENT1_MODEL=\"${MODEL_NAME}\"" \ + "--gin.AGENT2_MODEL=\"${MODEL_NAME}\"" \ + '--gin.BATCH_SIZE=5' \ + '--gin.TAG="finetuned_gpt3.5"' \ + '--gin.TAG_TO_CHECK_EXISTING_EPISODES="finetuned_gpt3.5"' \ '--gin.PUSH_TO_DB=True' \ '--gin.VERBOSE=False' \ - '--gin.LITE=True' \ + '--gin.LITE=False' \ diff --git a/sotopia/generation_utils/generate.py b/sotopia/generation_utils/generate.py index 9add102b9..18257db77 100644 --- a/sotopia/generation_utils/generate.py +++ b/sotopia/generation_utils/generate.py @@ -292,11 +292,11 @@ def _type(self) -> str: def _return_fixed_model_version( - model_name: Literal["gpt-3.5-turbo", "gpt-4", "gpt-4-turbo"] + model_name: Literal["gpt-3.5-turbo", "gpt-4", "gpt-4-turbo", "gpt-3.5-turbo-finetuned"] ) -> str: return { "gpt-3.5-turbo": "gpt-3.5-turbo-0613", - "gpt-3.5-turbo-finetuned": "ft:gpt-3.5-turbo-0613:academicscmu::8mbqt3SF", + "gpt-3.5-turbo-finetuned": "ft:gpt-3.5-turbo-0613:academicscmu::8nY2zgdt", "gpt-4": "gpt-4-0613", "gpt-4-turbo": "gpt-4-1106-preview", }[model_name] @@ -315,7 +315,7 @@ def obtain_chain( Using langchain to sample profiles for participants """ match model_name: - case "gpt-3.5-turbo" | "gpt-4" | "gpt-4-turbo": + case "gpt-3.5-turbo" | "gpt-4" | "gpt-4-turbo" | "gpt-3.5-turbo-finetuned": human_message_prompt = HumanMessagePromptTemplate( prompt=PromptTemplate( template=template, @@ -783,7 +783,6 @@ async def agenerate_action( Your action should follow the given format: {format_instructions} """ - return await agenerate( model_name=model_name, template=template, From 740c34ae9d59eba14d686c123a7c9160931724b3 Mon Sep 17 00:00:00 2001 From: XuhuiZhou Date: Thu, 1 Feb 2024 21:43:33 -0500 Subject: [PATCH 12/15] fix format --- sotopia/generation_utils/generate.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/sotopia/generation_utils/generate.py b/sotopia/generation_utils/generate.py index 18257db77..1ed00a8d5 100644 --- a/sotopia/generation_utils/generate.py +++ b/sotopia/generation_utils/generate.py @@ -292,7 +292,9 @@ def _type(self) -> str: def _return_fixed_model_version( - model_name: Literal["gpt-3.5-turbo", "gpt-4", "gpt-4-turbo", "gpt-3.5-turbo-finetuned"] + model_name: Literal[ + "gpt-3.5-turbo", "gpt-4", "gpt-4-turbo", "gpt-3.5-turbo-finetuned" + ] ) -> str: return { "gpt-3.5-turbo": "gpt-3.5-turbo-0613", From ae4de7d46ff0bddec6aa6d197935cc3efbf616a6 Mon Sep 17 00:00:00 2001 From: XuhuiZhou Date: Thu, 1 Feb 2024 21:49:48 -0500 Subject: [PATCH 13/15] get rid of magic number --- examples/generate_scenarios.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/examples/generate_scenarios.py b/examples/generate_scenarios.py index 8ae76bb06..56246d21d 100644 --- a/examples/generate_scenarios.py +++ b/examples/generate_scenarios.py @@ -161,7 +161,8 @@ def upload_env_profiles( filepath: str = "./data/all_environment_profile.json", ) -> None: """ - Function to upload environment profiles from csv file + Function to upload environment profiles from json file + The json file format is a direct dump from the database """ env_profile_list = [] existing_envs = pd.read_csv( @@ -173,8 +174,6 @@ def upload_env_profiles( if env_profile and check_existing_envs(env_profile, existing_envs): del env_profile["pk"] env_profile_list.append(env_profile) - # randomly sample 210 envs - env_profile_list = random.sample(env_profile_list, 240) env_profiles = add_env_profiles(env_profile_list) print("New env profiles added to database:") print(len(env_profiles)) From 63235f5053e4900ef23d90bedea0472c549d4aba Mon Sep 17 00:00:00 2001 From: XuhuiZhou Date: Sun, 4 Feb 2024 14:52:48 -0500 Subject: [PATCH 14/15] update model --- scripts/evaluate_finetuned_MF.sh | 16 ++++++++++++++++ sotopia/generation_utils/generate.py | 8 ++++---- 2 files changed, 20 insertions(+), 4 deletions(-) create mode 100644 scripts/evaluate_finetuned_MF.sh diff --git a/scripts/evaluate_finetuned_MF.sh b/scripts/evaluate_finetuned_MF.sh new file mode 100644 index 000000000..7464c4c95 --- /dev/null +++ b/scripts/evaluate_finetuned_MF.sh @@ -0,0 +1,16 @@ +MODEL_NAME_1=gpt-3.5-turbo-ft-MF +MODEL_NAME_2=gpt-3.5-turbo + +python examples/experiment_eval.py \ + --gin_file sotopia_conf/generation_utils_conf/generate.gin \ + --gin_file sotopia_conf/server_conf/server.gin \ + --gin_file sotopia_conf/run_async_server_in_batch.gin \ + "--gin.ENV_IDS=['01H7VFHPKA2GGPPNVJWV967HZC', '01H7VFHPHWA2CYG7BC82NS4XH1', '01H7VFHPH567HKQRE0C745KH9C', '01H7VFHPMS6AJY0PFGGCFFK5GX', '01H7VFHPJKR16MD1KC71V4ZRCF', '01H7VFHPQ1712DHGTMPQFTXH02', '01H7VFHPP9SPQ8W6583JFZ7HZC', '01H7VFHPM3NVVKSGCCB4S10465', '01H7VFHPGABSWQXTACCC8C3X2F', '01H7VFHPNHZ2YYRHP0GXARD550']" \ + "--gin.AGENT1_MODEL=\"${MODEL_NAME_1}\"" \ + "--gin.AGENT2_MODEL=\"${MODEL_NAME_2}\"" \ + '--gin.BATCH_SIZE=1' \ + '--gin.TAG="finetuned_gpt3.5_gpt3.5ft_MF"' \ + '--gin.TAG_TO_CHECK_EXISTING_EPISODES="finetuned_gpt3.5_gpt3.5ft_MF"' \ + '--gin.PUSH_TO_DB=True' \ + '--gin.VERBOSE=False' \ + '--gin.LITE=False' \ \ No newline at end of file diff --git a/sotopia/generation_utils/generate.py b/sotopia/generation_utils/generate.py index 1ed00a8d5..4e94d5240 100644 --- a/sotopia/generation_utils/generate.py +++ b/sotopia/generation_utils/generate.py @@ -51,6 +51,7 @@ "togethercomputer/mpt-30b-chat", "gpt-3.5-turbo", "gpt-3.5-turbo-finetuned", + "gpt-3.5-turbo-ft-MF", "text-davinci-003", "gpt-4", "gpt-4-turbo", @@ -292,13 +293,12 @@ def _type(self) -> str: def _return_fixed_model_version( - model_name: Literal[ - "gpt-3.5-turbo", "gpt-4", "gpt-4-turbo", "gpt-3.5-turbo-finetuned" - ] + model_name: LLM_Name ) -> str: return { "gpt-3.5-turbo": "gpt-3.5-turbo-0613", "gpt-3.5-turbo-finetuned": "ft:gpt-3.5-turbo-0613:academicscmu::8nY2zgdt", + "gpt-3.5-turbo-ft-MF": "ft:gpt-3.5-turbo-0613:academicscmu::8nuER4bO", "gpt-4": "gpt-4-0613", "gpt-4-turbo": "gpt-4-1106-preview", }[model_name] @@ -317,7 +317,7 @@ def obtain_chain( Using langchain to sample profiles for participants """ match model_name: - case "gpt-3.5-turbo" | "gpt-4" | "gpt-4-turbo" | "gpt-3.5-turbo-finetuned": + case "gpt-3.5-turbo" | "gpt-4" | "gpt-4-turbo" | "gpt-3.5-turbo-finetuned" | "gpt-3.5-turbo-ft-MF": human_message_prompt = HumanMessagePromptTemplate( prompt=PromptTemplate( template=template, From 4a3541082a37db1140e5ab24e7a9e1cda5db6ffc Mon Sep 17 00:00:00 2001 From: XuhuiZhou Date: Sun, 4 Feb 2024 14:58:24 -0500 Subject: [PATCH 15/15] fix format --- scripts/evaluate_finetuned_MF.sh | 2 +- sotopia/generation_utils/generate.py | 4 +--- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/scripts/evaluate_finetuned_MF.sh b/scripts/evaluate_finetuned_MF.sh index 7464c4c95..fbc1474f9 100644 --- a/scripts/evaluate_finetuned_MF.sh +++ b/scripts/evaluate_finetuned_MF.sh @@ -13,4 +13,4 @@ python examples/experiment_eval.py \ '--gin.TAG_TO_CHECK_EXISTING_EPISODES="finetuned_gpt3.5_gpt3.5ft_MF"' \ '--gin.PUSH_TO_DB=True' \ '--gin.VERBOSE=False' \ - '--gin.LITE=False' \ \ No newline at end of file + '--gin.LITE=False' \ diff --git a/sotopia/generation_utils/generate.py b/sotopia/generation_utils/generate.py index 4e94d5240..26267c759 100644 --- a/sotopia/generation_utils/generate.py +++ b/sotopia/generation_utils/generate.py @@ -292,9 +292,7 @@ def _type(self) -> str: return "str" -def _return_fixed_model_version( - model_name: LLM_Name -) -> str: +def _return_fixed_model_version(model_name: LLM_Name) -> str: return { "gpt-3.5-turbo": "gpt-3.5-turbo-0613", "gpt-3.5-turbo-finetuned": "ft:gpt-3.5-turbo-0613:academicscmu::8nY2zgdt",