change score_data to list of dict #2043

Open · wants to merge 1 commit into base: main
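The diff below replaces the positional tuples stored in score_data with dicts keyed by name, so consumers read entry["score"] instead of entry[0]. A minimal sketch of the shape change, with illustrative scores and placeholder strings standing in for real dspy program objects:

    # Before this PR, each entry was a positional tuple: (score, program, full_eval).
    # After, it is a dict with named keys (values below are illustrative only).
    score_data = [
        {"score": 0.72, "program": "program_a", "full_eval": True},   # was (0.72, program_a, True)
        {"score": 0.65, "program": "program_b", "full_eval": False},  # was (0.65, program_b, False)
        {"score": 0.80, "program": "program_c", "full_eval": True},   # was (0.80, program_c, True)
    ]

    # Consumers now filter and sort by key instead of by tuple index:
    sorted_entries = sorted(score_data, key=lambda x: x["score"], reverse=True)   # was x[0]
    full_evals = [e for e in sorted_entries if e["full_eval"]]                    # was e[2]
    minibatch_scores = [e["score"] for e in score_data if not e["full_eval"]]

    print([e["score"] for e in full_evals])  # [0.8, 0.72]
    print(minibatch_scores)                  # [0.65]
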
24 changes: 14 additions & 10 deletions dspy/teleprompt/mipro_optimizer_v2.py
@@ -494,7 +494,7 @@ def _optimize_prompt_parameters(
best_score = default_score
best_program = program.deepcopy()
total_eval_calls = len(valset)
- score_data= [(best_score, program.deepcopy(), True)]
+ score_data = [{"score": best_score, "program": program.deepcopy(), "full_eval": True}]
param_score_dict = defaultdict(list)
fully_evaled_param_combos = {}

@@ -542,7 +542,7 @@ def objective(trial):
logger.info(f"{GREEN}Best full score so far!{ENDC} Score: {score}")

# Log evaluation results
- score_data.append((score, candidate_program, batch_size >= len(valset))) # score, prog, full_eval
+ score_data.append({"score": score, "program": candidate_program, "full_eval": batch_size >= len(valset)}) # score, prog, full_eval
if minibatch:
self._log_minibatch_eval(
score,
@@ -603,11 +603,11 @@ def objective(trial):
best_program.score = best_score
best_program.prompt_model_total_calls = self.prompt_model_total_calls
best_program.total_calls = self.total_calls
- sorted_candidate_programs = sorted(score_data, key=lambda x: x[0], reverse=True)
+ sorted_candidate_programs = sorted(score_data, key=lambda x: x["score"], reverse=True)
# Attach all minibatch programs
- best_program.mb_candidate_programs = [score_data for score_data in sorted_candidate_programs if not score_data[2]]
+ best_program.mb_candidate_programs = [score_data for score_data in sorted_candidate_programs if not score_data["full_eval"]]
# Attach all programs that were evaluated on the full trainset, in descending order of score
- best_program.candidate_programs = [score_data for score_data in sorted_candidate_programs if score_data[2]]
+ best_program.candidate_programs = [score_data for score_data in sorted_candidate_programs if score_data["full_eval"]]

logger.info(f"Returning best identified program with score {best_score}!")

@@ -637,8 +637,10 @@ def _log_minibatch_eval(
logger.info(
f"Score: {score} on minibatch of size {batch_size} with parameters {chosen_params}."
)
logger.info(f"Minibatch scores so far: {'['+', '.join([f'{s[0]}' for s in score_data if not s[2]]) +']'}")
trajectory = "[" + ", ".join([f"{s[0]}" for s in score_data if s[2]]) + "]"
minibatch_scores = ', '.join([f'{s["score"]}' for s in score_data if not s["full_eval"]])
logger.info(f"Minibatch scores so far: {'['+ minibatch_scores +']'}")
full_eval_scores = ', '.join([f'{s["score"]}' for s in score_data if s["full_eval"]])
trajectory = "[" + full_eval_scores + "]"
logger.info(f"Full eval scores so far: {trajectory}")
logger.info(f"Best full score so far: {best_score}")
logger.info(
@@ -656,7 +658,8 @@ def _log_normal_eval(
trial_logs[trial_num]["full_eval_program"] = candidate_program.deepcopy()

logger.info(f"Score: {score} with parameters {chosen_params}.")
logger.info(f"Scores so far: {'['+', '.join([f'{s[0]}' for s in score_data if s[2]])+']'}")
full_eval_scores = ', '.join([f'{s["score"]}' for s in score_data if s["full_eval"]])
logger.info(f"Scores so far: {'['+full_eval_scores+']'}")
logger.info(f"Best score so far: {best_score}")
logger.info(f'{"="*len(f"===== Trial {trial.number+1} / {num_trials} =====")}\n\n')

@@ -722,7 +725,7 @@ def _perform_full_evaluation(
full_eval_score = eval_candidate_program(
len(valset), valset, highest_mean_program, evaluate, self.rng
)
- score_data.append((full_eval_score, highest_mean_program, True))
+ score_data.append({"score": full_eval_score, "program": highest_mean_program, "full_eval": True})

# Log full evaluation results
fully_evaled_param_combos[combo_key] = {
@@ -745,7 +748,8 @@
logger.info(f"{GREEN}New best full eval score!{ENDC} Score: {full_eval_score}")
best_score = full_eval_score
best_program = highest_mean_program.deepcopy()
trajectory = "[" + ", ".join([f"{s[0]}" for s in score_data if s[2]]) + "]"
full_eval_scores = ', '.join([f'{s["score"]}' for s in score_data if s["full_eval"]])
trajectory = "[" + full_eval_scores + "]"
logger.info(f"Full eval scores so far: {trajectory}")
logger.info(f"Best full score so far: {best_score}")
logger.info(len(f"===== Full Eval {len(fully_evaled_param_combos)+1} =====") * "=")
4 changes: 2 additions & 2 deletions dspy/teleprompt/random_search.py
@@ -135,15 +135,15 @@ def compile(self, student, *, teacher=None, trainset, valset=None, restrict=None
print(f"Scores so far: {scores}")
print(f"Best score so far: {max(scores)}")

- score_data.append((score, subscores, seed, program))
+ score_data.append({"score": score, "subscores": subscores, "seed": seed, "program": program})

if self.stop_at_score is not None and score >= self.stop_at_score:
print(f"Stopping early because score {score} is >= stop_at_score {self.stop_at_score}")
break

# To best program, attach all program candidates in decreasing average score
best_program.candidate_programs = score_data
- best_program.candidate_programs = sorted(best_program.candidate_programs, key=lambda x: x[0], reverse=True)
+ best_program.candidate_programs = sorted(best_program.candidate_programs, key=lambda x: x["score"], reverse=True)

print(f"{len(best_program.candidate_programs)} candidate programs found.")

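In random_search.py the entries carry two additional fields (subscores and seed), so the same positional-to-named switch applies with a wider record. A hedged sketch of that shape, again with illustrative values and strings in place of compiled dspy programs:

    # Each entry, before: (score, subscores, seed, program); after: a dict with named keys.
    candidates = [
        {"score": 0.74, "subscores": [1, 0, 1, 1], "seed": 3, "program": "compiled_program_a"},
        {"score": 0.61, "subscores": [1, 0, 0, 1], "seed": -1, "program": "compiled_program_b"},
    ]

    # Ranking candidates now keys on "score" rather than position 0:
    best_first = sorted(candidates, key=lambda x: x["score"], reverse=True)
    print([c["seed"] for c in best_first])  # [3, -1]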