build_solution_explorer.py
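"""Build the data files behind the solution explorer.

Collects each model's evaluated code-generation results and writes the shared
problem metadata (problems.json) plus per-model outputs (all_outputs.json).
Summary inferred from the script's inputs and outputs, not from upstream docs.
"""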
import glob
import json

from lcb_runner.lm_styles import LanguageModelList, LMStyle

# Accumulators: shared problem metadata plus per-model generation outputs.
problems = []
all_outputs = {}


def get_url(jsonrow):
    """Reconstruct the original problem URL for a benchmark row."""
    if jsonrow["platform"] == "leetcode":
        return f"https://leetcode.com/problems/{jsonrow['question_title']}"
    if jsonrow["platform"] == "atcoder":
        return f"https://atcoder.jp/contests/{jsonrow['contest_id']}/tasks/{jsonrow['question_id']}"
    if jsonrow["platform"] == "codeforces":
        return f"https://codeforces.com/problemset/problem/{jsonrow['contest_id']}/{jsonrow['question_id'].split('_')[1]}"

# Model styles whose raw output includes a reasoning trace that should be
# preserved alongside the extracted code.
reasoning_models_styles = [
    # LMStyle.OpenAIReason, LMStyle.OpenAIReasonPreview,
    LMStyle.QwQ,
    LMStyle.GeminiThinking,
]


def comment_reasoning(reasoning_text):
    """Prefix every line of a reasoning trace with "# " so it stays a valid Python comment."""
    lines = reasoning_text.split("\n")
    return "\n".join([f"# {line}" for line in lines])

for idx, model in enumerate(LanguageModelList):
    # Locate this model's evaluated code-generation results.
    fnames = [
        f"{model.model_repr}/Scenario.codegeneration_*_eval_all.json"
        # for i in range(9)
    ]
    fname = sum([glob.glob(pattern) for pattern in fnames], [])
    if not fname:
        print(fnames, "not found")
        # Fallback: retry with a bare ".json" pattern, which only matches a
        # file literally named ".json".
        fname = glob.glob(".json")
        if not fname:
            continue
        assert len(fname) == 1
        fname = fname[0]
    else:
        # Discard files whose names contain "_125", "_150", or "_200".
        fname = [
            f for f in fname if "_200" not in f and "_125" not in f and "_150" not in f
        ]
        assert len(fname) == 1, fname
        fname = fname[0]
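    # Hypothetical example: with model.model_repr == "MyModel", the glob can
    # match "MyModel/Scenario.codegeneration_10_eval_all.json"; exactly one
    # file must survive the "_125"/"_150"/"_200" filter (assumed here to be
    # alternate partial runs).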
    checked_samples_file = fname
    model_name = checked_samples_file.split("/")[-2]
    with open(checked_samples_file, "r") as f:
        checked_samples = json.load(f)
    checked_samples = sorted(checked_samples, key=lambda x: str(x["question_id"]))

    # Skip runs that do not cover all 880 benchmark problems.
    if len(checked_samples) != 880:
        continue

    # Populate the shared problem metadata once, from the first complete run.
    if not problems:
        for k in checked_samples:
            problems.append(
                {
                    "question_id": k["question_id"],
                    "question_title": k["question_title"],
                    "question_content": k["question_content"],
                    "contest_date": k["contest_date"],
                    "difficulty": k["difficulty"],
                    "url": get_url(k),
                }
            )

    all_outputs[model_name] = []
    for k in checked_samples:
        if model.model_style in reasoning_models_styles:
            # Append the reasoning trace, commented out, after each code sample.
            code_list = [
                k["code_list"][i]
                + f"\n\n\n# Reasoning:\n\n{comment_reasoning(k['output_list'][i])}"
                for i in range(len(k["code_list"]))
            ]
        else:
            code_list = k["code_list"]
        all_outputs[model_name].append(
            {
                "code_list": code_list,
                "pass1_list": k["graded_list"],
                "metadata_list": k["metadata"] if "metadata" in k else [],
            }
        )

    assert len(checked_samples) == len(
        problems
    ), f"{len(checked_samples)=} != {len(problems)=} for {model_name=}"

with open("../../code_generation_samples/problems.json", "w") as f:
    json.dump(problems, f, indent=4)
with open("../../code_generation_samples/all_outputs.json", "w") as f:
    json.dump(all_outputs, f, indent=4)