_condor_get_utt_sem_pairs.py
#!/usr/bin/env python
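"""Distribute utterance/semantic-form grounding over Condor.

A sketch of the script's behavior as read from the code below: it submits one
_condor_get_ground_pair.py job per entry in the pairs pickle through the
condorify_gpu_email wrapper, polls for the per-job output pickles until the time
limit expires, aggregates whatever pairs come back, and dumps them to --outfile.

Example invocation (all paths and beam sizes here are illustrative placeholders,
not values taken from this repository):
    python _condor_get_utt_sem_pairs.py \
        --target_dir /scratch/grounding/ \
        --script_dir /u/username/scripts/ \
        --agent_infile agent.pickle \
        --parse_reranker_beam 1 \
        --interpolation_reranker_beam 1 \
        --pairs_infile utt_sem_pairs.pickle \
        --outfile grounded_pairs.pickle
"""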
__author__ = 'jesse'
import sys
sys.path.append('/u/jesse/phm/tsp/')  # necessary to import CKYParser from above directory
import argparse
import os
import pickle
import time


def main():

    # Hyperparams
    time_limit = 60 * 10  # time in seconds allowed per epoch
    poll_increment = 10  # poll for finished jobs every 10 seconds

    # Load parameters from command line.
    target_dir = FLAGS_target_dir
    script_dir = FLAGS_script_dir
    agent_infile = FLAGS_agent_infile
    parse_reranker_beam = FLAGS_parse_reranker_beam
    interpolation_reranker_beam = FLAGS_interpolation_reranker_beam
    pairs_infile = FLAGS_pairs_infile
    outfile = FLAGS_outfile

    # Load agent.
    with open(agent_infile, 'rb') as f:
        a = pickle.load(f)

    # Launch jobs.
    with open(pairs_infile, 'rb') as f:
        d = pickle.load(f)
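    # Assumed structure of the pairs pickle, inferred from the d[idx][0] / d[idx][1]
    # indexing below: each entry is an (utterance, semantic form) pair, with the
    # semantic form printable by a.parser.print_parse.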
    jobs_remaining = []
    condorify_fn = os.path.join(script_dir, "condorify_gpu_email")
    script_fn = os.path.join(script_dir, "_condor_get_ground_pair.py")
    for idx in range(len(d)):
        out_fn = os.path.join(target_dir, "temp.gpair." + str(idx) + ".pickle")
        log_fn = os.path.join(target_dir, "temp.gpair." + str(idx) + ".log")
        cmd = (condorify_fn + " " +
               "python3 " + script_fn +
               " --agent_infile " + agent_infile +
               " --parse_reranker_beam " + str(parse_reranker_beam) +
               " --interpolation_reranker_beam " + str(interpolation_reranker_beam) +
               " --pairs_infile " + pairs_infile +
               " --pair_idx " + str(idx) +
               " --outfile " + out_fn +
               " " + log_fn)
        os.system(cmd)
        jobs_remaining.append(idx)
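    # Assumed contract of the condorify_gpu_email wrapper and the worker script,
    # inferred from the collection loop below rather than documented here: the
    # wrapper's final argument is the log filename, its stderr lands in
    # "err." + log filename with "/" replaced by "-", and the worker writes a pickle
    # at --outfile containing either a list of grounded pairs or None.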
    # Collect jobs.
    t = []
    time_remaining = time_limit
    while len(jobs_remaining) > 0 and time_remaining > 0:
        time.sleep(poll_increment)
        time_remaining -= poll_increment

        newly_completed = []
        for idx in jobs_remaining:
            fn = os.path.join(target_dir, "temp.gpair." + str(idx) + ".pickle")
            log_fn = os.path.join(target_dir, "temp.gpair." + str(idx) + ".log")
            err_fn = ("err." + log_fn).replace("/", "-")
            try:
                with open(fn, 'rb') as f:
                    pairs = pickle.load(f)
                if pairs is not None:
                    t.extend(pairs)
                    for pair in pairs:
                        pass
                        # print("_condor_get_utt_sem_pairs: got ground pair idx " + str(idx) + " for '" +
                        #       str(d[idx][0]) + "', " + a.parser.print_parse(d[idx][1]))
                        # print("_condor_get_utt_sem_pairs: ... " + pair[1])
                else:
                    print("_condor_get_utt_sem_pairs: got no ground pair for '" +
                          str(d[idx][0]) + "', " + a.parser.print_parse(d[idx][1]))
                newly_completed.append(idx)
                os.system("rm " + fn)  # remove output file
                os.system("rm " + log_fn)  # remove log file
                os.system("rm " + err_fn)  # remove err file

            # Output pickle hasn't been written yet.
            except (IOError, ValueError, EOFError):

                # Check for a non-empty error log, suggesting the job has crashed.
                try:
                    with open(err_fn) as f:
                        err_text = f.read()
                    if len(err_text.strip()) > 0 and 'FutureWarning' not in err_text:
                        # Error, so move on and save log.
                        print("_condor_get_utt_sem_pairs: discovered failed job for pair idx " +
                              str(idx) + ":\n'" + err_text + "'")
                        os.system("mv " + err_fn + " " + err_fn + ".autosave")  # preserve the error log on disk
                        newly_completed.append(idx)
                        os.system("rm " + log_fn)  # remove log
                except IOError:
                    pass

        now_remaining = [idx for idx in jobs_remaining if idx not in newly_completed]
        if len(newly_completed) > 0:
            print("_condor_get_utt_sem_pairs: completed " + str(len(newly_completed)) +
                  " more jobs (" + str(len(d) - len(now_remaining)) + "/" + str(len(d)) + ") " +
                  "with " + str(len(t)) + " actual pairs so far")
        jobs_remaining = now_remaining[:]

    print("_condor_get_utt_sem_pairs: finished " + str(len(d) - len(jobs_remaining)) + " of " +
          str(len(d)) + " jobs; abandoned " + str(len(jobs_remaining)) + " due to time limit; got " +
          str(len(t)) + " actual pairs")
    os.system("condor_rm jesse")  # clear any jobs still in the queue (username is hardcoded)

    # Output results.
    with open(outfile, 'wb') as f:
        pickle.dump(t, f)
if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--target_dir', type=str, required=True,
                        help="the directory to write new files")
    parser.add_argument('--script_dir', type=str, required=True,
                        help="the directory where the condorify script lives")
    parser.add_argument('--agent_infile', type=str, required=True,
                        help="the agent pickle")
    parser.add_argument('--parse_reranker_beam', type=int, required=True,
                        help="how many parses to re-rank internally before returning")
    parser.add_argument('--interpolation_reranker_beam', type=int, required=True,
                        help="how many parse+grounding scores to beam down before reranking")
    parser.add_argument('--pairs_infile', type=str, required=True,
                        help="the pairs pickle")
    parser.add_argument('--outfile', type=str, required=True,
                        help="where to dump the pairs and epoch data")
    args = parser.parse_args()

    # Expose each parsed argument to main() as a module-level FLAGS_<name> global.
    for k, v in vars(args).items():
        globals()['FLAGS_%s' % k] = v
    main()