#!/usr/bin/env python
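"""Leave-one-object-out experiment for a held-out predicate.

Fits per-(behavior, modality) SVM classifiers for the training predicates,
propagates their kappa-based confidences to the held-out test predicate under
several weighting schemes (uniform, prior, cosine-similarity weighted, and
behavior-annotation based), then runs leave-one-object-out training/testing
with exploration policies guided by those weights, recording the behaviors,
contexts, time spent, and decisions for each held-out object.
"""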
__author__ = 'jesse'
import argparse
import gensim
import numpy as np
import operator
import os
import pickle
import random
import sys
from functions import get_data_for_classifier, fit_classifier, get_margin_kappa_and_cm, get_margin_f1, get_margin_acc
from functions import get_best_sequence_of_behaviors, get_representatives_of_unique_sets, get_labels, get_kappa
def shuffle_ties(l):
# Identify spans of equal values.
spans = []
curr_span = [0, 0]
for idx in range(1, len(l)):
if np.isclose(l[idx][1], l[curr_span[0]][1]):
curr_span[1] = idx
else:
spans.append(curr_span)
curr_span = [idx, idx]
spans.append(curr_span)
# Shuffle order of equal spans.
for s in spans:
c = l[s[0]:s[1] + 1]
random.shuffle(c)
l = l[:s[0]] + c + l[s[1] + 1:]
return l
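# Illustrative only: given a list of (key, score) pairs sorted by score,
# shuffle_ties preserves the overall order but randomizes runs of
# (approximately) equal scores, e.g.
#   shuffle_ties([(0, .9), (1, .5), (2, .5), (3, .1)])
# may return [(0, .9), (2, .5), (1, .5), (3, .1)].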
def main():
behaviors = ["drop", "grasp", "hold", "lift", "look", "lower", "press", "push"]
modalities = ["audio", "color", "fpfh", "haptics", "fc7"]
nb_objects = 32
nb_behaviors = len(behaviors)
# time for setup + feature extraction
behavior_t = {"drop": 9.8, "grasp": 22., "hold": 5.7, "lift": 11.1,
"look": 0.8, "lower": 10.6, "press": 22., "push": 22.} # actual costs
min_conf = 0
nb_obs = 5 # an artifact of the dataset. there are 5 observations available per behavior
# Convert flags to local variables.
indir = FLAGS_indir
test_pidx = FLAGS_test_pidx
kernel = FLAGS_kernel
word_embeddings_fn = FLAGS_word_embeddings
required_examples = FLAGS_required_examples
train_kappa_threshold = FLAGS_train_kappa_threshold
outfile = FLAGS_outfile
resample_test = FLAGS_resample_test
alternative_labels = FLAGS_alternative_labels
uniform_costs = FLAGS_uniform_costs
degenerate = FLAGS_degenerate
use_behavior_decisions = FLAGS_use_behavior_decisions
if uniform_costs is not None and uniform_costs:
behavior_t = {"drop": 1., "grasp": 1., "hold": 1., "lift": 1.,
"look": 1., "lower": 1., "press": 1., "push": 1.} # uniform costs
time_step = int(np.ceil(sum([behavior_t[b] for b in behaviors])))
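    # Worked example: with the actual costs above, sum(behavior_t.values()) =
    # 9.8 + 22 + 5.7 + 11.1 + 0.8 + 10.6 + 22 + 22 = 104.0, so time_step = 104;
    # with --uniform_costs it is ceil(8 * 1) = 8.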
# time_step = int(np.ceil(sum([behavior_t[b] for b in behaviors]) / 5.)) # increase samples
print "reading in folds, labels, predicates, and features..."
labels = get_labels(indir, alternative_labels)
# train/test split based on labels for predicate in question
# f_train = range(nb_objects)
# f_test = range(nb_objects)
# fixed train/test splits from fully annotated sets plus leftovers
f_train = [10, 3, 27, 7] + [18, 2, 20, 17] + [5, 14, 8, 15] + [1, 30, 29, 31]
f_test = [21, 24, 19, 23] + [16, 0, 4, 9] + [22, 28, 12, 25] + [11, 6, 26, 13]
# DEBUG - imbalanced train 24 / test 8
# f_train = [10, 3, 27, 7] + [18, 2, 20, 17] + [5, 14, 8, 15] + [1, 30, 29, 31] + [21, 24, 19, 23] + [16, 0, 4, 9]
# f_test = [22, 28, 12, 25] + [11, 6, 26, 13]
test_oidxs = [oidx for oidx in f_test if labels[oidx][test_pidx] == 0 or labels[oidx][test_pidx] == 1]
train_oidxs = [oidx for oidx in f_train if oidx not in test_oidxs]
with open(os.path.join(indir, 'predicates.pickle'), 'rb') as f:
predicates = pickle.load(f)
nb_predicates = len(predicates)
feature_fn = os.path.join(indir, 'features.pickle')
with open(feature_fn, 'rb') as f:
object_feats = pickle.load(f)
    # Collect the set of (behavior, modality) contexts available in the data.
    contexts = []
    for oidx in range(nb_objects):
        for b in behaviors:
            if b not in object_feats[oidx]:
                continue
            for m in modalities:
                if m not in object_feats[oidx][b]:
                    continue
                if (b, m) not in contexts:
                    contexts.append((b, m))
nb_contexts = len(contexts)
valid_predicates = [pidx for pidx in range(nb_predicates)
if sum([1 if labels[oidx][pidx] == 1 else 0
for oidx in train_oidxs]) >= required_examples
and sum([1 if labels[oidx][pidx] == 0 else 0
for oidx in train_oidxs]) >= required_examples]
train_threshold_preds = [pidx for pidx in range(nb_predicates)
if sum([1 if labels[oidx][pidx] == 1 or labels[oidx][pidx] == 0 else 0
for oidx in train_oidxs]) >= train_kappa_threshold]
trainable_preds = [pidx for pidx in range(nb_predicates)
if pidx in valid_predicates and pidx in train_threshold_preds
and pidx != test_pidx]
if len(trainable_preds) == 0:
print "WARNING: no trainable predicates under constraints"
sys.exit()
with open(os.path.join(indir, 'behavior_annotations.pickle'), 'rb') as f:
behavior_annotations = pickle.load(f)
print "... done"
# DEBUG
# for oidx in range(nb_objects): # DEBUG
# print str(oidx) + ": " + ', '.join([predicates[pidx] for pidx in range(nb_predicates)
# if labels[oidx][pidx] == 1]) # DEBUG
# _ = raw_input() # DEBUG
# DEBUG
# Pre-calculate matrix of cosine similarity of word embeddings.
print "pre-calculating word embeddings similarities..."
print "... loading word embeddings"
    wvb = word_embeddings_fn.split('.')[-1] == 'bin'
wv = gensim.models.KeyedVectors.load_word2vec_format(word_embeddings_fn, binary=wvb,
limit=150000)
print "...... done"
print "... calculating similarities"
    # If the test predicate is out of vocabulary, give similarity 1 to itself and
    # 0 to all others; out-of-vocabulary training predicates get similarity 0.
if predicates[test_pidx] in wv.vocab:
pred_cosine = [(1 + wv.similarity(predicates[test_pidx], predicates[pjdx])) / 2.0
if predicates[pjdx] in wv.vocab else 0 for pjdx in range(nb_predicates)]
else:
pred_cosine = [0 if pjdx != test_pidx else 1 for pjdx in range(nb_predicates)]
pred_cosine = [val / sum(pred_cosine) for val in pred_cosine]
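    # Illustrative only: wv.similarity returns a cosine in [-1, 1]; the
    # (1 + cos) / 2 map rescales it to [0, 1] (e.g. cos = 0.6 -> 0.8), and
    # dividing by the sum turns the scores into a distribution over predicates.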
max_sims = [i for i, x in enumerate(pred_cosine)
if np.isclose(x, np.max([pred_cosine[pjdx] for pjdx in trainable_preds]))]
print "max sims '" + str(predicates[test_pidx]) + "': " + ','.join([predicates[midx] for midx in max_sims])
top_k_sims = max_sims[:]
    while len(top_k_sims) < 3 and len(top_k_sims) < len(trainable_preds):  # get top 3
        curr_max_val = np.max([pred_cosine[pjdx] for pjdx in trainable_preds
                               if pjdx not in top_k_sims])
        top_k_sims.extend([i for i, x in enumerate(pred_cosine)
                           if np.isclose(x, curr_max_val) and i not in top_k_sims])
print "top k sims '" + str(predicates[test_pidx]) + "': " + ','.join([predicates[midx] for midx in top_k_sims])
# print len(wv.vocab) # DEBUG
# print [predicates[pidx] for pidx in range(nb_predicates) if predicates[pidx] in wv.vocab] # DEBUG
# missing_preds = [predicates[pidx] for pidx in range(nb_predicates)
# if predicates[pidx] not in wv.vocab] # DEBUG
# print missing_preds, len(missing_preds), len(predicates) # DEBUG
print "...... done"
print "... done"
# Fit SVMs.
print "fitting SVMs for each train predicate considering only objects not labeled for test predicate..."
kappas = [] # pidx, b, m
behavior_kappas = []
f1s = []
accs = []
num_examples = [] # pidx
for pidx in range(nb_predicates):
if pidx not in trainable_preds:
print "... '" + predicates[pidx] + "' insufficient labels or is test pred"
kappas.append({b: {m: 0 for _b, m in contexts if _b == b} for b, _ in contexts})
behavior_kappas.append({b: 0 for b in behaviors})
f1s.append({b: {m: 0 for _b, m in contexts if _b == b} for b, _ in contexts})
accs.append({b: {m: 0 for _b, m in contexts if _b == b} for b, _ in contexts})
num_examples.append(0)
continue
print "... '" + predicates[pidx] + "' fitting"
train_pairs = [(oidx, labels[oidx][pidx])
for oidx in train_oidxs
if labels[oidx][pidx] == 0 or labels[oidx][pidx] == 1]
num_examples.append(len(train_pairs))
pc = {}
pcm = {}
pk = {}
pf = {}
pa = {}
for b, m in contexts:
if b not in pc:
pc[b] = {}
pcm[b] = {}
pk[b] = {}
pf[b] = {}
pa[b] = {}
pc[b][m] = fit_classifier(kernel, b, m, train_pairs, object_feats)
pk[b][m], pcm[b][m] = get_margin_kappa_and_cm(pc[b][m], b, m, train_pairs, object_feats,
xval=train_pairs, kernel=kernel, minc=(min_conf, 0))
pf1 = get_margin_f1(pc[b][m], b, m, train_pairs, object_feats,
xval=train_pairs, kernel=kernel, minc=(min_conf, 0))
# pf[b][m] = pf1 + 1. / nb_contexts # test; adds uniform conf vector to all others effectively
pf[b][m] = pf1
pa[b][m] = get_margin_acc(pc[b][m], b, m, train_pairs, object_feats,
xval=train_pairs, kernel=kernel, minc=(min_conf, 0))
# print "pidx: " + str(pidx) # DEBUG
# print '\n'.join(['\t'.join([b, m, str(pk[b][m]), str(pf[b][m])]) for b, m in contexts]) # DEBUG
# _ = raw_input() # DEBUG
kappas.append(pk)
pbk = {}
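        # Pool the per-modality confusion matrices for each behavior, weighting
        # each by its own kappa; assuming get_kappa implements Cohen's kappa,
        # the pooled matrix is then scored as (p_o - p_e) / (1 - p_e), where
        # p_o is observed agreement and p_e is chance agreement.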
for b in behaviors:
bcm = [[sum([pk[b][m] * pcm[b][m][0][0] for m in pk[b].keys()]),
sum([pk[b][m] * pcm[b][m][0][1] for m in pk[b].keys()])],
[sum([pk[b][m] * pcm[b][m][1][0] for m in pk[b].keys()]),
sum([pk[b][m] * pcm[b][m][1][1] for m in pk[b].keys()])]]
pbk[b] = get_kappa(bcm)
behavior_kappas.append(pbk)
f1s.append(pf)
accs.append(pa)
print "... done"
# Propagate kappas to held out predicate under various schemes.
print "calculating confidence distributions..."
w = {"uniform": [1.0 for _ in range(nb_contexts)],
"prior_kappa": [np.mean([kappas[pjdx][b][m]
for pjdx in range(nb_predicates) if pjdx != test_pidx])
for b, m in contexts],
"cos_avg_kappa": [np.mean([kappas[pjdx][b][m] * pred_cosine[pjdx]
for pjdx in range(nb_predicates) if pjdx != test_pidx])
for b, m in contexts],
"cos_max_kappa": [np.mean([kappas[pjdx][b][m] for pjdx in max_sims])
for b, m in contexts],
"cos_top3_kappa": [np.mean([kappas[pjdx][b][m] * pred_cosine[pjdx]
for pjdx in top_k_sims])
for b, m in contexts],
# "prior_f1": [np.mean([f1s[pjdx][b][m]
# for pjdx in range(nb_predicates) if pjdx != test_pidx])
# for b, m in contexts],
# "cos_avg_f1": [np.mean([f1s[pjdx][b][m] * pred_cosine[pjdx]
# for pjdx in range(nb_predicates) if pjdx != test_pidx])
# for b, m in contexts],
# "cos_max_f1": [np.mean([f1s[pjdx][b][m] for pjdx in max_sims])
# for b, m in contexts],
# "prior_acc": [np.mean([accs[pjdx][b][m]
# for pjdx in range(nb_predicates) if pjdx != test_pidx])
# for b, m in contexts],
# "cos_max_acc": [np.mean([accs[pjdx][b][m] for pjdx in max_sims])
# for b, m in contexts],
# "prior_ba": [np.mean([behavior_annotations[pjdx][b]
# for pjdx in range(nb_predicates) if pjdx != test_pidx])
# for b, _ in contexts],
# "cos_avg_ba": [np.mean([behavior_annotations[pjdx][b] * pred_cosine[pjdx]
# for pjdx in range(nb_predicates) if pjdx != test_pidx])
# for b, _ in contexts],
# "cos_max_ba": [np.mean([behavior_annotations[pjdx][b] for pjdx in max_sims])
# for b, _ in contexts],
# "cos_max_kba": [np.mean([behavior_annotations[pjdx][b] * kappas[pjdx][b][m] for pjdx in max_sims])
# for b, m in contexts],
# "cos_max_fba": [np.mean([behavior_annotations[pjdx][b] * f1s[pjdx][b][m] for pjdx in max_sims])
# for b, m in contexts],
# "cos_max_aba": [np.mean([behavior_annotations[pjdx][b] * accs[pjdx][b][m] for pjdx in max_sims])
# for b, m in contexts],
"ba": [behavior_annotations[test_pidx][b] for b, _ in contexts],
"kba": [np.mean([kappas[pjdx][b][m] * pred_cosine[pjdx]
for pjdx in top_k_sims]) * behavior_annotations[test_pidx][b]
for b, m in contexts]
}
wb = {"uniform": [1.0 for _ in range(nb_behaviors)],
"prior_kappa": [np.mean([behavior_kappas[pjdx][b]
for pjdx in range(nb_predicates) if pjdx != test_pidx])
for b in behaviors],
"cos_avg_kappa": [np.mean([behavior_kappas[pjdx][b] * pred_cosine[pjdx]
for pjdx in range(nb_predicates) if pjdx != test_pidx])
for b in behaviors],
"cos_max_kappa": [np.mean([behavior_kappas[pjdx][b] for pjdx in max_sims])
for b in behaviors],
"cos_top3_kappa": [np.mean([behavior_kappas[pjdx][b] * pred_cosine[pjdx]
for pjdx in top_k_sims])
for b in behaviors],
# "prior_ba": [np.mean([behavior_annotations[pjdx][b]
# for pjdx in range(nb_predicates) if pjdx != test_pidx])
# for b in behaviors],
# "cos_max_ba": [np.mean([behavior_annotations[pjdx][b] for pjdx in max_sims])
# for b in behaviors],
# "cos_max_kba": [np.mean([behavior_annotations[pjdx][b] * behavior_kappas[pjdx][b] for pjdx in max_sims])
# for b in behaviors]
"ba": [behavior_annotations[test_pidx][b] for b in behaviors],
"kba": [np.mean([behavior_kappas[pjdx][b] * pred_cosine[pjdx]
for pjdx in top_k_sims]) * behavior_annotations[test_pidx][b]
for b in behaviors]
}
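    # Each scheme yields one weight per (behavior, modality) context (w) or per
    # behavior (wb) for the test predicate. E.g., "cos_max_kappa" averages the
    # kappas of the most-similar training predicates, while "kba" rescales the
    # cosine-weighted top-k kappa average by the test predicate's own behavior
    # annotation.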
# w = {"uniform": w["uniform"]} # DEBUG
# wb = {"uniform": wb["uniform"]} # DEBUG
print "... done"
# DEBUG - visualize distributional confidence scores
# d = {(b, m): w["ba"][contexts.index((b, m))] for b, m in contexts}
# for key, value in sorted(d.items(), key=operator.itemgetter(1)):
# print key, value
# _ = raw_input()
# DEBUG
# Perform leave-one-object-out training/testing using exploration policies elicited by weight distributions.
# Record decision on held-out object and behaviors used to arrive at it during training/testing.
print "performing leave-one-object-out cross validation with training/testing guided by kappa weights..."
max_allowed = int(np.ceil(sum([behavior_t[b] * nb_obs for b in behaviors]))) + 1
min_allowed = int(np.ceil(min([behavior_t[b] * nb_obs for b in behaviors])))
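    # Worked example: with the actual costs and nb_obs = 5, max_allowed =
    # ceil(104.0 * 5) + 1 = 521 and min_allowed = ceil(0.8 * 5) = 4, so the
    # time budgets swept below are range(4, 521, 104) + [520] =
    # [4, 108, 212, 316, 420, 520].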
# min_allowed = max_allowed - 1 # DEBUG
    # One results structure per weighting scheme, time budget, and held-out
    # object: scheme name -> budget -> test object -> list over samples.
    inc_range = range(min_allowed, max_allowed, time_step) + [max_allowed - 1]

    def new_results_structure():
        return {wn: {dependent_inc: {test_oidx: [] for test_oidx in test_oidxs}
                     for dependent_inc in inc_range}
                for wn in w.keys()}
    train_contexts_used = new_results_structure()
    train_behaviors_used = new_results_structure()
    train_time_used = new_results_structure()
    test_contexts_used = new_results_structure()
    test_behaviors_used = new_results_structure()
    test_time_used = new_results_structure()
    decisions = new_results_structure()
for test_oidx in test_oidxs:
print "... leaving " + str(test_oidx) + " out of " + str(test_oidxs)
# For varying thresholds, record number of behaviors, contexts, and test result on held out object.
for wn in w.keys():
if use_behavior_decisions:
behavior_r = {behaviors[bidx]: wb[wn][bidx] for bidx in range(nb_behaviors)}
else:
behavior_r = {behaviors[bidx]: sum([w[wn][cidx]
for cidx in range(nb_contexts)
if contexts[cidx][0] == behaviors[bidx]])
for bidx in range(nb_behaviors)}
s = sum([behavior_r[b] for b in behavior_r])
behavior_r = {b: behavior_r[b] / s if s > 0 else 1.0 / len(behavior_r.keys())
for b in behavior_r}
# print wn, behavior_r # DEBUG
# _ = raw_input() # DEBUG
            for dependent_inc in inc_range:
# Sample a training sequence, then a testing sequence based on that
for samples in range(100):
b_sequences = get_best_sequence_of_behaviors(behaviors, behavior_r, behavior_t, dependent_inc,
maximize_reward=False, max_samples=100,
random_walk=True, degenerate=degenerate)
b_sequences = get_representatives_of_unique_sets(b_sequences)
# print wn, dependent_inc, len(b_sequences) # DEBUG
# print "\t\n".join([str(bseq) for bseq in b_sequences]) # DEBUG
# _ = raw_input() # DEBUG
bsidx = random.randint(0, len(b_sequences) - 1)
b_sequence = b_sequences[bsidx]
# DEBUG - visualize distributional confidence scores
# print "training sequence and behavior reward distribution:"
# print b_sequence
# d = {b: behavior_r[b] for b in behaviors}
# for key, value in sorted(d.items(), key=operator.itemgetter(1)):
# print key, value
# _ = raw_input()
# DEBUG
# Train SVMs on test_pidx objects except held-out test_oidx,
# getting local confidence estimates based on the behavior sequence in question.
# This allows us to train on a # of observations corresponding to the number of times
# each behavior was actually performed during training.
xval_svms = [] # cidx
xval_kappas = []
xval_cms = []
train_pairs = [(oidx, labels[oidx][test_pidx])
for oidx in test_oidxs if oidx != test_oidx]
                    # Majority-class fallback label among the training pairs.
                    mc = (1 if len([l for _, l in train_pairs if l == 1]) >
                          len([l for _, l in train_pairs if l == 0]) else 0)
for cidx in range(nb_contexts):
b, m = contexts[cidx]
ol = b_sequence.count(b)
if ol > 0:
xval_svms.append(fit_classifier(kernel, b, m, train_pairs, object_feats,
obs_limit=ol))
xvk, xvcm = get_margin_kappa_and_cm(xval_svms[-1], b, m, train_pairs, object_feats,
xval=train_pairs, kernel=kernel,
minc=(min_conf, 0), obs_limit=ol)
xval_kappas.append(xvk)
xval_cms.append(xvcm)
else:
xval_svms.append(None)
xval_kappas.append(0)
xval_cms.append([[0, 0], [0, 0]])
xval_behavior_kappas = []
for b in behaviors:
if b in b_sequence:
bcm = [[sum([xval_kappas[cidx] * xval_cms[cidx][0][0] for cidx in range(nb_contexts)
if contexts[cidx][0] == b]),
sum([xval_kappas[cidx] * xval_cms[cidx][0][1] for cidx in range(nb_contexts)
if contexts[cidx][0] == b])],
[sum([xval_kappas[cidx] * xval_cms[cidx][1][0] for cidx in range(nb_contexts)
if contexts[cidx][0] == b]),
sum([xval_kappas[cidx] * xval_cms[cidx][1][1] for cidx in range(nb_contexts)
if contexts[cidx][0] == b])]]
if bcm[0][0] + bcm[0][1] + bcm[1][0] + bcm[1][1] > 0:
xval_behavior_kappas.append(get_kappa(bcm))
else:
xval_behavior_kappas.append(0)
else:
xval_behavior_kappas.append(0)
# print dependent_inc, b_sequence, xval_behavior_kappas # DEBUG
# _ = raw_input() # DEBUG
# Train.
train_contexts = []
train_behaviors = []
for b in b_sequence:
bidx = behaviors.index(b)
train_contexts.extend([cidx for cidx in range(nb_contexts)
if contexts[cidx][0] == behaviors[bidx]])
train_behaviors.append(bidx)
                    if (dependent_inc == max_allowed - 1 and
                            set(train_behaviors) != set(range(len(behaviors)))):  # DEBUG
print "WARNING: training set not maxed despite having enough time" # DEBUG
print wn, dependent_inc, [behaviors[bidx] for bidx in train_behaviors] # DEBUG
print "behavior_r: " + str(behavior_r) # DEBUG
# _ = raw_input() # DEBUG
train_contexts_used[wn][dependent_inc][test_oidx].append(train_contexts)
train_behaviors_used[wn][dependent_inc][test_oidx].append(train_behaviors)
train_time_used[wn][dependent_inc][test_oidx].append(sum([behavior_t[behaviors[bidx]]
for bidx in train_behaviors]))
# Test.
contexts_for_sequence = [cjdx for cjdx in range(nb_contexts)
if contexts[cjdx][0] in b_sequence]
s = sum([w[wn][cjdx] for cjdx in contexts_for_sequence])
context_weights = {cjdx: w[wn][cjdx] / s if s > 0 else 1.0 / len(contexts_for_sequence)
for cjdx in contexts_for_sequence}
# At test time, re-order in-behavior contexts with own kappas.
# Allows re-ordering train behaviors too.
if resample_test:
# TODO: get this up-to-date with the use_behavior_decisions option, which is not
# TODO: currently implemented here
new_weights = []
dws = []
new_b_weights = {b: 0 for b in b_sequence}
for cidx in context_weights.keys():
xval_context_weight = xval_kappas[cidx]
# xval_context_weight = xval_f1s[cidx]
new_weights.append((cidx, xval_context_weight))
dws.append(xval_context_weight)
new_weights = {cidx: dw / sum(dws) if sum(dws) > 0 else 1.0 / len(new_weights)
for cidx, dw in new_weights}
for cidx in new_weights:
new_b_weights[contexts[cidx][0]] += new_weights[cidx]
new_t = {b: behavior_t[b] for b in b_sequence}
# Get new sequences, this time maximizing for reward, e.g. taking high-reward actions first,
# with sufficiently high reward allowing taking vacuous actions between/after
# Additionally, allow time * nb_obs, that is, the policy expands to allow intermingling of
# different behaviors if it's greedy-optimal to do so, as well as repeating high-reward
# loops like, for example, 'grasp'/'drop'
new_sequences = get_best_sequence_of_behaviors(list(set(b_sequence)), new_b_weights,
new_t, dependent_inc,
maximize_reward=True,
max_samples=100,
random_walk=True,
degenerate=degenerate)
if False and (len(new_sequences) > 1 or new_sequences[0] != b_sequence): # DEBUG
print "context_weights: " + str(context_weights) # DEBUG
print "behavior_r: " + str(behavior_r) # DEBUG
print "b_sequence: " + str(b_sequence) # DEBUG
print "new_weights: " + str([[contexts[cidx], new_weights[cidx]]
for cidx in new_weights.keys()])
print "new_b_weights: " + str(new_b_weights) # DEBUG
print "new_sequences: " + str(new_sequences) # DEBUG
_ = raw_input() # DEBUG
context_weights = new_weights
test_sequences = new_sequences
# Get observations for each context and keep them in storage.
observations = [] # indexed by cidx; holds vectors of observations for each context
next_obs = [0 for _ in range(nb_behaviors)] # holds index of next observation to retrieve
for cidx in range(nb_contexts):
observations.append(get_data_for_classifier(contexts[cidx][0], contexts[cidx][1],
[(test_oidx, labels[test_oidx][test_pidx])], object_feats)[0])
tidx = random.randint(0, len(test_sequences) - 1)
# DEBUG - visualize distributional confidence scores
# print "testing sequence and behavior reward distribution:"
# print test_sequences[tidx]
# d = {b: new_b_weights[b] for b in list(set(b_sequence))}
# for key, value in sorted(d.items(), key=operator.itemgetter(1)):
# print key, value
# _ = raw_input()
# DEBUG
for bidx in range(nb_behaviors):
next_obs[bidx] = 0
behaviors_so_far = []
contexts_so_far = []
pos = 0
neg = 0
for btidx in range(len(test_sequences[tidx])):
if sum([new_b_weights[b] if next_obs[behaviors.index(b)] < nb_obs else 0
for b in test_sequences[tidx][:btidx + 1]]) == 0:
print "WARNING: aborting sequence early" + str(test_sequences[tidx])
break
b = test_sequences[tidx][btidx]
bidx = behaviors.index(b)
behaviors_so_far.append(bidx)
contexts_for_behavior = [cjdx for cjdx in range(nb_contexts)
if contexts[cjdx][0] == behaviors[bidx]]
                            context_weight_pairs = sorted([(cjdx, context_weights[cjdx])
                                                           for cjdx in contexts_for_behavior],
                                                          key=operator.itemgetter(1), reverse=True)
context_weight_pairs = shuffle_ties(context_weight_pairs)
sufficient_dec_weight = False
for cidx, cdw in context_weight_pairs:
if cdw > 0:
contexts_so_far.append(cidx)
ds = xval_svms[cidx].predict([observations[cidx][next_obs[bidx]]])
for d in ds:
if d > 0:
pos += d * cdw / test_sequences[tidx].count(b)
else:
neg -= d * cdw / test_sequences[tidx].count(b)
# print pos, neg # DEBUG
# print test_oidx, contexts[cidx], ds, cdw # DEBUG
if pos > 0.5 or neg > 0.5:
sufficient_dec_weight = True
break
next_obs[bidx] += 1
if sufficient_dec_weight:
break
                        # Positive only when positive evidence strictly dominates and any
                        # context fired; otherwise fall back to the majority-class label mc.
                        dec = 1 if pos > neg and (pos > 0 or neg > 0) else mc
test_contexts_used[wn][dependent_inc][test_oidx].append(contexts_so_far)
test_behaviors_used[wn][dependent_inc][test_oidx].append(behaviors_so_far)
test_time_used[wn][dependent_inc][test_oidx].append(sum([behavior_t[behaviors[bidx]]
for bidx in behaviors_so_far]))
decisions[wn][dependent_inc][test_oidx].append(dec)
else:
# Get observations for each context and keep them in storage.
observations = [] # indexed by cidx; holds vectors of observations for each context
next_obs = [0 for _ in range(nb_behaviors)] # holds index of next observation to retrieve
for cidx in range(nb_contexts):
observations.append(get_data_for_classifier(contexts[cidx][0], contexts[cidx][1],
[(test_oidx, labels[test_oidx][test_pidx])], object_feats)[0])
behaviors_so_far = []
contexts_so_far = []
dsum = 0
for btidx in range(len(b_sequence)):
b = b_sequence[btidx]
bidx = behaviors.index(b)
behaviors_so_far.append(bidx)
contexts_for_behavior = [cjdx for cjdx in range(nb_contexts)
if contexts[cjdx][0] == behaviors[bidx]]
if use_behavior_decisions:
bd = 0
for cidx in contexts_for_behavior:
contexts_so_far.append(cidx)
ds = xval_svms[cidx].predict([observations[cidx][next_obs[bidx]]])
for d in ds:
bd += d * xval_kappas[cidx]
bd = 1 if bd > 0 else -1
dsum += bd * xval_behavior_kappas[bidx] / b_sequence.count(b)
else:
for cidx in contexts_for_behavior:
contexts_so_far.append(cidx)
ds = xval_svms[cidx].predict([observations[cidx][next_obs[bidx]]])
for d in ds:
dsum += d * xval_kappas[cidx] / b_sequence.count(b)
# print contexts[cidx], d, xval_kappas[cidx], b_sequence.count(b) # DEBUG
next_obs[bidx] += 1
dec = 1 if dsum > 0 else 0
# print dec, dsum # DEBUG
# _ = raw_input() # DEBUG
test_contexts_used[wn][dependent_inc][test_oidx].append(contexts_so_far)
test_behaviors_used[wn][dependent_inc][test_oidx].append(behaviors_so_far)
test_time_used[wn][dependent_inc][test_oidx].append(sum([behavior_t[behaviors[bidx]]
for bidx in behaviors_so_far]))
decisions[wn][dependent_inc][test_oidx].append(dec)
# print test_oidx, dependent_inc, bsidx, tidx # DEBUG
# print "contexts: " + str(test_contexts_used[wn][dependent_inc][test_oidx][bsidx]) # DEBUG
# print "behaviors: " + str(test_behaviors_used[wn][dependent_inc][test_oidx][bsidx]) # DEBUG
# print "times: " + str(test_time_used[wn][dependent_inc][test_oidx][bsidx]) # DEBUG
# print "decisions: " + str(decisions[wn][dependent_inc][test_oidx][bsidx]) # DEBUG
# _ = raw_input() # DEBUG
print "... done"
# Write outfiles.
print "writing outfile..."
with open(outfile, 'wb') as f:
pickle.dump([w.keys(), train_contexts_used, train_behaviors_used, train_time_used,
test_contexts_used, test_behaviors_used, test_time_used, decisions], f)
print "... done"
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('--indir', type=str, required=True,
help="data directory")
parser.add_argument('--test_pidx', type=int, required=True,
help="the predicate id held out for testing")
parser.add_argument('--kernel', type=str, required=True,
help="SVM kernel to use (linear, poly, rbf)")
parser.add_argument('--word_embeddings', type=str, required=True,
help="word embeddings binary to use")
parser.add_argument('--required_examples', type=int, required=True,
help="how many positive and negative examples per predicate to qualify")
parser.add_argument('--train_kappa_threshold', type=int, required=True,
help="number of examples required before trusting a training predicate classifier")
parser.add_argument('--outfile', type=str, required=True,
help="file to write results pickle")
parser.add_argument('--resample_test', type=int, required=True,
help="whether to resample at test time and cut off exploration early")
parser.add_argument('--alternative_labels', type=str, required=False,
help="specify labels pickle; labels in this pickle will override defaults")
parser.add_argument('--uniform_costs', type=int, required=False,
help="whether to use a uniform cost function")
parser.add_argument('--degenerate', type=int, required=False,
help="whether to use a degenerate transition graph")
parser.add_argument('--use_behavior_decisions', type=int, required=False,
help="whether to use behavior-level kappas and decisions")
args = parser.parse_args()
for k, v in vars(args).items():
globals()['FLAGS_%s' % k] = v
main()