eval_other_sent.py
import argparse
import json
import time
import datetime
import numpy as np
import code
import os
import cPickle as pickle
import math
import scipy.io
from imagernn.solver import Solver
from imagernn.imagernn_utils import decodeGenerator, eval_split
from imagernn.data_provider import prepare_data, loadArbitraryFeatures
from picsom_bin_data import picsom_bin_data
from nltk.tokenize import word_tokenize
"""
This script is used to compute models opinion of how likely a given sentence corresponds to a given image
This can be used to build mutual evaluations of the different models with each other
"""
def main(params):
  # load the checkpoint
  checkpoint_path = params['checkpoint_path']
  print 'loading checkpoint %s' % (checkpoint_path, )
  checkpoint = pickle.load(open(checkpoint_path, 'rb'))
  checkpoint_params = checkpoint['params']
  model_npy = checkpoint['model']

  misc = {}
  misc['wordtoix'] = checkpoint['wordtoix']
  ixtoword = checkpoint['ixtoword']

  # this script always uses the theano implementation of the model
  checkpoint_params['use_theano'] = 1

  if 'image_feat_size' not in checkpoint_params:
    checkpoint_params['image_feat_size'] = 4096
  # output blob which we will dump to JSON for visualizing the results
  blob = {}
  blob['params'] = params
  blob['checkpoint_params'] = checkpoint_params
  blob['imgblobs'] = []

  # load the image list file; each line is <image name>,<sentence>[,<feature index>]
  root_path = params['root_path']
  img_names_list = open(params['imgList'], 'r').read().splitlines()

  if len(img_names_list[0].rsplit(',')) > 2:
    img_names = [x.rsplit(',')[0] for x in img_names_list]
    sentRaw = [x.rsplit(',')[1] for x in img_names_list]
    idxes = [int(x.rsplit(',')[2]) for x in img_names_list]
  elif len(img_names_list[0].rsplit(',')) == 2:
    img_names = [x.rsplit(',')[0] for x in img_names_list]
    sentRaw = [x.rsplit(',')[1] for x in img_names_list]
    idxes = xrange(len(img_names_list))
  else:
    print 'ERROR: the list should contain at least an image name and a corresponding sentence'
    return
  if checkpoint_params.get('en_aux_inp', 0) and (params.get('aux_inp_file', None) is None):
    raise ValueError('ERROR: please specify the auxiliary input feature file using --aux_inp_file')

  # load the features for all images
  features, aux_inp = loadArbitraryFeatures(params, idxes)
  D, NN = features.shape
  N = len(img_names)
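  # features is a (feature dimension x number of images) array; image n is scored
  # against sentence n using the column features[:, n] (and aux_inp[:, n], if enabled)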
  # build the evaluation function of the model and score the given sentences against the images
  BatchGenerator = decodeGenerator(checkpoint_params)
  BatchGenerator.build_eval_other_sent(BatchGenerator.model_th, checkpoint_params, model_npy)
  eval_batch_size = params.get('eval_batch_size', 100)
  wordtoix = checkpoint['wordtoix']
  gen_fprop = BatchGenerator.f_eval_other

  print("\nUsing model run for %0.2f epochs with validation perplexity at %0.3f\n" % (checkpoint['epoch'],
        checkpoint['perplexity']))
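
  # process the image-sentence pairs in mini-batches of up to eval_batch_size:
  # each batch is packed by prepare_data() and scored in a single forward pass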
  n = 0
  while n < N:
    print('image %d/%d:\r' % (n, N)),

    cbs = 0
    # assemble the current batch of image features and sentences
    batch = []
    while n < N and cbs < eval_batch_size:
      out = {}
      out['image'] = {'feat': features[:, n]}
      out['sentence'] = {'raw': sentRaw[n], 'tokens': word_tokenize(sentRaw[n])}
      out['idx'] = n
      if checkpoint_params.get('en_aux_inp', 0):
        out['image']['aux_inp'] = aux_inp[:, n]
      cbs += 1
      n += 1
      batch.append(out)

    inp_list, lenS = prepare_data(batch, wordtoix)

    # perform the work. heavy lifting happens inside
    eval_array = gen_fprop(*inp_list)
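    # eval_array[0, ix] holds the model's log-probability score for the ix-th
    # image-sentence pair in the current batch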
    for ix, x in enumerate(batch):
      # build up the output
      img_blob = {}
      img_blob['img_path'] = img_names[x['idx']]
      # record the given sentence and its log-probability under the model
      img_blob['candidate'] = {'text': x['sentence']['raw'], 'logprob': float(eval_array[0, ix])}
      blob['imgblobs'].append(img_blob)
  # dump the result struct to a JSON file
  jsonFname = 'result_struct_%s.json' % (params['fname_append'])
  save_file = os.path.join(root_path, jsonFname)
  print 'writing predictions to %s...' % (save_file, )
  json.dump(blob, open(save_file, 'w'))
if __name__ == "__main__":
parser = argparse.ArgumentParser()
parser.add_argument('checkpoint_path', type=str, help='the input checkpoint')
parser.add_argument('-i', '--imgList', type=str, default='testimgs.txt', help='file with the list of images to process. Either just filenames or in <filenmae, index> format')
parser.add_argument('-f', '--feat_file', type=str, default='vgg_feats.mat', help='file with the features. We can rightnow process only .mat format')
parser.add_argument('-d', '--dest', dest='root_path', default='example_images', type=str, help='folder to store the output files')
parser.add_argument('-b', '--beam_size', type=int, default=1, help='beam size in inference. 1 indicates greedy per-word max procedure. Good value is approx 20 or so, and more = better.')
parser.add_argument('--fname_append', type=str, default='', help='str to append to routput files')
parser.add_argument('--aux_inp_file', dest='aux_inp_file', type=str, default=None, help='Is there any auxillary inputs ? If yes indicate file here')
args = parser.parse_args()
params = vars(args) # convert to ordinary dict
if params['aux_inp_file'] != None:
params['en_aux_inp'] = 1
else:
params['en_aux_inp'] = 0
print 'parsed parameters:'
print json.dumps(params, indent = 2)
main(params)
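
# Example invocation (an illustrative sketch; the checkpoint and list filenames below
# are made-up placeholders, not files shipped with the repository):
#
#   python eval_other_sent.py cv/model_checkpoint.p -i testimgs.txt -f vgg_feats.mat \
#          -d example_images --fname_append demo
#
# This writes the per-sentence scores to example_images/result_struct_demo.json.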