mscnn_train_test.py
import os
import time
import tensorflow as tf
import numpy as np
from sys import argv
import ipdb
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from inputs import get_train_dataset_fb_id, get_eval_dataset_fb_id
# configurations
CLASSES = [
    'air conditioner',
    'car horn',
    'children playing',
    'dog bark',
    'drilling',
    'engine idling',
    'gun shot',
    'jackhammer',
    'siren',
    'street music'
]
# 10-fold cross-validation splits.
# NOTE: data_sets_all_6 (the list of 10 pre-computed data folds) is not
# defined in this file; it is assumed to be provided elsewhere, e.g. by the
# inputs module.
train_set_cv6 = []
valid_set_cv6 = []
test_set_cv6 = []
for i in range(10):
    # test set: fold i
    test_idx = i
    test_set = [data_sets_all_6[test_idx]]
    test_set_cv6.append(test_set)
    # valid set: the next fold
    valid_idx = (i + 1) % 10
    valid_set = [data_sets_all_6[valid_idx]]
    valid_set_cv6.append(valid_set)
    # train set: the remaining 8 folds
    train_idx = list(set(range(10)) - {test_idx, valid_idx})
    train_set = [data_sets_all_6[j] for j in train_idx]
    train_set_cv6.append(train_set)
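# Sanity check (added for illustration, not in the original script): every
# split should use 8 folds for training and one each for validation and test,
# with no overlap by construction above.
for i in range(10):
    assert len(train_set_cv6[i]) == 8
    assert len(valid_set_cv6[i]) == 1 and len(test_set_cv6[i]) == 1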
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
tf.logging.set_verbosity(tf.logging.ERROR)

BATCH_SIZE = 100
EPOCHS = 50
LEARNING_RATE = 1e-4
weight_decay = 1e-3
LR_DECAY_BASE = 1.00  # 1.00 = no learning-rate decay
IKP = 0.25  # input keep probability (dropout before FC1)
OKP = 0.50  # output keep probability (dropout before FC2)
SKP = 0.75  # state keep probability (unused by this model)
FC1_DIM = 100  # FC1 hidden units; not defined in the original file, assumed
               # to match FC1's default nhid=100
N_CLASSES = len(CLASSES)
CV_IDX = 0  # cross-validation split index, in [0, 9]
trial = 0

tbpath = 'Tensorboard/mscnn_' + str(CV_IDX) + '_' + str(trial)
mpath = 'Model/mscnn_' + str(CV_IDX) + '_' + str(trial)
tf.gfile.MakeDirs(tbpath)  # MakeDirs creates parent directories as needed
tf.gfile.MakeDirs(mpath)
def CNN_JS(X, is_training=False):
    # 1st convolutional layer: map the spectrogram image to 24 feature maps
    h_conv1 = tf.layers.conv2d(X, 24, [5, 5], [1, 1], 'same', use_bias=False)
    h_pool1 = tf.nn.max_pool(h_conv1, ksize=[1, 4, 2, 1], strides=[1, 4, 2, 1], padding='SAME')
    h_bn1 = tf.layers.batch_normalization(h_pool1, training=is_training)
    h_1 = tf.nn.relu(h_bn1)
    # 2nd convolutional layer: map 24 feature maps to 48
    h_conv2 = tf.layers.conv2d(h_1, 48, [5, 5], [1, 1], 'same', use_bias=False)
    h_pool2 = tf.nn.max_pool(h_conv2, ksize=[1, 4, 2, 1], strides=[1, 4, 2, 1], padding='SAME')
    h_bn2 = tf.layers.batch_normalization(h_pool2, training=is_training)
    h_2 = tf.nn.relu(h_bn2)
    # 3rd convolutional layer: map 48 feature maps to 48, no pooling
    h_conv3 = tf.layers.conv2d(h_2, 48, [5, 5], [1, 1], 'same', use_bias=False)
    h_bn3 = tf.layers.batch_normalization(h_conv3, training=is_training)
    h_3 = tf.nn.relu(h_bn3)
    return h_3
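# Shape walkthrough (illustrative, assuming input patches of shape
# [B, T, 128, 3] as declared for the iterator below): each 4x2 max-pool
# divides the time axis by 4 and the frequency axis by 2, so CNN_JS maps
# [B, T, 128, 3] -> [B, T/16, 32, 48]. For example:
#     with tf.Graph().as_default():
#         feat = CNN_JS(tf.zeros([2, 64, 128, 3]))
#         print(feat.shape)  # (2, 4, 32, 48)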
def FC1(X, nhid=100, kp=1.0, is_training=False):
    tsX = tf.shape(X)
    slX = X.get_shape().as_list()
    X = tf.reshape(X, [tsX[0], tsX[1], slX[2] * slX[3]])  # [B, T, F*C]
    X = tf.reduce_mean(X, axis=1)  # temporal avg pooling
    X = tf.nn.dropout(X, keep_prob=kp)
    X = tf.layers.dense(X, nhid, use_bias=False)
    X = tf.layers.batch_normalization(X, training=is_training)
    X = tf.nn.relu(X)  # [B x nhid]
    return X

def FC2(X, nhid=10, kp=1.0, is_training=False):
    X = tf.nn.dropout(X, keep_prob=kp)
    X = tf.layers.dense(X, nhid)  # logits, no activation
    return X
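# Illustrative head shapes (an assumed walkthrough, following the CNN output
# shape above): FC1 flattens frequency x channels (32*48 = 1536), averages
# over time, and projects to nhid units; FC2 then produces the class logits:
#     h = FC1(feat, nhid=100)   # [B, 100]
#     logits = FC2(h, nhid=10)  # [B, 10]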
def Model_TF(X, LABELS_FB, lstm_ikp=1.0, lstm_okp=1.0, lstm_skp=1.0, is_training=False):
    # (LABELS_FB and lstm_skp are accepted but unused by this model.)
    # CNN feature map via the shared CNN_JS
    cnn_feat = CNN_JS(X, is_training)
    # FC1
    fc = FC1(cnn_feat, FC1_DIM, lstm_ikp, is_training)
    # FC2 for sound classification
    fc2 = FC2(fc, N_CLASSES, lstm_okp, is_training)
    return fc2
# Build our dataflow graph.
GRAPH = tf.Graph()
with GRAPH.as_default():
    # datasets
    dataset_train = get_train_dataset_fb_id(train_set_cv6[CV_IDX], batch_size=BATCH_SIZE)
    dataset_valid = get_eval_dataset_fb_id(valid_set_cv6[CV_IDX])
    dataset_test = get_eval_dataset_fb_id(test_set_cv6[CV_IDX])
    is_training = tf.placeholder(tf.bool)
    # reinitializable iterator shared by the train/valid/test datasets
    # (tf.data.Iterator, formerly tf.contrib.data.Iterator)
    iterator = tf.data.Iterator.from_structure(
        dataset_train.output_types,
        (tf.TensorShape([None, None, 128, 3]), tf.TensorShape([None]),
         tf.TensorShape([None]), tf.TensorShape([None])))
    iter_init_train = iterator.make_initializer(dataset_train)
    iter_init_valid = iterator.make_initializer(dataset_valid)
    iter_init_test = iterator.make_initializer(dataset_test)
    next_element = iterator.get_next()
    # unpack the next batch
    SPECTRUMS = next_element[0]
    LABELS = next_element[1]
    LABELS_FB = next_element[2]
    # dropout keep probabilities
    lstm_ikp = tf.placeholder(tf.float32, shape=())
    lstm_okp = tf.placeholder(tf.float32, shape=())
    lstm_skp = tf.placeholder(tf.float32, shape=())
    # model
    pred_Y = Model_TF(SPECTRUMS, LABELS_FB, lstm_ikp, lstm_okp, lstm_skp, is_training)
    # sound classification loss
    sc_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=pred_Y, labels=LABELS))
    tf.summary.scalar("sc_loss", sc_loss)
    # final cost with L2 weight decay on the dense-layer kernels
    tvars = tf.trainable_variables()
    l2_loss = tf.add_n([tf.nn.l2_loss(v) for v in tvars if 'dense' in v.name and 'kernel' in v.name])
    tf.summary.scalar("l2_loss", l2_loss)
    COST = sc_loss + weight_decay * l2_loss
    # count trainable variables
    nvars = 0
    for var in tvars:
        sh = var.get_shape().as_list()
        print(var.name, sh)
        nvars += np.prod(sh)
    print(nvars, 'total variables')
    # compute gradients
    lr = tf.placeholder(tf.float32, shape=())
    OPTIMIZER = tf.train.AdamOptimizer(lr)  # default epsilon 1e-08
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)  # batch-norm moving-average updates
    with tf.control_dependencies(update_ops):
        grads, _ = tf.clip_by_global_norm(tf.gradients(COST, tvars), 1)  # compute gradients and clip by global norm
        APPLY_GRADIENT_OP = OPTIMIZER.apply_gradients(zip(grads, tvars))  # apply gradients
    # evaluation
    correct_pred = tf.equal(tf.argmax(pred_Y, 1), LABELS)
    ACCURACY = tf.reduce_mean(tf.cast(correct_pred, dtype=tf.float32))
    # confusion matrix
    CONF_MAT = tf.confusion_matrix(LABELS, tf.argmax(pred_Y, 1), num_classes=N_CLASSES)
    SUMMARIES_OP = tf.summary.merge_all()
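# The scalar summaries written during training below can be inspected with
# TensorBoard (run separately in a shell):
#     tensorboard --logdir Tensorboard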
# Start training the model.
with tf.Session(graph=GRAPH) as SESSION:
    # initialize variables first
    SESSION.run(tf.global_variables_initializer())
    # Create a tensorflow summary writer.
    SUMMARY_WRITER = tf.summary.FileWriter(tbpath, graph=GRAPH)
    # Create a saver for per-epoch checkpoints.
    GRAPH_WRITER = tf.train.Saver(max_to_keep=EPOCHS)
    steps = 0  # global step for tf.summary
    best_cost = float('inf')
    best_acc = 0.0
    best_model = 0
    for EPOCH in range(EPOCHS):
        # initialize an iterator over the training dataset
        SESSION.run(iter_init_train)
        iters = 0
        costs = 0.0
        accs = 0.0
        lr_decay = LR_DECAY_BASE ** EPOCH
        lr_epoch = LEARNING_RATE / lr_decay
        print('Epoch %d, Learning rate = %.7f' % (EPOCH, lr_epoch))
        start_time = time.time()
        while True:
            try:
                _, summaries, COST_VAL, ACC_VAL = SESSION.run(
                    [APPLY_GRADIENT_OP, SUMMARIES_OP, COST, ACCURACY],
                    feed_dict={lstm_ikp: IKP, lstm_okp: OKP, lstm_skp: SKP, is_training: True, lr: lr_epoch})
                costs += COST_VAL
                accs += ACC_VAL
                iters += 1
                if iters % 100 == 0:
                    end_time = time.time()
                    DURATION = end_time - start_time
                    start_time = end_time
                    print('Epoch %d, Iters %d, cost = %.6f (%.3f sec)' % (EPOCH, iters, (costs / iters), DURATION))
                SUMMARY_WRITER.add_summary(summaries, steps)
                steps += 1
            except tf.errors.OutOfRangeError:
                break
        GRAPH_WRITER.save(SESSION, mpath + '/model', global_step=EPOCH)  # save model after each epoch
        print('Epoch %d, Train cost = %.6f, acc = %.6f' % (EPOCH, (costs / iters), (accs / iters)))
        # validation after each epoch
        SESSION.run(iter_init_valid)
        iters = 0
        costs = 0.0
        accs = 0.0
        while True:
            try:
                COST_VAL, ACC_VAL = SESSION.run(
                    [COST, ACCURACY],
                    feed_dict={lstm_ikp: 1.0, lstm_okp: 1.0, lstm_skp: 1.0, is_training: False, lr: 0.0})
                costs += COST_VAL
                accs += ACC_VAL
                iters += 1
            except tf.errors.OutOfRangeError:
                break
        final_cost = costs / iters
        final_acc = accs / iters
        print('Epoch %d, Valid cost = %.6f, acc = %.6f' % (EPOCH, final_cost, final_acc))
        if final_acc > best_acc:
            print('Best epoch is changed from %d to %d, Acc %.4f to %.4f' % (best_model, EPOCH, best_acc, final_acc))
            best_acc = final_acc
            best_model = EPOCH
        # test after each epoch
        SESSION.run(iter_init_test)
        iters = 0
        costs = 0.0
        accs = 0.0
        while True:
            try:
                COST_VAL, ACC_VAL = SESSION.run(
                    [COST, ACCURACY],
                    feed_dict={lstm_ikp: 1.0, lstm_okp: 1.0, lstm_skp: 1.0, is_training: False, lr: 0.0})
                costs += COST_VAL
                accs += ACC_VAL
                iters += 1
            except tf.errors.OutOfRangeError:
                break
        final_cost = costs / iters
        final_acc = accs / iters
        print('Epoch %d, Test cost = %.6f, acc = %.6f' % (EPOCH, final_cost, final_acc))
    # final test with the best model (by validation accuracy)
    best_model_path = mpath + '/model-' + str(best_model)
    GRAPH_WRITER.restore(SESSION, best_model_path)
    print('Test using the best model %s' % best_model_path)
    SESSION.run(iter_init_test)
    iters = 0
    accs = 0.0
    conf_mat = np.zeros((N_CLASSES, N_CLASSES), dtype=int)
    while True:
        try:
            ACCS_VAL, CONF_MAT_VAL = SESSION.run(
                [ACCURACY, CONF_MAT],
                feed_dict={lstm_ikp: 1.0, lstm_okp: 1.0, lstm_skp: 1.0, is_training: False, lr: 0.0})
            accs += ACCS_VAL
            conf_mat = conf_mat + CONF_MAT_VAL
            iters += 1
        except tf.errors.OutOfRangeError:
            break
    print('Test Accuracy : %.4f' % (accs / iters))
    print(conf_mat)
    np.savetxt(best_model_path + '-confmat.txt', conf_mat, fmt='%d')
    np.save(best_model_path + '-confmat.npy', conf_mat)
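# Illustrative follow-up (not in the original script): per-class accuracy can
# be recovered from the confusion matrix saved above, e.g.
#     cm = np.load(best_model_path + '-confmat.npy')
#     per_class_acc = cm.diagonal() / cm.sum(axis=1).astype(float)
#     for name, acc in zip(CLASSES, per_class_acc):
#         print('%s: %.4f' % (name, acc))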