lm_model.py
"""Language model class that creates the computation graph.
Author: Shubham Toshniwal
Date: February, 2018
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from bunch import Bunch
import random
import tensorflow as tf
import tf_utils
from losses import LossUtils
from base_params import BaseParams
from lm_dataset import LMDataset
from tensorflow.contrib.rnn.python.ops.core_rnn_cell import _linear


class LMModel(BaseParams):
    """Language model."""

    @classmethod
    def class_params(cls):
        params = Bunch()
        # Optimization params
        params['lm_batch_size'] = 128
        params['lm_learning_rate'] = 1e-4
        params['lm_learning_rate_decay_factor'] = 0.5
        params['max_gradient_norm'] = 5.0
        params['simple_lm'] = False
        return params
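
    # The defaults above can be overridden by passing a modified Bunch to the
    # constructor; for example (example value and placeholder names):
    #     params = LMModel.class_params()
    #     params.lm_batch_size = 64
    #     model = LMModel(encoder, data_files, params=params)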

    def __init__(self, encoder, data_files, params=None):
        """Initializes the language model.

        Args:
            encoder: Encoder object invoked as encoder(inputs, seq_len).
            data_files: List of data files used to build the LM dataset.
            params: Optional Bunch of hyperparameters; defaults to
                class_params().
        """
        if params is None:
            self.params = self.class_params()
        else:
            self.params = params
        params = self.params

        self.data_files = data_files
        self.data_iter = self.update_iterator()

        self.learning_rate = tf.Variable(float(params.lm_learning_rate),
                                         trainable=False)
        self.learning_rate_decay_op = self.learning_rate.assign(
            self.learning_rate * params.lm_learning_rate_decay_factor)

        # Number of gradient updates performed
        self.lm_global_step = tf.Variable(0, trainable=False)
        # Number of epochs done
        self.epoch = tf.Variable(0, trainable=False)
        self.epoch_incr = self.epoch.assign(self.epoch + 1)

        self.encoder = encoder
        self.create_computational_graph()

        # Gradients and parameter updates for training the model.
        # All trainable variables are updated; to train only a subset
        # (e.g. variables whose names contain "decoder_char"), filter
        # tf.trainable_variables() here instead.
        trainable_vars = tf.trainable_variables()
        # Initialize optimizer
        opt = tf.train.AdamOptimizer(self.learning_rate, name='AdamLM')
        # Get gradients from loss
        gradients = tf.gradients(self.losses, trainable_vars)
        # Gradient clipping
        clipped_gradients, _ = tf.clip_by_global_norm(gradients,
                                                      params.max_gradient_norm)
        # Apply gradients
        self.updates = opt.apply_gradients(
            zip(clipped_gradients, trainable_vars),
            global_step=self.lm_global_step)

    def update_iterator(self):
        """Create data iterator."""
        random.shuffle(self.data_files)
        lm_set = LMDataset(self.data_files, self.params.lm_batch_size)
        return lm_set.data_iter

    def create_computational_graph(self):
        """Creates the computational graph."""
        self.encoder_inputs, self.seq_len = self.get_batch()
        self.targets, self.target_weights = \
            tf_utils.create_shifted_targets(self.encoder_inputs, self.seq_len)

        # Create computational graph
        # First encode input
        with tf.variable_scope("rnn_decoder_char", reuse=tf.AUTO_REUSE):
            self.outputs = self.encoder(self.encoder_inputs, self.seq_len)

        self.losses = LossUtils.cross_entropy_loss(
            self.outputs, self.targets, self.seq_len)
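
    # Next-symbol training setup (a sketch; the exact shift and masking are
    # implemented in tf_utils.create_shifted_targets):
    #     inputs  at steps 0..T-1 : x_0, x_1, ..., x_{T-1}
    #     targets at steps 0..T-1 : x_1, x_2, ..., x_T
    # so at each step the LM is trained to predict the following character,
    # with target_weights masking positions past each sequence's length.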

    def get_batch(self):
        """Get a batch from the iterator."""
        batch = self.data_iter.get_next()
        # Inputs are time-major with shape (T + 1) x B; encoder_len ensures
        # that the (T + 1)-th symbol is not processed as an input.
        encoder_inputs = tf.transpose(batch["char"], [1, 0])
        encoder_len = batch["char_len"]
        return [encoder_inputs, encoder_len]

    @classmethod
    def add_parse_options(cls, parser):
        # LM params
        parser.add_argument("-lm_learning_rate", default=0.0001, type=float,
                            help="LM learning rate")