# lstm.py -- LSTM and bidirectional LSTM layers in Theano
# (forked from rakeshvar/rnn_ctc)
import theano.tensor as tt
import theano as th
from theano.tensor.nnet import sigmoid
import numpy as np

from activations import share, activation_by_name


def orthonormal_wts(n, m):
    """Return an n x m slice of a random orthogonal matrix."""
    nm = max(n, m)
    return np.linalg.svd(np.random.randn(nm, nm))[0].astype(
        th.config.floatX)[:n, :m]


def stacked_wts(n, m, copies, name=None):
    """Stack `copies` orthonormal n x m blocks side by side as one shared variable."""
    return share(
        np.hstack([orthonormal_wts(n, m) for _ in range(copies)]),
        name=name)
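

# Illustrative check (an addition, not part of the original module; the helper
# name is hypothetical and it is not used elsewhere in this file): the slice
# returned by orthonormal_wts has orthonormal rows whenever n <= m, so
# w.dot(w.T) should be close to the identity.
def _check_orthonormal_rows(n=3, m=5):
    w = orthonormal_wts(n, m)
    return np.allclose(w.dot(w.T), np.eye(n), atol=1e-4)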


class LSTM():
    """
    Long Short Term Memory layer.
    Does not implement in-cell (peephole) connections from the cell value to
    the gates.
    Reference: Supervised Sequence Labelling with Recurrent Neural Networks,
    Alex Graves, Chapter 4, Fig. 4.2.
    """
    def __init__(self, inpt,
                 nin, nunits,
                 forget=False,
                 actvn_pre='tanh',
                 actvn_post='linear',
                 learn_init_states=True):
        """
        Initialize the layer. Sets self.output (seqlen x nunits),
        self.params and self.nout.
        :param inpt: Excitation of the lower layer (seqlen x nin).
        :param nin: Dimension of the lower layer.
        :param nunits: Number of units.
        :param forget: Use a separate forget gate? If False, the forget gate
            is tied to (1 - input gate).
        :param actvn_pre: Activation applied to the new candidate cell value.
        :param actvn_post: Activation applied to the cell value before output.
        :param learn_init_states: Should the initial states be learnt?
        """
        # TODO: In-cell connections
        num_activations = 3 + forget
        w = stacked_wts(nin, nunits, num_activations)
        u = stacked_wts(nunits, nunits, num_activations)
        b = share(np.zeros(num_activations * nunits))
        out0 = share(np.zeros(nunits))
        cell0 = share(np.zeros(nunits))
        actvn_pre = activation_by_name(actvn_pre)
        actvn_post = activation_by_name(actvn_post)

        def step(in_t, out_tm1, cell_tm1):
            """
            Scan function.
            :param in_t: Current (already projected) input from the lower layer.
            :param out_tm1: Previous output of the LSTM layer.
            :param cell_tm1: Previous cell value.
            :return: Current output and cell value.
            """
            # tmp packs [input gate | output gate | (forget gate) | candidate]
            # along its last axis, nunits entries each.
            tmp = tt.dot(out_tm1, u) + in_t
            inn_gate = sigmoid(tmp[:nunits])
            out_gate = sigmoid(tmp[nunits:2 * nunits])
            fgt_gate = sigmoid(
                tmp[2 * nunits:3 * nunits]) if forget else 1 - inn_gate
            cell_val = actvn_pre(tmp[-nunits:])
            cell_val = fgt_gate * cell_tm1 + inn_gate * cell_val
            out = out_gate * actvn_post(cell_val)
            return out, cell_val

        inpt = tt.dot(inpt, w) + b
        # (seqlen x nin) . (nin x num_activations*nunits) + (num_activations*nunits,)
        #   = seqlen x num_activations*nunits

        rval, updates = th.scan(step,
                                sequences=[inpt],
                                outputs_info=[out0, cell0])

        self.output = rval[0]
        self.params = [w, u, b]
        if learn_init_states:
            self.params += [out0, cell0]
        self.nout = nunits
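

# Usage sketch (an assumption, not part of the original file; the helper name
# is hypothetical): build the layer on a symbolic seqlen x nin matrix, then
# compile its symbolic output with theano.function.
def _demo_lstm(nin=8, nunits=16, seqlen=20):
    x = tt.matrix('x')                                  # seqlen x nin
    layer = LSTM(x, nin, nunits, forget=True)
    f = th.function([x], layer.output)
    seq = np.random.randn(seqlen, nin).astype(th.config.floatX)
    return f(seq)                                       # seqlen x nunits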


class BDLSTM():
    """
    Bidirectional Long Short Term Memory layer.
    Runs one LSTM over the input and another over the time-reversed input,
    then concatenates their outputs unit-wise, so nout = 2 * nunits.
    Does not implement in-cell (peephole) connections from the cell value to
    the gates.
    Reference: Supervised Sequence Labelling with Recurrent Neural Networks,
    Alex Graves, Chapter 4, Fig. 4.2.
    """
    def __init__(self, inpt,
                 nin, nunits,
                 forget=False,
                 actvn_pre='tanh',
                 actvn_post='linear',
                 learn_init_states=True):
        fwd = LSTM(inpt, nin, nunits, forget, actvn_pre, actvn_post,
                   learn_init_states)
        bwd = LSTM(inpt[::-1], nin, nunits, forget, actvn_pre, actvn_post,
                   learn_init_states)

        self.params = fwd.params + bwd.params
        self.nout = fwd.nout + bwd.nout
        # Reverse the backward pass back to forward time before concatenating.
        self.output = tt.concatenate([fwd.output,
                                      bwd.output[::-1]],
                                     axis=1)
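

# Bidirectional usage sketch (illustrative; helper name is hypothetical): the
# concatenated output is seqlen x 2*nunits, and .params gathers both
# directions' weights, so a scalar cost can be differentiated with respect to
# all of them for gradient-based training.
def _demo_bdlstm(nin=8, nunits=16, seqlen=20):
    x = tt.matrix('x')                                  # seqlen x nin
    layer = BDLSTM(x, nin, nunits, forget=True)
    cost = tt.sum(layer.output ** 2)                    # stand-in scalar cost
    grads = th.grad(cost, layer.params)                 # one grad per parameter
    f = th.function([x], [layer.output] + grads)
    out = f(np.random.randn(seqlen, nin).astype(th.config.floatX))
    return out[0]                                       # seqlen x 2*nunits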