# -*- coding: utf-8 -*-
from __future__ import absolute_import, division, print_function
import tensorflow as tf
from tensorflow.contrib import learn as tflearn
from tensorflow.contrib import layers as tflayers


def lstm_model(num_units, rnn_layers, dense_layers=None, learning_rate=0.1, optimizer='Adagrad'):
"""
Creates a deep model based on:
* stacked lstm cells
* an optional dense layers
:param num_units: the size of the cells.
:param rnn_layers: list of int or dict
* list of int: the steps used to instantiate the `BasicLSTMCell` cell
* list of dict: [{steps: int, keep_prob: int}, ...]
:param dense_layers: list of nodes for each layer
:return: the model definition
"""
    def lstm_cells(layers):
        ## Is this input validation? Should it raise a TypeError if layers[0] is not a dict?
        ## Again, since this helper isn't exposed to the user, the check may be unnecessary.
        if isinstance(layers[0], dict):
            ## tf.contrib.rnn.DropoutWrapper()
            ## Changes in 1.4 are entirely additions.
            ## Original properties are:
            ##  - output_size: output tensor size??
            ##  - state_size: internal state?? Where? How? Why is this accessible??
            ##  - __init__ unchanged, except for the addition of a dropout_state_filter_visitor argument
            ##    -- it only affects state_keep_prob, which isn't relevant here.
            ##  - __call__ completely identical
            ##  - zero_state completely identical
            ## tf.contrib.rnn.BasicLSTMCell()
            ## 1.4 adds an alias under tf.nn.rnn_cell. Maybe try to minimize use of the contrib library?
            ## Something something adds stability??
            ## In fact, they recommend this: "For advanced models, please use the full tf.nn cell that follows."
            ## Original properties: output_size, state_size. 1.4 has only additions.
            ## Methods:
            ##  - __init__() 1.4 removes a deprecated input_size argument, which does nothing.
            ##    -- it also changes the format of the activation function, though functionality is the same.
            ##    -- state_is_tuple stays the same.
            ##  - zero_state() is identical in parameters and return values.
            # For-loop variant, for learning purposes.
            rolling_result = list()
            for layer in layers:
                if layer.get('keep_prob'):
                    ## Note: the second positional argument of DropoutWrapper is
                    ## input_keep_prob, so dropout is applied to the cell's inputs.
                    rolling_result.append(tf.nn.rnn_cell.DropoutWrapper(
                        tf.nn.rnn_cell.BasicLSTMCell(
                            layer['num_units'],
                            state_is_tuple=True),
                        layer['keep_prob']))
                else:
                    rolling_result.append(tf.nn.rnn_cell.BasicLSTMCell(
                        layer['num_units'],
                        state_is_tuple=True))
            return rolling_result
            # Original list comprehension:
            # return [tf.contrib.rnn.DropoutWrapper(
            #             tf.contrib.rnn.BasicLSTMCell(layer['num_units'], state_is_tuple=True),
            #             layer['keep_prob'])
            #         if layer.get('keep_prob')
            #         else tf.contrib.rnn.BasicLSTMCell(layer['num_units'], state_is_tuple=True)
            #         for layer in layers]
        ## This is the else clause of the previous if statement.
        ## Very similar to the else clause at the return above; the only difference
        ## is that each element is the full `layer` int rather than layer['num_units'].
        return [tf.nn.rnn_cell.BasicLSTMCell(layer, state_is_tuple=True) for layer in layers]
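        ## For example (illustrative, not from the original file):
        ##   lstm_cells([5, 5]) -> [BasicLSTMCell(5), BasicLSTMCell(5)]
        ## i.e. a list of cells ready to be stacked by MultiRNNCell below.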

    def dnn_layers(input_layers, layers):
        ## Why support different formats?? The use of a variable that could be either
        ## a scalar type (int/char/whatever) or a dict (much more complex), or NoneType
        ## is interesting to me. Need to pay attention to its use here.
        ## tflayers is an alias for tf.contrib.layers.
        if layers and isinstance(layers, dict):
            ## tflayers.stack() (alias of tf.contrib.layers.stack)
            ## Repeatedly applies the given layer function (here fully_connected).
            ## Identical between versions.
            ## activation and dropout are kwargs forwarded to each fully_connected call.
            ## In these layers, every unit is connected to every unit of the next layer.
            return tflayers.stack(input_layers, tflayers.fully_connected,
                                  layers['layers'],
                                  activation=layers.get('activation'),
                                  dropout=layers.get('dropout'))
        elif layers:
            ## Find out what the activation and dropout parameters do, and why they're excluded here.
            return tflayers.stack(input_layers, tflayers.fully_connected, layers)
        else:
            ## Why does this exist? Should there be an exception here? In this case the
            ## function is a no-op: the input is passed straight through unchanged.
            return input_layers
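        ## A hand-expanded sketch of what tflayers.stack does in the branches above
        ## (illustrative; assumes layers == [10, 10]):
        ##   h = tflayers.fully_connected(input_layers, 10)
        ##   h = tflayers.fully_connected(h, 10)
        ##   return h
        ## i.e. the output of each fully_connected call feeds the next one.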

    # As used in lstm_sin_cos, X is features, y is labels.
    def _lstm_model(X, y):
        ## tf.contrib.rnn.MultiRNNCell (alias of tf.nn.rnn_cell.MultiRNNCell)
        ## Properties are unchanged (only additions).
        ## Methods:
        ##  - __init__() unchanged. state_is_tuple=False is deprecated, but not used here.
        ##  - __call__() unchanged.
        ##  - zero_state() unchanged.
        stacked_lstm = tf.nn.rnn_cell.MultiRNNCell(lstm_cells(rnn_layers), state_is_tuple=True)
        ## tf.unstack()
        ## Functionality is unchanged, but the opposite operation has been renamed from pack() to stack().
        ## Also, the documented numpy equivalent has changed from list(x) to np.unstack(x).
        x_ = tf.unstack(X, axis=1, num=num_units)
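        ## Shape sketch (assuming X is [batch, num_units, 1]): unstacking along
        ## axis=1 yields a Python list of `num_units` tensors, each [batch, 1],
        ## which is the list-of-timesteps format static_rnn expects.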
        ## tf.contrib.rnn.static_rnn (moved to tf.nn.static_rnn)
        ## Identical, apart from having been moved.
        output, final_state = tf.nn.static_rnn(stacked_lstm, x_, dtype=tf.float32)
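        ## static_rnn returns (outputs, final_state): `output` is a list with one
        ## tensor per time step, so output[-1] below is the last step's output.
        ## The final state is unused here.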
        output = dnn_layers(output[-1], dense_layers)
        ## tflearn.models.linear_regression()
        ## No change. A typo in a comment was corrected between versions, but nothing else.
        prediction, loss = tflearn.models.linear_regression(output, y)
        ## tf.contrib.layers.optimize_loss()
        ## Now raises a ValueError if gradients is empty; look out for try/except statements.
        train_op = tf.contrib.layers.optimize_loss(
            ## tf.contrib.framework.get_global_step()
            ## Deprecated. The new version is documented, the old one is not: update,
            ## move it, and hope functionality is the same.
            loss, tf.train.get_global_step(), optimizer=optimizer,
            learning_rate=learning_rate)
        return prediction, loss, train_op
    return _lstm_model
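

## ---------------------------------------------------------------------------
## Minimal usage sketch (added for illustration; not part of the original file).
## It wires the returned model_fn into the deprecated tf.contrib.learn Estimator
## (via SKCompat) on a toy sine wave. TIMESTEPS, the layer sizes, and the
## training settings are assumptions chosen for demonstration, not values taken
## from the original code.
## ---------------------------------------------------------------------------
if __name__ == '__main__':
    import numpy as np

    TIMESTEPS = 10
    wave = np.sin(np.linspace(0, 100, 1010))
    # Sliding windows: X is [batch, TIMESTEPS, 1]; y is the next value, [batch, 1].
    X = np.array([wave[i:i + TIMESTEPS] for i in range(1000)],
                 dtype=np.float32)[:, :, None]
    y = np.array([[wave[i + TIMESTEPS]] for i in range(1000)], dtype=np.float32)

    regressor = tflearn.SKCompat(tflearn.Estimator(
        model_fn=lstm_model(TIMESTEPS,
                            [{'num_units': 5, 'keep_prob': 0.9}],
                            dense_layers=[10, 10])))
    regressor.fit(X, y, steps=200, batch_size=32)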