Merge pull request #107 from Yoctol/fix-extend-embeddings
Fix extend embeddings
noobOriented authored Jun 13, 2019
2 parents 4c9d04e + 8811c2e commit 39592d8
Showing 3 changed files with 111 additions and 97 deletions.
2 changes: 1 addition & 1 deletion talos/__version__.py
@@ -1,4 +1,4 @@
 __title__ = 'talos'
-__version__ = '1.4.3'
+__version__ = '1.4.4'
 __description__ = 'Powerful Neural Network Builder'
 __author__ = 'Jsaon'
77 changes: 42 additions & 35 deletions talos/layers/embeddings.py
@@ -64,53 +64,53 @@ def build(self, input_shape):

         self.total_embeddings = self.embeddings

+        if self.extend_dims > 0:
+            self.extend_embeddings = self._force_trainable_add_weight(
+                shape=(self.input_dim, self.extend_dims),
+                name='extend_embeddings',
+            )
+            self.total_embeddings = tf.concat(
+                [self.total_embeddings, self.extend_embeddings],
+                axis=1,
+                name='embeddings_with_extended_dims',
+            )
+
         if self.auxiliary_tokens > 0:
-            # HACK, since Layer.add_weight will take
-            # the intersection of trainable (in arg) and self.trainable
-            # manually set self.trainable = True
-            # to make sure auxiliary_embeddings is tracked by backend.
-            original_trainable = self.trainable
-            self.trainable = True
-            self.auxiliary_embeddings = self.add_weight(
-                shape=(self.auxiliary_tokens, self.output_dim),
+            embeddings_dim = self.total_embeddings.shape[1].value
+            self.auxiliary_embeddings = self._force_trainable_add_weight(
+                shape=(self.auxiliary_tokens, embeddings_dim),
                 name='auxiliary_embeddings',
-                trainable=True,
             )
-            self.trainable = original_trainable
             self.total_embeddings = tf.concat(
                 [self.total_embeddings, self.auxiliary_embeddings],
                 axis=0,
                 name='embeddings_with_auxiliary_tokens',
             )

-        if self.extend_dims > 0:
-            original_trainable = self.trainable
-            self.trainable = True
-            vocab_size, embeddings_dim = self.total_embeddings.shape.as_list()
-            self.extend_embeddings = self.add_weight(
-                shape=(vocab_size, embeddings_dim + self.extend_dims),
-                name='extend_embeddings_dims',
-                trainable=True,
-            )
-            self.trainable = original_trainable
-            self.total_embeddings = tf.concat(
-                [self.total_embeddings, self.extend_embeddings],
-                axis=1,
-                name='embeddings_with_extended_dims',
-            )
+        self.total_embeddings = tf.identity(self.total_embeddings, name='total_embeddings')
         self.built = True

+    def _force_trainable_add_weight(self, **kwargs):
+        # HACK, since Layer.add_weight will take
+        # the intersection of trainable (in arg) and self.trainable
+        # manually set self.trainable = True
+        # to make sure weight is tracked by backend.
+        original_trainable = self.trainable
+        self.trainable = True
+        weight = self.add_weight(**kwargs, trainable=True)
+        self.trainable = original_trainable
+        return weight
+
     @property
     def trainable_weights(self):
         # HACK in keras implementation, they consider layer.trainable as well,
-        # be it's ignored in this part.
+        # it's ignored in this part.
         return self._trainable_weights

     @property
     def non_trainable_weights(self):
         # HACK in keras implementation, they consider layer.trainable as well,
-        # be it's ignored in this part.
+        # it's ignored in this part.
         return self._non_trainable_weights

     @classmethod
@@ -190,13 +190,19 @@ def call(self, inputs, mask=None, training=None):
             training = tf.keras.backend.learning_phase()

         if self.dropout is not None:
-            # randomly drop token: row of embedding matrix
+            # NOTE randomly drop token: row of embedding matrix
+            # to avoid scaling by 1 / keep_prob, slightly modify `tf.nn.dropout`
             def dropped_embeddings():
-                return tf.nn.dropout(
-                    self.total_embeddings,
-                    rate=self.dropout,
-                    noise_shape=(self.vocab_size, 1),  # for broadcast
-                ) * (1. - self.dropout)  # avoid scaling
+                random_tensor = tf.random_uniform(
+                    shape=(self.total_embeddings.shape[0].value, 1),
+                    minval=1. - self.dropout,
+                    maxval=2. - self.dropout,
+                    dtype=self.total_embeddings.dtype,
+                )
+                # 0. if [keep_prob, 1.0) and 1. if [1.0, 1.0 + keep_prob)
+                binary_tensor = tf.math.floor(random_tensor)
+                return self.total_embeddings * binary_tensor
+
             embeddings = tf_utils.smart_cond(
                 training,
                 dropped_embeddings,
@@ -205,8 +211,7 @@ def dropped_embeddings():
         else:
             embeddings = self.total_embeddings

-        out = tf.nn.embedding_lookup(embeddings, inputs)
-        return out
+        return tf.nn.embedding_lookup(embeddings, inputs)

     def compute_mask(self, inputs, mask):
         if self.mask_index is None:
@@ -235,5 +240,7 @@ def get_config(self):
             'mask_index': self.mask_index,
             'input_length': self.input_length,
             'auxiliary_tokens': self.auxiliary_tokens,
+            'extend_dims': self.extend_dims,
+            'dropout': self.dropout,
         }
         return config
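
For intuition about the new `dropped_embeddings` above, here is a small standalone sketch (plain NumPy, not part of the commit; the array names are illustrative) of the same unscaled row dropout: uniform samples drawn from [1 - rate, 2 - rate) floor to 0 with probability `rate` (row dropped) and to 1 with probability `1 - rate` (row kept), so whole vocabulary rows are zeroed without the `1 / keep_prob` rescaling that `tf.nn.dropout` would apply.

import numpy as np

rate = 0.8
vocab_size, embeddings_dim = 10, 5
embeddings = np.ones((vocab_size, embeddings_dim), dtype=np.float32)

# one uniform sample per vocabulary row, broadcast over the embedding dims
random_tensor = np.random.uniform(
    low=1. - rate, high=2. - rate, size=(vocab_size, 1)).astype(np.float32)
binary_mask = np.floor(random_tensor)   # 0. (drop) or 1. (keep) per row
dropped = embeddings * binary_mask      # dropped rows become all-zero vectors

assert np.isin(binary_mask, (0., 1.)).all()
row_zeroed = (dropped == 0.).all(axis=1)
row_kept = (dropped == embeddings).all(axis=1)
assert (row_zeroed | row_kept).all()    # each row is either fully dropped or untouched

Skipping the rescaling means kept rows stay identical to the stored embeddings at training time, which is what the "avoid scaling" comment in the removed code referred to.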
129 changes: 68 additions & 61 deletions talos/layers/tests/test_embeddings.py
@@ -65,6 +65,29 @@ def test_init_from_invalid_mask_index_raise(invalid_mask_index):
         Embedding(vocab_size=5, embeddings_dim=5, mask_index=invalid_mask_index)


+def test_dropout(inputs, sess):
+    embed_layer = Embedding(
+        vocab_size=10, embeddings_dim=5, dropout=0.8, embeddings_initializer='ones')
+    training = tf.placeholder(dtype=tf.bool, shape=())
+    outputs = embed_layer(inputs, mask=mask, training=training)
+
+    sess.run(tf.variables_initializer(var_list=embed_layer.variables))
+
+    maxlen = inputs.shape[1].value
+    input_val = np.random.randint(0, embed_layer.vocab_size, size=[5, maxlen])
+    dropped_out = sess.run(
+        outputs,
+        feed_dict={inputs: input_val, training: True},
+    )
+    assert np.all(dropped_out == 0., axis=2).any()  # on embedding dims
+
+    no_dropped_out = sess.run(
+        outputs,
+        feed_dict={inputs: input_val, training: False},
+    )
+    assert (no_dropped_out != 0.).all()
+
+
 @pytest.mark.parametrize('constant', [False, True])
 def test_construct_from_weights(inputs, sess, constant):
     weights = np.array([[0, 1], [2, 3], [4, 5]], dtype=np.float32)
@@ -78,76 +101,67 @@ def test_construct_from_weights(inputs, sess, constant):


 @pytest.mark.parametrize('constant', [False, True])
-def test_auxiliary_tokens_partially_trainable(inputs, sess, constant):
+@pytest.mark.parametrize('auxiliary_tokens, extend_dims', [
+    (0, 2),
+    (2, 0),
+    (2, 2),
+])
+def test_extend_partially_trainable(inputs, sess, constant, auxiliary_tokens, extend_dims):
     maxlen = inputs.shape[1].value
+    vocab_size, embeddings_dim = 5, 3
     embed_layer = Embedding.from_weights(
-        np.random.uniform(size=[5, 3]).astype(np.float32),
+        np.random.uniform(size=[vocab_size, embeddings_dim]).astype(np.float32),
         constant=constant,
         trainable=False,
-        auxiliary_tokens=2,
+        auxiliary_tokens=auxiliary_tokens,
+        extend_dims=extend_dims,
     )
     word_vec = embed_layer(inputs)
-    assert len(embed_layer.trainable_variables) == 1
-    assert len(embed_layer.non_trainable_variables) == (0 if constant else 1)
-    assert len(embed_layer.variables) == (1 if constant else 2)

-    update_op = tf.train.GradientDescentOptimizer(0.1).minimize(tf.reduce_sum(word_vec))
+    len_trainable_variables = (1 if auxiliary_tokens else 0) + (1 if extend_dims else 0)
+    len_non_trainable_variables = 0 if constant else 1

-    sess.run(tf.variables_initializer(var_list=embed_layer.variables))
-
-    original_weights_val = sess.run(embed_layer.total_embeddings)
-    sess.run(update_op, feed_dict={inputs: np.random.choice(5 + 2, size=[10, maxlen])})
-    new_weights_val = sess.run(embed_layer.total_embeddings)
-
-    # after update:
-    # first 5 row should keep
-    np.testing.assert_array_almost_equal(
-        original_weights_val[:5],
-        new_weights_val[:5],
-    )
-    # others (auxiliary tokens) should change.
-    with pytest.raises(AssertionError):
-        np.testing.assert_array_almost_equal(
-            original_weights_val[5:],  # auxiliary tokens
-            new_weights_val[5:],
-        )
-
-
-@pytest.mark.parametrize('constant', [False, True])
-def test_extend_dims_partially_trainable(inputs, sess, constant):
-    maxlen = inputs.shape[1].value
-    vocab_size = 5
-    original_embedding_size = 3
-    embed_layer = Embedding.from_weights(
-        np.random.uniform(size=[vocab_size, original_embedding_size]).astype(np.float32),
-        constant=constant,
-        trainable=False,
-        extend_dims=2,
-    )
-    word_vec = embed_layer(inputs)
-    assert len(embed_layer.trainable_variables) == 1
-    assert len(embed_layer.non_trainable_variables) == (0 if constant else 1)
-    assert len(embed_layer.variables) == (1 if constant else 2)
+    assert len(embed_layer.trainable_variables) == len_trainable_variables
+    assert len(embed_layer.non_trainable_variables) == len_non_trainable_variables
+    assert len(embed_layer.variables) == len_trainable_variables + len_non_trainable_variables
+    assert embed_layer.total_embeddings.shape.as_list() == [
+        vocab_size + auxiliary_tokens,
+        embeddings_dim + extend_dims,
+    ]

     update_op = tf.train.GradientDescentOptimizer(0.1).minimize(tf.reduce_sum(word_vec))

     sess.run(tf.variables_initializer(var_list=embed_layer.variables))

     original_weights_val = sess.run(embed_layer.total_embeddings)
-    sess.run(update_op, feed_dict={inputs: np.random.choice(vocab_size, size=[10, maxlen])})
+    sess.run(
+        update_op,
+        feed_dict={inputs: np.random.choice(
+            vocab_size + auxiliary_tokens,
+            size=[10, maxlen],
+        )},
+    )
     new_weights_val = sess.run(embed_layer.total_embeddings)

     # after update:
     # original part should keep
     np.testing.assert_array_almost_equal(
-        original_weights_val[:, : original_embedding_size],
-        new_weights_val[:, : original_embedding_size],
+        original_weights_val[: vocab_size, : embeddings_dim],
+        new_weights_val[: vocab_size, : embeddings_dim],
     )
-    # others (extend dims) should change.
-    with pytest.raises(AssertionError):
-        np.testing.assert_array_almost_equal(
-            original_weights_val[:, original_embedding_size:],  # extend dims
-            new_weights_val[:, original_embedding_size:],
-        )
+    # others (auxiliary tokens) should change.
+    if auxiliary_tokens:
+        with pytest.raises(AssertionError):
+            np.testing.assert_array_almost_equal(
+                original_weights_val[vocab_size:],
+                new_weights_val[vocab_size:],
+            )
+    if extend_dims:
+        with pytest.raises(AssertionError):
+            np.testing.assert_array_almost_equal(
+                original_weights_val[:, embeddings_dim:],
+                new_weights_val[:, embeddings_dim:],
+            )


 @pytest.mark.parametrize('invalid_weights', [
@@ -159,16 +173,9 @@ def test_construct_from_invalid_weights_raise(invalid_weights):
         Embedding.from_weights(invalid_weights)


-@pytest.mark.parametrize('constant,auxiliary_tokens,extend_dims', [
-    (True, 0, 0),
-    (True, 2, 0),
-    (True, 0, 2),
-    (True, 2, 10),
-    (False, 0, 0),
-    (False, 2, 0),
-    (False, 0, 2),
-    (False, 2, 10),
-])
+@pytest.mark.parametrize('constant', [True, False])
+@pytest.mark.parametrize('auxiliary_tokens', [0, 2])
+@pytest.mark.parametrize('extend_dims', [0, 5])
 def test_freeze_success(inputs, sess, constant, auxiliary_tokens, extend_dims):
     # build graph with constant embedding layer
     embed_layer = Embedding.from_weights(
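
As a usage note on the pattern these tests exercise (a sketch assuming the API shown in the diff; the import path, placeholder shape, and graph setup are assumptions, not taken from the commit), a frozen pretrained block can be combined with trainable extensions by passing `auxiliary_tokens` and `extend_dims` to `Embedding.from_weights`:

import numpy as np
import tensorflow as tf
from talos.layers.embeddings import Embedding  # import path assumed

pretrained = np.random.uniform(size=[5, 3]).astype(np.float32)
embed_layer = Embedding.from_weights(
    pretrained,
    trainable=False,        # freeze the original 5 x 3 block
    auxiliary_tokens=2,     # 2 extra trainable rows appended below it
    extend_dims=2,          # 2 extra trainable columns appended to every row
)
inputs = tf.placeholder(dtype=tf.int32, shape=[None, 4])
word_vec = embed_layer(inputs)
# total_embeddings now has shape [5 + 2, 3 + 2]; per the tests above, only the
# added rows and columns receive gradient updates while the pretrained block stays fixed.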
