diff --git a/talos/__version__.py b/talos/__version__.py
index 219f70c..6092742 100644
--- a/talos/__version__.py
+++ b/talos/__version__.py
@@ -1,4 +1,4 @@
 __title__ = 'talos'
-__version__ = '1.4.3'
+__version__ = '1.4.4'
 __description__ = 'Powerful Neural Network Builder'
 __author__ = 'Jsaon'
diff --git a/talos/layers/embeddings.py b/talos/layers/embeddings.py
index f4baa0c..ab9c473 100644
--- a/talos/layers/embeddings.py
+++ b/talos/layers/embeddings.py
@@ -64,53 +64,53 @@ def build(self, input_shape):
         self.total_embeddings = self.embeddings

+        if self.extend_dims > 0:
+            self.extend_embeddings = self._force_trainable_add_weight(
+                shape=(self.input_dim, self.extend_dims),
+                name='extend_embeddings',
+            )
+            self.total_embeddings = tf.concat(
+                [self.total_embeddings, self.extend_embeddings],
+                axis=1,
+                name='embeddings_with_extended_dims',
+            )
+
         if self.auxiliary_tokens > 0:
-            # HACK, since Layer.add_weight will take
-            # the intersection of trainable (in arg) and self.trainable
-            # manually set self.trainable = True
-            # to make sure auxiliary_embeddings is tracked by backend.
-            original_trainable = self.trainable
-            self.trainable = True
-            self.auxiliary_embeddings = self.add_weight(
-                shape=(self.auxiliary_tokens, self.output_dim),
+            embeddings_dim = self.total_embeddings.shape[1].value
+            self.auxiliary_embeddings = self._force_trainable_add_weight(
+                shape=(self.auxiliary_tokens, embeddings_dim),
                 name='auxiliary_embeddings',
-                trainable=True,
             )
-            self.trainable = original_trainable
             self.total_embeddings = tf.concat(
                 [self.total_embeddings, self.auxiliary_embeddings],
                 axis=0,
                 name='embeddings_with_auxiliary_tokens',
             )
-        if self.extend_dims > 0:
-            original_trainable = self.trainable
-            self.trainable = True
-            vocab_size, embeddings_dim = self.total_embeddings.shape.as_list()
-            self.extend_embeddings = self.add_weight(
-                shape=(vocab_size, embeddings_dim + self.extend_dims),
-                name='extend_embeddings_dims',
-                trainable=True,
-            )
-            self.trainable = original_trainable
-            self.total_embeddings = tf.concat(
-                [self.total_embeddings, self.extend_embeddings],
-                axis=1,
-                name='embeddings_with_extended_dims',
-            )
         self.total_embeddings = tf.identity(self.total_embeddings, name='total_embeddings')
         self.built = True

+    def _force_trainable_add_weight(self, **kwargs):
+        # HACK, since Layer.add_weight will take
+        # the intersection of trainable (in arg) and self.trainable,
+        # manually set self.trainable = True
+        # to make sure the weight is tracked by backend.
+        original_trainable = self.trainable
+        self.trainable = True
+        weight = self.add_weight(**kwargs, trainable=True)
+        self.trainable = original_trainable
+        return weight
+
     @property
     def trainable_weights(self):
         # HACK in keras implementation, they consider layer.trainable as well,
-        # be it's ignored in this part.
+        # but it's ignored in this part.
         return self._trainable_weights

     @property
     def non_trainable_weights(self):
         # HACK in keras implementation, they consider layer.trainable as well,
-        # be it's ignored in this part.
+        # but it's ignored in this part.
         return self._non_trainable_weights

     @classmethod
@@ -190,13 +190,19 @@ def call(self, inputs, mask=None, training=None):
             training = tf.keras.backend.learning_phase()

         if self.dropout is not None:
-            # randomly drop token: row of embedding matrix
+            # NOTE randomly drop token: row of embedding matrix
+            # to avoid scaling by 1 / keep_prob, slightly modify `tf.nn.dropout`
             def dropped_embeddings():
-                return tf.nn.dropout(
-                    self.total_embeddings,
-                    rate=self.dropout,
-                    noise_shape=(self.vocab_size, 1),  # for broadcast
-                ) * (1. - self.dropout)  # avoid scaling
+                random_tensor = tf.random_uniform(
+                    shape=(self.total_embeddings.shape[0].value, 1),
+                    minval=1. - self.dropout,
+                    maxval=2. - self.dropout,
+                    dtype=self.total_embeddings.dtype,
+                )
+                # 0. if [keep_prob, 1.0) and 1. if [1.0, 1.0 + keep_prob)
+                binary_tensor = tf.math.floor(random_tensor)
+                return self.total_embeddings * binary_tensor
+
             embeddings = tf_utils.smart_cond(
                 training,
                 dropped_embeddings,
@@ -205,8 +211,7 @@ def dropped_embeddings():
         else:
             embeddings = self.total_embeddings

-        out = tf.nn.embedding_lookup(embeddings, inputs)
-        return out
+        return tf.nn.embedding_lookup(embeddings, inputs)

     def compute_mask(self, inputs, mask):
         if self.mask_index is None:
@@ -235,5 +240,7 @@ def get_config(self):
             'mask_index': self.mask_index,
             'input_length': self.input_length,
             'auxiliary_tokens': self.auxiliary_tokens,
+            'extend_dims': self.extend_dims,
+            'dropout': self.dropout,
         }
         return config
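The `dropped_embeddings` closure above keeps each token row with probability `1 - dropout` and, unlike `tf.nn.dropout`, leaves the kept rows unscaled (no `1 / keep_prob` factor). Below is a minimal NumPy sketch of the same floor-of-uniform trick; the helper name `drop_token_rows` is illustrative only and not part of talos.

import numpy as np

def drop_token_rows(embeddings, dropout, rng=np.random):
    # hypothetical helper, for illustration only (not part of the patch)
    keep_prob = 1. - dropout
    # uniform samples in [keep_prob, 1 + keep_prob); floor() maps
    # [keep_prob, 1.0) -> 0. and [1.0, 1.0 + keep_prob) -> 1.,
    # so each row survives with probability keep_prob
    random_tensor = rng.uniform(keep_prob, 1. + keep_prob, size=(embeddings.shape[0], 1))
    binary_mask = np.floor(random_tensor)  # shape (vocab_size, 1), broadcasts over dims
    return embeddings * binary_mask        # kept rows keep their original scale

emb = np.ones([10, 5], dtype=np.float32)
dropped = drop_token_rows(emb, dropout=0.8)
assert set(np.unique(dropped)) <= {0., 1.}  # each row is either zeroed or untouched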
diff --git a/talos/layers/tests/test_embeddings.py b/talos/layers/tests/test_embeddings.py
index 5f2661f..5b20290 100644
--- a/talos/layers/tests/test_embeddings.py
+++ b/talos/layers/tests/test_embeddings.py
@@ -65,6 +65,29 @@ def test_init_from_invalid_mask_index_raise(invalid_mask_index):
         Embedding(vocab_size=5, embeddings_dim=5, mask_index=invalid_mask_index)


+def test_dropout(inputs, sess):
+    embed_layer = Embedding(
+        vocab_size=10, embeddings_dim=5, dropout=0.8, embeddings_initializer='ones')
+    training = tf.placeholder(dtype=tf.bool, shape=())
+    outputs = embed_layer(inputs, training=training)
+
+    sess.run(tf.variables_initializer(var_list=embed_layer.variables))
+
+    maxlen = inputs.shape[1].value
+    input_val = np.random.randint(0, embed_layer.vocab_size, size=[5, maxlen])
+    dropped_out = sess.run(
+        outputs,
+        feed_dict={inputs: input_val, training: True},
+    )
+    assert np.all(dropped_out == 0., axis=2).any()  # on embedding dims
+
+    no_dropped_out = sess.run(
+        outputs,
+        feed_dict={inputs: input_val, training: False},
+    )
+    assert (no_dropped_out != 0.).all()
+
+
 @pytest.mark.parametrize('constant', [False, True])
 def test_construct_from_weights(inputs, sess, constant):
     weights = np.array([[0, 1], [2, 3], [4, 5]], dtype=np.float32)
@@ -78,76 +101,67 @@ def test_construct_from_weights(inputs, sess, constant):


 @pytest.mark.parametrize('constant', [False, True])
-def test_auxiliary_tokens_partially_trainable(inputs, sess, constant):
+@pytest.mark.parametrize('auxiliary_tokens, extend_dims', [
+    (0, 2),
+    (2, 0),
+    (2, 2),
+])
+def test_extend_partially_trainable(inputs, sess, constant, auxiliary_tokens, extend_dims):
     maxlen = inputs.shape[1].value
+    vocab_size, embeddings_dim = 5, 3
     embed_layer = Embedding.from_weights(
-        np.random.uniform(size=[5, 3]).astype(np.float32),
+        np.random.uniform(size=[vocab_size, embeddings_dim]).astype(np.float32),
         constant=constant,
         trainable=False,
-        auxiliary_tokens=2,
+        auxiliary_tokens=auxiliary_tokens,
+        extend_dims=extend_dims,
     )
     word_vec = embed_layer(inputs)
-    assert len(embed_layer.trainable_variables) == 1
-    assert len(embed_layer.non_trainable_variables) == (0 if constant else 1)
-    assert len(embed_layer.variables) == (1 if constant else 2)
-    update_op = tf.train.GradientDescentOptimizer(0.1).minimize(tf.reduce_sum(word_vec))
+    len_trainable_variables = (1 if auxiliary_tokens else 0) + (1 if extend_dims else 0)
+    len_non_trainable_variables = 0 if constant else 1
-    sess.run(tf.variables_initializer(var_list=embed_layer.variables))
-
-    original_weights_val = sess.run(embed_layer.total_embeddings)
-    sess.run(update_op, feed_dict={inputs: np.random.choice(5 + 2, size=[10, maxlen])})
-    new_weights_val = sess.run(embed_layer.total_embeddings)
-
-    # after update:
-    # first 5 row should keep
-    np.testing.assert_array_almost_equal(
-        original_weights_val[:5],
-        new_weights_val[:5],
-    )
-    # others (auxiliary tokens) should change.
-    with pytest.raises(AssertionError):
-        np.testing.assert_array_almost_equal(
-            original_weights_val[5:],  # auxiliary tokens
-            new_weights_val[5:],
-        )
-
-
-@pytest.mark.parametrize('constant', [False, True])
-def test_extend_dims_partially_trainable(inputs, sess, constant):
-    maxlen = inputs.shape[1].value
-    vocab_size = 5
-    original_embedding_size = 3
-    embed_layer = Embedding.from_weights(
-        np.random.uniform(size=[vocab_size, original_embedding_size]).astype(np.float32),
-        constant=constant,
-        trainable=False,
-        extend_dims=2,
-    )
-    word_vec = embed_layer(inputs)
-    assert len(embed_layer.trainable_variables) == 1
-    assert len(embed_layer.non_trainable_variables) == (0 if constant else 1)
-    assert len(embed_layer.variables) == (1 if constant else 2)
+    assert len(embed_layer.trainable_variables) == len_trainable_variables
+    assert len(embed_layer.non_trainable_variables) == len_non_trainable_variables
+    assert len(embed_layer.variables) == len_trainable_variables + len_non_trainable_variables
+    assert embed_layer.total_embeddings.shape.as_list() == [
+        vocab_size + auxiliary_tokens,
+        embeddings_dim + extend_dims,
+    ]

     update_op = tf.train.GradientDescentOptimizer(0.1).minimize(tf.reduce_sum(word_vec))

     sess.run(tf.variables_initializer(var_list=embed_layer.variables))

     original_weights_val = sess.run(embed_layer.total_embeddings)
-    sess.run(update_op, feed_dict={inputs: np.random.choice(vocab_size, size=[10, maxlen])})
+    sess.run(
+        update_op,
+        feed_dict={inputs: np.random.choice(
+            vocab_size + auxiliary_tokens,
+            size=[10, maxlen],
+        )},
+    )
     new_weights_val = sess.run(embed_layer.total_embeddings)

     # after update:
+    # original part should keep
     np.testing.assert_array_almost_equal(
-        original_weights_val[:, : original_embedding_size],
-        new_weights_val[:, : original_embedding_size],
+        original_weights_val[: vocab_size, : embeddings_dim],
+        new_weights_val[: vocab_size, : embeddings_dim],
     )
-    # others (extend dims) should change.
-    with pytest.raises(AssertionError):
-        np.testing.assert_array_almost_equal(
-            original_weights_val[:, original_embedding_size:],  # extend dims
-            new_weights_val[:, original_embedding_size:],
-        )
+    # others (auxiliary tokens / extended dims) should change.
+    if auxiliary_tokens:
+        with pytest.raises(AssertionError):
+            np.testing.assert_array_almost_equal(
+                original_weights_val[vocab_size:],
+                new_weights_val[vocab_size:],
+            )
+    if extend_dims:
+        with pytest.raises(AssertionError):
+            np.testing.assert_array_almost_equal(
+                original_weights_val[:, embeddings_dim:],
+                new_weights_val[:, embeddings_dim:],
+            )


 @pytest.mark.parametrize('invalid_weights', [
@@ -159,16 +173,9 @@ def test_construct_from_invalid_weights_raise(invalid_weights):
         Embedding.from_weights(invalid_weights)


-@pytest.mark.parametrize('constant,auxiliary_tokens,extend_dims', [
-    (True, 0, 0),
-    (True, 2, 0),
-    (True, 0, 2),
-    (True, 2, 10),
-    (False, 0, 0),
-    (False, 2, 0),
-    (False, 0, 2),
-    (False, 2, 10),
-])
+@pytest.mark.parametrize('constant', [True, False])
+@pytest.mark.parametrize('auxiliary_tokens', [0, 2])
+@pytest.mark.parametrize('extend_dims', [0, 5])
 def test_freeze_success(inputs, sess, constant, auxiliary_tokens, extend_dims):
     # build graph with constant embedding layer
     embed_layer = Embedding.from_weights(