From 1e65b51b600e1c19b5b1b81afd49b03403cd703d Mon Sep 17 00:00:00 2001 From: Hongyu Chiu <20734616+james77777778@users.noreply.github.com> Date: Fri, 14 Feb 2025 09:38:59 +0800 Subject: [PATCH] Add `_build_at_init` in `Layer` and use it everywhere. --- keras/src/backend/tensorflow/saved_model_test.py | 3 +-- keras/src/export/tfsm_layer.py | 3 ++- keras/src/layers/activations/activation.py | 8 +------- keras/src/layers/activations/elu.py | 8 +------- keras/src/layers/activations/leaky_relu.py | 8 +------- keras/src/layers/activations/relu.py | 8 +------- keras/src/layers/activations/softmax.py | 8 +------- keras/src/layers/core/identity.py | 8 +------- keras/src/layers/core/masking.py | 8 +------- keras/src/layers/layer.py | 15 +++++++++++++-- keras/src/layers/layer_test.py | 13 +++++-------- .../layers/normalization/unit_normalization.py | 8 +------- keras/src/layers/pooling/base_global_pooling.py | 8 +------- keras/src/layers/pooling/base_pooling.py | 8 +------- .../regularization/activity_regularization.py | 8 +------- keras/src/layers/regularization/alpha_dropout.py | 8 +------- keras/src/layers/regularization/dropout.py | 8 +------- .../src/layers/regularization/gaussian_dropout.py | 8 +------- keras/src/layers/regularization/gaussian_noise.py | 8 +------- keras/src/models/sequential.py | 1 - keras/src/utils/jax_layer.py | 3 +-- 21 files changed, 37 insertions(+), 121 deletions(-) diff --git a/keras/src/backend/tensorflow/saved_model_test.py b/keras/src/backend/tensorflow/saved_model_test.py index bac8837499d0..45543a5d36e7 100644 --- a/keras/src/backend/tensorflow/saved_model_test.py +++ b/keras/src/backend/tensorflow/saved_model_test.py @@ -215,7 +215,7 @@ def test_multi_input_custom_model_and_layer(self): @object_registration.register_keras_serializable(package="my_package") class CustomLayer(layers.Layer): def build(self, *input_shape): - self.built = True + pass def call(self, *input_list): self.add_loss(input_list[-2] * 2) @@ -226,7 +226,6 @@ class CustomModel(models.Model): def build(self, *input_shape): self.layer = CustomLayer() self.layer.build(*input_shape) - self.built = True @tf.function def call(self, *inputs): diff --git a/keras/src/export/tfsm_layer.py b/keras/src/export/tfsm_layer.py index 61859bf0fc22..93c079a3cffc 100644 --- a/keras/src/export/tfsm_layer.py +++ b/keras/src/export/tfsm_layer.py @@ -116,7 +116,8 @@ def __init__( self._add_existing_weight(v) for v in ntvs: self._add_existing_weight(v) - self.built = True + + self._build_at_init() def _add_existing_weight(self, weight): """Tracks an existing weight.""" diff --git a/keras/src/layers/activations/activation.py b/keras/src/layers/activations/activation.py index a577feacc27a..16b6a9748d95 100644 --- a/keras/src/layers/activations/activation.py +++ b/keras/src/layers/activations/activation.py @@ -1,5 +1,4 @@ from keras.src import activations -from keras.src import utils from keras.src.api_export import keras_export from keras.src.layers.layer import Layer @@ -28,12 +27,7 @@ def __init__(self, activation, **kwargs): self.supports_masking = True self.activation = activations.get(activation) - # We can only safely mark the layer as built when build is not - # overridden. - if utils.is_default(self.build): - self.built = True - self._post_build() - self._lock_state() + self._build_at_init() def call(self, inputs): return self.activation(inputs) diff --git a/keras/src/layers/activations/elu.py b/keras/src/layers/activations/elu.py index 09a3bcc65d54..5a63ee8e8e32 100644 --- a/keras/src/layers/activations/elu.py +++ b/keras/src/layers/activations/elu.py @@ -1,5 +1,4 @@ from keras.src import activations -from keras.src import utils from keras.src.api_export import keras_export from keras.src.layers.layer import Layer @@ -25,12 +24,7 @@ def __init__(self, alpha=1.0, **kwargs): self.alpha = alpha self.supports_masking = True - # We can only safely mark the layer as built when build is not - # overridden. - if utils.is_default(self.build): - self.built = True - self._post_build() - self._lock_state() + self._build_at_init() def call(self, inputs): return activations.elu(inputs, alpha=self.alpha) diff --git a/keras/src/layers/activations/leaky_relu.py b/keras/src/layers/activations/leaky_relu.py index a22d6dc1a435..3b5602e0dbb7 100644 --- a/keras/src/layers/activations/leaky_relu.py +++ b/keras/src/layers/activations/leaky_relu.py @@ -1,7 +1,6 @@ import warnings from keras.src import activations -from keras.src import utils from keras.src.api_export import keras_export from keras.src.layers.layer import Layer @@ -52,12 +51,7 @@ def __init__(self, negative_slope=0.3, **kwargs): self.negative_slope = negative_slope self.supports_masking = True - # We can only safely mark the layer as built when build is not - # overridden. - if utils.is_default(self.build): - self.built = True - self._post_build() - self._lock_state() + self._build_at_init() def call(self, inputs): return activations.leaky_relu( diff --git a/keras/src/layers/activations/relu.py b/keras/src/layers/activations/relu.py index 64492d0714b9..72629ce32d98 100644 --- a/keras/src/layers/activations/relu.py +++ b/keras/src/layers/activations/relu.py @@ -1,5 +1,4 @@ from keras.src import activations -from keras.src import utils from keras.src.api_export import keras_export from keras.src.layers.layer import Layer @@ -63,12 +62,7 @@ def __init__( self.threshold = threshold self.supports_masking = True - # We can only safely mark the layer as built when build is not - # overridden. - if utils.is_default(self.build): - self.built = True - self._post_build() - self._lock_state() + self._build_at_init() def call(self, inputs): return activations.relu( diff --git a/keras/src/layers/activations/softmax.py b/keras/src/layers/activations/softmax.py index 9d4100985d25..9822b3b055c0 100644 --- a/keras/src/layers/activations/softmax.py +++ b/keras/src/layers/activations/softmax.py @@ -1,6 +1,5 @@ from keras.src import activations from keras.src import backend -from keras.src import utils from keras.src.api_export import keras_export from keras.src.layers.layer import Layer @@ -49,12 +48,7 @@ def __init__(self, axis=-1, **kwargs): self.axis = axis self.supports_masking = True - # We can only safely mark the layer as built when build is not - # overridden. - if utils.is_default(self.build): - self.built = True - self._post_build() - self._lock_state() + self._build_at_init() def call(self, inputs, mask=None): if mask is not None: diff --git a/keras/src/layers/core/identity.py b/keras/src/layers/core/identity.py index 4d044b78a7ca..206835831bcd 100644 --- a/keras/src/layers/core/identity.py +++ b/keras/src/layers/core/identity.py @@ -1,5 +1,4 @@ from keras.src import tree -from keras.src import utils from keras.src.api_export import keras_export from keras.src.backend import KerasTensor from keras.src.layers.layer import Layer @@ -17,12 +16,7 @@ def __init__(self, **kwargs): super().__init__(**kwargs) self.supports_masking = True - # We can only safely mark the layer as built when build is not - # overridden. - if utils.is_default(self.build): - self.built = True - self._post_build() - self._lock_state() + self._build_at_init() def call(self, inputs): return inputs diff --git a/keras/src/layers/core/masking.py b/keras/src/layers/core/masking.py index d348345623e6..692c322d0aae 100644 --- a/keras/src/layers/core/masking.py +++ b/keras/src/layers/core/masking.py @@ -1,6 +1,5 @@ from keras.src import backend from keras.src import ops -from keras.src import utils from keras.src.api_export import keras_export from keras.src.layers.layer import Layer from keras.src.saving.serialization_lib import deserialize_keras_object @@ -53,12 +52,7 @@ def __init__(self, mask_value=0.0, **kwargs): self.mask_value = mask_value self.supports_masking = True - # We can only safely mark the layer as built when build is not - # overridden. - if utils.is_default(self.build): - self.built = True - self._post_build() - self._lock_state() + self._build_at_init() def compute_mask(self, inputs, mask=None): return ops.any(ops.not_equal(inputs, self.mask_value), axis=-1) diff --git a/keras/src/layers/layer.py b/keras/src/layers/layer.py index 87dff72965fe..9601ce4f89f4 100644 --- a/keras/src/layers/layer.py +++ b/keras/src/layers/layer.py @@ -372,6 +372,18 @@ def _initialize_tracker(self): # Reset attribute tracking (TF-specific) self._self_setattr_tracking = _self_setattr_tracking + def _build_at_init(self): + """Build the layer at `Layer.__init__`. + + We can only safely mark the layer as `built=True` in `Layer.__init__` if + `build` is not overridden. Otherwise, it might cause the subclasses to + ignore the user's `build`. + """ + if utils.is_default(self.build): + self.built = True + self._post_build() + self._lock_state() + @property def path(self): """The path of the layer. @@ -919,8 +931,7 @@ def maybe_convert(x): outputs, layout ) - if not self.built: - self.built = True + self.built = True # Record activity regularizer loss. if self.activity_regularizer is not None: for output in tree.flatten(outputs): diff --git a/keras/src/layers/layer_test.py b/keras/src/layers/layer_test.py index 8c1e06d6fc3b..64fee2b10f5a 100644 --- a/keras/src/layers/layer_test.py +++ b/keras/src/layers/layer_test.py @@ -647,7 +647,7 @@ def __init__(self): trainable=True, dtype="float32", ) - self.built = True + self._build_at_init() def call(self, x): # Should not autocast. @@ -663,7 +663,7 @@ def __init__(self): initializer="ones", trainable=True, ) - self.built = True + self._build_at_init() def call(self, x): # Should not autocast. @@ -681,7 +681,7 @@ def __init__(self): trainable=True, autocast=False, ) - self.built = True + self._build_at_init() def call(self, x): # Should not autocast `self.v`. @@ -700,7 +700,7 @@ def __init__(self): self.inner_one = InnerLayerOne() self.inner_two = InnerLayerTwo() self.inner_three = InnerLayerThree() - self.built = True + self._build_at_init() def call(self, x): # Should autocast. @@ -864,7 +864,7 @@ def __init__(self): trainable=True, regularizer="l1", ) - self.built = True + self._build_at_init() def call(self, x): x = backend.convert_to_tensor(x, dtype="float32") @@ -1009,7 +1009,6 @@ class MatchingArguments(layers.Layer): def build(self, bar_shape, foo_shape): self.foo_shape = foo_shape self.bar_shape = bar_shape - self.built = True def call(self, foo, bar): return foo[:, 0] + bar[:, 0] @@ -1018,7 +1017,6 @@ class SubsetArguments(layers.Layer): def build(self, baz_shape, foo_shape): self.foo_shape = foo_shape self.baz_shape = baz_shape - self.built = True def call(self, foo, bar=None, baz=None): return foo[:, 0] + bar[:, 0] + baz[:, 0] @@ -1026,7 +1024,6 @@ def call(self, foo, bar=None, baz=None): class SingleArgument(layers.Layer): def build(self, anything_whatsoever): self.foo_shape = anything_whatsoever - self.built = True def call(self, foo, bar): return foo[:, 0] + bar[:, 0] diff --git a/keras/src/layers/normalization/unit_normalization.py b/keras/src/layers/normalization/unit_normalization.py index 6a09b33e2df0..15ba884f1bbc 100644 --- a/keras/src/layers/normalization/unit_normalization.py +++ b/keras/src/layers/normalization/unit_normalization.py @@ -1,5 +1,4 @@ from keras.src import ops -from keras.src import utils from keras.src.api_export import keras_export from keras.src.layers.layer import Layer @@ -39,12 +38,7 @@ def __init__(self, axis=-1, **kwargs): ) self.supports_masking = True - # We can only safely mark the layer as built when build is not - # overridden. - if utils.is_default(self.build): - self.built = True - self._post_build() - self._lock_state() + self._build_at_init() def call(self, inputs): return ops.normalize(inputs, axis=self.axis, order=2, epsilon=1e-12) diff --git a/keras/src/layers/pooling/base_global_pooling.py b/keras/src/layers/pooling/base_global_pooling.py index b5a3bfed3c65..95e9ddca550f 100644 --- a/keras/src/layers/pooling/base_global_pooling.py +++ b/keras/src/layers/pooling/base_global_pooling.py @@ -1,5 +1,4 @@ from keras.src import backend -from keras.src import utils from keras.src.layers.input_spec import InputSpec from keras.src.layers.layer import Layer @@ -16,12 +15,7 @@ def __init__( self.keepdims = keepdims self.input_spec = InputSpec(ndim=pool_dimensions + 2) - # We can only safely mark the layer as built when build is not - # overridden. - if utils.is_default(self.build): - self.built = True - self._post_build() - self._lock_state() + self._build_at_init() def call(self, inputs): raise NotImplementedError diff --git a/keras/src/layers/pooling/base_pooling.py b/keras/src/layers/pooling/base_pooling.py index e2edd814a783..b427f86ac82a 100644 --- a/keras/src/layers/pooling/base_pooling.py +++ b/keras/src/layers/pooling/base_pooling.py @@ -1,6 +1,5 @@ from keras.src import backend from keras.src import ops -from keras.src import utils from keras.src.layers.input_spec import InputSpec from keras.src.layers.layer import Layer from keras.src.ops.operation_utils import compute_pooling_output_shape @@ -36,12 +35,7 @@ def __init__( self.input_spec = InputSpec(ndim=pool_dimensions + 2) - # We can only safely mark the layer as built when build is not - # overridden. - if utils.is_default(self.build): - self.built = True - self._post_build() - self._lock_state() + self._build_at_init() def call(self, inputs): if self.pool_mode == "max": diff --git a/keras/src/layers/regularization/activity_regularization.py b/keras/src/layers/regularization/activity_regularization.py index 32569ad206f8..a9d663c6d46f 100644 --- a/keras/src/layers/regularization/activity_regularization.py +++ b/keras/src/layers/regularization/activity_regularization.py @@ -1,5 +1,4 @@ from keras.src import regularizers -from keras.src import utils from keras.src.api_export import keras_export from keras.src.layers.layer import Layer @@ -29,12 +28,7 @@ def __init__(self, l1=0.0, l2=0.0, **kwargs): self.l1 = l1 self.l2 = l2 - # We can only safely mark the layer as built when build is not - # overridden. - if utils.is_default(self.build): - self.built = True - self._post_build() - self._lock_state() + self._build_at_init() def call(self, inputs): return inputs diff --git a/keras/src/layers/regularization/alpha_dropout.py b/keras/src/layers/regularization/alpha_dropout.py index aec2acc8fa0f..ebfd68e15917 100644 --- a/keras/src/layers/regularization/alpha_dropout.py +++ b/keras/src/layers/regularization/alpha_dropout.py @@ -1,6 +1,5 @@ from keras.src import backend from keras.src import ops -from keras.src import utils from keras.src.api_export import keras_export from keras.src.layers.layer import Layer @@ -48,12 +47,7 @@ def __init__(self, rate, noise_shape=None, seed=None, **kwargs): self.seed_generator = backend.random.SeedGenerator(seed) self.supports_masking = True - # We can only safely mark the layer as built when build is not - # overridden. - if utils.is_default(self.build): - self.built = True - self._post_build() - self._lock_state() + self._build_at_init() def call(self, inputs, training=False): if training and self.rate > 0: diff --git a/keras/src/layers/regularization/dropout.py b/keras/src/layers/regularization/dropout.py index 4c45deabe0ea..0041e65c152c 100644 --- a/keras/src/layers/regularization/dropout.py +++ b/keras/src/layers/regularization/dropout.py @@ -1,5 +1,4 @@ from keras.src import backend -from keras.src import utils from keras.src.api_export import keras_export from keras.src.layers.layer import Layer @@ -54,12 +53,7 @@ def __init__(self, rate, noise_shape=None, seed=None, **kwargs): self.seed_generator = backend.random.SeedGenerator(seed) self.supports_masking = True - # We can only safely mark the layer as built when build is not - # overridden. - if utils.is_default(self.build): - self.built = True - self._post_build() - self._lock_state() + self._build_at_init() def call(self, inputs, training=False): if training and self.rate > 0: diff --git a/keras/src/layers/regularization/gaussian_dropout.py b/keras/src/layers/regularization/gaussian_dropout.py index c2f7cb684a9f..dae82edd168d 100644 --- a/keras/src/layers/regularization/gaussian_dropout.py +++ b/keras/src/layers/regularization/gaussian_dropout.py @@ -3,7 +3,6 @@ from keras.src import backend from keras.src import layers from keras.src import ops -from keras.src import utils from keras.src.api_export import keras_export @@ -39,12 +38,7 @@ def __init__(self, rate, seed=None, **kwargs): self.seed_generator = backend.random.SeedGenerator(seed) self.supports_masking = True - # We can only safely mark the layer as built when build is not - # overridden. - if utils.is_default(self.build): - self.built = True - self._post_build() - self._lock_state() + self._build_at_init() def call(self, inputs, training=False): if training and self.rate > 0: diff --git a/keras/src/layers/regularization/gaussian_noise.py b/keras/src/layers/regularization/gaussian_noise.py index 044d9392e268..561541d4d4dc 100644 --- a/keras/src/layers/regularization/gaussian_noise.py +++ b/keras/src/layers/regularization/gaussian_noise.py @@ -1,7 +1,6 @@ from keras.src import backend from keras.src import layers from keras.src import ops -from keras.src import utils from keras.src.api_export import keras_export @@ -40,12 +39,7 @@ def __init__(self, stddev, seed=None, **kwargs): self.seed_generator = backend.random.SeedGenerator(seed) self.supports_masking = True - # We can only safely mark the layer as built when build is not - # overridden. - if utils.is_default(self.build): - self.built = True - self._post_build() - self._lock_state() + self._build_at_init() def call(self, inputs, training=False): if training and self.stddev > 0: diff --git a/keras/src/models/sequential.py b/keras/src/models/sequential.py index 5815add1c142..0d7dd6daed5d 100644 --- a/keras/src/models/sequential.py +++ b/keras/src/models/sequential.py @@ -206,7 +206,6 @@ def build(self, input_shape=None): raise e outputs = x self._functional = Functional(inputs=inputs, outputs=outputs) - self.built = True def call(self, inputs, training=None, mask=None): if self._functional: diff --git a/keras/src/utils/jax_layer.py b/keras/src/utils/jax_layer.py index 7776e7a5ba2a..a02af992778f 100644 --- a/keras/src/utils/jax_layer.py +++ b/keras/src/utils/jax_layer.py @@ -237,7 +237,7 @@ def __init__( self.tracked_params = self._create_variables(params, trainable=True) self.tracked_state = self._create_variables(state, trainable=False) if self.params is not None or self.state is not None: - self.built = True + self._build_at_init() self.call_fn_arguments = self._validate_signature( call_fn, @@ -397,7 +397,6 @@ def create_input(shape): init_params, trainable=True ) self.tracked_state = self._create_variables(init_state, trainable=False) - self.built = True def call(self, inputs, training=False): def unwrap_variable(variable):