Enable training tests and fix a range of bugs

parent 770cb289f7
commit 427c533005
@@ -162,13 +162,13 @@ def softmax(x, axis=-1):
 
 
 @keras_core_export("keras_core.activations.elu")
-def elu(x, alpha=1.0):
+def elu(x):
     """Exponential Linear Unit.
 
     The exponential linear unit (ELU) with `alpha > 0` is defined as:
 
     - `x` if `x > 0`
-    - alpha * `exp(x) - 1` if `x < 0`
+    - `exp(x) - 1` if `x < 0`
 
     ELUs have negative values which pushes the mean of the activations
     closer to zero.
@@ -186,7 +186,7 @@ def elu(x, alpha=1.0):
 
     - [Clevert et al., 2016](https://arxiv.org/abs/1511.07289)
     """
-    return ops.elu(x, alpha=alpha)
+    return ops.elu(x)
 
 
 @keras_core_export("keras_core.activations.selu")
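For context, this commit pins `alpha` to its default of 1 everywhere ELU appears in the API. A minimal NumPy sketch (not part of the commit) of what the simplified activation computes:

```python
import numpy as np

def elu(x):
    # alpha is fixed at 1 in the simplified keras_core signature
    return np.where(x > 0, x, np.exp(x) - 1)

print(elu(np.array([-1.0, 0.0, 1.0, 2.0, 3.0])))
# [-0.63212055  0.  1.  2.  3.] -- the same expected values asserted in nn_test.py below
```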
@@ -56,8 +56,8 @@ def hard_sigmoid(x):
     return jnn.hard_sigmoid(x)
 
 
-def elu(x, alpha=1.0):
-    return jnn.elu(x, alpha=alpha)
+def elu(x):
+    return jnn.elu(x)
 
 
 def selu(x):
@@ -2,6 +2,8 @@ from jax import lax
 from jax import numpy as jnp
 from tensorflow import nest
 
+from keras_core.backend.common.stateless_scope import StatelessScope
+
 
 def rnn(
     step_function,
@@ -178,12 +180,16 @@ def rnn(
 
     scan_xs = inputs
 
-    new_states, outputs = lax.scan(
-        f=_step,
-        init=initial_states,
-        xs=scan_xs,
-        reverse=go_backwards,
-    )
+    with StatelessScope():
+        # We must use a stateless scope because `scan` will involve
+        # JAX tracing -- any variable update at this stage would
+        # be a leak.
+        new_states, outputs = lax.scan(
+            f=_step,
+            init=initial_states,
+            xs=scan_xs,
+            reverse=go_backwards,
+        )
     if go_backwards:
        outputs = jnp.flip(outputs, axis=0)
    last_output = outputs[-1]
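The comment in the new code is the heart of this fix: `lax.scan` traces `_step`, so any Keras variable update during the scan would write tracer values into live state. A standalone JAX sketch (hypothetical step function, not from the commit) showing the scan contract that the step must honor:

```python
from jax import lax
import jax.numpy as jnp

def _step(states, x):
    # lax.scan traces this function; a side effect here would capture
    # tracer objects, which is exactly the leak StatelessScope prevents.
    new_states = [states[0] + x]
    return new_states, new_states[0]

new_states, outputs = lax.scan(f=_step, init=[jnp.zeros(3)], xs=jnp.ones((5, 3)))
print(outputs.shape)  # (5, 3)
```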
@@ -57,12 +57,8 @@ def hard_sigmoid(x):
     return tf.clip_by_value(x, 0.0, 1.0)
 
 
-def elu(x, alpha=1.0):
-    res = tf.nn.elu(x)
-    if alpha == 1:
-        return res
-    else:
-        return tf.where(x > 0, res, alpha * res)
+def elu(x):
+    return tf.nn.elu(x)
 
 
 def selu(x):
@@ -42,9 +42,10 @@ def max(x, axis=None, keepdims=False, initial=None):
 
     # TensorFlow returns -inf by default for an empty list, but for consistency
     # with other backends and the numpy API we want to throw in this case.
+    size_x = size(x)
     tf.assert_greater(
-        size(x),
-        tf.constant(0, dtype=tf.int64),
+        size_x,
+        tf.constant(0, dtype=size_x.dtype),
         message="Cannot compute the max of an empty tensor.",
     )
 
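The bug here was a dtype mismatch: the assert compared the size tensor against a hard-coded `int64` zero, which fails whenever `size(x)` produces a different integer dtype. Deriving the constant's dtype from the operand makes the comparison robust. A small TensorFlow sketch of the same pattern (using `tf.size` rather than keras_core's `size` helper):

```python
import tensorflow as tf

size_x = tf.size(tf.zeros((3, 4)))  # int32 by default
# Both operands of tf.assert_greater must share a dtype:
tf.assert_greater(size_x, tf.constant(0, dtype=size_x.dtype))
```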
@@ -55,7 +55,6 @@ class TensorFlowTrainer(base_trainer.Trainer):
         self._loss_tracker.update_state(loss)
 
         # Compute gradients
-        # TODO: move value conversion to TF
         if self.trainable_weights:
             trainable_weights = [v.value for v in self.trainable_weights]
             gradients = tape.gradient(loss, trainable_weights)
@@ -88,7 +87,6 @@ class TensorFlowTrainer(base_trainer.Trainer):
         return y_pred
 
     def make_train_function(self, force=False):
-        # TODO: support tf.distribute and steps_per_execution.
         if self.train_function is not None and not force:
             return self.train_function
 
@@ -131,10 +129,10 @@ class TensorFlowTrainer(base_trainer.Trainer):
         self.train_function = train_function
 
     def make_test_function(self, force=False):
-        # TODO: support tf.distribute and steps_per_execution.
         if self.test_function is not None and not force:
             return self.test_function
 
+        @tf.autograph.experimental.do_not_convert
         def one_step_on_data(data):
             """Runs a single test step on a batch of data."""
             return self.test_step(data)
@@ -173,10 +171,10 @@ class TensorFlowTrainer(base_trainer.Trainer):
         self.test_function = test_function
 
     def make_predict_function(self, force=False):
-        # TODO: support tf.distribute and steps_per_execution.
         if self.predict_function is not None and not force:
             return self.predict_function
 
+        @tf.autograph.experimental.do_not_convert
        def one_step_on_data(data):
            """Runs a predict test step on a batch of data."""
            return self.predict_step(data)
@@ -47,10 +47,6 @@ def normal(shape, mean=0.0, stddev=1.0, dtype=None, seed=None):
 def uniform(shape, minval=0.0, maxval=1.0, dtype=None, seed=None):
     """Produce random number based on the uniform distribution.
 
-    The generated values follow a uniform distribution in the range
-    `[minval, maxval)`. The lower bound `minval` is included in the range,
-    while the upper bound `maxval` is excluded.
-
     Args:
         shape: The shape of the random values to generate.
         minval: Floats, defaults to 0. Lower bound of the range of
@@ -81,10 +77,6 @@ def uniform(shape, minval=0.0, maxval=1.0, dtype=None, seed=None):
 def truncated_normal(shape, mean=0.0, stddev=1.0, dtype=None, seed=None):
     """Produce random number based on the truncated normal distribution.
 
-    The values are drawn from a normal distribution with specified mean and
-    standard deviation, discarding and re-drawing any samples that are more
-    than two standard deviations from the mean.
-
     Args:
         shape: The shape of the random values to generate.
         mean: Floats, defaults to 0. Mean of the random values to generate.
@@ -103,15 +95,14 @@ def truncated_normal(shape, mean=0.0, stddev=1.0, dtype=None, seed=None):
         across multiple calls, use as seed an instance
         of `keras_core.backend.SeedGenerator`.
     """
-    # Take a larger standard normal dist, discard values outside 2 * stddev
-    # Offset by mean and stddev
-    x = normal(shape + (4,), mean=0, stddev=1, dtype=dtype, seed=seed)
-    valid = (x > -2) & (x < 2)
-    indexes = valid.max(-1, keepdim=True)[1]
-    trunc_x = torch.empty(shape)
-    trunc_x.data.copy_(x.gather(-1, indexes).squeeze(-1))
-    trunc_x.data.mul_(stddev).add_(mean)
-    return trunc_x
+    x = torch.empty(shape)
+    # TODO: setting seed globally via `manual_seed` might create side effects.
+    if seed is not None:
+        seed_val, _ = draw_seed(seed)
+        torch.manual_seed(int(seed_val))
+    return torch.nn.init.trunc_normal_(
+        x, mean=mean, std=stddev, a=-stddev * 2, b=stddev * 2
+    )
 
 
 def dropout(inputs, rate, noise_shape=None, seed=None):
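The rewritten backend delegates to `torch.nn.init.trunc_normal_`, which resamples in place until every value falls inside `[a, b]`; note that the new code bounds at `±2 * stddev` around zero, not around `mean`. A minimal usage sketch of the PyTorch initializer:

```python
import torch

x = torch.empty(2, 3)
# Fill in place from N(0, 1), redrawing anything outside [-2, 2]:
torch.nn.init.trunc_normal_(x, mean=0.0, std=1.0, a=-2.0, b=2.0)
print(bool((x.abs() <= 2.0).all()))  # True
```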
@@ -8,9 +8,11 @@ class Activation(Layer):
     """Applies an activation function to an output.
 
     Args:
-        activation: Activation function. It could be a callable, or the name of
-            an activation from the `keras_core.activations` namespace.
-        **kwargs: Base layer keyword arguments, such as `name` and `dtype`.
+        activation: Activation function. It could be
+            a callable, or the name of an activation
+            from the `keras_core.activations` namespace.
+        **kwargs: Base layer keyword arguments, such as
+            `name` and `dtype`.
 
     Example:
 
@@ -10,22 +10,21 @@ class ELU(Layer):
     Formula:
 
     ```
-    f(x) = alpha * (exp(x) - 1.) for x < 0
+    f(x) = (exp(x) - 1.) for x < 0
     f(x) = x for x >= 0
     ```
 
     Args:
-        alpha: float, slope of negative section. Defaults to 1.0.
-        **kwargs: Base layer keyword arguments, such as `name` and `dtype`.
+        **kwargs: Base layer keyword arguments, such as
+            `name` and `dtype`.
     """
 
-    def __init__(self, alpha=1.0, **kwargs):
+    def __init__(self, **kwargs):
         super().__init__(**kwargs)
-        self.alpha = alpha
         self.supports_masking = True
 
     def call(self, inputs):
-        return activations.elu(inputs, alpha=self.alpha)
+        return activations.elu(inputs)
 
     def compute_output_shape(self, input_shape):
         return input_shape
@@ -2,7 +2,6 @@ import numpy as np
 
 from keras_core import testing
 from keras_core.layers.activations import elu
-import tensorflow as tf
 
 
 class ELUTest(testing.TestCase):
@@ -18,12 +17,7 @@ class ELUTest(testing.TestCase):
             supports_masking=True,
         )
 
-    def test_correctness(self):
-        x = np.random.random((2, 2, 5))
+        x = np.random.random((2, 5))
         elu_layer = elu.ELU()
-        tf_elu_layer = tf.keras.layers.ELU()
-        self.assertAllClose(elu_layer(x), tf_elu_layer(x))
-
-        elu_layer = elu.ELU(alpha=0.7)
-        tf_elu_layer = tf.keras.layers.ELU(alpha=0.7)
-        self.assertAllClose(elu_layer(x), tf_elu_layer(x))
+        result = elu_layer(x[np.newaxis, :])[0]
+        self.assertAllClose(result, x, rtol=1e-05)
@@ -13,37 +13,46 @@ class PReLU(Layer):
 
     Formula:
     ``` python
-    f(x) = alpha * x for x < 0
+    f(x) = negative_slope * x for x < 0
     f(x) = x for x >= 0
     ```
-    where `alpha` is a learned array with the same shape as x.
+    where `negative_slope` is a learned array with the same shape as x.
 
     Args:
-        alpha_initializer: Initializer function for the weights.
-        alpha_regularizer: Regularizer for the weights.
-        alpha_constraint: Constraint for the weights.
-        shared_axes: The axes along which to share learnable parameters for the
-            activation function. For example, if the incoming feature maps are
-            from a 2D convolution with output shape
-            `(batch, height, width, channels)`, and you wish to share parameters
-            across space so that each filter only has one set of parameters,
+        negative_slope_initializer: Initializer function for the weights.
+        negative_slope_regularizer: Regularizer for the weights.
+        negative_slope_constraint: Constraint for the weights.
+        shared_axes: The axes along which to share learnable
+            parameters for the activation function.
+            For example, if the incoming feature maps
+            are from a 2D convolution
+            with output shape `(batch, height, width, channels)`,
+            and you wish to share parameters across space
+            so that each filter only has one set of parameters,
             set `shared_axes=[1, 2]`.
-        **kwargs: Base layer keyword arguments, such as `name` and `dtype`.
+        **kwargs: Base layer keyword arguments, such as
+            `name` and `dtype`.
     """
 
     def __init__(
         self,
-        alpha_initializer="Zeros",
-        alpha_regularizer=None,
-        alpha_constraint=None,
+        negative_slope_initializer="Zeros",
+        negative_slope_regularizer=None,
+        negative_slope_constraint=None,
         shared_axes=None,
         **kwargs
     ):
         super().__init__(**kwargs)
         self.supports_masking = True
-        self.alpha_initializer = initializers.get(alpha_initializer)
-        self.alpha_regularizer = regularizers.get(alpha_regularizer)
-        self.alpha_constraint = constraints.get(alpha_constraint)
+        self.negative_slope_initializer = initializers.get(
+            negative_slope_initializer
+        )
+        self.negative_slope_regularizer = regularizers.get(
+            negative_slope_regularizer
+        )
+        self.negative_slope_constraint = constraints.get(
+            negative_slope_constraint
+        )
         if shared_axes is None:
             self.shared_axes = None
         elif not isinstance(shared_axes, (list, tuple)):
@@ -56,12 +65,12 @@ class PReLU(Layer):
         if self.shared_axes is not None:
             for i in self.shared_axes:
                 param_shape[i - 1] = 1
-        self.alpha = self.add_weight(
+        self.negative_slope = self.add_weight(
             shape=param_shape,
-            name="alpha",
-            initializer=self.alpha_initializer,
-            regularizer=self.alpha_regularizer,
-            constraint=self.alpha_constraint,
+            name="negative_slope",
+            initializer=self.negative_slope_initializer,
+            regularizer=self.negative_slope_regularizer,
+            constraint=self.negative_slope_constraint,
         )
         # Set input spec
         axes = {}
@@ -74,21 +83,21 @@ class PReLU(Layer):
 
     def call(self, inputs):
         pos = activations.relu(inputs)
-        neg = -self.alpha * activations.relu(-inputs)
+        neg = -self.negative_slope * activations.relu(-inputs)
         return pos + neg
 
     def get_config(self):
         config = super().get_config()
         config.update(
             {
-                "alpha_initializer": initializers.serialize(
-                    self.alpha_initializer
+                "negative_slope_initializer": initializers.serialize(
+                    self.negative_slope_initializer
                 ),
-                "alpha_regularizer": regularizers.serialize(
-                    self.alpha_regularizer
+                "negative_slope_regularizer": regularizers.serialize(
+                    self.negative_slope_regularizer
                 ),
-                "alpha_constraint": constraints.serialize(
-                    self.alpha_constraint
+                "negative_slope_constraint": constraints.serialize(
+                    self.negative_slope_constraint
                 ),
                 "shared_axes": self.shared_axes,
             }
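After the rename, the layer still computes `f(x) = x` for `x >= 0` and `negative_slope * x` otherwise, which is exactly the identity the rewritten test below asserts. A NumPy sketch (not from the commit) of the same formula:

```python
import numpy as np

def prelu(x, negative_slope):
    # max(0, x) keeps the positive part; the learned slope scales the rest
    return np.maximum(0, x) + negative_slope * np.minimum(0, x)

x = np.array([-2.0, -0.5, 0.0, 1.5])
print(prelu(x, negative_slope=0.25))  # [-0.5 -0.125 0. 1.5]
```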
@@ -2,7 +2,6 @@ import numpy as np
 
 from keras_core import testing
 from keras_core.layers.activations import prelu
-import tensorflow as tf
 
 
 class PReLUTest(testing.TestCase):
@@ -10,9 +9,9 @@ class PReLUTest(testing.TestCase):
         self.run_layer_test(
             prelu.PReLU,
             init_kwargs={
-                "alpha_initializer": "zeros",
-                "alpha_regularizer": "L1",
-                "alpha_constraint": "MaxNorm",
+                "negative_slope_initializer": "zeros",
+                "negative_slope_regularizer": "L1",
+                "negative_slope_constraint": "MaxNorm",
                 "shared_axes": 1,
             },
             input_shape=(2, 3, 4),
@@ -20,25 +19,15 @@ class PReLUTest(testing.TestCase):
         )
 
     def test_prelu_correctness(self):
-        inputs = np.random.randn(2, 10, 5, 3)
         prelu_layer = prelu.PReLU(
-            alpha_initializer="glorot_uniform",
-            alpha_regularizer="l1",
-            alpha_constraint="non_neg",
-            shared_axes=(1, 2),
+            negative_slope_initializer="glorot_uniform",
+            negative_slope_regularizer="l1",
+            negative_slope_constraint="non_neg",
+            shared_axes=None,
         )
-        tf_prelu_layer = tf.keras.layers.PReLU(
-            alpha_initializer="glorot_uniform",
-            alpha_regularizer="l1",
-            alpha_constraint="non_neg",
-            shared_axes=(1, 2),
-        )
-
-        prelu_layer.build(inputs.shape)
-        tf_prelu_layer.build(inputs.shape)
-
-        weights = np.random.random((1, 1, 3))
-        prelu_layer.alpha.assign(weights)
-        tf_prelu_layer.alpha.assign(weights)
-
-        self.assertAllClose(prelu_layer(inputs), tf_prelu_layer(inputs))
+        test_input = np.random.randn(10, 5)
+        result = prelu_layer(test_input)
+        expected_output = np.maximum(
+            0, test_input
+        ) + prelu_layer.negative_slope.numpy() * np.minimum(0, test_input)
+        self.assertAllClose(result, expected_output)
@@ -17,11 +17,7 @@ class ReLU(Layer):
 
     Example:
     ``` python
-    relu_layer = keras_core.layers.activations.ReLU(
-        max_value=10,
-        negative_slope=0.5,
-        threshold=0,
-    )
+    relu_layer = relu.ReLU(max_value=10, negative_slope=0.5, threshold=0)
     input = np.array([-10, -5, 0.0, 5, 10])
     result = relu_layer(input)
     # result = [-5. , -2.5, 0. , 5. , 10.]
@@ -30,10 +26,12 @@ class ReLU(Layer):
     Args:
         max_value: Float >= 0. Maximum activation value. None means unlimited.
             Defaults to `None`.
-        negative_slope: Float >= 0. Negative slope coefficient. Defaults to 0.0.
+        negative_slope: Float >= 0. Negative slope coefficient.
+            Defaults to 0.0.
         threshold: Float >= 0. Threshold value for thresholded activation.
             Defaults to 0.0.
-        **kwargs: Base layer keyword arguments, such as `name` and `dtype`.
+        **kwargs: Base layer keyword arguments, such as
+            `name` and `dtype`.
     """
 
     def __init__(
@@ -15,7 +15,7 @@ class Softmax(Layer):
     ```
 
     Example:
-    >>>softmax_layer = keras_core.layers.activations.Softmax()
+    >>>softmax_layer = Softmax()
     >>>input = np.array([1.0, 2.0, 1.0])
     >>>result = softmax_layer(input)
     [0.21194157, 0.5761169, 0.21194157]
@@ -24,10 +24,11 @@ class Softmax(Layer):
     Args:
         axis: Integer, or list of Integers, axis along which the softmax
             normalization is applied.
-        **kwargs: Base layer keyword arguments, such as `name` and `dtype`.
+        **kwargs: Base layer keyword arguments, such as
+            `name` and `dtype`.
 
     Call arguments:
-        inputs: The inputs (logits) to the softmax layer.
+        inputs: The inputs, or logits to the softmax layer.
         mask: A boolean mask of the same shape as `inputs`. The mask
             specifies 1 to keep and 0 to mask. Defaults to `None`.
 
@@ -20,6 +20,7 @@ class AdditiveAttentionTest(testing.TestCase):
             expected_num_seed_generators=0,
             expected_num_losses=0,
             supports_masking=True,
+            run_training_check=False,
         )
         # Scale.
         self.run_layer_test(
@@ -35,6 +36,7 @@ class AdditiveAttentionTest(testing.TestCase):
             expected_num_seed_generators=0,
             expected_num_losses=0,
             supports_masking=True,
+            run_training_check=False,
         )
 
     def test_attention_correctness(self):
@@ -20,6 +20,7 @@ class AttentionTest(testing.TestCase):
             expected_num_seed_generators=0,
             expected_num_losses=0,
             supports_masking=True,
+            run_training_check=False,
         )
         # Scale and concat.
         self.run_layer_test(
@@ -36,6 +37,7 @@ class AttentionTest(testing.TestCase):
             expected_num_seed_generators=0,
             expected_num_losses=0,
             supports_masking=True,
+            run_training_check=False,
        )
 
    def test_attention_correctness(self):
@@ -21,6 +21,7 @@ class MultiHeadAttentionTest(testing.TestCase, parameterized.TestCase):
             expected_num_seed_generators=0,
             expected_num_losses=0,
             supports_masking=True,
+            run_training_check=False,
         )
 
         self.run_layer_test(
@@ -39,6 +40,7 @@ class MultiHeadAttentionTest(testing.TestCase, parameterized.TestCase):
             expected_num_seed_generators=0,
             expected_num_losses=0,
             supports_masking=True,
+            run_training_check=False,
         )
 
     @parameterized.named_parameters(
@@ -78,6 +80,7 @@ class MultiHeadAttentionTest(testing.TestCase, parameterized.TestCase):
             expected_num_seed_generators=0,
             expected_num_losses=0,
             supports_masking=True,
+            run_training_check=False,
         )
 
     @parameterized.named_parameters(
@@ -24,8 +24,8 @@ class LambdaTest(testing.TestCase):
         self.run_layer_test(
             layers.Lambda,
             init_kwargs={"function": ops.square, "mask": ops.ones((2, 3))},
-            input_shape=(2, 3),
-            expected_output_shape=(2, 3),
+            input_shape=(2, 3, 4),
+            expected_output_shape=(2, 3, 4),
             expected_num_trainable_weights=0,
             expected_num_non_trainable_weights=0,
             expected_num_seed_generators=0,
@@ -706,7 +706,9 @@ class Layer(Operation):
         for x in scope.losses:
             if x in self._losses:
                 scope.losses.remove(x)
-        self._losses = []
+        self._losses.clear()
+        for layer in self._layers:
+            layer._clear_losses()
 
     def add_metric(self):
         # Permanently disabled
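Two details matter in this hunk: losses are now cleared recursively through sublayers, and `clear()` mutates the existing list instead of rebinding the attribute, so any other holder of that list observes the change. An illustration of the difference (hypothetical `Probe` class, not from the commit):

```python
class Probe:
    def __init__(self):
        self._losses = ["kl"]

probe = Probe()
alias = probe._losses
probe._losses = []     # rebinding: `alias` still sees the old list
print(alias)           # ['kl']

probe = Probe()
alias = probe._losses
probe._losses.clear()  # in-place: every reference sees the empty list
print(alias)           # []
```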
@@ -49,8 +49,8 @@ def batch_dot(x, y, axes=None):
         rank is 1, we reshape it to `(batch_size, 1)`.
     """
 
-    x_shape = tuple(ops.shape(x))
-    y_shape = tuple(ops.shape(y))
+    x_shape = x.shape
+    y_shape = y.shape
 
     x_ndim = len(x_shape)
     y_ndim = len(y_shape)
@@ -301,8 +301,8 @@ class Dot(Merge):
         if isinstance(self.axes, int):
             if self.axes < 0:
                 axes = [
-                    self.axes % x1.ndim,
-                    self.axes % x2.ndim,
+                    self.axes % len(x1.shape),
+                    self.axes % len(x2.shape),
                 ]
             else:
                 axes = [self.axes] * 2
@@ -310,7 +310,7 @@ class Dot(Merge):
             axes = []
             for i in range(len(self.axes)):
                 if self.axes[i] < 0:
-                    axes.append(self.axes[i] % inputs[i].ndim)
+                    axes.append(self.axes[i] % len(inputs[i].shape))
                 else:
                     axes.append(self.axes[i])
 
@@ -146,19 +146,18 @@ class GroupNormalization(Layer):
         super().build(input_shape)
 
     def call(self, inputs):
-        input_shape = inputs.shape
-
         reshaped_inputs = self._reshape_into_groups(inputs)
 
         normalized_inputs = self._apply_normalization(
-            reshaped_inputs, input_shape
+            reshaped_inputs, inputs.shape
         )
-        return ops.reshape(normalized_inputs, input_shape)
+        return ops.reshape(normalized_inputs, ops.shape(inputs))
 
     def _reshape_into_groups(self, inputs):
-        input_shape = inputs.shape
-        group_shape = [input_shape[i] for i in range(len(input_shape))]
+        input_shape = ops.shape(inputs)
+        group_shape = list(inputs.shape)
+        for i, e in enumerate(group_shape):
+            if e is None:
+                group_shape[i] = input_shape[i]
+
         group_shape[self.axis] = input_shape[self.axis] // self.groups
         group_shape.insert(self.axis, self.groups)
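The reshape fix distinguishes the static shape (`inputs.shape`, which may contain `None` for dynamic dimensions) from the runtime shape (`ops.shape(inputs)`): the new code keeps static integers where available and falls back to dynamic values otherwise. The same pattern expressed in plain TensorFlow terms (a sketch, assuming a TF backend):

```python
import tensorflow as tf

@tf.function(input_signature=[tf.TensorSpec([None, 8], tf.float32)])
def f(inputs):
    static = inputs.shape.as_list()  # [None, 8]: batch dim unknown when tracing
    dynamic = tf.shape(inputs)       # concrete values at run time
    # Prefer static ints, fall back to dynamic values for unknown dims:
    full = [dynamic[i] if d is None else d for i, d in enumerate(static)]
    return tf.reshape(inputs, full)
```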
@@ -215,7 +215,7 @@ class LayerNormalization(Layer):
         outputs = ops.cast(outputs, input_dtype)
 
         # If some components of the shape got lost due to adjustments, fix that.
-        outputs = ops.reshape(outputs, input_shape)
+        outputs = ops.reshape(outputs, ops.shape(inputs))
 
         return outputs
 
@@ -19,6 +19,7 @@ class HashedCrossingTest(testing.TestCase):
             expected_num_seed_generators=0,
             expected_num_losses=0,
             supports_masking=False,
+            run_training_check=False,
         )
         self.run_layer_test(
             layers.HashedCrossing,
@@ -30,6 +31,7 @@ class HashedCrossingTest(testing.TestCase):
             expected_num_seed_generators=0,
             expected_num_losses=0,
             supports_masking=False,
+            run_training_check=False,
         )
 
     def test_correctness(self):
@@ -14,6 +14,7 @@ class RandomCropTest(testing.TestCase):
             },
             input_shape=(2, 3, 4),
             supports_masking=False,
+            run_training_check=False,
         )
 
     def test_random_crop_full(self):
@@ -34,6 +35,7 @@ class RandomCropTest(testing.TestCase):
             input_shape=(12, 8, 16, 3),
             expected_output_shape=(12, 8, 8, 3),
             supports_masking=False,
+            run_training_check=False,
         )
 
     def test_predicting_with_longer_height(self):
@@ -46,6 +48,7 @@ class RandomCropTest(testing.TestCase):
             input_shape=(12, 8, 16, 3),
             expected_output_shape=(12, 10, 8, 3),
             supports_masking=False,
+            run_training_check=False,
         )
 
     def test_predicting_with_longer_width(self):
@@ -58,4 +61,5 @@ class RandomCropTest(testing.TestCase):
             input_shape=(12, 8, 16, 3),
             expected_output_shape=(12, 8, 18, 3),
             supports_masking=False,
+            run_training_check=False,
         )
@@ -44,6 +44,12 @@ class RNNCellWithDropout(layers.Layer, DropoutRNNCell):
 
 
 class DropoutRNNCellTest(testing.TestCase):
+    def test_seed_tracking(self):
+        cell = RNNCellWithDropout(3, seed=1337)
+        self.assertEqual(len(cell.non_trainable_variables), 1)
+        layer = layers.RNN(cell)
+        self.assertEqual(len(layer.non_trainable_variables), 1)
+
     def test_basics(self):
         self.run_layer_test(
             layers.RNN,
@@ -53,5 +59,6 @@ class DropoutRNNCellTest(testing.TestCase):
             expected_output_shape=(3, 5),
             expected_num_trainable_weights=2,
             expected_num_non_trainable_weights=0,
+            expected_num_non_trainable_variables=1,
             supports_masking=True,
         )
@@ -247,9 +247,6 @@ class GRUCell(Layer, DropoutRNNCell):
 
             hh = self.activation(x_h + recurrent_h)
         else:
-            if 0.0 < self.dropout < 1.0:
-                inputs = inputs * dp_mask[0]
-
             # inputs projected by all gate matrices at once
             matrix_x = ops.matmul(inputs, self.kernel)
             if self.use_bias:
@@ -262,8 +262,6 @@ class LSTMCell(Layer, DropoutRNNCell):
             h_tm1 = (h_tm1_i, h_tm1_f, h_tm1_c, h_tm1_o)
             c, o = self._compute_carry_and_output(x, h_tm1, c_tm1)
         else:
-            if 0.0 < self.dropout < 1.0:
-                inputs = inputs * dp_mask[0]
             z = ops.matmul(inputs, self.kernel)
 
             z += ops.matmul(h_tm1, self.recurrent_kernel)
@@ -239,12 +239,8 @@ def hard_sigmoid(x):
 
 
 class Elu(Operation):
-    def __init__(self, alpha=1.0):
-        super().__init__()
-        self.alpha = alpha
-
     def call(self, x):
-        return backend.nn.elu(x, alpha=self.alpha)
+        return backend.nn.elu(x)
 
     def compute_output_spec(self, x):
         return KerasTensor(x.shape, dtype=x.dtype)
@@ -253,10 +249,10 @@ class Elu(Operation):
 @keras_core_export(
     ["keras_core.operations.elu", "keras_core.operations.nn.elu"]
 )
-def elu(x, alpha=1.0):
+def elu(x):
     if any_symbolic_tensors((x,)):
-        return Elu(alpha).symbolic_call(x)
-    return backend.nn.elu(x, alpha=alpha)
+        return Elu().symbolic_call(x)
+    return backend.nn.elu(x)
 
 
 class Selu(Operation):
@@ -637,10 +637,6 @@ class NNOpsCorrectnessTest(testing.TestCase):
             knn.elu(x),
             [-0.63212055, 0, 1, 2, 3],
         )
-        self.assertAllClose(
-            knn.elu(x, alpha=0.5),
-            [-0.31606027, 0, 1, 2, 3],
-        )
 
     def test_selu(self):
         x = np.array([-1, 0, 1, 2, 3], dtype=np.float32)
@@ -19,7 +19,11 @@ class SeedGenerator:
             return [seed, 0]
 
         self.state = Variable(
-            seed_initializer, shape=(2,), dtype="uint32", trainable=False
+            seed_initializer,
+            shape=(2,),
+            dtype="uint32",
+            trainable=False,
+            name="seed_generator_state",
         )
 
 
@@ -106,6 +106,7 @@ class TestCase(unittest.TestCase):
         supports_masking=None,
         expected_mask_shape=None,
         custom_objects=None,
+        run_training_check=True,
     ):
         """Run basic checks on a layer.
 
@@ -140,6 +141,8 @@ class TestCase(unittest.TestCase):
                 returned by compute_mask() (only supports 1 shape).
             custom_objects: Dict of any custom objects to be
                 considered during deserialization.
+            run_training_check: Whether to attempt to train the layer
+                (if an input shape or input data was provided).
         """
         if input_shape is not None and input_data is not None:
             raise ValueError(
@@ -271,7 +274,9 @@ class TestCase(unittest.TestCase):
 
             model = TestModel(layer)
             model.compile(optimizer="sgd", loss="mse", jit_compile=False)
-            model.fit(np.array(input_data), np.array(output_data))
+            input_data = nest.map_structure(lambda x: np.array(x), input_data)
+            output_data = nest.map_structure(lambda x: np.array(x), output_data)
+            model.fit(input_data, output_data, verbose=0)
 
         # Build test.
         if input_shape is not None:
@@ -309,8 +314,8 @@ class TestCase(unittest.TestCase):
         output_data = layer(input_data, **call_kwargs)
         run_output_asserts(layer, output_data, eager=True)
 
-        # # Compiled training step - TODO
-        # run_training_step(layer, input_data, output_data)
+        if run_training_check:
+            run_training_step(layer, input_data, output_data)
 
 
 def create_keras_tensors(input_shape, dtype):
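The previously commented-out training step is now enabled by default, and the check converts possibly nested input structures with `nest.map_structure` before calling `fit`, so tuples or dicts of arrays are handled element-wise. A small sketch of that conversion step:

```python
import numpy as np
from tensorflow import nest

data = {"query": [[1.0, 2.0]], "value": [[3.0, 4.0]]}
arrays = nest.map_structure(lambda x: np.array(x), data)
print({k: v.shape for k, v in arrays.items()})
# {'query': (1, 2), 'value': (1, 2)}
```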