Sync OSS keras to head.

PiperOrigin-RevId: 347838100
Scott Zhu 2020-12-16 09:34:29 -08:00 committed by TensorFlower Gardener
parent af1a2eb1f5
commit f0c0c877ba
10 changed files with 408 additions and 350 deletions

@ -1,7 +1,10 @@
# Description:
# Contains the Keras engine API (internal TensorFlow version).
# buildifier: disable=same-origin-load
load("@org_keras//keras:keras.bzl", "tf_py_test")
# buildifier: disable=same-origin-load
load("@org_keras//keras:keras.bzl", "cuda_py_test")
package(

@ -27,8 +27,8 @@ import shutil
from absl.testing import parameterized
import numpy as np
from tensorflow.core.protobuf import rewriter_config_pb2
from tensorflow.python.framework import test_util as tf_test_util
import keras
from tensorflow.python.framework import test_util as tf_test_util
from keras import combinations
from keras import keras_parameterized
from keras import testing_utils

@ -12,8 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Locally-connected layers.
"""
"""Locally-connected layers."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
@ -59,79 +58,61 @@ class LocallyConnected1D(Layer):
```
Arguments:
filters: Integer, the dimensionality of the output space
(i.e. the number of output filters in the convolution).
kernel_size: An integer or tuple/list of a single integer,
specifying the length of the 1D convolution window.
strides: An integer or tuple/list of a single integer,
specifying the stride length of the convolution.
Specifying any stride value != 1 is incompatible with specifying
any `dilation_rate` value != 1.
padding: Currently only supports `"valid"` (case-insensitive).
`"same"` may be supported in the future.
`"valid"` means no padding.
data_format: A string,
one of `channels_last` (default) or `channels_first`.
The ordering of the dimensions in the inputs.
`channels_last` corresponds to inputs with shape
`(batch, length, channels)` while `channels_first`
corresponds to inputs with shape
`(batch, channels, length)`.
It defaults to the `image_data_format` value found in your
Keras config file at `~/.keras/keras.json`.
If you never set it, then it will be "channels_last".
activation: Activation function to use.
If you don't specify anything, no activation is applied
filters: Integer, the dimensionality of the output space (i.e. the number
of output filters in the convolution).
kernel_size: An integer or tuple/list of a single integer, specifying the
length of the 1D convolution window.
strides: An integer or tuple/list of a single integer, specifying the
stride length of the convolution.
padding: Currently only supports `"valid"` (case-insensitive). `"same"`
may be supported in the future. `"valid"` means no padding.
data_format: A string, one of `channels_last` (default) or
`channels_first`. The ordering of the dimensions in the inputs.
`channels_last` corresponds to inputs with shape `(batch, length,
channels)` while `channels_first` corresponds to inputs with shape
`(batch, channels, length)`. It defaults to the `image_data_format`
value found in your Keras config file at `~/.keras/keras.json`. If you
never set it, then it will be "channels_last".
activation: Activation function to use. If you don't specify anything, no
activation is applied
(ie. "linear" activation: `a(x) = x`).
use_bias: Boolean, whether the layer uses a bias vector.
kernel_initializer: Initializer for the `kernel` weights matrix.
bias_initializer: Initializer for the bias vector.
kernel_regularizer: Regularizer function applied to
the `kernel` weights matrix.
kernel_regularizer: Regularizer function applied to the `kernel` weights
matrix.
bias_regularizer: Regularizer function applied to the bias vector.
activity_regularizer: Regularizer function applied to
the output of the layer (its "activation")..
activity_regularizer: Regularizer function applied to the output of the
layer (its "activation").
kernel_constraint: Constraint function applied to the kernel matrix.
bias_constraint: Constraint function applied to the bias vector.
implementation: implementation mode, either `1`, `2`, or `3`.
`1` loops over input spatial locations to perform the forward pass.
It is memory-efficient but performs a lot of (small) ops.
`2` stores layer weights in a dense but sparsely-populated 2D matrix
and implements the forward pass as a single matrix-multiply. It uses
a lot of RAM but performs few (large) ops.
`3` stores layer weights in a sparse tensor and implements the forward
pass as a single sparse matrix-multiply.
implementation: implementation mode, either `1`, `2`, or `3`. `1` loops
over input spatial locations to perform the forward pass. It is
memory-efficient but performs a lot of (small) ops. `2` stores layer
weights in a dense but sparsely-populated 2D matrix and implements the
forward pass as a single matrix-multiply. It uses a lot of RAM but
performs few (large) ops. `3` stores layer weights in a sparse tensor
and implements the forward pass as a single sparse matrix-multiply.
How to choose:
`1`: large, dense models,
`2`: small models,
`3`: large, sparse models,
where "large" stands for large input/output activations
(i.e. many `filters`, `input_filters`, large `input_size`,
`output_size`), and "sparse" stands for few connections between inputs
and outputs, i.e. small ratio
`filters * input_filters * kernel_size / (input_size * strides)`,
where inputs to and outputs of the layer are assumed to have shapes
`(input_size, input_filters)`, `(output_size, filters)`
respectively.
It is recommended to benchmark each in the setting of interest to pick
the most efficient one (in terms of speed and memory usage). Correct
choice of implementation can lead to dramatic speed improvements (e.g.
50X), potentially at the expense of RAM.
Also, only `padding="valid"` is supported by `implementation=1`.
`3`: large, sparse models, where "large" stands for large
input/output activations (i.e. many `filters`, `input_filters`,
large `input_size`, `output_size`), and "sparse" stands for few
connections between inputs and outputs, i.e. small ratio `filters *
input_filters * kernel_size / (input_size * strides)`, where inputs
to and outputs of the layer are assumed to have shapes `(input_size,
input_filters)`, `(output_size, filters)` respectively. It is
recommended to benchmark each in the setting of interest to pick the
most efficient one (in terms of speed and memory usage). Correct
choice of implementation can lead to dramatic speed improvements
(e.g. 50X), potentially at the expense of RAM. Also, only
`padding="valid"` is supported by `implementation=1`.
Input shape:
3D tensor with shape: `(batch_size, steps, input_dim)`
Output shape:
3D tensor with shape: `(batch_size, new_steps, filters)`
`steps` value might have changed due to padding or strides.
3D tensor with shape: `(batch_size, new_steps, filters)` `steps` value
might have changed due to padding or strides.
"""
def __init__(self,
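To make the shape and `implementation` semantics above concrete, a minimal usage sketch (shapes and values here are illustrative, not part of this change):

```python
import numpy as np
import tensorflow as tf

# A LocallyConnected1D layer applies a *different* (unshared) kernel at each
# output position: with 10 steps, kernel_size=3, strides=1 and "valid" padding
# there are 8 output positions, each with its own weights.
layer = tf.keras.layers.LocallyConnected1D(
    filters=4, kernel_size=3, implementation=2)  # dense-matmul variant
x = np.random.rand(2, 10, 6).astype('float32')   # (batch, steps, channels)
y = layer(x)
print(y.shape)  # (2, 8, 4): new_steps = 10 - 3 + 1, filters = 4
```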
@ -158,8 +139,8 @@ class LocallyConnected1D(Layer):
self.padding = conv_utils.normalize_padding(padding)
if self.padding != 'valid' and implementation == 1:
raise ValueError('Invalid border mode for LocallyConnected1D '
'(only "valid" is supported if implementation is 1): '
+ padding)
'(only "valid" is supported if implementation is 1): ' +
padding)
self.data_format = conv_utils.normalize_data_format(data_format)
self.activation = activations.get(activation)
self.use_bias = use_bias
@ -181,10 +162,13 @@ class LocallyConnected1D(Layer):
input_dim, input_length = input_shape[2], input_shape[1]
if input_dim is None:
raise ValueError('Axis 2 of input should be fully-defined. '
'Found shape:', input_shape)
self.output_length = conv_utils.conv_output_length(
input_length, self.kernel_size[0], self.padding, self.strides[0])
raise ValueError(
'Axis 2 of input should be fully-defined. '
'Found shape:', input_shape)
self.output_length = conv_utils.conv_output_length(input_length,
self.kernel_size[0],
self.padding,
self.strides[0])
if self.implementation == 1:
self.kernel_shape = (self.output_length, self.kernel_size[0] * input_dim,
@ -199,17 +183,18 @@ class LocallyConnected1D(Layer):
elif self.implementation == 2:
if self.data_format == 'channels_first':
self.kernel_shape = (input_dim, input_length,
self.filters, self.output_length)
self.kernel_shape = (input_dim, input_length, self.filters,
self.output_length)
else:
self.kernel_shape = (input_length, input_dim,
self.output_length, self.filters)
self.kernel_shape = (input_length, input_dim, self.output_length,
self.filters)
self.kernel = self.add_weight(shape=self.kernel_shape,
initializer=self.kernel_initializer,
name='kernel',
regularizer=self.kernel_regularizer,
constraint=self.kernel_constraint)
self.kernel = self.add_weight(
shape=self.kernel_shape,
initializer=self.kernel_initializer,
name='kernel',
regularizer=self.kernel_regularizer,
constraint=self.kernel_constraint)
self.kernel_mask = get_locallyconnected_mask(
input_shape=(input_length,),
@ -231,8 +216,7 @@ class LocallyConnected1D(Layer):
padding=self.padding,
filters_in=input_dim,
filters_out=self.filters,
data_format=self.data_format)
)
data_format=self.data_format))
self.kernel = self.add_weight(
shape=(len(self.kernel_idxs),),
@ -242,8 +226,8 @@ class LocallyConnected1D(Layer):
constraint=self.kernel_constraint)
else:
raise ValueError('Unrecognized implementation mode: %d.'
% self.implementation)
raise ValueError('Unrecognized implementation mode: %d.' %
self.implementation)
if self.use_bias:
self.bias = self.add_weight(
@ -291,8 +275,8 @@ class LocallyConnected1D(Layer):
self.compute_output_shape(inputs.shape))
else:
raise ValueError('Unrecognized implementation mode: %d.'
% self.implementation)
raise ValueError('Unrecognized implementation mode: %d.' %
self.implementation)
if self.use_bias:
output = K.bias_add(output, self.bias, data_format=self.data_format)
@ -366,87 +350,71 @@ class LocallyConnected2D(Layer):
```
Arguments:
filters: Integer, the dimensionality of the output space
(i.e. the number of output filters in the convolution).
kernel_size: An integer or tuple/list of 2 integers, specifying the
width and height of the 2D convolution window.
Can be a single integer to specify the same value for
all spatial dimensions.
strides: An integer or tuple/list of 2 integers,
specifying the strides of the convolution along the width and height.
Can be a single integer to specify the same value for
all spatial dimensions.
padding: Currently only support `"valid"` (case-insensitive).
`"same"` will be supported in future.
`"valid"` means no padding.
data_format: A string,
one of `channels_last` (default) or `channels_first`.
The ordering of the dimensions in the inputs.
`channels_last` corresponds to inputs with shape
`(batch, height, width, channels)` while `channels_first`
corresponds to inputs with shape
`(batch, channels, height, width)`.
It defaults to the `image_data_format` value found in your
Keras config file at `~/.keras/keras.json`.
If you never set it, then it will be "channels_last".
activation: Activation function to use.
If you don't specify anything, no activation is applied
filters: Integer, the dimensionality of the output space (i.e. the number
of output filters in the convolution).
kernel_size: An integer or tuple/list of 2 integers, specifying the width
and height of the 2D convolution window. Can be a single integer to
specify the same value for all spatial dimensions.
strides: An integer or tuple/list of 2 integers, specifying the strides of
the convolution along the width and height. Can be a single integer to
specify the same value for all spatial dimensions.
padding: Currently only supports `"valid"` (case-insensitive). `"same"`
will be supported in the future. `"valid"` means no padding.
data_format: A string, one of `channels_last` (default) or
`channels_first`. The ordering of the dimensions in the inputs.
`channels_last` corresponds to inputs with shape `(batch, height, width,
channels)` while `channels_first` corresponds to inputs with shape
`(batch, channels, height, width)`. It defaults to the
`image_data_format` value found in your Keras config file at
`~/.keras/keras.json`. If you never set it, then it will be
"channels_last".
activation: Activation function to use. If you don't specify anything, no
activation is applied
(ie. "linear" activation: `a(x) = x`).
use_bias: Boolean, whether the layer uses a bias vector.
kernel_initializer: Initializer for the `kernel` weights matrix.
bias_initializer: Initializer for the bias vector.
kernel_regularizer: Regularizer function applied to
the `kernel` weights matrix.
kernel_regularizer: Regularizer function applied to the `kernel` weights
matrix.
bias_regularizer: Regularizer function applied to the bias vector.
activity_regularizer: Regularizer function applied to
the output of the layer (its "activation").
activity_regularizer: Regularizer function applied to the output of the
layer (its "activation").
kernel_constraint: Constraint function applied to the kernel matrix.
bias_constraint: Constraint function applied to the bias vector.
implementation: implementation mode, either `1`, `2`, or `3`.
`1` loops over input spatial locations to perform the forward pass.
It is memory-efficient but performs a lot of (small) ops.
`2` stores layer weights in a dense but sparsely-populated 2D matrix
and implements the forward pass as a single matrix-multiply. It uses
a lot of RAM but performs few (large) ops.
`3` stores layer weights in a sparse tensor and implements the forward
pass as a single sparse matrix-multiply.
implementation: implementation mode, either `1`, `2`, or `3`. `1` loops
over input spatial locations to perform the forward pass. It is
memory-efficient but performs a lot of (small) ops. `2` stores layer
weights in a dense but sparsely-populated 2D matrix and implements the
forward pass as a single matrix-multiply. It uses a lot of RAM but
performs few (large) ops. `3` stores layer weights in a sparse tensor
and implements the forward pass as a single sparse matrix-multiply.
How to choose:
`1`: large, dense models,
`2`: small models,
`3`: large, sparse models,
where "large" stands for large input/output activations
(i.e. many `filters`, `input_filters`, large `np.prod(input_size)`,
`np.prod(output_size)`), and "sparse" stands for few connections
between inputs and outputs, i.e. small ratio
`filters * input_filters * np.prod(kernel_size) / (np.prod(input_size)
* np.prod(strides))`, where inputs to and outputs of the layer are
assumed to have shapes `input_size + (input_filters,)`,
`output_size + (filters,)` respectively.
It is recommended to benchmark each in the setting of interest to pick
the most efficient one (in terms of speed and memory usage). Correct
choice of implementation can lead to dramatic speed improvements (e.g.
50X), potentially at the expense of RAM.
Also, only `padding="valid"` is supported by `implementation=1`.
`3`: large, sparse models, where "large" stands for large
input/output activations (i.e. many `filters`, `input_filters`,
large `np.prod(input_size)`, `np.prod(output_size)`), and "sparse"
stands for few connections between inputs and outputs, i.e. small
ratio `filters * input_filters * np.prod(kernel_size) /
(np.prod(input_size) * np.prod(strides))`, where inputs to and
outputs of the layer are assumed to have shapes `input_size +
(input_filters,)`, `output_size + (filters,)` respectively. It is
recommended to benchmark each in the setting of interest to pick the
most efficient one (in terms of speed and memory usage). Correct
choice of implementation can lead to dramatic speed improvements
(e.g. 50X), potentially at the expense of RAM. Also, only
`padding="valid"` is supported by `implementation=1`.
Input shape:
4D tensor with shape:
`(samples, channels, rows, cols)` if data_format='channels_first'
or 4D tensor with shape:
`(samples, rows, cols, channels)` if data_format='channels_last'.
4D tensor with shape: `(samples, channels, rows, cols)` if
data_format='channels_first'
or 4D tensor with shape: `(samples, rows, cols, channels)` if
data_format='channels_last'.
Output shape:
4D tensor with shape:
`(samples, filters, new_rows, new_cols)` if data_format='channels_first'
or 4D tensor with shape:
`(samples, new_rows, new_cols, filters)` if data_format='channels_last'.
`rows` and `cols` values might have changed due to padding.
4D tensor with shape: `(samples, filters, new_rows, new_cols)` if
data_format='channels_first'
or 4D tensor with shape: `(samples, new_rows, new_cols, filters)` if
data_format='channels_last'. `rows` and `cols` values might have changed
due to padding.
"""
def __init__(self,
@ -473,8 +441,8 @@ class LocallyConnected2D(Layer):
self.padding = conv_utils.normalize_padding(padding)
if self.padding != 'valid' and implementation == 1:
raise ValueError('Invalid border mode for LocallyConnected2D '
'(only "valid" is supported if implementation is 1): '
+ padding)
'(only "valid" is supported if implementation is 1): ' +
padding)
self.data_format = conv_utils.normalize_data_format(data_format)
self.activation = activations.get(activation)
self.use_bias = use_bias
@ -509,10 +477,8 @@ class LocallyConnected2D(Layer):
self.output_col = output_col
if self.implementation == 1:
self.kernel_shape = (
output_row * output_col,
self.kernel_size[0] * self.kernel_size[1] * input_filter,
self.filters)
self.kernel_shape = (output_row * output_col, self.kernel_size[0] *
self.kernel_size[1] * input_filter, self.filters)
self.kernel = self.add_weight(
shape=self.kernel_shape,
@ -523,17 +489,18 @@ class LocallyConnected2D(Layer):
elif self.implementation == 2:
if self.data_format == 'channels_first':
self.kernel_shape = (input_filter, input_row, input_col,
self.filters, self.output_row, self.output_col)
self.kernel_shape = (input_filter, input_row, input_col, self.filters,
self.output_row, self.output_col)
else:
self.kernel_shape = (input_row, input_col, input_filter,
self.output_row, self.output_col, self.filters)
self.kernel = self.add_weight(shape=self.kernel_shape,
initializer=self.kernel_initializer,
name='kernel',
regularizer=self.kernel_regularizer,
constraint=self.kernel_constraint)
self.kernel = self.add_weight(
shape=self.kernel_shape,
initializer=self.kernel_initializer,
name='kernel',
regularizer=self.kernel_regularizer,
constraint=self.kernel_constraint)
self.kernel_mask = get_locallyconnected_mask(
input_shape=(input_row, input_col),
@ -555,8 +522,7 @@ class LocallyConnected2D(Layer):
padding=self.padding,
filters_in=input_filter,
filters_out=self.filters,
data_format=self.data_format)
)
data_format=self.data_format))
self.kernel = self.add_weight(
shape=(len(self.kernel_idxs),),
@ -566,8 +532,8 @@ class LocallyConnected2D(Layer):
constraint=self.kernel_constraint)
else:
raise ValueError('Unrecognized implementation mode: %d.'
% self.implementation)
raise ValueError('Unrecognized implementation mode: %d.' %
self.implementation)
if self.use_bias:
self.bias = self.add_weight(
@ -619,8 +585,8 @@ class LocallyConnected2D(Layer):
self.compute_output_shape(inputs.shape))
else:
raise ValueError('Unrecognized implementation mode: %d.'
% self.implementation)
raise ValueError('Unrecognized implementation mode: %d.' %
self.implementation)
if self.use_bias:
output = K.bias_add(output, self.bias, data_format=self.data_format)
@ -686,10 +652,10 @@ def get_locallyconnected_mask(input_shape, kernel_shape, strides, padding,
`strides`, `padding` and `data_format`.
Arguments:
input_shape: tuple of size N: `(d_in1, ..., d_inN)`
spatial shape of the input.
kernel_shape: tuple of size N, spatial shape of the convolutional kernel
/ receptive field.
input_shape: tuple of size N: `(d_in1, ..., d_inN)` spatial shape of the
input.
kernel_shape: tuple of size N, spatial shape of the convolutional kernel /
receptive field.
strides: tuple of size N, strides along each spatial dimension.
padding: type of padding, string `"same"` or `"valid"`.
data_format: a string, `"channels_first"` or `"channels_last"`.
@ -709,8 +675,7 @@ def get_locallyconnected_mask(input_shape, kernel_shape, strides, padding,
input_shape=input_shape,
kernel_shape=kernel_shape,
strides=strides,
padding=padding
)
padding=padding)
ndims = int(mask.ndim / 2)
@ -739,34 +704,26 @@ def local_conv_matmul(inputs, kernel, kernel_mask, output_shape):
reshapes to make `inputs` and `kernel` 2-D and `output` (N+2)-D.
Arguments:
inputs: (N+2)-D tensor with shape
`(batch_size, channels_in, d_in1, ..., d_inN)`
or
`(batch_size, d_in1, ..., d_inN, channels_in)`.
inputs: (N+2)-D tensor with shape `(batch_size, channels_in, d_in1, ...,
d_inN)` or `(batch_size, d_in1, ..., d_inN, channels_in)`.
kernel: the unshared weights for N-D convolution,
an (N+2)-D tensor of shape:
`(d_in1, ..., d_inN, channels_in, d_out2, ..., d_outN, channels_out)`
or
`(channels_in, d_in1, ..., d_inN, channels_out, d_out2, ..., d_outN)`,
with the ordering of channels and spatial dimensions matching
that of the input.
Each entry is the weight between a particular input and
output location, similarly to a fully-connected weight matrix.
kernel_mask: a float 0/1 mask tensor of shape:
`(d_in1, ..., d_inN, 1, d_out2, ..., d_outN, 1)`
or
`(1, d_in1, ..., d_inN, 1, d_out2, ..., d_outN)`,
with the ordering of singleton and spatial dimensions
matching that of the input.
Mask represents the connectivity pattern of the layer and is
precomputed elsewhere based on layer parameters: stride,
padding, and the receptive field shape.
an (N+2)-D tensor of shape: `(d_in1, ..., d_inN, channels_in, d_out2,
..., d_outN, channels_out)` or `(channels_in, d_in1, ..., d_inN,
channels_out, d_out2, ..., d_outN)`, with the ordering of channels
and spatial dimensions matching that of the input. Each entry is the
weight between a particular input and output location, similarly to
a fully-connected weight matrix.
kernel_mask: a float 0/1 mask tensor of shape: `(d_in1, ..., d_inN, 1,
d_out2, ..., d_outN, 1)` or `(1, d_in1, ..., d_inN, 1, d_out2, ...,
d_outN)`, with the ordering of singleton and spatial dimensions matching
that of the input. Mask represents the connectivity pattern of the layer
and is
precomputed elsewhere based on layer parameters: stride, padding, and
the receptive field shape.
output_shape: a tuple of (N+2) elements representing the output shape:
`(batch_size, channels_out, d_out1, ..., d_outN)`
or
`(batch_size, d_out1, ..., d_outN, channels_out)`,
with the ordering of channels and spatial dimensions matching that of
the input.
`(batch_size, channels_out, d_out1, ..., d_outN)` or `(batch_size,
d_out1, ..., d_outN, channels_out)`, with the ordering of channels and
spatial dimensions matching that of the input.
Returns:
Output (N+2)-D tensor with shape `output_shape`.
@ -777,8 +734,9 @@ def local_conv_matmul(inputs, kernel, kernel_mask, output_shape):
kernel = make_2d(kernel, split_dim=K.ndim(kernel) // 2)
output_flat = tf.compat.v1.sparse_matmul(inputs_flat, kernel, b_is_sparse=True)
output = K.reshape(output_flat,
[K.shape(output_flat)[0],] + output_shape.as_list()[1:])
output = K.reshape(output_flat, [
K.shape(output_flat)[0],
] + output_shape.as_list()[1:])
return output
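The idea behind this masked-matmul path (`implementation=2`) is easiest to see on a toy case; a small NumPy sketch of the principle, not the actual kernel layout used by the layer:

```python
import numpy as np

# Toy unshared 1D conv: input length 4, kernel_size 2, stride 1, one channel,
# one filter -> 3 output positions, each with its own 2-tap kernel.
x = np.arange(1., 5.)                      # flattened input, shape (4,)
kernels = np.array([[1., 1.],              # weights for output position 0
                    [2., 0.],              # output position 1
                    [0., 3.]])             # output position 2

# Dense but sparsely-populated weight matrix: row i holds kernel i placed over
# its receptive field and zeros elsewhere (the role of `kernel_mask`).
w = np.zeros((3, 4))
for i in range(3):
  w[i, i:i + 2] = kernels[i]

out = w @ x   # the whole forward pass is a single matrix multiply
print(out)    # [ 3.  4. 12.] == per-position dot products
```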
@ -810,14 +768,16 @@ def local_conv_sparse_matmul(inputs, kernel, kernel_idxs, kernel_shape,
"""
inputs_flat = K.reshape(inputs, (K.shape(inputs)[0], -1))
output_flat = tf.raw_ops.SparseTensorDenseMatMul(
a_indices=kernel_idxs, a_values=kernel, a_shape=kernel_shape,
b=inputs_flat, adjoint_b=True)
a_indices=kernel_idxs,
a_values=kernel,
a_shape=kernel_shape,
b=inputs_flat,
adjoint_b=True)
output_flat_transpose = K.transpose(output_flat)
output_reshaped = K.reshape(
output_flat_transpose,
[K.shape(output_flat_transpose)[0],] + output_shape.as_list()[1:]
)
output_reshaped = K.reshape(output_flat_transpose, [
K.shape(output_flat_transpose)[0],
] + output_shape.as_list()[1:])
return output_reshaped
@ -830,7 +790,7 @@ def make_2d(tensor, split_dim):
Arguments:
tensor: a tensor of shape `(d0, ..., d(N-1))`.
split_dim: an integer from 1 to N-1, index of the dimension to group
dimensions before (excluding) and after (including).
dimensions before (excluding) and after (including).
Returns:
Tensor of shape
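A small sketch of the reshape `make_2d` performs, assuming the grouping semantics described above (dimensions before `split_dim` become rows, the rest become columns):

```python
import tensorflow as tf

t = tf.zeros([2, 3, 4, 5])
# With split_dim=2, dims (2, 3) are grouped into rows and (4, 5) into
# columns, giving a (6, 20) matrix. Inputs and kernels are flattened this
# way before the single matmul in local_conv_matmul.
m = tf.reshape(t, [2 * 3, 4 * 5])
print(m.shape)  # (6, 20)
```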

@ -28,8 +28,8 @@ import time
from absl.testing import parameterized
import numpy as np
from tensorflow.core.protobuf import rewriter_config_pb2
from tensorflow.python.framework import test_util as tf_test_util
import keras
from tensorflow.python.framework import test_util as tf_test_util
from keras import keras_parameterized
from keras import testing_utils
from keras.layers import recurrent as rnn_v1

@ -26,6 +26,7 @@ from absl.testing import parameterized
import numpy as np
import keras
from tensorflow.python.framework import test_util as tf_test_util
from keras import combinations
from keras import keras_parameterized
from keras import testing_utils
@ -33,8 +34,6 @@ from keras.engine import base_layer_utils
from keras.layers import core
from keras.layers.rnn_cell_wrapper_v2 import ResidualWrapper
from keras.utils import generic_utils
from tensorflow.python.eager import context
from tensorflow.python.framework import test_util as tf_test_util
from tensorflow.python.ops.ragged import ragged_tensor
from tensorflow.python.training.tracking import util as trackable_util
@ -653,7 +652,7 @@ class BidirectionalTest(tf.test.TestCase, parameterized.TestCase):
model.compile(loss='mse', optimizer='sgd')
model.fit(x, y, epochs=1, batch_size=1)
if context.executing_eagerly():
if tf.executing_eagerly():
run_test()
else:
tf_test_util.enable_output_all_intermediates(run_test)()

@ -12,8 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Built-in loss functions.
"""
"""Built-in loss functions."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
@ -85,8 +84,8 @@ class Loss(object):
`tf.distribute.Strategy`, outside of built-in training loops such as
`tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
will raise an error. Please see this custom training [tutorial](
https://www.tensorflow.org/tutorials/distribute/custom_training)
for more details.
https://www.tensorflow.org/tutorials/distribute/custom_training) for
more details.
name: Optional name for the op.
"""
losses_utils.ReductionV2.validate(reduction)
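The reduction caveat repeated throughout these docstrings boils down to the pattern from the linked tutorial; a hedged sketch of a custom-loop setup (strategy, batch size and loss class are illustrative):

```python
import tensorflow as tf

strategy = tf.distribute.MirroredStrategy()
GLOBAL_BATCH_SIZE = 64

with strategy.scope():
  # AUTO / SUM_OVER_BATCH_SIZE would raise in a custom training loop, so ask
  # for per-example losses and average over the *global* batch size instead.
  loss_obj = tf.keras.losses.BinaryCrossentropy(
      from_logits=True, reduction=tf.keras.losses.Reduction.NONE)

def compute_loss(labels, logits):
  per_example = loss_obj(labels, logits)
  return tf.nn.compute_average_loss(
      per_example, global_batch_size=GLOBAL_BATCH_SIZE)
```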
@ -115,15 +114,15 @@ class Loss(object):
sparse loss functions such as sparse categorical crossentropy where
shape = `[batch_size, d0, .. dN-1]`
y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`
sample_weight: Optional `sample_weight` acts as a
coefficient for the loss. If a scalar is provided, then the loss is
simply scaled by the given value. If `sample_weight` is a tensor of size
`[batch_size]`, then the total loss for each sample of the batch is
rescaled by the corresponding element in the `sample_weight` vector. If
the shape of `sample_weight` is `[batch_size, d0, .. dN-1]` (or can be
broadcasted to this shape), then each loss element of `y_pred` is scaled
sample_weight: Optional `sample_weight` acts as a coefficient for the
loss. If a scalar is provided, then the loss is simply scaled by the
given value. If `sample_weight` is a tensor of size `[batch_size]`, then
the total loss for each sample of the batch is rescaled by the
corresponding element in the `sample_weight` vector. If the shape of
`sample_weight` is `[batch_size, d0, .. dN-1]` (or can be broadcasted to
this shape), then each loss element of `y_pred` is scaled
by the corresponding value of `sample_weight`. (Note on `dN-1`: all loss
functions reduce by 1 dimension, usually axis=-1.)
functions reduce by 1 dimension, usually axis=-1.)
Returns:
Weighted loss float `Tensor`. If `reduction` is `NONE`, this has
@ -223,8 +222,8 @@ class LossFunctionWrapper(Loss):
`tf.distribute.Strategy`, outside of built-in training loops such as
`tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
will raise an error. Please see this custom training [tutorial](
https://www.tensorflow.org/tutorials/distribute/custom_training)
for more details.
https://www.tensorflow.org/tutorials/distribute/custom_training) for
more details.
name: (Optional) name for the loss.
**kwargs: The keyword arguments that are passed on to `fn`.
"""
@ -243,8 +242,7 @@ class LossFunctionWrapper(Loss):
Loss values per sample.
"""
if tf.is_tensor(y_pred) and tf.is_tensor(y_true):
y_pred, y_true = losses_utils.squeeze_or_expand_dimensions(
y_pred, y_true)
y_pred, y_true = losses_utils.squeeze_or_expand_dimensions(y_pred, y_true)
ag_fn = autograph.tf_convert(self.fn, ag_ctx.control_status_ctx())
return ag_fn(y_true, y_pred, **self._fn_kwargs)
@ -307,8 +305,8 @@ class MeanSquaredError(LossFunctionWrapper):
`tf.distribute.Strategy`, outside of built-in training loops such as
`tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
will raise an error. Please see this custom training [tutorial](
https://www.tensorflow.org/tutorials/distribute/custom_training)
for more details.
https://www.tensorflow.org/tutorials/distribute/custom_training) for
more details.
name: Optional name for the op. Defaults to 'mean_squared_error'.
"""
super(MeanSquaredError, self).__init__(
@ -366,8 +364,8 @@ class MeanAbsoluteError(LossFunctionWrapper):
`tf.distribute.Strategy`, outside of built-in training loops such as
`tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
will raise an error. Please see this custom training [tutorial](
https://www.tensorflow.org/tutorials/distribute/custom_training)
for more details.
https://www.tensorflow.org/tutorials/distribute/custom_training) for
more details.
name: Optional name for the op. Defaults to 'mean_absolute_error'.
"""
super(MeanAbsoluteError, self).__init__(
@ -426,8 +424,8 @@ class MeanAbsolutePercentageError(LossFunctionWrapper):
`tf.distribute.Strategy`, outside of built-in training loops such as
`tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
will raise an error. Please see this custom training [tutorial](
https://www.tensorflow.org/tutorials/distribute/custom_training)
for more details.
https://www.tensorflow.org/tutorials/distribute/custom_training) for
more details.
name: Optional name for the op. Defaults to
'mean_absolute_percentage_error'.
"""
@ -487,8 +485,8 @@ class MeanSquaredLogarithmicError(LossFunctionWrapper):
`tf.distribute.Strategy`, outside of built-in training loops such as
`tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
will raise an error. Please see this custom training [tutorial](
https://www.tensorflow.org/tutorials/distribute/custom_training)
for more details.
https://www.tensorflow.org/tutorials/distribute/custom_training) for
more details.
name: Optional name for the op. Defaults to
'mean_squared_logarithmic_error'.
"""
@ -500,44 +498,64 @@ class MeanSquaredLogarithmicError(LossFunctionWrapper):
class BinaryCrossentropy(LossFunctionWrapper):
"""Computes the cross-entropy loss between true labels and predicted labels.
Use this cross-entropy loss when there are only two label classes (assumed to
be 0 and 1). For each example, there should be a single floating-point value
per prediction.
Use this cross-entropy loss for binary (0 or 1) classification applications.
The loss function requires the following inputs:
In the snippet below, each of the four examples has only a single
floating-point value, and both `y_pred` and `y_true` have the shape
`[batch_size]`.
- `y_true` (true label): This is either 0 or 1.
- `y_pred` (predicted value): This is the model's prediction, i.e., a single
floating-point value which either represents a
[logit](https://en.wikipedia.org/wiki/Logit), (i.e., value in [-inf, inf]
when `from_logits=True`) or a probability (i.e., value in [0., 1.] when
`from_logits=False`).
Standalone usage:
**Recommended Usage:** (set `from_logits=True`)
>>> y_true = [[0., 1.], [0., 0.]]
>>> y_pred = [[0.6, 0.4], [0.4, 0.6]]
>>> # Using 'auto'/'sum_over_batch_size' reduction type.
>>> bce = tf.keras.losses.BinaryCrossentropy()
>>> bce(y_true, y_pred).numpy()
0.815
>>> # Calling with 'sample_weight'.
>>> bce(y_true, y_pred, sample_weight=[1, 0]).numpy()
0.458
>>> # Using 'sum' reduction type.
>>> bce = tf.keras.losses.BinaryCrossentropy(
... reduction=tf.keras.losses.Reduction.SUM)
>>> bce(y_true, y_pred).numpy()
1.630
>>> # Using 'none' reduction type.
>>> bce = tf.keras.losses.BinaryCrossentropy(
... reduction=tf.keras.losses.Reduction.NONE)
>>> bce(y_true, y_pred).numpy()
array([0.916 , 0.714], dtype=float32)
Usage with the `tf.keras` API:
With `tf.keras` API:
```python
model.compile(optimizer='sgd', loss=tf.keras.losses.BinaryCrossentropy())
model.compile(
loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
....
)
```
As a standalone function:
>>> # Example 1: (batch_size = 1, number of samples = 4)
>>> y_true = [0, 1, 0, 0]
>>> y_pred = [-18.6, 0.51, 2.94, -12.8]
>>> bce = tf.keras.losses.BinaryCrossentropy(from_logits=True)
>>> bce(y_true, y_pred).numpy()
0.865
>>> # Example 2: (batch_size = 2, number of samples = 4)
>>> y_true = [[0, 1], [0, 0]]
>>> y_pred = [[-18.6, 0.51], [2.94, -12.8]]
>>> # Using default 'auto'/'sum_over_batch_size' reduction type.
>>> bce = tf.keras.losses.BinaryCrossentropy(from_logits=True)
>>> bce(y_true, y_pred).numpy()
0.865
>>> # Using the 'sample_weight' argument
>>> bce(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy()
0.243
>>> # Using 'sum' reduction type.
>>> bce = tf.keras.losses.BinaryCrossentropy(from_logits=True,
... reduction=tf.keras.losses.Reduction.SUM)
>>> bce(y_true, y_pred).numpy()
1.730
>>> # Using 'none' reduction type.
>>> bce = tf.keras.losses.BinaryCrossentropy(from_logits=True,
... reduction=tf.keras.losses.Reduction.NONE)
>>> bce(y_true, y_pred).numpy()
array([0.235, 1.496], dtype=float32)
**Default Usage:** (set `from_logits=False`)
>>> # Make the following updates to the above "Recommended Usage" section
>>> # 1. Set `from_logits=False`
>>> tf.keras.losses.BinaryCrossentropy() # OR ...(from_logits=False)
>>> # 2. Update `y_pred` to use probabilities instead of logits
>>> y_pred = [0.6, 0.3, 0.2, 0.8] # OR [[0.6, 0.3], [0.2, 0.8]]
"""
def __init__(self,
@ -563,8 +581,8 @@ class BinaryCrossentropy(LossFunctionWrapper):
`tf.distribute.Strategy`, outside of built-in training loops such as
`tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
will raise an error. Please see this custom training [tutorial](
https://www.tensorflow.org/tutorials/distribute/custom_training)
for more details.
https://www.tensorflow.org/tutorials/distribute/custom_training) for
more details.
name: (Optional) Name for the op. Defaults to 'binary_crossentropy'.
"""
super(BinaryCrossentropy, self).__init__(
@ -633,9 +651,9 @@ class CategoricalCrossentropy(LossFunctionWrapper):
default, we assume that `y_pred` encodes a probability distribution.
**Note - Using from_logits=True is more numerically stable.**
label_smoothing: Float in [0, 1]. When > 0, label values are smoothed,
meaning the confidence on label values are relaxed. e.g.
`label_smoothing=0.2` means that we will use a value of `0.1` for label
`0` and `0.9` for label `1`"
meaning the confidence on label values is relaxed. For example, if
`0.1`, use `0.1 / num_classes` for non-target labels and
`0.9 + 0.1 / num_classes` for target labels.
reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to
loss. Default value is `AUTO`. `AUTO` indicates that the reduction
option will be determined by the usage context. For almost all cases
@ -643,8 +661,8 @@ class CategoricalCrossentropy(LossFunctionWrapper):
`tf.distribute.Strategy`, outside of built-in training loops such as
`tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
will raise an error. Please see this custom training [tutorial](
https://www.tensorflow.org/tutorials/distribute/custom_training)
for more details.
https://www.tensorflow.org/tutorials/distribute/custom_training) for
more details.
name: Optional name for the op. Defaults to 'categorical_crossentropy'.
"""
super(CategoricalCrossentropy, self).__init__(
@ -720,8 +738,8 @@ class SparseCategoricalCrossentropy(LossFunctionWrapper):
`tf.distribute.Strategy`, outside of built-in training loops such as
`tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
will raise an error. Please see this custom training [tutorial](
https://www.tensorflow.org/tutorials/distribute/custom_training)
for more details.
https://www.tensorflow.org/tutorials/distribute/custom_training) for
more details.
name: Optional name for the op. Defaults to
'sparse_categorical_crossentropy'.
"""
@ -784,8 +802,8 @@ class Hinge(LossFunctionWrapper):
`tf.distribute.Strategy`, outside of built-in training loops such as
`tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
will raise an error. Please see this custom training [tutorial](
https://www.tensorflow.org/tutorials/distribute/custom_training)
for more details.
https://www.tensorflow.org/tutorials/distribute/custom_training) for
more details.
name: Optional name for the op. Defaults to 'hinge'.
"""
super(Hinge, self).__init__(hinge, name=name, reduction=reduction)
@ -845,8 +863,8 @@ class SquaredHinge(LossFunctionWrapper):
`tf.distribute.Strategy`, outside of built-in training loops such as
`tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
will raise an error. Please see this custom training [tutorial](
https://www.tensorflow.org/tutorials/distribute/custom_training)
for more details.
https://www.tensorflow.org/tutorials/distribute/custom_training) for
more details.
name: Optional name for the op. Defaults to 'squared_hinge'.
"""
super(SquaredHinge, self).__init__(
@ -905,8 +923,8 @@ class CategoricalHinge(LossFunctionWrapper):
`tf.distribute.Strategy`, outside of built-in training loops such as
`tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
will raise an error. Please see this custom training [tutorial](
https://www.tensorflow.org/tutorials/distribute/custom_training)
for more details.
https://www.tensorflow.org/tutorials/distribute/custom_training) for
more details.
name: Optional name for the op. Defaults to 'categorical_hinge'.
"""
super(CategoricalHinge, self).__init__(
@ -962,8 +980,8 @@ class Poisson(LossFunctionWrapper):
`tf.distribute.Strategy`, outside of built-in training loops such as
`tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
will raise an error. Please see this custom training [tutorial](
https://www.tensorflow.org/tutorials/distribute/custom_training)
for more details.
https://www.tensorflow.org/tutorials/distribute/custom_training) for
more details.
name: Optional name for the op. Defaults to 'poisson'.
"""
super(Poisson, self).__init__(poisson, name=name, reduction=reduction)
@ -1019,8 +1037,8 @@ class LogCosh(LossFunctionWrapper):
`tf.distribute.Strategy`, outside of built-in training loops such as
`tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
will raise an error. Please see this custom training [tutorial](
https://www.tensorflow.org/tutorials/distribute/custom_training)
for more details.
https://www.tensorflow.org/tutorials/distribute/custom_training) for
more details.
name: Optional name for the op. Defaults to 'log_cosh'.
"""
super(LogCosh, self).__init__(log_cosh, name=name, reduction=reduction)
@ -1079,8 +1097,8 @@ class KLDivergence(LossFunctionWrapper):
`tf.distribute.Strategy`, outside of built-in training loops such as
`tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
will raise an error. Please see this custom training [tutorial](
https://www.tensorflow.org/tutorials/distribute/custom_training)
for more details.
https://www.tensorflow.org/tutorials/distribute/custom_training) for
more details.
name: Optional name for the op. Defaults to 'kl_divergence'.
"""
super(KLDivergence, self).__init__(
@ -1147,20 +1165,17 @@ class Huber(LossFunctionWrapper):
`tf.distribute.Strategy`, outside of built-in training loops such as
`tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
will raise an error. Please see this custom training [tutorial](
https://www.tensorflow.org/tutorials/distribute/custom_training)
for more details.
https://www.tensorflow.org/tutorials/distribute/custom_training) for
more details.
name: Optional name for the op. Defaults to 'huber_loss'.
"""
super(Huber, self).__init__(
huber, name=name, reduction=reduction, delta=delta)
@keras_export('keras.metrics.mean_squared_error',
'keras.metrics.mse',
'keras.metrics.MSE',
'keras.losses.mean_squared_error',
'keras.losses.mse',
'keras.losses.MSE')
@keras_export('keras.metrics.mean_squared_error', 'keras.metrics.mse',
'keras.metrics.MSE', 'keras.losses.mean_squared_error',
'keras.losses.mse', 'keras.losses.MSE')
@tf.__internal__.dispatch.add_dispatch_support
def mean_squared_error(y_true, y_pred):
"""Computes the mean squared error between labels and predictions.
@ -1191,12 +1206,9 @@ def mean_squared_error(y_true, y_pred):
return K.mean(tf.math.squared_difference(y_pred, y_true), axis=-1)
@keras_export('keras.metrics.mean_absolute_error',
'keras.metrics.mae',
'keras.metrics.MAE',
'keras.losses.mean_absolute_error',
'keras.losses.mae',
'keras.losses.MAE')
@keras_export('keras.metrics.mean_absolute_error', 'keras.metrics.mae',
'keras.metrics.MAE', 'keras.losses.mean_absolute_error',
'keras.losses.mae', 'keras.losses.MAE')
@tf.__internal__.dispatch.add_dispatch_support
def mean_absolute_error(y_true, y_pred):
"""Computes the mean absolute error between labels and predictions.
@ -1225,11 +1237,9 @@ def mean_absolute_error(y_true, y_pred):
@keras_export('keras.metrics.mean_absolute_percentage_error',
'keras.metrics.mape',
'keras.metrics.MAPE',
'keras.metrics.mape', 'keras.metrics.MAPE',
'keras.losses.mean_absolute_percentage_error',
'keras.losses.mape',
'keras.losses.MAPE')
'keras.losses.mape', 'keras.losses.MAPE')
@tf.__internal__.dispatch.add_dispatch_support
def mean_absolute_percentage_error(y_true, y_pred):
"""Computes the mean absolute percentage error between `y_true` and `y_pred`.
@ -1262,11 +1272,9 @@ def mean_absolute_percentage_error(y_true, y_pred):
@keras_export('keras.metrics.mean_squared_logarithmic_error',
'keras.metrics.msle',
'keras.metrics.MSLE',
'keras.metrics.msle', 'keras.metrics.MSLE',
'keras.losses.mean_squared_logarithmic_error',
'keras.losses.msle',
'keras.losses.MSLE')
'keras.losses.msle', 'keras.losses.MSLE')
@tf.__internal__.dispatch.add_dispatch_support
def mean_squared_logarithmic_error(y_true, y_pred):
"""Computes the mean squared logarithmic error between `y_true` and `y_pred`.
@ -1511,7 +1519,9 @@ def categorical_crossentropy(y_true,
y_pred: Tensor of predicted targets.
from_logits: Whether `y_pred` is expected to be a logits tensor. By default,
we assume that `y_pred` encodes a probability distribution.
label_smoothing: Float in [0, 1]. If > `0` then smooth the labels.
label_smoothing: Float in [0, 1]. If > `0` then smooth the labels. For
example, if `0.1`, use `0.1 / num_classes` for non-target labels
and `0.9 + 0.1 / num_classes` for target labels.
Returns:
Categorical crossentropy loss value.
@ -1582,7 +1592,9 @@ def binary_crossentropy(y_true, y_pred, from_logits=False, label_smoothing=0):
y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.
from_logits: Whether `y_pred` is expected to be a logits tensor. By default,
we assume that `y_pred` encodes a probability distribution.
label_smoothing: Float in [0, 1]. If > `0` then smooth the labels.
label_smoothing: Float in [0, 1]. If > `0` then smooth the labels by
squeezing them towards 0.5. That is, using `1. - 0.5 * label_smoothing`
for the target class and `0.5 * label_smoothing` for the non-target class.
Returns:
Binary crossentropy loss value. shape = `[batch_size, d0, .. dN-1]`.
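A quick numeric sketch of the two smoothing rules described in these docstrings (categorical spreads `label_smoothing / num_classes` across classes, binary squeezes targets towards 0.5); the values are illustrative:

```python
import numpy as np

label_smoothing = 0.1

# Categorical: one-hot targets move towards the uniform distribution.
y_cat = np.array([0., 1., 0., 0.])                           # num_classes = 4
print(y_cat * (1. - label_smoothing) + label_smoothing / 4)  # [0.025 0.925 0.025 0.025]

# Binary: targets are squeezed towards 0.5.
y_bin = np.array([0., 1.])
print(y_bin * (1. - label_smoothing) + 0.5 * label_smoothing)  # [0.05 0.95]
```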
@ -1602,12 +1614,9 @@ def binary_crossentropy(y_true, y_pred, from_logits=False, label_smoothing=0):
@keras_export('keras.metrics.kl_divergence',
'keras.metrics.kullback_leibler_divergence',
'keras.metrics.kld',
'keras.metrics.KLD',
'keras.losses.kl_divergence',
'keras.losses.kullback_leibler_divergence',
'keras.losses.kld',
'keras.metrics.kullback_leibler_divergence', 'keras.metrics.kld',
'keras.metrics.KLD', 'keras.losses.kl_divergence',
'keras.losses.kullback_leibler_divergence', 'keras.losses.kld',
'keras.losses.KLD')
@tf.__internal__.dispatch.add_dispatch_support
def kl_divergence(y_true, y_pred):

@ -69,12 +69,11 @@ class AutoCastVariable(tf.Variable, core.Tensor):
called.
"""
def __init__(self, variable, op=None):
def __init__(self, variable):
"""Creates an AutoCastVariable instance.
Args:
variable: A floating-point resource variable to wrap.
op: Optional operation of this variable.
Raises:
ValueError: If `variable` is not a floating-point resource variable
@ -86,7 +85,11 @@ class AutoCastVariable(tf.Variable, core.Tensor):
raise ValueError('variable must be a floating point variable but has '
'type: %s' % variable.dtype.name)
self._variable = variable
self._op = op
# 'delegate' means AutoCastVariable.op returns self._variable.op, which will
# raise an AttributeError in Eager (as intended). If set to any other value,
# AutoCastVariable.op returns that value instead, which is used to set the
# op attribute in AutoCastVariable.assign().
self._op = 'delegate'
def _should_cast(self):
"""Returns True if this variable should be casted when accessed."""
@ -211,10 +214,18 @@ class AutoCastVariable(tf.Variable, core.Tensor):
use_locking=None,
name=None,
read_value=True):
# TODO(b/146181571): This logic can be simplified once
# DistributedVariable.assign returns a DistributedVariable. Currently for
# MirroredStrategy, it returns a Mirrored value.
if tf.compat.v1.executing_eagerly_outside_functions():
assign_op = update_fn(value, use_locking, name, False)
if read_value:
return create_autocast_variable(self._variable, op=assign_op)
# We create a new AutoCastVariable with the same underlying tf.Variable.
# The new AutoCastVariable is identical except the 'op' attribute is
# defined. This matches the behavior of tf.Variable.assign.
var = create_autocast_variable(self._variable)
var._op = assign_op # pylint:disable=protected-access
return var
return assign_op
# Fallback to wrapping the returned variable in graph mode if possible
@ -310,9 +321,9 @@ class AutoCastVariable(tf.Variable, core.Tensor):
@property
def op(self):
if self._op is not None:
return self._op
return self._variable.op
if self._op == 'delegate':
return self._variable.op
return self._op
def _as_graph_element(self):
graph_element = self._variable._as_graph_element() # pylint:disable=protected-access
@ -481,7 +492,7 @@ tf.register_tensor_conversion_function(AutoCastVariable,
AutoCastVariable._dense_var_to_tensor) # pylint:disable=protected-access
def create_autocast_variable(variable, op=None):
def create_autocast_variable(variable):
"""Creates an AutoCastVariable that wraps another variable.
This typically just returns `AutoCastVariable(variable)`. But, if the variable
@ -493,14 +504,13 @@ def create_autocast_variable(variable, op=None):
Args:
variable: A floating-point resource variable to wrap.
op: Optional operation of this variable.
Returns:
An AutoCastVariable that wraps the variable.
"""
if not isinstance(variable, (distribute_values.DistributedVariable,
ps_distribute_values.AggregatingVariable)):
return AutoCastVariable(variable, op=op)
return AutoCastVariable(variable)
class AutoCastDistributedVariable(AutoCastVariable, variable.__class__):
"""An AutoCastVariable that also subclasses from variable.__class__.
@ -523,7 +533,7 @@ def create_autocast_variable(variable, op=None):
).format(v=self)
# pylint: enable=missing-format-attribute
return AutoCastDistributedVariable(variable, op=op)
return AutoCastDistributedVariable(variable)
class enable_auto_cast_variables(object): # pylint:disable=invalid-name

@ -26,7 +26,14 @@ from absl.testing import parameterized
import numpy as np
from tensorflow.python.distribute import test_util
from keras.mixed_precision import autocast_variable
from keras.optimizer_v2 import adadelta
from keras.optimizer_v2 import adagrad
from keras.optimizer_v2 import adam
from keras.optimizer_v2 import adamax
from keras.optimizer_v2 import ftrl
from keras.optimizer_v2 import gradient_descent as gradient_descent_v2
from keras.optimizer_v2 import nadam
from keras.optimizer_v2 import rmsprop
maybe_distribute = tf.__internal__.test.combinations.combine(distribution=[
tf.__internal__.distribute.combinations.default_strategy,
@ -335,11 +342,28 @@ class AutoCastVariableTest(tf.test.TestCase, parameterized.TestCase):
self.assertAllClose(5., self.evaluate(run_assign()))
@tf.__internal__.distribute.combinations.generate(maybe_distribute)
def test_assign_op(self, distribution):
def test_op_attribute(self, distribution):
with distribution.scope():
x = get_var(0., tf.float32)
x = autocast_variable.create_autocast_variable(x)
# Variable.op raises an AttributeError in Eager mode and is an op in graph
# mode. Variable.assign(...).op is None in Eager mode and an op in Graph
# mode or a tf.function. We test this is also true of AutoCastVariable.
if tf.executing_eagerly():
with self.assertRaisesRegex(
AttributeError,
'Tensor.op is meaningless when eager execution is enabled'):
x.op # pylint: disable=pointless-statement
self.assertIsNone(x.assign(1.0).op)
self.assertIsNone(x.assign_add(1.0).op)
self.assertIsNone(x.assign_sub(1.0).op)
else:
self.assertIsNotNone(x.op)
self.assertIsNotNone(x.assign(1.0).op)
self.assertIsNotNone(x.assign_add(1.0).op)
self.assertIsNotNone(x.assign_sub(1.0).op)
@tf.function
def func():
self.assertIsNotNone(x.assign(1.0).op)
@ -486,25 +510,51 @@ class AutoCastVariableTest(tf.test.TestCase, parameterized.TestCase):
'dtype_to_cast_to=float32 '
'inner_variable=MirroredVariable.*>')
@parameterized.named_parameters(
('v1', tf.compat.v1.train.GradientDescentOptimizer),
('v2', gradient_descent_v2.SGD))
def test_optimizer(self, optimizer_class):
@tf.__internal__.distribute.combinations.generate(tf.__internal__.test.combinations.combine(
optimizer_class=[
adadelta.Adadelta,
adagrad.Adagrad,
adam.Adam,
adamax.Adamax,
ftrl.Ftrl,
gradient_descent_v2.SGD,
nadam.Nadam,
rmsprop.RMSprop,
tf.compat.v1.train.GradientDescentOptimizer
],
use_tf_function=[False, True]))
def test_optimizer(self, optimizer_class, use_tf_function):
if use_tf_function and not tf.executing_eagerly():
self.skipTest('Test does not support graph mode with tf.function')
x = get_var(1., tf.float32)
x = autocast_variable.create_autocast_variable(x)
opt = optimizer_class(1.)
y = get_var(1., tf.float32)
opt = optimizer_class(learning_rate=1.)
@tf.function
def f():
opt.minimize(lambda: x + 1., var_list=[x])
# Minimize both the AutoCastVariable and the normal tf.Variable. Both
# variables should be updated to the same value.
op = opt.minimize(lambda: x + y, var_list=[x, y])
return None if tf.compat.v1.executing_eagerly_outside_functions() else op
if use_tf_function:
f = tf.function(f)
if tf.executing_eagerly():
f()
else:
op = f() # pylint: disable=assignment-from-no-return
op = f()
self.evaluate(tf.compat.v1.global_variables_initializer())
self.evaluate(op)
self.assertEqual(self.evaluate(x), 0)
# Assert the AutoCastVariable has changed from its initial value
self.assertNotEqual(self.evaluate(x), 1.)
# Assert AutoCastVariable is updated correctly by comparing it to the normal
# variable
self.assertAlmostEqual(self.evaluate(x), self.evaluate(y))
if optimizer_class in (gradient_descent_v2.SGD,
tf.compat.v1.train.GradientDescentOptimizer):
# With SGD, the variable decreases by exactly 1
self.assertEqual(self.evaluate(x), 0)
if __name__ == '__main__':

@ -139,7 +139,7 @@ def load(path, compile=True, options=None): # pylint: disable=redefined-builtin
# Recreate layers and metrics using the info stored in the metadata.
keras_loader = KerasObjectLoader(metadata, object_graph_def)
keras_loader.load_layers()
keras_loader.load_layers(compile=compile)
# Generate a dictionary of all loaded nodes.
nodes_to_load = {'root': None}
@ -364,7 +364,7 @@ class KerasObjectLoader(object):
obj_child, child_proto, child_id)
self.loaded_nodes[child_id] = obj_child, setter
def load_layers(self):
def load_layers(self, compile=True): # pylint: disable=redefined-builtin
"""Load all layer nodes from the metadata."""
# Load metrics after models and layers, since it's likely that models
# and layers will create the metric when initialized (this avoids wasting
@ -380,9 +380,20 @@ class KerasObjectLoader(object):
node_metadata.metadata)
for node_metadata in metric_list:
self.loaded_nodes[node_metadata.node_id] = self._load_layer(
node_metadata.node_id, node_metadata.identifier,
node_metadata.metadata)
try:
self.loaded_nodes[node_metadata.node_id] = self._load_layer(
node_metadata.node_id, node_metadata.identifier,
node_metadata.metadata)
except ValueError:
# Metrics are only needed when the model is compiled later. We ignore
# errors when trying to load custom metrics when `compile=False` until
# custom metrics are serialized properly (b/135550038).
if compile:
raise
logging.warning('Unable to restore custom metric. Please ensure that '
'the layer implements `get_config` and `from_config` '
'when saving. In addition, please use the '
'`custom_objects` arg when calling `load_model()`.')
def _load_layer(self, node_id, identifier, metadata):
"""Load a single layer from a SavedUserObject proto."""

@ -1142,6 +1142,22 @@ class MetricTest(tf.test.TestCase, parameterized.TestCase):
self._test_metric_save_and_load(
metric, self._save_model_dir(), 1, test_sample_weight=False)
@keras_parameterized.run_with_all_model_types
def test_custom_metric_model(self):
class CustomMetric(keras.metrics.MeanSquaredError):
pass
model = testing_utils.get_small_mlp(1, 4, input_dim=3)
model.compile(loss='mse', optimizer='rmsprop', metrics=[CustomMetric()])
saved_model_dir = self._save_model_dir()
tf.saved_model.save(model, saved_model_dir)
with self.assertRaisesRegex(ValueError, 'custom_objects'):
keras_load.load(saved_model_dir)
keras_load.load(saved_model_dir, compile=False)
if __name__ == '__main__':
tf.test.main()