Sync OSS keras to head.

PiperOrigin-RevId: 347838100
Scott Zhu 2020-12-16 09:34:29 -08:00 committed by TensorFlower Gardener
parent af1a2eb1f5
commit f0c0c877ba
10 changed files with 408 additions and 350 deletions

@@ -1,7 +1,10 @@
# Description:
#   Contains the Keras engine API (internal TensorFlow version).

# buildifier: disable=same-origin-load
load("@org_keras//keras:keras.bzl", "tf_py_test")

# buildifier: disable=same-origin-load
load("@org_keras//keras:keras.bzl", "cuda_py_test")

package(

@@ -27,8 +27,8 @@ import shutil
from absl.testing import parameterized
import numpy as np
from tensorflow.core.protobuf import rewriter_config_pb2

import keras
from tensorflow.python.framework import test_util as tf_test_util
from keras import combinations
from keras import keras_parameterized
from keras import testing_utils

@@ -12,8 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Locally-connected layers."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
@@ -59,79 +58,61 @@ class LocallyConnected1D(Layer):
  ```
  Arguments:
    filters: Integer, the dimensionality of the output space (i.e. the number
      of output filters in the convolution).
    kernel_size: An integer or tuple/list of a single integer, specifying the
      length of the 1D convolution window.
    strides: An integer or tuple/list of a single integer, specifying the
      stride length of the convolution.
    padding: Currently only supports `"valid"` (case-insensitive). `"same"`
      may be supported in the future. `"valid"` means no padding.
    data_format: A string, one of `channels_last` (default) or
      `channels_first`. The ordering of the dimensions in the inputs.
      `channels_last` corresponds to inputs with shape `(batch, length,
      channels)` while `channels_first` corresponds to inputs with shape
      `(batch, channels, length)`. It defaults to the `image_data_format`
      value found in your Keras config file at `~/.keras/keras.json`. If you
      never set it, then it will be "channels_last".
    activation: Activation function to use. If you don't specify anything, no
      activation is applied (i.e. "linear" activation: `a(x) = x`).
    use_bias: Boolean, whether the layer uses a bias vector.
    kernel_initializer: Initializer for the `kernel` weights matrix.
    bias_initializer: Initializer for the bias vector.
    kernel_regularizer: Regularizer function applied to the `kernel` weights
      matrix.
    bias_regularizer: Regularizer function applied to the bias vector.
    activity_regularizer: Regularizer function applied to the output of the
      layer (its "activation").
    kernel_constraint: Constraint function applied to the kernel matrix.
    bias_constraint: Constraint function applied to the bias vector.
    implementation: implementation mode, either `1`, `2`, or `3`. `1` loops
      over input spatial locations to perform the forward pass. It is
      memory-efficient but performs a lot of (small) ops. `2` stores layer
      weights in a dense but sparsely-populated 2D matrix and implements the
      forward pass as a single matrix-multiply. It uses a lot of RAM but
      performs few (large) ops. `3` stores layer weights in a sparse tensor
      and implements the forward pass as a single sparse matrix-multiply.
      How to choose:
        `1`: large, dense models,
        `2`: small models,
        `3`: large, sparse models, where "large" stands for large
          input/output activations (i.e. many `filters`, `input_filters`,
          large `input_size`, `output_size`), and "sparse" stands for few
          connections between inputs and outputs, i.e. small ratio `filters *
          input_filters * kernel_size / (input_size * strides)`, where inputs
          to and outputs of the layer are assumed to have shapes `(input_size,
          input_filters)`, `(output_size, filters)` respectively. It is
          recommended to benchmark each in the setting of interest to pick the
          most efficient one (in terms of speed and memory usage). Correct
          choice of implementation can lead to dramatic speed improvements
          (e.g. 50X), potentially at the expense of RAM. Also, only
          `padding="valid"` is supported by `implementation=1`.
  Input shape:
    3D tensor with shape: `(batch_size, steps, input_dim)`
  Output shape:
    3D tensor with shape: `(batch_size, new_steps, filters)`. The `steps` value
      might have changed due to padding or strides.
  """

  def __init__(self,
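To make the shape rules above concrete, here is a minimal usage sketch (the layer and arguments follow the docstring above; the input shapes and `implementation` choice are illustrative, not part of the original diff):

```python
import numpy as np
import tensorflow as tf

# Unshared 1D convolution: each of the `new_steps` output positions has its
# own kernel, so weights are not reused along the sequence.
layer = tf.keras.layers.LocallyConnected1D(
    filters=4, kernel_size=3, strides=1, padding='valid',
    implementation=2)  # dense-but-masked matmul; a reasonable default for small models

x = np.random.rand(8, 10, 6).astype('float32')  # (batch, steps, input_dim)
y = layer(x)
print(y.shape)  # (8, 8, 4): new_steps = 10 - 3 + 1 with "valid" padding
```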
@@ -158,8 +139,8 @@ class LocallyConnected1D(Layer):
    self.padding = conv_utils.normalize_padding(padding)
    if self.padding != 'valid' and implementation == 1:
      raise ValueError('Invalid border mode for LocallyConnected1D '
                       '(only "valid" is supported if implementation is 1): ' +
                       padding)
    self.data_format = conv_utils.normalize_data_format(data_format)
    self.activation = activations.get(activation)
    self.use_bias = use_bias
@@ -181,10 +162,13 @@ class LocallyConnected1D(Layer):
    input_dim, input_length = input_shape[2], input_shape[1]

    if input_dim is None:
      raise ValueError(
          'Axis 2 of input should be fully-defined. '
          'Found shape:', input_shape)
    self.output_length = conv_utils.conv_output_length(input_length,
                                                       self.kernel_size[0],
                                                       self.padding,
                                                       self.strides[0])

    if self.implementation == 1:
      self.kernel_shape = (self.output_length, self.kernel_size[0] * input_dim,
@@ -199,17 +183,18 @@ class LocallyConnected1D(Layer):
    elif self.implementation == 2:
      if self.data_format == 'channels_first':
        self.kernel_shape = (input_dim, input_length, self.filters,
                             self.output_length)
      else:
        self.kernel_shape = (input_length, input_dim, self.output_length,
                             self.filters)

      self.kernel = self.add_weight(
          shape=self.kernel_shape,
          initializer=self.kernel_initializer,
          name='kernel',
          regularizer=self.kernel_regularizer,
          constraint=self.kernel_constraint)

      self.kernel_mask = get_locallyconnected_mask(
          input_shape=(input_length,),
@@ -231,8 +216,7 @@ class LocallyConnected1D(Layer):
              padding=self.padding,
              filters_in=input_dim,
              filters_out=self.filters,
              data_format=self.data_format))

      self.kernel = self.add_weight(
          shape=(len(self.kernel_idxs),),
@@ -242,8 +226,8 @@ class LocallyConnected1D(Layer):
          constraint=self.kernel_constraint)
    else:
      raise ValueError('Unrecognized implementation mode: %d.' %
                       self.implementation)

    if self.use_bias:
      self.bias = self.add_weight(
@@ -291,8 +275,8 @@ class LocallyConnected1D(Layer):
                                self.compute_output_shape(inputs.shape))
    else:
      raise ValueError('Unrecognized implementation mode: %d.' %
                       self.implementation)

    if self.use_bias:
      output = K.bias_add(output, self.bias, data_format=self.data_format)
@@ -366,87 +350,71 @@ class LocallyConnected2D(Layer):
  ```
  Arguments:
    filters: Integer, the dimensionality of the output space (i.e. the number
      of output filters in the convolution).
    kernel_size: An integer or tuple/list of 2 integers, specifying the width
      and height of the 2D convolution window. Can be a single integer to
      specify the same value for all spatial dimensions.
    strides: An integer or tuple/list of 2 integers, specifying the strides of
      the convolution along the width and height. Can be a single integer to
      specify the same value for all spatial dimensions.
    padding: Currently only supports `"valid"` (case-insensitive). `"same"`
      will be supported in future. `"valid"` means no padding.
    data_format: A string, one of `channels_last` (default) or
      `channels_first`. The ordering of the dimensions in the inputs.
      `channels_last` corresponds to inputs with shape `(batch, height, width,
      channels)` while `channels_first` corresponds to inputs with shape
      `(batch, channels, height, width)`. It defaults to the
      `image_data_format` value found in your Keras config file at
      `~/.keras/keras.json`. If you never set it, then it will be
      "channels_last".
    activation: Activation function to use. If you don't specify anything, no
      activation is applied (i.e. "linear" activation: `a(x) = x`).
    use_bias: Boolean, whether the layer uses a bias vector.
    kernel_initializer: Initializer for the `kernel` weights matrix.
    bias_initializer: Initializer for the bias vector.
    kernel_regularizer: Regularizer function applied to the `kernel` weights
      matrix.
    bias_regularizer: Regularizer function applied to the bias vector.
    activity_regularizer: Regularizer function applied to the output of the
      layer (its "activation").
    kernel_constraint: Constraint function applied to the kernel matrix.
    bias_constraint: Constraint function applied to the bias vector.
    implementation: implementation mode, either `1`, `2`, or `3`. `1` loops
      over input spatial locations to perform the forward pass. It is
      memory-efficient but performs a lot of (small) ops. `2` stores layer
      weights in a dense but sparsely-populated 2D matrix and implements the
      forward pass as a single matrix-multiply. It uses a lot of RAM but
      performs few (large) ops. `3` stores layer weights in a sparse tensor
      and implements the forward pass as a single sparse matrix-multiply.
      How to choose:
        `1`: large, dense models,
        `2`: small models,
        `3`: large, sparse models, where "large" stands for large
          input/output activations (i.e. many `filters`, `input_filters`,
          large `np.prod(input_size)`, `np.prod(output_size)`), and "sparse"
          stands for few connections between inputs and outputs, i.e. small
          ratio `filters * input_filters * np.prod(kernel_size) /
          (np.prod(input_size) * np.prod(strides))`, where inputs to and
          outputs of the layer are assumed to have shapes `input_size +
          (input_filters,)`, `output_size + (filters,)` respectively. It is
          recommended to benchmark each in the setting of interest to pick the
          most efficient one (in terms of speed and memory usage). Correct
          choice of implementation can lead to dramatic speed improvements
          (e.g. 50X), potentially at the expense of RAM. Also, only
          `padding="valid"` is supported by `implementation=1`.
  Input shape:
    4D tensor with shape: `(samples, channels, rows, cols)` if
      data_format='channels_first'
    or 4D tensor with shape: `(samples, rows, cols, channels)` if
      data_format='channels_last'.
  Output shape:
    4D tensor with shape: `(samples, filters, new_rows, new_cols)` if
      data_format='channels_first'
    or 4D tensor with shape: `(samples, new_rows, new_cols, filters)` if
      data_format='channels_last'. `rows` and `cols` values might have changed
      due to padding.
  """

  def __init__(self,
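As a quick check of the output-shape rule stated above, a short sketch for the `channels_last` case (shapes chosen arbitrarily, not taken from the diff):

```python
import numpy as np
import tensorflow as tf

layer = tf.keras.layers.LocallyConnected2D(
    filters=3, kernel_size=(3, 3), data_format='channels_last')

x = np.random.rand(2, 8, 8, 4).astype('float32')  # (samples, rows, cols, channels)
y = layer(x)
# "valid" padding: new_rows = 8 - 3 + 1 = 6 and new_cols = 6
print(y.shape)  # (2, 6, 6, 3)
```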
@@ -473,8 +441,8 @@ class LocallyConnected2D(Layer):
    self.padding = conv_utils.normalize_padding(padding)
    if self.padding != 'valid' and implementation == 1:
      raise ValueError('Invalid border mode for LocallyConnected2D '
                       '(only "valid" is supported if implementation is 1): ' +
                       padding)
    self.data_format = conv_utils.normalize_data_format(data_format)
    self.activation = activations.get(activation)
    self.use_bias = use_bias
@@ -509,10 +477,8 @@ class LocallyConnected2D(Layer):
    self.output_col = output_col
    if self.implementation == 1:
      self.kernel_shape = (output_row * output_col, self.kernel_size[0] *
                           self.kernel_size[1] * input_filter, self.filters)

      self.kernel = self.add_weight(
          shape=self.kernel_shape,
@@ -523,17 +489,18 @@ class LocallyConnected2D(Layer):
    elif self.implementation == 2:
      if self.data_format == 'channels_first':
        self.kernel_shape = (input_filter, input_row, input_col, self.filters,
                             self.output_row, self.output_col)
      else:
        self.kernel_shape = (input_row, input_col, input_filter,
                             self.output_row, self.output_col, self.filters)

      self.kernel = self.add_weight(
          shape=self.kernel_shape,
          initializer=self.kernel_initializer,
          name='kernel',
          regularizer=self.kernel_regularizer,
          constraint=self.kernel_constraint)

      self.kernel_mask = get_locallyconnected_mask(
          input_shape=(input_row, input_col),
@@ -555,8 +522,7 @@ class LocallyConnected2D(Layer):
              padding=self.padding,
              filters_in=input_filter,
              filters_out=self.filters,
              data_format=self.data_format))

      self.kernel = self.add_weight(
          shape=(len(self.kernel_idxs),),
@@ -566,8 +532,8 @@ class LocallyConnected2D(Layer):
          constraint=self.kernel_constraint)
    else:
      raise ValueError('Unrecognized implementation mode: %d.' %
                       self.implementation)

    if self.use_bias:
      self.bias = self.add_weight(
@@ -619,8 +585,8 @@ class LocallyConnected2D(Layer):
                                self.compute_output_shape(inputs.shape))
    else:
      raise ValueError('Unrecognized implementation mode: %d.' %
                       self.implementation)

    if self.use_bias:
      output = K.bias_add(output, self.bias, data_format=self.data_format)
@@ -686,10 +652,10 @@ def get_locallyconnected_mask(input_shape, kernel_shape, strides, padding,
  `strides`, `padding` and `data_format`.

  Arguments:
    input_shape: tuple of size N: `(d_in1, ..., d_inN)` spatial shape of the
      input.
    kernel_shape: tuple of size N, spatial shape of the convolutional kernel /
      receptive field.
    strides: tuple of size N, strides along each spatial dimension.
    padding: type of padding, string `"same"` or `"valid"`.
    data_format: a string, `"channels_first"` or `"channels_last"`.
@@ -709,8 +675,7 @@ def get_locallyconnected_mask(input_shape, kernel_shape, strides, padding,
      input_shape=input_shape,
      kernel_shape=kernel_shape,
      strides=strides,
      padding=padding)

  ndims = int(mask.ndim / 2)
@@ -739,34 +704,26 @@ def local_conv_matmul(inputs, kernel, kernel_mask, output_shape):
  reshapes to make `inputs` and `kernel` 2-D and `output` (N+2)-D.

  Arguments:
    inputs: (N+2)-D tensor with shape `(batch_size, channels_in, d_in1, ...,
      d_inN)` or `(batch_size, d_in1, ..., d_inN, channels_in)`.
    kernel: the unshared weights for N-D convolution,
      an (N+2)-D tensor of shape: `(d_in1, ..., d_inN, channels_in, d_out2,
      ..., d_outN, channels_out)` or `(channels_in, d_in1, ..., d_inN,
      channels_out, d_out2, ..., d_outN)`, with the ordering of channels
      and spatial dimensions matching that of the input. Each entry is the
      weight between a particular input and output location, similarly to
      a fully-connected weight matrix.
    kernel_mask: a float 0/1 mask tensor of shape: `(d_in1, ..., d_inN, 1,
      d_out2, ..., d_outN, 1)` or `(1, d_in1, ..., d_inN, 1, d_out2, ...,
      d_outN)`, with the ordering of singleton and spatial dimensions matching
      that of the input. Mask represents the connectivity pattern of the layer
      and is precomputed elsewhere based on layer parameters: stride, padding,
      and the receptive field shape.
    output_shape: a tuple of (N+2) elements representing the output shape:
      `(batch_size, channels_out, d_out1, ..., d_outN)` or `(batch_size,
      d_out1, ..., d_outN, channels_out)`, with the ordering of channels and
      spatial dimensions matching that of the input.

  Returns:
    Output (N+2)-D tensor with shape `output_shape`.
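The masked-matmul idea described above (implementation mode `2`) can be illustrated with plain NumPy. This is a hedged sketch of the concept only, not the Keras code path; the names (`mask`, `kernel`, the flattening order) are local to the example:

```python
import numpy as np

batch, d_in, c_in, d_out, c_out, k = 2, 5, 3, 3, 4, 3

# Dense but sparsely-populated kernel: one weight per (input location,
# output location) pair, zeroed wherever the output position's receptive
# field does not cover the input position.
kernel = np.random.rand(d_in * c_in, d_out * c_out)
mask = np.zeros((d_in, d_out))
for j in range(d_out):                         # "valid" padding, stride 1
  mask[j:j + k, j] = 1.0
mask = np.kron(mask, np.ones((c_in, c_out)))   # replicate over channel pairs

inputs = np.random.rand(batch, d_in * c_in)    # flattened (d_in1, channels_in)
outputs = inputs @ (kernel * mask)             # the single matrix-multiply
print(outputs.reshape(batch, d_out, c_out).shape)  # (2, 3, 4)
```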
@@ -777,8 +734,9 @@ def local_conv_matmul(inputs, kernel, kernel_mask, output_shape):
  kernel = make_2d(kernel, split_dim=K.ndim(kernel) // 2)

  output_flat = tf.compat.v1.sparse_matmul(inputs_flat, kernel, b_is_sparse=True)
  output = K.reshape(output_flat, [
      K.shape(output_flat)[0],
  ] + output_shape.as_list()[1:])
  return output
@@ -810,14 +768,16 @@ def local_conv_sparse_matmul(inputs, kernel, kernel_idxs, kernel_shape,
  """
  inputs_flat = K.reshape(inputs, (K.shape(inputs)[0], -1))
  output_flat = tf.raw_ops.SparseTensorDenseMatMul(
      a_indices=kernel_idxs,
      a_values=kernel,
      a_shape=kernel_shape,
      b=inputs_flat,
      adjoint_b=True)
  output_flat_transpose = K.transpose(output_flat)

  output_reshaped = K.reshape(output_flat_transpose, [
      K.shape(output_flat_transpose)[0],
  ] + output_shape.as_list()[1:])
  return output_reshaped
@@ -830,7 +790,7 @@ def make_2d(tensor, split_dim):
  Arguments:
    tensor: a tensor of shape `(d0, ..., d(N-1))`.
    split_dim: an integer from 1 to N-1, index of the dimension to group
      dimensions before (excluding) and after (including).

  Returns:
    Tensor of shape
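A small sketch of the `split_dim` grouping described above (the shapes are hypothetical; this simply mirrors what reshaping to 2-D produces):

```python
import numpy as np

t = np.zeros((2, 3, 4, 5))      # (d0, d1, d2, d3)
split_dim = 2                   # group dims before (excluding) and after (including)
t2d = t.reshape(2 * 3, 4 * 5)   # dims [0, split_dim) -> rows, [split_dim, N) -> cols
print(t2d.shape)                # (6, 20)
```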

@@ -28,8 +28,8 @@ import time
from absl.testing import parameterized
import numpy as np
from tensorflow.core.protobuf import rewriter_config_pb2

import keras
from tensorflow.python.framework import test_util as tf_test_util
from keras import keras_parameterized
from keras import testing_utils
from keras.layers import recurrent as rnn_v1

@@ -26,6 +26,7 @@ from absl.testing import parameterized
import numpy as np

import keras
from tensorflow.python.framework import test_util as tf_test_util
from keras import combinations
from keras import keras_parameterized
from keras import testing_utils
@@ -33,8 +34,6 @@ from keras.engine import base_layer_utils
from keras.layers import core
from keras.layers.rnn_cell_wrapper_v2 import ResidualWrapper
from keras.utils import generic_utils
from tensorflow.python.ops.ragged import ragged_tensor
from tensorflow.python.training.tracking import util as trackable_util
@@ -653,7 +652,7 @@ class BidirectionalTest(tf.test.TestCase, parameterized.TestCase):
      model.compile(loss='mse', optimizer='sgd')
      model.fit(x, y, epochs=1, batch_size=1)

    if tf.executing_eagerly():
      run_test()
    else:
      tf_test_util.enable_output_all_intermediates(run_test)()

@@ -12,8 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Built-in loss functions."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
@@ -85,8 +84,8 @@ class Loss(object):
        `tf.distribute.Strategy`, outside of built-in training loops such as
        `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
        will raise an error. Please see this custom training [tutorial](
          https://www.tensorflow.org/tutorials/distribute/custom_training) for
          more details.
      name: Optional name for the op.
    """
    losses_utils.ReductionV2.validate(reduction)
@@ -115,15 +114,15 @@ class Loss(object):
        sparse loss functions such as sparse categorical crossentropy where
        shape = `[batch_size, d0, .. dN-1]`
      y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`
      sample_weight: Optional `sample_weight` acts as a coefficient for the
        loss. If a scalar is provided, then the loss is simply scaled by the
        given value. If `sample_weight` is a tensor of size `[batch_size]`, then
        the total loss for each sample of the batch is rescaled by the
        corresponding element in the `sample_weight` vector. If the shape of
        `sample_weight` is `[batch_size, d0, .. dN-1]` (or can be broadcasted to
        this shape), then each loss element of `y_pred` is scaled by the
        corresponding value of `sample_weight`. (Note on `dN-1`: all loss
        functions reduce by 1 dimension, usually axis=-1.)

    Returns:
      Weighted loss float `Tensor`. If `reduction` is `NONE`, this has
@@ -223,8 +222,8 @@ class LossFunctionWrapper(Loss):
        `tf.distribute.Strategy`, outside of built-in training loops such as
        `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
        will raise an error. Please see this custom training [tutorial](
          https://www.tensorflow.org/tutorials/distribute/custom_training) for
          more details.
      name: (Optional) name for the loss.
      **kwargs: The keyword arguments that are passed on to `fn`.
    """
@@ -243,8 +242,7 @@ class LossFunctionWrapper(Loss):
      Loss values per sample.
    """
    if tf.is_tensor(y_pred) and tf.is_tensor(y_true):
      y_pred, y_true = losses_utils.squeeze_or_expand_dimensions(y_pred, y_true)

    ag_fn = autograph.tf_convert(self.fn, ag_ctx.control_status_ctx())
    return ag_fn(y_true, y_pred, **self._fn_kwargs)
@@ -307,8 +305,8 @@ class MeanSquaredError(LossFunctionWrapper):
        `tf.distribute.Strategy`, outside of built-in training loops such as
        `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
        will raise an error. Please see this custom training [tutorial](
          https://www.tensorflow.org/tutorials/distribute/custom_training) for
          more details.
      name: Optional name for the op. Defaults to 'mean_squared_error'.
    """
    super(MeanSquaredError, self).__init__(
@@ -366,8 +364,8 @@ class MeanAbsoluteError(LossFunctionWrapper):
        `tf.distribute.Strategy`, outside of built-in training loops such as
        `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
        will raise an error. Please see this custom training [tutorial](
          https://www.tensorflow.org/tutorials/distribute/custom_training) for
          more details.
      name: Optional name for the op. Defaults to 'mean_absolute_error'.
    """
    super(MeanAbsoluteError, self).__init__(
@@ -426,8 +424,8 @@ class MeanAbsolutePercentageError(LossFunctionWrapper):
        `tf.distribute.Strategy`, outside of built-in training loops such as
        `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
        will raise an error. Please see this custom training [tutorial](
          https://www.tensorflow.org/tutorials/distribute/custom_training) for
          more details.
      name: Optional name for the op. Defaults to
        'mean_absolute_percentage_error'.
    """
@@ -487,8 +485,8 @@ class MeanSquaredLogarithmicError(LossFunctionWrapper):
        `tf.distribute.Strategy`, outside of built-in training loops such as
        `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
        will raise an error. Please see this custom training [tutorial](
          https://www.tensorflow.org/tutorials/distribute/custom_training) for
          more details.
      name: Optional name for the op. Defaults to
        'mean_squared_logarithmic_error'.
    """
@@ -500,44 +498,64 @@ class MeanSquaredLogarithmicError(LossFunctionWrapper):
class BinaryCrossentropy(LossFunctionWrapper):
  """Computes the cross-entropy loss between true labels and predicted labels.

  Use this cross-entropy loss for binary (0 or 1) classification applications.
  The loss function requires the following inputs:

  - `y_true` (true label): This is either 0 or 1.
  - `y_pred` (predicted value): This is the model's prediction, i.e., a single
    floating-point value which either represents a
    [logit](https://en.wikipedia.org/wiki/Logit) (i.e., value in [-inf, inf]
    when `from_logits=True`) or a probability (i.e., value in [0., 1.] when
    `from_logits=False`).

  **Recommended Usage:** (set `from_logits=True`)

  With `tf.keras` API:

  ```python
  model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
    ....
  )
  ```

  As a standalone function:

  >>> # Example 1: (batch_size = 1, number of samples = 4)
  >>> y_true = [0, 1, 0, 0]
  >>> y_pred = [-18.6, 0.51, 2.94, -12.8]
  >>> bce = tf.keras.losses.BinaryCrossentropy(from_logits=True)
  >>> bce(y_true, y_pred).numpy()
  0.865

  >>> # Example 2: (batch_size = 2, number of samples = 4)
  >>> y_true = [[0, 1], [0, 0]]
  >>> y_pred = [[-18.6, 0.51], [2.94, -12.8]]
  >>> # Using default 'auto'/'sum_over_batch_size' reduction type.
  >>> bce = tf.keras.losses.BinaryCrossentropy(from_logits=True)
  >>> bce(y_true, y_pred).numpy()
  0.865
  >>> # Using 'sample_weight' attribute
  >>> bce(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy()
  0.243
  >>> # Using 'sum' reduction type.
  >>> bce = tf.keras.losses.BinaryCrossentropy(from_logits=True,
  ...     reduction=tf.keras.losses.Reduction.SUM)
  >>> bce(y_true, y_pred).numpy()
  1.730
  >>> # Using 'none' reduction type.
  >>> bce = tf.keras.losses.BinaryCrossentropy(from_logits=True,
  ...     reduction=tf.keras.losses.Reduction.NONE)
  >>> bce(y_true, y_pred).numpy()
  array([0.235, 1.496], dtype=float32)

  **Default Usage:** (set `from_logits=False`)

  >>> # Make the following updates to the above "Recommended Usage" section
  >>> # 1. Set `from_logits=False`
  >>> tf.keras.losses.BinaryCrossentropy() # OR ...(from_logits=False)
  >>> # 2. Update `y_pred` to use probabilities instead of logits
  >>> y_pred = [0.6, 0.3, 0.2, 0.8] # OR [[0.6, 0.3], [0.2, 0.8]]
  """

  def __init__(self,
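The `0.865` value quoted in the examples above can be reproduced by hand from the standard sigmoid cross-entropy formula; a short NumPy check (a sketch independent of the Keras implementation):

```python
import numpy as np

y_true = np.array([0., 1., 0., 0.])
logits = np.array([-18.6, 0.51, 2.94, -12.8])

# Numerically stable sigmoid cross-entropy per sample:
#   loss = max(z, 0) - z * y + log(1 + exp(-|z|))
per_sample = (np.maximum(logits, 0) - logits * y_true
              + np.log1p(np.exp(-np.abs(logits))))
print(per_sample.round(3))  # approximately [0.   , 0.47 , 2.992, 0.   ]
print(per_sample.mean())    # approximately 0.865
```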
@@ -563,8 +581,8 @@ class BinaryCrossentropy(LossFunctionWrapper):
        `tf.distribute.Strategy`, outside of built-in training loops such as
        `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
        will raise an error. Please see this custom training [tutorial](
          https://www.tensorflow.org/tutorials/distribute/custom_training) for
          more details.
      name: (Optional) Name for the op. Defaults to 'binary_crossentropy'.
    """
    super(BinaryCrossentropy, self).__init__(
@@ -633,9 +651,9 @@ class CategoricalCrossentropy(LossFunctionWrapper):
        default, we assume that `y_pred` encodes a probability distribution.
        **Note - Using from_logits=True is more numerically stable.**
      label_smoothing: Float in [0, 1]. When > 0, label values are smoothed,
        meaning the confidence on label values is relaxed. For example, if
        `0.1`, use `0.1 / num_classes` for non-target labels and
        `0.9 + 0.1 / num_classes` for target labels.
      reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to
        loss. Default value is `AUTO`. `AUTO` indicates that the reduction
        option will be determined by the usage context. For almost all cases
@@ -643,8 +661,8 @@ class CategoricalCrossentropy(LossFunctionWrapper):
        `tf.distribute.Strategy`, outside of built-in training loops such as
        `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
        will raise an error. Please see this custom training [tutorial](
          https://www.tensorflow.org/tutorials/distribute/custom_training) for
          more details.
      name: Optional name for the op. Defaults to 'categorical_crossentropy'.
    """
    super(CategoricalCrossentropy, self).__init__(
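The label-smoothing rule described above amounts to mixing the one-hot targets with a uniform distribution; a short sketch of the arithmetic (the values here are illustrative, not part of the diff):

```python
import numpy as np

num_classes = 3
label_smoothing = 0.1
y_true = np.array([0., 1., 0.])   # one-hot target

y_smooth = y_true * (1. - label_smoothing) + label_smoothing / num_classes
print(y_smooth)  # [0.0333..., 0.9333..., 0.0333...]
# target class -> 0.9 + 0.1 / num_classes, non-target classes -> 0.1 / num_classes
```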
@@ -720,8 +738,8 @@ class SparseCategoricalCrossentropy(LossFunctionWrapper):
        `tf.distribute.Strategy`, outside of built-in training loops such as
        `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
        will raise an error. Please see this custom training [tutorial](
          https://www.tensorflow.org/tutorials/distribute/custom_training) for
          more details.
      name: Optional name for the op. Defaults to
        'sparse_categorical_crossentropy'.
    """
@@ -784,8 +802,8 @@ class Hinge(LossFunctionWrapper):
        `tf.distribute.Strategy`, outside of built-in training loops such as
        `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
        will raise an error. Please see this custom training [tutorial](
          https://www.tensorflow.org/tutorials/distribute/custom_training) for
          more details.
      name: Optional name for the op. Defaults to 'hinge'.
    """
    super(Hinge, self).__init__(hinge, name=name, reduction=reduction)
@@ -845,8 +863,8 @@ class SquaredHinge(LossFunctionWrapper):
        `tf.distribute.Strategy`, outside of built-in training loops such as
        `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
        will raise an error. Please see this custom training [tutorial](
          https://www.tensorflow.org/tutorials/distribute/custom_training) for
          more details.
      name: Optional name for the op. Defaults to 'squared_hinge'.
    """
    super(SquaredHinge, self).__init__(
@@ -905,8 +923,8 @@ class CategoricalHinge(LossFunctionWrapper):
        `tf.distribute.Strategy`, outside of built-in training loops such as
        `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
        will raise an error. Please see this custom training [tutorial](
          https://www.tensorflow.org/tutorials/distribute/custom_training) for
          more details.
      name: Optional name for the op. Defaults to 'categorical_hinge'.
    """
    super(CategoricalHinge, self).__init__(
@@ -962,8 +980,8 @@ class Poisson(LossFunctionWrapper):
        `tf.distribute.Strategy`, outside of built-in training loops such as
        `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
        will raise an error. Please see this custom training [tutorial](
          https://www.tensorflow.org/tutorials/distribute/custom_training) for
          more details.
      name: Optional name for the op. Defaults to 'poisson'.
    """
    super(Poisson, self).__init__(poisson, name=name, reduction=reduction)
@@ -1019,8 +1037,8 @@ class LogCosh(LossFunctionWrapper):
        `tf.distribute.Strategy`, outside of built-in training loops such as
        `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
        will raise an error. Please see this custom training [tutorial](
          https://www.tensorflow.org/tutorials/distribute/custom_training) for
          more details.
      name: Optional name for the op. Defaults to 'log_cosh'.
    """
    super(LogCosh, self).__init__(log_cosh, name=name, reduction=reduction)
@@ -1079,8 +1097,8 @@ class KLDivergence(LossFunctionWrapper):
        `tf.distribute.Strategy`, outside of built-in training loops such as
        `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
        will raise an error. Please see this custom training [tutorial](
          https://www.tensorflow.org/tutorials/distribute/custom_training) for
          more details.
      name: Optional name for the op. Defaults to 'kl_divergence'.
    """
    super(KLDivergence, self).__init__(
@@ -1147,20 +1165,17 @@ class Huber(LossFunctionWrapper):
        `tf.distribute.Strategy`, outside of built-in training loops such as
        `tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
        will raise an error. Please see this custom training [tutorial](
          https://www.tensorflow.org/tutorials/distribute/custom_training) for
          more details.
      name: Optional name for the op. Defaults to 'huber_loss'.
    """
    super(Huber, self).__init__(
        huber, name=name, reduction=reduction, delta=delta)


@keras_export('keras.metrics.mean_squared_error', 'keras.metrics.mse',
              'keras.metrics.MSE', 'keras.losses.mean_squared_error',
              'keras.losses.mse', 'keras.losses.MSE')
@tf.__internal__.dispatch.add_dispatch_support
def mean_squared_error(y_true, y_pred):
  """Computes the mean squared error between labels and predictions.
@@ -1191,12 +1206,9 @@ def mean_squared_error(y_true, y_pred):
  return K.mean(tf.math.squared_difference(y_pred, y_true), axis=-1)


@keras_export('keras.metrics.mean_absolute_error', 'keras.metrics.mae',
              'keras.metrics.MAE', 'keras.losses.mean_absolute_error',
              'keras.losses.mae', 'keras.losses.MAE')
@tf.__internal__.dispatch.add_dispatch_support
def mean_absolute_error(y_true, y_pred):
  """Computes the mean absolute error between labels and predictions.
@@ -1225,11 +1237,9 @@ def mean_absolute_error(y_true, y_pred):

@keras_export('keras.metrics.mean_absolute_percentage_error',
              'keras.metrics.mape', 'keras.metrics.MAPE',
              'keras.losses.mean_absolute_percentage_error',
              'keras.losses.mape', 'keras.losses.MAPE')
@tf.__internal__.dispatch.add_dispatch_support
def mean_absolute_percentage_error(y_true, y_pred):
  """Computes the mean absolute percentage error between `y_true` and `y_pred`.
@@ -1262,11 +1272,9 @@ def mean_absolute_percentage_error(y_true, y_pred):

@keras_export('keras.metrics.mean_squared_logarithmic_error',
              'keras.metrics.msle', 'keras.metrics.MSLE',
              'keras.losses.mean_squared_logarithmic_error',
              'keras.losses.msle', 'keras.losses.MSLE')
@tf.__internal__.dispatch.add_dispatch_support
def mean_squared_logarithmic_error(y_true, y_pred):
  """Computes the mean squared logarithmic error between `y_true` and `y_pred`.
@@ -1511,7 +1519,9 @@ def categorical_crossentropy(y_true,
    y_pred: Tensor of predicted targets.
    from_logits: Whether `y_pred` is expected to be a logits tensor. By default,
      we assume that `y_pred` encodes a probability distribution.
    label_smoothing: Float in [0, 1]. If > `0` then smooth the labels. For
      example, if `0.1`, use `0.1 / num_classes` for non-target labels
      and `0.9 + 0.1 / num_classes` for target labels.

  Returns:
    Categorical crossentropy loss value.
@ -1582,7 +1592,9 @@ def binary_crossentropy(y_true, y_pred, from_logits=False, label_smoothing=0):
y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`. y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.
from_logits: Whether `y_pred` is expected to be a logits tensor. By default, from_logits: Whether `y_pred` is expected to be a logits tensor. By default,
we assume that `y_pred` encodes a probability distribution. we assume that `y_pred` encodes a probability distribution.
label_smoothing: Float in [0, 1]. If > `0` then smooth the labels. label_smoothing: Float in [0, 1]. If > `0` then smooth the labels by
squeezing them towards 0.5. That is, using `1. - 0.5 * label_smoothing`
for the target class and `0.5 * label_smoothing` for the non-target class.
Returns: Returns:
Binary crossentropy loss value. shape = `[batch_size, d0, .. dN-1]`. Binary crossentropy loss value. shape = `[batch_size, d0, .. dN-1]`.
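Likewise for the binary case, the squeeze towards 0.5 described above reduces to a one-liner; a small sketch (helper name is illustrative):

def smooth_binary_labels(y_true, label_smoothing):
  # 1 -> 1 - 0.5 * label_smoothing, 0 -> 0.5 * label_smoothing.
  return y_true * (1. - label_smoothing) + 0.5 * label_smoothing

print(smooth_binary_labels(1., 0.1), smooth_binary_labels(0., 0.1))  # 0.95 0.05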
@ -1602,12 +1614,9 @@ def binary_crossentropy(y_true, y_pred, from_logits=False, label_smoothing=0):
@keras_export('keras.metrics.kl_divergence', @keras_export('keras.metrics.kl_divergence',
'keras.metrics.kullback_leibler_divergence', 'keras.metrics.kullback_leibler_divergence', 'keras.metrics.kld',
'keras.metrics.kld', 'keras.metrics.KLD', 'keras.losses.kl_divergence',
'keras.metrics.KLD', 'keras.losses.kullback_leibler_divergence', 'keras.losses.kld',
'keras.losses.kl_divergence',
'keras.losses.kullback_leibler_divergence',
'keras.losses.kld',
'keras.losses.KLD') 'keras.losses.KLD')
@tf.__internal__.dispatch.add_dispatch_support @tf.__internal__.dispatch.add_dispatch_support
def kl_divergence(y_true, y_pred): def kl_divergence(y_true, y_pred):
@ -69,12 +69,11 @@ class AutoCastVariable(tf.Variable, core.Tensor):
called. called.
""" """
def __init__(self, variable, op=None): def __init__(self, variable):
"""Creates an AutoCastVariable instance. """Creates an AutoCastVariable instance.
Args: Args:
variable: A floating-point resource variable to wrap. variable: A floating-point resource variable to wrap.
op: Optional operation of this variable.
Raises: Raises:
ValueError: If `variable` is not a floating-point resource variable ValueError: If `variable` is not a floating-point resource variable
@ -86,7 +85,11 @@ class AutoCastVariable(tf.Variable, core.Tensor):
raise ValueError('variable must be a floating point variable but has ' raise ValueError('variable must be a floating point variable but has '
'type: %s' % variable.dtype.name) 'type: %s' % variable.dtype.name)
self._variable = variable self._variable = variable
self._op = op # 'delegate' means AutoCastVariable.op returns self._variable.op, which will
# raise an AttributeError in Eager (as intended). If set to any other value,
# AutoCastVariable.op returns that value instead, which is used to set the
# op attribute in AutoCastVariable.assign().
self._op = 'delegate'
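The string sentinel is used instead of `None` because `None` is itself a legitimate stored value (the op of an eager assign). A TensorFlow-free sketch of the pattern, with hypothetical names:

class _OpDelegatingWrapper(object):
  """Illustrative stand-in for the sentinel pattern described above."""

  def __init__(self, wrapped):
    self._wrapped = wrapped
    self._op = 'delegate'  # sentinel: fall through to the wrapped object

  @property
  def op(self):
    if self._op == 'delegate':
      return self._wrapped.op  # may raise AttributeError, as intended in eager
    return self._op  # explicitly set value, possibly None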
def _should_cast(self): def _should_cast(self):
"""Returns True if this variable should be casted when accessed.""" """Returns True if this variable should be casted when accessed."""
@ -211,10 +214,18 @@ class AutoCastVariable(tf.Variable, core.Tensor):
use_locking=None, use_locking=None,
name=None, name=None,
read_value=True): read_value=True):
# TODO(b/146181571): This logic can be simplified once
# DistributedVariable.assign returns a DistributedVariable. Currently for
# MirroredStrategy, it returns a Mirrored value.
if tf.compat.v1.executing_eagerly_outside_functions(): if tf.compat.v1.executing_eagerly_outside_functions():
assign_op = update_fn(value, use_locking, name, False) assign_op = update_fn(value, use_locking, name, False)
if read_value: if read_value:
return create_autocast_variable(self._variable, op=assign_op) # We create a new AutoCastVariable with the same underlying tf.Variable.
# The new AutoCastVariable is identical except that the 'op' attribute is
# defined. This matches the behavior of tf.Variable.assign.
var = create_autocast_variable(self._variable)
var._op = assign_op # pylint:disable=protected-access
return var
return assign_op return assign_op
# Fallback to wrapping the returned variable in graph mode if possible # Fallback to wrapping the returned variable in graph mode if possible
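An eager-mode sketch of what this looks like from the caller's side (assuming the OSS package path `keras.mixed_precision.autocast_variable`, as used in the test below):

import tensorflow as tf
from keras.mixed_precision import autocast_variable

v = tf.Variable(1., dtype=tf.float32)
av = autocast_variable.create_autocast_variable(v)

new_av = av.assign(2.)
# `new_av` is a fresh AutoCastVariable sharing the same underlying tf.Variable;
# in eager mode its `.op` is None (matching tf.Variable.assign), while inside a
# tf.function or in graph mode it carries the assign op.
print(v.numpy(), new_av.op)  # 2.0 None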
@ -310,9 +321,9 @@ class AutoCastVariable(tf.Variable, core.Tensor):
@property @property
def op(self): def op(self):
if self._op is not None: if self._op == 'delegate':
return self._op return self._variable.op
return self._variable.op return self._op
def _as_graph_element(self): def _as_graph_element(self):
graph_element = self._variable._as_graph_element() # pylint:disable=protected-access graph_element = self._variable._as_graph_element() # pylint:disable=protected-access
@ -481,7 +492,7 @@ tf.register_tensor_conversion_function(AutoCastVariable,
AutoCastVariable._dense_var_to_tensor) # pylint:disable=protected-access AutoCastVariable._dense_var_to_tensor) # pylint:disable=protected-access
def create_autocast_variable(variable, op=None): def create_autocast_variable(variable):
"""Creates an AutoCastVariable that wraps another variable. """Creates an AutoCastVariable that wraps another variable.
This typically just returns `AutoCastVariable(variable)`. But, if the variable This typically just returns `AutoCastVariable(variable)`. But, if the variable
@ -493,14 +504,13 @@ def create_autocast_variable(variable, op=None):
Args: Args:
variable: A floating-point resource variable to wrap. variable: A floating-point resource variable to wrap.
op: Optional operation of this variable.
Returns: Returns:
An AutoCastVariable that wraps the variable. An AutoCastVariable that wraps the variable.
""" """
if not isinstance(variable, (distribute_values.DistributedVariable, if not isinstance(variable, (distribute_values.DistributedVariable,
ps_distribute_values.AggregatingVariable)): ps_distribute_values.AggregatingVariable)):
return AutoCastVariable(variable, op=op) return AutoCastVariable(variable)
class AutoCastDistributedVariable(AutoCastVariable, variable.__class__): class AutoCastDistributedVariable(AutoCastVariable, variable.__class__):
"""An AutoCastVariable that also subclasses from variable.__class__. """An AutoCastVariable that also subclasses from variable.__class__.
@ -523,7 +533,7 @@ def create_autocast_variable(variable, op=None):
).format(v=self) ).format(v=self)
# pylint: enable=missing-format-attribute # pylint: enable=missing-format-attribute
return AutoCastDistributedVariable(variable, op=op) return AutoCastDistributedVariable(variable)
class enable_auto_cast_variables(object): # pylint:disable=invalid-name class enable_auto_cast_variables(object): # pylint:disable=invalid-name
@ -26,7 +26,14 @@ from absl.testing import parameterized
import numpy as np import numpy as np
from tensorflow.python.distribute import test_util from tensorflow.python.distribute import test_util
from keras.mixed_precision import autocast_variable from keras.mixed_precision import autocast_variable
from keras.optimizer_v2 import adadelta
from keras.optimizer_v2 import adagrad
from keras.optimizer_v2 import adam
from keras.optimizer_v2 import adamax
from keras.optimizer_v2 import ftrl
from keras.optimizer_v2 import gradient_descent as gradient_descent_v2 from keras.optimizer_v2 import gradient_descent as gradient_descent_v2
from keras.optimizer_v2 import nadam
from keras.optimizer_v2 import rmsprop
maybe_distribute = tf.__internal__.test.combinations.combine(distribution=[ maybe_distribute = tf.__internal__.test.combinations.combine(distribution=[
tf.__internal__.distribute.combinations.default_strategy, tf.__internal__.distribute.combinations.default_strategy,
@ -335,11 +342,28 @@ class AutoCastVariableTest(tf.test.TestCase, parameterized.TestCase):
self.assertAllClose(5., self.evaluate(run_assign())) self.assertAllClose(5., self.evaluate(run_assign()))
@tf.__internal__.distribute.combinations.generate(maybe_distribute) @tf.__internal__.distribute.combinations.generate(maybe_distribute)
def test_assign_op(self, distribution): def test_op_attribute(self, distribution):
with distribution.scope(): with distribution.scope():
x = get_var(0., tf.float32) x = get_var(0., tf.float32)
x = autocast_variable.create_autocast_variable(x) x = autocast_variable.create_autocast_variable(x)
# Variable.op raises an AttributeError in eager mode and is an op in graph
# mode. Variable.assign(...).op is None in eager mode and an op in graph
# mode or a tf.function. We test that this is also true of AutoCastVariable.
if tf.executing_eagerly():
with self.assertRaisesRegex(
AttributeError,
'Tensor.op is meaningless when eager execution is enabled'):
x.op # pylint: disable=pointless-statement
self.assertIsNone(x.assign(1.0).op)
self.assertIsNone(x.assign_add(1.0).op)
self.assertIsNone(x.assign_sub(1.0).op)
else:
self.assertIsNotNone(x.op)
self.assertIsNotNone(x.assign(1.0).op)
self.assertIsNotNone(x.assign_add(1.0).op)
self.assertIsNotNone(x.assign_sub(1.0).op)
@tf.function @tf.function
def func(): def func():
self.assertIsNotNone(x.assign(1.0).op) self.assertIsNotNone(x.assign(1.0).op)
@ -486,25 +510,51 @@ class AutoCastVariableTest(tf.test.TestCase, parameterized.TestCase):
'dtype_to_cast_to=float32 ' 'dtype_to_cast_to=float32 '
'inner_variable=MirroredVariable.*>') 'inner_variable=MirroredVariable.*>')
@parameterized.named_parameters( @tf.__internal__.distribute.combinations.generate(tf.__internal__.test.combinations.combine(
('v1', tf.compat.v1.train.GradientDescentOptimizer), optimizer_class=[
('v2', gradient_descent_v2.SGD)) adadelta.Adadelta,
def test_optimizer(self, optimizer_class): adagrad.Adagrad,
adam.Adam,
adamax.Adamax,
ftrl.Ftrl,
gradient_descent_v2.SGD,
nadam.Nadam,
rmsprop.RMSprop,
tf.compat.v1.train.GradientDescentOptimizer
],
use_tf_function=[False, True]))
def test_optimizer(self, optimizer_class, use_tf_function):
if use_tf_function and not tf.executing_eagerly():
self.skipTest('Test does not support graph mode with tf.function')
x = get_var(1., tf.float32) x = get_var(1., tf.float32)
x = autocast_variable.create_autocast_variable(x) x = autocast_variable.create_autocast_variable(x)
opt = optimizer_class(1.) y = get_var(1., tf.float32)
opt = optimizer_class(learning_rate=1.)
@tf.function
def f(): def f():
opt.minimize(lambda: x + 1., var_list=[x]) # Minimize both the AutoCastVariable and the normal tf.Variable. Both
# variables should be updated to the same value.
op = opt.minimize(lambda: x + y, var_list=[x, y])
return None if tf.compat.v1.executing_eagerly_outside_functions() else op
if use_tf_function:
f = tf.function(f)
if tf.executing_eagerly(): if tf.executing_eagerly():
f() f()
else: else:
op = f() # pylint: disable=assignment-from-no-return op = f()
self.evaluate(tf.compat.v1.global_variables_initializer()) self.evaluate(tf.compat.v1.global_variables_initializer())
self.evaluate(op) self.evaluate(op)
self.assertEqual(self.evaluate(x), 0) # Assert the AutoCastVariable has changed from its initial value.
self.assertNotEqual(self.evaluate(x), 1.)
# Assert the AutoCastVariable is updated correctly by comparing it to the
# normal variable.
self.assertAlmostEqual(self.evaluate(x), self.evaluate(y))
if optimizer_class in (gradient_descent_v2.SGD,
tf.compat.v1.train.GradientDescentOptimizer):
# With SGD, the variables decrease by exactly 1.
self.assertEqual(self.evaluate(x), 0)
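Why the final assertion can be exact: with loss x + y the gradient w.r.t. each variable is 1, so one vanilla SGD step at learning_rate=1 moves each variable from 1.0 to 0.0. Written out:

learning_rate = 1.
x0, grad = 1., 1.            # initial value and d(x + y)/dx
x1 = x0 - learning_rate * grad
assert x1 == 0.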
if __name__ == '__main__': if __name__ == '__main__':
@ -139,7 +139,7 @@ def load(path, compile=True, options=None): # pylint: disable=redefined-builtin
# Recreate layers and metrics using the info stored in the metadata. # Recreate layers and metrics using the info stored in the metadata.
keras_loader = KerasObjectLoader(metadata, object_graph_def) keras_loader = KerasObjectLoader(metadata, object_graph_def)
keras_loader.load_layers() keras_loader.load_layers(compile=compile)
# Generate a dictionary of all loaded nodes. # Generate a dictionary of all loaded nodes.
nodes_to_load = {'root': None} nodes_to_load = {'root': None}
@ -364,7 +364,7 @@ class KerasObjectLoader(object):
obj_child, child_proto, child_id) obj_child, child_proto, child_id)
self.loaded_nodes[child_id] = obj_child, setter self.loaded_nodes[child_id] = obj_child, setter
def load_layers(self): def load_layers(self, compile=True): # pylint: disable=redefined-builtin
"""Load all layer nodes from the metadata.""" """Load all layer nodes from the metadata."""
# Load metrics after models and layers, since it's likely that models # Load metrics after models and layers, since it's likely that models
# and layers will create the metric when initialized (this avoids wasting # and layers will create the metric when initialized (this avoids wasting
@ -380,9 +380,20 @@ class KerasObjectLoader(object):
node_metadata.metadata) node_metadata.metadata)
for node_metadata in metric_list: for node_metadata in metric_list:
self.loaded_nodes[node_metadata.node_id] = self._load_layer( try:
node_metadata.node_id, node_metadata.identifier, self.loaded_nodes[node_metadata.node_id] = self._load_layer(
node_metadata.metadata) node_metadata.node_id, node_metadata.identifier,
node_metadata.metadata)
except ValueError:
# Metrics are only needed when the model is compiled later. When
# `compile=False`, we ignore errors raised while loading custom metrics
# until custom metrics are serialized properly (b/135550038).
if compile:
raise
logging.warning('Unable to restore custom metric. Please ensure that '
'the layer implements `get_config` and `from_config` '
'when saving. In addition, please use the '
'`custom_objects` arg when calling `load_model()`.')
def _load_layer(self, node_id, identifier, metadata): def _load_layer(self, node_id, identifier, metadata):
"""Load a single layer from a SavedUserObject proto.""" """Load a single layer from a SavedUserObject proto."""
@ -1142,6 +1142,22 @@ class MetricTest(tf.test.TestCase, parameterized.TestCase):
self._test_metric_save_and_load( self._test_metric_save_and_load(
metric, self._save_model_dir(), 1, test_sample_weight=False) metric, self._save_model_dir(), 1, test_sample_weight=False)
@keras_parameterized.run_with_all_model_types
def test_custom_metric_model(self):
class CustomMetric(keras.metrics.MeanSquaredError):
pass
model = testing_utils.get_small_mlp(1, 4, input_dim=3)
model.compile(loss='mse', optimizer='rmsprop', metrics=[CustomMetric()])
saved_model_dir = self._save_model_dir()
tf.saved_model.save(model, saved_model_dir)
with self.assertRaisesRegex(ValueError, 'custom_objects'):
keras_load.load(saved_model_dir)
keras_load.load(saved_model_dir, compile=False)
if __name__ == '__main__': if __name__ == '__main__':
tf.test.main() tf.test.main()