Sync OSS keras to head.
PiperOrigin-RevId: 347838100
This commit is contained in:
parent
af1a2eb1f5
commit
f0c0c877ba
@ -1,7 +1,10 @@
|
|||||||
# Description:
|
# Description:
|
||||||
# Contains the Keras engine API (internal TensorFlow version).
|
# Contains the Keras engine API (internal TensorFlow version).
|
||||||
|
|
||||||
|
# buildifier: disable=same-origin-load
|
||||||
load("@org_keras//keras:keras.bzl", "tf_py_test")
|
load("@org_keras//keras:keras.bzl", "tf_py_test")
|
||||||
|
|
||||||
|
# buildifier: disable=same-origin-load
|
||||||
load("@org_keras//keras:keras.bzl", "cuda_py_test")
|
load("@org_keras//keras:keras.bzl", "cuda_py_test")
|
||||||
|
|
||||||
package(
|
package(
|
||||||
|
@ -27,8 +27,8 @@ import shutil
|
|||||||
from absl.testing import parameterized
|
from absl.testing import parameterized
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from tensorflow.core.protobuf import rewriter_config_pb2
|
from tensorflow.core.protobuf import rewriter_config_pb2
|
||||||
from tensorflow.python.framework import test_util as tf_test_util
|
|
||||||
import keras
|
import keras
|
||||||
|
from tensorflow.python.framework import test_util as tf_test_util
|
||||||
from keras import combinations
|
from keras import combinations
|
||||||
from keras import keras_parameterized
|
from keras import keras_parameterized
|
||||||
from keras import testing_utils
|
from keras import testing_utils
|
||||||
|
@ -12,8 +12,7 @@
|
|||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
"""Locally-connected layers.
|
"""Locally-connected layers."""
|
||||||
"""
|
|
||||||
from __future__ import absolute_import
|
from __future__ import absolute_import
|
||||||
from __future__ import division
|
from __future__ import division
|
||||||
from __future__ import print_function
|
from __future__ import print_function
|
||||||
@ -59,79 +58,61 @@ class LocallyConnected1D(Layer):
|
|||||||
```
|
```
|
||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
filters: Integer, the dimensionality of the output space
|
filters: Integer, the dimensionality of the output space (i.e. the number
|
||||||
(i.e. the number of output filters in the convolution).
|
of output filters in the convolution).
|
||||||
kernel_size: An integer or tuple/list of a single integer,
|
kernel_size: An integer or tuple/list of a single integer, specifying the
|
||||||
specifying the length of the 1D convolution window.
|
length of the 1D convolution window.
|
||||||
strides: An integer or tuple/list of a single integer,
|
strides: An integer or tuple/list of a single integer, specifying the
|
||||||
specifying the stride length of the convolution.
|
stride length of the convolution.
|
||||||
Specifying any stride value != 1 is incompatible with specifying
|
padding: Currently only supports `"valid"` (case-insensitive). `"same"`
|
||||||
any `dilation_rate` value != 1.
|
may be supported in the future. `"valid"` means no padding.
|
||||||
padding: Currently only supports `"valid"` (case-insensitive).
|
data_format: A string, one of `channels_last` (default) or
|
||||||
`"same"` may be supported in the future.
|
`channels_first`. The ordering of the dimensions in the inputs.
|
||||||
`"valid"` means no padding.
|
`channels_last` corresponds to inputs with shape `(batch, length,
|
||||||
data_format: A string,
|
channels)` while `channels_first` corresponds to inputs with shape
|
||||||
one of `channels_last` (default) or `channels_first`.
|
`(batch, channels, length)`. It defaults to the `image_data_format`
|
||||||
The ordering of the dimensions in the inputs.
|
value found in your Keras config file at `~/.keras/keras.json`. If you
|
||||||
`channels_last` corresponds to inputs with shape
|
never set it, then it will be "channels_last".
|
||||||
`(batch, length, channels)` while `channels_first`
|
activation: Activation function to use. If you don't specify anything, no
|
||||||
corresponds to inputs with shape
|
activation is applied
|
||||||
`(batch, channels, length)`.
|
|
||||||
It defaults to the `image_data_format` value found in your
|
|
||||||
Keras config file at `~/.keras/keras.json`.
|
|
||||||
If you never set it, then it will be "channels_last".
|
|
||||||
activation: Activation function to use.
|
|
||||||
If you don't specify anything, no activation is applied
|
|
||||||
(ie. "linear" activation: `a(x) = x`).
|
(ie. "linear" activation: `a(x) = x`).
|
||||||
use_bias: Boolean, whether the layer uses a bias vector.
|
use_bias: Boolean, whether the layer uses a bias vector.
|
||||||
kernel_initializer: Initializer for the `kernel` weights matrix.
|
kernel_initializer: Initializer for the `kernel` weights matrix.
|
||||||
bias_initializer: Initializer for the bias vector.
|
bias_initializer: Initializer for the bias vector.
|
||||||
kernel_regularizer: Regularizer function applied to
|
kernel_regularizer: Regularizer function applied to the `kernel` weights
|
||||||
the `kernel` weights matrix.
|
matrix.
|
||||||
bias_regularizer: Regularizer function applied to the bias vector.
|
bias_regularizer: Regularizer function applied to the bias vector.
|
||||||
activity_regularizer: Regularizer function applied to
|
activity_regularizer: Regularizer function applied to the output of the
|
||||||
the output of the layer (its "activation")..
|
layer (its "activation")..
|
||||||
kernel_constraint: Constraint function applied to the kernel matrix.
|
kernel_constraint: Constraint function applied to the kernel matrix.
|
||||||
bias_constraint: Constraint function applied to the bias vector.
|
bias_constraint: Constraint function applied to the bias vector.
|
||||||
implementation: implementation mode, either `1`, `2`, or `3`.
|
implementation: implementation mode, either `1`, `2`, or `3`. `1` loops
|
||||||
`1` loops over input spatial locations to perform the forward pass.
|
over input spatial locations to perform the forward pass. It is
|
||||||
It is memory-efficient but performs a lot of (small) ops.
|
memory-efficient but performs a lot of (small) ops. `2` stores layer
|
||||||
|
weights in a dense but sparsely-populated 2D matrix and implements the
|
||||||
`2` stores layer weights in a dense but sparsely-populated 2D matrix
|
forward pass as a single matrix-multiply. It uses a lot of RAM but
|
||||||
and implements the forward pass as a single matrix-multiply. It uses
|
performs few (large) ops. `3` stores layer weights in a sparse tensor
|
||||||
a lot of RAM but performs few (large) ops.
|
and implements the forward pass as a single sparse matrix-multiply.
|
||||||
|
|
||||||
`3` stores layer weights in a sparse tensor and implements the forward
|
|
||||||
pass as a single sparse matrix-multiply.
|
|
||||||
|
|
||||||
How to choose:
|
How to choose:
|
||||||
|
|
||||||
`1`: large, dense models,
|
`1`: large, dense models,
|
||||||
`2`: small models,
|
`2`: small models,
|
||||||
`3`: large, sparse models,
|
`3`: large, sparse models, where "large" stands for large
|
||||||
|
input/output activations (i.e. many `filters`, `input_filters`,
|
||||||
where "large" stands for large input/output activations
|
large `input_size`, `output_size`), and "sparse" stands for few
|
||||||
(i.e. many `filters`, `input_filters`, large `input_size`,
|
connections between inputs and outputs, i.e. small ratio `filters *
|
||||||
`output_size`), and "sparse" stands for few connections between inputs
|
input_filters * kernel_size / (input_size * strides)`, where inputs
|
||||||
and outputs, i.e. small ratio
|
to and outputs of the layer are assumed to have shapes `(input_size,
|
||||||
`filters * input_filters * kernel_size / (input_size * strides)`,
|
input_filters)`, `(output_size, filters)` respectively. It is
|
||||||
where inputs to and outputs of the layer are assumed to have shapes
|
recommended to benchmark each in the setting of interest to pick the
|
||||||
`(input_size, input_filters)`, `(output_size, filters)`
|
most efficient one (in terms of speed and memory usage). Correct
|
||||||
respectively.
|
choice of implementation can lead to dramatic speed improvements
|
||||||
|
(e.g. 50X), potentially at the expense of RAM. Also, only
|
||||||
It is recommended to benchmark each in the setting of interest to pick
|
`padding="valid"` is supported by `implementation=1`.
|
||||||
the most efficient one (in terms of speed and memory usage). Correct
|
|
||||||
choice of implementation can lead to dramatic speed improvements (e.g.
|
|
||||||
50X), potentially at the expense of RAM.
|
|
||||||
|
|
||||||
Also, only `padding="valid"` is supported by `implementation=1`.
|
|
||||||
|
|
||||||
Input shape:
|
Input shape:
|
||||||
3D tensor with shape: `(batch_size, steps, input_dim)`
|
3D tensor with shape: `(batch_size, steps, input_dim)`
|
||||||
|
|
||||||
Output shape:
|
Output shape:
|
||||||
3D tensor with shape: `(batch_size, new_steps, filters)`
|
3D tensor with shape: `(batch_size, new_steps, filters)` `steps` value
|
||||||
`steps` value might have changed due to padding or strides.
|
might have changed due to padding or strides.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self,
|
def __init__(self,
|
||||||
@ -158,8 +139,8 @@ class LocallyConnected1D(Layer):
|
|||||||
self.padding = conv_utils.normalize_padding(padding)
|
self.padding = conv_utils.normalize_padding(padding)
|
||||||
if self.padding != 'valid' and implementation == 1:
|
if self.padding != 'valid' and implementation == 1:
|
||||||
raise ValueError('Invalid border mode for LocallyConnected1D '
|
raise ValueError('Invalid border mode for LocallyConnected1D '
|
||||||
'(only "valid" is supported if implementation is 1): '
|
'(only "valid" is supported if implementation is 1): ' +
|
||||||
+ padding)
|
padding)
|
||||||
self.data_format = conv_utils.normalize_data_format(data_format)
|
self.data_format = conv_utils.normalize_data_format(data_format)
|
||||||
self.activation = activations.get(activation)
|
self.activation = activations.get(activation)
|
||||||
self.use_bias = use_bias
|
self.use_bias = use_bias
|
||||||
@ -181,10 +162,13 @@ class LocallyConnected1D(Layer):
|
|||||||
input_dim, input_length = input_shape[2], input_shape[1]
|
input_dim, input_length = input_shape[2], input_shape[1]
|
||||||
|
|
||||||
if input_dim is None:
|
if input_dim is None:
|
||||||
raise ValueError('Axis 2 of input should be fully-defined. '
|
raise ValueError(
|
||||||
'Found shape:', input_shape)
|
'Axis 2 of input should be fully-defined. '
|
||||||
self.output_length = conv_utils.conv_output_length(
|
'Found shape:', input_shape)
|
||||||
input_length, self.kernel_size[0], self.padding, self.strides[0])
|
self.output_length = conv_utils.conv_output_length(input_length,
|
||||||
|
self.kernel_size[0],
|
||||||
|
self.padding,
|
||||||
|
self.strides[0])
|
||||||
|
|
||||||
if self.implementation == 1:
|
if self.implementation == 1:
|
||||||
self.kernel_shape = (self.output_length, self.kernel_size[0] * input_dim,
|
self.kernel_shape = (self.output_length, self.kernel_size[0] * input_dim,
|
||||||
@ -199,17 +183,18 @@ class LocallyConnected1D(Layer):
|
|||||||
|
|
||||||
elif self.implementation == 2:
|
elif self.implementation == 2:
|
||||||
if self.data_format == 'channels_first':
|
if self.data_format == 'channels_first':
|
||||||
self.kernel_shape = (input_dim, input_length,
|
self.kernel_shape = (input_dim, input_length, self.filters,
|
||||||
self.filters, self.output_length)
|
self.output_length)
|
||||||
else:
|
else:
|
||||||
self.kernel_shape = (input_length, input_dim,
|
self.kernel_shape = (input_length, input_dim, self.output_length,
|
||||||
self.output_length, self.filters)
|
self.filters)
|
||||||
|
|
||||||
self.kernel = self.add_weight(shape=self.kernel_shape,
|
self.kernel = self.add_weight(
|
||||||
initializer=self.kernel_initializer,
|
shape=self.kernel_shape,
|
||||||
name='kernel',
|
initializer=self.kernel_initializer,
|
||||||
regularizer=self.kernel_regularizer,
|
name='kernel',
|
||||||
constraint=self.kernel_constraint)
|
regularizer=self.kernel_regularizer,
|
||||||
|
constraint=self.kernel_constraint)
|
||||||
|
|
||||||
self.kernel_mask = get_locallyconnected_mask(
|
self.kernel_mask = get_locallyconnected_mask(
|
||||||
input_shape=(input_length,),
|
input_shape=(input_length,),
|
||||||
@ -231,8 +216,7 @@ class LocallyConnected1D(Layer):
|
|||||||
padding=self.padding,
|
padding=self.padding,
|
||||||
filters_in=input_dim,
|
filters_in=input_dim,
|
||||||
filters_out=self.filters,
|
filters_out=self.filters,
|
||||||
data_format=self.data_format)
|
data_format=self.data_format))
|
||||||
)
|
|
||||||
|
|
||||||
self.kernel = self.add_weight(
|
self.kernel = self.add_weight(
|
||||||
shape=(len(self.kernel_idxs),),
|
shape=(len(self.kernel_idxs),),
|
||||||
@ -242,8 +226,8 @@ class LocallyConnected1D(Layer):
|
|||||||
constraint=self.kernel_constraint)
|
constraint=self.kernel_constraint)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
raise ValueError('Unrecognized implementation mode: %d.'
|
raise ValueError('Unrecognized implementation mode: %d.' %
|
||||||
% self.implementation)
|
self.implementation)
|
||||||
|
|
||||||
if self.use_bias:
|
if self.use_bias:
|
||||||
self.bias = self.add_weight(
|
self.bias = self.add_weight(
|
||||||
@ -291,8 +275,8 @@ class LocallyConnected1D(Layer):
|
|||||||
self.compute_output_shape(inputs.shape))
|
self.compute_output_shape(inputs.shape))
|
||||||
|
|
||||||
else:
|
else:
|
||||||
raise ValueError('Unrecognized implementation mode: %d.'
|
raise ValueError('Unrecognized implementation mode: %d.' %
|
||||||
% self.implementation)
|
self.implementation)
|
||||||
|
|
||||||
if self.use_bias:
|
if self.use_bias:
|
||||||
output = K.bias_add(output, self.bias, data_format=self.data_format)
|
output = K.bias_add(output, self.bias, data_format=self.data_format)
|
||||||
@ -366,87 +350,71 @@ class LocallyConnected2D(Layer):
|
|||||||
```
|
```
|
||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
filters: Integer, the dimensionality of the output space
|
filters: Integer, the dimensionality of the output space (i.e. the number
|
||||||
(i.e. the number of output filters in the convolution).
|
of output filters in the convolution).
|
||||||
kernel_size: An integer or tuple/list of 2 integers, specifying the
|
kernel_size: An integer or tuple/list of 2 integers, specifying the width
|
||||||
width and height of the 2D convolution window.
|
and height of the 2D convolution window. Can be a single integer to
|
||||||
Can be a single integer to specify the same value for
|
specify the same value for all spatial dimensions.
|
||||||
all spatial dimensions.
|
strides: An integer or tuple/list of 2 integers, specifying the strides of
|
||||||
strides: An integer or tuple/list of 2 integers,
|
the convolution along the width and height. Can be a single integer to
|
||||||
specifying the strides of the convolution along the width and height.
|
specify the same value for all spatial dimensions.
|
||||||
Can be a single integer to specify the same value for
|
padding: Currently only support `"valid"` (case-insensitive). `"same"`
|
||||||
all spatial dimensions.
|
will be supported in future. `"valid"` means no padding.
|
||||||
padding: Currently only support `"valid"` (case-insensitive).
|
data_format: A string, one of `channels_last` (default) or
|
||||||
`"same"` will be supported in future.
|
`channels_first`. The ordering of the dimensions in the inputs.
|
||||||
`"valid"` means no padding.
|
`channels_last` corresponds to inputs with shape `(batch, height, width,
|
||||||
data_format: A string,
|
channels)` while `channels_first` corresponds to inputs with shape
|
||||||
one of `channels_last` (default) or `channels_first`.
|
`(batch, channels, height, width)`. It defaults to the
|
||||||
The ordering of the dimensions in the inputs.
|
`image_data_format` value found in your Keras config file at
|
||||||
`channels_last` corresponds to inputs with shape
|
`~/.keras/keras.json`. If you never set it, then it will be
|
||||||
`(batch, height, width, channels)` while `channels_first`
|
"channels_last".
|
||||||
corresponds to inputs with shape
|
activation: Activation function to use. If you don't specify anything, no
|
||||||
`(batch, channels, height, width)`.
|
activation is applied
|
||||||
It defaults to the `image_data_format` value found in your
|
|
||||||
Keras config file at `~/.keras/keras.json`.
|
|
||||||
If you never set it, then it will be "channels_last".
|
|
||||||
activation: Activation function to use.
|
|
||||||
If you don't specify anything, no activation is applied
|
|
||||||
(ie. "linear" activation: `a(x) = x`).
|
(ie. "linear" activation: `a(x) = x`).
|
||||||
use_bias: Boolean, whether the layer uses a bias vector.
|
use_bias: Boolean, whether the layer uses a bias vector.
|
||||||
kernel_initializer: Initializer for the `kernel` weights matrix.
|
kernel_initializer: Initializer for the `kernel` weights matrix.
|
||||||
bias_initializer: Initializer for the bias vector.
|
bias_initializer: Initializer for the bias vector.
|
||||||
kernel_regularizer: Regularizer function applied to
|
kernel_regularizer: Regularizer function applied to the `kernel` weights
|
||||||
the `kernel` weights matrix.
|
matrix.
|
||||||
bias_regularizer: Regularizer function applied to the bias vector.
|
bias_regularizer: Regularizer function applied to the bias vector.
|
||||||
activity_regularizer: Regularizer function applied to
|
activity_regularizer: Regularizer function applied to the output of the
|
||||||
the output of the layer (its "activation").
|
layer (its "activation").
|
||||||
kernel_constraint: Constraint function applied to the kernel matrix.
|
kernel_constraint: Constraint function applied to the kernel matrix.
|
||||||
bias_constraint: Constraint function applied to the bias vector.
|
bias_constraint: Constraint function applied to the bias vector.
|
||||||
implementation: implementation mode, either `1`, `2`, or `3`.
|
implementation: implementation mode, either `1`, `2`, or `3`. `1` loops
|
||||||
`1` loops over input spatial locations to perform the forward pass.
|
over input spatial locations to perform the forward pass. It is
|
||||||
It is memory-efficient but performs a lot of (small) ops.
|
memory-efficient but performs a lot of (small) ops. `2` stores layer
|
||||||
|
weights in a dense but sparsely-populated 2D matrix and implements the
|
||||||
`2` stores layer weights in a dense but sparsely-populated 2D matrix
|
forward pass as a single matrix-multiply. It uses a lot of RAM but
|
||||||
and implements the forward pass as a single matrix-multiply. It uses
|
performs few (large) ops. `3` stores layer weights in a sparse tensor
|
||||||
a lot of RAM but performs few (large) ops.
|
and implements the forward pass as a single sparse matrix-multiply.
|
||||||
|
|
||||||
`3` stores layer weights in a sparse tensor and implements the forward
|
|
||||||
pass as a single sparse matrix-multiply.
|
|
||||||
|
|
||||||
How to choose:
|
How to choose:
|
||||||
|
|
||||||
`1`: large, dense models,
|
`1`: large, dense models,
|
||||||
`2`: small models,
|
`2`: small models,
|
||||||
`3`: large, sparse models,
|
`3`: large, sparse models, where "large" stands for large
|
||||||
|
input/output activations (i.e. many `filters`, `input_filters`,
|
||||||
where "large" stands for large input/output activations
|
large `np.prod(input_size)`, `np.prod(output_size)`), and "sparse"
|
||||||
(i.e. many `filters`, `input_filters`, large `np.prod(input_size)`,
|
stands for few connections between inputs and outputs, i.e. small
|
||||||
`np.prod(output_size)`), and "sparse" stands for few connections
|
ratio `filters * input_filters * np.prod(kernel_size) /
|
||||||
between inputs and outputs, i.e. small ratio
|
(np.prod(input_size) * np.prod(strides))`, where inputs to and
|
||||||
`filters * input_filters * np.prod(kernel_size) / (np.prod(input_size)
|
outputs of the layer are assumed to have shapes `input_size +
|
||||||
* np.prod(strides))`, where inputs to and outputs of the layer are
|
(input_filters,)`, `output_size + (filters,)` respectively. It is
|
||||||
assumed to have shapes `input_size + (input_filters,)`,
|
recommended to benchmark each in the setting of interest to pick the
|
||||||
`output_size + (filters,)` respectively.
|
most efficient one (in terms of speed and memory usage). Correct
|
||||||
|
choice of implementation can lead to dramatic speed improvements
|
||||||
It is recommended to benchmark each in the setting of interest to pick
|
(e.g. 50X), potentially at the expense of RAM. Also, only
|
||||||
the most efficient one (in terms of speed and memory usage). Correct
|
`padding="valid"` is supported by `implementation=1`.
|
||||||
choice of implementation can lead to dramatic speed improvements (e.g.
|
|
||||||
50X), potentially at the expense of RAM.
|
|
||||||
|
|
||||||
Also, only `padding="valid"` is supported by `implementation=1`.
|
|
||||||
|
|
||||||
Input shape:
|
Input shape:
|
||||||
4D tensor with shape:
|
4D tensor with shape: `(samples, channels, rows, cols)` if
|
||||||
`(samples, channels, rows, cols)` if data_format='channels_first'
|
data_format='channels_first'
|
||||||
or 4D tensor with shape:
|
or 4D tensor with shape: `(samples, rows, cols, channels)` if
|
||||||
`(samples, rows, cols, channels)` if data_format='channels_last'.
|
data_format='channels_last'.
|
||||||
|
|
||||||
Output shape:
|
Output shape:
|
||||||
4D tensor with shape:
|
4D tensor with shape: `(samples, filters, new_rows, new_cols)` if
|
||||||
`(samples, filters, new_rows, new_cols)` if data_format='channels_first'
|
data_format='channels_first'
|
||||||
or 4D tensor with shape:
|
or 4D tensor with shape: `(samples, new_rows, new_cols, filters)` if
|
||||||
`(samples, new_rows, new_cols, filters)` if data_format='channels_last'.
|
data_format='channels_last'. `rows` and `cols` values might have changed
|
||||||
`rows` and `cols` values might have changed due to padding.
|
due to padding.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self,
|
def __init__(self,
|
||||||
@ -473,8 +441,8 @@ class LocallyConnected2D(Layer):
|
|||||||
self.padding = conv_utils.normalize_padding(padding)
|
self.padding = conv_utils.normalize_padding(padding)
|
||||||
if self.padding != 'valid' and implementation == 1:
|
if self.padding != 'valid' and implementation == 1:
|
||||||
raise ValueError('Invalid border mode for LocallyConnected2D '
|
raise ValueError('Invalid border mode for LocallyConnected2D '
|
||||||
'(only "valid" is supported if implementation is 1): '
|
'(only "valid" is supported if implementation is 1): ' +
|
||||||
+ padding)
|
padding)
|
||||||
self.data_format = conv_utils.normalize_data_format(data_format)
|
self.data_format = conv_utils.normalize_data_format(data_format)
|
||||||
self.activation = activations.get(activation)
|
self.activation = activations.get(activation)
|
||||||
self.use_bias = use_bias
|
self.use_bias = use_bias
|
||||||
@ -509,10 +477,8 @@ class LocallyConnected2D(Layer):
|
|||||||
self.output_col = output_col
|
self.output_col = output_col
|
||||||
|
|
||||||
if self.implementation == 1:
|
if self.implementation == 1:
|
||||||
self.kernel_shape = (
|
self.kernel_shape = (output_row * output_col, self.kernel_size[0] *
|
||||||
output_row * output_col,
|
self.kernel_size[1] * input_filter, self.filters)
|
||||||
self.kernel_size[0] * self.kernel_size[1] * input_filter,
|
|
||||||
self.filters)
|
|
||||||
|
|
||||||
self.kernel = self.add_weight(
|
self.kernel = self.add_weight(
|
||||||
shape=self.kernel_shape,
|
shape=self.kernel_shape,
|
||||||
@ -523,17 +489,18 @@ class LocallyConnected2D(Layer):
|
|||||||
|
|
||||||
elif self.implementation == 2:
|
elif self.implementation == 2:
|
||||||
if self.data_format == 'channels_first':
|
if self.data_format == 'channels_first':
|
||||||
self.kernel_shape = (input_filter, input_row, input_col,
|
self.kernel_shape = (input_filter, input_row, input_col, self.filters,
|
||||||
self.filters, self.output_row, self.output_col)
|
self.output_row, self.output_col)
|
||||||
else:
|
else:
|
||||||
self.kernel_shape = (input_row, input_col, input_filter,
|
self.kernel_shape = (input_row, input_col, input_filter,
|
||||||
self.output_row, self.output_col, self.filters)
|
self.output_row, self.output_col, self.filters)
|
||||||
|
|
||||||
self.kernel = self.add_weight(shape=self.kernel_shape,
|
self.kernel = self.add_weight(
|
||||||
initializer=self.kernel_initializer,
|
shape=self.kernel_shape,
|
||||||
name='kernel',
|
initializer=self.kernel_initializer,
|
||||||
regularizer=self.kernel_regularizer,
|
name='kernel',
|
||||||
constraint=self.kernel_constraint)
|
regularizer=self.kernel_regularizer,
|
||||||
|
constraint=self.kernel_constraint)
|
||||||
|
|
||||||
self.kernel_mask = get_locallyconnected_mask(
|
self.kernel_mask = get_locallyconnected_mask(
|
||||||
input_shape=(input_row, input_col),
|
input_shape=(input_row, input_col),
|
||||||
@ -555,8 +522,7 @@ class LocallyConnected2D(Layer):
|
|||||||
padding=self.padding,
|
padding=self.padding,
|
||||||
filters_in=input_filter,
|
filters_in=input_filter,
|
||||||
filters_out=self.filters,
|
filters_out=self.filters,
|
||||||
data_format=self.data_format)
|
data_format=self.data_format))
|
||||||
)
|
|
||||||
|
|
||||||
self.kernel = self.add_weight(
|
self.kernel = self.add_weight(
|
||||||
shape=(len(self.kernel_idxs),),
|
shape=(len(self.kernel_idxs),),
|
||||||
@ -566,8 +532,8 @@ class LocallyConnected2D(Layer):
|
|||||||
constraint=self.kernel_constraint)
|
constraint=self.kernel_constraint)
|
||||||
|
|
||||||
else:
|
else:
|
||||||
raise ValueError('Unrecognized implementation mode: %d.'
|
raise ValueError('Unrecognized implementation mode: %d.' %
|
||||||
% self.implementation)
|
self.implementation)
|
||||||
|
|
||||||
if self.use_bias:
|
if self.use_bias:
|
||||||
self.bias = self.add_weight(
|
self.bias = self.add_weight(
|
||||||
@ -619,8 +585,8 @@ class LocallyConnected2D(Layer):
|
|||||||
self.compute_output_shape(inputs.shape))
|
self.compute_output_shape(inputs.shape))
|
||||||
|
|
||||||
else:
|
else:
|
||||||
raise ValueError('Unrecognized implementation mode: %d.'
|
raise ValueError('Unrecognized implementation mode: %d.' %
|
||||||
% self.implementation)
|
self.implementation)
|
||||||
|
|
||||||
if self.use_bias:
|
if self.use_bias:
|
||||||
output = K.bias_add(output, self.bias, data_format=self.data_format)
|
output = K.bias_add(output, self.bias, data_format=self.data_format)
|
||||||
@ -686,10 +652,10 @@ def get_locallyconnected_mask(input_shape, kernel_shape, strides, padding,
|
|||||||
`strides`, `padding` and `data_format`.
|
`strides`, `padding` and `data_format`.
|
||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
input_shape: tuple of size N: `(d_in1, ..., d_inN)`
|
input_shape: tuple of size N: `(d_in1, ..., d_inN)` spatial shape of the
|
||||||
spatial shape of the input.
|
input.
|
||||||
kernel_shape: tuple of size N, spatial shape of the convolutional kernel
|
kernel_shape: tuple of size N, spatial shape of the convolutional kernel /
|
||||||
/ receptive field.
|
receptive field.
|
||||||
strides: tuple of size N, strides along each spatial dimension.
|
strides: tuple of size N, strides along each spatial dimension.
|
||||||
padding: type of padding, string `"same"` or `"valid"`.
|
padding: type of padding, string `"same"` or `"valid"`.
|
||||||
data_format: a string, `"channels_first"` or `"channels_last"`.
|
data_format: a string, `"channels_first"` or `"channels_last"`.
|
||||||
@ -709,8 +675,7 @@ def get_locallyconnected_mask(input_shape, kernel_shape, strides, padding,
|
|||||||
input_shape=input_shape,
|
input_shape=input_shape,
|
||||||
kernel_shape=kernel_shape,
|
kernel_shape=kernel_shape,
|
||||||
strides=strides,
|
strides=strides,
|
||||||
padding=padding
|
padding=padding)
|
||||||
)
|
|
||||||
|
|
||||||
ndims = int(mask.ndim / 2)
|
ndims = int(mask.ndim / 2)
|
||||||
|
|
||||||
@ -739,34 +704,26 @@ def local_conv_matmul(inputs, kernel, kernel_mask, output_shape):
|
|||||||
reshapes to make `inputs` and `kernel` 2-D and `output` (N+2)-D.
|
reshapes to make `inputs` and `kernel` 2-D and `output` (N+2)-D.
|
||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
inputs: (N+2)-D tensor with shape
|
inputs: (N+2)-D tensor with shape `(batch_size, channels_in, d_in1, ...,
|
||||||
`(batch_size, channels_in, d_in1, ..., d_inN)`
|
d_inN)` or `(batch_size, d_in1, ..., d_inN, channels_in)`.
|
||||||
or
|
|
||||||
`(batch_size, d_in1, ..., d_inN, channels_in)`.
|
|
||||||
kernel: the unshared weights for N-D convolution,
|
kernel: the unshared weights for N-D convolution,
|
||||||
an (N+2)-D tensor of shape:
|
an (N+2)-D tensor of shape: `(d_in1, ..., d_inN, channels_in, d_out2,
|
||||||
`(d_in1, ..., d_inN, channels_in, d_out2, ..., d_outN, channels_out)`
|
..., d_outN, channels_out)` or `(channels_in, d_in1, ..., d_inN,
|
||||||
or
|
channels_out, d_out2, ..., d_outN)`, with the ordering of channels
|
||||||
`(channels_in, d_in1, ..., d_inN, channels_out, d_out2, ..., d_outN)`,
|
and spatial dimensions matching that of the input. Each entry is the
|
||||||
with the ordering of channels and spatial dimensions matching
|
weight between a particular input and output location, similarly to
|
||||||
that of the input.
|
a fully-connected weight matrix.
|
||||||
Each entry is the weight between a particular input and
|
kernel_mask: a float 0/1 mask tensor of shape: `(d_in1, ..., d_inN, 1,
|
||||||
output location, similarly to a fully-connected weight matrix.
|
d_out2, ..., d_outN, 1)` or `(1, d_in1, ..., d_inN, 1, d_out2, ...,
|
||||||
kernel_mask: a float 0/1 mask tensor of shape:
|
d_outN)`, with the ordering of singleton and spatial dimensions matching
|
||||||
`(d_in1, ..., d_inN, 1, d_out2, ..., d_outN, 1)`
|
that of the input. Mask represents the connectivity pattern of the layer
|
||||||
or
|
and is
|
||||||
`(1, d_in1, ..., d_inN, 1, d_out2, ..., d_outN)`,
|
precomputed elsewhere based on layer parameters: stride, padding, and
|
||||||
with the ordering of singleton and spatial dimensions
|
the receptive field shape.
|
||||||
matching that of the input.
|
|
||||||
Mask represents the connectivity pattern of the layer and is
|
|
||||||
precomputed elsewhere based on layer parameters: stride,
|
|
||||||
padding, and the receptive field shape.
|
|
||||||
output_shape: a tuple of (N+2) elements representing the output shape:
|
output_shape: a tuple of (N+2) elements representing the output shape:
|
||||||
`(batch_size, channels_out, d_out1, ..., d_outN)`
|
`(batch_size, channels_out, d_out1, ..., d_outN)` or `(batch_size,
|
||||||
or
|
d_out1, ..., d_outN, channels_out)`, with the ordering of channels and
|
||||||
`(batch_size, d_out1, ..., d_outN, channels_out)`,
|
spatial dimensions matching that of the input.
|
||||||
with the ordering of channels and spatial dimensions matching that of
|
|
||||||
the input.
|
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Output (N+2)-D tensor with shape `output_shape`.
|
Output (N+2)-D tensor with shape `output_shape`.
|
||||||
@ -777,8 +734,9 @@ def local_conv_matmul(inputs, kernel, kernel_mask, output_shape):
|
|||||||
kernel = make_2d(kernel, split_dim=K.ndim(kernel) // 2)
|
kernel = make_2d(kernel, split_dim=K.ndim(kernel) // 2)
|
||||||
|
|
||||||
output_flat = tf.compat.v1.sparse_matmul(inputs_flat, kernel, b_is_sparse=True)
|
output_flat = tf.compat.v1.sparse_matmul(inputs_flat, kernel, b_is_sparse=True)
|
||||||
output = K.reshape(output_flat,
|
output = K.reshape(output_flat, [
|
||||||
[K.shape(output_flat)[0],] + output_shape.as_list()[1:])
|
K.shape(output_flat)[0],
|
||||||
|
] + output_shape.as_list()[1:])
|
||||||
return output
|
return output
|
||||||
|
|
||||||
|
|
||||||
@ -810,14 +768,16 @@ def local_conv_sparse_matmul(inputs, kernel, kernel_idxs, kernel_shape,
|
|||||||
"""
|
"""
|
||||||
inputs_flat = K.reshape(inputs, (K.shape(inputs)[0], -1))
|
inputs_flat = K.reshape(inputs, (K.shape(inputs)[0], -1))
|
||||||
output_flat = tf.raw_ops.SparseTensorDenseMatMul(
|
output_flat = tf.raw_ops.SparseTensorDenseMatMul(
|
||||||
a_indices=kernel_idxs, a_values=kernel, a_shape=kernel_shape,
|
a_indices=kernel_idxs,
|
||||||
b=inputs_flat, adjoint_b=True)
|
a_values=kernel,
|
||||||
|
a_shape=kernel_shape,
|
||||||
|
b=inputs_flat,
|
||||||
|
adjoint_b=True)
|
||||||
output_flat_transpose = K.transpose(output_flat)
|
output_flat_transpose = K.transpose(output_flat)
|
||||||
|
|
||||||
output_reshaped = K.reshape(
|
output_reshaped = K.reshape(output_flat_transpose, [
|
||||||
output_flat_transpose,
|
K.shape(output_flat_transpose)[0],
|
||||||
[K.shape(output_flat_transpose)[0],] + output_shape.as_list()[1:]
|
] + output_shape.as_list()[1:])
|
||||||
)
|
|
||||||
return output_reshaped
|
return output_reshaped
|
||||||
|
|
||||||
|
|
||||||
@ -830,7 +790,7 @@ def make_2d(tensor, split_dim):
|
|||||||
Arguments:
|
Arguments:
|
||||||
tensor: a tensor of shape `(d0, ..., d(N-1))`.
|
tensor: a tensor of shape `(d0, ..., d(N-1))`.
|
||||||
split_dim: an integer from 1 to N-1, index of the dimension to group
|
split_dim: an integer from 1 to N-1, index of the dimension to group
|
||||||
dimensions before (excluding) and after (including).
|
dimensions before (excluding) and after (including).
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Tensor of shape
|
Tensor of shape
|
||||||
|
@ -28,8 +28,8 @@ import time
|
|||||||
from absl.testing import parameterized
|
from absl.testing import parameterized
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from tensorflow.core.protobuf import rewriter_config_pb2
|
from tensorflow.core.protobuf import rewriter_config_pb2
|
||||||
from tensorflow.python.framework import test_util as tf_test_util
|
|
||||||
import keras
|
import keras
|
||||||
|
from tensorflow.python.framework import test_util as tf_test_util
|
||||||
from keras import keras_parameterized
|
from keras import keras_parameterized
|
||||||
from keras import testing_utils
|
from keras import testing_utils
|
||||||
from keras.layers import recurrent as rnn_v1
|
from keras.layers import recurrent as rnn_v1
|
||||||
|
@ -26,6 +26,7 @@ from absl.testing import parameterized
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
|
|
||||||
import keras
|
import keras
|
||||||
|
from tensorflow.python.framework import test_util as tf_test_util
|
||||||
from keras import combinations
|
from keras import combinations
|
||||||
from keras import keras_parameterized
|
from keras import keras_parameterized
|
||||||
from keras import testing_utils
|
from keras import testing_utils
|
||||||
@ -33,8 +34,6 @@ from keras.engine import base_layer_utils
|
|||||||
from keras.layers import core
|
from keras.layers import core
|
||||||
from keras.layers.rnn_cell_wrapper_v2 import ResidualWrapper
|
from keras.layers.rnn_cell_wrapper_v2 import ResidualWrapper
|
||||||
from keras.utils import generic_utils
|
from keras.utils import generic_utils
|
||||||
from tensorflow.python.eager import context
|
|
||||||
from tensorflow.python.framework import test_util as tf_test_util
|
|
||||||
from tensorflow.python.ops.ragged import ragged_tensor
|
from tensorflow.python.ops.ragged import ragged_tensor
|
||||||
from tensorflow.python.training.tracking import util as trackable_util
|
from tensorflow.python.training.tracking import util as trackable_util
|
||||||
|
|
||||||
@ -653,7 +652,7 @@ class BidirectionalTest(tf.test.TestCase, parameterized.TestCase):
|
|||||||
model.compile(loss='mse', optimizer='sgd')
|
model.compile(loss='mse', optimizer='sgd')
|
||||||
model.fit(x, y, epochs=1, batch_size=1)
|
model.fit(x, y, epochs=1, batch_size=1)
|
||||||
|
|
||||||
if context.executing_eagerly():
|
if tf.executing_eagerly():
|
||||||
run_test()
|
run_test()
|
||||||
else:
|
else:
|
||||||
tf_test_util.enable_output_all_intermediates(run_test)()
|
tf_test_util.enable_output_all_intermediates(run_test)()
|
||||||
|
223
keras/losses.py
223
keras/losses.py
@ -12,8 +12,7 @@
|
|||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
# ==============================================================================
|
# ==============================================================================
|
||||||
"""Built-in loss functions.
|
"""Built-in loss functions."""
|
||||||
"""
|
|
||||||
from __future__ import absolute_import
|
from __future__ import absolute_import
|
||||||
from __future__ import division
|
from __future__ import division
|
||||||
from __future__ import print_function
|
from __future__ import print_function
|
||||||
@ -85,8 +84,8 @@ class Loss(object):
|
|||||||
`tf.distribute.Strategy`, outside of built-in training loops such as
|
`tf.distribute.Strategy`, outside of built-in training loops such as
|
||||||
`tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
|
`tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
|
||||||
will raise an error. Please see this custom training [tutorial](
|
will raise an error. Please see this custom training [tutorial](
|
||||||
https://www.tensorflow.org/tutorials/distribute/custom_training)
|
https://www.tensorflow.org/tutorials/distribute/custom_training) for
|
||||||
for more details.
|
more details.
|
||||||
name: Optional name for the op.
|
name: Optional name for the op.
|
||||||
"""
|
"""
|
||||||
losses_utils.ReductionV2.validate(reduction)
|
losses_utils.ReductionV2.validate(reduction)
|
||||||
@ -115,15 +114,15 @@ class Loss(object):
|
|||||||
sparse loss functions such as sparse categorical crossentropy where
|
sparse loss functions such as sparse categorical crossentropy where
|
||||||
shape = `[batch_size, d0, .. dN-1]`
|
shape = `[batch_size, d0, .. dN-1]`
|
||||||
y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`
|
y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`
|
||||||
sample_weight: Optional `sample_weight` acts as a
|
sample_weight: Optional `sample_weight` acts as a coefficient for the
|
||||||
coefficient for the loss. If a scalar is provided, then the loss is
|
loss. If a scalar is provided, then the loss is simply scaled by the
|
||||||
simply scaled by the given value. If `sample_weight` is a tensor of size
|
given value. If `sample_weight` is a tensor of size `[batch_size]`, then
|
||||||
`[batch_size]`, then the total loss for each sample of the batch is
|
the total loss for each sample of the batch is rescaled by the
|
||||||
rescaled by the corresponding element in the `sample_weight` vector. If
|
corresponding element in the `sample_weight` vector. If the shape of
|
||||||
the shape of `sample_weight` is `[batch_size, d0, .. dN-1]` (or can be
|
`sample_weight` is `[batch_size, d0, .. dN-1]` (or can be broadcasted to
|
||||||
broadcasted to this shape), then each loss element of `y_pred` is scaled
|
this shape), then each loss element of `y_pred` is scaled
|
||||||
by the corresponding value of `sample_weight`. (Note on `dN-1`: all loss
|
by the corresponding value of `sample_weight`. (Note on `dN-1`: all loss
|
||||||
functions reduce by 1 dimension, usually axis=-1.)
|
functions reduce by 1 dimension, usually axis=-1.)
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Weighted loss float `Tensor`. If `reduction` is `NONE`, this has
|
Weighted loss float `Tensor`. If `reduction` is `NONE`, this has
|
||||||
@ -223,8 +222,8 @@ class LossFunctionWrapper(Loss):
|
|||||||
`tf.distribute.Strategy`, outside of built-in training loops such as
|
`tf.distribute.Strategy`, outside of built-in training loops such as
|
||||||
`tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
|
`tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
|
||||||
will raise an error. Please see this custom training [tutorial](
|
will raise an error. Please see this custom training [tutorial](
|
||||||
https://www.tensorflow.org/tutorials/distribute/custom_training)
|
https://www.tensorflow.org/tutorials/distribute/custom_training) for
|
||||||
for more details.
|
more details.
|
||||||
name: (Optional) name for the loss.
|
name: (Optional) name for the loss.
|
||||||
**kwargs: The keyword arguments that are passed on to `fn`.
|
**kwargs: The keyword arguments that are passed on to `fn`.
|
||||||
"""
|
"""
|
||||||
@ -243,8 +242,7 @@ class LossFunctionWrapper(Loss):
|
|||||||
Loss values per sample.
|
Loss values per sample.
|
||||||
"""
|
"""
|
||||||
if tf.is_tensor(y_pred) and tf.is_tensor(y_true):
|
if tf.is_tensor(y_pred) and tf.is_tensor(y_true):
|
||||||
y_pred, y_true = losses_utils.squeeze_or_expand_dimensions(
|
y_pred, y_true = losses_utils.squeeze_or_expand_dimensions(y_pred, y_true)
|
||||||
y_pred, y_true)
|
|
||||||
ag_fn = autograph.tf_convert(self.fn, ag_ctx.control_status_ctx())
|
ag_fn = autograph.tf_convert(self.fn, ag_ctx.control_status_ctx())
|
||||||
return ag_fn(y_true, y_pred, **self._fn_kwargs)
|
return ag_fn(y_true, y_pred, **self._fn_kwargs)
|
||||||
|
|
||||||
@ -307,8 +305,8 @@ class MeanSquaredError(LossFunctionWrapper):
|
|||||||
`tf.distribute.Strategy`, outside of built-in training loops such as
|
`tf.distribute.Strategy`, outside of built-in training loops such as
|
||||||
`tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
|
`tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
|
||||||
will raise an error. Please see this custom training [tutorial](
|
will raise an error. Please see this custom training [tutorial](
|
||||||
https://www.tensorflow.org/tutorials/distribute/custom_training)
|
https://www.tensorflow.org/tutorials/distribute/custom_training) for
|
||||||
for more details.
|
more details.
|
||||||
name: Optional name for the op. Defaults to 'mean_squared_error'.
|
name: Optional name for the op. Defaults to 'mean_squared_error'.
|
||||||
"""
|
"""
|
||||||
super(MeanSquaredError, self).__init__(
|
super(MeanSquaredError, self).__init__(
|
||||||
@ -366,8 +364,8 @@ class MeanAbsoluteError(LossFunctionWrapper):
|
|||||||
`tf.distribute.Strategy`, outside of built-in training loops such as
|
`tf.distribute.Strategy`, outside of built-in training loops such as
|
||||||
`tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
|
`tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
|
||||||
will raise an error. Please see this custom training [tutorial](
|
will raise an error. Please see this custom training [tutorial](
|
||||||
https://www.tensorflow.org/tutorials/distribute/custom_training)
|
https://www.tensorflow.org/tutorials/distribute/custom_training) for
|
||||||
for more details.
|
more details.
|
||||||
name: Optional name for the op. Defaults to 'mean_absolute_error'.
|
name: Optional name for the op. Defaults to 'mean_absolute_error'.
|
||||||
"""
|
"""
|
||||||
super(MeanAbsoluteError, self).__init__(
|
super(MeanAbsoluteError, self).__init__(
|
||||||
@ -426,8 +424,8 @@ class MeanAbsolutePercentageError(LossFunctionWrapper):
|
|||||||
`tf.distribute.Strategy`, outside of built-in training loops such as
|
`tf.distribute.Strategy`, outside of built-in training loops such as
|
||||||
`tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
|
`tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
|
||||||
will raise an error. Please see this custom training [tutorial](
|
will raise an error. Please see this custom training [tutorial](
|
||||||
https://www.tensorflow.org/tutorials/distribute/custom_training)
|
https://www.tensorflow.org/tutorials/distribute/custom_training) for
|
||||||
for more details.
|
more details.
|
||||||
name: Optional name for the op. Defaults to
|
name: Optional name for the op. Defaults to
|
||||||
'mean_absolute_percentage_error'.
|
'mean_absolute_percentage_error'.
|
||||||
"""
|
"""
|
||||||
@ -487,8 +485,8 @@ class MeanSquaredLogarithmicError(LossFunctionWrapper):
|
|||||||
`tf.distribute.Strategy`, outside of built-in training loops such as
|
`tf.distribute.Strategy`, outside of built-in training loops such as
|
||||||
`tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
|
`tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
|
||||||
will raise an error. Please see this custom training [tutorial](
|
will raise an error. Please see this custom training [tutorial](
|
||||||
https://www.tensorflow.org/tutorials/distribute/custom_training)
|
https://www.tensorflow.org/tutorials/distribute/custom_training) for
|
||||||
for more details.
|
more details.
|
||||||
name: Optional name for the op. Defaults to
|
name: Optional name for the op. Defaults to
|
||||||
'mean_squared_logarithmic_error'.
|
'mean_squared_logarithmic_error'.
|
||||||
"""
|
"""
|
||||||
@ -500,44 +498,64 @@ class MeanSquaredLogarithmicError(LossFunctionWrapper):
|
|||||||
class BinaryCrossentropy(LossFunctionWrapper):
|
class BinaryCrossentropy(LossFunctionWrapper):
|
||||||
"""Computes the cross-entropy loss between true labels and predicted labels.
|
"""Computes the cross-entropy loss between true labels and predicted labels.
|
||||||
|
|
||||||
Use this cross-entropy loss when there are only two label classes (assumed to
|
Use this cross-entropy loss for binary (0 or 1) classification applications.
|
||||||
be 0 and 1). For each example, there should be a single floating-point value
|
The loss function requires the following inputs:
|
||||||
per prediction.
|
|
||||||
|
|
||||||
In the snippet below, each of the four examples has only a single
|
- `y_true` (true label): This is either 0 or 1.
|
||||||
floating-pointing value, and both `y_pred` and `y_true` have the shape
|
- `y_pred` (predicted value): This is the model's prediction, i.e., a single
|
||||||
`[batch_size]`.
|
floating-point value which either represents a
|
||||||
|
[logit](https://en.wikipedia.org/wiki/Logit), (i.e., value in [-inf, inf]
|
||||||
|
when `from_logits=True`) or a probability (i.e., value in [0., 1.] when
|
||||||
|
`from_logits=False`).
|
||||||
|
|
||||||
Standalone usage:
|
**Recommended Usage:** (set `from_logits=True`)
|
||||||
|
|
||||||
>>> y_true = [[0., 1.], [0., 0.]]
|
With `tf.keras` API:
|
||||||
>>> y_pred = [[0.6, 0.4], [0.4, 0.6]]
|
|
||||||
>>> # Using 'auto'/'sum_over_batch_size' reduction type.
|
|
||||||
>>> bce = tf.keras.losses.BinaryCrossentropy()
|
|
||||||
>>> bce(y_true, y_pred).numpy()
|
|
||||||
0.815
|
|
||||||
|
|
||||||
>>> # Calling with 'sample_weight'.
|
|
||||||
>>> bce(y_true, y_pred, sample_weight=[1, 0]).numpy()
|
|
||||||
0.458
|
|
||||||
|
|
||||||
>>> # Using 'sum' reduction type.
|
|
||||||
>>> bce = tf.keras.losses.BinaryCrossentropy(
|
|
||||||
... reduction=tf.keras.losses.Reduction.SUM)
|
|
||||||
>>> bce(y_true, y_pred).numpy()
|
|
||||||
1.630
|
|
||||||
|
|
||||||
>>> # Using 'none' reduction type.
|
|
||||||
>>> bce = tf.keras.losses.BinaryCrossentropy(
|
|
||||||
... reduction=tf.keras.losses.Reduction.NONE)
|
|
||||||
>>> bce(y_true, y_pred).numpy()
|
|
||||||
array([0.916 , 0.714], dtype=float32)
|
|
||||||
|
|
||||||
Usage with the `tf.keras` API:
|
|
||||||
|
|
||||||
```python
|
```python
|
||||||
model.compile(optimizer='sgd', loss=tf.keras.losses.BinaryCrossentropy())
|
model.compile(
|
||||||
|
loss=tf.keras.losses.BinaryCrossentropy(from_logits=True),
|
||||||
|
....
|
||||||
|
)
|
||||||
```
|
```
|
||||||
|
|
||||||
|
As a standalone function:
|
||||||
|
|
||||||
|
>>> # Example 1: (batch_size = 1, number of samples = 4)
|
||||||
|
>>> y_true = [0, 1, 0, 0]
|
||||||
|
>>> y_pred = [-18.6, 0.51, 2.94, -12.8]
|
||||||
|
>>> bce = tf.keras.losses.BinaryCrossentropy(from_logits=True)
|
||||||
|
>>> bce(y_true, y_pred).numpy()
|
||||||
|
0.865
|
||||||
|
|
||||||
|
>>> # Example 2: (batch_size = 2, number of samples = 4)
|
||||||
|
>>> y_true = [[0, 1], [0, 0]]
|
||||||
|
>>> y_pred = [[-18.6, 0.51], [2.94, -12.8]]
|
||||||
|
>>> # Using default 'auto'/'sum_over_batch_size' reduction type.
|
||||||
|
>>> bce = tf.keras.losses.BinaryCrossentropy(from_logits=True)
|
||||||
|
>>> bce(y_true, y_pred).numpy()
|
||||||
|
0.865
|
||||||
|
>>> # Using 'sample_weight' attribute
|
||||||
|
>>> bce(y_true, y_pred, sample_weight=[0.8, 0.2]).numpy()
|
||||||
|
0.243
|
||||||
|
>>> # Using 'sum' reduction type.
|
||||||
|
>>> bce = tf.keras.losses.BinaryCrossentropy(from_logits=True,
|
||||||
|
... reduction=tf.keras.losses.Reduction.SUM)
|
||||||
|
>>> bce(y_true, y_pred).numpy()
|
||||||
|
1.730
|
||||||
|
>>> # Using 'none' reduction type.
|
||||||
|
>>> bce = tf.keras.losses.BinaryCrossentropy(from_logits=True,
|
||||||
|
... reduction=tf.keras.losses.Reduction.NONE)
|
||||||
|
>>> bce(y_true, y_pred).numpy()
|
||||||
|
array([0.235, 1.496], dtype=float32)
|
||||||
|
|
||||||
|
**Default Usage:** (set `from_logits=False`)
|
||||||
|
|
||||||
|
>>> # Make the following updates to the above "Recommended Usage" section
|
||||||
|
>>> # 1. Set `from_logits=False`
|
||||||
|
>>> tf.keras.losses.BinaryCrossentropy() # OR ...('from_logits=False')
|
||||||
|
>>> # 2. Update `y_pred` to use probabilities instead of logits
|
||||||
|
>>> y_pred = [0.6, 0.3, 0.2, 0.8] # OR [[0.6, 0.3], [0.2, 0.8]]
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self,
|
def __init__(self,
|
||||||
@ -563,8 +581,8 @@ class BinaryCrossentropy(LossFunctionWrapper):
|
|||||||
`tf.distribute.Strategy`, outside of built-in training loops such as
|
`tf.distribute.Strategy`, outside of built-in training loops such as
|
||||||
`tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
|
`tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
|
||||||
will raise an error. Please see this custom training [tutorial](
|
will raise an error. Please see this custom training [tutorial](
|
||||||
https://www.tensorflow.org/tutorials/distribute/custom_training)
|
https://www.tensorflow.org/tutorials/distribute/custom_training) for
|
||||||
for more details.
|
more details.
|
||||||
name: (Optional) Name for the op. Defaults to 'binary_crossentropy'.
|
name: (Optional) Name for the op. Defaults to 'binary_crossentropy'.
|
||||||
"""
|
"""
|
||||||
super(BinaryCrossentropy, self).__init__(
|
super(BinaryCrossentropy, self).__init__(
|
||||||
@ -633,9 +651,9 @@ class CategoricalCrossentropy(LossFunctionWrapper):
|
|||||||
default, we assume that `y_pred` encodes a probability distribution.
|
default, we assume that `y_pred` encodes a probability distribution.
|
||||||
**Note - Using from_logits=True is more numerically stable.**
|
**Note - Using from_logits=True is more numerically stable.**
|
||||||
label_smoothing: Float in [0, 1]. When > 0, label values are smoothed,
|
label_smoothing: Float in [0, 1]. When > 0, label values are smoothed,
|
||||||
meaning the confidence on label values are relaxed. e.g.
|
meaning the confidence on label values are relaxed. For example, if
|
||||||
`label_smoothing=0.2` means that we will use a value of `0.1` for label
|
`0.1`, use `0.1 / num_classes` for non-target labels and
|
||||||
`0` and `0.9` for label `1`"
|
`0.9 + 0.1 / num_classes` for target labels.
|
||||||
reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to
|
reduction: (Optional) Type of `tf.keras.losses.Reduction` to apply to
|
||||||
loss. Default value is `AUTO`. `AUTO` indicates that the reduction
|
loss. Default value is `AUTO`. `AUTO` indicates that the reduction
|
||||||
option will be determined by the usage context. For almost all cases
|
option will be determined by the usage context. For almost all cases
|
||||||
@ -643,8 +661,8 @@ class CategoricalCrossentropy(LossFunctionWrapper):
|
|||||||
`tf.distribute.Strategy`, outside of built-in training loops such as
|
`tf.distribute.Strategy`, outside of built-in training loops such as
|
||||||
`tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
|
`tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
|
||||||
will raise an error. Please see this custom training [tutorial](
|
will raise an error. Please see this custom training [tutorial](
|
||||||
https://www.tensorflow.org/tutorials/distribute/custom_training)
|
https://www.tensorflow.org/tutorials/distribute/custom_training) for
|
||||||
for more details.
|
more details.
|
||||||
name: Optional name for the op. Defaults to 'categorical_crossentropy'.
|
name: Optional name for the op. Defaults to 'categorical_crossentropy'.
|
||||||
"""
|
"""
|
||||||
super(CategoricalCrossentropy, self).__init__(
|
super(CategoricalCrossentropy, self).__init__(
|
||||||
@ -720,8 +738,8 @@ class SparseCategoricalCrossentropy(LossFunctionWrapper):
|
|||||||
`tf.distribute.Strategy`, outside of built-in training loops such as
|
`tf.distribute.Strategy`, outside of built-in training loops such as
|
||||||
`tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
|
`tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
|
||||||
will raise an error. Please see this custom training [tutorial](
|
will raise an error. Please see this custom training [tutorial](
|
||||||
https://www.tensorflow.org/tutorials/distribute/custom_training)
|
https://www.tensorflow.org/tutorials/distribute/custom_training) for
|
||||||
for more details.
|
more details.
|
||||||
name: Optional name for the op. Defaults to
|
name: Optional name for the op. Defaults to
|
||||||
'sparse_categorical_crossentropy'.
|
'sparse_categorical_crossentropy'.
|
||||||
"""
|
"""
|
||||||
@ -784,8 +802,8 @@ class Hinge(LossFunctionWrapper):
|
|||||||
`tf.distribute.Strategy`, outside of built-in training loops such as
|
`tf.distribute.Strategy`, outside of built-in training loops such as
|
||||||
`tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
|
`tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
|
||||||
will raise an error. Please see this custom training [tutorial](
|
will raise an error. Please see this custom training [tutorial](
|
||||||
https://www.tensorflow.org/tutorials/distribute/custom_training)
|
https://www.tensorflow.org/tutorials/distribute/custom_training) for
|
||||||
for more details.
|
more details.
|
||||||
name: Optional name for the op. Defaults to 'hinge'.
|
name: Optional name for the op. Defaults to 'hinge'.
|
||||||
"""
|
"""
|
||||||
super(Hinge, self).__init__(hinge, name=name, reduction=reduction)
|
super(Hinge, self).__init__(hinge, name=name, reduction=reduction)
|
||||||
@ -845,8 +863,8 @@ class SquaredHinge(LossFunctionWrapper):
|
|||||||
`tf.distribute.Strategy`, outside of built-in training loops such as
|
`tf.distribute.Strategy`, outside of built-in training loops such as
|
||||||
`tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
|
`tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
|
||||||
will raise an error. Please see this custom training [tutorial](
|
will raise an error. Please see this custom training [tutorial](
|
||||||
https://www.tensorflow.org/tutorials/distribute/custom_training)
|
https://www.tensorflow.org/tutorials/distribute/custom_training) for
|
||||||
for more details.
|
more details.
|
||||||
name: Optional name for the op. Defaults to 'squared_hinge'.
|
name: Optional name for the op. Defaults to 'squared_hinge'.
|
||||||
"""
|
"""
|
||||||
super(SquaredHinge, self).__init__(
|
super(SquaredHinge, self).__init__(
|
||||||
@ -905,8 +923,8 @@ class CategoricalHinge(LossFunctionWrapper):
|
|||||||
`tf.distribute.Strategy`, outside of built-in training loops such as
|
`tf.distribute.Strategy`, outside of built-in training loops such as
|
||||||
`tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
|
`tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
|
||||||
will raise an error. Please see this custom training [tutorial](
|
will raise an error. Please see this custom training [tutorial](
|
||||||
https://www.tensorflow.org/tutorials/distribute/custom_training)
|
https://www.tensorflow.org/tutorials/distribute/custom_training) for
|
||||||
for more details.
|
more details.
|
||||||
name: Optional name for the op. Defaults to 'categorical_hinge'.
|
name: Optional name for the op. Defaults to 'categorical_hinge'.
|
||||||
"""
|
"""
|
||||||
super(CategoricalHinge, self).__init__(
|
super(CategoricalHinge, self).__init__(
|
||||||
@ -962,8 +980,8 @@ class Poisson(LossFunctionWrapper):
|
|||||||
`tf.distribute.Strategy`, outside of built-in training loops such as
|
`tf.distribute.Strategy`, outside of built-in training loops such as
|
||||||
`tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
|
`tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
|
||||||
will raise an error. Please see this custom training [tutorial](
|
will raise an error. Please see this custom training [tutorial](
|
||||||
https://www.tensorflow.org/tutorials/distribute/custom_training)
|
https://www.tensorflow.org/tutorials/distribute/custom_training) for
|
||||||
for more details.
|
more details.
|
||||||
name: Optional name for the op. Defaults to 'poisson'.
|
name: Optional name for the op. Defaults to 'poisson'.
|
||||||
"""
|
"""
|
||||||
super(Poisson, self).__init__(poisson, name=name, reduction=reduction)
|
super(Poisson, self).__init__(poisson, name=name, reduction=reduction)
|
||||||
@ -1019,8 +1037,8 @@ class LogCosh(LossFunctionWrapper):
|
|||||||
`tf.distribute.Strategy`, outside of built-in training loops such as
|
`tf.distribute.Strategy`, outside of built-in training loops such as
|
||||||
`tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
|
`tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
|
||||||
will raise an error. Please see this custom training [tutorial](
|
will raise an error. Please see this custom training [tutorial](
|
||||||
https://www.tensorflow.org/tutorials/distribute/custom_training)
|
https://www.tensorflow.org/tutorials/distribute/custom_training) for
|
||||||
for more details.
|
more details.
|
||||||
name: Optional name for the op. Defaults to 'log_cosh'.
|
name: Optional name for the op. Defaults to 'log_cosh'.
|
||||||
"""
|
"""
|
||||||
super(LogCosh, self).__init__(log_cosh, name=name, reduction=reduction)
|
super(LogCosh, self).__init__(log_cosh, name=name, reduction=reduction)
|
||||||
@ -1079,8 +1097,8 @@ class KLDivergence(LossFunctionWrapper):
|
|||||||
`tf.distribute.Strategy`, outside of built-in training loops such as
|
`tf.distribute.Strategy`, outside of built-in training loops such as
|
||||||
`tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
|
`tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
|
||||||
will raise an error. Please see this custom training [tutorial](
|
will raise an error. Please see this custom training [tutorial](
|
||||||
https://www.tensorflow.org/tutorials/distribute/custom_training)
|
https://www.tensorflow.org/tutorials/distribute/custom_training) for
|
||||||
for more details.
|
more details.
|
||||||
name: Optional name for the op. Defaults to 'kl_divergence'.
|
name: Optional name for the op. Defaults to 'kl_divergence'.
|
||||||
"""
|
"""
|
||||||
super(KLDivergence, self).__init__(
|
super(KLDivergence, self).__init__(
|
||||||
@ -1147,20 +1165,17 @@ class Huber(LossFunctionWrapper):
|
|||||||
`tf.distribute.Strategy`, outside of built-in training loops such as
|
`tf.distribute.Strategy`, outside of built-in training loops such as
|
||||||
`tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
|
`tf.keras` `compile` and `fit`, using `AUTO` or `SUM_OVER_BATCH_SIZE`
|
||||||
will raise an error. Please see this custom training [tutorial](
|
will raise an error. Please see this custom training [tutorial](
|
||||||
https://www.tensorflow.org/tutorials/distribute/custom_training)
|
https://www.tensorflow.org/tutorials/distribute/custom_training) for
|
||||||
for more details.
|
more details.
|
||||||
name: Optional name for the op. Defaults to 'huber_loss'.
|
name: Optional name for the op. Defaults to 'huber_loss'.
|
||||||
"""
|
"""
|
||||||
super(Huber, self).__init__(
|
super(Huber, self).__init__(
|
||||||
huber, name=name, reduction=reduction, delta=delta)
|
huber, name=name, reduction=reduction, delta=delta)
|
||||||
|
|
||||||
|
|
||||||
@keras_export('keras.metrics.mean_squared_error',
|
@keras_export('keras.metrics.mean_squared_error', 'keras.metrics.mse',
|
||||||
'keras.metrics.mse',
|
'keras.metrics.MSE', 'keras.losses.mean_squared_error',
|
||||||
'keras.metrics.MSE',
|
'keras.losses.mse', 'keras.losses.MSE')
|
||||||
'keras.losses.mean_squared_error',
|
|
||||||
'keras.losses.mse',
|
|
||||||
'keras.losses.MSE')
|
|
||||||
@tf.__internal__.dispatch.add_dispatch_support
|
@tf.__internal__.dispatch.add_dispatch_support
|
||||||
def mean_squared_error(y_true, y_pred):
|
def mean_squared_error(y_true, y_pred):
|
||||||
"""Computes the mean squared error between labels and predictions.
|
"""Computes the mean squared error between labels and predictions.
|
||||||
@ -1191,12 +1206,9 @@ def mean_squared_error(y_true, y_pred):
|
|||||||
return K.mean(tf.math.squared_difference(y_pred, y_true), axis=-1)
|
return K.mean(tf.math.squared_difference(y_pred, y_true), axis=-1)
|
||||||
|
|
||||||
|
|
||||||
@keras_export('keras.metrics.mean_absolute_error',
|
@keras_export('keras.metrics.mean_absolute_error', 'keras.metrics.mae',
|
||||||
'keras.metrics.mae',
|
'keras.metrics.MAE', 'keras.losses.mean_absolute_error',
|
||||||
'keras.metrics.MAE',
|
'keras.losses.mae', 'keras.losses.MAE')
|
||||||
'keras.losses.mean_absolute_error',
|
|
||||||
'keras.losses.mae',
|
|
||||||
'keras.losses.MAE')
|
|
||||||
@tf.__internal__.dispatch.add_dispatch_support
|
@tf.__internal__.dispatch.add_dispatch_support
|
||||||
def mean_absolute_error(y_true, y_pred):
|
def mean_absolute_error(y_true, y_pred):
|
||||||
"""Computes the mean absolute error between labels and predictions.
|
"""Computes the mean absolute error between labels and predictions.
|
||||||
@ -1225,11 +1237,9 @@ def mean_absolute_error(y_true, y_pred):
|
|||||||
|
|
||||||
|
|
||||||
@keras_export('keras.metrics.mean_absolute_percentage_error',
|
@keras_export('keras.metrics.mean_absolute_percentage_error',
|
||||||
'keras.metrics.mape',
|
'keras.metrics.mape', 'keras.metrics.MAPE',
|
||||||
'keras.metrics.MAPE',
|
|
||||||
'keras.losses.mean_absolute_percentage_error',
|
'keras.losses.mean_absolute_percentage_error',
|
||||||
'keras.losses.mape',
|
'keras.losses.mape', 'keras.losses.MAPE')
|
||||||
'keras.losses.MAPE')
|
|
||||||
@tf.__internal__.dispatch.add_dispatch_support
|
@tf.__internal__.dispatch.add_dispatch_support
|
||||||
def mean_absolute_percentage_error(y_true, y_pred):
|
def mean_absolute_percentage_error(y_true, y_pred):
|
||||||
"""Computes the mean absolute percentage error between `y_true` and `y_pred`.
|
"""Computes the mean absolute percentage error between `y_true` and `y_pred`.
|
||||||
@ -1262,11 +1272,9 @@ def mean_absolute_percentage_error(y_true, y_pred):
|
|||||||
|
|
||||||
|
|
||||||
@keras_export('keras.metrics.mean_squared_logarithmic_error',
|
@keras_export('keras.metrics.mean_squared_logarithmic_error',
|
||||||
'keras.metrics.msle',
|
'keras.metrics.msle', 'keras.metrics.MSLE',
|
||||||
'keras.metrics.MSLE',
|
|
||||||
'keras.losses.mean_squared_logarithmic_error',
|
'keras.losses.mean_squared_logarithmic_error',
|
||||||
'keras.losses.msle',
|
'keras.losses.msle', 'keras.losses.MSLE')
|
||||||
'keras.losses.MSLE')
|
|
||||||
@tf.__internal__.dispatch.add_dispatch_support
|
@tf.__internal__.dispatch.add_dispatch_support
|
||||||
def mean_squared_logarithmic_error(y_true, y_pred):
|
def mean_squared_logarithmic_error(y_true, y_pred):
|
||||||
"""Computes the mean squared logarithmic error between `y_true` and `y_pred`.
|
"""Computes the mean squared logarithmic error between `y_true` and `y_pred`.
|
||||||
@ -1511,7 +1519,9 @@ def categorical_crossentropy(y_true,
|
|||||||
y_pred: Tensor of predicted targets.
|
y_pred: Tensor of predicted targets.
|
||||||
from_logits: Whether `y_pred` is expected to be a logits tensor. By default,
|
from_logits: Whether `y_pred` is expected to be a logits tensor. By default,
|
||||||
we assume that `y_pred` encodes a probability distribution.
|
we assume that `y_pred` encodes a probability distribution.
|
||||||
label_smoothing: Float in [0, 1]. If > `0` then smooth the labels.
|
label_smoothing: Float in [0, 1]. If > `0` then smooth the labels. For
|
||||||
|
example, if `0.1`, use `0.1 / num_classes` for non-target labels
|
||||||
|
and `0.9 + 0.1 / num_classes` for target labels.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Categorical crossentropy loss value.
|
Categorical crossentropy loss value.
|
||||||
@ -1582,7 +1592,9 @@ def binary_crossentropy(y_true, y_pred, from_logits=False, label_smoothing=0):
|
|||||||
y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.
|
y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`.
|
||||||
from_logits: Whether `y_pred` is expected to be a logits tensor. By default,
|
from_logits: Whether `y_pred` is expected to be a logits tensor. By default,
|
||||||
we assume that `y_pred` encodes a probability distribution.
|
we assume that `y_pred` encodes a probability distribution.
|
||||||
label_smoothing: Float in [0, 1]. If > `0` then smooth the labels.
|
label_smoothing: Float in [0, 1]. If > `0` then smooth the labels by
|
||||||
|
squeezing them towards 0.5. That is, using `1. - 0.5 * label_smoothing`
|
||||||
|
for the target class and `0.5 * label_smoothing` for the non-target class.
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Binary crossentropy loss value. shape = `[batch_size, d0, .. dN-1]`.
|
Binary crossentropy loss value. shape = `[batch_size, d0, .. dN-1]`.
|
||||||
@ -1602,12 +1614,9 @@ def binary_crossentropy(y_true, y_pred, from_logits=False, label_smoothing=0):
|
|||||||
|
|
||||||
|
|
||||||
@keras_export('keras.metrics.kl_divergence',
|
@keras_export('keras.metrics.kl_divergence',
|
||||||
'keras.metrics.kullback_leibler_divergence',
|
'keras.metrics.kullback_leibler_divergence', 'keras.metrics.kld',
|
||||||
'keras.metrics.kld',
|
'keras.metrics.KLD', 'keras.losses.kl_divergence',
|
||||||
'keras.metrics.KLD',
|
'keras.losses.kullback_leibler_divergence', 'keras.losses.kld',
|
||||||
'keras.losses.kl_divergence',
|
|
||||||
'keras.losses.kullback_leibler_divergence',
|
|
||||||
'keras.losses.kld',
|
|
||||||
'keras.losses.KLD')
|
'keras.losses.KLD')
|
||||||
@tf.__internal__.dispatch.add_dispatch_support
|
@tf.__internal__.dispatch.add_dispatch_support
|
||||||
def kl_divergence(y_true, y_pred):
|
def kl_divergence(y_true, y_pred):
|
||||||
|
@ -69,12 +69,11 @@ class AutoCastVariable(tf.Variable, core.Tensor):
|
|||||||
called.
|
called.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, variable, op=None):
|
def __init__(self, variable):
|
||||||
"""Creates an AutoCastVariable instance.
|
"""Creates an AutoCastVariable instance.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
variable: A floating-point resource variable to wrap.
|
variable: A floating-point resource variable to wrap.
|
||||||
op: Optional operation of this variable.
|
|
||||||
|
|
||||||
Raises:
|
Raises:
|
||||||
ValueError: If `variable` is not a floating-point resource variable
|
ValueError: If `variable` is not a floating-point resource variable
|
||||||
@ -86,7 +85,11 @@ class AutoCastVariable(tf.Variable, core.Tensor):
|
|||||||
raise ValueError('variable must be a floating point variable but has '
|
raise ValueError('variable must be a floating point variable but has '
|
||||||
'type: %s' % variable.dtype.name)
|
'type: %s' % variable.dtype.name)
|
||||||
self._variable = variable
|
self._variable = variable
|
||||||
self._op = op
|
# 'delegate' means AutoCastVariable.op returns self._variable.op, which will
|
||||||
|
# raise an AttributeError in Eager (as intended). If set to any other value,
|
||||||
|
# AutoCastVariable.op returns that value instead, which is used to set the
|
||||||
|
# op attribute in AutoCastVariable.assign().
|
||||||
|
self._op = 'delegate'
|
||||||
|
|
||||||
def _should_cast(self):
|
def _should_cast(self):
|
||||||
"""Returns True if this variable should be casted when accessed."""
|
"""Returns True if this variable should be casted when accessed."""
|
||||||
@ -211,10 +214,18 @@ class AutoCastVariable(tf.Variable, core.Tensor):
|
|||||||
use_locking=None,
|
use_locking=None,
|
||||||
name=None,
|
name=None,
|
||||||
read_value=True):
|
read_value=True):
|
||||||
|
# TODO(b/146181571): This logic can be simplified once
|
||||||
|
# DistributedVariable.assign returns a DistributedVariable. Currently for
|
||||||
|
# MirroredStrategy, it returns a Mirrored value.
|
||||||
if tf.compat.v1.executing_eagerly_outside_functions():
|
if tf.compat.v1.executing_eagerly_outside_functions():
|
||||||
assign_op = update_fn(value, use_locking, name, False)
|
assign_op = update_fn(value, use_locking, name, False)
|
||||||
if read_value:
|
if read_value:
|
||||||
return create_autocast_variable(self._variable, op=assign_op)
|
# We create a new AutoCastVariable with the same underlying tf.Variable.
|
||||||
|
# The new AutoCastVariable is identical except the 'op' attribute is
|
||||||
|
# defined. This matches the behavior of tf.Variable.assign.
|
||||||
|
var = create_autocast_variable(self._variable)
|
||||||
|
var._op = assign_op # pylint:disable=protected-access
|
||||||
|
return var
|
||||||
return assign_op
|
return assign_op
|
||||||
|
|
||||||
# Fallback to wrapping the returned variable in graph mode if possible
|
# Fallback to wrapping the returned variable in graph mode if possible
|
||||||
@ -310,9 +321,9 @@ class AutoCastVariable(tf.Variable, core.Tensor):
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
def op(self):
|
def op(self):
|
||||||
if self._op is not None:
|
if self._op == 'delegate':
|
||||||
return self._op
|
return self._variable.op
|
||||||
return self._variable.op
|
return self._op
|
||||||
|
|
||||||
def _as_graph_element(self):
|
def _as_graph_element(self):
|
||||||
graph_element = self._variable._as_graph_element() # pylint:disable=protected-access
|
graph_element = self._variable._as_graph_element() # pylint:disable=protected-access
|
||||||
@ -481,7 +492,7 @@ tf.register_tensor_conversion_function(AutoCastVariable,
|
|||||||
AutoCastVariable._dense_var_to_tensor) # pylint:disable=protected-access
|
AutoCastVariable._dense_var_to_tensor) # pylint:disable=protected-access
|
||||||
|
|
||||||
|
|
||||||
def create_autocast_variable(variable, op=None):
|
def create_autocast_variable(variable):
|
||||||
"""Creates an AutoCastVariable that wraps another variable.
|
"""Creates an AutoCastVariable that wraps another variable.
|
||||||
|
|
||||||
This typically just returns `AutoCastVariable(variable)`. But, if the variable
|
This typically just returns `AutoCastVariable(variable)`. But, if the variable
|
||||||
@ -493,14 +504,13 @@ def create_autocast_variable(variable, op=None):
|
|||||||
|
|
||||||
Args:
|
Args:
|
||||||
variable: A floating-point resource variable to wrap.
|
variable: A floating-point resource variable to wrap.
|
||||||
op: Optional operation of this variable.
|
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
An AutoCastVariable that wraps the variable.
|
An AutoCastVariable that wraps the variable.
|
||||||
"""
|
"""
|
||||||
if not isinstance(variable, (distribute_values.DistributedVariable,
|
if not isinstance(variable, (distribute_values.DistributedVariable,
|
||||||
ps_distribute_values.AggregatingVariable)):
|
ps_distribute_values.AggregatingVariable)):
|
||||||
return AutoCastVariable(variable, op=op)
|
return AutoCastVariable(variable)
|
||||||
|
|
||||||
class AutoCastDistributedVariable(AutoCastVariable, variable.__class__):
|
class AutoCastDistributedVariable(AutoCastVariable, variable.__class__):
|
||||||
"""An AutoCastVariable that also subclasses from variable.__class__.
|
"""An AutoCastVariable that also subclasses from variable.__class__.
|
||||||
@ -523,7 +533,7 @@ def create_autocast_variable(variable, op=None):
|
|||||||
).format(v=self)
|
).format(v=self)
|
||||||
# pylint: enable=missing-format-attribute
|
# pylint: enable=missing-format-attribute
|
||||||
|
|
||||||
return AutoCastDistributedVariable(variable, op=op)
|
return AutoCastDistributedVariable(variable)
|
||||||
|
|
||||||
|
|
||||||
class enable_auto_cast_variables(object): # pylint:disable=invalid-name
|
class enable_auto_cast_variables(object): # pylint:disable=invalid-name
|
||||||
|
@ -26,7 +26,14 @@ from absl.testing import parameterized
|
|||||||
import numpy as np
|
import numpy as np
|
||||||
from tensorflow.python.distribute import test_util
|
from tensorflow.python.distribute import test_util
|
||||||
from keras.mixed_precision import autocast_variable
|
from keras.mixed_precision import autocast_variable
|
||||||
|
from keras.optimizer_v2 import adadelta
|
||||||
|
from keras.optimizer_v2 import adagrad
|
||||||
|
from keras.optimizer_v2 import adam
|
||||||
|
from keras.optimizer_v2 import adamax
|
||||||
|
from keras.optimizer_v2 import ftrl
|
||||||
from keras.optimizer_v2 import gradient_descent as gradient_descent_v2
|
from keras.optimizer_v2 import gradient_descent as gradient_descent_v2
|
||||||
|
from keras.optimizer_v2 import nadam
|
||||||
|
from keras.optimizer_v2 import rmsprop
|
||||||
|
|
||||||
maybe_distribute = tf.__internal__.test.combinations.combine(distribution=[
|
maybe_distribute = tf.__internal__.test.combinations.combine(distribution=[
|
||||||
tf.__internal__.distribute.combinations.default_strategy,
|
tf.__internal__.distribute.combinations.default_strategy,
|
||||||
@ -335,11 +342,28 @@ class AutoCastVariableTest(tf.test.TestCase, parameterized.TestCase):
|
|||||||
self.assertAllClose(5., self.evaluate(run_assign()))
|
self.assertAllClose(5., self.evaluate(run_assign()))
|
||||||
|
|
||||||
@tf.__internal__.distribute.combinations.generate(maybe_distribute)
|
@tf.__internal__.distribute.combinations.generate(maybe_distribute)
|
||||||
def test_assign_op(self, distribution):
|
def test_op_attribute(self, distribution):
|
||||||
with distribution.scope():
|
with distribution.scope():
|
||||||
x = get_var(0., tf.float32)
|
x = get_var(0., tf.float32)
|
||||||
x = autocast_variable.create_autocast_variable(x)
|
x = autocast_variable.create_autocast_variable(x)
|
||||||
|
|
||||||
|
# Variable.op raises an AttributeError in Eager mode and is an op in graph
|
||||||
|
# mode. Variable.assign(...).op is None in Eager mode and an op in Graph
|
||||||
|
# mode or a tf.function. We test this is also true of AutoCastVariable.
|
||||||
|
if tf.executing_eagerly():
|
||||||
|
with self.assertRaisesRegex(
|
||||||
|
AttributeError,
|
||||||
|
'Tensor.op is meaningless when eager execution is enabled'):
|
||||||
|
x.op # pylint: disable=pointless-statement
|
||||||
|
self.assertIsNone(x.assign(1.0).op)
|
||||||
|
self.assertIsNone(x.assign_add(1.0).op)
|
||||||
|
self.assertIsNone(x.assign_sub(1.0).op)
|
||||||
|
else:
|
||||||
|
self.assertIsNotNone(x.op)
|
||||||
|
self.assertIsNotNone(x.assign(1.0).op)
|
||||||
|
self.assertIsNotNone(x.assign_add(1.0).op)
|
||||||
|
self.assertIsNotNone(x.assign_sub(1.0).op)
|
||||||
|
|
||||||
@tf.function
|
@tf.function
|
||||||
def func():
|
def func():
|
||||||
self.assertIsNotNone(x.assign(1.0).op)
|
self.assertIsNotNone(x.assign(1.0).op)
|
||||||
@ -486,25 +510,51 @@ class AutoCastVariableTest(tf.test.TestCase, parameterized.TestCase):
|
|||||||
'dtype_to_cast_to=float32 '
|
'dtype_to_cast_to=float32 '
|
||||||
'inner_variable=MirroredVariable.*>')
|
'inner_variable=MirroredVariable.*>')
|
||||||
|
|
||||||
@parameterized.named_parameters(
|
@tf.__internal__.distribute.combinations.generate(tf.__internal__.test.combinations.combine(
|
||||||
('v1', tf.compat.v1.train.GradientDescentOptimizer),
|
optimizer_class=[
|
||||||
('v2', gradient_descent_v2.SGD))
|
adadelta.Adadelta,
|
||||||
def test_optimizer(self, optimizer_class):
|
adagrad.Adagrad,
|
||||||
|
adam.Adam,
|
||||||
|
adamax.Adamax,
|
||||||
|
ftrl.Ftrl,
|
||||||
|
gradient_descent_v2.SGD,
|
||||||
|
nadam.Nadam,
|
||||||
|
rmsprop.RMSprop,
|
||||||
|
tf.compat.v1.train.GradientDescentOptimizer
|
||||||
|
],
|
||||||
|
use_tf_function=[False, True]))
|
||||||
|
def test_optimizer(self, optimizer_class, use_tf_function):
|
||||||
|
if use_tf_function and not tf.executing_eagerly():
|
||||||
|
self.skipTest('Test does not support graph mode with tf.function')
|
||||||
x = get_var(1., tf.float32)
|
x = get_var(1., tf.float32)
|
||||||
x = autocast_variable.create_autocast_variable(x)
|
x = autocast_variable.create_autocast_variable(x)
|
||||||
opt = optimizer_class(1.)
|
y = get_var(1., tf.float32)
|
||||||
|
opt = optimizer_class(learning_rate=1.)
|
||||||
|
|
||||||
@tf.function
|
|
||||||
def f():
|
def f():
|
||||||
opt.minimize(lambda: x + 1., var_list=[x])
|
# Minimize both the AutoCastVariable and the normal tf.Variable. Both
|
||||||
|
# variables should be updated to the same value.
|
||||||
|
op = opt.minimize(lambda: x + y, var_list=[x, y])
|
||||||
|
return None if tf.compat.v1.executing_eagerly_outside_functions() else op
|
||||||
|
|
||||||
|
if use_tf_function:
|
||||||
|
f = tf.function(f)
|
||||||
|
|
||||||
if tf.executing_eagerly():
|
if tf.executing_eagerly():
|
||||||
f()
|
f()
|
||||||
else:
|
else:
|
||||||
op = f() # pylint: disable=assignment-from-no-return
|
op = f()
|
||||||
self.evaluate(tf.compat.v1.global_variables_initializer())
|
self.evaluate(tf.compat.v1.global_variables_initializer())
|
||||||
self.evaluate(op)
|
self.evaluate(op)
|
||||||
self.assertEqual(self.evaluate(x), 0)
|
# Assert the AutoCastVariable has changed from its initial value
|
||||||
|
self.assertNotEqual(self.evaluate(x), 1.)
|
||||||
|
# Assert AutoCastVariable is updated correctly by comparing it to the normal
|
||||||
|
# variable
|
||||||
|
self.assertAlmostEqual(self.evaluate(x), self.evaluate(y))
|
||||||
|
if optimizer_class in (gradient_descent_v2.SGD,
|
||||||
|
tf.compat.v1.train.GradientDescentOptimizer):
|
||||||
|
# With SGD, the variables decrease by exactly 1
|
||||||
|
self.assertEqual(self.evaluate(x), 0)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
@ -139,7 +139,7 @@ def load(path, compile=True, options=None): # pylint: disable=redefined-builtin
|
|||||||
|
|
||||||
# Recreate layers and metrics using the info stored in the metadata.
|
# Recreate layers and metrics using the info stored in the metadata.
|
||||||
keras_loader = KerasObjectLoader(metadata, object_graph_def)
|
keras_loader = KerasObjectLoader(metadata, object_graph_def)
|
||||||
keras_loader.load_layers()
|
keras_loader.load_layers(compile=compile)
|
||||||
|
|
||||||
# Generate a dictionary of all loaded nodes.
|
# Generate a dictionary of all loaded nodes.
|
||||||
nodes_to_load = {'root': None}
|
nodes_to_load = {'root': None}
|
||||||
@ -364,7 +364,7 @@ class KerasObjectLoader(object):
|
|||||||
obj_child, child_proto, child_id)
|
obj_child, child_proto, child_id)
|
||||||
self.loaded_nodes[child_id] = obj_child, setter
|
self.loaded_nodes[child_id] = obj_child, setter
|
||||||
|
|
||||||
def load_layers(self):
|
def load_layers(self, compile=True): # pylint: disable=redefined-builtin
|
||||||
"""Load all layer nodes from the metadata."""
|
"""Load all layer nodes from the metadata."""
|
||||||
# Load metrics after models and layers, since it's likely that models
|
# Load metrics after models and layers, since it's likely that models
|
||||||
# and layers will create the metric when initialized (this avoids wasting
|
# and layers will create the metric when initialized (this avoids wasting
|
||||||
@ -380,9 +380,20 @@ class KerasObjectLoader(object):
|
|||||||
node_metadata.metadata)
|
node_metadata.metadata)
|
||||||
|
|
||||||
for node_metadata in metric_list:
|
for node_metadata in metric_list:
|
||||||
self.loaded_nodes[node_metadata.node_id] = self._load_layer(
|
try:
|
||||||
node_metadata.node_id, node_metadata.identifier,
|
self.loaded_nodes[node_metadata.node_id] = self._load_layer(
|
||||||
node_metadata.metadata)
|
node_metadata.node_id, node_metadata.identifier,
|
||||||
|
node_metadata.metadata)
|
||||||
|
except ValueError:
|
||||||
|
# Metrics are only needed when the model is compiled later. We ignore
|
||||||
|
# errors when trying to load custom metrics when `compile=False` until
|
||||||
|
# custom metrics are serialized properly (b/135550038).
|
||||||
|
if compile:
|
||||||
|
raise
|
||||||
|
logging.warning('Unable to restore custom metric. Please ensure that '
|
||||||
|
'the layer implements `get_config` and `from_config` '
|
||||||
|
'when saving. In addition, please use the '
|
||||||
|
'`custom_objects` arg when calling `load_model()`.')
|
||||||
|
|
||||||
def _load_layer(self, node_id, identifier, metadata):
|
def _load_layer(self, node_id, identifier, metadata):
|
||||||
"""Load a single layer from a SavedUserObject proto."""
|
"""Load a single layer from a SavedUserObject proto."""
|
||||||
|
@ -1142,6 +1142,22 @@ class MetricTest(tf.test.TestCase, parameterized.TestCase):
|
|||||||
self._test_metric_save_and_load(
|
self._test_metric_save_and_load(
|
||||||
metric, self._save_model_dir(), 1, test_sample_weight=False)
|
metric, self._save_model_dir(), 1, test_sample_weight=False)
|
||||||
|
|
||||||
|
@keras_parameterized.run_with_all_model_types
|
||||||
|
def test_custom_metric_model(self):
|
||||||
|
|
||||||
|
class CustomMetric(keras.metrics.MeanSquaredError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
model = testing_utils.get_small_mlp(1, 4, input_dim=3)
|
||||||
|
model.compile(loss='mse', optimizer='rmsprop', metrics=[CustomMetric()])
|
||||||
|
|
||||||
|
saved_model_dir = self._save_model_dir()
|
||||||
|
tf.saved_model.save(model, saved_model_dir)
|
||||||
|
with self.assertRaisesRegex(ValueError, 'custom_objects'):
|
||||||
|
keras_load.load(saved_model_dir)
|
||||||
|
|
||||||
|
keras_load.load(saved_model_dir, compile=False)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
tf.test.main()
|
tf.test.main()
|
||||||
|
Loading…
Reference in New Issue
Block a user