diff --git a/keras_core/layers/__init__.py b/keras_core/layers/__init__.py index 4f2c3a2eb..a3ecdbe37 100644 --- a/keras_core/layers/__init__.py +++ b/keras_core/layers/__init__.py @@ -13,3 +13,4 @@ from keras_core.layers.regularization.gaussian_noise import GaussianNoise from keras_core.layers.regularization.spatial_dropout import SpatialDropout1D from keras_core.layers.regularization.spatial_dropout import SpatialDropout2D from keras_core.layers.regularization.spatial_dropout import SpatialDropout3D +from keras_core.layers.reshaping.reshape import Reshape diff --git a/keras_core/layers/reshaping/reshape.py b/keras_core/layers/reshaping/reshape.py new file mode 100644 index 000000000..1bd9cc014 --- /dev/null +++ b/keras_core/layers/reshaping/reshape.py @@ -0,0 +1,113 @@ +import math + +from keras_core import operations as ops +from keras_core.layers.layer import Layer + + +class Reshape(Layer): + """Layer that reshapes inputs into the given shape. + + Args: + target_shape: Target shape. Tuple of integers, does not include the + samples dimension (batch size). + + Input shape: + Arbitrary, although all dimensions in the input shape must be + known/fixed. Use the keyword argument `input_shape` (tuple of integers, + does not include the samples/batch size axis) when using this layer as + the first layer in a model. + + Output shape: + `(batch_size, *target_shape)` + + Example: + + >>> # as first layer in a Sequential model + >>> model = keras_core.Sequential() + >>> model.add(keras_core.layers.Reshape((3, 4), input_shape=(12,))) + >>> # model.output_shape == (None, 3, 4), `None` is the batch size. + >>> model.output_shape + (None, 3, 4) + + >>> # as intermediate layer in a Sequential model + >>> model.add(keras_core.layers.Reshape((6, 2))) + >>> model.output_shape + (None, 6, 2) + + >>> # also supports shape inference using `-1` as dimension + >>> model.add(keras_core.layers.Reshape((-1, 2, 2))) + >>> model.output_shape + (None, 3, 2, 2) + """ + + def __init__(self, target_shape, name=None, dtype=None): + super().__init__(name=name, dtype=dtype) + self.target_shape = tuple(target_shape) + + def _fix_unknown_dimension(self, input_shape, output_shape): + """Find and replace a missing dimension in an output shape. + + Args: + input_shape: Shape of tensor being reshaped as a tuple of ints. + output_shape: Desired shape of the tensor as a tuple of ints. It + contains at most a single `-1` which indicates a dimension that + should be derived from the input shape. + + Returns: + The new output shape as a tuple of ints with a -1 replaced with its + computed value. + + Raises: + ValueError: If the total tensor size of the output_shape is + different than the input_shape, or more than one unknown + dimension is specified. + """ + msg = ( + "total size of new tensor must be unchanged, " + f"input_shape={input_shape},output_shape={output_shape}" + ) + + known_output_size, unknown_dim_index = 1, None + for index, dim in enumerate(output_shape): + if dim == -1: + if unknown_dim_index is None: + unknown_dim_index = index + else: + raise ValueError( + "There must be at most one unknown dimension in " + f"output_shape. Received: output_shape={output_shape}." 
+ ) + else: + known_output_size *= dim + + input_size = math.prod(input_shape) + if unknown_dim_index is not None: + if known_output_size == 0 or input_size % known_output_size != 0: + raise ValueError(msg) + result = list(output_shape) + result[unknown_dim_index] = input_size // known_output_size + return tuple(result) + elif input_size != known_output_size: + raise ValueError(msg) + return output_shape + + def compute_output_shape(self, input_shape): + output_shape = (input_shape[0],) + if None in input_shape[1:]: + # input shape (partially) unknown? replace -1's with None's + output_shape += tuple( + s if s != -1 else None for s in self.target_shape + ) + else: + output_shape += self._fix_unknown_dimension( + input_shape[1:], self.target_shape + ) + return output_shape + + def call(self, inputs): + return ops.reshape(inputs, (inputs.shape[0],) + self.target_shape) + + def get_config(self): + config = {"target_shape": self.target_shape} + base_config = super().get_config() + return {**base_config, **config} diff --git a/keras_core/layers/reshaping/reshape_test.py b/keras_core/layers/reshaping/reshape_test.py new file mode 100644 index 000000000..d71262e73 --- /dev/null +++ b/keras_core/layers/reshaping/reshape_test.py @@ -0,0 +1,75 @@ +import pytest + +from keras_core import backend +from keras_core import layers +from keras_core import testing + + +class ReshapeTest(testing.TestCase): + def test_reshape(self): + self.run_layer_test( + layers.Reshape, + init_kwargs={"target_shape": (8, 1)}, + input_shape=(3, 2, 4), + expected_output_shape=(3, 8, 1), + ) + + self.run_layer_test( + layers.Reshape, + init_kwargs={"target_shape": (8,)}, + input_shape=(3, 2, 4), + expected_output_shape=(3, 8), + ) + + self.run_layer_test( + layers.Reshape, + init_kwargs={"target_shape": (2, 4)}, + input_shape=(3, 8), + expected_output_shape=(3, 2, 4), + ) + self.run_layer_test( + layers.Reshape, + init_kwargs={"target_shape": (-1, 1)}, + input_shape=(3, 2, 4), + expected_output_shape=(3, 8, 1), + ) + + self.run_layer_test( + layers.Reshape, + init_kwargs={"target_shape": (1, -1)}, + input_shape=(3, 2, 4), + expected_output_shape=(3, 1, 8), + ) + + self.run_layer_test( + layers.Reshape, + init_kwargs={"target_shape": (-1,)}, + input_shape=(3, 2, 4), + expected_output_shape=(3, 8), + ) + + self.run_layer_test( + layers.Reshape, + init_kwargs={"target_shape": (2, -1)}, + input_shape=(3, 2, 4), + expected_output_shape=(3, 2, 4), + ) + + @pytest.mark.skipif( + not backend.DYNAMIC_SHAPES_OK, + reason="Backend does not support dynamic shapes", + ) + def test_reshape_with_dynamic_batch_size(self): + input_layer = layers.Input(shape=(2, 4)) + reshaped = layers.Reshape((8,))(input_layer) + self.assertEqual(reshaped.shape, (None, 8)) + + @pytest.mark.skipif( + not backend.DYNAMIC_SHAPES_OK, + reason="Backend does not support dynamic shapes", + ) + def test_reshape_sets_static_shape(self): + input_layer = layers.Input(batch_shape=(2, None)) + reshaped = layers.Reshape((3, 5))(input_layer) + # Also make sure the batch dim is not lost after reshape. 
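For reference, the `-1` shape-inference rule that `_fix_unknown_dimension` and the tests above exercise can be reproduced in plain Python. The helper name `infer_target_shape` below is hypothetical; it only mirrors the documented rule (total element count must be preserved, at most one `-1`, batch dimension excluded) and is not the layer implementation.

```python
import math


def infer_target_shape(input_shape, target_shape):
    """Resolve a single `-1` in `target_shape` against the non-batch input dims."""
    if target_shape.count(-1) > 1:
        raise ValueError("At most one dimension may be -1.")
    input_size = math.prod(input_shape)                      # total number of elements
    known = math.prod(d for d in target_shape if d != -1)    # product of the fixed dims
    if -1 in target_shape:
        if known == 0 or input_size % known != 0:
            raise ValueError("Total size of the new tensor must be unchanged.")
        return tuple(input_size // known if d == -1 else d for d in target_shape)
    if input_size != known:
        raise ValueError("Total size of the new tensor must be unchanged.")
    return tuple(target_shape)


# Matches the expectations in the tests above: a (2, 4) sample reshaped
# with target (-1, 1) becomes (8, 1), and (2, -1) stays (2, 4).
print(infer_target_shape((2, 4), (-1, 1)))   # (8, 1)
print(infer_target_shape((2, 4), (2, -1)))   # (2, 4)
```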
+ self.assertEqual(reshaped.shape, (2, 3, 5)) diff --git a/keras_core/losses/losses.py b/keras_core/losses/losses.py index 3475d3a16..4c4c1f160 100644 --- a/keras_core/losses/losses.py +++ b/keras_core/losses/losses.py @@ -1,5 +1,3 @@ -import warnings - from keras_core import backend from keras_core import operations as ops from keras_core.api_export import keras_core_export @@ -45,10 +43,10 @@ class MeanSquaredError(LossFunctionWrapper): ``` Args: - reduction: Type of reduction to apply to the loss. In almost all cases - this should be `"sum_over_batch_size"`. - Suuported options are `"sum"`, `"sum_over_batch_size"` or `None`. - name: Optional name for the loss instance. + reduction: Type of reduction to apply to loss. For almost all cases + this defaults to `"sum_over_batch_size"`. Options are `"sum"`, + `"sum_over_batch_size"` or `None`. + name: Optional name for the instance. """ def __init__( @@ -71,10 +69,10 @@ class MeanAbsoluteError(LossFunctionWrapper): ``` Args: - reduction: Type of reduction to apply to the loss. In almost all cases - this should be `"sum_over_batch_size"`. - Suuported options are `"sum"`, `"sum_over_batch_size"` or `None`. - name: Optional name for the loss instance. + reduction: Type of reduction to apply to loss. For almost all cases + this defaults to `"sum_over_batch_size"`. Options are `"sum"`, + `"sum_over_batch_size"` or `None`. + name: Optional name for the instance. """ def __init__( @@ -97,10 +95,10 @@ class MeanAbsolutePercentageError(LossFunctionWrapper): ``` Args: - reduction: Type of reduction to apply to the loss. In almost all cases - this should be `"sum_over_batch_size"`. - Suuported options are `"sum"`, `"sum_over_batch_size"` or `None`. - name: Optional name for the loss instance. + reduction: Type of reduction to apply to loss. For almost all cases + this defaults to `"sum_over_batch_size"`. Options are `"sum"`, + `"sum_over_batch_size"` or `None`. + name: Optional name for the instance. """ def __init__( @@ -127,10 +125,10 @@ class MeanSquaredLogarithmicError(LossFunctionWrapper): ``` Args: - reduction: Type of reduction to apply to the loss. In almost all cases - this should be `"sum_over_batch_size"`. - Suuported options are `"sum"`, `"sum_over_batch_size"` or `None`. - name: Optional name for the loss instance. + reduction: Type of reduction to apply to loss. For almost all cases + this defaults to `"sum_over_batch_size"`. Options are `"sum"`, + `"sum_over_batch_size"` or `None`. + name: Optional name for the instance. """ def __init__( @@ -166,10 +164,10 @@ class CosineSimilarity(LossFunctionWrapper): Args: axis: The axis along which the cosine similarity is computed (the features axis). Defaults to -1. - reduction: Type of reduction to apply to the loss. In almost all cases - this should be `"sum_over_batch_size"`. - Suuported options are `"sum"`, `"sum_over_batch_size"` or `None`. - name: Optional name for the loss instance. + reduction: Type of reduction to apply to loss. Options are `"sum"`, + `"sum_over_batch_size"` or `None`. Defaults to + `"sum_over_batch_size"`. + name: Optional name for the instance. """ def __init__( @@ -197,10 +195,10 @@ class Hinge(LossFunctionWrapper): provided we will convert them to -1 or 1. Args: - reduction: Type of reduction to apply to the loss. In almost all cases - this should be `"sum_over_batch_size"`. - Suuported options are `"sum"`, `"sum_over_batch_size"` or `None`. - name: Optional name for the loss instance. + reduction: Type of reduction to apply to loss. 
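The three reduction modes referenced throughout these docstrings differ only in how per-sample loss values are aggregated. A minimal NumPy sketch, assuming a 1-D vector of per-sample losses so that `size` equals the batch size; `reduce_loss` is a hypothetical helper for illustration, not the library code:

```python
import numpy as np


def reduce_loss(per_sample_losses, reduction="sum_over_batch_size"):
    """Illustrates the documented `"sum"`, `"sum_over_batch_size"` and `None` modes."""
    losses = np.asarray(per_sample_losses, dtype="float32")
    if reduction == "sum":
        return losses.sum()                    # total loss over the batch
    if reduction == "sum_over_batch_size":
        return losses.sum() / losses.size      # the mean; this is the default
    if reduction is None:
        return losses                          # unreduced, one value per sample
    raise ValueError(f"Unknown reduction: {reduction}")


per_sample = [0.25, 0.75]                              # e.g. two per-sample MSE values
print(reduce_loss(per_sample, "sum"))                  # 1.0
print(reduce_loss(per_sample, "sum_over_batch_size"))  # 0.5
print(reduce_loss(per_sample, None))                   # [0.25 0.75]
```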
For almost all cases + this defaults to `"sum_over_batch_size"`. Options are `"sum"`, + `"sum_over_batch_size"` or `None`. + name: Optional name for the instance. Defaults to `"hinge"` """ def __init__(self, reduction="sum_over_batch_size", name="hinge"): @@ -224,10 +222,10 @@ class SquaredHinge(LossFunctionWrapper): provided we will convert them to -1 or 1. Args: - reduction: Type of reduction to apply to the loss. In almost all cases - this should be `"sum_over_batch_size"`. - Suuported options are `"sum"`, `"sum_over_batch_size"` or `None`. - name: Optional name for the loss instance. + reduction: Type of reduction to apply to loss. For almost all cases + this defaults to `"sum_over_batch_size"`. Options are `"sum"`, + `"sum_over_batch_size"` or `None`. + name: Optional name for the instance. Defaults to `"squared_hinge"` """ def __init__(self, reduction="sum_over_batch_size", name="squared_hinge"): @@ -250,10 +248,11 @@ class CategoricalHinge(LossFunctionWrapper): where `neg=maximum((1-y_true)*y_pred)` and `pos=sum(y_true*y_pred)` Args: - reduction: Type of reduction to apply to the loss. In almost all cases - this should be `"sum_over_batch_size"`. - Suuported options are `"sum"`, `"sum_over_batch_size"` or `None`. - name: Optional name for the loss instance. + reduction: Type of reduction to apply to loss. For almost all cases + this defaults to `"sum_over_batch_size"`. Options are `"sum"`, + `"sum_over_batch_size"` or `None`. + name: Optional name for the instance. Defaults to + `"categorical_hinge"` """ def __init__( @@ -276,10 +275,10 @@ class KLDivergence(LossFunctionWrapper): ``` Args: - reduction: Type of reduction to apply to the loss. In almost all cases - this should be `"sum_over_batch_size"`. - Suuported options are `"sum"`, `"sum_over_batch_size"` or `None`. - name: Optional name for the loss instance. + reduction: Type of reduction to apply to loss. For almost all cases + this defaults to `"sum_over_batch_size"`. Options are `"sum"`, + `"sum_over_batch_size"` or `None`. + name: Optional name for the instance. Defaults to 'kl_divergence'. """ def __init__(self, reduction="sum_over_batch_size", name="kl_divergence"): @@ -300,10 +299,10 @@ class Poisson(LossFunctionWrapper): ``` Args: - reduction: Type of reduction to apply to the loss. In almost all cases - this should be `"sum_over_batch_size"`. - Suuported options are `"sum"`, `"sum_over_batch_size"` or `None`. - name: Optional name for the loss instance. + reduction: Type of reduction to apply to loss. For almost all cases + this defaults to `"sum_over_batch_size"`. Options are `"sum"`, + `"sum_over_batch_size"` or `None`. + name: Optional name for the instance. Defaults to `"poisson"` """ def __init__(self, reduction="sum_over_batch_size", name="poisson"): @@ -313,572 +312,6 @@ class Poisson(LossFunctionWrapper): return Loss.get_config(self) -@keras_core_export("keras_core.losses.BinaryCrossentropy") -class BinaryCrossentropy(LossFunctionWrapper): - """Computes the cross-entropy loss between true labels and predicted labels. - - Use this cross-entropy loss for binary (0 or 1) classification applications. - The loss function requires the following inputs: - - - `y_true` (true label): This is either 0 or 1. - - `y_pred` (predicted value): This is the model's prediction, i.e, a single - floating-point value which either represents a - [logit](https://en.wikipedia.org/wiki/Logit), (i.e, value in [-inf, inf] - when `from_logits=True`) or a probability (i.e, value in [0., 1.] when - `from_logits=False`). 
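As a worked check of the hinge formula quoted above, `loss = mean(maximum(1 - y_true * y_pred, 0), axis=-1)`, here is a plain NumPy sketch (not the library implementation), with labels already in `{-1, 1}` as the docstring requires:

```python
import numpy as np

y_true = np.array([[-1.0, 1.0, 1.0],
                   [1.0, -1.0, 1.0]])
y_pred = np.array([[0.3, 0.8, -0.2],
                   [0.6, 0.1, 0.9]])

# loss = mean(maximum(1 - y_true * y_pred, 0), axis=-1), per the docstring.
per_sample = np.maximum(1.0 - y_true * y_pred, 0.0).mean(axis=-1)
print(per_sample)            # one hinge value per row (per sample)
print(per_sample.mean())     # the "sum_over_batch_size" reduction of those values
```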
- - Args: - from_logits: Whether to interpret `y_pred` as a tensor of - [logit](https://en.wikipedia.org/wiki/Logit) values. By default, we - assume that `y_pred` contains probabilities (i.e., values in [0, - 1]). - label_smoothing: Float in range [0, 1]. When 0, no smoothing occurs. - When > 0, we compute the loss between the predicted labels - and a smoothed version of the true labels, where the smoothing - squeezes the labels towards 0.5. Larger values of - `label_smoothing` correspond to heavier smoothing. - axis: The axis along which to compute crossentropy (the features - axis). Defaults to -1. - reduction: Type of reduction to apply to the loss. In almost all cases - this should be `"sum_over_batch_size"`. - Suuported options are `"sum"`, `"sum_over_batch_size"` or `None`. - name: Optional name for the loss instance. - - Examples: - - **Recommended Usage:** (set `from_logits=True`) - - With `compile()` API: - - ```python - model.compile( - loss=keras_core.losses.BinaryCrossentropy(from_logits=True), - ... - ) - ``` - - As a standalone function: - - >>> # Example 1: (batch_size = 1, number of samples = 4) - >>> y_true = [0, 1, 0, 0] - >>> y_pred = [-18.6, 0.51, 2.94, -12.8] - >>> bce = keras_core.losses.BinaryCrossentropy(from_logits=True) - >>> bce(y_true, y_pred) - 0.865 - - >>> # Example 2: (batch_size = 2, number of samples = 4) - >>> y_true = [[0, 1], [0, 0]] - >>> y_pred = [[-18.6, 0.51], [2.94, -12.8]] - >>> # Using default 'auto'/'sum_over_batch_size' reduction type. - >>> bce = keras_core.losses.BinaryCrossentropy(from_logits=True) - >>> bce(y_true, y_pred) - 0.865 - >>> # Using 'sample_weight' attribute - >>> bce(y_true, y_pred, sample_weight=[0.8, 0.2]) - 0.243 - >>> # Using 'sum' reduction` type. - >>> bce = keras_core.losses.BinaryCrossentropy(from_logits=True, - ... reduction="sum") - >>> bce(y_true, y_pred) - 1.730 - >>> # Using 'none' reduction type. - >>> bce = keras_core.losses.BinaryCrossentropy(from_logits=True, - ... reduction=None) - >>> bce(y_true, y_pred) - array([0.235, 1.496], dtype=float32) - - **Default Usage:** (set `from_logits=False`) - - >>> # Make the following updates to the above "Recommended Usage" section - >>> # 1. Set `from_logits=False` - >>> keras_core.losses.BinaryCrossentropy() # OR ...('from_logits=False') - >>> # 2. Update `y_pred` to use probabilities instead of logits - >>> y_pred = [0.6, 0.3, 0.2, 0.8] # OR [[0.6, 0.3], [0.2, 0.8]] - """ - - def __init__( - self, - from_logits=False, - label_smoothing=0.0, - axis=-1, - reduction="sum_over_batch_size", - name="binary_crossentropy", - ): - super().__init__( - binary_crossentropy, - name=name, - reduction=reduction, - from_logits=from_logits, - label_smoothing=label_smoothing, - axis=axis, - ) - self.from_logits = from_logits - - -@keras_core_export("keras_core.losses.BinaryFocalCrossentropy") -class BinaryFocalCrossentropy(LossFunctionWrapper): - """Computes focal cross-entropy loss between true labels and predictions. - - Binary cross-entropy loss is often used for binary (0 or 1) classification - tasks. The loss function requires the following inputs: - - - `y_true` (true label): This is either 0 or 1. - - `y_pred` (predicted value): This is the model's prediction, i.e, a single - floating-point value which either represents a - [logit](https://en.wikipedia.org/wiki/Logit), (i.e, value in [-inf, inf] - when `from_logits=True`) or a probability (i.e, value in `[0., 1.]` when - `from_logits=False`). 
- - According to [Lin et al., 2018](https://arxiv.org/pdf/1708.02002.pdf), it - helps to apply a "focal factor" to down-weight easy examples and focus more - on hard examples. By default, the focal tensor is computed as follows: - - `focal_factor = (1 - output) ** gamma` for class 1 - `focal_factor = output ** gamma` for class 0 - where `gamma` is a focusing parameter. When `gamma=0`, this function is - equivalent to the binary crossentropy loss. - - Args: - apply_class_balancing: A bool, whether to apply weight balancing on the - binary classes 0 and 1. - alpha: A weight balancing factor for class 1, default is `0.25` as - mentioned in reference [Lin et al., 2018]( - https://arxiv.org/pdf/1708.02002.pdf). The weight for class 0 is - `1.0 - alpha`. - gamma: A focusing parameter used to compute the focal factor, default is - `2.0` as mentioned in the reference - [Lin et al., 2018](https://arxiv.org/pdf/1708.02002.pdf). - from_logits: Whether to interpret `y_pred` as a tensor of - [logit](https://en.wikipedia.org/wiki/Logit) values. By default, we - assume that `y_pred` are probabilities (i.e., values in `[0, 1]`). - label_smoothing: Float in `[0, 1]`. When `0`, no smoothing occurs. - When > `0`, we compute the loss between the predicted labels - and a smoothed version of the true labels, where the smoothing - squeezes the labels towards `0.5`. - Larger values of `label_smoothing` correspond to heavier smoothing. - axis: The axis along which to compute crossentropy (the features axis). - Defaults to `-1`. - reduction: Type of reduction to apply to the loss. In almost all cases - this should be `"sum_over_batch_size"`. - Suuported options are `"sum"`, `"sum_over_batch_size"` or `None`. - name: Optional name for the loss instance. - - Examples: - - With the `compile()` API: - - ```python - model.compile( - loss=keras_core.losses.BinaryFocalCrossentropy( - gamma=2.0, from_logits=True), - ... - ) - ``` - - As a standalone function: - - >>> # Example 1: (batch_size = 1, number of samples = 4) - >>> y_true = [0, 1, 0, 0] - >>> y_pred = [-18.6, 0.51, 2.94, -12.8] - >>> loss = keras_core.losses.BinaryFocalCrossentropy( - ... gamma=2, from_logits=True) - >>> loss(y_true, y_pred) - 0.691 - - >>> # Apply class weight - >>> loss = keras_core.losses.BinaryFocalCrossentropy( - ... apply_class_balancing=True, gamma=2, from_logits=True) - >>> loss(y_true, y_pred) - 0.51 - - >>> # Example 2: (batch_size = 2, number of samples = 4) - >>> y_true = [[0, 1], [0, 0]] - >>> y_pred = [[-18.6, 0.51], [2.94, -12.8]] - >>> # Using default 'auto'/'sum_over_batch_size' reduction type. - >>> loss = keras_core.losses.BinaryFocalCrossentropy( - ... gamma=3, from_logits=True) - >>> loss(y_true, y_pred) - 0.647 - - >>> # Apply class weight - >>> loss = keras_core.losses.BinaryFocalCrossentropy( - ... apply_class_balancing=True, gamma=3, from_logits=True) - >>> loss(y_true, y_pred) - 0.482 - - >>> # Using 'sample_weight' attribute with focal effect - >>> loss = keras_core.losses.BinaryFocalCrossentropy( - ... gamma=3, from_logits=True) - >>> loss(y_true, y_pred, sample_weight=[0.8, 0.2]) - 0.133 - - >>> # Apply class weight - >>> loss = keras_core.losses.BinaryFocalCrossentropy( - ... apply_class_balancing=True, gamma=3, from_logits=True) - >>> loss(y_true, y_pred, sample_weight=[0.8, 0.2]) - 0.097 - - >>> # Using 'sum' reduction` type. - >>> loss = keras_core.losses.BinaryFocalCrossentropy( - ... gamma=4, from_logits=True, - ... 
reduction="sum") - >>> loss(y_true, y_pred) - 1.222 - - >>> # Apply class weight - >>> loss = keras_core.losses.BinaryFocalCrossentropy( - ... apply_class_balancing=True, gamma=4, from_logits=True, - ... reduction="sum") - >>> loss(y_true, y_pred) - 0.914 - - >>> # Using 'none' reduction type. - >>> loss = keras_core.losses.BinaryFocalCrossentropy( - ... gamma=5, from_logits=True, - ... reduction=None) - >>> loss(y_true, y_pred) - array([0.0017 1.1561], dtype=float32) - - >>> # Apply class weight - >>> loss = keras_core.losses.BinaryFocalCrossentropy( - ... apply_class_balancing=True, gamma=5, from_logits=True, - ... reduction=None) - >>> loss(y_true, y_pred) - array([0.0004 0.8670], dtype=float32) - """ - - def __init__( - self, - apply_class_balancing=False, - alpha=0.25, - gamma=2.0, - from_logits=False, - label_smoothing=0.0, - axis=-1, - reduction="sum_over_batch_size", - name="binary_focal_crossentropy", - ): - """Initializes `BinaryFocalCrossentropy` instance.""" - super().__init__( - binary_focal_crossentropy, - apply_class_balancing=apply_class_balancing, - alpha=alpha, - gamma=gamma, - name=name, - reduction=reduction, - from_logits=from_logits, - label_smoothing=label_smoothing, - axis=axis, - ) - self.from_logits = from_logits - self.apply_class_balancing = apply_class_balancing - self.alpha = alpha - self.gamma = gamma - - def get_config(self): - config = { - "apply_class_balancing": self.apply_class_balancing, - "alpha": self.alpha, - "gamma": self.gamma, - } - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) - - -@keras_core_export("keras_core.losses.CategoricalCrossentropy") -class CategoricalCrossentropy(LossFunctionWrapper): - """Computes the crossentropy loss between the labels and predictions. - - Use this crossentropy loss function when there are two or more label - classes. We expect labels to be provided in a `one_hot` representation. If - you want to provide labels as integers, please use - `SparseCategoricalCrossentropy` loss. There should be `# classes` floating - point values per feature. - - In the snippet below, there is `# classes` floating pointing values per - example. The shape of both `y_pred` and `y_true` are - `[batch_size, num_classes]`. - - Args: - from_logits: Whether `y_pred` is expected to be a logits tensor. By - default, we assume that `y_pred` encodes a probability distribution. - label_smoothing: Float in [0, 1]. When > 0, label values are smoothed, - meaning the confidence on label values are relaxed. For example, if - `0.1`, use `0.1 / num_classes` for non-target labels and - `0.9 + 0.1 / num_classes` for target labels. - axis: The axis along which to compute crossentropy (the features - axis). Defaults to -1. - reduction: Type of reduction to apply to the loss. In almost all cases - this should be `"sum_over_batch_size"`. - Suuported options are `"sum"`, `"sum_over_batch_size"` or `None`. - name: Optional name for the loss instance. - - Examples: - - Standalone usage: - - >>> y_true = [[0, 1, 0], [0, 0, 1]] - >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]] - >>> # Using 'auto'/'sum_over_batch_size' reduction type. - >>> cce = keras_core.losses.CategoricalCrossentropy() - >>> cce(y_true, y_pred) - 1.177 - - >>> # Calling with 'sample_weight'. - >>> cce(y_true, y_pred, sample_weight=np.array([0.3, 0.7])) - 0.814 - - >>> # Using 'sum' reduction type. - >>> cce = keras_core.losses.CategoricalCrossentropy( - ... 
reduction="sum") - >>> cce(y_true, y_pred) - 2.354 - - >>> # Using 'none' reduction type. - >>> cce = keras_core.losses.CategoricalCrossentropy( - ... reduction=None) - >>> cce(y_true, y_pred) - array([0.0513, 2.303], dtype=float32) - - Usage with the `compile()` API: - - ```python - model.compile(optimizer='sgd', - loss=keras_core.losses.CategoricalCrossentropy()) - ``` - """ - - def __init__( - self, - from_logits=False, - label_smoothing=0.0, - axis=-1, - reduction="sum_over_batch_size", - name="categorical_crossentropy", - ): - super().__init__( - categorical_crossentropy, - name=name, - reduction=reduction, - from_logits=from_logits, - label_smoothing=label_smoothing, - axis=axis, - ) - - -@keras_core_export("keras_core.losses.CategoricalFocalCrossentropy") -class CategoricalFocalCrossentropy(LossFunctionWrapper): - """Computes the alpha balanced focal crossentropy loss. - - Use this crossentropy loss function when there are two or more label - classes and if you want to handle class imbalance without using - `class_weights`. We expect labels to be provided in a `one_hot` - representation. - - According to [Lin et al., 2018](https://arxiv.org/pdf/1708.02002.pdf), it - helps to apply a focal factor to down-weight easy examples and focus more on - hard examples. The general formula for the focal loss (FL) - is as follows: - - `FL(p_t) = (1 - p_t) ** gamma * log(p_t)` - - where `p_t` is defined as follows: - `p_t = output if y_true == 1, else 1 - output` - - `(1 - p_t) ** gamma` is the `modulating_factor`, where `gamma` is a focusing - parameter. When `gamma` = 0, there is no focal effect on the cross entropy. - `gamma` reduces the importance given to simple examples in a smooth manner. - - The authors use alpha-balanced variant of focal loss (FL) in the paper: - `FL(p_t) = -alpha * (1 - p_t) ** gamma * log(p_t)` - - where `alpha` is the weight factor for the classes. If `alpha` = 1, the - loss won't be able to handle class imbalance properly as all - classes will have the same weight. This can be a constant or a list of - constants. If alpha is a list, it must have the same length as the number - of classes. - - The formula above can be generalized to: - `FL(p_t) = alpha * (1 - p_t) ** gamma * CrossEntropy(y_true, y_pred)` - - where minus comes from `CrossEntropy(y_true, y_pred)` (CE). - - Extending this to multi-class case is straightforward: - `FL(p_t) = alpha * (1 - p_t) ** gamma * CategoricalCE(y_true, y_pred)` - - In the snippet below, there is `# classes` floating pointing values per - example. The shape of both `y_pred` and `y_true` are - `(batch_size, num_classes)`. - - Args: - alpha: A weight balancing factor for all classes, default is `0.25` as - mentioned in the reference. It can be a list of floats or a scalar. - In the multi-class case, alpha may be set by inverse class - frequency by using `compute_class_weight` from `sklearn.utils`. - gamma: A focusing parameter, default is `2.0` as mentioned in the - reference. It helps to gradually reduce the importance given to - simple (easy) examples in a smooth manner. - from_logits: Whether `output` is expected to be a logits tensor. By - default, we consider that `output` encodes a probability - distribution. - label_smoothing: Float in [0, 1]. When > 0, label values are smoothed, - meaning the confidence on label values are relaxed. For example, if - `0.1`, use `0.1 / num_classes` for non-target labels and - `0.9 + 0.1 / num_classes` for target labels. - axis: The axis along which to compute crossentropy (the features - axis). 
Defaults to -1. - reduction: Type of reduction to apply to the loss. In almost all cases - this should be `"sum_over_batch_size"`. - Suuported options are `"sum"`, `"sum_over_batch_size"` or `None`. - name: Optional name for the loss instance. - - Examples: - - Standalone usage: - - >>> y_true = [[0., 1., 0.], [0., 0., 1.]] - >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]] - >>> # Using 'auto'/'sum_over_batch_size' reduction type. - >>> cce = keras_core.losses.CategoricalFocalCrossentropy() - >>> cce(y_true, y_pred) - 0.23315276 - - >>> # Calling with 'sample_weight'. - >>> cce(y_true, y_pred, sample_weight=np.array([0.3, 0.7])) - 0.1632 - - >>> # Using 'sum' reduction type. - >>> cce = keras_core.losses.CategoricalFocalCrossentropy( - ... reduction="sum") - >>> cce(y_true, y_pred) - 0.46631 - - >>> # Using 'none' reduction type. - >>> cce = keras_core.losses.CategoricalFocalCrossentropy( - ... reduction=None) - >>> cce(y_true, y_pred) - array([3.2058331e-05, 4.6627346e-01], dtype=float32) - - Usage with the `compile()` API: - - ```python - model.compile(optimizer='adam', - loss=keras_core.losses.CategoricalFocalCrossentropy()) - ``` - """ - - def __init__( - self, - alpha=0.25, - gamma=2.0, - from_logits=False, - label_smoothing=0.0, - axis=-1, - reduction="sum_over_batch_size", - name="categorical_focal_crossentropy", - ): - """Initializes `CategoricalFocalCrossentropy` instance.""" - super().__init__( - categorical_focal_crossentropy, - alpha=alpha, - gamma=gamma, - name=name, - reduction=reduction, - from_logits=from_logits, - label_smoothing=label_smoothing, - axis=axis, - ) - self.from_logits = from_logits - self.alpha = alpha - self.gamma = gamma - - def get_config(self): - config = { - "alpha": self.alpha, - "gamma": self.gamma, - } - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) - - -@keras_core_export("keras_core.losses.SparseCategoricalCrossentropy") -class SparseCategoricalCrossentropy(LossFunctionWrapper): - """Computes the crossentropy loss between the labels and predictions. - - Use this crossentropy loss function when there are two or more label - classes. We expect labels to be provided as integers. If you want to - provide labels using `one-hot` representation, please use - `CategoricalCrossentropy` loss. There should be `# classes` floating point - values per feature for `y_pred` and a single floating point value per - feature for `y_true`. - - In the snippet below, there is a single floating point value per example for - `y_true` and `# classes` floating pointing values per example for `y_pred`. - The shape of `y_true` is `[batch_size]` and the shape of `y_pred` is - `[batch_size, num_classes]`. - - Args: - from_logits: Whether `y_pred` is expected to be a logits tensor. By - default, we assume that `y_pred` encodes a probability distribution. - ignore_class: Optional integer. The ID of a class to be ignored during - loss computation. This is useful, for example, in segmentation - problems featuring a "void" class (commonly -1 or 255) in - segmentation maps. - By default (`ignore_class=None`), all classes are considered. - reduction: Type of reduction to apply to the loss. In almost all cases - this should be `"sum_over_batch_size"`. - Suuported options are `"sum"`, `"sum_over_batch_size"` or `None`. - name: Optional name for the loss instance. - - Examples: - - >>> y_true = [1, 2] - >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]] - >>> # Using 'auto'/'sum_over_batch_size' reduction type. 
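The alpha-balanced focal formula quoted in the `CategoricalFocalCrossentropy` docstring, `FL(p_t) = -alpha * (1 - p_t) ** gamma * log(p_t)` with `p_t` the predicted probability of the true class, can be checked directly in NumPy on the same inputs as the docstring example:

```python
import numpy as np

alpha, gamma = 0.25, 2.0

# One-hot targets and predicted probabilities, as in the docstring example.
y_true = np.array([[0.0, 1.0, 0.0], [0.0, 0.0, 1.0]])
y_pred = np.clip(np.array([[0.05, 0.95, 0.0], [0.1, 0.8, 0.1]]), 1e-7, 1.0)

# p_t is the predicted probability assigned to the true class.
p_t = np.sum(y_true * y_pred, axis=-1)

# FL(p_t) = -alpha * (1 - p_t) ** gamma * log(p_t)
focal_loss = -alpha * (1.0 - p_t) ** gamma * np.log(p_t)
print(focal_loss)          # per-example values, roughly [3.2e-05, 4.66e-01]
print(focal_loss.mean())   # ~0.233, the "sum_over_batch_size" value quoted above
```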
- >>> scce = keras_core.losses.SparseCategoricalCrossentropy() - >>> scce(y_true, y_pred) - 1.177 - - >>> # Calling with 'sample_weight'. - >>> scce(y_true, y_pred, sample_weight=np.array([0.3, 0.7])) - 0.814 - - >>> # Using 'sum' reduction type. - >>> scce = keras_core.losses.SparseCategoricalCrossentropy( - ... reduction="sum") - >>> scce(y_true, y_pred) - 2.354 - - >>> # Using 'none' reduction type. - >>> scce = keras_core.losses.SparseCategoricalCrossentropy( - ... reduction=None) - >>> scce(y_true, y_pred) - array([0.0513, 2.303], dtype=float32) - - Usage with the `compile()` API: - - ```python - model.compile(optimizer='sgd', - loss=keras_core.losses.SparseCategoricalCrossentropy()) - ``` - """ - - def __init__( - self, - from_logits=False, - ignore_class=None, - reduction="sum_over_batch_size", - name="sparse_categorical_crossentropy", - ): - super().__init__( - sparse_categorical_crossentropy, - name=name, - reduction=reduction, - from_logits=from_logits, - ignore_class=ignore_class, - ) - - def convert_binary_labels_to_hinge(y_true): """Converts binary labels into -1/1 for hinge loss/metric calculation.""" are_zeros = ops.equal(y_true, 0) @@ -914,6 +347,12 @@ def hinge(y_true, y_pred): loss = mean(maximum(1 - y_true * y_pred, 0), axis=-1) ``` + Standalone usage: + + >>> y_true = np.random.choice([-1, 1], size=(2, 3)) + >>> y_pred = np.random.random(size=(2, 3)) + >>> loss = keras_core.losses.hinge(y_true, y_pred) + Args: y_true: The ground truth values. `y_true` values are expected to be -1 or 1. If binary (0 or 1) labels are provided they will be converted @@ -922,12 +361,6 @@ def hinge(y_true, y_pred): Returns: Hinge loss values with shape = `[batch_size, d0, .. dN-1]`. - - Example: - - >>> y_true = np.random.choice([-1, 1], size=(2, 3)) - >>> y_pred = np.random.random(size=(2, 3)) - >>> loss = keras_core.losses.hinge(y_true, y_pred) """ y_pred = ops.convert_to_tensor(y_pred) y_true = ops.cast(y_true, dtype=y_pred.dtype) @@ -951,6 +384,12 @@ def squared_hinge(y_true, y_pred): loss = mean(square(maximum(1 - y_true * y_pred, 0)), axis=-1) ``` + Standalone usage: + + >>> y_true = np.random.choice([-1, 1], size=(2, 3)) + >>> y_pred = np.random.random(size=(2, 3)) + >>> loss = keras_core.losses.squared_hinge(y_true, y_pred) + Args: y_true: The ground truth values. `y_true` values are expected to be -1 or 1. If binary (0 or 1) labels are provided we will convert them @@ -959,12 +398,6 @@ def squared_hinge(y_true, y_pred): Returns: Squared hinge loss values with shape = `[batch_size, d0, .. dN-1]`. - - Example: - - >>> y_true = np.random.choice([-1, 1], size=(2, 3)) - >>> y_pred = np.random.random(size=(2, 3)) - >>> loss = keras_core.losses.squared_hinge(y_true, y_pred) """ y_pred = ops.convert_to_tensor(y_pred) y_true = ops.cast(y_true, y_pred.dtype) @@ -991,6 +424,13 @@ def categorical_hinge(y_true, y_pred): where `neg=maximum((1-y_true)*y_pred)` and `pos=sum(y_true*y_pred)` + Standalone usage: + + >>> y_true = np.random.randint(0, 3, size=(2,)) + >>> y_true = np.eye(np.max(y_true) + 1)[y_true] + >>> y_pred = np.random.random(size=(2, 3)) + >>> loss = keras_core.losses.categorical_hinge(y_true, y_pred) + Args: y_true: The ground truth values. `y_true` values are expected to be either `{-1, +1}` or `{0, 1}` (i.e. a one-hot-encoded tensor) with @@ -999,13 +439,6 @@ def categorical_hinge(y_true, y_pred): Returns: Categorical hinge loss values with shape = `[batch_size, d0, .. dN-1]`. 
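The categorical hinge formula stated above, `loss = maximum(neg - pos + 1, 0)` with `neg = maximum((1 - y_true) * y_pred)` and `pos = sum(y_true * y_pred)`, worked through with NumPy on one-hot targets (a sketch, not the library code):

```python
import numpy as np

# One-hot targets, as the categorical_hinge docstring expects.
y_true = np.array([[0.0, 1.0, 0.0], [0.0, 0.0, 1.0]])
y_pred = np.array([[0.1, 0.7, 0.2], [0.4, 0.5, 0.1]])

pos = np.sum(y_true * y_pred, axis=-1)             # score of the true class
neg = np.max((1.0 - y_true) * y_pred, axis=-1)     # best score among the wrong classes
loss = np.maximum(neg - pos + 1.0, 0.0)            # maximum(neg - pos + 1, 0)
print(loss)   # one categorical hinge value per sample
```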
- - Example: - - >>> y_true = np.random.randint(0, 3, size=(2,)) - >>> y_true = np.eye(np.max(y_true) + 1)[y_true] - >>> y_pred = np.random.random(size=(2, 3)) - >>> loss = keras_core.losses.categorical_hinge(y_true, y_pred) """ y_pred = ops.convert_to_tensor(y_pred) y_true = ops.cast(y_true, y_pred.dtype) @@ -1062,18 +495,18 @@ def mean_absolute_error(y_true, y_pred): loss = mean(abs(y_true - y_pred), axis=-1) ``` + Standalone usage: + + >>> y_true = np.random.randint(0, 2, size=(2, 3)) + >>> y_pred = np.random.random(size=(2, 3)) + >>> loss = keras_core.losses.mean_absolute_error(y_true, y_pred) + Args: y_true: Ground truth values with shape = `[batch_size, d0, .. dN]`. y_pred: The predicted values with shape = `[batch_size, d0, .. dN]`. Returns: Mean absolute error values with shape = `[batch_size, d0, .. dN-1]`. - - Example: - - >>> y_true = np.random.randint(0, 2, size=(2, 3)) - >>> y_pred = np.random.random(size=(2, 3)) - >>> loss = keras_core.losses.mean_absolute_error(y_true, y_pred) """ y_pred = ops.convert_to_tensor(y_pred) y_true = ops.convert_to_tensor(y_true, dtype=y_pred.dtype) @@ -1090,16 +523,18 @@ def mean_absolute_error(y_true, y_pred): def mean_absolute_percentage_error(y_true, y_pred): """Computes the mean absolute percentage error between `y_true` & `y_pred`. - Formula: - - ```python - loss = 100 * mean(abs((y_true - y_pred) / y_true), axis=-1) - ``` + `loss = 100 * mean(abs((y_true - y_pred) / y_true), axis=-1)` Division by zero is prevented by dividing by `maximum(y_true, epsilon)` where `epsilon = keras_core.backend.epsilon()` (default to `1e-7`). + Standalone usage: + + >>> y_true = np.random.random(size=(2, 3)) + >>> y_pred = np.random.random(size=(2, 3)) + >>> loss = keras_core.losses.mean_absolute_percentage_error(y_true, y_pred) + Args: y_true: Ground truth values with shape = `[batch_size, d0, .. dN]`. y_pred: The predicted values with shape = `[batch_size, d0, .. dN]`. @@ -1107,12 +542,6 @@ def mean_absolute_percentage_error(y_true, y_pred): Returns: Mean absolute percentage error values with shape = `[batch_size, d0, .. dN-1]`. - - Example: - - >>> y_true = np.random.random(size=(2, 3)) - >>> y_pred = np.random.random(size=(2, 3)) - >>> loss = keras_core.losses.mean_absolute_percentage_error(y_true, y_pred) """ epsilon = ops.convert_to_tensor(backend.epsilon()) y_pred = ops.convert_to_tensor(y_pred) @@ -1141,19 +570,19 @@ def mean_squared_logarithmic_error(y_true, y_pred): values and 0 values will be replaced with `keras_core.backend.epsilon()` (default to `1e-7`). + Standalone usage: + + >>> y_true = np.random.randint(0, 2, size=(2, 3)) + >>> y_pred = np.random.random(size=(2, 3)) + >>> loss = keras_core.losses.mean_squared_logarithmic_error(y_true, y_pred) + Args: y_true: Ground truth values with shape = `[batch_size, d0, .. dN]`. y_pred: The predicted values with shape = `[batch_size, d0, .. dN]`. Returns: - Mean squared logarithmic error values with shape = `[batch_size, d0, .. + Mean squared logarithmic error values. shape = `[batch_size, d0, .. dN-1]`. - - Example: - - >>> y_true = np.random.randint(0, 2, size=(2, 3)) - >>> y_pred = np.random.random(size=(2, 3)) - >>> loss = keras_core.losses.mean_squared_logarithmic_error(y_true, y_pred) """ epsilon = ops.convert_to_tensor(backend.epsilon()) y_pred = ops.convert_to_tensor(y_pred) @@ -1181,6 +610,12 @@ def cosine_similarity(y_true, y_pred, axis=-1): similarity will be 0 regardless of the proximity between predictions and targets. 
+ Standalone usage: + >>> y_true = [[0., 1.], [1., 1.], [1., 1.]] + >>> y_pred = [[1., 0.], [1., 1.], [-1., -1.]] + >>> loss = keras_core.losses.cosine_similarity(y_true, y_pred, axis=-1) + [-0., -0.99999994, 0.99999994] + Args: y_true: Tensor of true targets. y_pred: Tensor of predicted targets. @@ -1188,13 +623,6 @@ def cosine_similarity(y_true, y_pred, axis=-1): Returns: Cosine similarity tensor. - - Example: - - >>> y_true = [[0., 1.], [1., 1.], [1., 1.]] - >>> y_pred = [[1., 0.], [1., 1.], [-1., -1.]] - >>> loss = keras_core.losses.cosine_similarity(y_true, y_pred, axis=-1) - [-0., -0.99999994, 0.99999994] """ y_pred = ops.convert_to_tensor(y_pred) y_true = ops.convert_to_tensor(y_true, dtype=y_pred.dtype) @@ -1219,14 +647,7 @@ def kl_divergence(y_true, y_pred): loss = y_true * log(y_true / y_pred) ``` - Args: - y_true: Tensor of true targets. - y_pred: Tensor of predicted targets. - - Returns: - KL Divergence loss values with shape = `[batch_size, d0, .. dN-1]`. - - Example: + Standalone usage: >>> y_true = np.random.randint(0, 2, size=(2, 3)).astype(np.float32) >>> y_pred = np.random.random(size=(2, 3)) @@ -1236,6 +657,13 @@ def kl_divergence(y_true, y_pred): >>> y_pred = ops.clip(y_pred, 1e-7, 1) >>> assert np.array_equal( ... loss, np.sum(y_true * np.log(y_true / y_pred), axis=-1)) + + Args: + y_true: Tensor of true targets. + y_pred: Tensor of predicted targets. + + Returns: + KL Divergence loss values with shape = `[batch_size, d0, .. dN-1]`. """ y_pred = ops.convert_to_tensor(y_pred) y_true = ops.convert_to_tensor(y_true, y_pred.dtype) @@ -1259,14 +687,7 @@ def poisson(y_true, y_pred): loss = y_pred - y_true * log(y_pred) ``` - Args: - y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`. - y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`. - - Returns: - Poisson loss values with shape = `[batch_size, d0, .. dN-1]`. - - Example: + Standalone usage: >>> y_true = np.random.randint(0, 2, size=(2, 3)) >>> y_pred = np.random.random(size=(2, 3)) @@ -1276,344 +697,15 @@ def poisson(y_true, y_pred): >>> assert np.allclose( ... loss, np.mean(y_pred - y_true * np.log(y_pred), axis=-1), ... atol=1e-5) + + Args: + y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`. + y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`. + + Returns: + Poisson loss values with shape = `[batch_size, d0, .. dN-1]`. """ y_pred = ops.convert_to_tensor(y_pred) y_true = ops.convert_to_tensor(y_true, dtype=y_pred.dtype) epsilon = ops.convert_to_tensor(backend.epsilon()) return ops.mean(y_pred - y_true * ops.log(y_pred + epsilon), axis=-1) - - -@keras_core_export( - [ - "keras_core.metrics.categorical_crossentropy", - "keras_core.losses.categorical_crossentropy", - ] -) -def categorical_crossentropy( - y_true, y_pred, from_logits=False, label_smoothing=0.0, axis=-1 -): - """Computes the categorical crossentropy loss. - - Args: - y_true: Tensor of one-hot true targets. - y_pred: Tensor of predicted targets. - from_logits: Whether `y_pred` is expected to be a logits tensor. By - default, we assume that `y_pred` encodes a probability distribution. - label_smoothing: Float in [0, 1]. If > `0` then smooth the labels. For - example, if `0.1`, use `0.1 / num_classes` for non-target labels - and `0.9 + 0.1 / num_classes` for target labels. - axis: Defaults to -1. The dimension along which the entropy is - computed. - - Returns: - Categorical crossentropy loss value. 
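The KL divergence formula above, `loss = y_true * log(y_true / y_pred)`, summed over the last axis with both distributions clipped away from zero as in the doctest, looks like this in plain NumPy (illustrative only):

```python
import numpy as np

y_true = np.array([[0.0, 1.0], [0.4, 0.6]])
y_pred = np.array([[0.3, 0.7], [0.5, 0.5]])

# Clip both distributions away from zero, as in the doctest above.
y_true_c = np.clip(y_true, 1e-7, 1.0)
y_pred_c = np.clip(y_pred, 1e-7, 1.0)

# loss = sum(y_true * log(y_true / y_pred), axis=-1)
kl = np.sum(y_true_c * np.log(y_true_c / y_pred_c), axis=-1)
print(kl)   # one KL divergence value per row
```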
- - Example: - - >>> y_true = [[0, 1, 0], [0, 0, 1]] - >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]] - >>> loss = keras_core.losses.categorical_crossentropy(y_true, y_pred) - >>> assert loss.shape == (2,) - >>> loss.numpy() - array([0.0513, 2.303], dtype=float32) - """ - if isinstance(axis, bool): - raise ValueError( - "`axis` must be of type `int`. " - f"Received: axis={axis} of type {type(axis)}" - ) - y_pred = ops.convert_to_tensor(y_pred) - y_true = ops.cast(y_true, y_pred.dtype) - - if y_pred.shape[-1] == 1: - warnings.warn( - "In loss categorical_crossentropy, expected " - "y_pred.shape to be (batch_size, num_classes) " - f"with num_classes > 1. Received: y_pred.shape={y_pred.shape}. " - "Consider using 'binary_crossentropy' if you only have 2 classes.", - SyntaxWarning, - stacklevel=2, - ) - - if label_smoothing: - num_classes = ops.cast(ops.shape(y_true)[-1], y_pred.dtype) - y_true = y_true * (1.0 - label_smoothing) + ( - label_smoothing / num_classes - ) - - return ops.categorical_crossentropy( - y_true, y_pred, from_logits=from_logits, axis=axis - ) - - -@keras_core_export( - [ - "keras_core.metrics.categorical_focal_crossentropy", - "keras_core.losses.categorical_focal_crossentropy", - ] -) -def categorical_focal_crossentropy( - y_true, - y_pred, - alpha=0.25, - gamma=2.0, - from_logits=False, - label_smoothing=0.0, - axis=-1, -): - """Computes the categorical focal crossentropy loss. - - Args: - y_true: Tensor of one-hot true targets. - y_pred: Tensor of predicted targets. - alpha: A weight balancing factor for all classes, default is `0.25` as - mentioned in the reference. It can be a list of floats or a scalar. - In the multi-class case, alpha may be set by inverse class - frequency by using `compute_class_weight` from `sklearn.utils`. - gamma: A focusing parameter, default is `2.0` as mentioned in the - reference. It helps to gradually reduce the importance given to - simple examples in a smooth manner. When `gamma` = 0, there is - no focal effect on the categorical crossentropy. - from_logits: Whether `y_pred` is expected to be a logits tensor. By - default, we assume that `y_pred` encodes a probability - distribution. - label_smoothing: Float in [0, 1]. If > `0` then smooth the labels. For - example, if `0.1`, use `0.1 / num_classes` for non-target labels - and `0.9 + 0.1 / num_classes` for target labels. - axis: Defaults to -1. The dimension along which the entropy is - computed. - - Returns: - Categorical focal crossentropy loss value. - - Example: - - >>> y_true = [[0, 1, 0], [0, 0, 1]] - >>> y_pred = [[0.05, 0.9, 0.05], [0.1, 0.85, 0.05]] - >>> loss = keras_core.losses.categorical_focal_crossentropy(y_true, y_pred) - >>> assert loss.shape == (2,) - >>> loss - array([2.63401289e-04, 6.75912094e-01], dtype=float32) - """ - if isinstance(axis, bool): - raise ValueError( - "`axis` must be of type `int`. " - f"Received: axis={axis} of type {type(axis)}" - ) - y_pred = ops.convert_to_tensor(y_pred) - y_true = ops.cast(y_true, y_pred.dtype) - - if y_pred.shape[-1] == 1: - warnings.warn( - "In loss categorical_focal_crossentropy, expected " - "y_pred.shape to be (batch_size, num_classes) " - f"with num_classes > 1. Received: y_pred.shape={y_pred.shape}. 
" - "Consider using 'binary_crossentropy' if you only have 2 classes.", - SyntaxWarning, - stacklevel=2, - ) - - if label_smoothing: - num_classes = ops.cast(ops.shape(y_true)[-1], y_pred.dtype) - y_true = y_true * (1.0 - label_smoothing) + ( - label_smoothing / num_classes - ) - - return ops.categorical_focal_crossentropy( - target=y_true, - output=y_pred, - alpha=alpha, - gamma=gamma, - from_logits=from_logits, - axis=axis, - ) - - -@keras_core_export( - [ - "keras_core.metrics.sparse_categorical_crossentropy", - "keras_core.losses.sparse_categorical_crossentropy", - ] -) -def sparse_categorical_crossentropy( - y_true, y_pred, from_logits=False, axis=-1, ignore_class=None -): - """Computes the sparse categorical crossentropy loss. - - Args: - y_true: Ground truth values. - y_pred: The predicted values. - from_logits: Whether `y_pred` is expected to be a logits tensor. By - default, we assume that `y_pred` encodes a probability distribution. - axis: Defaults to -1. The dimension along which the entropy is - computed. - ignore_class: Optional integer. The ID of a class to be ignored during - loss computation. This is useful, for example, in segmentation - problems featuring a "void" class (commonly -1 or 255) - in segmentation maps. By default (`ignore_class=None`), - all classes are considered. - - Returns: - Sparse categorical crossentropy loss value. - - Examples: - - >>> y_true = [1, 2] - >>> y_pred = [[0.05, 0.95, 0], [0.1, 0.8, 0.1]] - >>> loss = keras_core.losses.sparse_categorical_crossentropy(y_true, y_pred) - >>> assert loss.shape == (2,) - >>> loss - array([0.0513, 2.303], dtype=float32) - - >>> y_true = [[[ 0, 2], - ... [-1, -1]], - ... [[ 0, 2], - ... [-1, -1]]] - >>> y_pred = [[[[1.0, 0.0, 0.0], [0.0, 0.0, 1.0]], - ... [[0.2, 0.5, 0.3], [0.0, 1.0, 0.0]]], - ... [[[1.0, 0.0, 0.0], [0.0, 0.5, 0.5]], - ... [[0.2, 0.5, 0.3], [0.0, 1.0, 0.0]]]] - >>> loss = keras_core.losses.sparse_categorical_crossentropy( - ... y_true, y_pred, ignore_class=-1) - array([[[2.3841855e-07, 2.3841855e-07], - [0.0000000e+00, 0.0000000e+00]], - [[2.3841855e-07, 6.9314730e-01], - [0.0000000e+00, 0.0000000e+00]]], dtype=float32) - """ - return ops.sparse_categorical_crossentropy( - y_true, - y_pred, - from_logits=from_logits, - ignore_class=ignore_class, - axis=axis, - ) - - -@keras_core_export( - [ - "keras_core.metrics.binary_crossentropy", - "keras_core.losses.binary_crossentropy", - ] -) -def binary_crossentropy( - y_true, y_pred, from_logits=False, label_smoothing=0.0, axis=-1 -): - """Computes the binary crossentropy loss. - - Args: - y_true: Ground truth values. shape = `[batch_size, d0, .. dN]`. - y_pred: The predicted values. shape = `[batch_size, d0, .. dN]`. - from_logits: Whether `y_pred` is expected to be a logits tensor. By - default, we assume that `y_pred` encodes a probability distribution. - label_smoothing: Float in `[0, 1]`. If > `0` then smooth the labels by - squeezing them towards 0.5, that is, - using `1. - 0.5 * label_smoothing` for the target class - and `0.5 * label_smoothing` for the non-target class. - axis: The axis along which the mean is computed. Defaults to -1. - - Returns: - Binary crossentropy loss value. shape = `[batch_size, d0, .. dN-1]`. 
- - Example: - - >>> y_true = [[0, 1], [0, 0]] - >>> y_pred = [[0.6, 0.4], [0.4, 0.6]] - >>> loss = keras_core.losses.binary_crossentropy(y_true, y_pred) - >>> assert loss.shape == (2,) - >>> loss - array([0.916 , 0.714], dtype=float32) - """ - y_pred = ops.convert_to_tensor(y_pred) - y_true = ops.cast(y_true, y_pred.dtype) - - if label_smoothing: - y_true = y_true * (1.0 - label_smoothing) + 0.5 * label_smoothing - - return ops.mean( - ops.binary_crossentropy(y_true, y_pred, from_logits=from_logits), - axis=axis, - ) - - -@keras_core_export( - [ - "keras_core.metrics.binary_focal_crossentropy", - "keras_core.losses.binary_focal_crossentropy", - ] -) -def binary_focal_crossentropy( - y_true, - y_pred, - apply_class_balancing=False, - alpha=0.25, - gamma=2.0, - from_logits=False, - label_smoothing=0.0, - axis=-1, -): - """Computes the binary focal crossentropy loss. - - According to [Lin et al., 2018](https://arxiv.org/pdf/1708.02002.pdf), it - helps to apply a focal factor to down-weight easy examples and focus more on - hard examples. By default, the focal tensor is computed as follows: - - `focal_factor = (1 - output)**gamma` for class 1 - `focal_factor = output**gamma` for class 0 - where `gamma` is a focusing parameter. When `gamma` = 0, there is no focal - effect on the binary crossentropy loss. - - If `apply_class_balancing == True`, this function also takes into account a - weight balancing factor for the binary classes 0 and 1 as follows: - - `weight = alpha` for class 1 (`target == 1`) - `weight = 1 - alpha` for class 0 - where `alpha` is a float in the range of `[0, 1]`. - - Args: - y_true: Ground truth values, of shape `(batch_size, d0, .. dN)`. - y_pred: The predicted values, of shape `(batch_size, d0, .. dN)`. - apply_class_balancing: A bool, whether to apply weight balancing on the - binary classes 0 and 1. - alpha: A weight balancing factor for class 1, default is `0.25` as - mentioned in the reference. The weight for class 0 is `1.0 - alpha`. - gamma: A focusing parameter, default is `2.0` as mentioned in the - reference. - from_logits: Whether `y_pred` is expected to be a logits tensor. By - default, we assume that `y_pred` encodes a probability distribution. - label_smoothing: Float in `[0, 1]`. If > `0` then smooth the labels by - squeezing them towards 0.5, that is, - using `1. - 0.5 * label_smoothing` for the target class - and `0.5 * label_smoothing` for the non-target class. - axis: The axis along which the mean is computed. Defaults to `-1`. - - Returns: - Binary focal crossentropy loss value - with shape = `[batch_size, d0, .. dN-1]`. - - Example: - - >>> y_true = [[0, 1], [0, 0]] - >>> y_pred = [[0.6, 0.4], [0.4, 0.6]] - >>> loss = keras_core.losses.binary_focal_crossentropy( - ... y_true, y_pred, gamma=2) - >>> assert loss.shape == (2,) - >>> loss - array([0.330, 0.206], dtype=float32) - """ - y_pred = ops.convert_to_tensor(y_pred) - y_true = ops.cast(y_true, y_pred.dtype) - - if label_smoothing: - y_true = y_true * (1.0 - label_smoothing) + 0.5 * label_smoothing - - return ops.mean( - ops.binary_focal_crossentropy( - target=y_true, - output=y_pred, - apply_class_balancing=apply_class_balancing, - alpha=alpha, - gamma=gamma, - from_logits=from_logits, - ), - axis=axis, - )
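Finally, the probability path of `binary_crossentropy` above, including its 0.5-squeeze label smoothing, can be mirrored in NumPy. `binary_crossentropy_np` is a hypothetical name for this sketch; the real backend op handles clipping and logits internally, so treat this only as a check of the documented formula:

```python
import numpy as np


def binary_crossentropy_np(y_true, y_pred, label_smoothing=0.0):
    """NumPy sketch of the probability path of binary_crossentropy above."""
    y_true = np.asarray(y_true, dtype="float64")
    y_pred = np.clip(np.asarray(y_pred, dtype="float64"), 1e-7, 1 - 1e-7)
    if label_smoothing:
        # Squeeze labels towards 0.5, exactly as in the function body above.
        y_true = y_true * (1.0 - label_smoothing) + 0.5 * label_smoothing
    bce = -(y_true * np.log(y_pred) + (1 - y_true) * np.log(1 - y_pred))
    return bce.mean(axis=-1)


y_true = [[0, 1], [0, 0]]
y_pred = [[0.6, 0.4], [0.4, 0.6]]
print(binary_crossentropy_np(y_true, y_pred))   # roughly [0.916, 0.714], as in the docstring
```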