From 31cd921e5034d5f7e8ce40b0b309f164cebe0580 Mon Sep 17 00:00:00 2001
From: Chen Qian
Date: Thu, 25 May 2023 21:45:35 -0700
Subject: [PATCH] Fix some docstring in keras_core/losses and fill in missing
 tests (#219)

* initials

* add tests
---
 keras_core/losses/losses.py      | 72 +++++++++++++++-----------------
 keras_core/losses/losses_test.py | 56 ++++++++++++++++++-------
 2 files changed, 74 insertions(+), 54 deletions(-)

diff --git a/keras_core/losses/losses.py b/keras_core/losses/losses.py
index 544314064..0006c0fac 100644
--- a/keras_core/losses/losses.py
+++ b/keras_core/losses/losses.py
@@ -47,7 +47,7 @@ class MeanSquaredError(LossFunctionWrapper):
     Args:
         reduction: Type of reduction to apply to the loss. In almost all cases
             this should be `"sum_over_batch_size"`.
-            Suuported options are `"sum"`, `"sum_over_batch_size"` or `None`.
+            Supported options are `"sum"`, `"sum_over_batch_size"` or `None`.
         name: Optional name for the loss instance.
     """

@@ -73,7 +73,7 @@ class MeanAbsoluteError(LossFunctionWrapper):
     Args:
         reduction: Type of reduction to apply to the loss. In almost all cases
             this should be `"sum_over_batch_size"`.
-            Suuported options are `"sum"`, `"sum_over_batch_size"` or `None`.
+            Supported options are `"sum"`, `"sum_over_batch_size"` or `None`.
         name: Optional name for the loss instance.
     """

@@ -99,7 +99,7 @@ class MeanAbsolutePercentageError(LossFunctionWrapper):
     Args:
         reduction: Type of reduction to apply to the loss. In almost all cases
             this should be `"sum_over_batch_size"`.
-            Suuported options are `"sum"`, `"sum_over_batch_size"` or `None`.
+            Supported options are `"sum"`, `"sum_over_batch_size"` or `None`.
         name: Optional name for the loss instance.
     """

@@ -129,7 +129,7 @@ class MeanSquaredLogarithmicError(LossFunctionWrapper):
     Args:
         reduction: Type of reduction to apply to the loss. In almost all cases
             this should be `"sum_over_batch_size"`.
-            Suuported options are `"sum"`, `"sum_over_batch_size"` or `None`.
+            Supported options are `"sum"`, `"sum_over_batch_size"` or `None`.
         name: Optional name for the loss instance.
     """

@@ -168,7 +168,7 @@ class CosineSimilarity(LossFunctionWrapper):
             (the features axis). Defaults to -1.
         reduction: Type of reduction to apply to the loss. In almost all cases
             this should be `"sum_over_batch_size"`.
-            Suuported options are `"sum"`, `"sum_over_batch_size"` or `None`.
+            Supported options are `"sum"`, `"sum_over_batch_size"` or `None`.
         name: Optional name for the loss instance.
     """

@@ -257,7 +257,7 @@ class Hinge(LossFunctionWrapper):
     Args:
         reduction: Type of reduction to apply to the loss. In almost all cases
             this should be `"sum_over_batch_size"`.
-            Suuported options are `"sum"`, `"sum_over_batch_size"` or `None`.
+            Supported options are `"sum"`, `"sum_over_batch_size"` or `None`.
         name: Optional name for the loss instance.
     """

@@ -284,7 +284,7 @@ class SquaredHinge(LossFunctionWrapper):
     Args:
         reduction: Type of reduction to apply to the loss. In almost all cases
             this should be `"sum_over_batch_size"`.
-            Suuported options are `"sum"`, `"sum_over_batch_size"` or `None`.
+            Supported options are `"sum"`, `"sum_over_batch_size"` or `None`.
         name: Optional name for the loss instance.
     """

@@ -310,7 +310,7 @@ class CategoricalHinge(LossFunctionWrapper):
     Args:
         reduction: Type of reduction to apply to the loss. In almost all cases
             this should be `"sum_over_batch_size"`.
-            Suuported options are `"sum"`, `"sum_over_batch_size"` or `None`.
+            Supported options are `"sum"`, `"sum_over_batch_size"` or `None`.
         name: Optional name for the loss instance.
""" @@ -336,7 +336,7 @@ class KLDivergence(LossFunctionWrapper): Args: reduction: Type of reduction to apply to the loss. In almost all cases this should be `"sum_over_batch_size"`. - Suuported options are `"sum"`, `"sum_over_batch_size"` or `None`. + Supported options are `"sum"`, `"sum_over_batch_size"` or `None`. name: Optional name for the loss instance. """ @@ -360,7 +360,7 @@ class Poisson(LossFunctionWrapper): Args: reduction: Type of reduction to apply to the loss. In almost all cases this should be `"sum_over_batch_size"`. - Suuported options are `"sum"`, `"sum_over_batch_size"` or `None`. + Supported options are `"sum"`, `"sum_over_batch_size"` or `None`. name: Optional name for the loss instance. """ @@ -380,26 +380,25 @@ class BinaryCrossentropy(LossFunctionWrapper): - `y_true` (true label): This is either 0 or 1. - `y_pred` (predicted value): This is the model's prediction, i.e, a single - floating-point value which either represents a - [logit](https://en.wikipedia.org/wiki/Logit), (i.e, value in [-inf, inf] - when `from_logits=True`) or a probability (i.e, value in [0., 1.] when - `from_logits=False`). + floating-point value which either represents a + [logit](https://en.wikipedia.org/wiki/Logit), (i.e, value in [-inf, inf] + when `from_logits=True`) or a probability (i.e, value in [0., 1.] when + `from_logits=False`). Args: from_logits: Whether to interpret `y_pred` as a tensor of [logit](https://en.wikipedia.org/wiki/Logit) values. By default, we - assume that `y_pred` contains probabilities (i.e., values in [0, - 1]). + assume that `y_pred` is probabilities (i.e., values in [0, 1]). label_smoothing: Float in range [0, 1]. When 0, no smoothing occurs. When > 0, we compute the loss between the predicted labels and a smoothed version of the true labels, where the smoothing squeezes the labels towards 0.5. Larger values of `label_smoothing` correspond to heavier smoothing. - axis: The axis along which to compute crossentropy (the features - axis). Defaults to -1. + axis: The axis along which to compute crossentropy (the features axis). + Defaults to -1. reduction: Type of reduction to apply to the loss. In almost all cases this should be `"sum_over_batch_size"`. - Suuported options are `"sum"`, `"sum_over_batch_size"` or `None`. + Supported options are `"sum"`, `"sum_over_batch_size"` or `None`. name: Optional name for the loss instance. Examples: @@ -493,10 +492,10 @@ class BinaryFocalCrossentropy(LossFunctionWrapper): - `y_true` (true label): This is either 0 or 1. - `y_pred` (predicted value): This is the model's prediction, i.e, a single - floating-point value which either represents a - [logit](https://en.wikipedia.org/wiki/Logit), (i.e, value in [-inf, inf] - when `from_logits=True`) or a probability (i.e, value in `[0., 1.]` when - `from_logits=False`). + floating-point value which either represents a + [logit](https://en.wikipedia.org/wiki/Logit), (i.e, value in [-inf, inf] + when `from_logits=True`) or a probability (i.e, value in `[0., 1.]` when + `from_logits=False`). According to [Lin et al., 2018](https://arxiv.org/pdf/1708.02002.pdf), it helps to apply a "focal factor" to down-weight easy examples and focus more @@ -529,7 +528,7 @@ class BinaryFocalCrossentropy(LossFunctionWrapper): Defaults to `-1`. reduction: Type of reduction to apply to the loss. In almost all cases this should be `"sum_over_batch_size"`. - Suuported options are `"sum"`, `"sum_over_batch_size"` or `None`. + Supported options are `"sum"`, `"sum_over_batch_size"` or `None`. 
         name: Optional name for the loss instance.

     Examples:
@@ -665,11 +664,8 @@ class CategoricalCrossentropy(LossFunctionWrapper):
     Use this crossentropy loss function when there are two or more label
     classes. We expect labels to be provided in a `one_hot` representation. If
     you want to provide labels as integers, please use
-    `SparseCategoricalCrossentropy` loss. There should be `# classes` floating
-    point values per feature.
-
-    In the snippet below, there is `# classes` floating pointing values per
-    example. The shape of both `y_pred` and `y_true` are
+    `SparseCategoricalCrossentropy` loss. There should be `num_classes` floating
+    point values per feature, i.e., the shape of both `y_pred` and `y_true` are
     `[batch_size, num_classes]`.

     Args:
@@ -683,7 +679,7 @@ class CategoricalCrossentropy(LossFunctionWrapper):
             axis). Defaults to -1.
         reduction: Type of reduction to apply to the loss. In almost all cases
             this should be `"sum_over_batch_size"`.
-            Suuported options are `"sum"`, `"sum_over_batch_size"` or `None`.
+            Supported options are `"sum"`, `"sum_over_batch_size"` or `None`.
         name: Optional name for the loss instance.

     Examples:
@@ -791,7 +787,7 @@ class CategoricalFocalCrossentropy(LossFunctionWrapper):
     Extending this to multi-class case is straightforward:
     `FL(p_t) = alpha * (1 - p_t) ** gamma * CategoricalCE(y_true, y_pred)`

-    In the snippet below, there is `# classes` floating pointing values per
+    In the snippet below, there is `num_classes` floating pointing values per
     example. The shape of both `y_pred` and `y_true` are
     `(batch_size, num_classes)`.

@@ -814,7 +810,7 @@ class CategoricalFocalCrossentropy(LossFunctionWrapper):
             axis). Defaults to -1.
         reduction: Type of reduction to apply to the loss. In almost all cases
             this should be `"sum_over_batch_size"`.
-            Suuported options are `"sum"`, `"sum_over_batch_size"` or `None`.
+            Supported options are `"sum"`, `"sum_over_batch_size"` or `None`.
         name: Optional name for the loss instance.

     Examples:
@@ -903,16 +899,16 @@ class SparseCategoricalCrossentropy(LossFunctionWrapper):
     feature for `y_true`.

     In the snippet below, there is a single floating point value per example for
-    `y_true` and `# classes` floating pointing values per example for `y_pred`.
-    The shape of `y_true` is `[batch_size]` and the shape of `y_pred` is
-    `[batch_size, num_classes]`.
+    `y_true` and `num_classes` floating pointing values per example for
+    `y_pred`. The shape of `y_true` is `[batch_size]` and the shape of `y_pred`
+    is `[batch_size, num_classes]`.

     Args:
         from_logits: Whether `y_pred` is expected to be a logits tensor. By
             default, we assume that `y_pred` encodes a probability distribution.
         reduction: Type of reduction to apply to the loss. In almost all cases
             this should be `"sum_over_batch_size"`.
-            Suuported options are `"sum"`, `"sum_over_batch_size"` or `None`.
+            Supported options are `"sum"`, `"sum_over_batch_size"` or `None`.
         name: Optional name for the loss instance.

     Examples:
@@ -1729,8 +1725,8 @@ def binary_focal_crossentropy(
     helps to apply a focal factor to down-weight easy examples and focus more
     on hard examples. By default, the focal tensor is computed as follows:

-    `focal_factor = (1 - output)**gamma` for class 1
-    `focal_factor = output**gamma` for class 0
+    `focal_factor = (1 - output) ** gamma` for class 1
+    `focal_factor = output ** gamma` for class 0
     where `gamma` is a focusing parameter. When `gamma` = 0, there is no focal
     effect on the binary crossentropy loss.
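The focal factor described in the hunk above rescales the per-example binary crossentropy so that well-classified examples contribute less to the loss; with `gamma = 0` the factor is identically 1 and plain binary crossentropy is recovered. A minimal NumPy sketch of that weighting (illustrative only, not the keras_core implementation; the names `output` and `gamma` follow the docstring):

```python
import numpy as np


def focal_binary_crossentropy(y_true, output, gamma=2.0, eps=1e-7):
    """Per-example binary crossentropy scaled by the focal factor."""
    output = np.clip(output, eps, 1.0 - eps)
    bce = -(y_true * np.log(output) + (1.0 - y_true) * np.log(1.0 - output))
    # focal_factor = (1 - output) ** gamma for class 1, output ** gamma for class 0
    focal_factor = np.where(y_true == 1.0, (1.0 - output) ** gamma, output ** gamma)
    return focal_factor * bce


y_true = np.array([0.0, 1.0, 0.0, 0.0])
y_pred = np.array([0.1, 0.9, 0.8, 0.2])
print(focal_binary_crossentropy(y_true, y_pred, gamma=2.0))  # hard example (0.8) dominates
print(focal_binary_crossentropy(y_true, y_pred, gamma=0.0))  # reduces to plain crossentropy
```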
diff --git a/keras_core/losses/losses_test.py b/keras_core/losses/losses_test.py index d40b7c1ca..6401b48b6 100644 --- a/keras_core/losses/losses_test.py +++ b/keras_core/losses/losses_test.py @@ -37,8 +37,16 @@ class MeanSquaredErrorTest(testing.TestCase): self.assertAlmostEqual(loss, 767.8 / 6) def test_timestep_weighted(self): - # TODO - pass + mse_obj = losses.MeanSquaredError() + y_true = np.asarray([1, 9, 2, -5, -2, 6]).reshape(2, 3, 1) + y_pred = np.asarray([4, 8, 12, 8, 1, 3]).reshape(2, 3, 1) + sample_weight = np.array([3, 6, 5, 0, 4, 2]).reshape((2, 3)) + loss = mse_obj( + y_true, + y_pred, + sample_weight=sample_weight, + ) + self.assertAlmostEqual(loss, 97.833336) def test_zero_weighted(self): mse_obj = losses.MeanSquaredError() @@ -47,10 +55,6 @@ class MeanSquaredErrorTest(testing.TestCase): loss = mse_obj(y_true, y_pred, sample_weight=0) self.assertAlmostEqual(loss, 0.0) - def test_invalid_sample_weight(self): - # TODO - pass - def test_no_reduction(self): mse_obj = losses.MeanSquaredError(reduction=None) y_true = np.array([[1, 9, 2], [-5, -2, 6]]) @@ -101,8 +105,16 @@ class MeanAbsoluteErrorTest(testing.TestCase): self.assertAlmostEqual(loss, 81.4 / 6) def test_timestep_weighted(self): - # TODO - pass + mae_obj = losses.MeanAbsoluteError() + y_true = np.asarray([1, 9, 2, -5, -2, 6]).reshape(2, 3, 1) + y_pred = np.asarray([4, 8, 12, 8, 1, 3]).reshape(2, 3, 1) + sample_weight = np.array([3, 6, 5, 0, 4, 2]).reshape((2, 3)) + loss = mae_obj( + y_true, + y_pred, + sample_weight=sample_weight, + ) + self.assertAlmostEqual(loss, 13.833333) def test_zero_weighted(self): mae_obj = losses.MeanAbsoluteError() @@ -111,10 +123,6 @@ class MeanAbsoluteErrorTest(testing.TestCase): loss = mae_obj(y_true, y_pred, sample_weight=0) self.assertAlmostEqual(loss, 0.0) - def test_invalid_sample_weight(self): - # TODO - pass - def test_no_reduction(self): mae_obj = losses.MeanAbsoluteError(reduction=None) y_true = np.array([[1, 9, 2], [-5, -2, 6]]) @@ -165,8 +173,16 @@ class MeanAbsolutePercentageErrorTest(testing.TestCase): self.assertAlmostEqual(loss, 422.8888, 3) def test_timestep_weighted(self): - # TODO - pass + mape_obj = losses.MeanAbsolutePercentageError() + y_true = np.asarray([1, 9, 2, -5, -2, 6]).reshape(2, 3, 1) + y_pred = np.asarray([4, 8, 12, 8, 1, 3]).reshape(2, 3, 1) + sample_weight = np.array([3, 6, 5, 0, 4, 2]).reshape((2, 3)) + loss = mape_obj( + y_true, + y_pred, + sample_weight=sample_weight, + ) + self.assertAlmostEqual(loss, 694.4444) def test_zero_weighted(self): mape_obj = losses.MeanAbsolutePercentageError() @@ -212,8 +228,16 @@ class MeanSquaredLogarithmicErrorTest(testing.TestCase): self.assertAlmostEqual(loss, 3.7856, 3) def test_timestep_weighted(self): - # TODO - pass + msle_obj = losses.MeanSquaredLogarithmicError() + y_true = np.asarray([1, 9, 2, -5, -2, 6]).reshape(2, 3, 1) + y_pred = np.asarray([4, 8, 12, 8, 1, 3]).reshape(2, 3, 1) + sample_weight = np.array([3, 6, 5, 0, 4, 2]).reshape((2, 3)) + loss = msle_obj( + y_true, + y_pred, + sample_weight=sample_weight, + ) + self.assertAlmostEqual(loss, 2.647374) def test_zero_weighted(self): msle_obj = losses.MeanSquaredLogarithmicError()
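The expected values asserted by the new `test_timestep_weighted` cases can be reproduced by hand. Below is a small NumPy sketch for the `MeanSquaredError` case, assuming the loss is first reduced over the last (features) axis to one value per timestep and that `"sum_over_batch_size"` divides the weighted sum by the number of per-timestep entries:

```python
import numpy as np

y_true = np.asarray([1, 9, 2, -5, -2, 6], dtype="float32").reshape(2, 3, 1)
y_pred = np.asarray([4, 8, 12, 8, 1, 3], dtype="float32").reshape(2, 3, 1)
sample_weight = np.asarray([3, 6, 5, 0, 4, 2], dtype="float32").reshape(2, 3)

# Squared error reduced over the features axis -> one loss value per timestep.
per_timestep = np.mean((y_true - y_pred) ** 2, axis=-1)  # shape (2, 3)
# "sum_over_batch_size": weighted sum divided by the number of loss entries.
loss = np.sum(per_timestep * sample_weight) / per_timestep.size
print(loss)  # ~97.8333, matching self.assertAlmostEqual(loss, 97.833336)
```

The MAE, MAPE, and MSLE expectations follow the same pattern, with the squared error replaced by the respective element-wise loss.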