From 9f3428568e849c578f7495351fe20a38567b695c Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Mon, 8 May 2023 14:42:13 -0700 Subject: [PATCH] Add discretization layer wrapper. --- keras_core/layers/__init__.py | 1 + .../layers/preprocessing/discretization.py | 185 ++++++++++++++++++ .../preprocessing/discretization_test.py | 37 ++++ .../preprocessing/text_vectorization.py | 0 keras_core/optimizers/schedules/__init__.py | 16 -- .../schedules/learning_rate_schedule_test.py | 94 +++++---- 6 files changed, 282 insertions(+), 51 deletions(-) create mode 100644 keras_core/layers/preprocessing/discretization.py create mode 100644 keras_core/layers/preprocessing/discretization_test.py create mode 100644 keras_core/layers/preprocessing/text_vectorization.py diff --git a/keras_core/layers/__init__.py b/keras_core/layers/__init__.py index dcafbca75..fce385caf 100644 --- a/keras_core/layers/__init__.py +++ b/keras_core/layers/__init__.py @@ -53,6 +53,7 @@ from keras_core.layers.pooling.max_pooling1d import MaxPooling1D from keras_core.layers.pooling.max_pooling2d import MaxPooling2D from keras_core.layers.pooling.max_pooling3d import MaxPooling3D from keras_core.layers.preprocessing.center_crop import CenterCrop +from keras_core.layers.preprocessing.discretization import Discretization from keras_core.layers.preprocessing.normalization import Normalization from keras_core.layers.preprocessing.rescaling import Rescaling from keras_core.layers.preprocessing.resizing import Resizing diff --git a/keras_core/layers/preprocessing/discretization.py b/keras_core/layers/preprocessing/discretization.py new file mode 100644 index 000000000..a290d7f24 --- /dev/null +++ b/keras_core/layers/preprocessing/discretization.py @@ -0,0 +1,185 @@ +import numpy as np +import tensorflow as tf + +from keras_core import backend +from keras_core.layers.layer import Layer + + +class Discretization(Layer): + """A preprocessing layer which buckets continuous features by ranges. + + This layer will place each element of its input data into one of several + contiguous ranges and output an integer index indicating which range each + element was placed in. + + **Note:** This layer wraps `tf.keras.layers.Discretization`. It cannot + be used as part of the compiled computation graph of a model with + any backend other than TensorFlow. + It can however be used with any backend when running eagerly. + It can also always be used as part of an input preprocessing pipeline + with any backend (outside the model itself), which is how we recommend + to use this layer. + + Input shape: + Any array of dimension 2 or higher. + + Output shape: + Same as input shape. + + Arguments: + bin_boundaries: A list of bin boundaries. + The leftmost and rightmost bins + will always extend to `-inf` and `inf`, + so `bin_boundaries=[0., 1., 2.]` + generates bins `(-inf, 0.)`, `[0., 1.)`, `[1., 2.)`, + and `[2., +inf)`. + If this option is set, `adapt()` should not be called. + num_bins: The integer number of bins to compute. + If this option is set, + `adapt()` should be called to learn the bin boundaries. + epsilon: Error tolerance, typically a small fraction + close to zero (e.g. 0.01). Higher values of epsilon increase + the quantile approximation, and hence result in more + unequal buckets, but could improve performance + and resource consumption. + output_mode: Specification for the output of the layer. 
+ Values can be `"int"`, `"one_hot"`, `"multi_hot"`, or + `"count"` configuring the layer as follows: + - `"int"`: Return the discretized bin indices directly. + - `"one_hot"`: Encodes each individual element in the + input into an array the same size as `num_bins`, + containing a 1 at the input's bin + index. If the last dimension is size 1, will encode on that + dimension. If the last dimension is not size 1, + will append a new dimension for the encoded output. + - `"multi_hot"`: Encodes each sample in the input into a + single array the same size as `num_bins`, + containing a 1 for each bin index + index present in the sample. + Treats the last dimension as the sample + dimension, if input shape is `(..., sample_length)`, + output shape will be `(..., num_tokens)`. + - `"count"`: As `"multi_hot"`, but the int array contains + a count of the number of times the bin index appeared + in the sample. + Defaults to `"int"`. + sparse: Boolean. Only applicable to `"one_hot"`, `"multi_hot"`, + and `"count"` output modes. Only supported with TensorFlow + backend. If `True`, returns a `SparseTensor` instead of + a dense `Tensor`. Defaults to `False`. + + Examples: + + Bucketize float values based on provided buckets. + >>> input = np.array([[-1.5, 1.0, 3.4, .5], [0.0, 3.0, 1.3, 0.0]]) + >>> layer = Discretization(bin_boundaries=[0., 1., 2.]) + >>> layer(input) + array([[0, 2, 3, 1], + [1, 3, 2, 1]]) + + Bucketize float values based on a number of buckets to compute. + >>> input = np.array([[-1.5, 1.0, 3.4, .5], [0.0, 3.0, 1.3, 0.0]]) + >>> layer = Discretization(num_bins=4, epsilon=0.01) + >>> layer.adapt(input) + >>> layer(input) + array([[0, 2, 3, 2], + [1, 3, 3, 1]]) + """ + + def __init__( + self, + bin_boundaries=None, + num_bins=None, + epsilon=0.01, + output_mode="int", + sparse=False, + name=None, + ): + super().__init__(name=name) + if sparse and backend.backend() != "tensorflow": + raise ValueError() + self.layer = tf.keras.layers.Discretization( + bin_boundaries=bin_boundaries, + num_bins=num_bins, + epsilon=epsilon, + output_mode=output_mode, + sparse=sparse, + name=name, + ) + self.bin_boundaries = ( + bin_boundaries if bin_boundaries is not None else [] + ) + self.num_bins = num_bins + self.epsilon = epsilon + self.output_mode = output_mode + self.sparse = sparse + + def build(self, input_shape): + self.layer.build(input_shape) + self.built = True + + # We override this method solely to generate a docstring. + def adapt(self, data, batch_size=None, steps=None): + """Computes bin boundaries from quantiles in a input dataset. + + Calling `adapt()` on a `Discretization` layer is an alternative to + passing in a `bin_boundaries` argument during construction. A + `Discretization` layer should always be either adapted over a dataset or + passed `bin_boundaries`. + + During `adapt()`, the layer will estimate the quantile boundaries of the + input dataset. The number of quantiles can be controlled via the + `num_bins` argument, and the error tolerance for quantile boundaries can + be controlled via the `epsilon` argument. + + Arguments: + data: The data to train on. It can be passed either as a + `tf.data.Dataset`, or as a numpy array. + batch_size: Integer or `None`. + Number of samples per state update. + If unspecified, `batch_size` will default to 32. + Do not specify the `batch_size` if your data is in the + form of datasets, generators, or `keras.utils.Sequence` instances + (since they generate batches). + steps: Integer or `None`. 
+ Total number of steps (batches of samples) + When training with input tensors such as + TensorFlow data tensors, the default `None` is equal to + the number of samples in your dataset divided by + the batch size, or 1 if that cannot be determined. If x is a + `tf.data.Dataset`, and `steps` is `None`, the epoch will run until + the input dataset is exhausted. When passing an infinitely + repeating dataset, you must specify the `steps` argument. This + argument is not supported with array inputs. + """ + self.layer.adapt(data, batch_size=batch_size, steps=steps) + + def update_state(self, data): + self.layer.update_state(data) + + def finalize_state(self): + self.layer.finalize_state() + + def reset_state(self): + self.layer.reset_state() + + def get_config(self): + return { + "bin_boundaries": self.bin_boundaries, + "num_bins": self.num_bins, + "epsilon": self.epsilon, + "output_mode": self.output_mode, + "sparse": self.sparse, + "name": self.name, + } + + def compute_output_shape(self, input_shape): + return input_shape + + def call(self, inputs): + if not isinstance(inputs, (tf.Tensor, np.ndarray)): + inputs = tf.convert_to_tensor(np.array(inputs)) + outputs = self.layer.call(inputs) + if backend.backend() != "tensorflow": + outputs = backend.convert_to_tensor(outputs) + return outputs diff --git a/keras_core/layers/preprocessing/discretization_test.py b/keras_core/layers/preprocessing/discretization_test.py new file mode 100644 index 000000000..52ea0a760 --- /dev/null +++ b/keras_core/layers/preprocessing/discretization_test.py @@ -0,0 +1,37 @@ +import numpy as np + +from keras_core import backend +from keras_core import layers +from keras_core import testing + + +class DicretizationTest(testing.TestCase): + def test_discretization_basics(self): + self.run_layer_test( + layers.Discretization, + init_kwargs={ + "bin_boundaries": [0.0, 0.5, 1.0], + }, + input_shape=(2, 3), + expected_output_shape=(2, 3), + expected_num_trainable_weights=0, + expected_num_non_trainable_weights=0, + expected_num_seed_generators=0, + expected_num_losses=0, + supports_masking=False, + ) + + def test_adapt_flow(self): + layer = layers.Discretization(num_bins=4) + layer.adapt( + np.random.random((32, 3)), + batch_size=8, + ) + output = layer(np.array([[0.0, 0.1, 0.3]])) + self.assertTrue(output.dtype, "int32") + + def test_correctness(self): + layer = layers.Discretization(bin_boundaries=[0.0, 0.5, 1.0]) + output = layer(np.array([[0.0, 0.1, 0.8]])) + self.assertTrue(backend.is_tensor(output)) + self.assertAllClose(output, np.array([[1, 1, 2]])) diff --git a/keras_core/layers/preprocessing/text_vectorization.py b/keras_core/layers/preprocessing/text_vectorization.py new file mode 100644 index 000000000..e69de29bb diff --git a/keras_core/optimizers/schedules/__init__.py b/keras_core/optimizers/schedules/__init__.py index 30e63c9ed..e69de29bb 100644 --- a/keras_core/optimizers/schedules/__init__.py +++ b/keras_core/optimizers/schedules/__init__.py @@ -1,16 +0,0 @@ -from keras_core.optimizers.schedules.learning_rate_schedule import CosineDecay -from keras_core.optimizers.schedules.learning_rate_schedule import ( - CosineDecayRestarts, -) -from keras_core.optimizers.schedules.learning_rate_schedule import ( - ExponentialDecay, -) -from keras_core.optimizers.schedules.learning_rate_schedule import ( - InverseTimeDecay, -) -from keras_core.optimizers.schedules.learning_rate_schedule import ( - PiecewiseConstantDecay, -) -from keras_core.optimizers.schedules.learning_rate_schedule import ( - PolynomialDecay, -) diff 
--git a/keras_core/optimizers/schedules/learning_rate_schedule_test.py b/keras_core/optimizers/schedules/learning_rate_schedule_test.py index 25e50b40d..0bdb09e8a 100644 --- a/keras_core/optimizers/schedules/learning_rate_schedule_test.py +++ b/keras_core/optimizers/schedules/learning_rate_schedule_test.py @@ -6,13 +6,13 @@ import numpy as np from keras_core import backend from keras_core import testing -from keras_core.optimizers import schedules +from keras_core.optimizers.schedules import learning_rate_schedule class ExponentialDecayTest(testing.TestCase): def test_config(self): self.run_class_serialization_test( - schedules.ExponentialDecay( + learning_rate_schedule.ExponentialDecay( initial_learning_rate=0.05, decay_steps=10, decay_rate=0.96, @@ -23,13 +23,15 @@ class ExponentialDecayTest(testing.TestCase): def test_continuous(self): step = 5 - decayed_lr = schedules.ExponentialDecay(0.05, 10, 0.96) + decayed_lr = learning_rate_schedule.ExponentialDecay(0.05, 10, 0.96) expected = 0.05 * 0.96 ** (5.0 / 10.0) self.assertAllClose(decayed_lr(step), expected, 1e-6) def test_staircase(self): step = backend.Variable(1) - decayed_lr = schedules.ExponentialDecay(0.1, 3, 0.96, staircase=True) + decayed_lr = learning_rate_schedule.ExponentialDecay( + 0.1, 3, 0.96, staircase=True + ) # No change to learning rate due to staircase expected = 0.1 @@ -46,7 +48,9 @@ class ExponentialDecayTest(testing.TestCase): def test_variables(self): step = backend.Variable(1) - decayed_lr = schedules.ExponentialDecay(0.1, 3, 0.96, staircase=True) + decayed_lr = learning_rate_schedule.ExponentialDecay( + 0.1, 3, 0.96, staircase=True + ) # No change to learning rate step.assign(1) @@ -62,14 +66,14 @@ class ExponentialDecayTest(testing.TestCase): class PiecewiseConstantDecayTest(testing.TestCase): def test_config(self): self.run_class_serialization_test( - schedules.PiecewiseConstantDecay( + learning_rate_schedule.PiecewiseConstantDecay( boundaries=[10, 20], values=[1, 2, 3], name="my_pcd" ) ) def test_piecewise_values(self): x = backend.Variable(-999) - decayed_lr = schedules.PiecewiseConstantDecay( + decayed_lr = learning_rate_schedule.PiecewiseConstantDecay( [100, 110, 120], [1.0, 0.1, 0.01, 0.001] ) @@ -89,7 +93,9 @@ class PiecewiseConstantDecayTest(testing.TestCase): # Test casting boundaries from int32 to int64. 
x_int64 = backend.Variable(0, dtype="int64") boundaries, values = [1, 2, 3], [0.4, 0.5, 0.6, 0.7] - decayed_lr = schedules.PiecewiseConstantDecay(boundaries, values) + decayed_lr = learning_rate_schedule.PiecewiseConstantDecay( + boundaries, values + ) self.assertAllClose(decayed_lr(x_int64), 0.4, 1e-6) x_int64.assign(1) @@ -105,7 +111,7 @@ class PiecewiseConstantDecayTest(testing.TestCase): class LinearDecayTest(testing.TestCase): def test_config(self): self.run_class_serialization_test( - schedules.PolynomialDecay( + learning_rate_schedule.PolynomialDecay( initial_learning_rate=0.1, decay_steps=100, end_learning_rate=0.005, @@ -119,7 +125,7 @@ class LinearDecayTest(testing.TestCase): step = 5 lr = 0.05 end_lr = 0.0 - decayed_lr = schedules.PolynomialDecay(lr, 10, end_lr) + decayed_lr = learning_rate_schedule.PolynomialDecay(lr, 10, end_lr) expected = lr * 0.5 self.assertAllClose(decayed_lr(step), expected, 1e-6) @@ -127,7 +133,7 @@ class LinearDecayTest(testing.TestCase): step = 10 lr = 0.05 end_lr = 0.001 - decayed_lr = schedules.PolynomialDecay(lr, 10, end_lr) + decayed_lr = learning_rate_schedule.PolynomialDecay(lr, 10, end_lr) expected = end_lr self.assertAllClose(decayed_lr(step), expected, 1e-6) @@ -135,7 +141,7 @@ class LinearDecayTest(testing.TestCase): step = 5 lr = 0.05 end_lr = 0.001 - decayed_lr = schedules.PolynomialDecay(lr, 10, end_lr) + decayed_lr = learning_rate_schedule.PolynomialDecay(lr, 10, end_lr) expected = (lr + end_lr) * 0.5 self.assertAllClose(decayed_lr(step), expected, 1e-6) @@ -143,7 +149,7 @@ class LinearDecayTest(testing.TestCase): step = 15 lr = 0.05 end_lr = 0.001 - decayed_lr = schedules.PolynomialDecay(lr, 10, end_lr) + decayed_lr = learning_rate_schedule.PolynomialDecay(lr, 10, end_lr) expected = end_lr self.assertAllClose(decayed_lr(step), expected, 1e-6) @@ -151,7 +157,9 @@ class LinearDecayTest(testing.TestCase): step = 15 lr = 0.05 end_lr = 0.001 - decayed_lr = schedules.PolynomialDecay(lr, 10, end_lr, cycle=True) + decayed_lr = learning_rate_schedule.PolynomialDecay( + lr, 10, end_lr, cycle=True + ) expected = (lr - end_lr) * 0.25 + end_lr self.assertAllClose(decayed_lr(step), expected, 1e-6) @@ -162,7 +170,9 @@ class SqrtDecayTest(testing.TestCase): lr = 0.05 end_lr = 0.0 power = 0.5 - decayed_lr = schedules.PolynomialDecay(lr, 10, end_lr, power=power) + decayed_lr = learning_rate_schedule.PolynomialDecay( + lr, 10, end_lr, power=power + ) expected = lr * 0.5**power self.assertAllClose(decayed_lr(step), expected, 1e-6) @@ -171,7 +181,9 @@ class SqrtDecayTest(testing.TestCase): lr = 0.05 end_lr = 0.001 power = 0.5 - decayed_lr = schedules.PolynomialDecay(lr, 10, end_lr, power=power) + decayed_lr = learning_rate_schedule.PolynomialDecay( + lr, 10, end_lr, power=power + ) expected = end_lr self.assertAllClose(decayed_lr(step), expected, 1e-6) @@ -180,7 +192,9 @@ class SqrtDecayTest(testing.TestCase): lr = 0.05 end_lr = 0.001 power = 0.5 - decayed_lr = schedules.PolynomialDecay(lr, 10, end_lr, power=power) + decayed_lr = learning_rate_schedule.PolynomialDecay( + lr, 10, end_lr, power=power + ) expected = (lr - end_lr) * 0.5**power + end_lr self.assertAllClose(decayed_lr(step), expected, 1e-6) @@ -189,7 +203,9 @@ class SqrtDecayTest(testing.TestCase): lr = 0.05 end_lr = 0.001 power = 0.5 - decayed_lr = schedules.PolynomialDecay(lr, 10, end_lr, power=power) + decayed_lr = learning_rate_schedule.PolynomialDecay( + lr, 10, end_lr, power=power + ) expected = end_lr self.assertAllClose(decayed_lr(step), expected, 1e-6) @@ -198,7 +214,7 @@ class 
SqrtDecayTest(testing.TestCase): lr = 0.05 end_lr = 0.001 power = 0.5 - decayed_lr = schedules.PolynomialDecay( + decayed_lr = learning_rate_schedule.PolynomialDecay( lr, 10, end_lr, power=power, cycle=True ) expected = (lr - end_lr) * 0.25**power + end_lr @@ -208,7 +224,9 @@ class SqrtDecayTest(testing.TestCase): lr = 0.001 decay_steps = 10 step = 0 - decayed_lr = schedules.PolynomialDecay(lr, decay_steps, cycle=True) + decayed_lr = learning_rate_schedule.PolynomialDecay( + lr, decay_steps, cycle=True + ) expected = lr self.assertAllClose(decayed_lr(step), expected, 1e-6) @@ -216,7 +234,7 @@ class SqrtDecayTest(testing.TestCase): class InverseTimeDecayTest(testing.TestCase): def test_config(self): self.run_class_serialization_test( - schedules.InverseTimeDecay( + learning_rate_schedule.InverseTimeDecay( initial_learning_rate=0.05, decay_steps=10, decay_rate=0.96, @@ -230,7 +248,9 @@ class InverseTimeDecayTest(testing.TestCase): k = 10 decay_rate = 0.96 step = backend.Variable(0) - decayed_lr = schedules.InverseTimeDecay(initial_lr, k, decay_rate) + decayed_lr = learning_rate_schedule.InverseTimeDecay( + initial_lr, k, decay_rate + ) for i in range(k + 1): expected = initial_lr / (1 + i / k * decay_rate) @@ -242,7 +262,7 @@ class InverseTimeDecayTest(testing.TestCase): k = 10 decay_rate = 0.96 step = backend.Variable(0) - decayed_lr = schedules.InverseTimeDecay( + decayed_lr = learning_rate_schedule.InverseTimeDecay( initial_lr, k, decay_rate, staircase=True ) @@ -255,7 +275,7 @@ class InverseTimeDecayTest(testing.TestCase): class CosineDecayTest(testing.TestCase): def test_config(self): self.run_class_serialization_test( - schedules.CosineDecay( + learning_rate_schedule.CosineDecay( initial_learning_rate=0.05, decay_steps=10, alpha=0.1, @@ -275,7 +295,9 @@ class CosineDecayTest(testing.TestCase): num_training_steps = 1000 initial_lr = 1.0 for step in range(0, 1500, 250): - decayed_lr = schedules.CosineDecay(initial_lr, num_training_steps) + decayed_lr = learning_rate_schedule.CosineDecay( + initial_lr, num_training_steps + ) expected = self.np_cosine_decay(step, num_training_steps) self.assertAllClose(decayed_lr(step), expected, 1e-6) @@ -289,7 +311,7 @@ class CosineDecayTest(testing.TestCase): initial_lr = 0.0 target_lr = 10.0 for step in range(0, 1500, 250): - lr = schedules.CosineDecay( + lr = learning_rate_schedule.CosineDecay( initial_lr, 0, warmup_target=target_lr, @@ -305,7 +327,7 @@ class CosineDecayTest(testing.TestCase): initial_lr = 1.0 alpha = 0.1 for step in range(0, 1500, 250): - decayed_lr = schedules.CosineDecay( + decayed_lr = learning_rate_schedule.CosineDecay( initial_lr, num_training_steps, alpha ) expected = self.np_cosine_decay(step, num_training_steps, alpha) @@ -315,7 +337,9 @@ class CosineDecayTest(testing.TestCase): num_training_steps = 1000 initial_lr = np.float64(1.0) for step in range(0, 1500, 250): - decayed_lr = schedules.CosineDecay(initial_lr, num_training_steps) + decayed_lr = learning_rate_schedule.CosineDecay( + initial_lr, num_training_steps + ) expected = self.np_cosine_decay(step, num_training_steps) self.assertAllClose(decayed_lr(step), expected, 1e-6) @@ -325,7 +349,7 @@ class CosineDecayTest(testing.TestCase): initial_lr = 0.0 target_lr = 10.0 for step in range(0, 3000, 250): - lr = schedules.CosineDecay( + lr = learning_rate_schedule.CosineDecay( initial_lr, decay_steps, warmup_target=target_lr, @@ -345,7 +369,7 @@ class CosineDecayTest(testing.TestCase): class CosineDecayRestartsTest(testing.TestCase): def test_config(self): 
self.run_class_serialization_test( - schedules.CosineDecayRestarts( + learning_rate_schedule.CosineDecayRestarts( initial_learning_rate=0.05, first_decay_steps=10, alpha=0.1, @@ -372,7 +396,7 @@ class CosineDecayRestartsTest(testing.TestCase): num_training_steps = 1000 initial_lr = 1.0 for step in range(0, 1500, 250): - decayed_lr = schedules.CosineDecayRestarts( + decayed_lr = learning_rate_schedule.CosineDecayRestarts( initial_lr, num_training_steps ) expected = self.np_cosine_decay_restarts(step, num_training_steps) @@ -382,7 +406,7 @@ class CosineDecayRestartsTest(testing.TestCase): num_training_steps = 1000 initial_lr = np.float64(1.0) for step in range(0, 1500, 250): - decayed_lr = schedules.CosineDecayRestarts( + decayed_lr = learning_rate_schedule.CosineDecayRestarts( initial_lr, num_training_steps ) expected = self.np_cosine_decay_restarts(step, num_training_steps) @@ -393,7 +417,7 @@ class CosineDecayRestartsTest(testing.TestCase): initial_lr = 1.0 alpha = 0.1 for step in range(0, 1500, 250): - decayed_lr = schedules.CosineDecayRestarts( + decayed_lr = learning_rate_schedule.CosineDecayRestarts( initial_lr, num_training_steps, alpha=alpha ) expected = self.np_cosine_decay_restarts( @@ -406,7 +430,7 @@ class CosineDecayRestartsTest(testing.TestCase): initial_lr = 1.0 m_mul = 0.9 for step in range(0, 1500, 250): - decayed_lr = schedules.CosineDecayRestarts( + decayed_lr = learning_rate_schedule.CosineDecayRestarts( initial_lr, num_training_steps, m_mul=m_mul ) expected = self.np_cosine_decay_restarts( @@ -419,7 +443,7 @@ class CosineDecayRestartsTest(testing.TestCase): initial_lr = 1.0 t_mul = 1.0 for step in range(0, 1500, 250): - decayed_lr = schedules.CosineDecayRestarts( + decayed_lr = learning_rate_schedule.CosineDecayRestarts( initial_lr, num_training_steps, t_mul=t_mul ) expected = self.np_cosine_decay_restarts(
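
The Discretization wrapper added above is intended to be used in the input pipeline rather than inside the compiled model when a non-TensorFlow backend is active, as its docstring notes. A minimal usage sketch follows; it assumes the keras_core package layout introduced by this patch, the TensorFlow backend for the tf.data step, and purely illustrative data and variable names.

    # Sketch: bucketizing continuous features with the new wrapper.
    # Assumes the keras_core package from this patch and the TensorFlow
    # backend for the tf.data portion; data/names below are illustrative.
    import numpy as np
    import tensorflow as tf

    from keras_core import layers

    data = np.random.uniform(size=(64, 3)).astype("float32")

    # Learn 4 bin boundaries from the data, then bucketize eagerly.
    discretize = layers.Discretization(num_bins=4, epsilon=0.01)
    discretize.adapt(data, batch_size=16)
    print(discretize(data[:2]))  # integer bin indices, shape (2, 3)

    # Recommended pattern from the docstring: apply the layer in the
    # input pipeline, outside the model itself.
    dataset = (
        tf.data.Dataset.from_tensor_slices(data)
        .batch(16)
        .map(discretize)
    )

When `bin_boundaries` is passed explicitly instead of `num_bins`, the `adapt()` call is skipped entirely and the layer can be applied directly, as in the docstring examples.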
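
The second half of the patch empties keras_core/optimizers/schedules/__init__.py, so the schedule classes are no longer re-exported at the package level; the updated tests import them from the module directly. A minimal sketch of the import path that remains valid after this change, assuming no replacement re-exports are added elsewhere:

    # After this patch, import schedules from the module, not the package.
    from keras_core.optimizers.schedules import learning_rate_schedule

    lr = learning_rate_schedule.ExponentialDecay(
        initial_learning_rate=0.05, decay_steps=10, decay_rate=0.96
    )
    print(lr(5))  # decayed learning rate at step 5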