From b660875f51c6cd9456e8722bfe10c2cd9d45b060 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Fran=C3=A7ois=20Chollet?= <francois.chollet@gmail.com>
Date: Tue, 25 Jul 2023 13:03:01 -0700
Subject: [PATCH] Add tf.keras backwards compat for nearly all non-experimental
 symbols (#603)

* Add tf.keras backwards compatibility for nearly all non-experimental symbols

* Remove print statements

* Fix identity init
---
 keras_core/backend/common/keras_tensor.py     |    4 +-
 keras_core/initializers/__init__.py           |    1 +
 .../initializers/constant_initializers.py     |   61 +-
 ..._test.py => constant_initializers_test.py} |   24 +-
 .../initializers/random_initializers.py       |   65 +-
 keras_core/legacy/__init__.py                 |    0
 keras_core/legacy/backend.py                  | 2287 +++++++++++++++++
 keras_core/legacy/layers.py                   |  245 ++
 keras_core/legacy/losses.py                   |   20 +
 keras_core/legacy/preprocessing/__init__.py   |    0
 keras_core/legacy/preprocessing/image.py      | 1898 ++++++++++++++
 keras_core/legacy/preprocessing/sequence.py   |  324 +++
 keras_core/legacy/preprocessing/text.py       |  338 +++
 keras_core/losses/losses.py                   |   35 +-
 keras_core/regularizers/regularizers.py       |    4 +-
 keras_core/version.py                         |    2 +-
 pip_build.py                                  |   78 +
 17 files changed, 5363 insertions(+), 23 deletions(-)
 rename keras_core/initializers/{constant_initalizers_test.py => constant_initializers_test.py} (59%)
 create mode 100644 keras_core/legacy/__init__.py
 create mode 100644 keras_core/legacy/backend.py
 create mode 100644 keras_core/legacy/layers.py
 create mode 100644 keras_core/legacy/losses.py
 create mode 100644 keras_core/legacy/preprocessing/__init__.py
 create mode 100644 keras_core/legacy/preprocessing/image.py
 create mode 100644 keras_core/legacy/preprocessing/sequence.py
 create mode 100644 keras_core/legacy/preprocessing/text.py

diff --git a/keras_core/backend/common/keras_tensor.py b/keras_core/backend/common/keras_tensor.py
index 09fac1e32..e22983598 100644
--- a/keras_core/backend/common/keras_tensor.py
+++ b/keras_core/backend/common/keras_tensor.py
@@ -296,7 +296,9 @@ def any_symbolic_tensors(args=None, kwargs=None):
     return False
 
 
-@keras_core_export("keras_core.utils.is_keras_tensor")
+@keras_core_export(
+    ["keras_core.utils.is_keras_tensor", "keras_core.backend.is_keras_tensor"]
+)
 def is_keras_tensor(x):
     """Returns whether `x` is a Keras tensor.
 
diff --git a/keras_core/initializers/__init__.py b/keras_core/initializers/__init__.py
index 4b17a8166..07cfbc6a8 100644
--- a/keras_core/initializers/__init__.py
+++ b/keras_core/initializers/__init__.py
@@ -2,6 +2,7 @@ import inspect
 
 from keras_core.api_export import keras_core_export
 from keras_core.initializers.constant_initializers import Constant
+from keras_core.initializers.constant_initializers import Identity
 from keras_core.initializers.constant_initializers import Ones
 from keras_core.initializers.constant_initializers import Zeros
 from keras_core.initializers.initializer import Initializer
diff --git a/keras_core/initializers/constant_initializers.py b/keras_core/initializers/constant_initializers.py
index 9dda16de1..17ace8bf2 100644
--- a/keras_core/initializers/constant_initializers.py
+++ b/keras_core/initializers/constant_initializers.py
@@ -4,7 +4,9 @@ from keras_core.backend import standardize_dtype
 from keras_core.initializers.initializer import Initializer
 
 
-@keras_core_export("keras_core.initializers.Constant")
+@keras_core_export(
+    ["keras_core.initializers.Constant", "keras_core.initializers.constant"]
+)
 class Constant(Initializer):
     """Initializer that generates tensors with constant values.
 
@@ -37,7 +39,9 @@ class Constant(Initializer):
         return {"value": self.value}
 
 
-@keras_core_export("keras_core.initializers.Zeros")
+@keras_core_export(
+    ["keras_core.initializers.Zeros", "keras_core.initializers.zeros"]
+)
 class Zeros(Initializer):
     """Initializer that generates tensors initialized to 0.
 
@@ -67,7 +71,9 @@ class Zeros(Initializer):
         return ops.zeros(shape, dtype=dtype)
 
 
-@keras_core_export("keras_core.initializers.Ones")
+@keras_core_export(
+    ["keras_core.initializers.Ones", "keras_core.initializers.ones"]
+)
 class Ones(Initializer):
     """Initializer that generates tensors initialized to 1.
 
@@ -97,3 +103,61 @@ class Ones(Initializer):
         """
         dtype = standardize_dtype(dtype)
         return ops.ones(shape, dtype=dtype)
+
+
+@keras_core_export(
+    [
+        "keras_core.initializers.IdentityInitializer",
+        "keras_core.initializers.Identity",
+        "keras_core.initializers.identity",
+    ]
+)
+class Identity(Initializer):
+    """Initializer that generates the identity matrix.
+
+    Only usable for generating 2D matrices.
+
+    Examples:
+
+    >>> # Standalone usage:
+    >>> initializer = Identity()
+    >>> values = initializer(shape=(2, 2))
+
+    >>> # Usage in a Keras layer:
+    >>> initializer = Identity()
+    >>> layer = Dense(3, kernel_initializer=initializer)
+
+    Args:
+        gain: Multiplicative factor to apply to the identity matrix.
+    """
+
+    def __init__(self, gain=1.0):
+        self.gain = gain
+
+    def __call__(self, shape, dtype=None):
+        """Returns a tensor object initialized as specified by the initializer.
+
+        Args:
+            shape: Shape of the tensor. Must have exactly two entries.
+            dtype: Optional dtype of the tensor. Only numeric or boolean dtypes
+                are supported. If not specified, `keras_core.backend.floatx()`
+                is used, which defaults to `float32` unless you configured it
+                otherwise (via `keras_core.backend.set_floatx(float_dtype)`).
+
+        Raises:
+            ValueError: If `shape` does not have exactly two dimensions.
+        """
+        if len(shape) != 2:
+            raise ValueError(
+                "Identity matrix initializer can only be used for 2D matrices. "
+                f"Received: shape={shape} of rank {len(shape)}."
+            )
+        dtype = standardize_dtype(dtype)
+        return self.gain * ops.eye(*shape, dtype=dtype)
+
+    def get_config(self):
+        """Returns the config of the initializer as a Python dict."""
+        # Mirrors `Constant.get_config`: expose the constructor argument.
+        # Presumably required for a non-default `gain` to be kept when the
+        # initializer is serialized; TODO confirm against the base class.
+        return {"gain": self.gain}
diff --git a/keras_core/initializers/constant_initalizers_test.py b/keras_core/initializers/constant_initializers_test.py
similarity index 59%
rename from keras_core/initializers/constant_initalizers_test.py
rename to keras_core/initializers/constant_initializers_test.py
index e1a7b43dd..af0e8fe4f 100644
--- a/keras_core/initializers/constant_initalizers_test.py
+++ b/keras_core/initializers/constant_initializers_test.py
@@ -12,8 +12,8 @@ class ConstantInitializersTest(testing.TestCase):
         initializer = initializers.Zeros()
         values = initializer(shape=shape)
         self.assertEqual(values.shape, shape)
-        np_values = backend.convert_to_numpy(values).data
-        self.assertEqual(np_values, np.zeros(shape=shape))
+        np_values = backend.convert_to_numpy(values)
+        self.assertAllClose(np_values, np.zeros(shape=shape))
 
         self.run_class_serialization_test(initializer)
 
@@ -23,8 +23,8 @@ class ConstantInitializersTest(testing.TestCase):
         initializer = initializers.Ones()
         values = initializer(shape=shape)
         self.assertEqual(values.shape, shape)
-        np_values = backend.convert_to_numpy(values).data
-        self.assertEqual(np_values, np.ones(shape=shape))
+        np_values = backend.convert_to_numpy(values)
+        self.assertAllClose(np_values, np.ones(shape=shape))
 
         self.run_class_serialization_test(initializer)
 
@@ -35,9 +35,21 @@ class ConstantInitializersTest(testing.TestCase):
         initializer = initializers.Constant(value=constant_value)
         values = initializer(shape=shape)
         self.assertEqual(values.shape, shape)
-        np_values = backend.convert_to_numpy(values).data
-        self.assertEqual(
+        np_values = backend.convert_to_numpy(values)
+        self.assertAllClose(
             np_values, np.full(shape=shape, fill_value=constant_value)
         )
 
         self.run_class_serialization_test(initializer)
+
+    def test_identity_initializer(self):
+        shape = (3, 3)
+        gain = 2  # non-default gain, so the scaling is actually checked
+
+        initializer = initializers.Identity(gain=gain)
+        values = initializer(shape=shape)
+        self.assertEqual(values.shape, shape)
+        np_values = backend.convert_to_numpy(values)
+        self.assertAllClose(np_values, np.eye(*shape) * gain)
+
+        self.run_class_serialization_test(initializer)
diff --git a/keras_core/initializers/random_initializers.py b/keras_core/initializers/random_initializers.py
index eb5574c7a..07ae23d38 100644
--- a/keras_core/initializers/random_initializers.py
+++ b/keras_core/initializers/random_initializers.py
@@ -7,7 +7,12 @@ from keras_core.initializers.initializer import Initializer
 from keras_core.saving import serialization_lib
 
 
-@keras_core_export("keras_core.initializers.RandomNormal")
+@keras_core_export(
+    [
+        "keras_core.initializers.RandomNormal",
+        "keras_core.initializers.random_normal",
+    ]
+)
 class RandomNormal(Initializer):
     """Random normal initializer.
 
@@ -59,7 +64,12 @@ class RandomNormal(Initializer):
         return {"mean": self.mean, "stddev": self.stddev, "seed": seed_config}
 
 
-@keras_core_export("keras_core.initializers.TruncatedNormal")
+@keras_core_export(
+    [
+        "keras_core.initializers.TruncatedNormal",
+        "keras_core.initializers.truncated_normal",
+    ]
+)
 class TruncatedNormal(Initializer):
     """Initializer that generates a truncated normal distribution.
 
@@ -114,7 +124,12 @@ class TruncatedNormal(Initializer):
         return {"mean": self.mean, "stddev": self.stddev, "seed": seed_config}
 
 
-@keras_core_export("keras_core.initializers.RandomUniform")
+@keras_core_export(
+    [
+        "keras_core.initializers.RandomUniform",
+        "keras_core.initializers.random_uniform",
+    ]
+)
 class RandomUniform(Initializer):
     """Random uniform initializer.
 
@@ -170,7 +185,12 @@ class RandomUniform(Initializer):
         }
 
 
-@keras_core_export("keras_core.initializers.VarianceScaling")
+@keras_core_export(
+    [
+        "keras_core.initializers.VarianceScaling",
+        "keras_core.initializers.variance_scaling",
+    ]
+)
 class VarianceScaling(Initializer):
     """Initializer that adapts its scale to the shape of its input tensors.
 
@@ -285,7 +305,12 @@ class VarianceScaling(Initializer):
         }
 
 
-@keras_core_export("keras_core.initializers.GlorotUniform")
+@keras_core_export(
+    [
+        "keras_core.initializers.GlorotUniform",
+        "keras_core.initializers.glorot_uniform",
+    ]
+)
 class GlorotUniform(VarianceScaling):
     """The Glorot uniform initializer, also called Xavier uniform initializer.
 
@@ -329,7 +354,12 @@ class GlorotUniform(VarianceScaling):
         }
 
 
-@keras_core_export("keras_core.initializers.GlorotNormal")
+@keras_core_export(
+    [
+        "keras_core.initializers.GlorotNormal",
+        "keras_core.initializers.glorot_normal",
+    ]
+)
 class GlorotNormal(VarianceScaling):
     """The Glorot normal initializer, also called Xavier normal initializer.
 
@@ -377,7 +407,12 @@ class GlorotNormal(VarianceScaling):
         }
 
 
-@keras_core_export("keras_core.initializers.LecunNormal")
+@keras_core_export(
+    [
+        "keras_core.initializers.LecunNormal",
+        "keras_core.initializers.lecun_normal",
+    ]
+)
 class LecunNormal(VarianceScaling):
     """Lecun normal initializer.
 
@@ -425,7 +460,12 @@ class LecunNormal(VarianceScaling):
         }
 
 
-@keras_core_export("keras_core.initializers.LecunUniform")
+@keras_core_export(
+    [
+        "keras_core.initializers.LecunUniform",
+        "keras_core.initializers.lecun_uniform",
+    ]
+)
 class LecunUniform(VarianceScaling):
     """Lecun uniform initializer.
 
@@ -469,7 +509,9 @@ class LecunUniform(VarianceScaling):
         }
 
 
-@keras_core_export("keras_core.initializers.HeNormal")
+@keras_core_export(
+    ["keras_core.initializers.HeNormal", "keras_core.initializers.he_normal"]
+)
 class HeNormal(VarianceScaling):
     """He normal initializer.
 
@@ -513,7 +555,9 @@ class HeNormal(VarianceScaling):
         }
 
 
-@keras_core_export("keras_core.initializers.HeUniform")
+@keras_core_export(
+    ["keras_core.initializers.HeUniform", "keras_core.initializers.he_uniform"]
+)
 class HeUniform(VarianceScaling):
     """He uniform variance scaling initializer.
 
@@ -589,6 +633,7 @@ def compute_fans(shape):
     [
         "keras_core.initializers.OrthogonalInitializer",
         "keras_core.initializers.Orthogonal",
+        "keras_core.initializers.orthogonal",
     ]
 )
 class OrthogonalInitializer(Initializer):
diff --git a/keras_core/legacy/__init__.py b/keras_core/legacy/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/keras_core/legacy/backend.py b/keras_core/legacy/backend.py
new file mode 100644
index 000000000..29b5562f3
--- /dev/null
+++ b/keras_core/legacy/backend.py
@@ -0,0 +1,2287 @@
+"""Legacy Keras 1/2 backend functions."""
+
+import itertools
+
+import numpy as np
+
+from keras_core import backend
+from keras_core.api_export import keras_core_export
+from keras_core.utils.module_utils import tensorflow as tf
+
+py_any = any  # keep the builtin; `any` is redefined below
+py_all = all  # keep the builtin; `all` is redefined below
+
+
+@keras_core_export("keras_core._legacy.backend.abs")
+def abs(x):
+    """DEPRECATED. Thin wrapper around `tf.abs`."""
+    return tf.abs(x)
+
+
+@keras_core_export("keras_core._legacy.backend.all")
+def all(x, axis=None, keepdims=False):
+    """DEPRECATED. Casts `x` to bool, then applies `tf.reduce_all`."""
+    x = tf.cast(x, tf.bool)
+    return tf.reduce_all(x, axis, keepdims)
+
+
+@keras_core_export("keras_core._legacy.backend.any")
+def any(x, axis=None, keepdims=False):
+    """DEPRECATED. Casts `x` to bool, then applies `tf.reduce_any`."""
+    x = tf.cast(x, tf.bool)
+    return tf.reduce_any(x, axis, keepdims)
+
+
+@keras_core_export("keras_core._legacy.backend.argmax")
+def argmax(x, axis=-1):
+    """DEPRECATED. Thin wrapper around `tf.argmax` (default axis -1)."""
+    return tf.argmax(x, axis)
+
+
+@keras_core_export("keras_core._legacy.backend.argmin")
+def argmin(x, axis=-1):
+    """DEPRECATED. Thin wrapper around `tf.argmin` (default axis -1)."""
+    return tf.argmin(x, axis)
+
+
+@keras_core_export("keras_core._legacy.backend.arange")
+def arange(start, stop=None, step=1, dtype="int32"):
+    """DEPRECATED. `tf.range` wrapper; result is cast unless dtype is int32."""
+    if stop is None and start < 0:
+        start = 0  # a lone negative `start` is replaced by 0
+    result = tf.range(start, limit=stop, delta=step, name="arange")
+    if dtype != "int32":
+        result = tf.cast(result, dtype)
+    return result
+
+
+@keras_core_export("keras_core._legacy.backend.batch_dot")
+def batch_dot(x, y, axes=None):
+    """DEPRECATED. Batch-wise dot product of `x` and `y` via `tf.matmul`."""
+    x_shape = x.shape
+    y_shape = y.shape
+
+    x_ndim = len(x_shape)
+    y_ndim = len(y_shape)
+
+    if x_ndim < 2 or y_ndim < 2:
+        raise ValueError(
+            "Cannot do batch_dot on inputs "
+            "with rank < 2. "
+            "Received inputs with tf.shapes "
+            + str(x_shape)
+            + " and "
+            + str(y_shape)
+            + "."
+        )
+
+    x_batch_size = x_shape[0]
+    y_batch_size = y_shape[0]
+
+    if x_batch_size is not None and y_batch_size is not None:
+        if x_batch_size != y_batch_size:
+            raise ValueError(
+                "Cannot do batch_dot on inputs "
+                "with different batch sizes. "
+                "Received inputs with tf.shapes "
+                + str(x_shape)
+                + " and "
+                + str(y_shape)
+                + "."
+            )
+    if isinstance(axes, int):
+        axes = [axes, axes]
+
+    if axes is None:
+        if y_ndim == 2:
+            axes = [x_ndim - 1, y_ndim - 1]
+        else:
+            axes = [x_ndim - 1, y_ndim - 2]
+
+    if py_any(isinstance(a, (list, tuple)) for a in axes):
+        raise ValueError(
+            "Multiple target dimensions are not supported. "
+            + "Expected: None, int, (int, int), "
+            + "Provided: "
+            + str(axes)
+        )
+
+    # if tuple, convert to list.
+    axes = list(axes)
+
+    # convert negative indices.
+    if axes[0] < 0:
+        axes[0] += x_ndim
+    if axes[1] < 0:
+        axes[1] += y_ndim
+
+    # sanity checks
+    if 0 in axes:
+        raise ValueError(
+            "Cannot perform batch_dot over axis 0. "
+            "If your inputs are not batched, "
+            "add a dummy batch dimension to your "
+            "inputs using K.expand_dims(x, 0)"
+        )
+    a0, a1 = axes
+    d1 = x_shape[a0]
+    d2 = y_shape[a1]
+
+    if d1 is not None and d2 is not None and d1 != d2:
+        raise ValueError(
+            "Cannot do batch_dot on inputs with tf.shapes "
+            + str(x_shape)
+            + " and "
+            + str(y_shape)
+            + " with axes="
+            + str(axes)
+            + ". x.shape[%d] != y.shape[%d] (%d != %d)."
+            % (axes[0], axes[1], d1, d2)
+        )
+
+    # backup ndims. Need them later.
+    orig_x_ndim = x_ndim
+    orig_y_ndim = y_ndim
+
+    # if rank is 2, expand to 3.
+    if x_ndim == 2:
+        x = tf.expand_dims(x, 1)
+        a0 += 1
+        x_ndim += 1
+    if y_ndim == 2:
+        y = tf.expand_dims(y, 2)
+        y_ndim += 1
+
+    # bring x's dimension to be reduced to last axis.
+    if a0 != x_ndim - 1:
+        pattern = list(range(x_ndim))
+        for i in range(a0, x_ndim - 1):
+            pattern[i] = pattern[i + 1]
+        pattern[-1] = a0
+        x = tf.transpose(x, pattern)
+
+    # bring y's dimension to be reduced to axis 1.
+    if a1 != 1:
+        pattern = list(range(y_ndim))
+        for i in range(a1, 1, -1):
+            pattern[i] = pattern[i - 1]
+        pattern[1] = a1
+        y = tf.transpose(y, pattern)
+
+    # normalize both inputs to rank 3.
+    if x_ndim > 3:
+        # squash middle dimensions of x.
+        x_shape = tf.shape(x)
+        x_mid_dims = x_shape[1:-1]
+        x_squashed_shape = tf.stack([x_shape[0], -1, x_shape[-1]])
+        x = tf.reshape(x, x_squashed_shape)
+        x_squashed = True
+    else:
+        x_squashed = False
+
+    if y_ndim > 3:
+        # squash trailing dimensions of y.
+        y_shape = tf.shape(y)
+        y_trail_dims = y_shape[2:]
+        y_squashed_shape = tf.stack([y_shape[0], y_shape[1], -1])
+        y = tf.reshape(y, y_squashed_shape)
+        y_squashed = True
+    else:
+        y_squashed = False
+
+    result = tf.matmul(x, y)
+
+    # if inputs were squashed, we have to reshape the matmul output.
+    output_shape = tf.shape(result)
+    do_reshape = False
+
+    if x_squashed:
+        output_shape = tf.concat(
+            [output_shape[:1], x_mid_dims, output_shape[-1:]], 0
+        )
+        do_reshape = True
+
+    if y_squashed:
+        output_shape = tf.concat([output_shape[:-1], y_trail_dims], 0)
+        do_reshape = True
+
+    if do_reshape:
+        result = tf.reshape(result, output_shape)
+
+    # if the inputs were originally rank 2, we remove the added 1 dim.
+    if orig_x_ndim == 2:
+        result = tf.squeeze(result, 1)
+    elif orig_y_ndim == 2:
+        result = tf.squeeze(result, -1)
+
+    return result
+
+
+@keras_core_export("keras_core._legacy.backend.batch_flatten")
+def batch_flatten(x):
+    """DEPRECATED. Reshapes `x` to `[-1, prod(tf.shape(x)[1:])]`."""
+    x = tf.reshape(x, tf.stack([-1, prod(tf.shape(x)[1:])]))
+    return x
+
+
+@keras_core_export("keras_core._legacy.backend.batch_get_value")
+def batch_get_value(tensors):
+    """DEPRECATED. Returns a list with `x.numpy()` for each input tensor."""
+    return [x.numpy() for x in tensors]
+
+
+@keras_core_export("keras_core._legacy.backend.batch_set_value")
+def batch_set_value(tuples):
+    """DEPRECATED. Assigns each `value` to `x`; eager/function mode only."""
+    if tf.executing_eagerly() or tf.inside_function():
+        for x, value in tuples:
+            value = np.asarray(value, dtype=x.dtype.name)
+            x.assign(value)
+
+
+@keras_core_export("keras_core._legacy.backend.batch_normalization")
+def batch_normalization(x, mean, var, beta, gamma, axis=-1, epsilon=1e-3):
+    """DEPRECATED. Wraps `tf.nn.batch_normalization`; `axis` is unused."""
+    return tf.nn.batch_normalization(x, mean, var, beta, gamma, epsilon)
+
+
+@keras_core_export("keras_core._legacy.backend.bias_add")
+def bias_add(x, bias, data_format=None):
+    """DEPRECATED. Adds `bias` to `x`, honoring `data_format`."""
+    if data_format is None:
+        data_format = backend.image_data_format()
+    if data_format not in {"channels_first", "channels_last"}:
+        raise ValueError(f"Unknown data_format: {data_format}")
+    bias_shape = bias.shape
+    if len(bias_shape) != 1 and len(bias_shape) != ndim(x) - 1:
+        raise ValueError(
+            f"Unexpected bias dimensions {len(bias_shape)}. "
+            f"Expected it to be 1 or {ndim(x) - 1} dimensions"
+        )
+
+    if len(bias_shape) == 1:
+        if data_format == "channels_first":
+            return tf.nn.bias_add(x, bias, data_format="NCHW")
+        return tf.nn.bias_add(x, bias, data_format="NHWC")
+    if ndim(x) in (3, 4, 5):
+        if data_format == "channels_first":
+            bias_reshape_axis = (1, bias_shape[-1]) + bias_shape[:-1]
+            return x + reshape(bias, bias_reshape_axis)
+        return x + reshape(bias, (1,) + bias_shape)
+    return tf.nn.bias_add(x, bias)
+
+
+@keras_core_export("keras_core._legacy.backend.binary_crossentropy")
+def binary_crossentropy(target, output, from_logits=False):
+    """DEPRECATED. Element-wise binary crossentropy (no reduction applied)."""
+    target = tf.convert_to_tensor(target)
+    output = tf.convert_to_tensor(output)
+
+    if from_logits:
+        return tf.nn.sigmoid_cross_entropy_with_logits(
+            labels=target, logits=output
+        )
+
+    epsilon_ = tf.convert_to_tensor(backend.epsilon(), output.dtype)
+    output = tf.clip_by_value(output, epsilon_, 1.0 - epsilon_)
+
+    # Compute cross entropy from probabilities.
+    bce = target * tf.math.log(output + backend.epsilon())
+    bce += (1 - target) * tf.math.log(1 - output + backend.epsilon())
+    return -bce
+
+
+@keras_core_export("keras_core._legacy.backend.binary_focal_crossentropy")
+def binary_focal_crossentropy(
+    target,
+    output,
+    apply_class_balancing=False,
+    alpha=0.25,
+    gamma=2.0,
+    from_logits=False,
+):
+    """DEPRECATED. Binary crossentropy scaled by `(1 - p_t) ** gamma`."""
+    sigmoidal = tf.sigmoid(output) if from_logits else output
+
+    p_t = target * sigmoidal + (1 - target) * (1 - sigmoidal)
+
+    # Calculate focal factor
+    focal_factor = tf.pow(1.0 - p_t, gamma)
+
+    # Binary crossentropy
+    bce = binary_crossentropy(
+        target=target,
+        output=output,
+        from_logits=from_logits,
+    )
+    focal_bce = focal_factor * bce
+
+    if apply_class_balancing:
+        weight = target * alpha + (1 - target) * (1 - alpha)
+        focal_bce = weight * focal_bce
+
+    return focal_bce
+
+
+@keras_core_export("keras_core._legacy.backend.cast")
+def cast(x, dtype):
+    """DEPRECATED. Thin wrapper around `tf.cast`."""
+    return tf.cast(x, dtype)
+
+
+@keras_core_export("keras_core._legacy.backend.cast_to_floatx")
+def cast_to_floatx(x):
+    """DEPRECATED. Casts TF tensors with `tf.cast`, anything else via NumPy."""
+    if isinstance(x, (tf.Tensor, tf.Variable, tf.SparseTensor)):
+        return tf.cast(x, dtype=backend.floatx())
+    return np.asarray(x, dtype=backend.floatx())
+
+
+@keras_core_export("keras_core._legacy.backend.categorical_crossentropy")
+def categorical_crossentropy(target, output, from_logits=False, axis=-1):
+    """DEPRECATED. Categorical crossentropy, summed over `axis`."""
+    target = tf.convert_to_tensor(target)
+    output = tf.convert_to_tensor(output)
+    target.shape.assert_is_compatible_with(output.shape)
+
+    if from_logits:
+        return tf.nn.softmax_cross_entropy_with_logits(
+            labels=target, logits=output, axis=axis
+        )
+
+    # Adjust the predictions so that the probability of
+    # each class for every sample adds up to 1
+    # This is needed to ensure that the cross entropy is
+    # computed correctly.
+    output = output / tf.reduce_sum(output, axis, True)
+
+    # Compute cross entropy from probabilities.
+    epsilon_ = tf.convert_to_tensor(backend.epsilon(), output.dtype)
+    output = tf.clip_by_value(output, epsilon_, 1.0 - epsilon_)
+    return -tf.reduce_sum(target * tf.math.log(output), axis)
+
+
+@keras_core_export("keras_core._legacy.backend.categorical_focal_crossentropy")
+def categorical_focal_crossentropy(
+    target,
+    output,
+    alpha=0.25,
+    gamma=2.0,
+    from_logits=False,
+    axis=-1,
+):
+    """DEPRECATED. Focal categorical crossentropy, summed over `axis`."""
+    target = tf.convert_to_tensor(target)
+    output = tf.convert_to_tensor(output)
+    target.shape.assert_is_compatible_with(output.shape)
+
+    if from_logits:
+        output = tf.nn.softmax(output, axis=axis)
+
+    # Adjust the predictions so that the probability of
+    # each class for every sample adds up to 1
+    # This is needed to ensure that the cross entropy is
+    # computed correctly.
+    output = output / tf.reduce_sum(output, axis=axis, keepdims=True)
+
+    epsilon_ = tf.convert_to_tensor(backend.epsilon(), output.dtype)
+    output = tf.clip_by_value(output, epsilon_, 1.0 - epsilon_)
+
+    # Calculate cross entropy
+    cce = -target * tf.math.log(output)
+
+    # Calculate factors
+    modulating_factor = tf.pow(1.0 - output, gamma)
+    weighting_factor = tf.multiply(modulating_factor, alpha)
+
+    # Apply weighting factor
+    focal_cce = tf.multiply(weighting_factor, cce)
+    focal_cce = tf.reduce_sum(focal_cce, axis=axis)
+    return focal_cce
+
+
+@keras_core_export("keras_core._legacy.backend.clip")
+def clip(x, min_value, max_value):
+    """DEPRECATED. `tf.clip_by_value` wrapper; `None` bounds become +/-inf."""
+    if isinstance(min_value, (int, float)) and isinstance(
+        max_value, (int, float)
+    ):
+        if max_value < min_value:
+            max_value = min_value  # inverted bounds collapse to min_value
+    if min_value is None:
+        min_value = -np.inf
+    if max_value is None:
+        max_value = np.inf
+    return tf.clip_by_value(x, min_value, max_value)
+
+
+@keras_core_export("keras_core._legacy.backend.concatenate")
+def concatenate(tensors, axis=-1):
+    """DEPRECATED. Concatenates sparse, ragged, or densified tensors."""
+    if axis < 0:
+        rank = ndim(tensors[0])
+        if rank:
+            axis %= rank
+        else:
+            axis = 0
+
+    if py_all(is_sparse(x) for x in tensors):
+        return tf.compat.v1.sparse_concat(axis, tensors)
+    elif py_all(isinstance(x, tf.RaggedTensor) for x in tensors):
+        return tf.concat(tensors, axis)
+    else:
+        return tf.concat([to_dense(x) for x in tensors], axis)
+
+
+@keras_core_export("keras_core._legacy.backend.constant")
+def constant(value, dtype=None, shape=None, name=None):
+    """DEPRECATED. `tf.constant` wrapper; dtype defaults to `floatx()`."""
+    if dtype is None:
+        dtype = backend.floatx()
+
+    return tf.constant(value, dtype=dtype, shape=shape, name=name)
+
+
+def _preprocess_conv1d_input(x, data_format):
+    """Return `x` unchanged plus the TF layout string for a 1D conv."""
+    # Only "channels_first" selects NCW; any other value yields NWC.
+    layout = "NCW" if data_format == "channels_first" else "NWC"
+    return x, layout
+
+
+def _preprocess_conv2d_input(x, data_format, force_transpose=False):
+    """Pick the TF layout for a 2D conv, transposing `x` if forced."""
+    if data_format != "channels_first":
+        return x, "NHWC"
+    if not force_transpose:
+        return x, "NCHW"
+    # Channels-first input the caller wants in NHWC: move axis 1 last.
+    return tf.transpose(x, (0, 2, 3, 1)), "NHWC"
+
+
+def _preprocess_conv3d_input(x, data_format):
+    """Return `x` unchanged plus the TF layout string for a 3D conv."""
+    # Only "channels_first" selects NCDHW; any other value yields NDHWC.
+    layout = "NCDHW" if data_format == "channels_first" else "NDHWC"
+    return x, layout
+
+
+def _preprocess_padding(padding):
+    """Translate "same"/"valid" to TF's "SAME"/"VALID" padding names."""
+    if padding == "same":
+        return "SAME"
+    if padding == "valid":
+        return "VALID"
+    # Any other mode (e.g. "causal") must be handled by the caller first.
+    raise ValueError(f"Invalid padding: {padding}")
+
+
+@keras_core_export("keras_core._legacy.backend.conv1d")
+def conv1d(
+    x, kernel, strides=1, padding="valid", data_format=None, dilation_rate=1
+):
+    """DEPRECATED. 1D convolution; also supports `padding="causal"`."""
+    if data_format is None:
+        data_format = backend.image_data_format()
+    if data_format not in {"channels_first", "channels_last"}:
+        raise ValueError(f"Unknown data_format: {data_format}")
+
+    kernel_shape = kernel.shape.as_list()
+    if padding == "causal":
+        # causal (dilated) convolution:
+        left_pad = dilation_rate * (kernel_shape[0] - 1)
+        x = temporal_padding(x, (left_pad, 0))
+        padding = "valid"
+    padding = _preprocess_padding(padding)
+
+    x, tf_data_format = _preprocess_conv1d_input(x, data_format)
+    x = tf.compat.v1.nn.convolution(
+        input=x,
+        filter=kernel,
+        dilation_rate=dilation_rate,
+        strides=strides,
+        padding=padding,
+        data_format=tf_data_format,
+    )
+    if data_format == "channels_first" and tf_data_format == "NWC":
+        x = tf.transpose(x, (0, 2, 1))  # NWC -> NCW
+    return x
+
+
+@keras_core_export("keras_core._legacy.backend.conv2d")
+def conv2d(
+    x,
+    kernel,
+    strides=(1, 1),
+    padding="valid",
+    data_format=None,
+    dilation_rate=(1, 1),
+):
+    """DEPRECATED. 2D convolution via `tf.compat.v1.nn.convolution`."""
+    if data_format is None:
+        data_format = backend.image_data_format()
+    if data_format not in {"channels_first", "channels_last"}:
+        raise ValueError(f"Unknown data_format: {data_format}")
+
+    x, tf_data_format = _preprocess_conv2d_input(x, data_format)
+    padding = _preprocess_padding(padding)
+    x = tf.compat.v1.nn.convolution(
+        input=x,
+        filter=kernel,
+        dilation_rate=dilation_rate,
+        strides=strides,
+        padding=padding,
+        data_format=tf_data_format,
+    )
+    if data_format == "channels_first" and tf_data_format == "NHWC":
+        x = tf.transpose(x, (0, 3, 1, 2))  # NHWC -> NCHW
+    return x
+
+
+@keras_core_export("keras_core._legacy.backend.conv2d_transpose")
+def conv2d_transpose(
+    x,
+    kernel,
+    output_shape,
+    strides=(1, 1),
+    padding="valid",
+    data_format=None,
+    dilation_rate=(1, 1),
+):
+    """DEPRECATED. 2D transposed convolution, optionally dilated."""
+    if data_format is None:
+        data_format = backend.image_data_format()
+    if data_format not in {"channels_first", "channels_last"}:
+        raise ValueError(f"Unknown data_format: {data_format}")
+
+    # `atrous_conv2d_transpose` only supports NHWC format, even on GPU.
+    if data_format == "channels_first" and dilation_rate != (1, 1):
+        force_transpose = True
+    else:
+        force_transpose = False
+
+    x, tf_data_format = _preprocess_conv2d_input(
+        x, data_format, force_transpose
+    )
+
+    if data_format == "channels_first" and tf_data_format == "NHWC":
+        output_shape = (
+            output_shape[0],
+            output_shape[2],
+            output_shape[3],
+            output_shape[1],
+        )
+    if output_shape[0] is None:
+        output_shape = (tf.shape(x)[0],) + tuple(output_shape[1:])
+
+    if isinstance(output_shape, (tuple, list)):
+        output_shape = tf.stack(list(output_shape))
+
+    padding = _preprocess_padding(padding)
+    if tf_data_format == "NHWC":
+        strides = (1,) + strides + (1,)
+    else:
+        strides = (1, 1) + strides
+
+    if dilation_rate == (1, 1):
+        x = tf.compat.v1.nn.conv2d_transpose(
+            x,
+            kernel,
+            output_shape,
+            strides,
+            padding=padding,
+            data_format=tf_data_format,
+        )
+    else:
+        if dilation_rate[0] != dilation_rate[1]:
+            raise ValueError(
+                "Expected the 2 dimensions of the `dilation_rate` argument "
+                "to be equal to each other. "
+                f"Received: dilation_rate={dilation_rate}"
+            )
+        x = tf.nn.atrous_conv2d_transpose(
+            x, kernel, output_shape, rate=dilation_rate[0], padding=padding
+        )
+    if data_format == "channels_first" and tf_data_format == "NHWC":
+        x = tf.transpose(x, (0, 3, 1, 2))  # NHWC -> NCHW
+    return x
+
+
+@keras_core_export("keras_core._legacy.backend.conv3d")
+def conv3d(
+    x,
+    kernel,
+    strides=(1, 1, 1),
+    padding="valid",
+    data_format=None,
+    dilation_rate=(1, 1, 1),
+):
+    """DEPRECATED. 3D convolution via `tf.compat.v1.nn.convolution`."""
+    if data_format is None:
+        data_format = backend.image_data_format()
+    if data_format not in {"channels_first", "channels_last"}:
+        raise ValueError(f"Unknown data_format: {data_format}")
+
+    x, tf_data_format = _preprocess_conv3d_input(x, data_format)
+    padding = _preprocess_padding(padding)
+    x = tf.compat.v1.nn.convolution(
+        input=x,
+        filter=kernel,
+        dilation_rate=dilation_rate,
+        strides=strides,
+        padding=padding,
+        data_format=tf_data_format,
+    )
+    if data_format == "channels_first" and tf_data_format == "NDHWC":
+        x = tf.transpose(x, (0, 4, 1, 2, 3))
+    return x
+
+
+@keras_core_export("keras_core._legacy.backend.cos")
+def cos(x):
+    """DEPRECATED."""
+    return tf.cos(x)
+
+
+@keras_core_export("keras_core._legacy.backend.count_params")
+def count_params(x):
+    """DEPRECATED."""
+    return np.prod(x.shape.as_list())
+
+
+@keras_core_export("keras_core._legacy.backend.ctc_batch_cost")
+def ctc_batch_cost(y_true, y_pred, input_length, label_length):
+    """DEPRECATED."""
+    label_length = tf.cast(tf.squeeze(label_length, axis=-1), tf.int32)
+    input_length = tf.cast(tf.squeeze(input_length, axis=-1), tf.int32)
+    sparse_labels = tf.cast(
+        ctc_label_dense_to_sparse(y_true, label_length), tf.int32
+    )
+
+    y_pred = tf.math.log(
+        tf.transpose(y_pred, perm=[1, 0, 2]) + backend.epsilon()
+    )
+
+    return tf.expand_dims(
+        tf.compat.v1.nn.ctc_loss(
+            inputs=y_pred, labels=sparse_labels, sequence_length=input_length
+        ),
+        1,
+    )
+
+
+@keras_core_export("keras_core._legacy.backend.ctc_label_dense_to_sparse")
+def ctc_label_dense_to_sparse(labels, label_lengths):
+    """DEPRECATED.
+
+    Converts dense labels into a `tf.SparseTensor`, keeping only the first
+    `label_lengths[i]` entries of each row `i`.
+
+    Args:
+        labels: Dense label tensor. Only `tf.shape(labels)[0]` and `[1]`
+            are used, so this presumably expects a rank-2 tensor
+            (TODO confirm against callers).
+        label_lengths: Per-row number of valid labels; scanned one entry
+            at a time.
+
+    Returns:
+        A `tf.SparseTensor` with int64 indices, values gathered from
+        `labels`, and a dense shape equal to `tf.shape(labels)`.
+    """
+    label_shape = tf.shape(labels)
+    num_batches_tns = tf.stack([label_shape[0]])
+    max_num_labels_tns = tf.stack([label_shape[1]])
+
+    # Scan step: builds a boolean row that is True at positions smaller
+    # than the current length (`current_input`).
+    def range_less_than(old_input, current_input):
+        return tf.expand_dims(tf.range(tf.shape(old_input)[1]), 0) < tf.fill(
+            max_num_labels_tns, current_input
+        )
+
+    # The initializer only fixes the shape/dtype of the scan accumulator.
+    init = tf.cast(tf.fill([1, label_shape[1]], 0), tf.bool)
+    dense_mask = tf.compat.v1.scan(
+        range_less_than, label_lengths, initializer=init, parallel_iterations=1
+    )
+    # Drop the singleton axis introduced by `expand_dims` in the scan step.
+    dense_mask = dense_mask[:, 0, :]
+
+    # Column index of every position, masked down to the valid ones.
+    label_array = tf.reshape(
+        tf.tile(tf.range(0, label_shape[1]), num_batches_tns), label_shape
+    )
+    label_ind = tf.compat.v1.boolean_mask(label_array, dense_mask)
+
+    # Row index of every position, masked down to the valid ones.
+    batch_array = tf.transpose(
+        tf.reshape(
+            tf.tile(tf.range(0, label_shape[0]), max_num_labels_tns),
+            reverse(label_shape, 0),
+        )
+    )
+    batch_ind = tf.compat.v1.boolean_mask(batch_array, dense_mask)
+    # Pair up row and column indices as one (row, column) entry per value.
+    indices = tf.transpose(
+        tf.reshape(concatenate([batch_ind, label_ind], axis=0), [2, -1])
+    )
+
+    vals_sparse = tf.compat.v1.gather_nd(labels, indices)
+
+    return tf.SparseTensor(
+        tf.cast(indices, tf.int64), vals_sparse, tf.cast(label_shape, tf.int64)
+    )
+
+
+@keras_core_export("keras_core._legacy.backend.ctc_decode")
+def ctc_decode(y_pred, input_length, greedy=True, beam_width=100, top_paths=1):
+    """DEPRECATED."""
+    input_shape = tf.shape(y_pred)
+    num_samples, num_steps = input_shape[0], input_shape[1]
+    y_pred = tf.math.log(
+        tf.transpose(y_pred, perm=[1, 0, 2]) + backend.epsilon()
+    )
+    input_length = tf.cast(input_length, tf.int32)
+
+    if greedy:
+        (decoded, log_prob) = tf.nn.ctc_greedy_decoder(
+            inputs=y_pred, sequence_length=input_length
+        )
+    else:
+        (decoded, log_prob) = tf.compat.v1.nn.ctc_beam_search_decoder(
+            inputs=y_pred,
+            sequence_length=input_length,
+            beam_width=beam_width,
+            top_paths=top_paths,
+        )
+    decoded_dense = []
+    for st in decoded:
+        st = tf.SparseTensor(st.indices, st.values, (num_samples, num_steps))
+        decoded_dense.append(tf.sparse.to_dense(sp_input=st, default_value=-1))
+    return (decoded_dense, log_prob)
+
+
+@keras_core_export("keras_core._legacy.backend.cumsum")
+def cumsum(x, axis=0):
+    """DEPRECATED."""
+    return tf.cumsum(x, axis=axis)
+
+
+@keras_core_export("keras_core._legacy.backend.cumprod")
+def cumprod(x, axis=0):
+    """DEPRECATED."""
+    return tf.math.cumprod(x, axis=axis)
+
+
+@keras_core_export("keras_core._legacy.backend.depthwise_conv2d")
+def depthwise_conv2d(
+    x,
+    depthwise_kernel,
+    strides=(1, 1),
+    padding="valid",
+    data_format=None,
+    dilation_rate=(1, 1),
+):
+    """DEPRECATED."""
+    if data_format is None:
+        data_format = backend.image_data_format()
+    if data_format not in {"channels_first", "channels_last"}:
+        raise ValueError(f"Unknown data_format: {data_format}")
+
+    x, tf_data_format = _preprocess_conv2d_input(x, data_format)
+    padding = _preprocess_padding(padding)
+    if tf_data_format == "NHWC":
+        strides = (1,) + strides + (1,)
+    else:
+        strides = (1, 1) + strides
+
+    x = tf.nn.depthwise_conv2d(
+        x,
+        depthwise_kernel,
+        strides=strides,
+        padding=padding,
+        dilations=dilation_rate,
+        data_format=tf_data_format,
+    )
+    if data_format == "channels_first" and tf_data_format == "NHWC":
+        x = tf.transpose(x, (0, 3, 1, 2))  # NHWC -> NCHW
+    return x
+
+
+@keras_core_export("keras_core._legacy.backend.dot")
+def dot(x, y):
+    """DEPRECATED.
+
+    Multiplies `x` by `y`.
+
+    When the rank of `x` is known and either operand has rank above 2, the
+    operands are flattened to 2D, multiplied with `tf.matmul`, and the
+    result is reshaped to `x_shape[:-1] + y_shape[:-2] + y_shape[-1:]`.
+    Otherwise `tf.sparse.sparse_dense_matmul` is used when `x` is sparse
+    and `tf.matmul` when it is not.
+
+    Args:
+        x: Left operand (tensor or sparse tensor).
+        y: Right operand.
+
+    Returns:
+        The product tensor.
+    """
+    # NOTE(review): only `ndim(x)` is checked against None; `ndim(y) > 2`
+    # presumably assumes `y` has a known rank -- confirm.
+    if ndim(x) is not None and (ndim(x) > 2 or ndim(y) > 2):
+        # Prefer static dimensions; fall back to the dynamic shape entry
+        # wherever the static one is unknown.
+        x_shape = []
+        for i, s in zip(x.shape, tf.unstack(tf.shape(x))):
+            if i is not None:
+                x_shape.append(i)
+            else:
+                x_shape.append(s)
+        x_shape = tuple(x_shape)
+        y_shape = []
+        for i, s in zip(y.shape, tf.unstack(tf.shape(y))):
+            if i is not None:
+                y_shape.append(i)
+            else:
+                y_shape.append(s)
+        y_shape = tuple(y_shape)
+        # Move the second-to-last axis of `y` to the front so that `y` can
+        # be flattened to (y_shape[-2], -1).
+        y_permute_dim = list(range(ndim(y)))
+        y_permute_dim = [y_permute_dim.pop(-2)] + y_permute_dim
+        xt = tf.reshape(x, [-1, x_shape[-1]])
+        yt = tf.reshape(tf.transpose(y, perm=y_permute_dim), [y_shape[-2], -1])
+        return tf.reshape(
+            tf.matmul(xt, yt), x_shape[:-1] + y_shape[:-2] + y_shape[-1:]
+        )
+    if is_sparse(x):
+        out = tf.sparse.sparse_dense_matmul(x, y)
+    else:
+        out = tf.matmul(x, y)
+    return out
+
+
+@keras_core_export("keras_core._legacy.backend.dropout")
+def dropout(x, level, noise_shape=None, seed=None):
+    """DEPRECATED."""
+    if seed is None:
+        seed = np.random.randint(10e6)
+    return tf.nn.dropout(x, rate=level, noise_shape=noise_shape, seed=seed)
+
+
+@keras_core_export("keras_core._legacy.backend.dtype")
+def dtype(x):
+    """DEPRECATED."""
+    return x.dtype.base_dtype.name
+
+
+@keras_core_export("keras_core._legacy.backend.elu")
+def elu(x, alpha=1.0):
+    """DEPRECATED."""
+    res = tf.nn.elu(x)
+    if alpha == 1:
+        return res
+    else:
+        return tf.where(x > 0, res, alpha * res)
+
+
+@keras_core_export("keras_core._legacy.backend.equal")
+def equal(x, y):
+    """DEPRECATED."""
+    return tf.equal(x, y)
+
+
+@keras_core_export("keras_core._legacy.backend.eval")
+def eval(x):
+    """DEPRECATED."""
+    return get_value(to_dense(x))
+
+
+@keras_core_export("keras_core._legacy.backend.exp")
+def exp(x):
+    """DEPRECATED."""
+    return tf.exp(x)
+
+
+@keras_core_export("keras_core._legacy.backend.expand_dims")
+def expand_dims(x, axis=-1):
+    """DEPRECATED."""
+    return tf.expand_dims(x, axis)
+
+
+@keras_core_export("keras_core._legacy.backend.eye")
+def eye(size, dtype=None, name=None):
+    """DEPRECATED."""
+    if dtype is None:
+        dtype = backend.floatx()
+    tf_dtype = tf.as_dtype(dtype)
+    return variable(tf.eye(size, dtype=tf_dtype), dtype, name)
+
+
+@keras_core_export("keras_core._legacy.backend.flatten")
+def flatten(x):
+    """DEPRECATED."""
+    return tf.reshape(x, [-1])
+
+
+@keras_core_export("keras_core._legacy.backend.foldl")
+def foldl(fn, elems, initializer=None, name=None):
+    """DEPRECATED."""
+    return tf.compat.v1.foldl(fn, elems, initializer=initializer, name=name)
+
+
+@keras_core_export("keras_core._legacy.backend.foldr")
+def foldr(fn, elems, initializer=None, name=None):
+    """DEPRECATED."""
+    return tf.compat.v1.foldr(fn, elems, initializer=initializer, name=name)
+
+
+@keras_core_export("keras_core._legacy.backend.gather")
+def gather(reference, indices):
+    """DEPRECATED."""
+    return tf.compat.v1.gather(reference, indices)
+
+
+@keras_core_export("keras_core._legacy.backend.get_value")
+def get_value(x):
+    """DEPRECATED."""
+    if not tf.is_tensor(x):
+        return x
+    if tf.executing_eagerly() or isinstance(x, tf.__internal__.EagerTensor):
+        return x.numpy()
+    if not getattr(x, "_in_graph_mode", True):
+        # This is a variable which was created in an eager context, but is being
+        # evaluated from a Graph.
+        with tf.__internal__.eager_context.eager_mode():
+            return x.numpy()
+    with tf.init_scope():
+        return x.numpy()
+
+
+@keras_core_export("keras_core._legacy.backend.gradients")
+def gradients(loss, variables):
+    """DEPRECATED."""
+    return tf.compat.v1.gradients(
+        loss, variables, colocate_gradients_with_ops=True
+    )
+
+
+@keras_core_export("keras_core._legacy.backend.greater")
+def greater(x, y):
+    """DEPRECATED."""
+    return tf.greater(x, y)
+
+
+@keras_core_export("keras_core._legacy.backend.greater_equal")
+def greater_equal(x, y):
+    """DEPRECATED."""
+    return tf.greater_equal(x, y)
+
+
+@keras_core_export("keras_core._legacy.backend.hard_sigmoid")
+def hard_sigmoid(x):
+    """DEPRECATED."""
+    point_two = tf.convert_to_tensor(0.2, dtype=x.dtype)
+    point_five = tf.convert_to_tensor(0.5, dtype=x.dtype)
+    x = tf.multiply(x, point_two)
+    x = tf.add(x, point_five)
+    x = tf.clip_by_value(x, 0.0, 1.0)
+    return x
+
+
+@keras_core_export("keras_core._legacy.backend.in_top_k")
+def in_top_k(predictions, targets, k):
+    """DEPRECATED."""
+    return tf.compat.v1.math.in_top_k(predictions, targets, k)
+
+
+@keras_core_export("keras_core._legacy.backend.int_shape")
+def int_shape(x):
+    """DEPRECATED."""
+    try:
+        shape = x.shape
+        if not isinstance(shape, tuple):
+            shape = tuple(shape.as_list())
+        return shape
+    except ValueError:
+        return None
+
+
+@keras_core_export("keras_core._legacy.backend.is_sparse")
+def is_sparse(tensor):
+    """DEPRECATED."""
+    spec = getattr(tensor, "_type_spec", None)
+    if spec is not None:
+        return isinstance(spec, tf.SparseTensorSpec)
+    return isinstance(tensor, tf.SparseTensor)
+
+
+@keras_core_export("keras_core._legacy.backend.l2_normalize")
+def l2_normalize(x, axis=None):
+    """DEPRECATED."""
+    return tf.linalg.l2_normalize(x, axis=axis)
+
+
+@keras_core_export("keras_core._legacy.backend.less")
+def less(x, y):
+    """DEPRECATED."""
+    return tf.less(x, y)
+
+
+@keras_core_export("keras_core._legacy.backend.less_equal")
+def less_equal(x, y):
+    """DEPRECATED."""
+    return tf.less_equal(x, y)
+
+
+@keras_core_export("keras_core._legacy.backend.log")
+def log(x):
+    """DEPRECATED."""
+    return tf.math.log(x)
+
+
+@keras_core_export("keras_core._legacy.backend.map_fn")
+def map_fn(fn, elems, name=None, dtype=None):
+    """DEPRECATED."""
+    return tf.compat.v1.map_fn(fn, elems, name=name, dtype=dtype)
+
+
+@keras_core_export("keras_core._legacy.backend.max")
+def max(x, axis=None, keepdims=False):
+    """DEPRECATED."""
+    return tf.reduce_max(x, axis, keepdims)
+
+
+@keras_core_export("keras_core._legacy.backend.maximum")
+def maximum(x, y):
+    """DEPRECATED."""
+    return tf.maximum(x, y)
+
+
+@keras_core_export("keras_core._legacy.backend.mean")
+def mean(x, axis=None, keepdims=False):
+    """DEPRECATED."""
+    if x.dtype.base_dtype == tf.bool:
+        x = tf.cast(x, backend.floatx())
+    return tf.reduce_mean(x, axis, keepdims)
+
+
+@keras_core_export("keras_core._legacy.backend.min")
+def min(x, axis=None, keepdims=False):
+    """DEPRECATED."""
+    return tf.reduce_min(x, axis, keepdims)
+
+
+@keras_core_export("keras_core._legacy.backend.minimum")
+def minimum(x, y):
+    """DEPRECATED."""
+    return tf.minimum(x, y)
+
+
+@keras_core_export("keras_core._legacy.backend.moving_average_update")
+def moving_average_update(x, value, momentum):
+    """DEPRECATED."""
+    momentum = tf.cast(momentum, x.dtype)
+    value = tf.cast(value, x.dtype)
+    return x.assign_sub((x - value) * (1 - momentum))
+
+
+@keras_core_export("keras_core._legacy.backend.name_scope")
+def name_scope(name):
+    """DEPRECATED."""
+    return tf.name_scope(name)
+
+
+@keras_core_export("keras_core._legacy.backend.ndim")
+def ndim(x):
+    """DEPRECATED."""
+    return x.shape.rank
+
+
+@keras_core_export("keras_core._legacy.backend.not_equal")
+def not_equal(x, y):
+    """DEPRECATED."""
+    return tf.not_equal(x, y)
+
+
+@keras_core_export("keras_core._legacy.backend.one_hot")
+def one_hot(indices, num_classes):
+    """DEPRECATED."""
+    return tf.one_hot(indices, depth=num_classes, axis=-1)
+
+
+@keras_core_export("keras_core._legacy.backend.ones")
+def ones(shape, dtype=None, name=None):
+    """DEPRECATED."""
+    with tf.init_scope():
+        if dtype is None:
+            dtype = backend.floatx()
+        tf_dtype = tf.as_dtype(dtype)
+        v = tf.ones(shape=shape, dtype=tf_dtype, name=name)
+        if py_all(v.shape.as_list()):
+            return variable(v, dtype=dtype, name=name)
+        return v
+
+
+@keras_core_export("keras_core._legacy.backend.ones_like")
+def ones_like(x, dtype=None, name=None):
+    """DEPRECATED."""
+    return tf.ones_like(x, dtype=dtype, name=name)
+
+
+@keras_core_export("keras_core._legacy.backend.permute_dimensions")
+def permute_dimensions(x, pattern):
+    """DEPRECATED."""
+    return tf.transpose(x, perm=pattern)
+
+
+@keras_core_export("keras_core._legacy.backend.pool2d")
+def pool2d(
+    x,
+    pool_size,
+    strides=(1, 1),
+    padding="valid",
+    data_format=None,
+    pool_mode="max",
+):
+    """DEPRECATED."""
+    if data_format is None:
+        data_format = backend.image_data_format()
+    if data_format not in {"channels_first", "channels_last"}:
+        raise ValueError(f"Unknown data_format: {data_format}")
+    if len(pool_size) != 2:
+        raise ValueError("`pool_size` must be a tuple of 2 integers.")
+    if len(strides) != 2:
+        raise ValueError("`strides` must be a tuple of 2 integers.")
+
+    x, tf_data_format = _preprocess_conv2d_input(x, data_format)
+    padding = _preprocess_padding(padding)
+    if tf_data_format == "NHWC":
+        strides = (1,) + strides + (1,)
+        pool_size = (1,) + pool_size + (1,)
+    else:
+        strides = (1, 1) + strides
+        pool_size = (1, 1) + pool_size
+
+    if pool_mode == "max":
+        x = tf.compat.v1.nn.max_pool(
+            x, pool_size, strides, padding=padding, data_format=tf_data_format
+        )
+    elif pool_mode == "avg":
+        x = tf.compat.v1.nn.avg_pool(
+            x, pool_size, strides, padding=padding, data_format=tf_data_format
+        )
+    else:
+        raise ValueError("Invalid pooling mode: " + str(pool_mode))
+
+    if data_format == "channels_first" and tf_data_format == "NHWC":
+        x = tf.transpose(x, (0, 3, 1, 2))  # NHWC -> NCHW
+    return x
+
+
+@keras_core_export("keras_core._legacy.backend.pool3d")
+def pool3d(
+    x,
+    pool_size,
+    strides=(1, 1, 1),
+    padding="valid",
+    data_format=None,
+    pool_mode="max",
+):
+    """DEPRECATED."""
+    if data_format is None:
+        data_format = backend.image_data_format()
+    if data_format not in {"channels_first", "channels_last"}:
+        raise ValueError(f"Unknown data_format: {data_format}")
+
+    x, tf_data_format = _preprocess_conv3d_input(x, data_format)
+    padding = _preprocess_padding(padding)
+    if tf_data_format == "NDHWC":
+        strides = (1,) + strides + (1,)
+        pool_size = (1,) + pool_size + (1,)
+    else:
+        strides = (1, 1) + strides
+        pool_size = (1, 1) + pool_size
+
+    if pool_mode == "max":
+        x = tf.nn.max_pool3d(
+            x, pool_size, strides, padding=padding, data_format=tf_data_format
+        )
+    elif pool_mode == "avg":
+        x = tf.nn.avg_pool3d(
+            x, pool_size, strides, padding=padding, data_format=tf_data_format
+        )
+    else:
+        raise ValueError("Invalid pooling mode: " + str(pool_mode))
+
+    if data_format == "channels_first" and tf_data_format == "NDHWC":
+        x = tf.transpose(x, (0, 4, 1, 2, 3))
+    return x
+
+
+@keras_core_export("keras_core._legacy.backend.pow")
+def pow(x, a):
+    """DEPRECATED."""
+    return tf.pow(x, a)
+
+
+@keras_core_export("keras_core._legacy.backend.prod")
+def prod(x, axis=None, keepdims=False):
+    """DEPRECATED."""
+    return tf.reduce_prod(x, axis, keepdims)
+
+
+@keras_core_export("keras_core._legacy.backend.random_bernoulli")
+def random_bernoulli(shape, p=0.0, dtype=None, seed=None):
+    """DEPRECATED."""
+    if dtype is None:
+        dtype = backend.floatx()
+    if seed is None:
+        seed = np.random.randint(10e6)
+    return tf.where(
+        tf.random.uniform(shape, dtype=dtype, seed=seed) <= p,
+        tf.ones(shape, dtype=dtype),
+        tf.zeros(shape, dtype=dtype),
+    )
+
+
+@keras_core_export("keras_core._legacy.backend.random_normal")
+def random_normal(shape, mean=0.0, stddev=1.0, dtype=None, seed=None):
+    """DEPRECATED."""
+    if dtype is None:
+        dtype = backend.floatx()
+    if seed is None:
+        seed = np.random.randint(10e6)
+    return tf.random.normal(
+        shape, mean=mean, stddev=stddev, dtype=dtype, seed=seed
+    )
+
+
+@keras_core_export("keras_core._legacy.backend.random_normal_variable")
+def random_normal_variable(
+    shape, mean, scale, dtype=None, name=None, seed=None
+):
+    """DEPRECATED."""
+    if dtype is None:
+        dtype = backend.floatx()
+    tf_dtype = tf.as_dtype(dtype)
+    if seed is None:
+        # ensure that randomness is conditioned by the Numpy RNG
+        seed = np.random.randint(10e8)
+    value = tf.compat.v1.random_normal_initializer(
+        mean, scale, dtype=tf_dtype, seed=seed
+    )(shape)
+    return variable(value, dtype=dtype, name=name)
+
+
+@keras_core_export("keras_core._legacy.backend.random_uniform")
+def random_uniform(shape, minval=0.0, maxval=1.0, dtype=None, seed=None):
+    """DEPRECATED."""
+    if dtype is None:
+        dtype = backend.floatx()
+    if seed is None:
+        seed = np.random.randint(10e6)
+    return tf.random.uniform(
+        shape, minval=minval, maxval=maxval, dtype=dtype, seed=seed
+    )
+
+
+@keras_core_export("keras_core._legacy.backend.random_uniform_variable")
+def random_uniform_variable(shape, low, high, dtype=None, name=None, seed=None):
+    """DEPRECATED."""
+    if dtype is None:
+        dtype = backend.floatx()
+    tf_dtype = tf.as_dtype(dtype)
+    if seed is None:
+        # ensure that randomness is conditioned by the Numpy RNG
+        seed = np.random.randint(10e8)
+    value = tf.compat.v1.random_uniform_initializer(
+        low, high, dtype=tf_dtype, seed=seed
+    )(shape)
+    return variable(value, dtype=dtype, name=name)
+
+
+@keras_core_export("keras_core._legacy.backend.reshape")
+def reshape(x, shape):
+    """DEPRECATED."""
+    return tf.reshape(x, shape)
+
+
+@keras_core_export("keras_core._legacy.backend.relu")
+def relu(x, alpha=0.0, max_value=None, threshold=0.0):
+    """DEPRECATED.
+
+    Rectified linear unit with optional leak, cap, and threshold.
+
+    Args:
+        x: Input (tensor or variable; numpy arrays, lists, and tuples are
+            also seen in practice).
+        alpha: Multiplier of the negative part; `0.0` disables the leak.
+        max_value: Optional upper bound applied with `tf.clip_by_value`.
+        threshold: Values of `x` not greater than this are zeroed in the
+            positive part.
+
+    Returns:
+        The activated tensor.
+    """
+    # While x can be a tensor or variable, we also see cases where
+    # numpy arrays, lists, tuples are passed as well.
+    # lists, tuples do not have 'dtype' attribute.
+    dtype = getattr(x, "dtype", backend.floatx())
+    if alpha != 0.0:
+        # Fast path: a plain leaky ReLU needs neither clipping nor a
+        # threshold.
+        if max_value is None and threshold == 0:
+            return tf.nn.leaky_relu(x, alpha=alpha)
+
+        # Must be computed from the original `x`, before it is rewritten
+        # below; it is subtracted (scaled by `alpha`) at the end.
+        if threshold != 0:
+            negative_part = tf.nn.relu(-x + threshold)
+        else:
+            negative_part = tf.nn.relu(-x)
+
+    clip_max = max_value is not None
+
+    if threshold != 0:
+        # computes x for x > threshold else 0
+        x = x * tf.cast(tf.greater(x, threshold), dtype=dtype)
+    elif max_value == 6:
+        # if no threshold, then can use nn.relu6 native TF op for performance
+        x = tf.nn.relu6(x)
+        # relu6 already caps the output, so no further clipping is needed.
+        clip_max = False
+    else:
+        x = tf.nn.relu(x)
+
+    if clip_max:
+        max_value = tf.convert_to_tensor(max_value, dtype=x.dtype)
+        zero = tf.convert_to_tensor(0, dtype=x.dtype)
+        x = tf.clip_by_value(x, zero, max_value)
+
+    if alpha != 0.0:
+        alpha = tf.convert_to_tensor(alpha, dtype=x.dtype)
+        x -= alpha * negative_part
+    return x
+
+
+@keras_core_export("keras_core._legacy.backend.repeat")
+def repeat(x, n):
+    """DEPRECATED."""
+    assert ndim(x) == 2
+    x = tf.expand_dims(x, 1)
+    pattern = tf.stack([1, n, 1])
+    return tf.tile(x, pattern)
+
+
+@keras_core_export("keras_core._legacy.backend.repeat_elements")
+def repeat_elements(x, rep, axis):
+    """DEPRECATED.
+
+    Repeats every element of `x` `rep` times along `axis`.
+
+    Args:
+        x: Input tensor.
+        rep: Number of repetitions of each element.
+        axis: Axis along which to repeat.
+
+    Returns:
+        The repeated tensor. When the size of `axis` is statically known
+        it is a concatenation of repeated slices; otherwise it is built
+        with `tf.tile` and `tf.reshape`.
+    """
+    x_shape = x.shape.as_list()
+    # For static axis
+    if x_shape[axis] is not None:
+        # slices along the repeat axis
+        splits = tf.split(value=x, num_or_size_splits=x_shape[axis], axis=axis)
+        # repeat each slice the given number of reps
+        x_rep = [s for s in splits for _ in range(rep)]
+        return concatenate(x_rep, axis)
+
+    # Here we use tf.tile to mimic behavior of np.repeat so that
+    # we can handle dynamic shapes (that include None).
+    # To do that, we need an auxiliary axis to repeat elements along
+    # it and then merge them along the desired axis.
+
+    # Repeating
+    # NOTE(review): `axis + 1` presumably assumes a non-negative `axis`
+    # on this dynamic path -- confirm.
+    auxiliary_axis = axis + 1
+    x_shape = tf.shape(x)
+    x_rep = tf.expand_dims(x, axis=auxiliary_axis)
+    reps = np.ones(len(x.shape) + 1)
+    reps[auxiliary_axis] = rep
+    x_rep = tf.tile(x_rep, reps)
+
+    # Merging
+    # Drop the auxiliary entry and scale the dynamic shape along `axis`.
+    reps = np.delete(reps, auxiliary_axis)
+    reps[axis] = rep
+    reps = tf.constant(reps, dtype="int32")
+    x_shape *= reps
+    x_rep = tf.reshape(x_rep, x_shape)
+
+    # Fix shape representation
+    # The static shape of the input is reused; its `axis` entry is None on
+    # this path.
+    x_shape = x.shape.as_list()
+    x_rep.set_shape(x_shape)
+    return x_rep
+
+
+@keras_core_export("keras_core._legacy.backend.resize_images")
+def resize_images(
+    x, height_factor, width_factor, data_format, interpolation="nearest"
+):
+    """DEPRECATED."""
+    if data_format == "channels_first":
+        rows, cols = 2, 3
+    elif data_format == "channels_last":
+        rows, cols = 1, 2
+    else:
+        raise ValueError(f"Invalid `data_format` argument: {data_format}")
+
+    new_shape = x.shape[rows : cols + 1]
+    if new_shape.is_fully_defined():
+        new_shape = tf.constant(new_shape.as_list(), dtype="int32")
+    else:
+        new_shape = tf.shape(x)[rows : cols + 1]
+    new_shape *= tf.constant(
+        np.array([height_factor, width_factor], dtype="int32")
+    )
+
+    if data_format == "channels_first":
+        x = permute_dimensions(x, [0, 2, 3, 1])
+    interpolations = {
+        "area": tf.image.ResizeMethod.AREA,
+        "bicubic": tf.image.ResizeMethod.BICUBIC,
+        "bilinear": tf.image.ResizeMethod.BILINEAR,
+        "gaussian": tf.image.ResizeMethod.GAUSSIAN,
+        "lanczos3": tf.image.ResizeMethod.LANCZOS3,
+        "lanczos5": tf.image.ResizeMethod.LANCZOS5,
+        "mitchellcubic": tf.image.ResizeMethod.MITCHELLCUBIC,
+        "nearest": tf.image.ResizeMethod.NEAREST_NEIGHBOR,
+    }
+    interploations_list = '"' + '", "'.join(interpolations.keys()) + '"'
+    if interpolation in interpolations:
+        x = tf.image.resize(x, new_shape, method=interpolations[interpolation])
+    else:
+        raise ValueError(
+            "`interpolation` argument should be one of: "
+            f'{interploations_list}. Received: "{interpolation}".'
+        )
+    if data_format == "channels_first":
+        x = permute_dimensions(x, [0, 3, 1, 2])
+
+    return x
+
+
+@keras_core_export("keras_core._legacy.backend.resize_volumes")
+def resize_volumes(x, depth_factor, height_factor, width_factor, data_format):
+    """DEPRECATED."""
+    if data_format == "channels_first":
+        output = repeat_elements(x, depth_factor, axis=2)
+        output = repeat_elements(output, height_factor, axis=3)
+        output = repeat_elements(output, width_factor, axis=4)
+        return output
+    elif data_format == "channels_last":
+        output = repeat_elements(x, depth_factor, axis=1)
+        output = repeat_elements(output, height_factor, axis=2)
+        output = repeat_elements(output, width_factor, axis=3)
+        return output
+    else:
+        raise ValueError(f"Invalid data_format: {data_format}")
+
+
+@keras_core_export("keras_core._legacy.backend.reverse")
+def reverse(x, axes):
+    """DEPRECATED."""
+    if isinstance(axes, int):
+        axes = [axes]
+    return tf.reverse(x, axes)
+
+
+@keras_core_export("keras_core._legacy.backend.rnn")
+def rnn(
+    step_function,
+    inputs,
+    initial_states,
+    go_backwards=False,
+    mask=None,
+    constants=None,
+    unroll=False,
+    input_length=None,
+    time_major=False,
+    zero_output_for_mask=False,
+    return_all_outputs=True,
+):
+    """DEPRECATED."""
+    if not tf.__internal__.tf2.enabled():
+        return_all_outputs = True  # Not supported in TF1.
+
+    def swap_batch_timestep(input_t):
+        # Swap the batch and timestep dim for the incoming tensor.
+        axes = list(range(len(input_t.shape)))
+        axes[0], axes[1] = 1, 0
+        return tf.transpose(input_t, axes)
+
+    if not time_major:
+        inputs = tf.nest.map_structure(swap_batch_timestep, inputs)
+
+    flatted_inputs = tf.nest.flatten(inputs)
+    time_steps = flatted_inputs[0].shape[0]
+    batch = flatted_inputs[0].shape[1]
+    time_steps_t = tf.shape(flatted_inputs[0])[0]
+
+    for input_ in flatted_inputs:
+        input_.shape.with_rank_at_least(3)
+
+    if mask is not None:
+        if mask.dtype != tf.bool:
+            mask = tf.cast(mask, tf.bool)
+        if len(mask.shape) == 2:
+            mask = expand_dims(mask)
+        if not time_major:
+            mask = swap_batch_timestep(mask)
+
+    if constants is None:
+        constants = []
+
+    # tf.where needs its condition tensor to be the same shape as its two
+    # result tensors, but in our case the condition (mask) tensor is
+    # (nsamples, 1), and inputs are (nsamples, ndimensions) or even more.
+    # So we need to broadcast the mask to match the shape of inputs.
+    # That's what the tile call does, it just repeats the mask along its
+    # second dimension n times.
+    def _expand_mask(mask_t, input_t, fixed_dim=1):
+        if tf.nest.is_nested(mask_t):
+            raise ValueError(
+                f"mask_t is expected to be tensor, but got {mask_t}"
+            )
+        if tf.nest.is_nested(input_t):
+            raise ValueError(
+                f"input_t is expected to be tensor, but got {input_t}"
+            )
+        rank_diff = len(input_t.shape) - len(mask_t.shape)
+        for _ in range(rank_diff):
+            mask_t = tf.expand_dims(mask_t, -1)
+        multiples = [1] * fixed_dim + input_t.shape.as_list()[fixed_dim:]
+        return tf.tile(mask_t, multiples)
+
+    if unroll:
+        if not time_steps:
+            raise ValueError("Unrolling requires a fixed number of timesteps.")
+        states = tuple(initial_states)
+        successive_states = []
+        successive_outputs = []
+
+        # Process the input tensors. The input tensor need to be split on the
+        # time_step dim, and reverse if go_backwards is True. In the case of
+        # nested input, the input is flattened and then transformed
+        # individually.  The result of this will be a tuple of lists, each of
+        # the item in tuple is list of the tensor with shape (batch, feature)
+        def _process_single_input_t(input_t):
+            input_t = tf.unstack(input_t)  # unstack for time_step dim
+            if go_backwards:
+                input_t.reverse()
+            return input_t
+
+        if tf.nest.is_nested(inputs):
+            processed_input = tf.nest.map_structure(
+                _process_single_input_t, inputs
+            )
+        else:
+            processed_input = (_process_single_input_t(inputs),)
+
+        def _get_input_tensor(time):
+            inp = [t_[time] for t_ in processed_input]
+            return tf.nest.pack_sequence_as(inputs, inp)
+
+        if mask is not None:
+            mask_list = tf.unstack(mask)
+            if go_backwards:
+                mask_list.reverse()
+
+            for i in range(time_steps):
+                inp = _get_input_tensor(i)
+                mask_t = mask_list[i]
+                output, new_states = step_function(
+                    inp, tuple(states) + tuple(constants)
+                )
+                tiled_mask_t = _expand_mask(mask_t, output)
+
+                if not successive_outputs:
+                    prev_output = zeros_like(output)
+                else:
+                    prev_output = successive_outputs[-1]
+
+                output = tf.where(tiled_mask_t, output, prev_output)
+
+                flat_states = tf.nest.flatten(states)
+                flat_new_states = tf.nest.flatten(new_states)
+                tiled_mask_t = tuple(
+                    _expand_mask(mask_t, s) for s in flat_states
+                )
+                flat_final_states = tuple(
+                    tf.where(m, s, ps)
+                    for m, s, ps in zip(
+                        tiled_mask_t, flat_new_states, flat_states
+                    )
+                )
+                states = tf.nest.pack_sequence_as(states, flat_final_states)
+
+                if return_all_outputs:
+                    successive_outputs.append(output)
+                    successive_states.append(states)
+                else:
+                    successive_outputs = [output]
+                    successive_states = [states]
+            last_output = successive_outputs[-1]
+            new_states = successive_states[-1]
+            outputs = tf.stack(successive_outputs)
+
+            if zero_output_for_mask:
+                last_output = tf.where(
+                    _expand_mask(mask_list[-1], last_output),
+                    last_output,
+                    zeros_like(last_output),
+                )
+                outputs = tf.where(
+                    _expand_mask(mask, outputs, fixed_dim=2),
+                    outputs,
+                    zeros_like(outputs),
+                )
+
+        else:  # mask is None
+            for i in range(time_steps):
+                inp = _get_input_tensor(i)
+                output, states = step_function(
+                    inp, tuple(states) + tuple(constants)
+                )
+                if return_all_outputs:
+                    successive_outputs.append(output)
+                    successive_states.append(states)
+                else:
+                    successive_outputs = [output]
+                    successive_states = [states]
+            last_output = successive_outputs[-1]
+            new_states = successive_states[-1]
+            outputs = tf.stack(successive_outputs)
+
+    else:  # Unroll == False
+        states = tuple(initial_states)
+
+        # Create input tensor array, if the inputs is nested tensors, then it
+        # will be flattened first, and tensor array will be created one per
+        # flattened tensor.
+        input_ta = tuple(
+            tf.TensorArray(
+                dtype=inp.dtype,
+                size=time_steps_t,
+                tensor_array_name=f"input_ta_{i}",
+            )
+            for i, inp in enumerate(flatted_inputs)
+        )
+        input_ta = tuple(
+            ta.unstack(input_)
+            if not go_backwards
+            else ta.unstack(reverse(input_, 0))
+            for ta, input_ in zip(input_ta, flatted_inputs)
+        )
+
+        # Get the time(0) input and compute the output for that, the output will
+        # be used to determine the dtype of output tensor array. Don't read from
+        # input_ta due to TensorArray clear_after_read default to True.
+        input_time_zero = tf.nest.pack_sequence_as(
+            inputs, [inp[0] for inp in flatted_inputs]
+        )
+        # output_time_zero is used to determine the cell output shape and its
+        # dtype.  the value is discarded.
+        output_time_zero, _ = step_function(
+            input_time_zero, tuple(initial_states) + tuple(constants)
+        )
+
+        output_ta_size = time_steps_t if return_all_outputs else 1
+        output_ta = tuple(
+            tf.TensorArray(
+                dtype=out.dtype,
+                size=output_ta_size,
+                element_shape=out.shape,
+                tensor_array_name=f"output_ta_{i}",
+            )
+            for i, out in enumerate(tf.nest.flatten(output_time_zero))
+        )
+
+        time = tf.constant(0, dtype="int32", name="time")
+
+        if input_length is None:
+            max_iterations = time_steps_t
+        else:
+            max_iterations = tf.reduce_max(input_length)
+
+        while_loop_kwargs = {
+            "cond": lambda time, *_: time < time_steps_t,
+            "maximum_iterations": max_iterations,
+            "parallel_iterations": 32,
+            "swap_memory": True,
+        }
+        if mask is not None:
+            if go_backwards:
+                mask = reverse(mask, 0)
+
+            mask_ta = tf.TensorArray(
+                dtype=tf.bool, size=time_steps_t, tensor_array_name="mask_ta"
+            )
+            mask_ta = mask_ta.unstack(mask)
+
+            def masking_fn(time):
+                return mask_ta.read(time)
+
+            def compute_masked_output(mask_t, flat_out, flat_mask):
+                tiled_mask_t = tuple(
+                    _expand_mask(mask_t, o, fixed_dim=len(mask_t.shape))
+                    for o in flat_out
+                )
+                return tuple(
+                    tf.where(m, o, fm)
+                    for m, o, fm in zip(tiled_mask_t, flat_out, flat_mask)
+                )
+
+        elif isinstance(input_length, tf.Tensor):
+            if go_backwards:
+                max_len = tf.reduce_max(input_length, axis=0)
+                rev_input_length = tf.subtract(max_len - 1, input_length)
+
+                def masking_fn(time):
+                    return tf.less(rev_input_length, time)
+
+            else:
+
+                def masking_fn(time):
+                    return tf.greater(input_length, time)
+
+            def compute_masked_output(mask_t, flat_out, flat_mask):
+                return tuple(
+                    tf.compat.v1.where(mask_t, o, zo)
+                    for (o, zo) in zip(flat_out, flat_mask)
+                )
+
+        else:
+            masking_fn = None
+
+        if masking_fn is not None:
+            # Mask for the T output will be base on the output of T - 1. In the
+            # case T = 0, a zero filled tensor will be used.
+            flat_zero_output = tuple(
+                tf.zeros_like(o) for o in tf.nest.flatten(output_time_zero)
+            )
+
+            def _step(time, output_ta_t, prev_output, *states):
+                """RNN step function.
+
+                Args:
+                    time: Current timestep value.
+                    output_ta_t: TensorArray.
+                    prev_output: tuple of outputs from time - 1.
+                    *states: List of states.
+
+                Returns:
+                    Tuple: `(time + 1, output_ta_t, output) + tuple(new_states)`
+                """
+                current_input = tuple(ta.read(time) for ta in input_ta)
+                # maybe set shape.
+                current_input = tf.nest.pack_sequence_as(inputs, current_input)
+                mask_t = masking_fn(time)
+                output, new_states = step_function(
+                    current_input, tuple(states) + tuple(constants)
+                )
+                # mask output
+                flat_output = tf.nest.flatten(output)
+                flat_mask_output = (
+                    flat_zero_output
+                    if zero_output_for_mask
+                    else tf.nest.flatten(prev_output)
+                )
+                flat_new_output = compute_masked_output(
+                    mask_t, flat_output, flat_mask_output
+                )
+
+                # mask states
+                flat_state = tf.nest.flatten(states)
+                flat_new_state = tf.nest.flatten(new_states)
+                for state, new_state in zip(flat_state, flat_new_state):
+                    if isinstance(new_state, tf.Tensor):
+                        new_state.set_shape(state.shape)
+                flat_final_state = compute_masked_output(
+                    mask_t, flat_new_state, flat_state
+                )
+                new_states = tf.nest.pack_sequence_as(
+                    new_states, flat_final_state
+                )
+
+                ta_index_to_write = time if return_all_outputs else 0
+                output_ta_t = tuple(
+                    ta.write(ta_index_to_write, out)
+                    for ta, out in zip(output_ta_t, flat_new_output)
+                )
+
+                return (time + 1, output_ta_t, tuple(flat_new_output)) + tuple(
+                    new_states
+                )
+
+            final_outputs = tf.compat.v1.while_loop(
+                body=_step,
+                loop_vars=(time, output_ta, flat_zero_output) + states,
+                **while_loop_kwargs,
+            )
+            # Skip final_outputs[2] which is the output for final timestep.
+            new_states = final_outputs[3:]
+        else:
+
+            def _step(time, output_ta_t, *states):
+                """RNN step function.
+
+                Args:
+                    time: Current timestep value.
+                    output_ta_t: TensorArray.
+                    *states: List of states.
+
+                Returns:
+                    Tuple: `(time + 1,output_ta_t) + tuple(new_states)`
+                """
+                current_input = tuple(ta.read(time) for ta in input_ta)
+                current_input = tf.nest.pack_sequence_as(inputs, current_input)
+                output, new_states = step_function(
+                    current_input, tuple(states) + tuple(constants)
+                )
+                flat_state = tf.nest.flatten(states)
+                flat_new_state = tf.nest.flatten(new_states)
+                for state, new_state in zip(flat_state, flat_new_state):
+                    if isinstance(new_state, tf.Tensor):
+                        new_state.set_shape(state.shape)
+
+                flat_output = tf.nest.flatten(output)
+                ta_index_to_write = time if return_all_outputs else 0
+                output_ta_t = tuple(
+                    ta.write(ta_index_to_write, out)
+                    for ta, out in zip(output_ta_t, flat_output)
+                )
+
+                new_states = tf.nest.pack_sequence_as(
+                    initial_states, flat_new_state
+                )
+                return (time + 1, output_ta_t) + tuple(new_states)
+
+            final_outputs = tf.compat.v1.while_loop(
+                body=_step,
+                loop_vars=(time, output_ta) + states,
+                **while_loop_kwargs,
+            )
+            new_states = final_outputs[2:]
+
+        output_ta = final_outputs[1]
+
+        outputs = tuple(o.stack() for o in output_ta)
+        last_output = tuple(o[-1] for o in outputs)
+
+        outputs = tf.nest.pack_sequence_as(output_time_zero, outputs)
+        last_output = tf.nest.pack_sequence_as(output_time_zero, last_output)
+
+    # static shape inference
+    def set_shape(output_):
+        if isinstance(output_, tf.Tensor):
+            shape = output_.shape.as_list()
+            if return_all_outputs:
+                shape[0] = time_steps
+            else:
+                shape[0] = 1
+            shape[1] = batch
+            output_.set_shape(shape)
+        return output_
+
+    outputs = tf.nest.map_structure(set_shape, outputs)
+
+    if not time_major:
+        outputs = tf.nest.map_structure(swap_batch_timestep, outputs)
+
+    return last_output, outputs, new_states
+
+
+@keras_core_export("keras_core._legacy.backend.round")
+def round(x):
+    """DEPRECATED. Thin wrapper returning `tf.round(x)`."""
+    return tf.round(x)
+
+
+@keras_core_export("keras_core._legacy.backend.separable_conv2d")
+def separable_conv2d(
+    x,
+    depthwise_kernel,
+    pointwise_kernel,
+    strides=(1, 1),
+    padding="valid",
+    data_format=None,
+    dilation_rate=(1, 1),
+):
+    """DEPRECATED. 2D separable convolution via `tf.nn.separable_conv2d`."""
+    if data_format is None:
+        data_format = backend.image_data_format()
+    if data_format not in {"channels_first", "channels_last"}:
+        raise ValueError(f"Unknown data_format: {data_format}")
+    if len(strides) != 2:
+        raise ValueError("`strides` must be a tuple of 2 integers.")
+
+    x, tf_data_format = _preprocess_conv2d_input(x, data_format)
+    padding = _preprocess_padding(padding)
+    if not isinstance(strides, tuple):
+        strides = tuple(strides)  # tuple needed for concatenation below
+    if tf_data_format == "NHWC":
+        strides = (1,) + strides + (1,)  # unit stride on N and C
+    else:
+        strides = (1, 1) + strides  # unit stride on the two leading axes
+
+    x = tf.nn.separable_conv2d(
+        x,
+        depthwise_kernel,
+        pointwise_kernel,
+        strides=strides,
+        padding=padding,
+        dilations=dilation_rate,
+        data_format=tf_data_format,
+    )
+    if data_format == "channels_first" and tf_data_format == "NHWC":
+        x = tf.transpose(x, (0, 3, 1, 2))  # NHWC -> NCHW
+    return x
+
+
+@keras_core_export("keras_core._legacy.backend.set_value")
+def set_value(x, value):
+    """DEPRECATED. Assign `value` (cast to `x`'s dtype via NumPy) to `x`."""
+    value = np.asarray(value, dtype=x.dtype.name)
+    x.assign(value)
+
+
+@keras_core_export("keras_core._legacy.backend.shape")
+def shape(x):
+    """DEPRECATED. Thin wrapper returning `tf.shape(x)`."""
+    return tf.shape(x)
+
+
+@keras_core_export("keras_core._legacy.backend.sigmoid")
+def sigmoid(x):
+    """DEPRECATED. Thin wrapper returning `tf.sigmoid(x)`."""
+    # No post-processing is applied to the TensorFlow result.
+    return tf.sigmoid(x)
+
+
+@keras_core_export("keras_core._legacy.backend.sign")
+def sign(x):
+    """DEPRECATED. Thin wrapper returning `tf.sign(x)`."""
+    return tf.sign(x)
+
+
+@keras_core_export("keras_core._legacy.backend.sin")
+def sin(x):
+    """DEPRECATED. Thin wrapper returning `tf.sin(x)`."""
+    return tf.sin(x)
+
+
+@keras_core_export("keras_core._legacy.backend.softmax")
+def softmax(x, axis=-1):
+    """DEPRECATED. Softmax over `axis`; rejects inputs of known rank <= 1."""
+    if x.shape.rank is not None and x.shape.rank <= 1:
+        raise ValueError(
+            f"Cannot apply softmax to a tensor that is 1D. Received input: {x}"
+        )
+
+    if isinstance(axis, int):
+        output = tf.nn.softmax(x, axis=axis)
+    else:
+        # Non-int (e.g. tuple) axis: normalize by hand, max-shifted first.
+        numerator = tf.exp(x - tf.reduce_max(x, axis=axis, keepdims=True))
+        denominator = tf.reduce_sum(numerator, axis=axis, keepdims=True)
+        output = numerator / denominator
+
+    # Cache the logits to use for crossentropy loss.
+    output._keras_logits = x
+    return output
+
+
+@keras_core_export("keras_core._legacy.backend.softplus")
+def softplus(x):
+    """DEPRECATED. Thin wrapper returning `tf.math.softplus(x)`."""
+    return tf.math.softplus(x)
+
+
+@keras_core_export("keras_core._legacy.backend.softsign")
+def softsign(x):
+    """DEPRECATED. Thin wrapper returning `tf.math.softsign(x)`."""
+    return tf.math.softsign(x)
+
+
+@keras_core_export("keras_core._legacy.backend.sparse_categorical_crossentropy")
+def sparse_categorical_crossentropy(
+    target, output, from_logits=False, axis=-1, ignore_class=None
+):
+    """DEPRECATED. Sparse categorical crossentropy for integer targets."""
+    target = tf.convert_to_tensor(target)
+    output = tf.convert_to_tensor(output)
+
+    target = cast(target, "int64")
+
+    if not from_logits:
+        epsilon_ = tf.convert_to_tensor(backend.epsilon(), output.dtype)
+        output = tf.clip_by_value(output, epsilon_, 1 - epsilon_)
+        output = tf.math.log(output)  # clipped probabilities -> log space
+
+    # Permute output so that the last axis contains the logits/probabilities.
+    if isinstance(output.shape, (tuple, list)):
+        output_rank = len(output.shape)
+    else:
+        output_rank = output.shape.ndims
+    if output_rank is not None:
+        axis %= output_rank  # normalize a negative axis
+        if axis != output_rank - 1:
+            permutation = list(
+                itertools.chain(
+                    range(axis), range(axis + 1, output_rank), [axis]
+                )
+            )
+            output = tf.transpose(output, perm=permutation)
+    elif axis != -1:
+        raise ValueError(
+            "Cannot compute sparse categorical crossentropy with `axis={}` "
+            "on an output tensor with unknown rank".format(axis)
+        )
+
+    # Try to adjust the shape so that rank of labels = rank of logits - 1.
+    output_shape = tf.shape(output)  # captured before any flattening below
+    target_rank = target.shape.ndims
+
+    update_shape = (
+        target_rank is not None
+        and output_rank is not None
+        and target_rank != output_rank - 1
+    )
+    if update_shape:
+        target = flatten(target)
+        output = tf.reshape(output, [-1, output_shape[-1]])
+
+    if ignore_class is not None:
+        valid_mask = tf.not_equal(target, cast(ignore_class, target.dtype))
+        target = target[valid_mask]
+        output = output[valid_mask]
+
+    res = tf.nn.sparse_softmax_cross_entropy_with_logits(
+        labels=target, logits=output
+    )
+
+    if ignore_class is not None:
+        res_shape = cast(output_shape[:-1], "int64")
+        valid_mask = tf.reshape(valid_mask, res_shape)
+        res = tf.scatter_nd(tf.where(valid_mask), res, res_shape)
+        res._keras_mask = valid_mask  # attached to the result as an attribute
+
+        return res
+
+    if update_shape and output_rank >= 3:
+        # If our output includes timesteps or
+        # spatial dimensions we need to reshape
+        res = tf.reshape(res, output_shape[:-1])
+
+    return res
+
+
+@keras_core_export("keras_core._legacy.backend.spatial_2d_padding")
+def spatial_2d_padding(x, padding=((1, 1), (1, 1)), data_format=None):
+    """DEPRECATED. Pad two axes of `x`, chosen according to `data_format`."""
+    assert len(padding) == 2
+    assert len(padding[0]) == 2
+    assert len(padding[1]) == 2
+    if data_format is None:
+        data_format = backend.image_data_format()
+    if data_format not in {"channels_first", "channels_last"}:
+        raise ValueError(f"Unknown data_format: {data_format}")
+
+    if data_format == "channels_first":
+        pattern = [[0, 0], [0, 0], list(padding[0]), list(padding[1])]
+    else:
+        pattern = [[0, 0], list(padding[0]), list(padding[1]), [0, 0]]
+    return tf.compat.v1.pad(x, pattern)
+
+
+@keras_core_export("keras_core._legacy.backend.spatial_3d_padding")
+def spatial_3d_padding(x, padding=((1, 1), (1, 1), (1, 1)), data_format=None):
+    """DEPRECATED. Pad three axes of `x`, chosen according to `data_format`."""
+    assert len(padding) == 3
+    assert len(padding[0]) == 2
+    assert len(padding[1]) == 2
+    assert len(padding[2]) == 2
+    if data_format is None:
+        data_format = backend.image_data_format()
+    if data_format not in {"channels_first", "channels_last"}:
+        raise ValueError(f"Unknown data_format: {data_format}")
+
+    if data_format == "channels_first":
+        pattern = [
+            [0, 0],
+            [0, 0],
+            [padding[0][0], padding[0][1]],
+            [padding[1][0], padding[1][1]],
+            [padding[2][0], padding[2][1]],
+        ]
+    else:
+        pattern = [
+            [0, 0],
+            [padding[0][0], padding[0][1]],
+            [padding[1][0], padding[1][1]],
+            [padding[2][0], padding[2][1]],
+            [0, 0],
+        ]
+    return tf.compat.v1.pad(x, pattern)
+
+
+@keras_core_export("keras_core._legacy.backend.sqrt")
+def sqrt(x):
+    """DEPRECATED. Square root of `x` after clamping negatives to zero."""
+    # Clamp against a zero of matching dtype before taking the root.
+    clamped = tf.maximum(x, tf.convert_to_tensor(0.0, x.dtype))
+    return tf.sqrt(clamped)
+
+
+@keras_core_export("keras_core._legacy.backend.square")
+def square(x):
+    """DEPRECATED. Thin wrapper returning `tf.square(x)`."""
+    return tf.square(x)
+
+
+@keras_core_export("keras_core._legacy.backend.squeeze")
+def squeeze(x, axis):
+    """DEPRECATED. Thin wrapper returning `tf.squeeze(x, [axis])`."""
+    return tf.squeeze(x, [axis])
+
+
+@keras_core_export("keras_core._legacy.backend.stack")
+def stack(x, axis=0):
+    """DEPRECATED. Thin wrapper returning `tf.stack(x, axis=axis)`."""
+    return tf.stack(x, axis=axis)
+
+
+@keras_core_export("keras_core._legacy.backend.std")
+def std(x, axis=None, keepdims=False):
+    """DEPRECATED. Reduce-std of `x`; bool input is first cast to floatx."""
+    if x.dtype.base_dtype == tf.bool:
+        x = tf.cast(x, backend.floatx())
+    return tf.math.reduce_std(x, axis=axis, keepdims=keepdims)
+
+
+@keras_core_export("keras_core._legacy.backend.stop_gradient")
+def stop_gradient(variables):
+    """DEPRECATED. Stop gradients of a tensor, or of each item (as a list)."""
+    if isinstance(variables, (list, tuple)):
+        return [tf.stop_gradient(v) for v in variables]
+    return tf.stop_gradient(variables)
+
+
+@keras_core_export("keras_core._legacy.backend.sum")
+def sum(x, axis=None, keepdims=False):
+    """DEPRECATED. Thin wrapper returning `tf.reduce_sum(x, axis, keepdims)`."""
+    return tf.reduce_sum(x, axis, keepdims)
+
+
+@keras_core_export("keras_core._legacy.backend.switch")
+def switch(condition, then_expression, else_expression):
+    """DEPRECATED. Choose one of two expressions/callables by `condition`."""
+    if condition.dtype != tf.bool:
+        condition = tf.cast(condition, "bool")
+    cond_ndim = ndim(condition)
+    if not cond_ndim:  # falsy ndim (0 or None): use tf.compat.v1.cond
+        if not callable(then_expression):
+
+            def then_expression_fn():
+                return then_expression
+
+        else:
+            then_expression_fn = then_expression
+        if not callable(else_expression):
+
+            def else_expression_fn():
+                return else_expression
+
+        else:
+            else_expression_fn = else_expression
+        x = tf.compat.v1.cond(condition, then_expression_fn, else_expression_fn)
+    else:
+        # tf.where needs its condition tensor
+        # to be the same shape as its two
+        # result tensors
+        if callable(then_expression):
+            then_expression = then_expression()
+        if callable(else_expression):
+            else_expression = else_expression()
+        expr_ndim = ndim(then_expression)
+        if cond_ndim > expr_ndim:
+            raise ValueError(
+                "Rank of `condition` should be less than or"
+                " equal to rank of `then_expression` and "
+                "`else_expression`. ndim(condition)="
+                + str(cond_ndim)
+                + ", ndim(then_expression)="
+                + str(expr_ndim)
+            )
+        if cond_ndim > 1:
+            ndim_diff = expr_ndim - cond_ndim
+            cond_shape = tf.concat(
+                [tf.shape(condition), [1] * ndim_diff], axis=0
+            )
+            condition = tf.reshape(condition, cond_shape)  # add trailing 1s
+            expr_shape = tf.shape(then_expression)
+            shape_diff = expr_shape - cond_shape
+            tile_shape = tf.where(
+                shape_diff > 0, expr_shape, tf.ones_like(expr_shape)
+            )
+            condition = tf.tile(condition, tile_shape)
+        x = tf.where(condition, then_expression, else_expression)
+    return x
+
+
+@keras_core_export("keras_core._legacy.backend.tanh")
+def tanh(x):
+    """DEPRECATED. Thin wrapper returning `tf.tanh(x)`."""
+    return tf.tanh(x)
+
+
+@keras_core_export("keras_core._legacy.backend.temporal_padding")
+def temporal_padding(x, padding=(1, 1)):
+    """DEPRECATED. Pad axis 1 of `x` by `(padding[0], padding[1])`."""
+    assert len(padding) == 2
+    pattern = [[0, 0], [padding[0], padding[1]], [0, 0]]  # three-axis pattern
+    return tf.compat.v1.pad(x, pattern)
+
+
+@keras_core_export("keras_core._legacy.backend.tile")
+def tile(x, n):
+    """DEPRECATED. `tf.tile(x, n)`; an int `n` is wrapped into a list."""
+    if isinstance(n, int):
+        n = [n]
+    return tf.tile(x, n)
+
+
+@keras_core_export("keras_core._legacy.backend.to_dense")
+def to_dense(tensor):
+    """DEPRECATED. Densify `tensor` when `is_sparse` reports it as sparse."""
+    if not is_sparse(tensor):
+        # Not sparse: hand the input back untouched.
+        return tensor
+    return tf.sparse.to_dense(tensor)
+
+
+@keras_core_export("keras_core._legacy.backend.transpose")
+def transpose(x):
+    """DEPRECATED. Thin wrapper returning `tf.transpose(x)`."""
+    return tf.transpose(x)
+
+
+@keras_core_export("keras_core._legacy.backend.truncated_normal")
+def truncated_normal(shape, mean=0.0, stddev=1.0, dtype=None, seed=None):
+    """DEPRECATED. Sample via `tf.random.truncated_normal` with defaults."""
+    # A missing dtype falls back to floatx; a missing seed is drawn from
+    # NumPy's global RNG (note that the bound `10e6` equals 1e7).
+    dtype = backend.floatx() if dtype is None else dtype
+    seed = np.random.randint(10e6) if seed is None else seed
+    return tf.random.truncated_normal(
+        shape, mean, stddev, dtype=dtype, seed=seed
+    )
+
+
+@keras_core_export("keras_core._legacy.backend.update")
+def update(x, new_x):
+    """DEPRECATED. Thin wrapper returning `tf.compat.v1.assign(x, new_x)`."""
+    return tf.compat.v1.assign(x, new_x)
+
+
+@keras_core_export("keras_core._legacy.backend.update_add")
+def update_add(x, increment):
+    """DEPRECATED. Thin wrapper around `tf.compat.v1.assign_add`."""
+    return tf.compat.v1.assign_add(x, increment)
+
+
+@keras_core_export("keras_core._legacy.backend.update_sub")
+def update_sub(x, decrement):
+    """DEPRECATED. Thin wrapper around `tf.compat.v1.assign_sub`."""
+    return tf.compat.v1.assign_sub(x, decrement)
+
+
+@keras_core_export("keras_core._legacy.backend.var")
+def var(x, axis=None, keepdims=False):
+    """DEPRECATED. Reduce-variance of `x`; bool input is cast to floatx."""
+    if x.dtype.base_dtype == tf.bool:
+        x = tf.cast(x, backend.floatx())
+    return tf.math.reduce_variance(x, axis=axis, keepdims=keepdims)
+
+
+@keras_core_export("keras_core._legacy.backend.variable")
+def variable(value, dtype=None, name=None, constraint=None):
+    """DEPRECATED. Make a `tf.Variable`; `tocoo` input gives a SparseTensor."""
+    if dtype is None:
+        dtype = backend.floatx()
+    if hasattr(value, "tocoo"):  # duck-typed: value offers COO conversion
+        sparse_coo = value.tocoo()
+        indices = np.concatenate(
+            (
+                np.expand_dims(sparse_coo.row, 1),
+                np.expand_dims(sparse_coo.col, 1),
+            ),
+            1,
+        )
+        v = tf.SparseTensor(
+            indices=indices,
+            values=sparse_coo.data,
+            dense_shape=sparse_coo.shape,
+        )
+        v._keras_shape = sparse_coo.shape
+        return v  # note: dtype/name/constraint are not used on this path
+    v = tf.Variable(
+        value, dtype=tf.as_dtype(dtype), name=name, constraint=constraint
+    )
+    return v
+
+
+@keras_core_export("keras_core._legacy.backend.zeros")
+def zeros(shape, dtype=None, name=None):
+    """DEPRECATED. Zeros tensor; made a variable if all dims are truthy."""
+    with tf.init_scope():
+        if dtype is None:
+            dtype = backend.floatx()
+        tf_dtype = tf.as_dtype(dtype)
+        v = tf.zeros(shape=shape, dtype=tf_dtype, name=name)
+        if py_all(v.shape.as_list()):  # no None (or zero) entries in shape
+            return variable(v, dtype=dtype, name=name)
+        return v
+
+
+@keras_core_export("keras_core._legacy.backend.zeros_like")
+def zeros_like(x, dtype=None, name=None):
+    """DEPRECATED. Thin wrapper around `tf.zeros_like`."""
+    return tf.zeros_like(x, dtype=dtype, name=name)
diff --git a/keras_core/legacy/layers.py b/keras_core/legacy/layers.py
new file mode 100644
index 000000000..bc7114929
--- /dev/null
+++ b/keras_core/legacy/layers.py
@@ -0,0 +1,245 @@
+"""Legacy Keras 1/2 layers.
+
+AlphaDropout
+RandomHeight
+RandomWidth
+ThresholdedReLU
+"""
+
+from keras_core import backend
+from keras_core.api_export import keras_core_export
+from keras_core.layers.layer import Layer
+from keras_core.utils.module_utils import tensorflow as tf
+
+
+@keras_core_export("keras_core._legacy.layers.AlphaDropout")
+class AlphaDropout(Layer):
+    """DEPRECATED. Alpha dropout, active only when `training` is true."""
+
+    def __init__(self, rate, noise_shape=None, seed=None, **kwargs):
+        super().__init__(**kwargs)
+        self.rate = rate
+        self.seed = seed
+        self.noise_shape = noise_shape
+        self.seed_generator = backend.random.SeedGenerator(seed)
+        self.supports_masking = True
+        self.built = True
+
+    def call(self, inputs, training=False):
+        if training and self.rate > 0:
+            alpha = 1.6732632423543772848170429916717
+            scale = 1.0507009873554804934193349852946
+            alpha_p = -alpha * scale
+
+            if self.noise_shape is None:
+                noise_shape = tf.shape(inputs)
+            else:
+                noise_shape = self.noise_shape
+            # The seed belongs to the sampler; `tf.greater_equal` rejects it.
+            kept_idx = tf.greater_equal(
+                backend.random.uniform(noise_shape, seed=self.seed_generator),
+                self.rate,
+            )
+            kept_idx = tf.cast(kept_idx, inputs.dtype)
+
+            # Get affine transformation params
+            a = ((1 - self.rate) * (1 + self.rate * alpha_p**2)) ** -0.5
+            b = -a * alpha_p * self.rate
+
+            # Apply mask
+            x = inputs * kept_idx + alpha_p * (1 - kept_idx)
+
+            # Do affine transformation
+            return a * x + b
+        return inputs
+
+    def get_config(self):
+        config = {"rate": self.rate, "seed": self.seed}
+        base_config = super().get_config()
+        return {**base_config, **config}
+
+    def compute_output_shape(self, input_shape):
+        return input_shape
+
+
+@keras_core_export("keras_core._legacy.layers.RandomHeight")
+class RandomHeight(Layer):
+    """DEPRECATED. Randomly resizes the height of inputs during training."""
+
+    def __init__(self, factor, interpolation="bilinear", seed=None, **kwargs):
+        super().__init__(**kwargs)
+        self.seed_generator = backend.random.SeedGenerator(seed)
+        self.factor = factor
+        if isinstance(factor, (tuple, list)):
+            self.height_lower = factor[0]
+            self.height_upper = factor[1]
+        else:
+            self.height_lower = -factor  # scalar factor: symmetric range
+            self.height_upper = factor
+
+        if self.height_upper < self.height_lower:
+            raise ValueError(
+                "`factor` argument cannot have an upper bound lesser than the "
+                f"lower bound. Received: factor={factor}"
+            )
+        if self.height_lower < -1.0 or self.height_upper < -1.0:
+            raise ValueError(
+                "`factor` argument must have values larger than -1. "
+                f"Received: factor={factor}"
+            )
+        self.interpolation = interpolation
+        self.seed = seed
+
+    def call(self, inputs, training=True):
+        inputs = tf.convert_to_tensor(inputs, dtype=self.compute_dtype)
+
+        def random_height_inputs(inputs):
+            """Inputs height-adjusted with random ops."""
+            inputs_shape = tf.shape(inputs)
+            img_hd = tf.cast(inputs_shape[-3], tf.float32)
+            img_wd = inputs_shape[-2]
+            height_factor = backend.random.uniform(
+                shape=[],
+                minval=(1.0 + self.height_lower),
+                maxval=(1.0 + self.height_upper),
+                seed=self.seed_generator,
+            )
+            adjusted_height = tf.cast(height_factor * img_hd, tf.int32)
+            adjusted_size = tf.stack([adjusted_height, img_wd])
+            output = tf.image.resize(
+                images=inputs,
+                size=adjusted_size,
+                method=self.interpolation,
+            )
+            # tf.image.resize will output float32 regardless of input type.
+            output = tf.cast(output, self.compute_dtype)
+            output_shape = inputs.shape.as_list()
+            output_shape[-3] = None  # height is no longer statically known
+            output.set_shape(output_shape)
+            return output
+
+        if training:
+            return random_height_inputs(inputs)
+        else:
+            return inputs
+
+    def compute_output_shape(self, input_shape):
+        input_shape = list(input_shape)
+        input_shape[-3] = None
+        return tuple(input_shape)
+
+    def get_config(self):
+        config = {
+            "factor": self.factor,
+            "interpolation": self.interpolation,
+            "seed": self.seed,
+        }
+        base_config = super().get_config()
+        return {**base_config, **config}
+
+
+@keras_core_export("keras_core._legacy.layers.RandomWidth")
+class RandomWidth(Layer):
+    """DEPRECATED. Randomly resizes the width of inputs during training."""
+
+    def __init__(self, factor, interpolation="bilinear", seed=None, **kwargs):
+        super().__init__(**kwargs)
+        self.seed_generator = backend.random.SeedGenerator(seed)
+        self.factor = factor
+        if isinstance(factor, (tuple, list)):
+            self.width_lower = factor[0]
+            self.width_upper = factor[1]
+        else:
+            self.width_lower = -factor  # scalar factor: symmetric range
+            self.width_upper = factor
+        if self.width_upper < self.width_lower:
+            raise ValueError(
+                "`factor` argument cannot have an upper bound less than the "
+                f"lower bound. Received: factor={factor}"
+            )
+        if self.width_lower < -1.0 or self.width_upper < -1.0:
+            raise ValueError(
+                "`factor` argument must have values larger than -1. "
+                f"Received: factor={factor}"
+            )
+        self.interpolation = interpolation
+        self.seed = seed
+
+    def call(self, inputs, training=True):
+        inputs = tf.convert_to_tensor(inputs, dtype=self.compute_dtype)
+
+        def random_width_inputs(inputs):
+            """Inputs width-adjusted with random ops."""
+            inputs_shape = tf.shape(inputs)
+            img_hd = inputs_shape[-3]
+            img_wd = tf.cast(inputs_shape[-2], tf.float32)
+            width_factor = backend.random.uniform(
+                shape=[],
+                minval=(1.0 + self.width_lower),
+                maxval=(1.0 + self.width_upper),
+                seed=self.seed_generator,
+            )
+            adjusted_width = tf.cast(width_factor * img_wd, tf.int32)
+            adjusted_size = tf.stack([img_hd, adjusted_width])
+            output = tf.image.resize(
+                images=inputs,
+                size=adjusted_size,
+                method=self.interpolation,
+            )
+            # tf.image.resize will output float32 regardless of input type.
+            output = tf.cast(output, self.compute_dtype)
+            output_shape = inputs.shape.as_list()
+            output_shape[-2] = None  # width is no longer statically known
+            output.set_shape(output_shape)
+            return output
+
+        if training:
+            return random_width_inputs(inputs)
+        else:
+            return inputs
+
+    def compute_output_shape(self, input_shape):
+        input_shape = list(input_shape)
+        input_shape[-2] = None
+        return tuple(input_shape)
+
+    def get_config(self):
+        config = {
+            "factor": self.factor,
+            "interpolation": self.interpolation,
+            "seed": self.seed,
+        }
+        base_config = super().get_config()
+        return {**base_config, **config}
+
+
+@keras_core_export("keras_core._legacy.layers.ThresholdedReLU")
+class ThresholdedReLU(Layer):
+    """DEPRECATED. Keeps inputs greater than `theta`; zeros the rest."""
+
+    def __init__(self, theta=1.0, **kwargs):
+        super().__init__(**kwargs)
+        if theta is None:
+            raise ValueError(
+                "Theta of a Thresholded ReLU layer cannot be None, expecting a "
+                f"float. Received: {theta}"
+            )
+        if theta < 0:
+            raise ValueError(
+                "The theta value of a Thresholded ReLU layer "
+                f"should be >=0. Received: {theta}"
+            )
+        self.supports_masking = True
+        self.theta = tf.convert_to_tensor(theta, dtype=self.compute_dtype)
+
+    def call(self, inputs):
+        dtype = self.compute_dtype
+        return inputs * tf.cast(tf.greater(inputs, self.theta), dtype)
+
+    def get_config(self):
+        config = {"theta": float(self.theta)}  # tensor -> plain Python float
+        base_config = super().get_config()
+        return {**base_config, **config}
+
+    def compute_output_shape(self, input_shape):
+        return input_shape
diff --git a/keras_core/legacy/losses.py b/keras_core/legacy/losses.py
new file mode 100644
index 000000000..8e0d25fd8
--- /dev/null
+++ b/keras_core/legacy/losses.py
@@ -0,0 +1,20 @@
+from keras_core.api_export import keras_core_export
+
+
+@keras_core_export("keras_core._legacy.losses.Reduction")
+class Reduction:
+    AUTO = "auto"
+    NONE = "none"
+    SUM = "sum"
+    SUM_OVER_BATCH_SIZE = "sum_over_batch_size"
+
+    @classmethod
+    def all(cls):
+        return (cls.AUTO, cls.NONE, cls.SUM, cls.SUM_OVER_BATCH_SIZE)
+
+    @classmethod
+    def validate(cls, key):
+        if key not in cls.all():
+            raise ValueError(
+                f'Invalid Reduction Key: {key}. Expected keys are "{cls.all()}"'
+            )
diff --git a/keras_core/legacy/preprocessing/__init__.py b/keras_core/legacy/preprocessing/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/keras_core/legacy/preprocessing/image.py b/keras_core/legacy/preprocessing/image.py
new file mode 100644
index 000000000..6f62e0cbb
--- /dev/null
+++ b/keras_core/legacy/preprocessing/image.py
@@ -0,0 +1,1898 @@
+"""Deprecated image preprocessing APIs from Keras 1."""
+
+import collections
+import multiprocessing
+import os
+import threading
+import warnings
+
+import numpy as np
+
+from keras_core import backend
+from keras_core.api_export import keras_core_export
+from keras_core.trainers.data_adapters.py_dataset_adapter import PyDataset
+from keras_core.utils import image_utils
+from keras_core.utils import io_utils
+
+
+@keras_core_export("keras_core._legacy.preprocessing.image.Iterator")
+class Iterator(PyDataset):
+    """Base class for image data iterators.
+
+    DEPRECATED.
+
+    Every `Iterator` must implement the `_get_batches_of_transformed_samples`
+    method.
+
+    Args:
+        n: Integer, total number of samples in the dataset to loop over.
+        batch_size: Integer, size of a batch.
+        shuffle: Boolean, whether to shuffle the data between epochs.
+        seed: Random seeding for data shuffling.
+    """
+
+    white_list_formats = ("png", "jpg", "jpeg", "bmp", "ppm", "tif", "tiff")
+
+    def __init__(self, n, batch_size, shuffle, seed):
+        self.n = n
+        self.batch_size = batch_size
+        self.seed = seed
+        self.shuffle = shuffle
+        self.batch_index = 0
+        self.total_batches_seen = 0
+        self.lock = threading.Lock()
+        self.index_array = None
+        self.index_generator = self._flow_index()
+
+    def _set_index_array(self):
+        self.index_array = np.arange(self.n)
+        if self.shuffle:
+            self.index_array = np.random.permutation(self.n)
+
+    def __getitem__(self, idx):
+        if idx >= len(self):
+            raise ValueError(
+                "Asked to retrieve element {idx}, "
+                "but the Sequence "
+                "has length {length}".format(idx=idx, length=len(self))
+            )
+        if self.seed is not None:
+            np.random.seed(self.seed + self.total_batches_seen)
+        self.total_batches_seen += 1
+        if self.index_array is None:
+            self._set_index_array()
+        index_array = self.index_array[
+            self.batch_size * idx : self.batch_size * (idx + 1)
+        ]
+        return self._get_batches_of_transformed_samples(index_array)
+
+    def __len__(self):
+        return (self.n + self.batch_size - 1) // self.batch_size  # round up
+
+    def on_epoch_end(self):
+        self._set_index_array()
+
+    def reset(self):
+        self.batch_index = 0
+
+    def _flow_index(self):
+        # Ensure self.batch_index is 0.
+        self.reset()
+        while 1:
+            if self.seed is not None:
+                np.random.seed(self.seed + self.total_batches_seen)
+            if self.batch_index == 0:
+                self._set_index_array()
+
+            if self.n == 0:
+                # Avoiding modulo by zero error
+                current_index = 0
+            else:
+                current_index = (self.batch_index * self.batch_size) % self.n
+            if self.n > current_index + self.batch_size:
+                self.batch_index += 1
+            else:
+                self.batch_index = 0
+            self.total_batches_seen += 1
+            yield self.index_array[
+                current_index : current_index + self.batch_size
+            ]
+
+    def __iter__(self):
+        # Needed if we want to do something like:
+        # for x, y in data_gen.flow(...):
+        return self
+
+    def __next__(self):
+        with self.lock:
+            index_array = next(self.index_generator)
+        # The transformation of images is not under thread lock
+        # so it can be done in parallel
+        return self._get_batches_of_transformed_samples(index_array)
+
+    def _get_batches_of_transformed_samples(self, index_array):
+        """Gets a batch of transformed samples.
+
+        Args:
+            index_array: Array of sample indices to include in batch.
+        Returns:
+            A batch of transformed samples.
+        """
+        raise NotImplementedError
+
+
+def _iter_valid_files(directory, white_list_formats, follow_links):
+    """Iterates on files with extension.
+
+    Args:
+        directory: Absolute path to the directory
+            containing files to be counted
+        white_list_formats: Set of strings containing allowed extensions for
+            the files to be counted.
+        follow_links: Boolean, follow symbolic links to subdirectories.
+    Yields:
+        Tuple of (root, filename) with extension in `white_list_formats`.
+    """
+
+    def _recursive_list(subpath):
+        return sorted(
+            os.walk(subpath, followlinks=follow_links), key=lambda x: x[0]
+        )
+
+    for root, _, files in _recursive_list(directory):
+        for fname in sorted(files):
+            if fname.lower().endswith(".tiff"):
+                warnings.warn(
+                    'Using ".tiff" files with multiple bands '
+                    "will cause distortion. Please verify your output."
+                )
+            if fname.lower().endswith(white_list_formats):
+                yield root, fname
+
+
+def _list_valid_filenames_in_directory(
+    directory, white_list_formats, split, class_indices, follow_links
+):
+    """Lists paths of files in `subdir` with extensions in `white_list_formats`.
+
+    Args:
+        directory: absolute path to a directory containing the files to list.
+            The directory name is used as class label
+            and must be a key of `class_indices`.
+        white_list_formats: set of strings containing allowed extensions for
+            the files to be counted.
+        split: tuple of floats (e.g. `(0.2, 0.6)`) to only take into
+            account a certain fraction of files in each directory.
+            E.g.: `segment=(0.6, 1.0)` would only account for last 40 percent
+            of images in each directory.
+        class_indices: dictionary mapping a class name to its index.
+        follow_links: boolean, follow symbolic links to subdirectories.
+
+    Returns:
+         classes: a list of class indices
+         filenames: the path of valid files in `directory`, relative from
+             `directory`'s parent (e.g., if `directory` is "dataset/class1",
+            the filenames will be
+            `["class1/file1.jpg", "class1/file2.jpg", ...]`).
+    """
+    dirname = os.path.basename(directory)
+    if split:
+        all_files = list(
+            _iter_valid_files(directory, white_list_formats, follow_links)
+        )
+        num_files = len(all_files)
+        start, stop = int(split[0] * num_files), int(split[1] * num_files)
+        valid_files = all_files[start:stop]
+    else:
+        valid_files = _iter_valid_files(
+            directory, white_list_formats, follow_links
+        )
+    classes = []
+    filenames = []
+    for root, fname in valid_files:
+        classes.append(class_indices[dirname])
+        absolute_path = os.path.join(root, fname)
+        relative_path = os.path.join(
+            dirname, os.path.relpath(absolute_path, directory)
+        )
+        filenames.append(relative_path)
+
+    return classes, filenames
+
+
+class BatchFromFilesMixin:
+    """Adds methods related to getting batches from filenames.
+
+    It includes the logic to transform image files to batches.
+    """
+
+    def set_processing_attrs(
+        self,
+        image_data_generator,
+        target_size,
+        color_mode,
+        data_format,
+        save_to_dir,
+        save_prefix,
+        save_format,
+        subset,
+        interpolation,
+        keep_aspect_ratio,
+    ):
+        """Sets attributes to use later for processing files into a batch.
+
+        Args:
+            image_data_generator: Instance of `ImageDataGenerator`
+                to use for random transformations and normalization.
+            target_size: tuple of integers, dimensions to resize input images
+            to.
+            color_mode: One of `"rgb"`, `"rgba"`, `"grayscale"`.
+                Color mode to read images.
+            data_format: String, one of `channels_first`, `channels_last`.
+            save_to_dir: Optional directory where to save the pictures
+                being yielded, in a viewable format. This is useful
+                for visualizing the random transformations being
+                applied, for debugging purposes.
+            save_prefix: String prefix to use for saving sample
+                images (if `save_to_dir` is set).
+            save_format: Format to use for saving sample images
+                (if `save_to_dir` is set).
+            subset: Subset of data (`"training"` or `"validation"`) if
+                validation_split is set in ImageDataGenerator.
+            interpolation: Interpolation method used to resample the image if
+                the target size is different from that of the loaded image.
+                Supported methods are "nearest", "bilinear", and "bicubic". If
+                PIL version 1.1.3 or newer is installed, "lanczos" is also
+                supported. If PIL version 3.4.0 or newer is installed, "box" and
+                "hamming" are also supported. By default, "nearest" is used.
+            keep_aspect_ratio: Boolean, whether to resize images to a target
+                size without aspect ratio distortion. The image is cropped in
+                the center with target aspect ratio before resizing.
+        """
+        self.image_data_generator = image_data_generator
+        self.target_size = tuple(target_size)
+        self.keep_aspect_ratio = keep_aspect_ratio
+        if color_mode not in {"rgb", "rgba", "grayscale"}:
+            raise ValueError(
+                f"Invalid color mode: {color_mode}"
+                '; expected "rgb", "rgba", or "grayscale".'
+            )
+        self.color_mode = color_mode
+        self.data_format = data_format
+        if self.color_mode == "rgba":
+            if self.data_format == "channels_last":
+                self.image_shape = self.target_size + (4,)
+            else:
+                self.image_shape = (4,) + self.target_size
+        elif self.color_mode == "rgb":
+            if self.data_format == "channels_last":
+                self.image_shape = self.target_size + (3,)
+            else:
+                self.image_shape = (3,) + self.target_size
+        else:
+            if self.data_format == "channels_last":
+                self.image_shape = self.target_size + (1,)
+            else:
+                self.image_shape = (1,) + self.target_size
+        self.save_to_dir = save_to_dir
+        self.save_prefix = save_prefix
+        self.save_format = save_format
+        self.interpolation = interpolation
+        if subset is not None:
+            validation_split = self.image_data_generator._validation_split
+            if subset == "validation":
+                split = (0, validation_split)
+            elif subset == "training":
+                split = (validation_split, 1)
+            else:
+                raise ValueError(
+                    f"Invalid subset name: {subset};"
+                    'expected "training" or "validation"'
+                )
+        else:
+            split = None
+        self.split = split
+        self.subset = subset
+
+    def _get_batches_of_transformed_samples(self, index_array):
+        """Gets a batch of transformed samples.
+
+        Args:
+            index_array: Array of sample indices to include in batch.
+        Returns:
+            A batch of transformed samples.
+        """
+        batch_x = np.zeros(
+            (len(index_array),) + self.image_shape, dtype=self.dtype
+        )
+        # build batch of image data
+        # self.filepaths is dynamic, is better to call it once outside the loop
+        filepaths = self.filepaths
+        for i, j in enumerate(index_array):
+            img = image_utils.load_img(
+                filepaths[j],
+                color_mode=self.color_mode,
+                target_size=self.target_size,
+                interpolation=self.interpolation,
+                keep_aspect_ratio=self.keep_aspect_ratio,
+            )
+            x = image_utils.img_to_array(img, data_format=self.data_format)
+            # Pillow images should be closed after `load_img`,
+            # but not PIL images.
+            if hasattr(img, "close"):
+                img.close()
+            if self.image_data_generator:
+                params = self.image_data_generator.get_random_transform(x.shape)
+                x = self.image_data_generator.apply_transform(x, params)
+                x = self.image_data_generator.standardize(x)
+            batch_x[i] = x
+        # optionally save augmented images to disk for debugging purposes
+        if self.save_to_dir:
+            for i, j in enumerate(index_array):
+                img = image_utils.array_to_img(
+                    batch_x[i], self.data_format, scale=True
+                )
+                fname = "{prefix}_{index}_{hash}.{format}".format(
+                    prefix=self.save_prefix,
+                    index=j,
+                    hash=np.random.randint(1e7),
+                    format=self.save_format,
+                )
+                img.save(os.path.join(self.save_to_dir, fname))
+        # build batch of labels
+        if self.class_mode == "input":
+            batch_y = batch_x.copy()
+        elif self.class_mode in {"binary", "sparse"}:
+            batch_y = np.empty(len(batch_x), dtype=self.dtype)
+            for i, n_observation in enumerate(index_array):
+                batch_y[i] = self.classes[n_observation]
+        elif self.class_mode == "categorical":
+            batch_y = np.zeros(
+                (len(batch_x), len(self.class_indices)), dtype=self.dtype
+            )
+            for i, n_observation in enumerate(index_array):
+                batch_y[i, self.classes[n_observation]] = 1.0
+        elif self.class_mode == "multi_output":
+            batch_y = [output[index_array] for output in self.labels]
+        elif self.class_mode == "raw":
+            batch_y = self.labels[index_array]
+        else:
+            return batch_x
+        if self.sample_weight is None:
+            return batch_x, batch_y
+        else:
+            return batch_x, batch_y, self.sample_weight[index_array]
+
+    @property
+    def filepaths(self):
+        """List of absolute paths to image files."""
+        raise NotImplementedError(
+            "`filepaths` property method has not "
+            "been implemented in {}.".format(type(self).__name__)
+        )
+
+    @property
+    def labels(self):
+        """Class labels of every observation."""
+        raise NotImplementedError(
+            "`labels` property method has not been implemented in {}.".format(
+                type(self).__name__
+            )
+        )
+
+    @property
+    def sample_weight(self):
+        raise NotImplementedError(
+            "`sample_weight` property method has not "
+            "been implemented in {}.".format(type(self).__name__)
+        )
+
+
+@keras_core_export("keras_core._legacy.preprocessing.image.DirectoryIterator")
+class DirectoryIterator(BatchFromFilesMixin, Iterator):
+    """Iterator capable of reading images from a directory on disk.
+
+    DEPRECATED.
+    """
+
+    allowed_class_modes = {"categorical", "binary", "sparse", "input", None}
+
+    def __init__(
+        self,
+        directory,
+        image_data_generator,
+        target_size=(256, 256),
+        color_mode="rgb",
+        classes=None,
+        class_mode="categorical",
+        batch_size=32,
+        shuffle=True,
+        seed=None,
+        data_format=None,
+        save_to_dir=None,
+        save_prefix="",
+        save_format="png",
+        follow_links=False,
+        subset=None,
+        interpolation="nearest",
+        keep_aspect_ratio=False,
+        dtype=None,
+    ):
+        if data_format is None:
+            data_format = backend.image_data_format()
+        if dtype is None:
+            dtype = backend.floatx()
+        super().set_processing_attrs(
+            image_data_generator,
+            target_size,
+            color_mode,
+            data_format,
+            save_to_dir,
+            save_prefix,
+            save_format,
+            subset,
+            interpolation,
+            keep_aspect_ratio,
+        )
+        self.directory = directory
+        self.classes = classes
+        if class_mode not in self.allowed_class_modes:
+            raise ValueError(
+                "Invalid class_mode: {}; expected one of: {}".format(
+                    class_mode, self.allowed_class_modes
+                )
+            )
+        self.class_mode = class_mode
+        self.dtype = dtype
+        # First, count the number of samples and classes.
+        self.samples = 0
+
+        if not classes:
+            classes = []
+            for subdir in sorted(os.listdir(directory)):
+                if os.path.isdir(os.path.join(directory, subdir)):
+                    classes.append(subdir)
+        self.num_classes = len(classes)
+        self.class_indices = dict(zip(classes, range(len(classes))))
+
+        pool = multiprocessing.pool.ThreadPool()
+
+        # Second, build an index of the images
+        # in the different class subfolders.
+        results = []
+        self.filenames = []
+        i = 0
+        for dirpath in (os.path.join(directory, subdir) for subdir in classes):
+            results.append(
+                pool.apply_async(
+                    _list_valid_filenames_in_directory,
+                    (
+                        dirpath,
+                        self.white_list_formats,
+                        self.split,
+                        self.class_indices,
+                        follow_links,
+                    ),
+                )
+            )
+        classes_list = []
+        for res in results:
+            classes, filenames = res.get()
+            classes_list.append(classes)
+            self.filenames += filenames
+        self.samples = len(self.filenames)
+        self.classes = np.zeros((self.samples,), dtype="int32")
+        for classes in classes_list:
+            self.classes[i : i + len(classes)] = classes
+            i += len(classes)
+
+        io_utils.print_msg(
+            f"Found {self.samples} images belonging to "
+            f"{self.num_classes} classes."
+        )
+        pool.close()
+        pool.join()
+        self._filepaths = [
+            os.path.join(self.directory, fname) for fname in self.filenames
+        ]
+        super().__init__(self.samples, batch_size, shuffle, seed)
+
+    @property
+    def filepaths(self):
+        return self._filepaths
+
+    @property
+    def labels(self):
+        return self.classes
+
+    @property  # mixin needs this property to work
+    def sample_weight(self):
+        # no sample weights will be returned
+        return None
+
+
+@keras_core_export("keras_core._legacy.preprocessing.image.NumpyArrayIterator")
+class NumpyArrayIterator(Iterator):
+    """Iterator yielding data from a Numpy array.
+
+    DEPRECATED.
+    """
+
+    def __init__(
+        self,
+        x,
+        y,
+        image_data_generator,
+        batch_size=32,
+        shuffle=False,
+        sample_weight=None,
+        seed=None,
+        data_format=None,
+        save_to_dir=None,
+        save_prefix="",
+        save_format="png",
+        subset=None,
+        ignore_class_split=False,
+        dtype=None,
+    ):
+        if data_format is None:
+            data_format = backend.image_data_format()
+        if dtype is None:
+            dtype = backend.floatx()
+        self.dtype = dtype
+        if isinstance(x, tuple) or isinstance(x, list):
+            if not isinstance(x[1], list):
+                x_misc = [np.asarray(x[1])]
+            else:
+                x_misc = [np.asarray(xx) for xx in x[1]]
+            x = x[0]
+            for xx in x_misc:
+                if len(x) != len(xx):
+                    raise ValueError(
+                        "All of the arrays in `x` "
+                        "should have the same length. "
+                        "Found a pair with: "
+                        f"len(x[0]) = {len(x)}, len(x[?]) = {len(xx)}"
+                    )
+        else:
+            x_misc = []
+
+        if y is not None and len(x) != len(y):
+            raise ValueError(
+                "`x` (images tensor) and `y` (labels) "
+                "should have the same length. "
+                f"Found: x.shape = {np.asarray(x).shape}, "
+                f"y.shape = {np.asarray(y).shape}"
+            )
+        if sample_weight is not None and len(x) != len(sample_weight):
+            raise ValueError(
+                "`x` (images tensor) and `sample_weight` "
+                "should have the same length. "
+                f"Found: x.shape = {np.asarray(x).shape}, "
+                f"sample_weight.shape = {np.asarray(sample_weight).shape}"
+            )
+        if subset is not None:
+            if subset not in {"training", "validation"}:
+                raise ValueError(
+                    f"Invalid subset name: {subset}"
+                    '; expected "training" or "validation".'
+                )
+            split_idx = int(len(x) * image_data_generator._validation_split)
+
+            if (
+                y is not None
+                and not ignore_class_split
+                and not np.array_equal(
+                    np.unique(y[:split_idx]), np.unique(y[split_idx:])
+                )
+            ):
+                raise ValueError(
+                    "Training and validation subsets "
+                    "have different number of classes after "
+                    "the split. If your numpy arrays are "
+                    "sorted by the label, you might want "
+                    "to shuffle them."
+                )
+
+            if subset == "validation":
+                x = x[:split_idx]
+                x_misc = [np.asarray(xx[:split_idx]) for xx in x_misc]
+                if y is not None:
+                    y = y[:split_idx]
+            else:
+                x = x[split_idx:]
+                x_misc = [np.asarray(xx[split_idx:]) for xx in x_misc]
+                if y is not None:
+                    y = y[split_idx:]
+
+        self.x = np.asarray(x, dtype=self.dtype)
+        self.x_misc = x_misc
+        if self.x.ndim != 4:
+            raise ValueError(
+                "Input data in `NumpyArrayIterator` "
+                "should have rank 4. You passed an array "
+                f"with shape {self.x.shape}"
+            )
+        channels_axis = 3 if data_format == "channels_last" else 1
+        if self.x.shape[channels_axis] not in {1, 3, 4}:
+            warnings.warn(
+                'NumpyArrayIterator is set to use the data format convention "'
+                + data_format
+                + '" (channels on axis '
+                + str(channels_axis)
+                + "), i.e. expected either 1, 3, or 4 channels on axis "
+                + str(channels_axis)
+                + ". However, it was passed an array with shape "
+                + str(self.x.shape)
+                + " ("
+                + str(self.x.shape[channels_axis])
+                + " channels)."
+            )
+        if y is not None:
+            self.y = np.asarray(y)
+        else:
+            self.y = None
+        if sample_weight is not None:
+            self.sample_weight = np.asarray(sample_weight)
+        else:
+            self.sample_weight = None
+        self.image_data_generator = image_data_generator
+        self.data_format = data_format
+        self.save_to_dir = save_to_dir
+        self.save_prefix = save_prefix
+        self.save_format = save_format
+        super().__init__(x.shape[0], batch_size, shuffle, seed)
+
+    def _get_batches_of_transformed_samples(self, index_array):
+        batch_x = np.zeros(
+            tuple([len(index_array)] + list(self.x.shape)[1:]), dtype=self.dtype
+        )
+        for i, j in enumerate(index_array):
+            x = self.x[j]
+            params = self.image_data_generator.get_random_transform(x.shape)
+            x = self.image_data_generator.apply_transform(
+                x.astype(self.dtype), params
+            )
+            x = self.image_data_generator.standardize(x)
+            batch_x[i] = x
+
+        if self.save_to_dir:
+            for i, j in enumerate(index_array):
+                img = image_utils.array_to_img(
+                    batch_x[i], self.data_format, scale=True
+                )
+                fname = "{prefix}_{index}_{hash}.{format}".format(
+                    prefix=self.save_prefix,
+                    index=j,
+                    hash=np.random.randint(1e4),
+                    format=self.save_format,
+                )
+                img.save(os.path.join(self.save_to_dir, fname))
+        batch_x_miscs = [xx[index_array] for xx in self.x_misc]
+        output = (batch_x if not batch_x_miscs else [batch_x] + batch_x_miscs,)
+        if self.y is None:
+            return output[0]
+        output += (self.y[index_array],)
+        if self.sample_weight is not None:
+            output += (self.sample_weight[index_array],)
+        return output
+
+
+def validate_filename(filename, white_list_formats):
+    """Check if a filename refers to a valid file.
+
+    Args:
+        filename: String, absolute path to a file
+        white_list_formats: Set, allowed file extensions
+    Returns:
+        A boolean value indicating if the filename is valid or not
+    """
+    return filename.lower().endswith(white_list_formats) and os.path.isfile(
+        filename
+    )
+
+
+class DataFrameIterator(BatchFromFilesMixin, Iterator):
+    """Iterator capable of reading images from a directory as a dataframe."""
+
+    allowed_class_modes = {
+        "binary",
+        "categorical",
+        "input",
+        "multi_output",
+        "raw",
+        "sparse",
+        None,
+    }
+
+    def __init__(
+        self,
+        dataframe,
+        directory=None,
+        image_data_generator=None,
+        x_col="filename",
+        y_col="class",
+        weight_col=None,
+        target_size=(256, 256),
+        color_mode="rgb",
+        classes=None,
+        class_mode="categorical",
+        batch_size=32,
+        shuffle=True,
+        seed=None,
+        data_format="channels_last",
+        save_to_dir=None,
+        save_prefix="",
+        save_format="png",
+        subset=None,
+        interpolation="nearest",
+        keep_aspect_ratio=False,
+        dtype="float32",
+        validate_filenames=True,
+    ):
+        super().set_processing_attrs(
+            image_data_generator,
+            target_size,
+            color_mode,
+            data_format,
+            save_to_dir,
+            save_prefix,
+            save_format,
+            subset,
+            interpolation,
+            keep_aspect_ratio,
+        )
+        df = dataframe.copy()
+        self.directory = directory or ""
+        self.class_mode = class_mode
+        self.dtype = dtype
+        # check that inputs match the required class_mode
+        self._check_params(df, x_col, y_col, weight_col, classes)
+        if (
+            validate_filenames
+        ):  # check which image files are valid and keep them
+            df = self._filter_valid_filepaths(df, x_col)
+        if class_mode not in ["input", "multi_output", "raw", None]:
+            df, classes = self._filter_classes(df, y_col, classes)
+            num_classes = len(classes)
+            # build an index of all the unique classes
+            self.class_indices = dict(zip(classes, range(len(classes))))
+        # retrieve only training or validation set
+        if self.split:
+            num_files = len(df)
+            start = int(self.split[0] * num_files)
+            stop = int(self.split[1] * num_files)
+            df = df.iloc[start:stop, :]
+        # get labels for each observation
+        if class_mode not in ["input", "multi_output", "raw", None]:
+            self.classes = self.get_classes(df, y_col)
+        self.filenames = df[x_col].tolist()
+        self._sample_weight = df[weight_col].values if weight_col else None
+
+        if class_mode == "multi_output":
+            self._targets = [np.array(df[col].tolist()) for col in y_col]
+        if class_mode == "raw":
+            self._targets = df[y_col].values
+        self.samples = len(self.filenames)
+        validated_string = (
+            "validated" if validate_filenames else "non-validated"
+        )
+        if class_mode in ["input", "multi_output", "raw", None]:
+            io_utils.print_msg(
+                f"Found {self.samples} {validated_string} image filenames."
+            )
+        else:
+            io_utils.print_msg(
+                f"Found {self.samples} {validated_string} image filenames "
+                f"belonging to {num_classes} classes."
+            )
+        self._filepaths = [
+            os.path.join(self.directory, fname) for fname in self.filenames
+        ]
+        super().__init__(self.samples, batch_size, shuffle, seed)
+
+    def _check_params(self, df, x_col, y_col, weight_col, classes):
+        """Validate the iterator arguments against the dataframe content.
+
+        Args:
+            df: Pandas dataframe holding the filename and target columns.
+            x_col: Name of the column with the filenames or filepaths.
+            y_col: Name of the target column, or a list of names when
+                `class_mode="multi_output"`.
+            weight_col: Name of the sample-weight column; a falsy value
+                skips the weight check.
+            classes: Optional collection of class names.
+
+        Raises:
+            ValueError: If `class_mode` is not supported, or if
+                `class_mode="binary"` does not come with exactly 2 classes.
+            TypeError: If `y_col` is not a list in multi-output mode, or if
+                a checked column holds values of an unexpected type.
+        """
+        mode = self.class_mode
+
+        def every_value_is(column, accepted):
+            # True when each entry of `df[column]` is an `accepted` instance.
+            return all(
+                df[column].apply(lambda value: isinstance(value, accepted))
+            )
+
+        # The class mode must be one of the currently supported ones.
+        if mode not in self.allowed_class_modes:
+            message = "Invalid class_mode: {}; expected one of: {}".format(
+                mode, self.allowed_class_modes
+            )
+            raise ValueError(message)
+
+        # Multi-output targets need several column names.
+        if mode == "multi_output" and not isinstance(y_col, list):
+            raise TypeError(
+                'If class_mode="{}", y_col must be a list. Received {}.'.format(
+                    mode, type(y_col).__name__
+                )
+            )
+
+        # Filenames/filepaths always have to be strings.
+        if not every_value_is(x_col, str):
+            raise TypeError(
+                f"All values in column x_col={x_col} must be strings."
+            )
+
+        # Binary and sparse labels have to be strings.
+        if mode in {"binary", "sparse"} and not every_value_is(y_col, str):
+            raise TypeError(
+                'If class_mode="{}", y_col="{}" column '
+                "values must be strings.".format(mode, y_col)
+            )
+
+        # Binary mode requires exactly two distinct classes, counted from
+        # `classes` when given and from the target column otherwise.
+        if mode == "binary":
+            if classes:
+                n_given = len(set(classes))
+                if n_given != 2:
+                    raise ValueError(
+                        'If class_mode="binary" there must be 2 '
+                        "classes. {} class/es were given.".format(n_given)
+                    )
+            else:
+                n_found = df[y_col].nunique()
+                if n_found != 2:
+                    raise ValueError(
+                        'If class_mode="binary" there must be 2 classes. '
+                        "Found {} classes.".format(n_found)
+                    )
+
+        # Categorical labels may be a string or a list/tuple.
+        if mode == "categorical" and not every_value_is(
+            y_col, (str, list, tuple)
+        ):
+            raise TypeError(
+                'If class_mode="{}", y_col="{}" column '
+                "values must be type string, list or tuple.".format(
+                    mode, y_col
+                )
+            )
+
+        # Tell the caller when `classes` was given but will not be used.
+        if classes and mode in {"input", "multi_output", "raw", None}:
+            warnings.warn(
+                '`classes` will be ignored given the class_mode="{}"'.format(
+                    mode
+                )
+            )
+
+        # A weight column, when present, must have a numeric dtype.
+        if weight_col and not issubclass(df[weight_col].dtype.type, np.number):
+            raise TypeError(f"Column weight_col={weight_col} must be numeric.")
+
+    def get_classes(self, df, y_col):
+        """Translate the labels of `df[y_col]` into class indices.
+
+        Args:
+            df: Dataframe (or any mapping) whose `y_col` entry is iterable.
+            y_col: Key of the label column.
+
+        Returns:
+            A list with one item per row: the `self.class_indices` value of
+            the label, or a list of such values when the label is itself a
+            list or tuple.
+        """
+        index_of = self.class_indices
+
+        def encode(entry):
+            if isinstance(entry, (list, tuple)):
+                return [index_of[name] for name in entry]
+            return index_of[entry]
+
+        return [encode(entry) for entry in df[y_col]]
+
+    @staticmethod
+    def _filter_classes(df, y_col, classes):
+        """Restrict the labels of `df[y_col]` to a set of classes.
+
+        Args:
+            df: Pandas dataframe; it is copied, the argument is not modified.
+            y_col: Name of the label column.
+            classes: Class names to keep. When falsy, the classes are
+                collected from the column instead.
+
+        Returns:
+            A `(dataframe, classes)` tuple. Rows whose `y_col` value is
+            missing (including rows left without any kept label) are
+            dropped. `classes` is the de-duplicated input in its original
+            order, or the sorted labels found in the column.
+
+        Raises:
+            TypeError: If `classes` is given and a label is not a string,
+                list or tuple.
+        """
+        df = df.copy()
+
+        if not classes:
+            # No explicit classes: gather every label present in the column.
+            found = set()
+            for entry in df[y_col]:
+                if isinstance(entry, (list, tuple)):
+                    found.update(entry)
+                else:
+                    found.add(entry)
+            return df.dropna(subset=[y_col]), sorted(found)
+
+        # Remove duplicated class names while keeping their order.
+        classes = list(collections.OrderedDict.fromkeys(classes).keys())
+
+        def keep_known(labels):
+            # Labels outside `classes` become None so `dropna` removes them.
+            if isinstance(labels, str):
+                return labels if labels in classes else None
+            if isinstance(labels, (list, tuple)):
+                kept = [cls for cls in labels if cls in classes]
+                return kept or None
+            raise TypeError(
+                "Expect string, list or tuple "
+                "but found {} in {} column ".format(type(labels), y_col)
+            )
+
+        df[y_col] = df[y_col].apply(keep_known)
+        return df.dropna(subset=[y_col]), classes
+
+    def _filter_valid_filepaths(self, df, x_col):
+        """Drop the dataframe rows whose filename is not valid.
+
+        Each entry of `x_col` is joined onto `self.directory` and checked
+        with `validate_filename` against `self.white_list_formats`. A
+        warning reports how many rows were rejected.
+
+        Args:
+            df: Pandas dataframe containing filenames in a column
+            x_col: string, column in `df` that contains the filenames or
+                filepaths
+        Returns:
+            The rows of `df` that passed the check.
+        """
+
+        def to_path(fname):
+            return os.path.join(self.directory, fname)
+
+        is_valid = df[x_col].map(to_path).apply(
+            validate_filename, args=(self.white_list_formats,)
+        )
+        n_invalid = (~is_valid).sum()
+        if n_invalid:
+            warnings.warn(
+                'Found {} invalid image filename(s) in x_col="{}". '
+                "These filename(s) will be ignored.".format(n_invalid, x_col)
+            )
+        return df[is_valid]
+
+    @property
+    def filepaths(self):
+        """The file paths stored in `self._filepaths`."""
+        return self._filepaths
+
+    @property
+    def labels(self):
+        """Targets of the iterator.
+
+        Returns `self._targets` for the `"multi_output"` and `"raw"` class
+        modes and `self.classes` for every other mode.
+        """
+        uses_targets = self.class_mode in {"multi_output", "raw"}
+        return self._targets if uses_targets else self.classes
+
+    @property
+    def sample_weight(self):
+        """The sample weights stored in `self._sample_weight`."""
+        return self._sample_weight
+
+
+def flip_axis(x, axis):
+    """Return `x` with its elements reversed along `axis`.
+
+    The axis is swapped to the front, reversed there and swapped back.
+    """
+    fronted = np.asarray(x).swapaxes(axis, 0)
+    return fronted[::-1, ...].swapaxes(0, axis)
+
+
+@keras_core_export("keras_core._legacy.preprocessing.image.ImageDataGenerator")
+class ImageDataGenerator:
+    """DEPRECATED."""
+
+    def __init__(
+        self,
+        featurewise_center=False,
+        samplewise_center=False,
+        featurewise_std_normalization=False,
+        samplewise_std_normalization=False,
+        zca_whitening=False,
+        zca_epsilon=1e-6,
+        rotation_range=0,
+        width_shift_range=0.0,
+        height_shift_range=0.0,
+        brightness_range=None,
+        shear_range=0.0,
+        zoom_range=0.0,
+        channel_shift_range=0.0,
+        fill_mode="nearest",
+        cval=0.0,
+        horizontal_flip=False,
+        vertical_flip=False,
+        rescale=None,
+        preprocessing_function=None,
+        data_format=None,
+        validation_split=0.0,
+        interpolation_order=1,
+        dtype=None,
+    ):
+        """Store and validate the augmentation/normalization settings.
+
+        Args:
+            featurewise_center: Subtract the mean computed by `fit()`.
+            samplewise_center: Subtract the mean of each standardized input.
+            featurewise_std_normalization: Divide by the std computed by
+                `fit()`. Forces `featurewise_center` on.
+            samplewise_std_normalization: Divide each standardized input by
+                its own std. Forces `samplewise_center` on.
+            zca_whitening: Apply the whitening matrix computed by `fit()`.
+                Forces `featurewise_center` on and
+                `featurewise_std_normalization` off.
+            zca_epsilon: Added to the singular values when the whitening
+                matrix is computed.
+            rotation_range: Bound of the uniformly drawn rotation angle.
+            width_shift_range: Float, or 1-D array-like/int of candidates.
+            height_shift_range: Float, or 1-D array-like/int of candidates.
+            brightness_range: `None`, or a tuple/list of two values between
+                which the brightness factor is drawn.
+            shear_range: Bound of the uniformly drawn shear value.
+            zoom_range: A number `z`, stored as `[1 - z, 1 + z]`, or a
+                sequence of two numbers `[lower, upper]`.
+            channel_shift_range: Bound of the uniformly drawn channel shift.
+            fill_mode: Forwarded to `apply_affine_transform`.
+            cval: Forwarded to `apply_affine_transform`.
+            horizontal_flip: Enable random horizontal flips.
+            vertical_flip: Enable random vertical flips.
+            rescale: Factor the data is multiplied by; skipped when falsy.
+            preprocessing_function: Callable applied first by
+                `standardize()`; skipped when falsy.
+            data_format: `"channels_last"` or `"channels_first"`. Defaults
+                to `backend.image_data_format()`.
+            validation_split: `0`/falsy, or a float strictly between 0
+                and 1.
+            interpolation_order: Forwarded to `apply_affine_transform` as
+                `order`.
+            dtype: Dtype used for the data. Defaults to `backend.floatx()`.
+
+        Raises:
+            ValueError: If `data_format`, `validation_split`, `zoom_range`
+                or `brightness_range` has an invalid value.
+        """
+        if data_format is None:
+            data_format = backend.image_data_format()
+        if dtype is None:
+            dtype = backend.floatx()
+
+        self.featurewise_center = featurewise_center
+        self.samplewise_center = samplewise_center
+        self.featurewise_std_normalization = featurewise_std_normalization
+        self.samplewise_std_normalization = samplewise_std_normalization
+        self.zca_whitening = zca_whitening
+        self.zca_epsilon = zca_epsilon
+        self.rotation_range = rotation_range
+        self.width_shift_range = width_shift_range
+        self.height_shift_range = height_shift_range
+        self.shear_range = shear_range
+        self.zoom_range = zoom_range
+        self.channel_shift_range = channel_shift_range
+        self.fill_mode = fill_mode
+        self.cval = cval
+        self.horizontal_flip = horizontal_flip
+        self.vertical_flip = vertical_flip
+        self.rescale = rescale
+        self.preprocessing_function = preprocessing_function
+        self.dtype = dtype
+        self.interpolation_order = interpolation_order
+
+        if data_format not in {"channels_last", "channels_first"}:
+            raise ValueError(
+                '`data_format` should be `"channels_last"` '
+                "(channel after row and column) or "
+                '`"channels_first"` (channel before row and column). '
+                f"Received: {data_format}"
+            )
+        self.data_format = data_format
+        # Axis indices refer to batched (rank 4) data; axis 0 is the batch.
+        if data_format == "channels_first":
+            self.channel_axis = 1
+            self.row_axis = 2
+            self.col_axis = 3
+        else:
+            self.channel_axis = 3
+            self.row_axis = 1
+            self.col_axis = 2
+        if validation_split and not 0 < validation_split < 1:
+            raise ValueError(
+                "`validation_split` must be strictly between 0 and 1. "
+                f"Received: {validation_split}"
+            )
+        self._validation_split = validation_split
+
+        # Statistics below are filled in by `fit()`.
+        self.mean = None
+        self.std = None
+        self.zca_whitening_matrix = None
+
+        # Normalize `zoom_range` to a `[lower, upper]` pair.
+        if isinstance(zoom_range, (float, int)):
+            self.zoom_range = [1 - zoom_range, 1 + zoom_range]
+        elif len(zoom_range) == 2 and all(
+            isinstance(val, (float, int)) for val in zoom_range
+        ):
+            self.zoom_range = [zoom_range[0], zoom_range[1]]
+        else:
+            raise ValueError(
+                "`zoom_range` should be a float or "
+                "a tuple or list of two floats. "
+                f"Received: {zoom_range}"
+            )
+        # Resolve option combinations that imply or exclude each other.
+        if zca_whitening:
+            if not featurewise_center:
+                self.featurewise_center = True
+                warnings.warn(
+                    "This ImageDataGenerator specifies "
+                    "`zca_whitening`, which overrides "
+                    "setting of `featurewise_center`."
+                )
+            if featurewise_std_normalization:
+                self.featurewise_std_normalization = False
+                warnings.warn(
+                    "This ImageDataGenerator specifies "
+                    "`zca_whitening` "
+                    "which overrides setting of "
+                    "`featurewise_std_normalization`."
+                )
+        if featurewise_std_normalization:
+            if not featurewise_center:
+                self.featurewise_center = True
+                warnings.warn(
+                    "This ImageDataGenerator specifies "
+                    "`featurewise_std_normalization`, "
+                    "which overrides setting of "
+                    "`featurewise_center`."
+                )
+        if samplewise_std_normalization:
+            if not samplewise_center:
+                self.samplewise_center = True
+                warnings.warn(
+                    "This ImageDataGenerator specifies "
+                    "`samplewise_std_normalization`, "
+                    "which overrides setting of "
+                    "`samplewise_center`."
+                )
+        if brightness_range is not None:
+            if (
+                not isinstance(brightness_range, (tuple, list))
+                or len(brightness_range) != 2
+            ):
+                raise ValueError(
+                    "`brightness_range` should be tuple or list of two "
+                    f"floats. Received: {brightness_range}"
+                )
+        self.brightness_range = brightness_range
+
+    def flow(
+        self,
+        x,
+        y=None,
+        batch_size=32,
+        shuffle=True,
+        sample_weight=None,
+        seed=None,
+        save_to_dir=None,
+        save_prefix="",
+        save_format="png",
+        ignore_class_split=False,
+        subset=None,
+    ):
+        """Build a `NumpyArrayIterator` over `x` (and `y`) for this generator.
+
+        Every argument is forwarded unchanged to `NumpyArrayIterator`; the
+        generator itself plus its `data_format` and `dtype` are passed along
+        as well.
+
+        Returns:
+            The constructed `NumpyArrayIterator`.
+        """
+        return NumpyArrayIterator(
+            x,
+            y,
+            self,
+            batch_size=batch_size,
+            shuffle=shuffle,
+            sample_weight=sample_weight,
+            seed=seed,
+            data_format=self.data_format,
+            save_to_dir=save_to_dir,
+            save_prefix=save_prefix,
+            save_format=save_format,
+            ignore_class_split=ignore_class_split,
+            subset=subset,
+            dtype=self.dtype,
+        )
+
+    def flow_from_directory(
+        self,
+        directory,
+        target_size=(256, 256),
+        color_mode="rgb",
+        classes=None,
+        class_mode="categorical",
+        batch_size=32,
+        shuffle=True,
+        seed=None,
+        save_to_dir=None,
+        save_prefix="",
+        save_format="png",
+        follow_links=False,
+        subset=None,
+        interpolation="nearest",
+        keep_aspect_ratio=False,
+    ):
+        """Build a `DirectoryIterator` over `directory` for this generator.
+
+        Every argument is forwarded unchanged to `DirectoryIterator`; the
+        generator itself plus its `data_format` and `dtype` are passed along
+        as well.
+
+        Returns:
+            The constructed `DirectoryIterator`.
+        """
+        return DirectoryIterator(
+            directory,
+            self,
+            target_size=target_size,
+            color_mode=color_mode,
+            keep_aspect_ratio=keep_aspect_ratio,
+            classes=classes,
+            class_mode=class_mode,
+            data_format=self.data_format,
+            batch_size=batch_size,
+            shuffle=shuffle,
+            seed=seed,
+            save_to_dir=save_to_dir,
+            save_prefix=save_prefix,
+            save_format=save_format,
+            follow_links=follow_links,
+            subset=subset,
+            interpolation=interpolation,
+            dtype=self.dtype,
+        )
+
+    def flow_from_dataframe(
+        self,
+        dataframe,
+        directory=None,
+        x_col="filename",
+        y_col="class",
+        weight_col=None,
+        target_size=(256, 256),
+        color_mode="rgb",
+        classes=None,
+        class_mode="categorical",
+        batch_size=32,
+        shuffle=True,
+        seed=None,
+        save_to_dir=None,
+        save_prefix="",
+        save_format="png",
+        subset=None,
+        interpolation="nearest",
+        validate_filenames=True,
+        **kwargs,
+    ):
+        """Build a `DataFrameIterator` over `dataframe` for this generator.
+
+        The named arguments are forwarded to `DataFrameIterator` together
+        with the generator itself and its `data_format` and `dtype`.
+
+        Args:
+            class_mode: Forwarded as is, except that the deprecated value
+                `"other"` is replaced by `"raw"` (with a warning).
+            **kwargs: Only the deprecated keys `has_ext`, `sort` and
+                `drop_duplicates` are inspected; each one triggers a
+                `DeprecationWarning`. Their values are not used and nothing
+                from `kwargs` is forwarded.
+
+        Returns:
+            The constructed `DataFrameIterator`.
+        """
+        if "has_ext" in kwargs:
+            warnings.warn(
+                "has_ext is deprecated, filenames in the dataframe have "
+                "to match the exact filenames in disk.",
+                DeprecationWarning,
+            )
+        if "sort" in kwargs:
+            warnings.warn(
+                "sort is deprecated, batches will be created in the "
+                "same order as the filenames provided if shuffle "
+                "is set to False.",
+                DeprecationWarning,
+            )
+        if class_mode == "other":
+            warnings.warn(
+                '`class_mode` "other" is deprecated, please use '
+                '`class_mode` "raw".',
+                DeprecationWarning,
+            )
+            class_mode = "raw"
+        if "drop_duplicates" in kwargs:
+            warnings.warn(
+                "drop_duplicates is deprecated, you can drop duplicates "
+                "by using the pandas.DataFrame.drop_duplicates method.",
+                DeprecationWarning,
+            )
+
+        return DataFrameIterator(
+            dataframe,
+            directory,
+            self,
+            x_col=x_col,
+            y_col=y_col,
+            weight_col=weight_col,
+            target_size=target_size,
+            color_mode=color_mode,
+            classes=classes,
+            class_mode=class_mode,
+            data_format=self.data_format,
+            batch_size=batch_size,
+            shuffle=shuffle,
+            seed=seed,
+            save_to_dir=save_to_dir,
+            save_prefix=save_prefix,
+            save_format=save_format,
+            subset=subset,
+            interpolation=interpolation,
+            validate_filenames=validate_filenames,
+            dtype=self.dtype,
+        )
+
+    def standardize(self, x):
+        """Applies the normalization configuration in-place to a batch of
+        inputs.
+
+        `x` is changed in-place since the function is mainly used internally
+        to standardize images and feed them to your network. If a copy of `x`
+        would be created instead it would have a significant performance cost.
+        If you want to apply this method without changing the input in-place
+        you can call the method creating a copy before:
+
+        standardize(np.copy(x))
+
+        Note that two steps rebind `x` instead of updating it in place: the
+        `preprocessing_function` call (whatever it returns is used) and the
+        ZCA whitening step (which builds a new array). Always use the
+        returned value.
+
+        Args:
+            x: Batch of inputs to be normalized.
+
+        Returns:
+            The inputs, normalized.
+        """
+        # Steps run in a fixed order: user function, rescale, per-input
+        # statistics, then the statistics learned by `fit()`.
+        if self.preprocessing_function:
+            x = self.preprocessing_function(x)
+        if self.rescale:
+            x *= self.rescale
+        # Mean/std below are taken over the whole of `x` (no axis argument).
+        if self.samplewise_center:
+            x -= np.mean(x, keepdims=True)
+        if self.samplewise_std_normalization:
+            # The small constant guards against a division by zero.
+            x /= np.std(x, keepdims=True) + 1e-6
+
+        # The feature-wise steps need `fit()` to have run; otherwise they
+        # are skipped with a warning.
+        if self.featurewise_center:
+            if self.mean is not None:
+                x -= self.mean
+            else:
+                warnings.warn(
+                    "This ImageDataGenerator specifies "
+                    "`featurewise_center`, but it hasn't "
+                    "been fit on any training data. Fit it "
+                    "first by calling `.fit(numpy_data)`."
+                )
+        if self.featurewise_std_normalization:
+            if self.std is not None:
+                x /= self.std + 1e-6
+            else:
+                warnings.warn(
+                    "This ImageDataGenerator specifies "
+                    "`featurewise_std_normalization`, "
+                    "but it hasn't "
+                    "been fit on any training data. Fit it "
+                    "first by calling `.fit(numpy_data)`."
+                )
+        if self.zca_whitening:
+            if self.zca_whitening_matrix is not None:
+                # Flatten the last three axes, whiten, restore the shape.
+                flat_x = x.reshape(-1, np.prod(x.shape[-3:]))
+                white_x = flat_x @ self.zca_whitening_matrix
+                x = np.reshape(white_x, x.shape)
+            else:
+                warnings.warn(
+                    "This ImageDataGenerator specifies "
+                    "`zca_whitening`, but it hasn't "
+                    "been fit on any training data. Fit it "
+                    "first by calling `.fit(numpy_data)`."
+                )
+        return x
+
+    def get_random_transform(self, img_shape, seed=None):
+        """Generates random parameters for a transformation.
+
+        The draws use NumPy's global random state and happen in a fixed
+        order, so the result for a given `seed` depends on that order.
+
+        Args:
+            img_shape: Tuple of integers.
+                Shape of the image that is transformed.
+            seed: Random seed. When not `None` it is passed to
+                `np.random.seed`, which reseeds the global NumPy generator.
+
+        Returns:
+            A dictionary containing randomly chosen parameters describing the
+            transformation.
+        """
+        # `img_shape` has no batch axis, hence the shift by one.
+        img_row_axis = self.row_axis - 1
+        img_col_axis = self.col_axis - 1
+
+        if seed is not None:
+            np.random.seed(seed)
+
+        if self.rotation_range:
+            theta = np.random.uniform(-self.rotation_range, self.rotation_range)
+        else:
+            theta = 0
+
+        # Note: `tx` comes from `height_shift_range` and is scaled by the
+        # number of rows; `ty` comes from `width_shift_range` and is scaled
+        # by the number of columns.
+        if self.height_shift_range:
+            try:  # 1-D array-like or int
+                tx = np.random.choice(self.height_shift_range)
+                tx *= np.random.choice([-1, 1])
+            except ValueError:  # floating point
+                tx = np.random.uniform(
+                    -self.height_shift_range, self.height_shift_range
+                )
+            # Values below 1 are a fraction of the image size.
+            if np.max(self.height_shift_range) < 1:
+                tx *= img_shape[img_row_axis]
+        else:
+            tx = 0
+
+        if self.width_shift_range:
+            try:  # 1-D array-like or int
+                ty = np.random.choice(self.width_shift_range)
+                ty *= np.random.choice([-1, 1])
+            except ValueError:  # floating point
+                ty = np.random.uniform(
+                    -self.width_shift_range, self.width_shift_range
+                )
+            # Values below 1 are a fraction of the image size.
+            if np.max(self.width_shift_range) < 1:
+                ty *= img_shape[img_col_axis]
+        else:
+            ty = 0
+
+        if self.shear_range:
+            shear = np.random.uniform(-self.shear_range, self.shear_range)
+        else:
+            shear = 0
+
+        # A `[1, 1]` zoom range means no zoom; no random number is drawn.
+        if self.zoom_range[0] == 1 and self.zoom_range[1] == 1:
+            zx, zy = 1, 1
+        else:
+            zx, zy = np.random.uniform(
+                self.zoom_range[0], self.zoom_range[1], 2
+            )
+
+        # A random number is drawn for each flip even when it is disabled;
+        # the multiplication then forces the result to a falsy value.
+        flip_horizontal = (np.random.random() < 0.5) * self.horizontal_flip
+        flip_vertical = (np.random.random() < 0.5) * self.vertical_flip
+
+        # `None` marks "no channel shift" / "no brightness change".
+        channel_shift_intensity = None
+        if self.channel_shift_range != 0:
+            channel_shift_intensity = np.random.uniform(
+                -self.channel_shift_range, self.channel_shift_range
+            )
+
+        brightness = None
+        if self.brightness_range is not None:
+            brightness = np.random.uniform(
+                self.brightness_range[0], self.brightness_range[1]
+            )
+
+        transform_parameters = {
+            "theta": theta,
+            "tx": tx,
+            "ty": ty,
+            "shear": shear,
+            "zx": zx,
+            "zy": zy,
+            "flip_horizontal": flip_horizontal,
+            "flip_vertical": flip_vertical,
+            "channel_shift_intensity": channel_shift_intensity,
+            "brightness": brightness,
+        }
+
+        return transform_parameters
+
+    def apply_transform(self, x, transform_parameters):
+        """Transform one image according to a dictionary of parameters.
+
+        The steps run in this order: affine transform, channel shift,
+        horizontal flip, vertical flip, brightness shift. A missing key
+        disables the corresponding step.
+
+        Args:
+            x: 3D tensor, single image.
+            transform_parameters: Dictionary with string - parameter pairs
+                describing the transformation.
+                Currently, the following parameters
+                from the dictionary are used:
+                - `'theta'`: Float. Rotation angle in degrees.
+                - `'tx'`: Float. Shift in the x direction.
+                - `'ty'`: Float. Shift in the y direction.
+                - `'shear'`: Float. Shear angle in degrees.
+                - `'zx'`: Float. Zoom in the x direction.
+                - `'zy'`: Float. Zoom in the y direction.
+                - `'flip_horizontal'`: Boolean. Horizontal flip.
+                - `'flip_vertical'`: Boolean. Vertical flip.
+                - `'channel_shift_intensity'`: Float. Channel shift intensity.
+                - `'brightness'`: Float. Brightness shift intensity.
+
+        Returns:
+            A transformed version of the input (same shape).
+        """
+        params = transform_parameters
+        # A single image has no batch axis, so every axis moves down by one.
+        row_ax = self.row_axis - 1
+        col_ax = self.col_axis - 1
+        channel_ax = self.channel_axis - 1
+
+        x = apply_affine_transform(
+            x,
+            params.get("theta", 0),
+            params.get("tx", 0),
+            params.get("ty", 0),
+            params.get("shear", 0),
+            params.get("zx", 1),
+            params.get("zy", 1),
+            row_axis=row_ax,
+            col_axis=col_ax,
+            channel_axis=channel_ax,
+            fill_mode=self.fill_mode,
+            cval=self.cval,
+            order=self.interpolation_order,
+        )
+
+        channel_shift = params.get("channel_shift_intensity")
+        if channel_shift is not None:
+            x = apply_channel_shift(x, channel_shift, channel_ax)
+
+        # Horizontal flips act on the columns, vertical flips on the rows.
+        flips = (("flip_horizontal", col_ax), ("flip_vertical", row_ax))
+        for key, axis in flips:
+            if params.get(key, False):
+                x = flip_axis(x, axis)
+
+        brightness = params.get("brightness")
+        if brightness is not None:
+            x = apply_brightness_shift(x, brightness, False)
+
+        return x
+
+    def random_transform(self, x, seed=None):
+        """Draw random transformation parameters and apply them to an image.
+
+        Args:
+            x: 3D tensor, single image.
+            seed: Random seed, forwarded to `get_random_transform`.
+
+        Returns:
+            A randomly transformed version of the input (same shape).
+        """
+        return self.apply_transform(
+            x, self.get_random_transform(x.shape, seed)
+        )
+
+    def fit(self, x, augment=False, rounds=1, seed=None):
+        """Compute the data-dependent statistics from sample data.
+
+        Sets `self.mean`, `self.std` and `self.zca_whitening_matrix` for
+        whichever of `featurewise_center`, `featurewise_std_normalization`
+        and `zca_whitening` is enabled; with none of them enabled nothing is
+        stored.
+
+        When `rescale` is set to a value, rescaling is applied to
+        sample data before computing the internal data stats.
+
+        Args:
+            x: Sample data. Should have rank 4.
+             In case of grayscale data,
+             the channels axis should have value 1, in case
+             of RGB data, it should have value 3, and in case
+             of RGBA data, it should have value 4.
+            augment: Boolean (default: False).
+                Whether to fit on randomly augmented samples.
+            rounds: Int (default: 1).
+                If using data augmentation (`augment=True`),
+                this is how many augmentation passes over the data to use.
+            seed: Int (default: None). Random seed.
+
+        Raises:
+            ValueError: If `x` does not have rank 4.
+        """
+        x = np.asarray(x, dtype=self.dtype)
+        if x.ndim != 4:
+            raise ValueError(
+                "Input to `.fit()` should have rank 4. Got array with shape: "
+                + str(x.shape)
+            )
+
+        channel_axis = self.channel_axis
+        n_channels = x.shape[channel_axis]
+        if n_channels not in {1, 3, 4}:
+            pieces = [
+                "Expected input to be images (as Numpy array) ",
+                'following the data format convention "',
+                self.data_format,
+                '" (channels on axis ',
+                str(channel_axis),
+                "), i.e. expected either 1, 3 or 4 channels on axis ",
+                str(channel_axis),
+                ". However, it was passed an array with shape ",
+                str(x.shape),
+                " (",
+                str(n_channels),
+                " channels).",
+            ]
+            warnings.warn("".join(pieces))
+
+        if seed is not None:
+            np.random.seed(seed)
+
+        # Work on a private copy so the caller's data is left untouched.
+        x = np.copy(x)
+        if self.rescale:
+            x *= self.rescale
+
+        if augment:
+            # `rounds` passes over the data, each sample randomly
+            # transformed, stored pass after pass.
+            n_samples = x.shape[0]
+            total = rounds * n_samples
+            augmented = np.zeros(
+                (total,) + tuple(x.shape[1:]), dtype=self.dtype
+            )
+            for slot in range(total):
+                augmented[slot] = self.random_transform(x[slot % n_samples])
+            x = augmented
+
+        # Per-channel statistics reduce over batch, rows and columns and are
+        # stored in a shape that broadcasts against a single image.
+        reduce_axes = (0, self.row_axis, self.col_axis)
+        stats_shape = [1, 1, 1]
+        stats_shape[channel_axis - 1] = n_channels
+
+        if self.featurewise_center:
+            self.mean = np.reshape(np.mean(x, axis=reduce_axes), stats_shape)
+            x -= self.mean
+
+        if self.featurewise_std_normalization:
+            self.std = np.reshape(np.std(x, axis=reduce_axes), stats_shape)
+            x /= self.std + 1e-6
+
+        if self.zca_whitening:
+            n = len(x)
+            flattened = np.reshape(x, (n, -1))
+
+            u, s, _ = np.linalg.svd(flattened.T, full_matrices=False)
+            s_inv = np.sqrt(n) / (s + self.zca_epsilon)
+            self.zca_whitening_matrix = (u * s_inv).dot(u.T)
+
+
+@keras_core_export("keras_core._legacy.preprocessing.image.random_rotation")
+def random_rotation(
+    x,
+    rg,
+    row_axis=1,
+    col_axis=2,
+    channel_axis=0,
+    fill_mode="nearest",
+    cval=0.0,
+    interpolation_order=1,
+):
+    """DEPRECATED.
+
+    Rotates `x` by an angle drawn uniformly from `[-rg, rg]`; the remaining
+    arguments are forwarded to `apply_affine_transform`.
+    """
+    return apply_affine_transform(
+        x,
+        theta=np.random.uniform(-rg, rg),
+        row_axis=row_axis,
+        col_axis=col_axis,
+        channel_axis=channel_axis,
+        fill_mode=fill_mode,
+        cval=cval,
+        order=interpolation_order,
+    )
+
+
+@keras_core_export("keras_core._legacy.preprocessing.image.random_shift")
+def random_shift(
+    x,
+    wrg,
+    hrg,
+    row_axis=1,
+    col_axis=2,
+    channel_axis=0,
+    fill_mode="nearest",
+    cval=0.0,
+    interpolation_order=1,
+):
+    """DEPRECATED.
+
+    Shifts `x` by random offsets: a fraction drawn from `[-hrg, hrg]` of the
+    size along `row_axis` is passed as `tx`, and a fraction drawn from
+    `[-wrg, wrg]` of the size along `col_axis` is passed as `ty`, to
+    `apply_affine_transform`.
+    """
+    n_rows = x.shape[row_axis]
+    n_cols = x.shape[col_axis]
+    # The row offset is drawn first, then the column offset.
+    row_offset = np.random.uniform(-hrg, hrg) * n_rows
+    col_offset = np.random.uniform(-wrg, wrg) * n_cols
+    return apply_affine_transform(
+        x,
+        tx=row_offset,
+        ty=col_offset,
+        row_axis=row_axis,
+        col_axis=col_axis,
+        channel_axis=channel_axis,
+        fill_mode=fill_mode,
+        cval=cval,
+        order=interpolation_order,
+    )
+
+
+@keras_core_export("keras_core._legacy.preprocessing.image.random_shear")
+def random_shear(
+    x,
+    intensity,
+    row_axis=1,
+    col_axis=2,
+    channel_axis=0,
+    fill_mode="nearest",
+    cval=0.0,
+    interpolation_order=1,
+):
+    """DEPRECATED.
+
+    Shears `x` by a value drawn uniformly from `[-intensity, intensity]`;
+    the remaining arguments are forwarded to `apply_affine_transform`.
+    """
+    return apply_affine_transform(
+        x,
+        shear=np.random.uniform(-intensity, intensity),
+        row_axis=row_axis,
+        col_axis=col_axis,
+        channel_axis=channel_axis,
+        fill_mode=fill_mode,
+        cval=cval,
+        order=interpolation_order,
+    )
+
+
+@keras_core_export("keras_core._legacy.preprocessing.image.random_zoom")
+def random_zoom(
+    x,
+    zoom_range,
+    row_axis=1,
+    col_axis=2,
+    channel_axis=0,
+    fill_mode="nearest",
+    cval=0.0,
+    interpolation_order=1,
+):
+    """DEPRECATED.
+
+    Zooms `x` by two factors drawn uniformly from
+    `[zoom_range[0], zoom_range[1]]`; a `(1, 1)` range applies no zoom and
+    draws no random number.
+
+    Raises:
+        ValueError: If `zoom_range` does not have exactly two entries.
+    """
+    if len(zoom_range) != 2:
+        raise ValueError(
+            "`zoom_range` should be a tuple or list of two floats. "
+            f"Received: {zoom_range}"
+        )
+
+    no_zoom = zoom_range[0] == 1 and zoom_range[1] == 1
+    zx, zy = (
+        (1, 1)
+        if no_zoom
+        else np.random.uniform(zoom_range[0], zoom_range[1], 2)
+    )
+    return apply_affine_transform(
+        x,
+        zx=zx,
+        zy=zy,
+        row_axis=row_axis,
+        col_axis=col_axis,
+        channel_axis=channel_axis,
+        fill_mode=fill_mode,
+        cval=cval,
+        order=interpolation_order,
+    )
+
+
+@keras_core_export("keras_core._legacy.preprocessing.image.apply_channel_shift")
+def apply_channel_shift(x, intensity, channel_axis=0):
+    """Performs a channel shift.
+
+    DEPRECATED.
+
+    `intensity` is added to every channel and the result is clipped to the
+    minimum and maximum of the input, so the value range never grows.
+
+    Args:
+        x: Input tensor. Must be 3D.
+        intensity: Transformation intensity.
+        channel_axis: Index of axis for channels in the input tensor.
+            Negative indices (e.g. `-1` for channels-last) are supported.
+
+    Returns:
+        Numpy image tensor with the same axis layout as `x`.
+    """
+    # Bring the channels to the front so the shift runs channel by channel.
+    channels_first = np.moveaxis(x, channel_axis, 0)
+    min_x, max_x = np.min(channels_first), np.max(channels_first)
+    shifted = np.stack(
+        [np.clip(channel + intensity, min_x, max_x) for channel in channels_first],
+        axis=0,
+    )
+    # `moveaxis` restores the original layout for negative indices too,
+    # which `np.rollaxis(x, 0, channel_axis + 1)` does not.
+    return np.moveaxis(shifted, 0, channel_axis)
+
+
+@keras_core_export(
+    "keras_core._legacy.preprocessing.image.random_channel_shift"
+)
+def random_channel_shift(x, intensity_range, channel_axis=0):
+    """Performs a random channel shift.
+
+    DEPRECATED.
+
+    The shift is drawn uniformly from `[-intensity_range, intensity_range]`
+    and applied with `apply_channel_shift`.
+
+    Args:
+        x: Input tensor. Must be 3D.
+        intensity_range: Transformation intensity.
+        channel_axis: Index of axis for channels in the input tensor.
+
+    Returns:
+        Numpy image tensor.
+    """
+    return apply_channel_shift(
+        x,
+        np.random.uniform(-intensity_range, intensity_range),
+        channel_axis=channel_axis,
+    )
+
+
+@keras_core_export(
+    "keras_core._legacy.preprocessing.image.apply_brightness_shift"
+)
+def apply_brightness_shift(x, brightness, scale=True):
+    """Performs a brightness shift.
+
+    DEPRECATED.
+
+    The array is converted to a PIL image, enhanced with
+    `ImageEnhance.Brightness` and converted back. Inputs with values
+    outside `[0, 255]` are always scaled for the conversion; when `scale`
+    is false they are mapped back to their original range afterwards.
+
+    Args:
+        x: Input tensor. Must be 3D.
+        brightness: Float. The new brightness value.
+        scale: Whether to rescale the image such that minimum and maximum values
+            are 0 and 255 respectively. Default: True.
+
+    Returns:
+        Numpy image tensor.
+
+    Raises:
+        ImportError: if PIL is not available.
+    """
+    from PIL import ImageEnhance
+
+    x_min, x_max = np.min(x), np.max(x)
+    out_of_range = (x_min < 0) or (x_max > 255)
+    image = image_utils.array_to_img(x, scale=out_of_range or scale)
+    image = ImageEnhance.Brightness(image).enhance(brightness)
+    x = image_utils.img_to_array(image)
+    if out_of_range and not scale:
+        # Undo the forced scaling: map [0, 255] back onto [x_min, x_max].
+        x = x / 255 * (x_max - x_min) + x_min
+    return x
+
+
+@keras_core_export("keras_core._legacy.preprocessing.image.random_brightness")
+def random_brightness(x, brightness_range, scale=True):
+    """Performs a random brightness shift.
+
+    DEPRECATED.
+
+    The brightness value is drawn uniformly between the two entries of
+    `brightness_range` and applied with `apply_brightness_shift`.
+
+    Args:
+        x: Input tensor. Must be 3D.
+        brightness_range: Tuple of floats; brightness range.
+        scale: Whether to rescale the image such that minimum and maximum values
+            are 0 and 255 respectively. Default: True.
+
+    Returns:
+        Numpy image tensor.
+
+    Raises:
+        ValueError: If `brightness_range` does not have exactly two entries.
+    """
+    if len(brightness_range) != 2:
+        raise ValueError(
+            "`brightness_range` should be tuple or list of two floats. "
+            f"Received: {brightness_range}"
+        )
+
+    u = np.random.uniform(brightness_range[0], brightness_range[1])
+    return apply_brightness_shift(x, u, scale)
+
+
+def transform_matrix_offset_center(matrix, x, y):
+    o_x = float(x) / 2 - 0.5
+    o_y = float(y) / 2 - 0.5
+    offset_matrix = np.array([[1, 0, o_x], [0, 1, o_y], [0, 0, 1]])
+    reset_matrix = np.array([[1, 0, -o_x], [0, 1, -o_y], [0, 0, 1]])
+    transform_matrix = np.dot(np.dot(offset_matrix, matrix), reset_matrix)
+    return transform_matrix
+
+
+@keras_core_export(
+    "keras_core._legacy.preprocessing.image.apply_affine_transform"
+)
+def apply_affine_transform(
+    x,
+    theta=0,
+    tx=0,
+    ty=0,
+    shear=0,
+    zx=1,
+    zy=1,
+    row_axis=1,
+    col_axis=2,
+    channel_axis=0,
+    fill_mode="nearest",
+    cval=0.0,
+    order=1,
+):
+    """Applies an affine transformation specified by the parameters given.
+
+    DEPRECATED.
+    """
+    from scipy import ndimage
+
+    # Input sanity checks:
+    # 1. x must 2D image with one or more channels (i.e., a 3D tensor)
+    # 2. channels must be either first or last dimension
+    if np.unique([row_axis, col_axis, channel_axis]).size != 3:
+        raise ValueError(
+            "'row_axis', 'col_axis', and 'channel_axis' must be distinct"
+        )
+
+    # shall we support negative indices?
+    valid_indices = set([0, 1, 2])
+    actual_indices = set([row_axis, col_axis, channel_axis])
+    if actual_indices != valid_indices:
+        raise ValueError(
+            f"Invalid axis' indices: {actual_indices - valid_indices}"
+        )
+
+    if x.ndim != 3:
+        raise ValueError("Input arrays must be multi-channel 2D images.")
+    if channel_axis not in [0, 2]:
+        raise ValueError(
+            "Channels are allowed and the first and last dimensions."
+        )
+
+    transform_matrix = None
+    if theta != 0:
+        theta = np.deg2rad(theta)
+        rotation_matrix = np.array(
+            [
+                [np.cos(theta), -np.sin(theta), 0],
+                [np.sin(theta), np.cos(theta), 0],
+                [0, 0, 1],
+            ]
+        )
+        transform_matrix = rotation_matrix
+
+    if tx != 0 or ty != 0:
+        shift_matrix = np.array([[1, 0, tx], [0, 1, ty], [0, 0, 1]])
+        if transform_matrix is None:
+            transform_matrix = shift_matrix
+        else:
+            transform_matrix = np.dot(transform_matrix, shift_matrix)
+
+    if shear != 0:
+        shear = np.deg2rad(shear)
+        shear_matrix = np.array(
+            [[1, -np.sin(shear), 0], [0, np.cos(shear), 0], [0, 0, 1]]
+        )
+        if transform_matrix is None:
+            transform_matrix = shear_matrix
+        else:
+            transform_matrix = np.dot(transform_matrix, shear_matrix)
+
+    if zx != 1 or zy != 1:
+        zoom_matrix = np.array([[zx, 0, 0], [0, zy, 0], [0, 0, 1]])
+        if transform_matrix is None:
+            transform_matrix = zoom_matrix
+        else:
+            transform_matrix = np.dot(transform_matrix, zoom_matrix)
+
+    if transform_matrix is not None:
+        h, w = x.shape[row_axis], x.shape[col_axis]
+        transform_matrix = transform_matrix_offset_center(
+            transform_matrix, h, w
+        )
+        x = np.rollaxis(x, channel_axis, 0)
+
+        # Matrix construction assumes that coordinates are x, y (in that order).
+        # However, regular numpy arrays use y,x (aka i,j) indexing.
+        # Possible solution is:
+        #   1. Swap the x and y axes.
+        #   2. Apply transform.
+        #   3. Swap the x and y axes again to restore image-like data ordering.
+        # Mathematically, it is equivalent to the following transformation:
+        # M' = PMP, where P is the permutation matrix, M is the original
+        # transformation matrix.
+        if col_axis > row_axis:
+            transform_matrix[:, [0, 1]] = transform_matrix[:, [1, 0]]
+            transform_matrix[[0, 1]] = transform_matrix[[1, 0]]
+        final_affine_matrix = transform_matrix[:2, :2]
+        final_offset = transform_matrix[:2, 2]
+
+        channel_images = [
+            ndimage.interpolation.affine_transform(
+                x_channel,
+                final_affine_matrix,
+                final_offset,
+                order=order,
+                mode=fill_mode,
+                cval=cval,
+            )
+            for x_channel in x
+        ]
+        x = np.stack(channel_images, axis=0)
+        x = np.rollaxis(x, 0, channel_axis + 1)
+    return x
diff --git a/keras_core/legacy/preprocessing/sequence.py b/keras_core/legacy/preprocessing/sequence.py
new file mode 100644
index 000000000..92e2f853b
--- /dev/null
+++ b/keras_core/legacy/preprocessing/sequence.py
@@ -0,0 +1,324 @@
+"""Deprecated sequence preprocessing APIs from Keras 1."""
+
+import json
+import random
+
+import numpy as np
+
+from keras_core.api_export import keras_core_export
+from keras_core.trainers.data_adapters.py_dataset_adapter import PyDataset
+
+
+@keras_core_export(
+    "keras_core._legacy.preprocessing.sequence.TimeseriesGenerator"
+)
+class TimeseriesGenerator(PyDataset):
+    """Utility class for generating batches of temporal data.
+
+    DEPRECATED.
+
+    This class takes in a sequence of data-points gathered at
+    equal intervals, along with time series parameters such as
+    stride, length of history, etc., to produce batches for
+    training/validation.
+
+    Arguments:
+        data: Indexable generator (such as list or Numpy array)
+            containing consecutive data points (timesteps).
+            The data should be at 2D, and axis 0 is expected
+            to be the time dimension.
+        targets: Targets corresponding to timesteps in `data`.
+            It should have same length as `data`.
+        length: Length of the output sequences (in number of timesteps).
+        sampling_rate: Period between successive individual timesteps
+            within sequences. For rate `r`, timesteps
+            `data[i]`, `data[i-r]`, ... `data[i - length]`
+            are used to create a sample sequence.
+        stride: Period between successive output sequences.
+            For stride `s`, consecutive output samples would
+            be centered around `data[i]`, `data[i+s]`, `data[i+2*s]`, etc.
+        start_index: Data points earlier than `start_index` will not be used
+            in the output sequences. This is useful to reserve part of the
+            data for test or validation.
+        end_index: Data points later than `end_index` will not be used
+            in the output sequences. This is useful to reserve part of the
+            data for test or validation.
+        shuffle: Whether to shuffle output samples,
+            or instead draw them in chronological order.
+        reverse: Boolean: if `True`, timesteps in each output sample will be
+            in reverse chronological order.
+        batch_size: Number of timeseries samples in each batch
+            (except maybe the last one).
+
+    Returns:
+        A PyDataset instance.
+    """
+
+    def __init__(
+        self,
+        data,
+        targets,
+        length,
+        sampling_rate=1,
+        stride=1,
+        start_index=0,
+        end_index=None,
+        shuffle=False,
+        reverse=False,
+        batch_size=128,
+    ):
+        if len(data) != len(targets):
+            raise ValueError(
+                "Data and targets have to be "
+                f"of same length. Data length is {len(data)} "
+                f"while target length is {len(targets)}"
+            )
+
+        self.data = data
+        self.targets = targets
+        self.length = length
+        self.sampling_rate = sampling_rate
+        self.stride = stride
+        self.start_index = start_index + length  # first row with a full history
+        if end_index is None:
+            end_index = len(data) - 1
+        self.end_index = end_index
+        self.shuffle = shuffle
+        self.reverse = reverse
+        self.batch_size = batch_size
+
+        if self.start_index > self.end_index:
+            raise ValueError(
+                f"`start_index+length={self.start_index} "
+                f"> end_index={self.end_index}` "
+                "is disallowed, as no part of the sequence "
+                "would be left to be used as current step."
+            )
+
+    def __len__(self):
+        return (
+            self.end_index - self.start_index + self.batch_size * self.stride
+        ) // (self.batch_size * self.stride)
+
+    def __getitem__(self, index):
+        if self.shuffle:
+            rows = np.random.randint(
+                self.start_index, self.end_index + 1, size=self.batch_size
+            )
+        else:
+            i = self.start_index + self.batch_size * self.stride * index
+            rows = np.arange(
+                i,
+                min(i + self.batch_size * self.stride, self.end_index + 1),
+                self.stride,
+            )
+
+        samples = np.array(
+            [
+                self.data[row - self.length : row : self.sampling_rate]
+                for row in rows
+            ]
+        )
+        targets = np.array([self.targets[row] for row in rows])
+
+        if self.reverse:
+            return samples[:, ::-1, ...], targets
+        return samples, targets
+
+    def get_config(self):
+        """Returns the TimeseriesGenerator configuration as Python dictionary.
+
+        Returns:
+            A Python dictionary with the TimeseriesGenerator configuration.
+        """
+        data = self.data
+        if type(self.data).__module__ == np.__name__:
+            data = self.data.tolist()
+        try:
+            json_data = json.dumps(data)
+        except TypeError as e:
+            raise TypeError(f"Data not JSON Serializable: {data}") from e
+
+        targets = self.targets
+        if type(self.targets).__module__ == np.__name__:
+            targets = self.targets.tolist()
+        try:
+            json_targets = json.dumps(targets)
+        except TypeError as e:
+            raise TypeError(f"Targets not JSON Serializable: {targets}") from e
+
+        return {
+            "data": json_data,
+            "targets": json_targets,
+            "length": self.length,
+            "sampling_rate": self.sampling_rate,
+            "stride": self.stride,
+            "start_index": self.start_index,  # NOTE(review): includes +length
+            "end_index": self.end_index,
+            "shuffle": self.shuffle,
+            "reverse": self.reverse,
+            "batch_size": self.batch_size,
+        }
+
+    def to_json(self, **kwargs):
+        """Returns a JSON string containing the generator's configuration.
+
+        Args:
+            **kwargs: Additional keyword arguments to be passed
+                to `json.dumps()`.
+
+        Returns:
+            A JSON string containing the generator's configuration.
+        """
+        config = self.get_config()
+        timeseries_generator_config = {
+            "class_name": self.__class__.__name__,
+            "config": config,
+        }
+        return json.dumps(timeseries_generator_config, **kwargs)
+
+
+@keras_core_export(
+    "keras_core._legacy.preprocessing.sequence.make_sampling_table"
+)
+def make_sampling_table(size, sampling_factor=1e-5):
+    """Generates a word rank-based probabilistic sampling table.
+
+    DEPRECATED.
+
+    Used for generating the `sampling_table` argument for `skipgrams`.
+    `sampling_table[i]` is the probability of sampling
+    the word i-th most common word in a dataset
+    (more common words should be sampled less frequently, for balance).
+
+    The sampling probabilities are generated according
+    to the sampling distribution used in word2vec:
+
+    ```
+    p(word) = (min(1, sqrt(word_frequency / sampling_factor) /
+        (word_frequency / sampling_factor)))
+    ```
+
+    We assume that the word frequencies follow Zipf's law (s=1) to derive
+    a numerical approximation of frequency(rank):
+
+    `frequency(rank) ~ 1/(rank * (log(rank) + gamma) + 1/2 - 1/(12*rank))`
+    where `gamma` is the Euler-Mascheroni constant.
+
+    Args:
+        size: Int, number of possible words to sample.
+        sampling_factor: The sampling factor in the word2vec formula.
+
+    Returns:
+        A 1D Numpy array of length `size` where the ith entry
+        is the probability that a word of rank i should be sampled.
+    """
+    gamma = 0.577
+    rank = np.arange(size)
+    rank[0] = 1
+    inv_fq = rank * (np.log(rank) + gamma) + 0.5 - 1.0 / (12.0 * rank)
+    f = sampling_factor * inv_fq
+
+    return np.minimum(1.0, f / np.sqrt(f))
+
+
+@keras_core_export("keras_core._legacy.preprocessing.sequence.skipgrams")
+def skipgrams(
+    sequence,
+    vocabulary_size,
+    window_size=4,
+    negative_samples=1.0,
+    shuffle=True,
+    categorical=False,
+    sampling_table=None,
+    seed=None,
+):
+    """Generates skipgram word pairs.
+
+    DEPRECATED.
+
+    This function transforms a sequence of word indexes (list of integers)
+    into tuples of words of the form:
+
+    - (word, word in the same window), with label 1 (positive samples).
+    - (word, random word from the vocabulary), with label 0 (negative samples).
+
+    Read more about Skipgram in this gnomic paper by Mikolov et al.:
+    [Efficient Estimation of Word Representations in
+    Vector Space](http://arxiv.org/pdf/1301.3781v3.pdf)
+
+    Args:
+        sequence: A word sequence (sentence), encoded as a list
+            of word indices (integers). If using a `sampling_table`,
+            word indices are expected to match the rank
+            of the words in a reference dataset (e.g. 10 would encode
+            the 10-th most frequently occurring token).
+            Note that index 0 is expected to be a non-word and will be skipped.
+        vocabulary_size: Int, maximum possible word index + 1
+        window_size: Int, size of sampling windows (technically half-window).
+            The window of a word `w_i` will be
+            `[i - window_size, i + window_size+1]`.
+        negative_samples: Float >= 0. 0 for no negative (i.e. random) samples.
+            1 for same number as positive samples.
+        shuffle: Whether to shuffle the word couples before returning them.
+        categorical: bool. if False, labels will be
+            integers (eg. `[0, 1, 1 .. ]`),
+            if `True`, labels will be categorical, e.g.
+            `[[1,0],[0,1],[0,1] .. ]`.
+        sampling_table: 1D array of size `vocabulary_size` where the entry i
+            encodes the probability to sample a word of rank i.
+        seed: Random seed.
+
+    Returns:
+        couples, labels: where `couples` are int pairs and
+            `labels` are either 0 or 1.
+
+    Note:
+        By convention, index 0 in the vocabulary is
+        a non-word and will be skipped.
+    """
+    couples = []
+    labels = []
+    for i, wi in enumerate(sequence):
+        if not wi:
+            continue
+        if sampling_table is not None:
+            if sampling_table[wi] < random.random():
+                continue
+
+        window_start = max(0, i - window_size)
+        window_end = min(len(sequence), i + window_size + 1)
+        for j in range(window_start, window_end):
+            if j != i:
+                wj = sequence[j]
+                if not wj:
+                    continue
+                couples.append([wi, wj])
+                if categorical:
+                    labels.append([0, 1])
+                else:
+                    labels.append(1)
+
+    if negative_samples > 0:
+        num_negative_samples = int(len(labels) * negative_samples)
+        words = [c[0] for c in couples]
+        random.shuffle(words)
+
+        couples += [
+            [words[i % len(words)], random.randint(1, vocabulary_size - 1)]
+            for i in range(num_negative_samples)
+        ]
+        if categorical:
+            labels += [[1, 0]] * num_negative_samples
+        else:
+            labels += [0] * num_negative_samples
+
+    if shuffle:
+        if seed is None:
+            seed = random.randint(0, 10e6)
+        random.seed(seed)
+        random.shuffle(couples)
+        random.seed(seed)
+        random.shuffle(labels)
+
+    return couples, labels
diff --git a/keras_core/legacy/preprocessing/text.py b/keras_core/legacy/preprocessing/text.py
new file mode 100644
index 000000000..f300953ca
--- /dev/null
+++ b/keras_core/legacy/preprocessing/text.py
@@ -0,0 +1,338 @@
+"""Deprecated text preprocessing APIs from Keras 1."""
+
+import collections
+import hashlib
+import json
+import warnings
+
+import numpy as np
+
+from keras_core.api_export import keras_core_export
+
+
+@keras_core_export(
+    "keras_core._legacy.preprocessing.text.text_to_word_sequence"
+)
+def text_to_word_sequence(
+    input_text,
+    filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n',
+    lower=True,
+    split=" ",
+):
+    """DEPRECATED."""
+    if lower:
+        input_text = input_text.lower()
+
+    translate_dict = {c: split for c in filters}
+    translate_map = str.maketrans(translate_dict)
+    input_text = input_text.translate(translate_map)
+
+    seq = input_text.split(split)
+    return [i for i in seq if i]
+
+
+@keras_core_export("keras_core._legacy.preprocessing.text.one_hot")
+def one_hot(
+    input_text,
+    n,
+    filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n',
+    lower=True,
+    split=" ",
+    analyzer=None,
+):
+    """DEPRECATED."""
+    return hashing_trick(
+        input_text,
+        n,
+        hash_function=hash,
+        filters=filters,
+        lower=lower,
+        split=split,
+        analyzer=analyzer,
+    )
+
+
+@keras_core_export("keras_core._legacy.preprocessing.text.hashing_trick")
+def hashing_trick(
+    text,
+    n,
+    hash_function=None,
+    filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n',
+    lower=True,
+    split=" ",
+    analyzer=None,
+):
+    """DEPRECATED."""
+    if hash_function is None:
+        hash_function = hash
+    elif hash_function == "md5":
+
+        def hash_function(w):
+            return int(hashlib.md5(w.encode()).hexdigest(), 16)
+
+    if analyzer is None:
+        seq = text_to_word_sequence(
+            text, filters=filters, lower=lower, split=split
+        )
+    else:
+        seq = analyzer(text)
+
+    return [(hash_function(w) % (n - 1) + 1) for w in seq]
+
+
+@keras_core_export("keras_core._legacy.preprocessing.text.Tokenizer")
+class Tokenizer(object):
+    """DEPRECATED."""
+
+    def __init__(
+        self,
+        num_words=None,
+        filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n',
+        lower=True,
+        split=" ",
+        char_level=False,
+        oov_token=None,
+        analyzer=None,
+        **kwargs
+    ):
+        # Legacy support
+        if "nb_words" in kwargs:
+            warnings.warn(
+                "The `nb_words` argument in `Tokenizer` "
+                "has been renamed `num_words`."
+            )
+            num_words = kwargs.pop("nb_words")
+        document_count = kwargs.pop("document_count", 0)
+        if kwargs:
+            raise TypeError("Unrecognized keyword arguments: " + str(kwargs))
+
+        self.word_counts = collections.OrderedDict()
+        self.word_docs = collections.defaultdict(int)
+        self.filters = filters
+        self.split = split
+        self.lower = lower
+        self.num_words = num_words
+        self.document_count = document_count
+        self.char_level = char_level
+        self.oov_token = oov_token
+        self.index_docs = collections.defaultdict(int)
+        self.word_index = {}
+        self.index_word = {}
+        self.analyzer = analyzer
+
+    def fit_on_texts(self, texts):
+        for text in texts:
+            self.document_count += 1
+            if self.char_level or isinstance(text, list):
+                if self.lower:
+                    if isinstance(text, list):
+                        text = [text_elem.lower() for text_elem in text]
+                    else:
+                        text = text.lower()
+                seq = text
+            else:
+                if self.analyzer is None:
+                    seq = text_to_word_sequence(
+                        text,
+                        filters=self.filters,
+                        lower=self.lower,
+                        split=self.split,
+                    )
+                else:
+                    seq = self.analyzer(text)
+            for w in seq:
+                if w in self.word_counts:
+                    self.word_counts[w] += 1
+                else:
+                    self.word_counts[w] = 1
+            for w in set(seq):
+                # In how many documents each word occurs
+                self.word_docs[w] += 1
+
+        wcounts = list(self.word_counts.items())
+        wcounts.sort(key=lambda x: x[1], reverse=True)
+        # forcing the oov_token to index 1 if it exists
+        if self.oov_token is None:
+            sorted_voc = []
+        else:
+            sorted_voc = [self.oov_token]
+        sorted_voc.extend(wc[0] for wc in wcounts)
+
+        # note that index 0 is reserved, never assigned to an existing word
+        self.word_index = dict(
+            zip(sorted_voc, list(range(1, len(sorted_voc) + 1)))
+        )
+
+        self.index_word = {c: w for w, c in self.word_index.items()}
+
+        for w, c in list(self.word_docs.items()):
+            self.index_docs[self.word_index[w]] = c
+
+    def fit_on_sequences(self, sequences):
+        self.document_count += len(sequences)
+        for seq in sequences:
+            seq = set(seq)
+            for i in seq:
+                self.index_docs[i] += 1
+
+    def texts_to_sequences(self, texts):
+        return list(self.texts_to_sequences_generator(texts))
+
+    def texts_to_sequences_generator(self, texts):
+        num_words = self.num_words
+        oov_token_index = self.word_index.get(self.oov_token)
+        for text in texts:
+            if self.char_level or isinstance(text, list):
+                if self.lower:
+                    if isinstance(text, list):
+                        text = [text_elem.lower() for text_elem in text]
+                    else:
+                        text = text.lower()
+                seq = text
+            else:
+                if self.analyzer is None:
+                    seq = text_to_word_sequence(
+                        text,
+                        filters=self.filters,
+                        lower=self.lower,
+                        split=self.split,
+                    )
+                else:
+                    seq = self.analyzer(text)
+            vect = []
+            for w in seq:
+                i = self.word_index.get(w)
+                if i is not None:
+                    if num_words and i >= num_words:
+                        if oov_token_index is not None:
+                            vect.append(oov_token_index)
+                    else:
+                        vect.append(i)
+                elif self.oov_token is not None:
+                    vect.append(oov_token_index)
+            yield vect
+
+    def sequences_to_texts(self, sequences):
+        return list(self.sequences_to_texts_generator(sequences))
+
+    def sequences_to_texts_generator(self, sequences):
+        num_words = self.num_words
+        oov_token_index = self.word_index.get(self.oov_token)
+        for seq in sequences:
+            vect = []
+            for num in seq:
+                word = self.index_word.get(num)
+                if word is not None:
+                    if num_words and num >= num_words:
+                        if oov_token_index is not None:
+                            vect.append(self.index_word[oov_token_index])
+                    else:
+                        vect.append(word)
+                elif self.oov_token is not None:
+                    vect.append(self.index_word[oov_token_index])
+            vect = " ".join(vect)
+            yield vect
+
+    def texts_to_matrix(self, texts, mode="binary"):
+        sequences = self.texts_to_sequences(texts)
+        return self.sequences_to_matrix(sequences, mode=mode)
+
+    def sequences_to_matrix(self, sequences, mode="binary"):
+        if not self.num_words:
+            if self.word_index:
+                num_words = len(self.word_index) + 1
+            else:
+                raise ValueError(
+                    "Specify a dimension (`num_words` argument), "
+                    "or fit on some text data first."
+                )
+        else:
+            num_words = self.num_words
+
+        if mode == "tfidf" and not self.document_count:
+            raise ValueError(
+                "Fit the Tokenizer on some data before using tfidf mode."
+            )
+
+        x = np.zeros((len(sequences), num_words))
+        for i, seq in enumerate(sequences):
+            if not seq:
+                continue
+            counts = collections.defaultdict(int)
+            for j in seq:
+                if j >= num_words:
+                    continue
+                counts[j] += 1
+            for j, c in list(counts.items()):
+                if mode == "count":
+                    x[i][j] = c
+                elif mode == "freq":
+                    x[i][j] = c / len(seq)
+                elif mode == "binary":
+                    x[i][j] = 1
+                elif mode == "tfidf":
+                    # Use weighting scheme 2 in
+                    # https://en.wikipedia.org/wiki/Tf%E2%80%93idf
+                    tf = 1 + np.log(c)
+                    idf = np.log(
+                        1
+                        + self.document_count / (1 + self.index_docs.get(j, 0))
+                    )
+                    x[i][j] = tf * idf
+                else:
+                    raise ValueError("Unknown vectorization mode:", mode)
+        return x
+
+    def get_config(self):
+        json_word_counts = json.dumps(self.word_counts)
+        json_word_docs = json.dumps(self.word_docs)
+        json_index_docs = json.dumps(self.index_docs)
+        json_word_index = json.dumps(self.word_index)
+        json_index_word = json.dumps(self.index_word)
+
+        return {
+            "num_words": self.num_words,
+            "filters": self.filters,
+            "lower": self.lower,
+            "split": self.split,
+            "char_level": self.char_level,
+            "oov_token": self.oov_token,
+            "document_count": self.document_count,
+            "word_counts": json_word_counts,
+            "word_docs": json_word_docs,
+            "index_docs": json_index_docs,
+            "index_word": json_index_word,
+            "word_index": json_word_index,
+        }
+
+    def to_json(self, **kwargs):
+        config = self.get_config()
+        tokenizer_config = {
+            "class_name": self.__class__.__name__,
+            "config": config,
+        }
+        return json.dumps(tokenizer_config, **kwargs)
+
+
+@keras_core_export("keras_core._legacy.preprocessing.text.tokenizer_from_json")
+def tokenizer_from_json(json_string):
+    """DEPRECATED."""
+    tokenizer_config = json.loads(json_string)
+    config = tokenizer_config.get("config")
+
+    word_counts = json.loads(config.pop("word_counts"))
+    word_docs = json.loads(config.pop("word_docs"))
+    index_docs = json.loads(config.pop("index_docs"))
+    # Integer indexing gets converted to strings with json.dumps()
+    index_docs = {int(k): v for k, v in index_docs.items()}
+    index_word = json.loads(config.pop("index_word"))
+    index_word = {int(k): v for k, v in index_word.items()}
+    word_index = json.loads(config.pop("word_index"))
+
+    tokenizer = Tokenizer(**config)
+    tokenizer.word_counts = word_counts
+    tokenizer.word_docs = word_docs
+    tokenizer.index_docs = index_docs
+    tokenizer.word_index = word_index
+    tokenizer.index_word = index_word
+    return tokenizer
diff --git a/keras_core/losses/losses.py b/keras_core/losses/losses.py
index 56d4a0e6a..f826060bd 100644
--- a/keras_core/losses/losses.py
+++ b/keras_core/losses/losses.py
@@ -1106,6 +1106,11 @@ def categorical_hinge(y_true, y_pred):
     [
         "keras_core.metrics.mean_squared_error",
         "keras_core.losses.mean_squared_error",
+        # Legacy aliases
+        "keras_core._legacy.losses.mse",
+        "keras_core._legacy.losses.MSE",
+        "keras_core._legacy.metrics.mse",
+        "keras_core._legacy.metrics.MSE",
     ]
 )
 def mean_squared_error(y_true, y_pred):
@@ -1140,6 +1145,11 @@ def mean_squared_error(y_true, y_pred):
     [
         "keras_core.metrics.mean_absolute_error",
         "keras_core.losses.mean_absolute_error",
+        # Legacy aliases
+        "keras_core._legacy.losses.MAE",
+        "keras_core._legacy.losses.mae",
+        "keras_core._legacy.metrics.MAE",
+        "keras_core._legacy.metrics.mae",
     ]
 )
 def mean_absolute_error(y_true, y_pred):
@@ -1172,6 +1182,11 @@ def mean_absolute_error(y_true, y_pred):
     [
         "keras_core.metrics.mean_absolute_percentage_error",
         "keras_core.losses.mean_absolute_percentage_error",
+        # Legacy aliases
+        "keras_core._legacy.losses.mape",
+        "keras_core._legacy.losses.MAPE",
+        "keras_core._legacy.metrics.mape",
+        "keras_core._legacy.metrics.MAPE",
     ]
 )
 def mean_absolute_percentage_error(y_true, y_pred):
@@ -1213,6 +1228,11 @@ def mean_absolute_percentage_error(y_true, y_pred):
     [
         "keras_core.metrics.mean_squared_logarithmic_error",
         "keras_core.losses.mean_squared_logarithmic_error",
+        # Legacy aliases
+        "keras_core._legacy.losses.msle",
+        "keras_core._legacy.losses.MSLE",
+        "keras_core._legacy.metrics.msle",
+        "keras_core._legacy.metrics.MSLE",
     ]
 )
 def mean_squared_logarithmic_error(y_true, y_pred):
@@ -1342,7 +1362,13 @@ def huber(y_true, y_pred, delta=1.0):
 
 
 @keras_core_export(
-    ["keras_core.losses.log_cosh", "keras_core.metrics.log_cosh"]
+    [
+        "keras_core.losses.log_cosh",
+        "keras_core.metrics.log_cosh",
+        # Legacy aliases
+        "keras_core._legacy.losses.logcosh",
+        "keras_core._legacy.metrics.logcosh",
+    ]
 )
 def log_cosh(y_true, y_pred):
     """Logarithm of the hyperbolic cosine of the prediction error.
@@ -1386,6 +1412,13 @@ def log_cosh(y_true, y_pred):
     [
         "keras_core.metrics.kl_divergence",
         "keras_core.losses.kl_divergence",
+        # Legacy aliases
+        "keras_core._legacy.losses.KLD",
+        "keras_core._legacy.losses.kld",
+        "keras_core._legacy.losses.kullback_leibler_divergence",
+        "keras_core._legacy.metrics.KLD",
+        "keras_core._legacy.metrics.kld",
+        "keras_core._legacy.metrics.kullback_leibler_divergence",
     ]
 )
 def kl_divergence(y_true, y_pred):
diff --git a/keras_core/regularizers/regularizers.py b/keras_core/regularizers/regularizers.py
index 8588ad199..f88abeac1 100644
--- a/keras_core/regularizers/regularizers.py
+++ b/keras_core/regularizers/regularizers.py
@@ -167,7 +167,9 @@ class Regularizer:
         raise NotImplementedError(f"{self} does not implement get_config()")
 
 
-@keras_core_export("keras_core.regularizers.L1L2")
+@keras_core_export(
+    ["keras_core.regularizers.L1L2", "keras_core.regularizers.l1_l2"]
+)
 class L1L2(Regularizer):
     """A regularizer that applies both L1 and L2 regularization penalties.
 
diff --git a/keras_core/version.py b/keras_core/version.py
index 53d6311d0..dc85b96ca 100644
--- a/keras_core/version.py
+++ b/keras_core/version.py
@@ -1,2 +1,2 @@
 # Unique source of truth for the version number.
-__version__ = "0.1.2"
+__version__ = "0.1.3"
diff --git a/pip_build.py b/pip_build.py
index db53b373a..19338058f 100644
--- a/pip_build.py
+++ b/pip_build.py
@@ -59,6 +59,84 @@ def build():
         # Generate API __init__.py files in `keras_core/`
         namex.generate_api_files(package, code_directory="src", verbose=True)
 
+        # Make keras_core/_tf_keras/ by copying keras_core/
+        tf_keras_dirpath = os.path.join(package, "_tf_keras")
+        os.makedirs(tf_keras_dirpath)  # raises if the directory already exists
+        with open(os.path.join(package, "__init__.py")) as f:
+            init_file = f.read()
+            init_file = init_file.replace(
+                "from keras_core import _legacy",
+                "from keras_core import _tf_keras",
+            )  # import _tf_keras instead of _legacy
+        with open(os.path.join(package, "__init__.py"), "w") as f:
+            f.write(init_file)  # update the top-level __init__.py in place
+        with open(os.path.join(tf_keras_dirpath, "__init__.py"), "w") as f:
+            f.write(init_file)  # same text becomes _tf_keras/__init__.py
+        for dirname in os.listdir(package):
+            dirpath = os.path.join(package, dirname)
+            if os.path.isdir(dirpath) and dirname not in (
+                "_legacy",
+                "_tf_keras",
+                "src",
+            ):  # every other subdirectory is copied into _tf_keras/
+                shutil.copytree(
+                    dirpath,
+                    os.path.join(tf_keras_dirpath, dirname),
+                    ignore=ignore_files,
+                )
+
+        # Copy keras_core/_legacy/ file contents to keras_core/_tf_keras/
+        legacy_submodules = [  # names of the entries in src/legacy/
+            path[:-3]  # module name: strip the ".py" suffix
+            for path in os.listdir(os.path.join(package, "src", "legacy"))
+            if path.endswith(".py")  # NOTE(review): also matches __init__.py
+        ]
+        legacy_submodules += [
+            path  # directory names are kept unchanged
+            for path in os.listdir(os.path.join(package, "src", "legacy"))
+            if os.path.isdir(os.path.join(package, "src", "legacy", path))
+        ]
+
+        for root, _, fnames in os.walk(os.path.join(package, "_legacy")):
+            for fname in fnames:
+                if fname.endswith(".py"):  # other file types are skipped
+                    legacy_fpath = os.path.join(root, fname)
+                    tf_keras_root = root.replace("/_legacy", "/_tf_keras")
+                    core_api_fpath = os.path.join(
+                        root.replace("/_legacy", ""), fname
+                    )  # NOTE(review): the "/" literals assume POSIX paths
+                    if not os.path.exists(tf_keras_root):
+                        os.makedirs(tf_keras_root)
+                    tf_keras_fpath = os.path.join(tf_keras_root, fname)
+                    with open(legacy_fpath) as f:
+                        legacy_contents = f.read()
+                        legacy_contents = legacy_contents.replace(
+                            "keras_core._legacy", "keras_core._tf_keras"
+                        )  # retarget _legacy references to _tf_keras
+                    if os.path.exists(core_api_fpath):
+                        with open(core_api_fpath) as f:
+                            core_api_contents = f.read()
+                        core_api_contents = core_api_contents.replace(
+                            "from keras_core import _tf_keras\n", ""
+                        )  # drop the import of _tf_keras itself
+                        for legacy_submodule in legacy_submodules:
+                            core_api_contents = core_api_contents.replace(
+                                f"from keras_core import {legacy_submodule}\n",
+                                "",
+                            )  # drop imports named after a legacy submodule
+                            core_api_contents = core_api_contents.replace(
+                                f"keras_core.{legacy_submodule}",
+                                f"keras_core._tf_keras.{legacy_submodule}",
+                            )  # and prefix such references with _tf_keras
+                        legacy_contents = (
+                            core_api_contents + "\n" + legacy_contents
+                        )  # core API contents first, legacy contents after
+                    with open(tf_keras_fpath, "w") as f:
+                        f.write(legacy_contents)
+
+        # Delete keras_core/_legacy/; its .py files were written to _tf_keras/
+        shutil.rmtree(os.path.join(package, "_legacy"))
+
         # Make sure to export the __version__ string
         from keras_core.src.version import __version__  # noqa: E402