Add tf.keras backwards compat for nearly all non-experimental symbols (#603)

* Add tf.keras backwards compatibility for nearly all non-experimental symbols

* Remove print statements

* Fix identity init
Author: François Chollet, 2023-07-25 13:03:01 -07:00 (committed by Francois Chollet)
parent f73d98df60
commit b660875f51
17 changed files with 5363 additions and 23 deletions
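In practice, the added export aliases mean that the tf.keras-style lowercase names resolve to the same objects as the canonical keras_core names. A minimal sketch, assuming the namex-generated API modules re-export every name listed in the `keras_core_export` calls below:

```
import keras_core

# Lowercase aliases added in this commit point at the same classes
# as their canonical CamelCase counterparts.
assert keras_core.initializers.ones is keras_core.initializers.Ones
assert keras_core.initializers.identity is keras_core.initializers.Identity
assert keras_core.regularizers.l1_l2 is keras_core.regularizers.L1L2
```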

@ -296,7 +296,9 @@ def any_symbolic_tensors(args=None, kwargs=None):
return False
@keras_core_export("keras_core.utils.is_keras_tensor")
@keras_core_export(
["keras_core.utils.is_keras_tensor", "keras_core.backend.is_keras_tensor"]
)
def is_keras_tensor(x):
"""Returns whether `x` is a Keras tensor.

@ -2,6 +2,7 @@ import inspect
from keras_core.api_export import keras_core_export
from keras_core.initializers.constant_initializers import Constant
from keras_core.initializers.constant_initializers import Identity
from keras_core.initializers.constant_initializers import Ones
from keras_core.initializers.constant_initializers import Zeros
from keras_core.initializers.initializer import Initializer

@ -4,7 +4,9 @@ from keras_core.backend import standardize_dtype
from keras_core.initializers.initializer import Initializer
@keras_core_export("keras_core.initializers.Constant")
@keras_core_export(
["keras_core.initializers.Constant", "keras_core.initializers.constant"]
)
class Constant(Initializer):
"""Initializer that generates tensors with constant values.
@ -37,7 +39,9 @@ class Constant(Initializer):
return {"value": self.value}
@keras_core_export("keras_core.initializers.Zeros")
@keras_core_export(
["keras_core.initializers.Zeros", "keras_core.initializers.zeros"]
)
class Zeros(Initializer):
"""Initializer that generates tensors initialized to 0.
@ -67,7 +71,9 @@ class Zeros(Initializer):
return ops.zeros(shape, dtype=dtype)
@keras_core_export("keras_core.initializers.Ones")
@keras_core_export(
["keras_core.initializers.Ones", "keras_core.initializers.ones"]
)
class Ones(Initializer):
"""Initializer that generates tensors initialized to 1.
@ -97,3 +103,52 @@ class Ones(Initializer):
"""
dtype = standardize_dtype(dtype)
return ops.ones(shape, dtype=dtype)
@keras_core_export(
[
"keras_core.initializers.IdentityInitializer",
"keras_core.initializers.Identity",
"keras_core.initializers.identity",
]
)
class Identity(Initializer):
"""Initializer that generates the identity matrix.
Only usable for generating 2D matrices.
Examples:
>>> # Standalone usage:
>>> initializer = Identity()
>>> values = initializer(shape=(2, 2))
>>> # Usage in a Keras layer:
>>> initializer = Identity()
>>> layer = Dense(3, kernel_initializer=initializer)
Args:
gain: Multiplicative factor to apply to the identity matrix.
"""
def __init__(self, gain=1.0):
self.gain = gain
def __call__(self, shape, dtype=None):
"""Returns a tensor object initialized as specified by the initializer.
Args:
shape: Shape of the tensor.
dtype: Optional dtype of the tensor. Only numeric or boolean dtypes
are supported. If not specified, `keras_core.backend.floatx()`
is used, which defaults to `float32` unless you configured it
otherwise (via `keras_core.backend.set_floatx(float_dtype)`).
**kwargs: Additional keyword arguments.
"""
if len(shape) != 2:
raise ValueError(
"Identity matrix initializer can only be used for 2D matrices. "
f"Received: shape={shape} of rank {len(shape)}."
)
dtype = standardize_dtype(dtype)
return self.gain * ops.eye(*shape, dtype=dtype)
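The "Fix identity init" change above makes the initializer return `gain * ops.eye(*shape)`. A small sketch of the resulting behavior; note that rectangular 2D shapes are accepted because `ops.eye` takes separate row and column counts:

```
from keras_core import initializers

init = initializers.Identity(gain=0.5)

# Square case: 0.5 on the diagonal, 0 elsewhere (0.5 * eye(3)).
square = init(shape=(3, 3))

# Rectangular 2D shapes also work; only rank != 2 raises a ValueError.
rect = init(shape=(2, 4))
```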

@ -12,8 +12,8 @@ class ConstantInitializersTest(testing.TestCase):
initializer = initializers.Zeros()
values = initializer(shape=shape)
self.assertEqual(values.shape, shape)
np_values = backend.convert_to_numpy(values).data
self.assertEqual(np_values, np.zeros(shape=shape))
np_values = backend.convert_to_numpy(values)
self.assertAllClose(np_values, np.zeros(shape=shape))
self.run_class_serialization_test(initializer)
@ -23,8 +23,8 @@ class ConstantInitializersTest(testing.TestCase):
initializer = initializers.Ones()
values = initializer(shape=shape)
self.assertEqual(values.shape, shape)
np_values = backend.convert_to_numpy(values).data
self.assertEqual(np_values, np.ones(shape=shape))
np_values = backend.convert_to_numpy(values)
self.assertAllClose(np_values, np.ones(shape=shape))
self.run_class_serialization_test(initializer)
@ -35,9 +35,21 @@ class ConstantInitializersTest(testing.TestCase):
initializer = initializers.Constant(value=constant_value)
values = initializer(shape=shape)
self.assertEqual(values.shape, shape)
np_values = backend.convert_to_numpy(values).data
self.assertEqual(
np_values = backend.convert_to_numpy(values)
self.assertAllClose(
np_values, np.full(shape=shape, fill_value=constant_value)
)
self.run_class_serialization_test(initializer)
def test_identity_initializer(self):
shape = (3, 3)
gain = 2
initializer = initializers.Identity(gain=gain)
values = initializer(shape=shape)
self.assertEqual(values.shape, shape)
np_values = backend.convert_to_numpy(values)
self.assertAllClose(np_values, np.eye(*shape) * gain)
self.run_class_serialization_test(initializer)

@ -7,7 +7,12 @@ from keras_core.initializers.initializer import Initializer
from keras_core.saving import serialization_lib
@keras_core_export("keras_core.initializers.RandomNormal")
@keras_core_export(
[
"keras_core.initializers.RandomNormal",
"keras_core.initializers.random_normal",
]
)
class RandomNormal(Initializer):
"""Random normal initializer.
@ -59,7 +64,12 @@ class RandomNormal(Initializer):
return {"mean": self.mean, "stddev": self.stddev, "seed": seed_config}
@keras_core_export("keras_core.initializers.TruncatedNormal")
@keras_core_export(
[
"keras_core.initializers.TruncatedNormal",
"keras_core.initializers.truncated_normal",
]
)
class TruncatedNormal(Initializer):
"""Initializer that generates a truncated normal distribution.
@ -114,7 +124,12 @@ class TruncatedNormal(Initializer):
return {"mean": self.mean, "stddev": self.stddev, "seed": seed_config}
@keras_core_export("keras_core.initializers.RandomUniform")
@keras_core_export(
[
"keras_core.initializers.RandomUniform",
"keras_core.initializers.random_uniform",
]
)
class RandomUniform(Initializer):
"""Random uniform initializer.
@ -170,7 +185,12 @@ class RandomUniform(Initializer):
}
@keras_core_export("keras_core.initializers.VarianceScaling")
@keras_core_export(
[
"keras_core.initializers.VarianceScaling",
"keras_core.initializers.variance_scaling",
]
)
class VarianceScaling(Initializer):
"""Initializer that adapts its scale to the shape of its input tensors.
@ -285,7 +305,12 @@ class VarianceScaling(Initializer):
}
@keras_core_export("keras_core.initializers.GlorotUniform")
@keras_core_export(
[
"keras_core.initializers.GlorotUniform",
"keras_core.initializers.glorot_uniform",
]
)
class GlorotUniform(VarianceScaling):
"""The Glorot uniform initializer, also called Xavier uniform initializer.
@ -329,7 +354,12 @@ class GlorotUniform(VarianceScaling):
}
@keras_core_export("keras_core.initializers.GlorotNormal")
@keras_core_export(
[
"keras_core.initializers.GlorotNormal",
"keras_core.initializers.glorot_normal",
]
)
class GlorotNormal(VarianceScaling):
"""The Glorot normal initializer, also called Xavier normal initializer.
@ -377,7 +407,12 @@ class GlorotNormal(VarianceScaling):
}
@keras_core_export("keras_core.initializers.LecunNormal")
@keras_core_export(
[
"keras_core.initializers.LecunNormal",
"keras_core.initializers.lecun_normal",
]
)
class LecunNormal(VarianceScaling):
"""Lecun normal initializer.
@ -425,7 +460,12 @@ class LecunNormal(VarianceScaling):
}
@keras_core_export("keras_core.initializers.LecunUniform")
@keras_core_export(
[
"keras_core.initializers.LecunUniform",
"keras_core.initializers.lecun_uniform",
]
)
class LecunUniform(VarianceScaling):
"""Lecun uniform initializer.
@ -469,7 +509,9 @@ class LecunUniform(VarianceScaling):
}
@keras_core_export("keras_core.initializers.HeNormal")
@keras_core_export(
["keras_core.initializers.HeNormal", "keras_core.initializers.he_normal"]
)
class HeNormal(VarianceScaling):
"""He normal initializer.
@ -513,7 +555,9 @@ class HeNormal(VarianceScaling):
}
@keras_core_export("keras_core.initializers.HeUniform")
@keras_core_export(
["keras_core.initializers.HeUniform", "keras_core.initializers.he_uniform"]
)
class HeUniform(VarianceScaling):
"""He uniform variance scaling initializer.
@ -589,6 +633,7 @@ def compute_fans(shape):
[
"keras_core.initializers.OrthogonalInitializer",
"keras_core.initializers.Orthogonal",
"keras_core.initializers.orthogonal",
]
)
class OrthogonalInitializer(Initializer):

keras_core/legacy/backend.py (new file, 2287 lines; diff suppressed because it is too large)

keras_core/legacy/layers.py (new file, 245 lines)

@ -0,0 +1,245 @@
"""Legacy Keras 1/2 layers.
AlphaDropout
RandomHeight
RandomWidth
ThresholdedReLU
"""
from keras_core import backend
from keras_core.api_export import keras_core_export
from keras_core.layers.layer import Layer
from keras_core.utils.module_utils import tensorflow as tf
@keras_core_export("keras_core._legacy.layers.AlphaDropout")
class AlphaDropout(Layer):
"""DEPRECATED."""
def __init__(self, rate, noise_shape=None, seed=None, **kwargs):
super().__init__(**kwargs)
self.rate = rate
self.seed = seed
self.noise_shape = noise_shape
self.seed_generator = backend.random.SeedGenerator(seed)
self.supports_masking = True
self.built = True
def call(self, inputs, training=False):
if training and self.rate > 0:
alpha = 1.6732632423543772848170429916717
scale = 1.0507009873554804934193349852946
alpha_p = -alpha * scale
if self.noise_shape is None:
noise_shape = tf.shape(inputs)
else:
noise_shape = self.noise_shape
kept_idx = tf.greater_equal(
backend.random.uniform(noise_shape, seed=self.seed_generator),
self.rate,
)
kept_idx = tf.cast(kept_idx, inputs.dtype)
# Get affine transformation params
a = ((1 - self.rate) * (1 + self.rate * alpha_p**2)) ** -0.5
b = -a * alpha_p * self.rate
# Apply mask
x = inputs * kept_idx + alpha_p * (1 - kept_idx)
# Do affine transformation
return a * x + b
return inputs
def get_config(self):
config = {"rate": self.rate, "seed": self.seed}
base_config = super().get_config()
return {**base_config, **config}
def compute_output_shape(self, input_shape):
return input_shape
@keras_core_export("keras_core._legacy.layers.RandomHeight")
class RandomHeight(Layer):
"""DEPRECATED."""
def __init__(self, factor, interpolation="bilinear", seed=None, **kwargs):
super().__init__(**kwargs)
self.seed_generator = backend.random.SeedGenerator(seed)
self.factor = factor
if isinstance(factor, (tuple, list)):
self.height_lower = factor[0]
self.height_upper = factor[1]
else:
self.height_lower = -factor
self.height_upper = factor
if self.height_upper < self.height_lower:
raise ValueError(
"`factor` argument cannot have an upper bound less than the "
f"lower bound. Received: factor={factor}"
)
if self.height_lower < -1.0 or self.height_upper < -1.0:
raise ValueError(
"`factor` argument must have values larger than -1. "
f"Received: factor={factor}"
)
self.interpolation = interpolation
self.seed = seed
def call(self, inputs, training=True):
inputs = tf.convert_to_tensor(inputs, dtype=self.compute_dtype)
def random_height_inputs(inputs):
"""Inputs height-adjusted with random ops."""
inputs_shape = tf.shape(inputs)
img_hd = tf.cast(inputs_shape[-3], tf.float32)
img_wd = inputs_shape[-2]
height_factor = backend.random.uniform(
shape=[],
minval=(1.0 + self.height_lower),
maxval=(1.0 + self.height_upper),
seed=self.seed_generator,
)
adjusted_height = tf.cast(height_factor * img_hd, tf.int32)
adjusted_size = tf.stack([adjusted_height, img_wd])
output = tf.image.resize(
images=inputs,
size=adjusted_size,
method=self.interpolation,
)
# tf.resize will output float32 regardless of input type.
output = tf.cast(output, self.compute_dtype)
output_shape = inputs.shape.as_list()
output_shape[-3] = None
output.set_shape(output_shape)
return output
if training:
return random_height_inputs(inputs)
else:
return inputs
def compute_output_shape(self, input_shape):
input_shape = list(input_shape)
input_shape[-3] = None
return tuple(input_shape)
def get_config(self):
config = {
"factor": self.factor,
"interpolation": self.interpolation,
"seed": self.seed,
}
base_config = super().get_config()
return {**base_config, **config}
@keras_core_export("keras_core._legacy.layers.RandomWidth")
class RandomWidth(Layer):
"""DEPRECATED."""
def __init__(self, factor, interpolation="bilinear", seed=None, **kwargs):
super().__init__(**kwargs)
self.seed_generator = backend.random.SeedGenerator(seed)
self.factor = factor
if isinstance(factor, (tuple, list)):
self.width_lower = factor[0]
self.width_upper = factor[1]
else:
self.width_lower = -factor
self.width_upper = factor
if self.width_upper < self.width_lower:
raise ValueError(
"`factor` argument cannot have an upper bound less than the "
f"lower bound. Received: factor={factor}"
)
if self.width_lower < -1.0 or self.width_upper < -1.0:
raise ValueError(
"`factor` argument must have values larger than -1. "
f"Received: factor={factor}"
)
self.interpolation = interpolation
self.seed = seed
def call(self, inputs, training=True):
inputs = tf.convert_to_tensor(inputs, dtype=self.compute_dtype)
def random_width_inputs(inputs):
"""Inputs width-adjusted with random ops."""
inputs_shape = tf.shape(inputs)
img_hd = inputs_shape[-3]
img_wd = tf.cast(inputs_shape[-2], tf.float32)
width_factor = backend.random.uniform(
shape=[],
minval=(1.0 + self.width_lower),
maxval=(1.0 + self.width_upper),
seed=self.seed_generator,
)
adjusted_width = tf.cast(width_factor * img_wd, tf.int32)
adjusted_size = tf.stack([img_hd, adjusted_width])
output = tf.image.resize(
images=inputs,
size=adjusted_size,
method=self.interpolation,
)
# tf.resize will output float32 regardless of input type.
output = tf.cast(output, self.compute_dtype)
output_shape = inputs.shape.as_list()
output_shape[-2] = None
output.set_shape(output_shape)
return output
if training:
return random_width_inputs(inputs)
else:
return inputs
def compute_output_shape(self, input_shape):
input_shape = list(input_shape)
input_shape[-2] = None
return tuple(input_shape)
def get_config(self):
config = {
"factor": self.factor,
"interpolation": self.interpolation,
"seed": self.seed,
}
base_config = super().get_config()
return {**base_config, **config}
@keras_core_export("keras_core._legacy.layers.ThresholdedReLU")
class ThresholdedReLU(Layer):
"""DEPRECATED."""
def __init__(self, theta=1.0, **kwargs):
super().__init__(**kwargs)
if theta is None:
raise ValueError(
"Theta of a Thresholded ReLU layer cannot be None, expecting a "
f"float. Received: {theta}"
)
if theta < 0:
raise ValueError(
"The theta value of a Thresholded ReLU layer "
f"should be >=0. Received: {theta}"
)
self.supports_masking = True
self.theta = tf.convert_to_tensor(theta, dtype=self.compute_dtype)
def call(self, inputs):
dtype = self.compute_dtype
return inputs * tf.cast(tf.greater(inputs, self.theta), dtype)
def get_config(self):
config = {"theta": float(self.theta)}
base_config = super().get_config()
return {**base_config, **config}
def compute_output_shape(self, input_shape):
return input_shape
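A short usage sketch for two of the restored layers. The import path assumes the namex-generated `keras_core._legacy.layers` module, and because these layers call TensorFlow ops directly, the TensorFlow backend is assumed as well:

```
import numpy as np
from keras_core._legacy.layers import AlphaDropout, RandomHeight

images = np.random.uniform(size=(2, 32, 32, 3)).astype("float32")

# Randomly rescales the height axis by a factor in [0.8, 1.3] at
# training time; width and channels are untouched.
out = RandomHeight(factor=(-0.2, 0.3))(images, training=True)

# Alpha dropout preserves the self-normalizing property of SELU
# activations while dropping units at the given rate.
out = AlphaDropout(rate=0.1)(images, training=True)
```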

@ -0,0 +1,20 @@
from keras_core.api_export import keras_core_export
@keras_core_export("keras_core._legacy.losses.Reduction")
class Reduction:
AUTO = "auto"
NONE = "none"
SUM = "sum"
SUM_OVER_BATCH_SIZE = "sum_over_batch_size"
@classmethod
def all(cls):
return (cls.AUTO, cls.NONE, cls.SUM, cls.SUM_OVER_BATCH_SIZE)
@classmethod
def validate(cls, key):
if key not in cls.all():
raise ValueError(
f'Invalid Reduction Key: {key}. Expected keys are "{cls.all()}"'
)
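For reference, a quick sketch of how the legacy `Reduction` class is used (the import path assumes the generated `_legacy` API module):

```
from keras_core._legacy.losses import Reduction

Reduction.all()            # ('auto', 'none', 'sum', 'sum_over_batch_size')
Reduction.validate("sum")  # passes silently
# Reduction.validate("mean") would raise ValueError: not a valid key.
```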

File diff suppressed because it is too large.

@ -0,0 +1,324 @@
"""Deprecated sequence preprocessing APIs from Keras 1."""
import json
import random
import numpy as np
from keras_core.api_export import keras_core_export
from keras_core.trainers.data_adapters.py_dataset_adapter import PyDataset
@keras_core_export(
"keras_core._legacy.preprocessing.sequence.TimeseriesGenerator"
)
class TimeseriesGenerator(PyDataset):
"""Utility class for generating batches of temporal data.
DEPRECATED.
This class takes in a sequence of data-points gathered at
equal intervals, along with time series parameters such as
stride, length of history, etc., to produce batches for
training/validation.
Arguments:
data: Indexable generator (such as list or Numpy array)
containing consecutive data points (timesteps).
The data should be at least 2D, and axis 0 is expected
to be the time dimension.
targets: Targets corresponding to timesteps in `data`.
It should have same length as `data`.
length: Length of the output sequences (in number of timesteps).
sampling_rate: Period between successive individual timesteps
within sequences. For rate `r`, timesteps
`data[i]`, `data[i-r]`, ... `data[i - length]`
are used to create a sample sequence.
stride: Period between successive output sequences.
For stride `s`, consecutive output samples would
be centered around `data[i]`, `data[i+s]`, `data[i+2*s]`, etc.
start_index: Data points earlier than `start_index` will not be used
in the output sequences. This is useful to reserve part of the
data for test or validation.
end_index: Data points later than `end_index` will not be used
in the output sequences. This is useful to reserve part of the
data for test or validation.
shuffle: Whether to shuffle output samples,
or instead draw them in chronological order.
reverse: Boolean: if `true`, timesteps in each output sample will be
in reverse chronological order.
batch_size: Number of timeseries samples in each batch
(except maybe the last one).
Returns:
A PyDataset instance.
"""
def __init__(
self,
data,
targets,
length,
sampling_rate=1,
stride=1,
start_index=0,
end_index=None,
shuffle=False,
reverse=False,
batch_size=128,
):
if len(data) != len(targets):
raise ValueError(
"Data and targets have to be "
f"of same length. Data length is {len(data)} "
f"while target length is {len(targets)}"
)
self.data = data
self.targets = targets
self.length = length
self.sampling_rate = sampling_rate
self.stride = stride
self.start_index = start_index + length
if end_index is None:
end_index = len(data) - 1
self.end_index = end_index
self.shuffle = shuffle
self.reverse = reverse
self.batch_size = batch_size
if self.start_index > self.end_index:
raise ValueError(
f"`start_index+length={self.start_index} "
f"> end_index={self.end_index}` "
"is disallowed, as no part of the sequence "
"would be left to be used as current step."
)
def __len__(self):
return (
self.end_index - self.start_index + self.batch_size * self.stride
) // (self.batch_size * self.stride)
def __getitem__(self, index):
if self.shuffle:
rows = np.random.randint(
self.start_index, self.end_index + 1, size=self.batch_size
)
else:
i = self.start_index + self.batch_size * self.stride * index
rows = np.arange(
i,
min(i + self.batch_size * self.stride, self.end_index + 1),
self.stride,
)
samples = np.array(
[
self.data[row - self.length : row : self.sampling_rate]
for row in rows
]
)
targets = np.array([self.targets[row] for row in rows])
if self.reverse:
return samples[:, ::-1, ...], targets
return samples, targets
def get_config(self):
"""Returns the TimeseriesGenerator configuration as Python dictionary.
Returns:
A Python dictionary with the TimeseriesGenerator configuration.
"""
data = self.data
if type(self.data).__module__ == np.__name__:
data = self.data.tolist()
try:
json_data = json.dumps(data)
except TypeError as e:
raise TypeError(f"Data not JSON Serializable: {data}") from e
targets = self.targets
if type(self.targets).__module__ == np.__name__:
targets = self.targets.tolist()
try:
json_targets = json.dumps(targets)
except TypeError as e:
raise TypeError(f"Targets not JSON Serializable: {targets}") from e
return {
"data": json_data,
"targets": json_targets,
"length": self.length,
"sampling_rate": self.sampling_rate,
"stride": self.stride,
"start_index": self.start_index,
"end_index": self.end_index,
"shuffle": self.shuffle,
"reverse": self.reverse,
"batch_size": self.batch_size,
}
def to_json(self, **kwargs):
"""Returns a JSON string containing the generator's configuration.
Args:
**kwargs: Additional keyword arguments to be passed
to `json.dumps()`.
Returns:
A JSON string containing the tokenizer configuration.
"""
config = self.get_config()
timeseries_generator_config = {
"class_name": self.__class__.__name__,
"config": config,
}
return json.dumps(timeseries_generator_config, **kwargs)
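A usage sketch for `TimeseriesGenerator`; the data and shapes below are made up for illustration, and the import path assumes the generated `_legacy` API module:

```
import numpy as np
from keras_core._legacy.preprocessing.sequence import TimeseriesGenerator

# A 50-step univariate series; predict each step from the 10 before it.
data = np.arange(50).reshape(-1, 1).astype("float32")
gen = TimeseriesGenerator(data, targets=data, length=10, batch_size=8)

x, y = gen[0]
# x.shape == (8, 10, 1): eight windows of ten consecutive timesteps.
# y.shape == (8, 1): the value immediately following each window.
```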
@keras_core_export(
"keras_core._legacy.preprocessing.sequence.make_sampling_table"
)
def make_sampling_table(size, sampling_factor=1e-5):
"""Generates a word rank-based probabilistic sampling table.
DEPRECATED.
Used for generating the `sampling_table` argument for `skipgrams`.
`sampling_table[i]` is the probability of sampling
the word i-th most common word in a dataset
(more common words should be sampled less frequently, for balance).
The sampling probabilities are generated according
to the sampling distribution used in word2vec:
```
p(word) = (min(1, sqrt(word_frequency / sampling_factor) /
(word_frequency / sampling_factor)))
```
We assume that the word frequencies follow Zipf's law (s=1) to derive
a numerical approximation of frequency(rank):
`frequency(rank) ~ 1/(rank * (log(rank) + gamma) + 1/2 - 1/(12*rank))`
where `gamma` is the Euler-Mascheroni constant.
Args:
size: Int, number of possible words to sample.
sampling_factor: The sampling factor in the word2vec formula.
Returns:
A 1D Numpy array of length `size` where the ith entry
is the probability that a word of rank i should be sampled.
"""
gamma = 0.577
rank = np.arange(size)
rank[0] = 1
inv_fq = rank * (np.log(rank) + gamma) + 0.5 - 1.0 / (12.0 * rank)
f = sampling_factor * inv_fq
return np.minimum(1.0, f / np.sqrt(f))
@keras_core_export("keras_core._legacy.preprocessing.sequence.skipgrams")
def skipgrams(
sequence,
vocabulary_size,
window_size=4,
negative_samples=1.0,
shuffle=True,
categorical=False,
sampling_table=None,
seed=None,
):
"""Generates skipgram word pairs.
DEPRECATED.
This function transforms a sequence of word indexes (list of integers)
into tuples of words of the form:
- (word, word in the same window), with label 1 (positive samples).
- (word, random word from the vocabulary), with label 0 (negative samples).
Read more about Skipgram in this gnomic paper by Mikolov et al.:
[Efficient Estimation of Word Representations in
Vector Space](http://arxiv.org/pdf/1301.3781v3.pdf)
Args:
sequence: A word sequence (sentence), encoded as a list
of word indices (integers). If using a `sampling_table`,
word indices are expected to match the rank
of the words in a reference dataset (e.g. 10 would encode
the 10-th most frequently occurring token).
Note that index 0 is expected to be a non-word and will be skipped.
vocabulary_size: Int, maximum possible word index + 1
window_size: Int, size of sampling windows (technically half-window).
The window of a word `w_i` will be
`[i - window_size, i + window_size+1]`.
negative_samples: Float >= 0. 0 for no negative (i.e. random) samples.
1 for same number as positive samples.
shuffle: Whether to shuffle the word couples before returning them.
categorical: bool. if False, labels will be
integers (eg. `[0, 1, 1 .. ]`),
if `True`, labels will be categorical, e.g.
`[[1,0],[0,1],[0,1] .. ]`.
sampling_table: 1D array of size `vocabulary_size` where the entry i
encodes the probability to sample a word of rank i.
seed: Random seed.
Returns:
couples, labels: where `couples` are int pairs and
`labels` are either 0 or 1.
Note:
By convention, index 0 in the vocabulary is
a non-word and will be skipped.
"""
couples = []
labels = []
for i, wi in enumerate(sequence):
if not wi:
continue
if sampling_table is not None:
if sampling_table[wi] < random.random():
continue
window_start = max(0, i - window_size)
window_end = min(len(sequence), i + window_size + 1)
for j in range(window_start, window_end):
if j != i:
wj = sequence[j]
if not wj:
continue
couples.append([wi, wj])
if categorical:
labels.append([0, 1])
else:
labels.append(1)
if negative_samples > 0:
num_negative_samples = int(len(labels) * negative_samples)
words = [c[0] for c in couples]
random.shuffle(words)
couples += [
[words[i % len(words)], random.randint(1, vocabulary_size - 1)]
for i in range(num_negative_samples)
]
if categorical:
labels += [[1, 0]] * num_negative_samples
else:
labels += [0] * num_negative_samples
if shuffle:
if seed is None:
seed = random.randint(0, 10e6)
random.seed(seed)
random.shuffle(couples)
random.seed(seed)
random.shuffle(labels)
return couples, labels
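A small sketch of `skipgrams` (and, optionally, `make_sampling_table`); the toy sequence is hypothetical:

```
from keras_core._legacy.preprocessing.sequence import (
    make_sampling_table,
    skipgrams,
)

vocab_size = 12
sentence = [2, 3, 5, 7, 11]  # word indices; index 0 is reserved

couples, labels = skipgrams(
    sentence,
    vocabulary_size=vocab_size,
    window_size=2,
    negative_samples=1.0,
    seed=42,
)
# `couples` holds [target, context] index pairs; `labels` marks each
# pair as positive (1, from the window) or negative (0, random word).

# For a real corpus ranked by frequency, a sampling table built with
# make_sampling_table(vocab_size) can be passed as `sampling_table=`
# to subsample very frequent words.
```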

@ -0,0 +1,338 @@
"""Deprecated text preprocessing APIs from Keras 1."""
import collections
import hashlib
import json
import warnings
import numpy as np
from keras_core.api_export import keras_core_export
@keras_core_export(
"keras_core._legacy.preprocessing.text.text_to_word_sequence"
)
def text_to_word_sequence(
input_text,
filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n',
lower=True,
split=" ",
):
"""DEPRECATED."""
if lower:
input_text = input_text.lower()
translate_dict = {c: split for c in filters}
translate_map = str.maketrans(translate_dict)
input_text = input_text.translate(translate_map)
seq = input_text.split(split)
return [i for i in seq if i]
@keras_core_export("keras_core._legacy.preprocessing.text.one_hot")
def one_hot(
input_text,
n,
filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n',
lower=True,
split=" ",
analyzer=None,
):
"""DEPRECATED."""
return hashing_trick(
input_text,
n,
hash_function=hash,
filters=filters,
lower=lower,
split=split,
analyzer=analyzer,
)
@keras_core_export("keras_core._legacy.preprocessing.text.hashing_trick")
def hashing_trick(
text,
n,
hash_function=None,
filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n',
lower=True,
split=" ",
analyzer=None,
):
"""DEPRECATED."""
if hash_function is None:
hash_function = hash
elif hash_function == "md5":
def hash_function(w):
return int(hashlib.md5(w.encode()).hexdigest(), 16)
if analyzer is None:
seq = text_to_word_sequence(
text, filters=filters, lower=lower, split=split
)
else:
seq = analyzer(text)
return [(hash_function(w) % (n - 1) + 1) for w in seq]
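Usage sketch for the stateless text helpers (output values are illustrative; the hashed indices depend on Python's `hash`):

```
from keras_core._legacy.preprocessing.text import (
    one_hot,
    text_to_word_sequence,
)

text = "The quick brown fox jumped over the lazy dog."

words = text_to_word_sequence(text)
# ['the', 'quick', 'brown', 'fox', 'jumped', 'over', 'the', 'lazy', 'dog']

# Hashes each word into one of n - 1 buckets (index 0 is reserved),
# so collisions between different words are possible.
encoded = one_hot(text, n=50)
```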
@keras_core_export("keras_core._legacy.preprocessing.text.Tokenizer")
class Tokenizer(object):
"""DEPRECATED."""
def __init__(
self,
num_words=None,
filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n',
lower=True,
split=" ",
char_level=False,
oov_token=None,
analyzer=None,
**kwargs
):
# Legacy support
if "nb_words" in kwargs:
warnings.warn(
"The `nb_words` argument in `Tokenizer` "
"has been renamed `num_words`."
)
num_words = kwargs.pop("nb_words")
document_count = kwargs.pop("document_count", 0)
if kwargs:
raise TypeError("Unrecognized keyword arguments: " + str(kwargs))
self.word_counts = collections.OrderedDict()
self.word_docs = collections.defaultdict(int)
self.filters = filters
self.split = split
self.lower = lower
self.num_words = num_words
self.document_count = document_count
self.char_level = char_level
self.oov_token = oov_token
self.index_docs = collections.defaultdict(int)
self.word_index = {}
self.index_word = {}
self.analyzer = analyzer
def fit_on_texts(self, texts):
for text in texts:
self.document_count += 1
if self.char_level or isinstance(text, list):
if self.lower:
if isinstance(text, list):
text = [text_elem.lower() for text_elem in text]
else:
text = text.lower()
seq = text
else:
if self.analyzer is None:
seq = text_to_word_sequence(
text,
filters=self.filters,
lower=self.lower,
split=self.split,
)
else:
seq = self.analyzer(text)
for w in seq:
if w in self.word_counts:
self.word_counts[w] += 1
else:
self.word_counts[w] = 1
for w in set(seq):
# In how many documents each word occurs
self.word_docs[w] += 1
wcounts = list(self.word_counts.items())
wcounts.sort(key=lambda x: x[1], reverse=True)
# forcing the oov_token to index 1 if it exists
if self.oov_token is None:
sorted_voc = []
else:
sorted_voc = [self.oov_token]
sorted_voc.extend(wc[0] for wc in wcounts)
# note that index 0 is reserved, never assigned to an existing word
self.word_index = dict(
zip(sorted_voc, list(range(1, len(sorted_voc) + 1)))
)
self.index_word = {c: w for w, c in self.word_index.items()}
for w, c in list(self.word_docs.items()):
self.index_docs[self.word_index[w]] = c
def fit_on_sequences(self, sequences):
self.document_count += len(sequences)
for seq in sequences:
seq = set(seq)
for i in seq:
self.index_docs[i] += 1
def texts_to_sequences(self, texts):
return list(self.texts_to_sequences_generator(texts))
def texts_to_sequences_generator(self, texts):
num_words = self.num_words
oov_token_index = self.word_index.get(self.oov_token)
for text in texts:
if self.char_level or isinstance(text, list):
if self.lower:
if isinstance(text, list):
text = [text_elem.lower() for text_elem in text]
else:
text = text.lower()
seq = text
else:
if self.analyzer is None:
seq = text_to_word_sequence(
text,
filters=self.filters,
lower=self.lower,
split=self.split,
)
else:
seq = self.analyzer(text)
vect = []
for w in seq:
i = self.word_index.get(w)
if i is not None:
if num_words and i >= num_words:
if oov_token_index is not None:
vect.append(oov_token_index)
else:
vect.append(i)
elif self.oov_token is not None:
vect.append(oov_token_index)
yield vect
def sequences_to_texts(self, sequences):
return list(self.sequences_to_texts_generator(sequences))
def sequences_to_texts_generator(self, sequences):
num_words = self.num_words
oov_token_index = self.word_index.get(self.oov_token)
for seq in sequences:
vect = []
for num in seq:
word = self.index_word.get(num)
if word is not None:
if num_words and num >= num_words:
if oov_token_index is not None:
vect.append(self.index_word[oov_token_index])
else:
vect.append(word)
elif self.oov_token is not None:
vect.append(self.index_word[oov_token_index])
vect = " ".join(vect)
yield vect
def texts_to_matrix(self, texts, mode="binary"):
sequences = self.texts_to_sequences(texts)
return self.sequences_to_matrix(sequences, mode=mode)
def sequences_to_matrix(self, sequences, mode="binary"):
if not self.num_words:
if self.word_index:
num_words = len(self.word_index) + 1
else:
raise ValueError(
"Specify a dimension (`num_words` argument), "
"or fit on some text data first."
)
else:
num_words = self.num_words
if mode == "tfidf" and not self.document_count:
raise ValueError(
"Fit the Tokenizer on some data before using tfidf mode."
)
x = np.zeros((len(sequences), num_words))
for i, seq in enumerate(sequences):
if not seq:
continue
counts = collections.defaultdict(int)
for j in seq:
if j >= num_words:
continue
counts[j] += 1
for j, c in list(counts.items()):
if mode == "count":
x[i][j] = c
elif mode == "freq":
x[i][j] = c / len(seq)
elif mode == "binary":
x[i][j] = 1
elif mode == "tfidf":
# Use weighting scheme 2 in
# https://en.wikipedia.org/wiki/Tf%E2%80%93idf
tf = 1 + np.log(c)
idf = np.log(
1
+ self.document_count / (1 + self.index_docs.get(j, 0))
)
x[i][j] = tf * idf
else:
raise ValueError("Unknown vectorization mode:", mode)
return x
def get_config(self):
json_word_counts = json.dumps(self.word_counts)
json_word_docs = json.dumps(self.word_docs)
json_index_docs = json.dumps(self.index_docs)
json_word_index = json.dumps(self.word_index)
json_index_word = json.dumps(self.index_word)
return {
"num_words": self.num_words,
"filters": self.filters,
"lower": self.lower,
"split": self.split,
"char_level": self.char_level,
"oov_token": self.oov_token,
"document_count": self.document_count,
"word_counts": json_word_counts,
"word_docs": json_word_docs,
"index_docs": json_index_docs,
"index_word": json_index_word,
"word_index": json_word_index,
}
def to_json(self, **kwargs):
config = self.get_config()
tokenizer_config = {
"class_name": self.__class__.__name__,
"config": config,
}
return json.dumps(tokenizer_config, **kwargs)
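A usage sketch for the legacy `Tokenizer` (toy corpus; the import path assumes the generated `_legacy` API module):

```
from keras_core._legacy.preprocessing.text import Tokenizer

docs = ["the cat sat on the mat", "the dog ate my homework"]

tokenizer = Tokenizer(num_words=20, oov_token="<unk>")
tokenizer.fit_on_texts(docs)

sequences = tokenizer.texts_to_sequences(docs)   # lists of word indices
matrix = tokenizer.texts_to_matrix(docs, mode="tfidf")
# matrix.shape == (2, 20): one tf-idf weighted row per document.
```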
@keras_core_export("keras_core._legacy.preprocessing.text.tokenizer_from_json")
def tokenizer_from_json(json_string):
"""DEPRECATED."""
tokenizer_config = json.loads(json_string)
config = tokenizer_config.get("config")
word_counts = json.loads(config.pop("word_counts"))
word_docs = json.loads(config.pop("word_docs"))
index_docs = json.loads(config.pop("index_docs"))
# Integer indexing gets converted to strings with json.dumps()
index_docs = {int(k): v for k, v in index_docs.items()}
index_word = json.loads(config.pop("index_word"))
index_word = {int(k): v for k, v in index_word.items()}
word_index = json.loads(config.pop("word_index"))
tokenizer = Tokenizer(**config)
tokenizer.word_counts = word_counts
tokenizer.word_docs = word_docs
tokenizer.index_docs = index_docs
tokenizer.word_index = word_index
tokenizer.index_word = index_word
return tokenizer
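And a round-trip sketch for `to_json` / `tokenizer_from_json`:

```
from keras_core._legacy.preprocessing.text import (
    Tokenizer,
    tokenizer_from_json,
)

tokenizer = Tokenizer()
tokenizer.fit_on_texts(["to be or not to be"])

restored = tokenizer_from_json(tokenizer.to_json())
assert restored.word_index == tokenizer.word_index
```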

@ -1106,6 +1106,11 @@ def categorical_hinge(y_true, y_pred):
[
"keras_core.metrics.mean_squared_error",
"keras_core.losses.mean_squared_error",
# Legacy aliases
"keras_core._legacy.losses.mse",
"keras_core._legacy.losses.MSE",
"keras_core._legacy.metrics.mse",
"keras_core._legacy.metrics.MSE",
]
)
def mean_squared_error(y_true, y_pred):
@ -1140,6 +1145,11 @@ def mean_squared_error(y_true, y_pred):
[
"keras_core.metrics.mean_absolute_error",
"keras_core.losses.mean_absolute_error",
# Legacy aliases
"keras_core._legacy.losses.MAE",
"keras_core._legacy.losses.mae",
"keras_core._legacy.metrics.MAE",
"keras_core._legacy.metrics.mae",
]
)
def mean_absolute_error(y_true, y_pred):
@ -1172,6 +1182,11 @@ def mean_absolute_error(y_true, y_pred):
[
"keras_core.metrics.mean_absolute_percentage_error",
"keras_core.losses.mean_absolute_percentage_error",
# Legacy aliases
"keras_core._legacy.losses.mape",
"keras_core._legacy.losses.MAPE",
"keras_core._legacy.metrics.mape",
"keras_core._legacy.metrics.MAPE",
]
)
def mean_absolute_percentage_error(y_true, y_pred):
@ -1213,6 +1228,11 @@ def mean_absolute_percentage_error(y_true, y_pred):
[
"keras_core.metrics.mean_squared_logarithmic_error",
"keras_core.losses.mean_squared_logarithmic_error",
# Legacy aliases
"keras_core._legacy.losses.msle",
"keras_core._legacy.losses.MSLE",
"keras_core._legacy.metrics.msle",
"keras_core._legacy.metrics.MSLE",
]
)
def mean_squared_logarithmic_error(y_true, y_pred):
@ -1342,7 +1362,13 @@ def huber(y_true, y_pred, delta=1.0):
@keras_core_export(
["keras_core.losses.log_cosh", "keras_core.metrics.log_cosh"]
[
"keras_core.losses.log_cosh",
"keras_core.metrics.log_cosh",
# Legacy aliases
"keras_core._legacy.losses.logcosh",
"keras_core._legacy.metrics.logcosh",
]
)
def log_cosh(y_true, y_pred):
"""Logarithm of the hyperbolic cosine of the prediction error.
@ -1386,6 +1412,13 @@ def log_cosh(y_true, y_pred):
[
"keras_core.metrics.kl_divergence",
"keras_core.losses.kl_divergence",
# Legacy aliases
"keras_core._legacy.losses.KLD",
"keras_core._legacy.losses.kld",
"keras_core._legacy.losses.kullback_leibler_divergence",
"keras_core._legacy.metrics.KLD",
"keras_core._legacy.metrics.kld",
"keras_core._legacy.metrics.kullback_leibler_divergence",
]
)
def kl_divergence(y_true, y_pred):

@ -167,7 +167,9 @@ class Regularizer:
raise NotImplementedError(f"{self} does not implement get_config()")
@keras_core_export("keras_core.regularizers.L1L2")
@keras_core_export(
["keras_core.regularizers.L1L2", "keras_core.regularizers.l1_l2"]
)
class L1L2(Regularizer):
"""A regularizer that applies both L1 and L2 regularization penalties.

@ -1,2 +1,2 @@
# Unique source of truth for the version number.
__version__ = "0.1.2"
__version__ = "0.1.3"

@ -59,6 +59,84 @@ def build():
# Generate API __init__.py files in `keras_core/`
namex.generate_api_files(package, code_directory="src", verbose=True)
# Make keras_core/_tf_keras/ by copying keras_core/
tf_keras_dirpath = os.path.join(package, "_tf_keras")
os.makedirs(tf_keras_dirpath)
with open(os.path.join(package, "__init__.py")) as f:
init_file = f.read()
init_file = init_file.replace(
"from keras_core import _legacy",
"from keras_core import _tf_keras",
)
with open(os.path.join(package, "__init__.py"), "w") as f:
f.write(init_file)
with open(os.path.join(tf_keras_dirpath, "__init__.py"), "w") as f:
f.write(init_file)
for dirname in os.listdir(package):
dirpath = os.path.join(package, dirname)
if os.path.isdir(dirpath) and dirname not in (
"_legacy",
"_tf_keras",
"src",
):
shutil.copytree(
dirpath,
os.path.join(tf_keras_dirpath, dirname),
ignore=ignore_files,
)
# Copy keras_core/_legacy/ file contents to keras_core/_tf_keras/
legacy_submodules = [
path[:-3]
for path in os.listdir(os.path.join(package, "src", "legacy"))
if path.endswith(".py")
]
legacy_submodules += [
path
for path in os.listdir(os.path.join(package, "src", "legacy"))
if os.path.isdir(os.path.join(package, "src", "legacy", path))
]
for root, _, fnames in os.walk(os.path.join(package, "_legacy")):
for fname in fnames:
if fname.endswith(".py"):
legacy_fpath = os.path.join(root, fname)
tf_keras_root = root.replace("/_legacy", "/_tf_keras")
core_api_fpath = os.path.join(
root.replace("/_legacy", ""), fname
)
if not os.path.exists(tf_keras_root):
os.makedirs(tf_keras_root)
tf_keras_fpath = os.path.join(tf_keras_root, fname)
with open(legacy_fpath) as f:
legacy_contents = f.read()
legacy_contents = legacy_contents.replace(
"keras_core._legacy", "keras_core._tf_keras"
)
if os.path.exists(core_api_fpath):
with open(core_api_fpath) as f:
core_api_contents = f.read()
core_api_contents = core_api_contents.replace(
"from keras_core import _tf_keras\n", ""
)
for legacy_submodule in legacy_submodules:
core_api_contents = core_api_contents.replace(
f"from keras_core import {legacy_submodule}\n",
"",
)
core_api_contents = core_api_contents.replace(
f"keras_core.{legacy_submodule}",
f"keras_core._tf_keras.{legacy_submodule}",
)
legacy_contents = (
core_api_contents + "\n" + legacy_contents
)
with open(tf_keras_fpath, "w") as f:
f.write(legacy_contents)
# Delete keras_core/_legacy/
shutil.rmtree(os.path.join(package, "_legacy"))
# Make sure to export the __version__ string
from keras_core.src.version import __version__ # noqa: E402
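After running the build script, the intent is that a `_tf_keras` namespace mirrors the public API plus the legacy symbols, with `_legacy` references rewritten. A hedged sketch of the expected result in the built package (exact module paths depend on the generated API files):

```
# In the built wheel, keras_core/_tf_keras/ is a copy of the public API
# with the legacy modules merged in and "_legacy" references rewritten,
# so tf.keras-style code can be pointed at it, e.g.:
from keras_core._tf_keras.preprocessing.text import Tokenizer
```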