Add tf.keras backwards compat for nearly all non-experimental symbols (#603)
* Add tf.keras backwards compatibility for nearly all non-experimental symbols
* Remove print statements
* Fix identity init
This commit is contained in:
parent f73d98df60
commit b660875f51
@@ -296,7 +296,9 @@ def any_symbolic_tensors(args=None, kwargs=None):
     return False


-@keras_core_export("keras_core.utils.is_keras_tensor")
+@keras_core_export(
+    ["keras_core.utils.is_keras_tensor", "keras_core.backend.is_keras_tensor"]
+)
 def is_keras_tensor(x):
     """Returns whether `x` is a Keras tensor.
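The pattern above repeats through the whole commit: the export decorator now takes a list of names, so one object can be reached under both its keras-core path and its tf.keras-era path. A minimal sketch of the aliasing idea (hypothetical simplified decorator; the real `keras_core_export` is backed by the `namex` package):

# Hypothetical, simplified stand-in for a list-accepting export decorator.
_REGISTRY = {}


def export(paths):
    if isinstance(paths, str):
        paths = [paths]  # accept a single path or a list of aliases

    def decorator(obj):
        for path in paths:
            _REGISTRY[path] = obj  # every alias resolves to the same object
        return obj

    return decorator


@export(["pkg.utils.is_keras_tensor", "pkg.backend.is_keras_tensor"])
def is_keras_tensor(x):
    return hasattr(x, "_keras_history")


assert (
    _REGISTRY["pkg.utils.is_keras_tensor"]
    is _REGISTRY["pkg.backend.is_keras_tensor"]
)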
@@ -2,6 +2,7 @@ import inspect

 from keras_core.api_export import keras_core_export
 from keras_core.initializers.constant_initializers import Constant
+from keras_core.initializers.constant_initializers import Identity
 from keras_core.initializers.constant_initializers import Ones
 from keras_core.initializers.constant_initializers import Zeros
 from keras_core.initializers.initializer import Initializer
@@ -4,7 +4,9 @@ from keras_core.backend import standardize_dtype
 from keras_core.initializers.initializer import Initializer


-@keras_core_export("keras_core.initializers.Constant")
+@keras_core_export(
+    ["keras_core.initializers.Constant", "keras_core.initializers.constant"]
+)
 class Constant(Initializer):
     """Initializer that generates tensors with constant values.

@@ -37,7 +39,9 @@ class Constant(Initializer):
         return {"value": self.value}


-@keras_core_export("keras_core.initializers.Zeros")
+@keras_core_export(
+    ["keras_core.initializers.Zeros", "keras_core.initializers.zeros"]
+)
 class Zeros(Initializer):
     """Initializer that generates tensors initialized to 0.

@@ -67,7 +71,9 @@ class Zeros(Initializer):
         return ops.zeros(shape, dtype=dtype)


-@keras_core_export("keras_core.initializers.Ones")
+@keras_core_export(
+    ["keras_core.initializers.Ones", "keras_core.initializers.ones"]
+)
 class Ones(Initializer):
     """Initializer that generates tensors initialized to 1.

@@ -97,3 +103,52 @@ class Ones(Initializer):
     """
     dtype = standardize_dtype(dtype)
     return ops.ones(shape, dtype=dtype)
+
+
+@keras_core_export(
+    [
+        "keras_core.initializers.IdentityInitializer",
+        "keras_core.initializers.Identity",
+        "keras_core.initializers.identity",
+    ]
+)
+class Identity(Initializer):
+    """Initializer that generates the identity matrix.
+
+    Only usable for generating 2D matrices.
+
+    Examples:
+
+    >>> # Standalone usage:
+    >>> initializer = Identity()
+    >>> values = initializer(shape=(2, 2))
+
+    >>> # Usage in a Keras layer:
+    >>> initializer = Identity()
+    >>> layer = Dense(3, kernel_initializer=initializer)
+
+    Args:
+        gain: Multiplicative factor to apply to the identity matrix.
+    """
+
+    def __init__(self, gain=1.0):
+        self.gain = gain
+
+    def __call__(self, shape, dtype=None):
+        """Returns a tensor object initialized as specified by the initializer.
+
+        Args:
+            shape: Shape of the tensor.
+            dtype: Optional dtype of the tensor. Only numeric or boolean dtypes
+                are supported. If not specified, `keras_core.backend.floatx()`
+                is used, which defaults to `float32` unless you configured it
+                otherwise (via `keras_core.backend.set_floatx(float_dtype)`).
+        """
+        if len(shape) != 2:
+            raise ValueError(
+                "Identity matrix initializer can only be used for 2D matrices. "
+                f"Received: shape={shape} of rank {len(shape)}."
+            )
+        dtype = standardize_dtype(dtype)
+        return self.gain * ops.eye(*shape, dtype=dtype)
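For reference, the new `Identity` initializer is just a scaled `eye`. A standalone NumPy sketch of the behavior it implements (not the class itself):

import numpy as np


def identity_init(shape, gain=1.0):
    # Mirrors the logic above: 2D only, ones on the main diagonal, scaled.
    if len(shape) != 2:
        raise ValueError(f"Identity init needs a 2D shape, got {shape}")
    return gain * np.eye(*shape, dtype="float32")


print(identity_init((2, 3), gain=2.0))
# [[2. 0. 0.]
#  [0. 2. 0.]]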
@@ -12,8 +12,8 @@ class ConstantInitializersTest(testing.TestCase):
         initializer = initializers.Zeros()
         values = initializer(shape=shape)
         self.assertEqual(values.shape, shape)
-        np_values = backend.convert_to_numpy(values).data
-        self.assertEqual(np_values, np.zeros(shape=shape))
+        np_values = backend.convert_to_numpy(values)
+        self.assertAllClose(np_values, np.zeros(shape=shape))

         self.run_class_serialization_test(initializer)

@@ -23,8 +23,8 @@ class ConstantInitializersTest(testing.TestCase):
         initializer = initializers.Ones()
         values = initializer(shape=shape)
         self.assertEqual(values.shape, shape)
-        np_values = backend.convert_to_numpy(values).data
-        self.assertEqual(np_values, np.ones(shape=shape))
+        np_values = backend.convert_to_numpy(values)
+        self.assertAllClose(np_values, np.ones(shape=shape))

         self.run_class_serialization_test(initializer)
@@ -35,9 +35,21 @@ class ConstantInitializersTest(testing.TestCase):
         initializer = initializers.Constant(value=constant_value)
         values = initializer(shape=shape)
         self.assertEqual(values.shape, shape)
-        np_values = backend.convert_to_numpy(values).data
-        self.assertEqual(
+        np_values = backend.convert_to_numpy(values)
+        self.assertAllClose(
             np_values, np.full(shape=shape, fill_value=constant_value)
         )

         self.run_class_serialization_test(initializer)

+    def test_identity_initializer(self):
+        shape = (3, 3)
+        gain = 2
+
+        initializer = initializers.Identity(gain=gain)
+        values = initializer(shape=shape)
+        self.assertEqual(values.shape, shape)
+        np_values = backend.convert_to_numpy(values)
+        self.assertAllClose(np_values, np.eye(*shape) * gain)
+
+        self.run_class_serialization_test(initializer)
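The switch from `assertEqual` to `assertAllClose` matters because `==` on NumPy arrays is elementwise, so whole-array equality asserts are ambiguous. A quick illustration of the failure mode the tests move away from:

import numpy as np

a = np.zeros((2, 2))
b = np.zeros((2, 2))

# bool(a == b) raises "The truth value of an array ... is ambiguous",
# which is what unittest's assertEqual trips over for multi-element arrays.
# An allclose-style comparison is the reliable whole-array check:
assert np.allclose(a, b)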
@@ -7,7 +7,12 @@ from keras_core.initializers.initializer import Initializer
 from keras_core.saving import serialization_lib


-@keras_core_export("keras_core.initializers.RandomNormal")
+@keras_core_export(
+    [
+        "keras_core.initializers.RandomNormal",
+        "keras_core.initializers.random_normal",
+    ]
+)
 class RandomNormal(Initializer):
     """Random normal initializer.

@@ -59,7 +64,12 @@ class RandomNormal(Initializer):
         return {"mean": self.mean, "stddev": self.stddev, "seed": seed_config}


-@keras_core_export("keras_core.initializers.TruncatedNormal")
+@keras_core_export(
+    [
+        "keras_core.initializers.TruncatedNormal",
+        "keras_core.initializers.truncated_normal",
+    ]
+)
 class TruncatedNormal(Initializer):
     """Initializer that generates a truncated normal distribution.

@@ -114,7 +124,12 @@ class TruncatedNormal(Initializer):
         return {"mean": self.mean, "stddev": self.stddev, "seed": seed_config}


-@keras_core_export("keras_core.initializers.RandomUniform")
+@keras_core_export(
+    [
+        "keras_core.initializers.RandomUniform",
+        "keras_core.initializers.random_uniform",
+    ]
+)
 class RandomUniform(Initializer):
     """Random uniform initializer.

@@ -170,7 +185,12 @@ class RandomUniform(Initializer):
         }


-@keras_core_export("keras_core.initializers.VarianceScaling")
+@keras_core_export(
+    [
+        "keras_core.initializers.VarianceScaling",
+        "keras_core.initializers.variance_scaling",
+    ]
+)
 class VarianceScaling(Initializer):
     """Initializer that adapts its scale to the shape of its input tensors.

@@ -285,7 +305,12 @@ class VarianceScaling(Initializer):
         }


-@keras_core_export("keras_core.initializers.GlorotUniform")
+@keras_core_export(
+    [
+        "keras_core.initializers.GlorotUniform",
+        "keras_core.initializers.glorot_uniform",
+    ]
+)
 class GlorotUniform(VarianceScaling):
     """The Glorot uniform initializer, also called Xavier uniform initializer.

@@ -329,7 +354,12 @@ class GlorotUniform(VarianceScaling):
         }


-@keras_core_export("keras_core.initializers.GlorotNormal")
+@keras_core_export(
+    [
+        "keras_core.initializers.GlorotNormal",
+        "keras_core.initializers.glorot_normal",
+    ]
+)
 class GlorotNormal(VarianceScaling):
     """The Glorot normal initializer, also called Xavier normal initializer.

@@ -377,7 +407,12 @@ class GlorotNormal(VarianceScaling):
         }


-@keras_core_export("keras_core.initializers.LecunNormal")
+@keras_core_export(
+    [
+        "keras_core.initializers.LecunNormal",
+        "keras_core.initializers.lecun_normal",
+    ]
+)
 class LecunNormal(VarianceScaling):
     """Lecun normal initializer.

@@ -425,7 +460,12 @@ class LecunNormal(VarianceScaling):
         }


-@keras_core_export("keras_core.initializers.LecunUniform")
+@keras_core_export(
+    [
+        "keras_core.initializers.LecunUniform",
+        "keras_core.initializers.lecun_uniform",
+    ]
+)
 class LecunUniform(VarianceScaling):
     """Lecun uniform initializer.

@@ -469,7 +509,9 @@ class LecunUniform(VarianceScaling):
         }


-@keras_core_export("keras_core.initializers.HeNormal")
+@keras_core_export(
+    ["keras_core.initializers.HeNormal", "keras_core.initializers.he_normal"]
+)
 class HeNormal(VarianceScaling):
     """He normal initializer.

@@ -513,7 +555,9 @@ class HeNormal(VarianceScaling):
         }


-@keras_core_export("keras_core.initializers.HeUniform")
+@keras_core_export(
+    ["keras_core.initializers.HeUniform", "keras_core.initializers.he_uniform"]
+)
 class HeUniform(VarianceScaling):
     """He uniform variance scaling initializer.

@@ -589,6 +633,7 @@ def compute_fans(shape):
     [
         "keras_core.initializers.OrthogonalInitializer",
         "keras_core.initializers.Orthogonal",
+        "keras_core.initializers.orthogonal",
     ]
 )
 class OrthogonalInitializer(Initializer):
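With the lowercase aliases registered, tf.keras-style string lookups should resolve to the same classes as the CamelCase names. A small expectation sketch (assuming a build of `keras_core` that includes this change; not verified here):

from keras_core import initializers

# The lowercase name is the tf.keras-era alias; the string lookup API
# should hand back the same initializer type as the class attribute.
init = initializers.get("he_normal")
print(isinstance(init, initializers.HeNormal))  # expected: True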
0     keras_core/legacy/__init__.py (new file)
2287  keras_core/legacy/backend.py (new file; diff suppressed because it is too large)
245   keras_core/legacy/layers.py (new file)
@@ -0,0 +1,245 @@
"""Legacy Keras 1/2 layers.

AlphaDropout
RandomHeight
RandomWidth
ThresholdedReLU
"""

from keras_core import backend
from keras_core.api_export import keras_core_export
from keras_core.layers.layer import Layer
from keras_core.utils.module_utils import tensorflow as tf


@keras_core_export("keras_core._legacy.layers.AlphaDropout")
class AlphaDropout(Layer):
    """DEPRECATED."""

    def __init__(self, rate, noise_shape=None, seed=None, **kwargs):
        super().__init__(**kwargs)
        self.rate = rate
        self.seed = seed
        self.noise_shape = noise_shape
        self.seed_generator = backend.random.SeedGenerator(seed)
        self.supports_masking = True
        self.built = True

    def call(self, inputs, training=False):
        if training and self.rate > 0:
            alpha = 1.6732632423543772848170429916717
            scale = 1.0507009873554804934193349852946
            alpha_p = -alpha * scale

            if self.noise_shape is None:
                noise_shape = tf.shape(inputs)
            else:
                noise_shape = self.noise_shape
            # Note: the seed belongs to the uniform sampling call, not to
            # tf.greater_equal.
            kept_idx = tf.greater_equal(
                backend.random.uniform(
                    noise_shape, seed=self.seed_generator
                ),
                self.rate,
            )
            kept_idx = tf.cast(kept_idx, inputs.dtype)

            # Get affine transformation params
            a = ((1 - self.rate) * (1 + self.rate * alpha_p**2)) ** -0.5
            b = -a * alpha_p * self.rate

            # Apply mask
            x = inputs * kept_idx + alpha_p * (1 - kept_idx)

            # Do affine transformation
            return a * x + b
        return inputs

    def get_config(self):
        config = {"rate": self.rate, "seed": self.seed}
        base_config = super().get_config()
        return {**base_config, **config}

    def compute_output_shape(self, input_shape):
        return input_shape

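`AlphaDropout` exists to keep SELU networks self-normalizing: dropped units are set to `alpha_p` instead of 0, and the affine correction `(a, b)` restores zero mean and unit variance. A NumPy check of that property (standalone sketch, independent of the layer above):

import numpy as np

rng = np.random.default_rng(0)
rate = 0.2
alpha = 1.6732632423543772
scale = 1.0507009873554805
alpha_p = -alpha * scale

x = rng.standard_normal(1_000_000)  # SELU-like input: mean 0, var 1
kept = (rng.uniform(size=x.shape) >= rate).astype(x.dtype)

a = ((1 - rate) * (1 + rate * alpha_p**2)) ** -0.5
b = -a * alpha_p * rate

y = a * (x * kept + alpha_p * (1 - kept)) + b
print(y.mean(), y.var())  # both stay close to 0 and 1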
@keras_core_export("keras_core._legacy.layers.RandomHeight")
class RandomHeight(Layer):
    """DEPRECATED."""

    def __init__(self, factor, interpolation="bilinear", seed=None, **kwargs):
        super().__init__(**kwargs)
        self.seed_generator = backend.random.SeedGenerator(seed)
        self.factor = factor
        if isinstance(factor, (tuple, list)):
            self.height_lower = factor[0]
            self.height_upper = factor[1]
        else:
            self.height_lower = -factor
            self.height_upper = factor

        if self.height_upper < self.height_lower:
            raise ValueError(
                "`factor` argument cannot have an upper bound less than the "
                f"lower bound. Received: factor={factor}"
            )
        if self.height_lower < -1.0 or self.height_upper < -1.0:
            raise ValueError(
                "`factor` argument must have values larger than -1. "
                f"Received: factor={factor}"
            )
        self.interpolation = interpolation
        self.seed = seed

    def call(self, inputs, training=True):
        inputs = tf.convert_to_tensor(inputs, dtype=self.compute_dtype)

        def random_height_inputs(inputs):
            """Inputs height-adjusted with random ops."""
            inputs_shape = tf.shape(inputs)
            img_hd = tf.cast(inputs_shape[-3], tf.float32)
            img_wd = inputs_shape[-2]
            height_factor = backend.random.uniform(
                shape=[],
                minval=(1.0 + self.height_lower),
                maxval=(1.0 + self.height_upper),
                seed=self.seed_generator,
            )
            adjusted_height = tf.cast(height_factor * img_hd, tf.int32)
            adjusted_size = tf.stack([adjusted_height, img_wd])
            output = tf.image.resize(
                images=inputs,
                size=adjusted_size,
                method=self.interpolation,
            )
            # tf.resize will output float32 regardless of input type.
            output = tf.cast(output, self.compute_dtype)
            output_shape = inputs.shape.as_list()
            output_shape[-3] = None
            output.set_shape(output_shape)
            return output

        if training:
            return random_height_inputs(inputs)
        else:
            return inputs

    def compute_output_shape(self, input_shape):
        input_shape = list(input_shape)
        input_shape[-3] = None
        return tuple(input_shape)

    def get_config(self):
        config = {
            "factor": self.factor,
            "interpolation": self.interpolation,
            "seed": self.seed,
        }
        base_config = super().get_config()
        return {**base_config, **config}

@keras_core_export("keras_core._legacy.layers.RandomWidth")
class RandomWidth(Layer):
    """DEPRECATED."""

    def __init__(self, factor, interpolation="bilinear", seed=None, **kwargs):
        super().__init__(**kwargs)
        self.seed_generator = backend.random.SeedGenerator(seed)
        self.factor = factor
        if isinstance(factor, (tuple, list)):
            self.width_lower = factor[0]
            self.width_upper = factor[1]
        else:
            self.width_lower = -factor
            self.width_upper = factor
        if self.width_upper < self.width_lower:
            raise ValueError(
                "`factor` argument cannot have an upper bound less than the "
                f"lower bound. Received: factor={factor}"
            )
        if self.width_lower < -1.0 or self.width_upper < -1.0:
            raise ValueError(
                "`factor` argument must have values larger than -1. "
                f"Received: factor={factor}"
            )
        self.interpolation = interpolation
        self.seed = seed

    def call(self, inputs, training=True):
        inputs = tf.convert_to_tensor(inputs, dtype=self.compute_dtype)

        def random_width_inputs(inputs):
            """Inputs width-adjusted with random ops."""
            inputs_shape = tf.shape(inputs)
            img_hd = inputs_shape[-3]
            img_wd = tf.cast(inputs_shape[-2], tf.float32)
            width_factor = backend.random.uniform(
                shape=[],
                minval=(1.0 + self.width_lower),
                maxval=(1.0 + self.width_upper),
                seed=self.seed_generator,
            )
            adjusted_width = tf.cast(width_factor * img_wd, tf.int32)
            adjusted_size = tf.stack([img_hd, adjusted_width])
            output = tf.image.resize(
                images=inputs,
                size=adjusted_size,
                method=self.interpolation,
            )
            # tf.resize will output float32 regardless of input type.
            output = tf.cast(output, self.compute_dtype)
            output_shape = inputs.shape.as_list()
            output_shape[-2] = None
            output.set_shape(output_shape)
            return output

        if training:
            return random_width_inputs(inputs)
        else:
            return inputs

    def compute_output_shape(self, input_shape):
        input_shape = list(input_shape)
        input_shape[-2] = None
        return tuple(input_shape)

    def get_config(self):
        config = {
            "factor": self.factor,
            "interpolation": self.interpolation,
            "seed": self.seed,
        }
        base_config = super().get_config()
        return {**base_config, **config}

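Both resizing layers draw one uniform scale factor per call from `[1 + lower, 1 + upper]` and resize a single spatial axis with it. A NumPy stand-in for the size computation (hypothetical helper, for illustration only):

import numpy as np


def random_adjusted_height(img_height, factor=(-0.2, 0.3), rng=None):
    # One uniform factor per call, applied to the height axis only.
    rng = rng or np.random.default_rng()
    lower, upper = factor
    scale = rng.uniform(1.0 + lower, 1.0 + upper)
    return int(scale * img_height)


print(random_adjusted_height(224))  # somewhere in [179, 291]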
@keras_core_export("keras_core._legacy.layers.ThresholdedReLU")
class ThresholdedReLU(Layer):
    """DEPRECATED."""

    def __init__(self, theta=1.0, **kwargs):
        super().__init__(**kwargs)
        if theta is None:
            raise ValueError(
                "Theta of a Thresholded ReLU layer cannot be None, expecting a "
                f"float. Received: {theta}"
            )
        if theta < 0:
            raise ValueError(
                "The theta value of a Thresholded ReLU layer "
                f"should be >=0. Received: {theta}"
            )
        self.supports_masking = True
        self.theta = tf.convert_to_tensor(theta, dtype=self.compute_dtype)

    def call(self, inputs):
        dtype = self.compute_dtype
        return inputs * tf.cast(tf.greater(inputs, self.theta), dtype)

    def get_config(self):
        config = {"theta": float(self.theta)}
        base_config = super().get_config()
        return {**base_config, **config}

    def compute_output_shape(self, input_shape):
        return input_shape
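`ThresholdedReLU` computes `f(x) = x * 1[x > theta]`: identity above the threshold, zero at or below it. A NumPy stand-in for the layer's `call()`:

import numpy as np


def thresholded_relu(x, theta=1.0):
    # tf.greater is strict, so x == theta maps to 0.
    return x * (x > theta)


print(thresholded_relu(np.array([0.5, 1.0, 1.5])))
# [0.  0.  1.5]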
20    keras_core/legacy/losses.py (new file)
@@ -0,0 +1,20 @@
from keras_core.api_export import keras_core_export


@keras_core_export("keras_core._legacy.losses.Reduction")
class Reduction:
    AUTO = "auto"
    NONE = "none"
    SUM = "sum"
    SUM_OVER_BATCH_SIZE = "sum_over_batch_size"

    @classmethod
    def all(cls):
        return (cls.AUTO, cls.NONE, cls.SUM, cls.SUM_OVER_BATCH_SIZE)

    @classmethod
    def validate(cls, key):
        if key not in cls.all():
            raise ValueError(
                f'Invalid Reduction Key: {key}. Expected keys are "{cls.all()}"'
            )
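A quick expectation for the legacy `Reduction` enum-style class (assuming the class above is importable):

Reduction.validate("sum")  # a known key: passes silently

try:
    Reduction.validate("mean")  # "mean" was never a legacy reduction key
except ValueError as err:
    print(err)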
0     keras_core/legacy/preprocessing/__init__.py (new file)
1898  keras_core/legacy/preprocessing/image.py (new file; diff suppressed because it is too large)
324   keras_core/legacy/preprocessing/sequence.py (new file)
@@ -0,0 +1,324 @@
"""Deprecated sequence preprocessing APIs from Keras 1."""

import json
import random

import numpy as np

from keras_core.api_export import keras_core_export
from keras_core.trainers.data_adapters.py_dataset_adapter import PyDataset


@keras_core_export(
    "keras_core._legacy.preprocessing.sequence.TimeseriesGenerator"
)
class TimeseriesGenerator(PyDataset):
    """Utility class for generating batches of temporal data.

    DEPRECATED.

    This class takes in a sequence of data-points gathered at
    equal intervals, along with time series parameters such as
    stride, length of history, etc., to produce batches for
    training/validation.

    Arguments:
        data: Indexable generator (such as a list or Numpy array)
            containing consecutive data points (timesteps).
            The data should be at least 2D, and axis 0 is expected
            to be the time dimension.
        targets: Targets corresponding to timesteps in `data`.
            It should have the same length as `data`.
        length: Length of the output sequences (in number of timesteps).
        sampling_rate: Period between successive individual timesteps
            within sequences. For rate `r`, timesteps
            `data[i]`, `data[i-r]`, ... `data[i - length]`
            are used to create a sample sequence.
        stride: Period between successive output sequences.
            For stride `s`, consecutive output samples would
            be centered around `data[i]`, `data[i+s]`, `data[i+2*s]`, etc.
        start_index: Data points earlier than `start_index` will not be used
            in the output sequences. This is useful to reserve part of the
            data for test or validation.
        end_index: Data points later than `end_index` will not be used
            in the output sequences. This is useful to reserve part of the
            data for test or validation.
        shuffle: Whether to shuffle output samples,
            or instead draw them in chronological order.
        reverse: Boolean: if `True`, timesteps in each output sample will be
            in reverse chronological order.
        batch_size: Number of timeseries samples in each batch
            (except maybe the last one).

    Returns:
        A PyDataset instance.
    """

    def __init__(
        self,
        data,
        targets,
        length,
        sampling_rate=1,
        stride=1,
        start_index=0,
        end_index=None,
        shuffle=False,
        reverse=False,
        batch_size=128,
    ):
        if len(data) != len(targets):
            raise ValueError(
                "Data and targets have to be "
                f"of same length. Data length is {len(data)} "
                f"while target length is {len(targets)}"
            )

        self.data = data
        self.targets = targets
        self.length = length
        self.sampling_rate = sampling_rate
        self.stride = stride
        self.start_index = start_index + length
        if end_index is None:
            end_index = len(data) - 1
        self.end_index = end_index
        self.shuffle = shuffle
        self.reverse = reverse
        self.batch_size = batch_size

        if self.start_index > self.end_index:
            raise ValueError(
                f"`start_index+length={self.start_index} "
                f"> end_index={self.end_index}` "
                "is disallowed, as no part of the sequence "
                "would be left to be used as current step."
            )

    def __len__(self):
        return (
            self.end_index - self.start_index + self.batch_size * self.stride
        ) // (self.batch_size * self.stride)

    def __getitem__(self, index):
        if self.shuffle:
            rows = np.random.randint(
                self.start_index, self.end_index + 1, size=self.batch_size
            )
        else:
            i = self.start_index + self.batch_size * self.stride * index
            rows = np.arange(
                i,
                min(i + self.batch_size * self.stride, self.end_index + 1),
                self.stride,
            )

        samples = np.array(
            [
                self.data[row - self.length : row : self.sampling_rate]
                for row in rows
            ]
        )
        targets = np.array([self.targets[row] for row in rows])

        if self.reverse:
            return samples[:, ::-1, ...], targets
        return samples, targets

    def get_config(self):
        """Returns the TimeseriesGenerator configuration as Python dictionary.

        Returns:
            A Python dictionary with the TimeseriesGenerator configuration.
        """
        data = self.data
        if type(self.data).__module__ == np.__name__:
            data = self.data.tolist()
        try:
            json_data = json.dumps(data)
        except TypeError as e:
            raise TypeError(f"Data not JSON Serializable: {data}") from e

        targets = self.targets
        if type(self.targets).__module__ == np.__name__:
            targets = self.targets.tolist()
        try:
            json_targets = json.dumps(targets)
        except TypeError as e:
            raise TypeError(f"Targets not JSON Serializable: {targets}") from e

        return {
            "data": json_data,
            "targets": json_targets,
            "length": self.length,
            "sampling_rate": self.sampling_rate,
            "stride": self.stride,
            "start_index": self.start_index,
            "end_index": self.end_index,
            "shuffle": self.shuffle,
            "reverse": self.reverse,
            "batch_size": self.batch_size,
        }

    def to_json(self, **kwargs):
        """Returns a JSON string containing the generator's configuration.

        Args:
            **kwargs: Additional keyword arguments to be passed
                to `json.dumps()`.

        Returns:
            A JSON string containing the generator configuration.
        """
        config = self.get_config()
        timeseries_generator_config = {
            "class_name": self.__class__.__name__,
            "config": config,
        }
        return json.dumps(timeseries_generator_config, **kwargs)

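A minimal usage sketch for `TimeseriesGenerator` (assuming the class above; the numbers follow directly from `length`, `stride`, and `batch_size`):

import numpy as np

data = np.arange(50).reshape(50, 1)  # 50 timesteps, 1 feature
targets = np.arange(50)

gen = TimeseriesGenerator(data, targets, length=10, batch_size=8)
x, y = gen[0]
print(x.shape, y.shape)  # (8, 10, 1) (8,)
print(x[0].ravel())      # timesteps 0..9, the history for target t=10
print(y[0])              # 10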
@keras_core_export(
    "keras_core._legacy.preprocessing.sequence.make_sampling_table"
)
def make_sampling_table(size, sampling_factor=1e-5):
    """Generates a word rank-based probabilistic sampling table.

    DEPRECATED.

    Used for generating the `sampling_table` argument for `skipgrams`.
    `sampling_table[i]` is the probability of sampling the i-th most common
    word in a dataset (more common words should be sampled less frequently,
    for balance).

    The sampling probabilities are generated according
    to the sampling distribution used in word2vec:

    ```
    p(word) = (min(1, sqrt(word_frequency / sampling_factor) /
        (word_frequency / sampling_factor)))
    ```

    We assume that the word frequencies follow Zipf's law (s=1) to derive
    a numerical approximation of frequency(rank):

    `frequency(rank) ~ 1/(rank * (log(rank) + gamma) + 1/2 - 1/(12*rank))`
    where `gamma` is the Euler-Mascheroni constant.

    Args:
        size: Int, number of possible words to sample.
        sampling_factor: The sampling factor in the word2vec formula.

    Returns:
        A 1D Numpy array of length `size` where the ith entry
        is the probability that a word of rank i should be sampled.
    """
    gamma = 0.577
    rank = np.arange(size)
    rank[0] = 1
    inv_fq = rank * (np.log(rank) + gamma) + 0.5 - 1.0 / (12.0 * rank)
    f = sampling_factor * inv_fq

    return np.minimum(1.0, f / np.sqrt(f))

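Note that `f / np.sqrt(f)` is just `sqrt(f)`, so the table is `min(1, sqrt(sampling_factor * inv_fq))`: common words (low rank, small `inv_fq`) get tiny keep probabilities, and for large vocabularies the rarest ranks saturate at 1.0. A condensed check:

import numpy as np

size, sampling_factor = 10, 1e-5
rank = np.arange(size)
rank[0] = 1
inv_fq = rank * (np.log(rank) + 0.577) + 0.5 - 1.0 / (12.0 * rank)
table = np.minimum(1.0, np.sqrt(sampling_factor * inv_fq))
print(table[:4])  # tiny keep probabilities for the most common ranks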
@keras_core_export("keras_core._legacy.preprocessing.sequence.skipgrams")
def skipgrams(
    sequence,
    vocabulary_size,
    window_size=4,
    negative_samples=1.0,
    shuffle=True,
    categorical=False,
    sampling_table=None,
    seed=None,
):
    """Generates skipgram word pairs.

    DEPRECATED.

    This function transforms a sequence of word indexes (list of integers)
    into tuples of words of the form:

    - (word, word in the same window), with label 1 (positive samples).
    - (word, random word from the vocabulary), with label 0 (negative samples).

    Read more about Skipgram in this gnomic paper by Mikolov et al.:
    [Efficient Estimation of Word Representations in
    Vector Space](http://arxiv.org/pdf/1301.3781v3.pdf)

    Args:
        sequence: A word sequence (sentence), encoded as a list
            of word indices (integers). If using a `sampling_table`,
            word indices are expected to match the rank
            of the words in a reference dataset (e.g. 10 would encode
            the 10-th most frequently occurring token).
            Note that index 0 is expected to be a non-word and will be skipped.
        vocabulary_size: Int, maximum possible word index + 1.
        window_size: Int, size of sampling windows (technically half-window).
            The window of a word `w_i` will be
            `[i - window_size, i + window_size+1]`.
        negative_samples: Float >= 0. 0 for no negative (i.e. random) samples.
            1 for the same number as positive samples.
        shuffle: Whether to shuffle the word couples before returning them.
        categorical: bool. If `False`, labels will be
            integers (e.g. `[0, 1, 1 .. ]`),
            if `True`, labels will be categorical, e.g.
            `[[1,0],[0,1],[0,1] .. ]`.
        sampling_table: 1D array of size `vocabulary_size` where the entry i
            encodes the probability to sample a word of rank i.
        seed: Random seed.

    Returns:
        couples, labels: where `couples` are int pairs and
            `labels` are either 0 or 1.

    Note:
        By convention, index 0 in the vocabulary is
        a non-word and will be skipped.
    """
    couples = []
    labels = []
    for i, wi in enumerate(sequence):
        if not wi:
            continue
        if sampling_table is not None:
            if sampling_table[wi] < random.random():
                continue

        window_start = max(0, i - window_size)
        window_end = min(len(sequence), i + window_size + 1)
        for j in range(window_start, window_end):
            if j != i:
                wj = sequence[j]
                if not wj:
                    continue
                couples.append([wi, wj])
                if categorical:
                    labels.append([0, 1])
                else:
                    labels.append(1)

    if negative_samples > 0:
        num_negative_samples = int(len(labels) * negative_samples)
        words = [c[0] for c in couples]
        random.shuffle(words)

        couples += [
            [words[i % len(words)], random.randint(1, vocabulary_size - 1)]
            for i in range(num_negative_samples)
        ]
        if categorical:
            labels += [[1, 0]] * num_negative_samples
        else:
            labels += [0] * num_negative_samples

    if shuffle:
        if seed is None:
            # random.randint requires ints; 10e6 is a float literal.
            seed = random.randint(0, int(10e6))
        random.seed(seed)
        random.shuffle(couples)
        random.seed(seed)
        random.shuffle(labels)

    return couples, labels
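A usage sketch for `skipgrams` (assuming the function above):

sequence = [1, 2, 3, 4, 5]
couples, labels = skipgrams(
    sequence, vocabulary_size=6, window_size=1, negative_samples=1.0
)
for (target, context), label in zip(couples, labels):
    print(target, context, label)  # label 1: real neighbor, 0: random word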
338   keras_core/legacy/preprocessing/text.py (new file)
@@ -0,0 +1,338 @@
"""Deprecated text preprocessing APIs from Keras 1."""

import collections
import hashlib
import json
import warnings

import numpy as np

from keras_core.api_export import keras_core_export


@keras_core_export(
    "keras_core._legacy.preprocessing.text.text_to_word_sequence"
)
def text_to_word_sequence(
    input_text,
    filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n',
    lower=True,
    split=" ",
):
    """DEPRECATED."""
    if lower:
        input_text = input_text.lower()

    translate_dict = {c: split for c in filters}
    translate_map = str.maketrans(translate_dict)
    input_text = input_text.translate(translate_map)

    seq = input_text.split(split)
    return [i for i in seq if i]

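What `text_to_word_sequence` does in practice (assuming the function above): lowercase, map every filter character to the split character, then split and drop empties.

print(text_to_word_sequence("Hello, world! Hello?"))
# ['hello', 'world', 'hello']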
@keras_core_export("keras_core._legacy.preprocessing.text.one_hot")
def one_hot(
    input_text,
    n,
    filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n',
    lower=True,
    split=" ",
    analyzer=None,
):
    """DEPRECATED."""
    return hashing_trick(
        input_text,
        n,
        hash_function=hash,
        filters=filters,
        lower=lower,
        split=split,
        analyzer=analyzer,
    )

@keras_core_export("keras_core._legacy.preprocessing.text.hashing_trick")
def hashing_trick(
    text,
    n,
    hash_function=None,
    filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n',
    lower=True,
    split=" ",
    analyzer=None,
):
    """DEPRECATED."""
    if hash_function is None:
        hash_function = hash
    elif hash_function == "md5":

        def hash_function(w):
            return int(hashlib.md5(w.encode()).hexdigest(), 16)

    if analyzer is None:
        seq = text_to_word_sequence(
            text, filters=filters, lower=lower, split=split
        )
    else:
        seq = analyzer(text)

    return [(hash_function(w) % (n - 1) + 1) for w in seq]

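The hashing trick maps words into buckets `1..n-1` (index 0 stays reserved), so no vocabulary has to be stored; collisions are the price. Note the built-in `hash` is salted per process, which is why the `"md5"` option exists for stable indices across runs. A sketch assuming `hashing_trick` above:

ids = hashing_trick("the cat sat on the mat", n=50, hash_function="md5")
print(ids)  # repeated words ("the") always land in the same bucket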
@keras_core_export("keras_core._legacy.preprocessing.text.Tokenizer")
class Tokenizer(object):
    """DEPRECATED."""

    def __init__(
        self,
        num_words=None,
        filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n',
        lower=True,
        split=" ",
        char_level=False,
        oov_token=None,
        analyzer=None,
        **kwargs
    ):
        # Legacy support
        if "nb_words" in kwargs:
            warnings.warn(
                "The `nb_words` argument in `Tokenizer` "
                "has been renamed `num_words`."
            )
            num_words = kwargs.pop("nb_words")
        document_count = kwargs.pop("document_count", 0)
        if kwargs:
            raise TypeError("Unrecognized keyword arguments: " + str(kwargs))

        self.word_counts = collections.OrderedDict()
        self.word_docs = collections.defaultdict(int)
        self.filters = filters
        self.split = split
        self.lower = lower
        self.num_words = num_words
        self.document_count = document_count
        self.char_level = char_level
        self.oov_token = oov_token
        self.index_docs = collections.defaultdict(int)
        self.word_index = {}
        self.index_word = {}
        self.analyzer = analyzer

    def fit_on_texts(self, texts):
        for text in texts:
            self.document_count += 1
            if self.char_level or isinstance(text, list):
                if self.lower:
                    if isinstance(text, list):
                        text = [text_elem.lower() for text_elem in text]
                    else:
                        text = text.lower()
                seq = text
            else:
                if self.analyzer is None:
                    seq = text_to_word_sequence(
                        text,
                        filters=self.filters,
                        lower=self.lower,
                        split=self.split,
                    )
                else:
                    seq = self.analyzer(text)
            for w in seq:
                if w in self.word_counts:
                    self.word_counts[w] += 1
                else:
                    self.word_counts[w] = 1
            for w in set(seq):
                # In how many documents each word occurs
                self.word_docs[w] += 1

        wcounts = list(self.word_counts.items())
        wcounts.sort(key=lambda x: x[1], reverse=True)
        # forcing the oov_token to index 1 if it exists
        if self.oov_token is None:
            sorted_voc = []
        else:
            sorted_voc = [self.oov_token]
        sorted_voc.extend(wc[0] for wc in wcounts)

        # note that index 0 is reserved, never assigned to an existing word
        self.word_index = dict(
            zip(sorted_voc, list(range(1, len(sorted_voc) + 1)))
        )

        self.index_word = {c: w for w, c in self.word_index.items()}

        for w, c in list(self.word_docs.items()):
            self.index_docs[self.word_index[w]] = c

    def fit_on_sequences(self, sequences):
        self.document_count += len(sequences)
        for seq in sequences:
            seq = set(seq)
            for i in seq:
                self.index_docs[i] += 1

    def texts_to_sequences(self, texts):
        return list(self.texts_to_sequences_generator(texts))

    def texts_to_sequences_generator(self, texts):
        num_words = self.num_words
        oov_token_index = self.word_index.get(self.oov_token)
        for text in texts:
            if self.char_level or isinstance(text, list):
                if self.lower:
                    if isinstance(text, list):
                        text = [text_elem.lower() for text_elem in text]
                    else:
                        text = text.lower()
                seq = text
            else:
                if self.analyzer is None:
                    seq = text_to_word_sequence(
                        text,
                        filters=self.filters,
                        lower=self.lower,
                        split=self.split,
                    )
                else:
                    seq = self.analyzer(text)
            vect = []
            for w in seq:
                i = self.word_index.get(w)
                if i is not None:
                    if num_words and i >= num_words:
                        if oov_token_index is not None:
                            vect.append(oov_token_index)
                    else:
                        vect.append(i)
                elif self.oov_token is not None:
                    vect.append(oov_token_index)
            yield vect

    def sequences_to_texts(self, sequences):
        return list(self.sequences_to_texts_generator(sequences))

    def sequences_to_texts_generator(self, sequences):
        num_words = self.num_words
        oov_token_index = self.word_index.get(self.oov_token)
        for seq in sequences:
            vect = []
            for num in seq:
                word = self.index_word.get(num)
                if word is not None:
                    if num_words and num >= num_words:
                        if oov_token_index is not None:
                            vect.append(self.index_word[oov_token_index])
                    else:
                        vect.append(word)
                elif self.oov_token is not None:
                    vect.append(self.index_word[oov_token_index])
            vect = " ".join(vect)
            yield vect

    def texts_to_matrix(self, texts, mode="binary"):
        sequences = self.texts_to_sequences(texts)
        return self.sequences_to_matrix(sequences, mode=mode)

    def sequences_to_matrix(self, sequences, mode="binary"):
        if not self.num_words:
            if self.word_index:
                num_words = len(self.word_index) + 1
            else:
                raise ValueError(
                    "Specify a dimension (`num_words` argument), "
                    "or fit on some text data first."
                )
        else:
            num_words = self.num_words

        if mode == "tfidf" and not self.document_count:
            raise ValueError(
                "Fit the Tokenizer on some data before using tfidf mode."
            )

        x = np.zeros((len(sequences), num_words))
        for i, seq in enumerate(sequences):
            if not seq:
                continue
            counts = collections.defaultdict(int)
            for j in seq:
                if j >= num_words:
                    continue
                counts[j] += 1
            for j, c in list(counts.items()):
                if mode == "count":
                    x[i][j] = c
                elif mode == "freq":
                    x[i][j] = c / len(seq)
                elif mode == "binary":
                    x[i][j] = 1
                elif mode == "tfidf":
                    # Use weighting scheme 2 in
                    # https://en.wikipedia.org/wiki/Tf%E2%80%93idf
                    tf = 1 + np.log(c)
                    idf = np.log(
                        1
                        + self.document_count / (1 + self.index_docs.get(j, 0))
                    )
                    x[i][j] = tf * idf
                else:
                    raise ValueError(f"Unknown vectorization mode: {mode}")
        return x

    def get_config(self):
        json_word_counts = json.dumps(self.word_counts)
        json_word_docs = json.dumps(self.word_docs)
        json_index_docs = json.dumps(self.index_docs)
        json_word_index = json.dumps(self.word_index)
        json_index_word = json.dumps(self.index_word)

        return {
            "num_words": self.num_words,
            "filters": self.filters,
            "lower": self.lower,
            "split": self.split,
            "char_level": self.char_level,
            "oov_token": self.oov_token,
            "document_count": self.document_count,
            "word_counts": json_word_counts,
            "word_docs": json_word_docs,
            "index_docs": json_index_docs,
            "index_word": json_index_word,
            "word_index": json_word_index,
        }

    def to_json(self, **kwargs):
        config = self.get_config()
        tokenizer_config = {
            "class_name": self.__class__.__name__,
            "config": config,
        }
        return json.dumps(tokenizer_config, **kwargs)

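A usage sketch for the legacy `Tokenizer` (assuming the class above); ties in word counts keep first-seen order, so indices are deterministic:

texts = ["the cat sat", "the dog sat"]
tok = Tokenizer(num_words=10)
tok.fit_on_texts(texts)
print(tok.word_index)                 # {'the': 1, 'sat': 2, 'cat': 3, 'dog': 4}
print(tok.texts_to_sequences(texts))  # [[1, 3, 2], [1, 4, 2]]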
@keras_core_export("keras_core._legacy.preprocessing.text.tokenizer_from_json")
def tokenizer_from_json(json_string):
    """DEPRECATED."""
    tokenizer_config = json.loads(json_string)
    config = tokenizer_config.get("config")

    word_counts = json.loads(config.pop("word_counts"))
    word_docs = json.loads(config.pop("word_docs"))
    index_docs = json.loads(config.pop("index_docs"))
    # Integer indexing gets converted to strings with json.dumps()
    index_docs = {int(k): v for k, v in index_docs.items()}
    index_word = json.loads(config.pop("index_word"))
    index_word = {int(k): v for k, v in index_word.items()}
    word_index = json.loads(config.pop("word_index"))

    tokenizer = Tokenizer(**config)
    tokenizer.word_counts = word_counts
    tokenizer.word_docs = word_docs
    tokenizer.index_docs = index_docs
    tokenizer.word_index = word_index
    tokenizer.index_word = index_word
    return tokenizer
@@ -1106,6 +1106,11 @@ def categorical_hinge(y_true, y_pred):
     [
         "keras_core.metrics.mean_squared_error",
         "keras_core.losses.mean_squared_error",
+        # Legacy aliases
+        "keras_core._legacy.losses.mse",
+        "keras_core._legacy.losses.MSE",
+        "keras_core._legacy.metrics.mse",
+        "keras_core._legacy.metrics.MSE",
     ]
 )
 def mean_squared_error(y_true, y_pred):

@@ -1140,6 +1145,11 @@ def mean_squared_error(y_true, y_pred):
     [
         "keras_core.metrics.mean_absolute_error",
         "keras_core.losses.mean_absolute_error",
+        # Legacy aliases
+        "keras_core._legacy.losses.MAE",
+        "keras_core._legacy.losses.mae",
+        "keras_core._legacy.metrics.MAE",
+        "keras_core._legacy.metrics.mae",
     ]
 )
 def mean_absolute_error(y_true, y_pred):

@@ -1172,6 +1182,11 @@ def mean_absolute_error(y_true, y_pred):
     [
         "keras_core.metrics.mean_absolute_percentage_error",
         "keras_core.losses.mean_absolute_percentage_error",
+        # Legacy aliases
+        "keras_core._legacy.losses.mape",
+        "keras_core._legacy.losses.MAPE",
+        "keras_core._legacy.metrics.mape",
+        "keras_core._legacy.metrics.MAPE",
     ]
 )
 def mean_absolute_percentage_error(y_true, y_pred):

@@ -1213,6 +1228,11 @@ def mean_absolute_percentage_error(y_true, y_pred):
     [
         "keras_core.metrics.mean_squared_logarithmic_error",
         "keras_core.losses.mean_squared_logarithmic_error",
+        # Legacy aliases
+        "keras_core._legacy.losses.msle",
+        "keras_core._legacy.losses.MSLE",
+        "keras_core._legacy.metrics.msle",
+        "keras_core._legacy.metrics.MSLE",
     ]
 )
 def mean_squared_logarithmic_error(y_true, y_pred):

@@ -1342,7 +1362,13 @@ def huber(y_true, y_pred, delta=1.0):


 @keras_core_export(
-    ["keras_core.losses.log_cosh", "keras_core.metrics.log_cosh"]
+    [
+        "keras_core.losses.log_cosh",
+        "keras_core.metrics.log_cosh",
+        # Legacy aliases
+        "keras_core._legacy.losses.logcosh",
+        "keras_core._legacy.metrics.logcosh",
+    ]
 )
 def log_cosh(y_true, y_pred):
     """Logarithm of the hyperbolic cosine of the prediction error.

@@ -1386,6 +1412,13 @@ def log_cosh(y_true, y_pred):
     [
         "keras_core.metrics.kl_divergence",
         "keras_core.losses.kl_divergence",
+        # Legacy aliases
+        "keras_core._legacy.losses.KLD",
+        "keras_core._legacy.losses.kld",
+        "keras_core._legacy.losses.kullback_leibler_divergence",
+        "keras_core._legacy.metrics.KLD",
+        "keras_core._legacy.metrics.kld",
+        "keras_core._legacy.metrics.kullback_leibler_divergence",
     ]
 )
 def kl_divergence(y_true, y_pred):
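The alias lists above only register extra names for existing functions; nothing about the math changes. A toy illustration of the aliasing pattern itself (hypothetical names, not the keras_core registry):

def mean_squared_error(y_true, y_pred):
    return sum((t - p) ** 2 for t, p in zip(y_true, y_pred)) / len(y_true)


# Legacy short names point at the very same function object.
mse = MSE = mean_squared_error

assert mse is mean_squared_error
assert MSE([1.0, 2.0], [1.0, 0.0]) == 2.0  # (0 + 4) / 2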
@@ -167,7 +167,9 @@ class Regularizer:
         raise NotImplementedError(f"{self} does not implement get_config()")


-@keras_core_export("keras_core.regularizers.L1L2")
+@keras_core_export(
+    ["keras_core.regularizers.L1L2", "keras_core.regularizers.l1_l2"]
+)
 class L1L2(Regularizer):
     """A regularizer that applies both L1 and L2 regularization penalties.
@@ -1,2 +1,2 @@
 # Unique source of truth for the version number.
-__version__ = "0.1.2"
+__version__ = "0.1.3"
78    pip_build.py
@@ -59,6 +59,84 @@ def build():
     # Generate API __init__.py files in `keras_core/`
     namex.generate_api_files(package, code_directory="src", verbose=True)

+    # Make keras_core/_tf_keras/ by copying keras_core/
+    tf_keras_dirpath = os.path.join(package, "_tf_keras")
+    os.makedirs(tf_keras_dirpath)
+    with open(os.path.join(package, "__init__.py")) as f:
+        init_file = f.read()
+    init_file = init_file.replace(
+        "from keras_core import _legacy",
+        "from keras_core import _tf_keras",
+    )
+    with open(os.path.join(package, "__init__.py"), "w") as f:
+        f.write(init_file)
+    with open(os.path.join(tf_keras_dirpath, "__init__.py"), "w") as f:
+        f.write(init_file)
+    for dirname in os.listdir(package):
+        dirpath = os.path.join(package, dirname)
+        if os.path.isdir(dirpath) and dirname not in (
+            "_legacy",
+            "_tf_keras",
+            "src",
+        ):
+            shutil.copytree(
+                dirpath,
+                os.path.join(tf_keras_dirpath, dirname),
+                ignore=ignore_files,
+            )
+
+    # Copy keras_core/_legacy/ file contents to keras_core/_tf_keras/
+    legacy_submodules = [
+        path[:-3]
+        for path in os.listdir(os.path.join(package, "src", "legacy"))
+        if path.endswith(".py")
+    ]
+    legacy_submodules += [
+        path
+        for path in os.listdir(os.path.join(package, "src", "legacy"))
+        if os.path.isdir(os.path.join(package, "src", "legacy", path))
+    ]
+
+    for root, _, fnames in os.walk(os.path.join(package, "_legacy")):
+        for fname in fnames:
+            if fname.endswith(".py"):
+                legacy_fpath = os.path.join(root, fname)
+                tf_keras_root = root.replace("/_legacy", "/_tf_keras")
+                core_api_fpath = os.path.join(
+                    root.replace("/_legacy", ""), fname
+                )
+                if not os.path.exists(tf_keras_root):
+                    os.makedirs(tf_keras_root)
+                tf_keras_fpath = os.path.join(tf_keras_root, fname)
+                with open(legacy_fpath) as f:
+                    legacy_contents = f.read()
+                legacy_contents = legacy_contents.replace(
+                    "keras_core._legacy", "keras_core._tf_keras"
+                )
+                if os.path.exists(core_api_fpath):
+                    with open(core_api_fpath) as f:
+                        core_api_contents = f.read()
+                    core_api_contents = core_api_contents.replace(
+                        "from keras_core import _tf_keras\n", ""
+                    )
+                    for legacy_submodule in legacy_submodules:
+                        core_api_contents = core_api_contents.replace(
+                            f"from keras_core import {legacy_submodule}\n",
+                            "",
+                        )
+                        core_api_contents = core_api_contents.replace(
+                            f"keras_core.{legacy_submodule}",
+                            f"keras_core._tf_keras.{legacy_submodule}",
+                        )
+                    legacy_contents = (
+                        core_api_contents + "\n" + legacy_contents
+                    )
+                with open(tf_keras_fpath, "w") as f:
+                    f.write(legacy_contents)
+
+    # Delete keras_core/_legacy/
+    shutil.rmtree(os.path.join(package, "_legacy"))
+
     # Make sure to export the __version__ string
     from keras_core.src.version import __version__  # noqa: E402
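Net effect of the build step: the wheel ships a `keras_core._tf_keras` namespace that mirrors `keras_core` with the `_legacy` symbols merged in, while `keras_core/_legacy/` itself is deleted. A hedged post-build expectation (the exact attribute layout depends on the generated API files, not verified here):

from keras_core import _tf_keras

assert hasattr(_tf_keras, "layers")            # mirrored regular API
assert hasattr(_tf_keras.losses, "Reduction")  # merged legacy symbol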