keras/keras_core/layers/core/embedding.py

from keras_core import constraints
from keras_core import initializers
from keras_core import operations as ops
from keras_core import regularizers
from keras_core.api_export import keras_core_export
from keras_core.layers.layer import Layer


@keras_core_export("keras_core.layers.Embedding")
class Embedding(Layer):
    """Turns non-negative integers (indexes) into dense vectors of fixed size.

    e.g. `[[4], [20]] -> [[0.25, 0.1], [0.6, -0.2]]`

    This layer can only be used on non-negative integer inputs of a fixed
    range (`0 <= index < input_dim`).

    Example:

    >>> model = keras_core.Sequential()
    >>> model.add(keras_core.layers.Embedding(1000, 64))
    >>> # The model will take as input an integer matrix of size (batch,
    >>> # input_length), and the largest integer (i.e. word index) in the input
    >>> # should be no larger than 999 (vocabulary size).
    >>> # For an input of shape (32, 10) the output has shape (32, 10, 64):
    >>> # one 64-dimensional vector per input index.
    >>> input_array = np.random.randint(1000, size=(32, 10))
    >>> model.compile('rmsprop', 'mse')
    >>> output_array = model.predict(input_array)
    >>> print(output_array.shape)
    (32, 10, 64)

    Args:
        input_dim: Integer. Size of the vocabulary,
            i.e. maximum integer index + 1.
        output_dim: Integer. Dimension of the dense embedding.
        embeddings_initializer: Initializer for the `embeddings`
            matrix (see `keras.initializers`).
        embeddings_regularizer: Regularizer function applied to
            the `embeddings` matrix (see `keras.regularizers`).
        embeddings_constraint: Constraint function applied to
            the `embeddings` matrix (see `keras.constraints`).
        mask_zero: Boolean, whether or not the input value 0 is a special
            "padding" value that should be masked out.
            This is useful when using recurrent layers which
            may take variable length input. If this is `True`,
            then all subsequent layers in the model need
            to support masking or an exception will be raised.
            If mask_zero is set to True, as a consequence,
            index 0 cannot be used in the vocabulary (input_dim should
            equal size of vocabulary + 1).
        **kwargs: Forwarded unchanged to the base `Layer` constructor.

    Input shape:
        2D tensor with shape: `(batch_size, input_length)`.

    Output shape:
        3D tensor with shape: `(batch_size, input_length, output_dim)`.
    """

    def __init__(
        self,
        input_dim,
        output_dim,
        embeddings_initializer="uniform",
        embeddings_regularizer=None,
        embeddings_constraint=None,
        mask_zero=False,
        **kwargs,
    ):
        super().__init__(**kwargs)
        self.input_dim = input_dim
        self.output_dim = output_dim
        # Resolve string/dict/None identifiers through the respective `get`
        # helpers so the stored attributes can be serialized in `get_config`.
        self.embeddings_initializer = initializers.get(embeddings_initializer)
        self.embeddings_regularizer = regularizers.get(embeddings_regularizer)
        self.embeddings_constraint = constraints.get(embeddings_constraint)
        self.mask_zero = mask_zero
        # The layer only advertises mask support when it produces one itself.
        self.supports_masking = mask_zero

    def build(self, input_shape=None):
        """Create the `(input_dim, output_dim)` embeddings weight.

        Args:
            input_shape: Unused; the weight shape depends only on the
                constructor arguments.
        """
        self.embeddings = self.add_weight(
            shape=(self.input_dim, self.output_dim),
            initializer=self.embeddings_initializer,
            name="embeddings",
            regularizer=self.embeddings_regularizer,
            constraint=self.embeddings_constraint,
            trainable=True,
            # autocast=False,  # TODO
        )
        self.built = True

    def call(self, inputs):
        """Map each index in `inputs` to its row of the embeddings matrix.

        Inputs whose dtype is neither `int32` nor `int64` are cast to
        `int32` first. The lookup itself is expressed as a one-hot encoding
        over `input_dim` classes multiplied with the embeddings matrix.

        Args:
            inputs: Tensor of indexes.

        Returns:
            The result of `ops.matmul(one_hot(inputs), embeddings)`.
        """
        if inputs.dtype != "int32" and inputs.dtype != "int64":
            inputs = ops.cast(inputs, "int32")
        one_hot_data = ops.one_hot(inputs, num_classes=self.input_dim)
        return ops.matmul(one_hot_data, self.embeddings)

    def compute_mask(self, inputs, mask=None):
        """Return the padding mask derived from `inputs`.

        Args:
            inputs: Tensor of indexes.
            mask: Unused; any incoming mask is ignored.

        Returns:
            `None` when `mask_zero` is falsy, otherwise the element-wise
            result of `inputs != 0`.
        """
        if not self.mask_zero:
            return None
        return ops.not_equal(inputs, 0)

    def compute_output_shape(self, input_shape):
        """Return `input_shape` with `output_dim` appended as the last axis.

        Args:
            input_shape: Sequence (tuple or list) of dimension sizes.

        Returns:
            A tuple: the input dimensions followed by `output_dim`.
        """
        # `tuple(...)` lets list-typed shapes through; a plain `list + tuple`
        # concatenation would raise a TypeError.
        return tuple(input_shape) + (self.output_dim,)

    def get_config(self):
        """Return the layer configuration as a dict.

        The base layer config is extended with this layer's constructor
        arguments; initializer, regularizers and constraint are stored in
        their serialized form. Keys defined here override same-named keys
        from the base config.
        """
        base_config = super().get_config()
        config = {
            "input_dim": self.input_dim,
            "output_dim": self.output_dim,
            "embeddings_initializer": initializers.serialize(
                self.embeddings_initializer
            ),
            "embeddings_regularizer": regularizers.serialize(
                self.embeddings_regularizer
            ),
            "activity_regularizer": regularizers.serialize(
                self.activity_regularizer
            ),
            "embeddings_constraint": constraints.serialize(
                self.embeddings_constraint
            ),
            "mask_zero": self.mask_zero,
        }
        return {**base_config, **config}
Add Embedding layer 2023-04-27 03:22:03 +00:00			`from keras_core import constraints`
			`from keras_core import initializers`
			`from keras_core import operations as ops`
			`from keras_core import regularizers`
			`from keras_core.api_export import keras_core_export`
			`from keras_core.layers.layer import Layer`


			`@keras_core_export("keras_core.layers.Embedding")`
			`class Embedding(Layer):`
			`"""Turns positive integers (indexes) into dense vectors of fixed size.`

			e.g. `[[4], [20]] -> [[0.25, 0.1], [0.6, -0.2]]`

			`This layer can only be used on positive integer inputs of a fixed range.`

			`Example:`

			`>>> model = keras_core.Sequential()`
			`>>> model.add(keras_core.layers.Embedding(1000, 64, input_length=10))`
			`>>> # The model will take as input an integer matrix of size (batch,`
			`>>> # input_length), and the largest integer (i.e. word index) in the input`
			`>>> # should be no larger than 999 (vocabulary size).`
			>>> # Now model.output_shape is (None, 10, 64), where `None` is the batch
			`>>> # dimension.`
			`>>> input_array = np.random.randint(1000, size=(32, 10))`
			`>>> model.compile('rmsprop', 'mse')`
			`>>> output_array = model.predict(input_array)`
			`>>> print(output_array.shape)`
			`(32, 10, 64)`

			`Args:`
			`input_dim: Integer. Size of the vocabulary,`
			`i.e. maximum integer index + 1.`
			`output_dim: Integer. Dimension of the dense embedding.`
			embeddings_initializer: Initializer for the `embeddings`
			matrix (see `keras.initializers`).
			`embeddings_regularizer: Regularizer function applied to`
			the `embeddings` matrix (see `keras.regularizers`).
			`embeddings_constraint: Constraint function applied to`
			the `embeddings` matrix (see `keras.constraints`).
			`mask_zero: Boolean, whether or not the input value 0 is a special`
Shorten lines 2023-04-27 03:54:38 +00:00			`"padding" value that should be masked out.`
			`This is useful when using recurrent layers which`
			may take variable length input. If this is `True`,
			`then all subsequent layers in the model need`
			`to support masking or an exception will be raised.`
			`If mask_zero is set to True, as a consequence,`
			`index 0 cannot be used in the vocabulary (input_dim should`
Add Embedding layer 2023-04-27 03:22:03 +00:00			`equal size of vocabulary + 1).`

			`Input shape:`
			2D tensor with shape: `(batch_size, input_length)`.

			`Output shape:`
			3D tensor with shape: `(batch_size, input_length, output_dim)`.
			`"""`

			`def __init__(`
			`self,`
			`input_dim,`
			`output_dim,`
			`embeddings_initializer="uniform",`
			`embeddings_regularizer=None,`
			`embeddings_constraint=None,`
			`mask_zero=False,`
			`**kwargs,`
			`):`
			`super().__init__(**kwargs)`
			`self.input_dim = input_dim`
			`self.output_dim = output_dim`
			`self.embeddings_initializer = initializers.get(embeddings_initializer)`
			`self.embeddings_regularizer = regularizers.get(embeddings_regularizer)`
			`self.embeddings_constraint = constraints.get(embeddings_constraint)`
			`self.mask_zero = mask_zero`
			`self.supports_masking = mask_zero`

			`def build(self, input_shape=None):`
			`self.embeddings = self.add_weight(`
			`shape=(self.input_dim, self.output_dim),`
			`initializer=self.embeddings_initializer,`
			`name="embeddings",`
			`regularizer=self.embeddings_regularizer,`
			`constraint=self.embeddings_constraint,`
			`trainable=True,`
			`# autocast=False, # TODO`
			`)`
			`self.built = True`

			`def call(self, inputs):`
			`if inputs.dtype != "int32" and inputs.dtype != "int64":`
			`inputs = ops.cast(inputs, "int32")`
Add nn.one_hot (#52) 2023-04-28 00:52:47 +00:00			`one_hot_data = ops.one_hot(inputs, num_classes=self.input_dim)`
Add Embedding layer 2023-04-27 03:22:03 +00:00			`return ops.matmul(one_hot_data, self.embeddings)`

			`def compute_mask(self, inputs, mask=None):`
			`if not self.mask_zero:`
			`return None`
			`return ops.not_equal(inputs, 0)`

			`def compute_output_shape(self, input_shape):`
			`return input_shape + (self.output_dim,)`

			`def get_config(self):`
			`base_config = super().get_config()`
			`config = {`
			`"input_dim": self.input_dim,`
			`"output_dim": self.output_dim,`
			`"embeddings_initializer": initializers.serialize(`
			`self.embeddings_initializer`
			`),`
			`"embeddings_regularizer": regularizers.serialize(`
			`self.embeddings_regularizer`
			`),`
			`"activity_regularizer": regularizers.serialize(`
			`self.activity_regularizer`
			`),`
			`"embeddings_constraint": constraints.serialize(`
			`self.embeddings_constraint`
			`),`
			`"mask_zero": self.mask_zero,`
			`}`
			`return {**base_config, **config}`