Add ConvLSTM3D layer

Francois Chollet 2023-05-17 16:23:59 -07:00
parent b781ef7e53
commit 7525d0e182
6 changed files with 244 additions and 7 deletions

@@ -107,6 +107,7 @@ from keras_core.layers.reshaping.zero_padding3d import ZeroPadding3D
from keras_core.layers.rnn.bidirectional import Bidirectional
from keras_core.layers.rnn.conv_lstm1d import ConvLSTM1D
from keras_core.layers.rnn.conv_lstm2d import ConvLSTM2D
from keras_core.layers.rnn.conv_lstm3d import ConvLSTM3D
from keras_core.layers.rnn.gru import GRU
from keras_core.layers.rnn.lstm import LSTM
from keras_core.layers.rnn.rnn import RNN

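With the export hunk above applied, the new layer is reachable from the public
namespace. A minimal import smoke check (a sketch; assumes the keras_core
package as laid out in this commit):

from keras_core import layers

# ConvLSTM3D now sits alongside its 1D and 2D siblings.
layer = layers.ConvLSTM3D(filters=5, kernel_size=3, padding="same")
print(type(layer).__name__)  # -> "ConvLSTM3D"
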
@@ -438,6 +438,13 @@ class ConvLSTM(RNN):
recurrent_constraint: Constraint function applied to
the `recurrent_kernel` weights matrix.
bias_constraint: Constraint function applied to the bias vector.
dropout: Float between 0 and 1.
Fraction of the units to drop for
the linear transformation of the inputs.
recurrent_dropout: Float between 0 and 1.
Fraction of the units to drop for
the linear transformation of the recurrent state.
seed: Random seed for dropout.
return_sequences: Boolean. Whether to return the last output
in the output sequence, or the full sequence. (default False)
return_state: Boolean. Whether to return the last state
@@ -447,12 +454,6 @@ class ConvLSTM(RNN):
stateful: Boolean (default False). If True, the last state
for each sample at index i in a batch will be used as initial
state for the sample of index i in the following batch.
dropout: Float between 0 and 1.
Fraction of the units to drop for
the linear transformation of the inputs.
recurrent_dropout: Float between 0 and 1.
Fraction of the units to drop for
the linear transformation of the recurrent state.
"""
def __init__(

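The `seed` argument documented above is threaded through to the dropout
masks. A hedged sketch of the intended effect (it assumes keras_core's
seeded dropout makes masks reproducible across identically seeded layers):

from keras_core import layers

# Two identically configured and identically seeded layers; called on the
# same inputs with training=True, their dropout masks should coincide,
# which makes dropout-heavy experiments repeatable.
a = layers.ConvLSTM2D(filters=4, kernel_size=3, dropout=0.5, seed=1337)
b = layers.ConvLSTM2D(filters=4, kernel_size=3, dropout=0.5, seed=1337)
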
@@ -59,6 +59,7 @@ class ConvLSTM1D(ConvLSTM):
linear transformation of the inputs.
recurrent_dropout: Float between 0 and 1. Fraction of the units to drop
for the linear transformation of the recurrent state.
seed: Random seed for dropout.
return_sequences: Boolean. Whether to return the last output
in the output sequence, or the full sequence. Default: `False`.
return_state: Boolean. Whether to return the last state in addition
@@ -143,6 +144,7 @@ class ConvLSTM1D(ConvLSTM):
bias_constraint=None,
dropout=0.0,
recurrent_dropout=0.0,
seed=None,
return_sequences=False,
return_state=False,
go_backwards=False,
@@ -177,5 +179,6 @@ class ConvLSTM1D(ConvLSTM):
stateful=stateful,
dropout=dropout,
recurrent_dropout=recurrent_dropout,
seed=seed,
**kwargs
)

@@ -59,6 +59,7 @@ class ConvLSTM2D(ConvLSTM):
linear transformation of the inputs.
recurrent_dropout: Float between 0 and 1. Fraction of the units to drop
for the linear transformation of the recurrent state.
seed: Random seed for dropout.
return_sequences: Boolean. Whether to return the last output
in the output sequence, or the full sequence. Default: `False`.
return_state: Boolean. Whether to return the last state in addition
@@ -110,7 +111,7 @@ class ConvLSTM2D(ConvLSTM):
- Else, 4D tensor with shape: `(samples, filters, new_rows, new_cols)` if
`data_format='channels_first'`
or shape: `(samples, new_rows, new_cols, filters)` if
`data_format='channels_last'`.
References:
@@ -143,6 +144,7 @@ class ConvLSTM2D(ConvLSTM):
bias_constraint=None,
dropout=0.0,
recurrent_dropout=0.0,
seed=None,
return_sequences=False,
return_state=False,
go_backwards=False,
@@ -177,5 +179,6 @@ class ConvLSTM2D(ConvLSTM):
stateful=stateful,
dropout=dropout,
recurrent_dropout=recurrent_dropout,
seed=seed,
**kwargs
)

@@ -0,0 +1,183 @@
from keras_core.api_export import keras_core_export
from keras_core.layers.rnn.conv_lstm import ConvLSTM
@keras_core_export("keras_core.layers.ConvLSTM3D")
class ConvLSTM3D(ConvLSTM):
"""3D Convolutional LSTM.
Similar to an LSTM layer, but the input transformations
and recurrent transformations are both convolutional.
Args:
filters: int, the dimension of the output space (the number of filters
in the convolution).
kernel_size: int or tuple/list of 3 integers, specifying the size of the
convolution window.
strides: int or tuple/list of 3 integers, specifying the stride length
of the convolution. `strides > 1` is incompatible with
`dilation_rate > 1`.
padding: string, `"valid"` or `"same"` (case-insensitive).
`"valid"` means no padding. `"same"` results in padding evenly to
the left/right or up/down of the input such that output has the same
height/width dimension as the input.
data_format: string, either `"channels_last"` or `"channels_first"`.
The ordering of the dimensions in the inputs. `"channels_last"`
corresponds to inputs with shape `(batch, time, *spatial_dims, channels)`
while `"channels_first"` corresponds to inputs with shape
`(batch, time, channels, *spatial_dims)`. It defaults to the
`image_data_format` value found in your Keras config file at
`~/.keras/keras.json`. If you never set it, then it will be
`"channels_last"`.
dilation_rate: int or tuple/list of 3 integers, specifying the dilation
rate to use for dilated convolution.
activation: Activation function to use. By default hyperbolic tangent
activation function is applied (`tanh(x)`).
recurrent_activation: Activation function to use for the recurrent step.
use_bias: Boolean, whether the layer uses a bias vector.
kernel_initializer: Initializer for the `kernel` weights matrix,
used for the linear transformation of the inputs.
recurrent_initializer: Initializer for the `recurrent_kernel` weights
matrix, used for the linear transformation of the recurrent state.
bias_initializer: Initializer for the bias vector.
unit_forget_bias: Boolean. If `True`, add 1 to the bias of the forget
gate at initialization.
Use in combination with `bias_initializer="zeros"`.
This is recommended in [Jozefowicz et al., 2015](
http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf)
kernel_regularizer: Regularizer function applied to the `kernel` weights
matrix.
recurrent_regularizer: Regularizer function applied to the
`recurrent_kernel` weights matrix.
bias_regularizer: Regularizer function applied to the bias vector.
activity_regularizer: Regularizer function applied to the output of the
layer (its "activation").
kernel_constraint: Constraint function applied to the `kernel` weights
matrix.
recurrent_constraint: Constraint function applied to the
`recurrent_kernel` weights matrix.
bias_constraint: Constraint function applied to the bias vector.
dropout: Float between 0 and 1. Fraction of the units to drop for the
linear transformation of the inputs.
recurrent_dropout: Float between 0 and 1. Fraction of the units to drop
for the linear transformation of the recurrent state.
seed: Random seed for dropout.
return_sequences: Boolean. Whether to return the last output
in the output sequence, or the full sequence. Default: `False`.
return_state: Boolean. Whether to return the last state in addition
to the output. Default: `False`.
go_backwards: Boolean (default: `False`).
If `True`, process the input sequence backwards and return the
reversed sequence.
stateful: Boolean (default False). If `True`, the last state
for each sample at index i in a batch will be used as initial
state for the sample of index i in the following batch.
unroll: Boolean (default: `False`).
If `True`, the network will be unrolled,
else a symbolic loop will be used.
Unrolling can speed-up a RNN,
although it tends to be more memory-intensive.
Unrolling is only suitable for short sequences.
Call arguments:
inputs: A 6D tensor.
mask: Binary tensor of shape `(samples, timesteps)` indicating whether a
given timestep should be masked.
training: Python boolean indicating whether the layer should behave in
training mode or in inference mode.
This is only relevant if `dropout` or `recurrent_dropout` are set.
initial_state: List of initial state tensors to be passed to the first
call of the cell.
Input shape:
- If `data_format='channels_first'`:
6D tensor with shape: `(samples, time, channels, *spatial_dims)`
- If `data_format='channels_last'`:
6D tensor with shape: `(samples, time, *spatial_dims, channels)`
Output shape:
- If `return_state`: a list of tensors. The first tensor is the output.
The remaining tensors are the last states,
each 5D tensor with shape: `(samples, filters, *spatial_dims)` if
`data_format='channels_first'`
or shape: `(samples, *spatial_dims, filters)` if
`data_format='channels_last'`.
- If `return_sequences`: 6D tensor with shape: `(samples, timesteps,
filters, *spatial_dims)` if `data_format='channels_first'`
or shape: `(samples, timesteps, *spatial_dims, filters)` if
`data_format='channels_last'`.
- Else, 5D tensor with shape: `(samples, filters, *spatial_dims)` if
`data_format='channels_first'`
or shape: `(samples, *spatial_dims, filters)` if
`data_format='channels_last'`.
References:
- [Shi et al., 2015](http://arxiv.org/abs/1506.04214v1)
(the current implementation does not include the feedback loop on the
cells' output).
"""
def __init__(
self,
filters,
kernel_size,
strides=1,
padding="valid",
data_format=None,
dilation_rate=1,
activation="tanh",
recurrent_activation="hard_sigmoid",
use_bias=True,
kernel_initializer="glorot_uniform",
recurrent_initializer="orthogonal",
bias_initializer="zeros",
unit_forget_bias=True,
kernel_regularizer=None,
recurrent_regularizer=None,
bias_regularizer=None,
activity_regularizer=None,
kernel_constraint=None,
recurrent_constraint=None,
bias_constraint=None,
dropout=0.0,
recurrent_dropout=0.0,
seed=None,
return_sequences=False,
return_state=False,
go_backwards=False,
stateful=False,
**kwargs
):
super().__init__(
rank=3,
filters=filters,
kernel_size=kernel_size,
strides=strides,
padding=padding,
data_format=data_format,
dilation_rate=dilation_rate,
activation=activation,
recurrent_activation=recurrent_activation,
use_bias=use_bias,
kernel_initializer=kernel_initializer,
recurrent_initializer=recurrent_initializer,
bias_initializer=bias_initializer,
unit_forget_bias=unit_forget_bias,
kernel_regularizer=kernel_regularizer,
recurrent_regularizer=recurrent_regularizer,
bias_regularizer=bias_regularizer,
activity_regularizer=activity_regularizer,
kernel_constraint=kernel_constraint,
recurrent_constraint=recurrent_constraint,
bias_constraint=bias_constraint,
return_sequences=return_sequences,
return_state=return_state,
go_backwards=go_backwards,
stateful=stateful,
dropout=dropout,
recurrent_dropout=recurrent_dropout,
seed=seed,
**kwargs
)

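A usage sketch for the new layer (shapes follow the docstring above; numpy
inputs are assumed to be accepted directly by keras_core layers):

import numpy as np
from keras_core import layers

# 6D input: (samples, time, depth, height, width, channels).
x = np.random.random((4, 10, 8, 8, 8, 3)).astype("float32")
layer = layers.ConvLSTM3D(filters=16, kernel_size=3, padding="same")
y = layer(x)
# The default return_sequences=False collapses the time axis, leaving
# (samples, depth, height, width, filters).
print(y.shape)  # expected: (4, 8, 8, 8, 16)
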
@@ -0,0 +1,46 @@
from keras_core import layers
from keras_core import testing
class ConvLSTM3DTest(testing.TestCase):
def test_basics(self):
self.run_layer_test(
layers.ConvLSTM3D,
init_kwargs={"filters": 5, "kernel_size": 3, "padding": "same"},
input_shape=(3, 2, 4, 4, 4, 3),
expected_output_shape=(3, 4, 4, 4, 5),
expected_num_trainable_weights=3,
expected_num_non_trainable_weights=0,
supports_masking=True,
)
self.run_layer_test(
layers.ConvLSTM3D,
init_kwargs={
"filters": 5,
"kernel_size": 3,
"padding": "valid",
"recurrent_dropout": 0.5,
},
input_shape=(3, 2, 8, 8, 8, 3),
call_kwargs={"training": True},
expected_output_shape=(3, 6, 6, 6, 5),
expected_num_trainable_weights=3,
expected_num_non_trainable_weights=0,
supports_masking=True,
)
self.run_layer_test(
layers.ConvLSTM3D,
init_kwargs={
"filters": 5,
"kernel_size": 3,
"padding": "valid",
"return_sequences": True,
},
input_shape=(3, 2, 8, 8, 8, 3),
expected_output_shape=(3, 2, 6, 6, 6, 5),
expected_num_trainable_weights=3,
expected_num_non_trainable_weights=0,
supports_masking=True,
)
# TODO: correctness testing
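One way the TODO above could be approached (a sketch, not part of this
commit; backend tensors are assumed convertible via np.array): with shared
weights and no dropout, the last step of the return_sequences output should
match the default single-output mode.

import numpy as np
from keras_core import layers

x = np.random.random((2, 3, 4, 4, 4, 3)).astype("float32")
seq = layers.ConvLSTM3D(
    filters=5, kernel_size=3, padding="same", return_sequences=True
)
last = layers.ConvLSTM3D(filters=5, kernel_size=3, padding="same")
seq(x)  # first call builds the layer's weights
last(x)
last.set_weights(seq.get_weights())  # share weights, then compare
np.testing.assert_allclose(
    np.array(seq(x))[:, -1], np.array(last(x)), atol=1e-5
)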