diff --git a/examples/conv_lstm.py b/examples/conv_lstm.py
new file mode 100644
index 000000000..d3d245e8e
--- /dev/null
+++ b/examples/conv_lstm.py
@@ -0,0 +1,143 @@
+"""This script demonstrates the use of a convolutional LSTM network.
+This network is used to predict the next frame of an artificially
+generated movie which contains moving squares.
+"""
+from keras.models import Sequential
+from keras.layers.convolutional import Convolution3D
+from keras.layers.convolutional_recurrent import ConvLSTM2D
+from keras.layers.normalization import BatchNormalization
+import numpy as np
+import pylab as plt
+
+# We create a layer which takes as input movies of shape
+# (n_frames, width, height, channels) and returns a movie
+# of identical shape.
+
+seq = Sequential()
+seq.add(ConvLSTM2D(nb_filter=40, nb_row=3, nb_col=3,
+                   input_shape=(None, 40, 40, 1),
+                   border_mode='same', return_sequences=True))
+seq.add(BatchNormalization())
+
+seq.add(ConvLSTM2D(nb_filter=40, nb_row=3, nb_col=3,
+                   border_mode='same', return_sequences=True))
+seq.add(BatchNormalization())
+
+seq.add(ConvLSTM2D(nb_filter=40, nb_row=3, nb_col=3,
+                   border_mode='same', return_sequences=True))
+seq.add(BatchNormalization())
+
+seq.add(ConvLSTM2D(nb_filter=40, nb_row=3, nb_col=3,
+                   border_mode='same', return_sequences=True))
+seq.add(BatchNormalization())
+
+seq.add(Convolution3D(nb_filter=1, kernel_dim1=1, kernel_dim2=3,
+                      kernel_dim3=3, activation='sigmoid',
+                      border_mode='same', dim_ordering='tf'))
+
+seq.compile(loss='binary_crossentropy', optimizer='adadelta')
+
+
+# Generating artificial data:
+# Generate movies with 3 to 7 moving squares inside.
+# The squares are 4x4 or 6x6 pixels in size and
+# they move linearly through time.
+# For convenience we first create movies with bigger width and height (80x80)
+# and at the end we select a 40x40 window.
+
+def generate_movies(n_samples=1200, n_frames=15):
+    row = 80
+    col = 80
+    noisy_movies = np.zeros((n_samples, n_frames, row, col, 1), dtype=np.float)
+    shifted_movies = np.zeros((n_samples, n_frames, row, col, 1),
+                              dtype=np.float)
+
+    for i in range(n_samples):
+
+        # Add from 3 to 7 moving squares
+        n = np.random.randint(3, 8)
+
+        for j in range(n):
+            # Initial position
+            xstart = np.random.randint(20, 60)
+            ystart = np.random.randint(20, 60)
+            # Direction of motion
+            directionx = np.random.randint(0, 3) - 1
+            directiony = np.random.randint(0, 3) - 1
+
+            # Half-width of the square
+            w = np.random.randint(2, 4)
+
+            for t in range(n_frames):
+                x_shift = xstart + directionx * t
+                y_shift = ystart + directiony * t
+                noisy_movies[i, t, x_shift - w: x_shift + w,
+                             y_shift - w: y_shift + w, 0] += 1
+
+                # Make it more robust by adding noise.
+                # The idea is that if during inference
+                # the value of a pixel is not exactly one,
+                # we want the network to be robust and still
+                # consider it as a pixel belonging to a square.
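+                # (With probability 1/2, add or subtract 0.1 over a box one
+                # pixel larger than the square on each side, so pixel values
+                # around the edges are no longer exactly 0 or 1.)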
+                if np.random.randint(0, 2):
+                    noise_f = (-1)**np.random.randint(0, 2)
+                    noisy_movies[i, t,
+                                 x_shift - w - 1: x_shift + w + 1,
+                                 y_shift - w - 1: y_shift + w + 1,
+                                 0] += noise_f * 0.1
+
+                # Shift the ground truth by 1 frame
+                x_shift = xstart + directionx * (t + 1)
+                y_shift = ystart + directiony * (t + 1)
+                shifted_movies[i, t, x_shift - w: x_shift + w,
+                               y_shift - w: y_shift + w, 0] += 1
+
+    # Cut to a 40x40 window
+    noisy_movies = noisy_movies[::, ::, 20:60, 20:60, ::]
+    shifted_movies = shifted_movies[::, ::, 20:60, 20:60, ::]
+    noisy_movies[noisy_movies >= 1] = 1
+    shifted_movies[shifted_movies >= 1] = 1
+    return noisy_movies, shifted_movies
+
+# Train the network
+noisy_movies, shifted_movies = generate_movies(n_samples=1200)
+seq.fit(noisy_movies[:1000], shifted_movies[:1000], batch_size=10,
+        nb_epoch=300, validation_split=0.05)
+
+# Testing the network on one movie:
+# feed it the first 7 positions and then
+# predict the new positions.
+which = 1004
+track = noisy_movies[which][:7, ::, ::, ::]
+
+for j in range(16):
+    new_pos = seq.predict(track[np.newaxis, ::, ::, ::, ::])
+    new = new_pos[::, -1, ::, ::, ::]
+    track = np.concatenate((track, new), axis=0)
+
+
+# And then compare the predictions
+# to the ground truth.
+track2 = noisy_movies[which][::, ::, ::, ::]
+for i in range(15):
+    fig = plt.figure(figsize=(10, 5))
+
+    ax = fig.add_subplot(121)
+
+    if i >= 7:
+        ax.text(1, 3, 'Predictions!', fontsize=20, color='w')
+    else:
+        ax.text(1, 3, 'Initial trajectory', fontsize=20)
+
+    toplot = track[i, ::, ::, 0]
+
+    plt.imshow(toplot)
+    ax = fig.add_subplot(122)
+    plt.text(1, 3, 'Ground truth', fontsize=20)
+
+    toplot = track2[i, ::, ::, 0]
+    if i >= 2:
+        toplot = shifted_movies[which][i - 1, ::, ::, 0]
+
+    plt.imshow(toplot)
+    plt.savefig('%i_animate.png' % (i + 1))
diff --git a/examples/lstm_conv.py b/examples/lstm_conv.py
deleted file mode 100644
index 694217d2e..000000000
--- a/examples/lstm_conv.py
+++ /dev/null
@@ -1,136 +0,0 @@
-from keras.models import Sequential
-from keras.layers.convolutional import Convolution3D
-from keras.layers.recurrent_convolutional import ConvLSTM2D
-from keras.layers.normalization import BatchNormalization
-import numpy as np
-from pylab import *
-
-# We create a layer whose take movies as input
-# of shape (time, width, height, channel) and that return a movie
-# with identical shape.
-
-seq = Sequential()
-seq.add(ConvLSTM2D(nb_filter=40, nb_row=3, nb_col=3,
-                   input_shape=(None, 40, 40, 1),
-                   border_mode="same", return_sequences=True))
-seq.add(BatchNormalization())
-
-seq.add(ConvLSTM2D(nb_filter=40, nb_row=3, nb_col=3,
-                   border_mode="same", return_sequences=True))
-seq.add(BatchNormalization())
-
-seq.add(ConvLSTM2D(nb_filter=40, nb_row=3, nb_col=3,
-                   border_mode="same", return_sequences=True))
-seq.add(BatchNormalization())
-
-seq.add(ConvLSTM2D(nb_filter=40, nb_row=3, nb_col=3,
-                   border_mode="same", return_sequences=True))
-seq.add(BatchNormalization())
-
-seq.add(Convolution3D(nb_filter=1, kernel_dim1=1, kernel_dim2=3,
-                      kernel_dim3=3, activation='sigmoid',
-                      border_mode="same", dim_ordering="tf"))
-
-seq.compile(loss="binary_crossentropy", optimizer="adadelta")
-
-
-# Generating artificial data:
-# We are going to create a movie with
-# square of size one or two by two pixels moving linearly
-# trought time. For convenience we first create
-# a movie with bigger width and height, and at the end
-# we cut it to 40x40
-
-time = 15
-row = 80
-col = 80
-filters = 1
-training = 1200
-train = np.zeros((training, time, row, col, 1), dtype=np.float)
-gt = np.zeros((training, time, row, col, 1), dtype=np.float)
-
-for i in range(training):
-
-    # add from 3 to 7 moving squares
-    n = np.random.randint(3, 8)
-
-    for j in range(n):
-        # Initial position
-        xstart = np.random.randint(20, 60)
-        ystart = np.random.randint(20, 60)
-        # Direction of motion
-        directionx = np.random.randint(0, 3) - 1
-        directiony = np.random.randint(0, 3) - 1
-
-        # Size of the square
-        w = np.random.randint(2, 4)
-
-        for t in range(time):
-            x_shift = xstart + directionx * t
-            y_shift = ystart + directiony * t
-            train[i, t, x_shift - w: x_shift + w,
-                  y_shift - w: y_shift + w, 0] += 1
-
-            # Make it more robust by adding noise.
-            # The idea is that if during predict time,
-            # the value of the pixel is not exactly one,
-            # we need to train the network to be robust and stille
-            # consider it is a pixel belonging to a square.
-            if np.random.randint(0, 2):
-                noise_f = (-1)**np.random.randint(0, 2)
-                train[i, t, x_shift - w - 1: x_shift + w + 1,
-                      y_shift - w - 1: y_shift + w + 1, 0] += noise_f * 0.1
-
-            # Shitf the ground truth by 1
-            x_shift = xstart + directionx * (t + 1)
-            y_shift = ystart + directiony * (t + 1)
-            gt[i, t, x_shift - w: x_shift + w,
-               y_shift - w: y_shift + w, 0] += 1
-
-# Cut to a forty's sized window
-train = train[::, ::, 20:60, 20:60, ::]
-gt = gt[::, ::, 20:60, 20:60, ::]
-train[train >= 1] = 1
-gt[gt >= 1] = 1
-
-# Train the network
-seq.fit(train[:1000], gt[:1000], batch_size=10,
-        nb_epoch=300, validation_split=0.05)
-
-# Testing the network on one movie
-# feed it with the first 7 positions and then
-# predict the new positions
-which = 1004
-track = train[which][:7, ::, ::, ::]
-
-for j in range(16):
-    new_pos = seq.predict(track[np.newaxis, ::, ::, ::, ::])
-    new = new_pos[::, -1, ::, ::, ::]
-    track = np.concatenate((track, new), axis=0)
-
-
-# And then compare the predictions
-# to the ground truth
-track2 = train[which][::, ::, ::, ::]
-for i in range(15):
-    fig = figure(figsize=(10, 5))
-
-    ax = fig.add_subplot(121)
-
-    if i >= 7:
-        ax.text(1, 3, "Predictions !", fontsize=20, color="w")
-    else:
-        ax.text(1, 3, "Inital trajectory", fontsize=20)
-
-    toplot = track[i, ::, ::, 0]
-
-    imshow(toplot)
-    ax = fig.add_subplot(122)
-    text(1, 3, "Ground truth", fontsize=20)
-
-    toplot = track2[i, ::, ::, 0]
-    if i >= 2:
-        toplot = gt[which][i - 1, ::, ::, 0]
-
-    imshow(toplot)
-    savefig("%i_animate.png" % (i + 1))
diff --git a/keras/layers/__init__.py b/keras/layers/__init__.py
index 8e2dd2877..5337e9fba 100644
--- a/keras/layers/__init__.py
+++ b/keras/layers/__init__.py
@@ -10,4 +10,4 @@ from .embeddings import *
 from .noise import *
 from .advanced_activations import *
 from .wrappers import *
-from .recurrent_convolutional import *
+from .convolutional_recurrent import *
diff --git a/keras/layers/recurrent_convolutional.py b/keras/layers/convolutional_recurrent.py
similarity index 97%
rename from keras/layers/recurrent_convolutional.py
rename to keras/layers/convolutional_recurrent.py
index 167ac1016..d9cd40d44 100644
--- a/keras/layers/recurrent_convolutional.py
+++ b/keras/layers/convolutional_recurrent.py
@@ -8,20 +8,20 @@ import warnings
 
 
 class ConvRecurrent2D(Layer):
-    '''Abstract base class for recurrent layers.
+    '''Abstract base class for convolutional recurrent layers.
 
     Do not use in a model -- it's not a functional layer!
 
-    All recurrent layers (GRU, LSTM, SimpleRNN) also
+    ConvLSTM2D follows the specifications of this class and accepts
     the keyword arguments listed below.
 
     # Input shape
-        5D tensor with shape `(nb_samples, timesteps, channels,rows,cols)`.
+        5D tensor with shape `(nb_samples, timesteps, channels, rows, cols)`.
 
     # Output shape
         - if `return_sequences`: 5D tensor with shape
-            `(nb_samples, timesteps, channels,rows,cols)`.
-        - else, 2D tensor with shape `(nb_samples, channels,rows,cols)`.
+            `(nb_samples, timesteps, channels, rows, cols)`.
+        - else, 4D tensor with shape `(nb_samples, channels, rows, cols)`.
 
     # Arguments
         weights: list of numpy arrays to set as initial weights.
@@ -200,7 +200,8 @@ class ConvRecurrent2D(Layer):
 
 
 class ConvLSTM2D(ConvRecurrent2D):
-    '''
+    '''Convolutional LSTM.
+
     # Input shape
         - if dim_ordering='th'
             5D tensor with shape:
@@ -213,17 +214,17 @@ class ConvLSTM2D(ConvRecurrent2D):
         - if `return_sequences`
             - if dim_ordering='th'
                 5D tensor with shape:
-                `(samples, time, nb_filter, o_row, o_col)`
+                `(samples, time, nb_filter, output_row, output_col)`
             - if dim_ordering='tf'
                 5D tensor with shape:
-                `(samples, time, o_row, o_col, nb_filter)`
+                `(samples, time, output_row, output_col, nb_filter)`
         - else
             - if dim_ordering ='th'
                 4D tensor with shape:
-                `(samples, nb_filter, o_row, o_col)`
+                `(samples, nb_filter, output_row, output_col)`
             - if dim_ordering='tf'
                 4D tensor with shape:
-                `(samples, o_row, o_col, nb_filter)`
+                `(samples, output_row, output_col, nb_filter)`
 
         where o_row and o_col depend on the shape of the filter and
         the border_mode
diff --git a/tests/keras/layers/test_recurrent_convolutional.py b/tests/keras/layers/test_convolutional_recurrent.py
similarity index 92%
rename from tests/keras/layers/test_recurrent_convolutional.py
rename to tests/keras/layers/test_convolutional_recurrent.py
index 18000caa3..cb98d84e6 100644
--- a/tests/keras/layers/test_recurrent_convolutional.py
+++ b/tests/keras/layers/test_convolutional_recurrent.py
@@ -4,7 +4,7 @@ from numpy.testing import assert_allclose
 
 from keras import backend as K
 from keras.models import Sequential
-from keras.layers import recurrent_convolutional
+from keras.layers import convolutional_recurrent
 from keras.utils.test_utils import layer_test
 from keras import regularizers
 
@@ -15,10 +15,10 @@ def test_recurrent_convolutional():
     nb_col = 4
     nb_filter = 20
     nb_samples = 5
-    input_channel = 3
-    input_nb_row = 30
-    input_nb_col = 30
-    sequence_len = 10
+    input_channel = 2
+    input_nb_row = 10
+    input_nb_col = 10
+    sequence_len = 2
 
     for dim_ordering in ['th', 'tf']:
         if dim_ordering == 'th':
@@ -32,7 +32,7 @@
     for return_sequences in [True, False]:
 
         # test for ouptput shape:
-        output = layer_test(recurrent_convolutional.ConvLSTM2D,
+        output = layer_test(convolutional_recurrent.ConvLSTM2D,
                             kwargs={'dim_ordering': dim_ordering,
                                     'return_sequences': return_sequences,
                                     'nb_filter': nb_filter,
@@ -67,7 +67,7 @@
                   'stateful': True,
                   'batch_input_shape': input.shape,
                   'border_mode': "same"}
-        layer = recurrent_convolutional.ConvLSTM2D(**kwargs)
+        layer = convolutional_recurrent.ConvLSTM2D(**kwargs)
         model.add(layer)
         model.compile(optimizer='sgd', loss='mse')
@@ -110,13 +110,13 @@
                   'b_regularizer': 'l2',
                   'border_mode': "same"}
 
-        layer = recurrent_convolutional.ConvLSTM2D(**kwargs)
+        layer = convolutional_recurrent.ConvLSTM2D(**kwargs)
         layer.set_input(K.variable(np.ones(input.shape)),
                         shape=input.shape)
         K.eval(layer.output)
 
         # check dropout
-        layer_test(recurrent_convolutional.ConvLSTM2D,
+        layer_test(convolutional_recurrent.ConvLSTM2D,
                    kwargs={'dim_ordering': dim_ordering,
                            'return_sequences': return_sequences,
                            'nb_filter': nb_filter,