keras/examples/conv_lstm.py

""" This script demonstrates the use of a convolutional LSTM network.
This network is used to predict the next frame of an artificially
generated movie which contains moving squares.
"""
from keras.models import Sequential
from keras.layers.convolutional import Convolution3D
from keras.layers.convolutional_recurrent import ConvLSTM2D
from keras.layers.normalization import BatchNormalization
import numpy as np
import pylab as plt

# We create a network which takes as input movies of shape
# (n_frames, width, height, channels) and returns a movie
# of identical shape.

# Four ConvLSTM2D + BatchNormalization stages followed by a Convolution3D
# with a single sigmoid filter that produces the output movie.
seq = Sequential()

# Only the first stage declares the input shape; the leading None is the
# n_frames axis, which is left unspecified.
seq.add(ConvLSTM2D(nb_filter=40, nb_row=3, nb_col=3,
                   input_shape=(None, 40, 40, 1),
                   border_mode='same', return_sequences=True))
seq.add(BatchNormalization())

# The three remaining stages are identical to each other.
for _ in range(3):
    seq.add(ConvLSTM2D(nb_filter=40, nb_row=3, nb_col=3,
                       border_mode='same', return_sequences=True))
    seq.add(BatchNormalization())

# Reduce the 40 feature maps to one channel per frame.
seq.add(Convolution3D(nb_filter=1, kernel_dim1=1, kernel_dim2=3,
                      kernel_dim3=3, activation='sigmoid',
                      border_mode='same', dim_ordering='tf'))

seq.compile(loss='binary_crossentropy', optimizer='adadelta')


# Artificial data generation:
# Generate movies with 3 to 7 moving squares inside.
# The squares are 4x4 or 6x6 pixels (half-width w is 2 or 3),
# which move linearly over time.
# For convenience we first create movies with bigger width and height (80x80)
# and at the end we select a 40x40 window.

def _clamped_span(center, half_width):
    """Return the slice [center - half_width, center + half_width), clamped at 0.

    Negative bounds are clamped because NumPy interprets them as offsets from
    the end of the axis, which would make a square that has left the canvas
    reappear on the opposite side.
    """
    return slice(max(center - half_width, 0), max(center + half_width, 0))


def generate_movies(n_samples=1200, n_frames=15):
    """Generate synthetic movies of moving squares and next-frame targets.

    Each movie is drawn on an 80x80 canvas and contains 3 to 7 squares.
    A square starts at a random position in [20, 60) on both axes, moves by
    -1, 0 or +1 pixel per frame along each axis, and has a half-width `w`
    of 2 or 3 (a side of 4 or 6 pixels). Only the central 40x40 window of
    the canvas is returned.

    # Arguments
        n_samples: number of movies to generate.
        n_frames: number of frames per movie.

    # Returns
        A tuple `(noisy_movies, shifted_movies)` of float arrays of shape
        `(n_samples, n_frames, 40, 40, 1)`. `noisy_movies` holds the
        squares at time `t`, randomly perturbed by +/- 0.1 noise;
        `shifted_movies` holds the same squares at time `t + 1`.
        In both arrays values >= 1 are set to 1.
    """
    row = 80
    col = 80
    # The builtin `float` is used instead of the `np.float` alias, which was
    # removed from NumPy (1.24); both designate float64.
    canvas_shape = (n_samples, n_frames, row, col, 1)
    noisy_movies = np.zeros(canvas_shape, dtype=float)
    shifted_movies = np.zeros(canvas_shape, dtype=float)

    for i in range(n_samples):
        # Add 3 to 7 moving squares
        n = np.random.randint(3, 8)

        for _ in range(n):
            # Initial position
            xstart = np.random.randint(20, 60)
            ystart = np.random.randint(20, 60)
            # Direction of motion
            directionx = np.random.randint(0, 3) - 1
            directiony = np.random.randint(0, 3) - 1

            # Half-width of the square
            w = np.random.randint(2, 4)

            for t in range(n_frames):
                x_shift = xstart + directionx * t
                y_shift = ystart + directiony * t
                noisy_movies[i, t,
                             _clamped_span(x_shift, w),
                             _clamped_span(y_shift, w), 0] += 1

                # Make it more robust by adding noise.
                # The idea is that if during inference,
                # the value of the pixel is not exactly one,
                # we need to train the network to be robust and still
                # consider it as a pixel belonging to a square.
                # The noise covers the square plus a one-pixel border.
                if np.random.randint(0, 2):
                    noise_f = (-1) ** np.random.randint(0, 2)
                    noisy_movies[i, t,
                                 _clamped_span(x_shift, w + 1),
                                 _clamped_span(y_shift, w + 1),
                                 0] += noise_f * 0.1

                # Shift the ground truth by 1
                x_shift = xstart + directionx * (t + 1)
                y_shift = ystart + directiony * (t + 1)
                shifted_movies[i, t,
                               _clamped_span(x_shift, w),
                               _clamped_span(y_shift, w), 0] += 1

    # Cut to a 40x40 window
    noisy_movies = noisy_movies[::, ::, 20:60, 20:60, ::]
    shifted_movies = shifted_movies[::, ::, 20:60, 20:60, ::]
    # Overlapping squares accumulate values above 1; cap them at 1.
    noisy_movies[noisy_movies >= 1] = 1
    shifted_movies[shifted_movies >= 1] = 1
    return noisy_movies, shifted_movies

# Train the network on the first 1000 movies; the remaining ones are not
# passed to fit() and are used for the visual test below.
noisy_movies, shifted_movies = generate_movies(n_samples=1200)
seq.fit(noisy_movies[:1000], shifted_movies[:1000], batch_size=10,
        nb_epoch=300, validation_split=0.05)

# Testing the network on one movie:
# feed it with the first 7 positions and then
# predict the new positions.
which = 1004
track = noisy_movies[which][:7, ::, ::, ::]

# At each step, predict on the whole track so far, keep only the last
# predicted frame and append it to the track.
for j in range(16):
    new_pos = seq.predict(track[np.newaxis, ::, ::, ::, ::])
    new = new_pos[::, -1, ::, ::, ::]
    track = np.concatenate((track, new), axis=0)


# And then compare the predictions
# to the ground truth
track2 = noisy_movies[which][::, ::, ::, ::]
for i in range(15):
    fig = plt.figure(figsize=(10, 5))

    # Left panel: input frames first, then the network's predictions.
    ax = fig.add_subplot(121)

    if i >= 7:
        ax.text(1, 3, 'Predictions !', fontsize=20, color='w')
    else:
        ax.text(1, 3, 'Initial trajectory', fontsize=20)

    toplot = track[i, ::, ::, 0]

    ax.imshow(toplot)

    # Right panel: ground truth for the same time step.
    ax = fig.add_subplot(122)
    ax.text(1, 3, 'Ground truth', fontsize=20)

    toplot = track2[i, ::, ::, 0]
    if i >= 2:
        # shifted_movies is one step ahead, so frame i - 1 shows time i.
        toplot = shifted_movies[which][i - 1, ::, ::, 0]

    ax.imshow(toplot)
    fig.savefig('%i_animate.png' % (i + 1))
    # Release the figure; otherwise every iteration leaves one open.
    plt.close(fig)
Style fixes 2016-11-05 20:45:50 +00:00			`""" This script demonstrates the use of a convolutional LSTM network.`
			`This network is used to predict the next frame of an artificially`
			`generated movie which contains moving squares.`
Fix review 2016-11-02 11:08:31 +00:00			`"""`
			`from keras.models import Sequential`
			`from keras.layers.convolutional import Convolution3D`
			`from keras.layers.convolutional_recurrent import ConvLSTM2D`
			`from keras.layers.normalization import BatchNormalization`
			`import numpy as np`
			`import pylab as plt`

			`# We create a layer which take as input movies of shape`
Style fixes 2016-11-05 20:45:50 +00:00			`# (n_frames, width, height, channels) and returns a movie`
Fix review 2016-11-02 11:08:31 +00:00			`# of identical shape.`

			`seq = Sequential()`
			`seq.add(ConvLSTM2D(nb_filter=40, nb_row=3, nb_col=3,`
			`input_shape=(None, 40, 40, 1),`
			`border_mode='same', return_sequences=True))`
			`seq.add(BatchNormalization())`

			`seq.add(ConvLSTM2D(nb_filter=40, nb_row=3, nb_col=3,`
			`border_mode='same', return_sequences=True))`
			`seq.add(BatchNormalization())`

			`seq.add(ConvLSTM2D(nb_filter=40, nb_row=3, nb_col=3,`
			`border_mode='same', return_sequences=True))`
			`seq.add(BatchNormalization())`

			`seq.add(ConvLSTM2D(nb_filter=40, nb_row=3, nb_col=3,`
			`border_mode='same', return_sequences=True))`
			`seq.add(BatchNormalization())`

			`seq.add(Convolution3D(nb_filter=1, kernel_dim1=1, kernel_dim2=3,`
			`kernel_dim3=3, activation='sigmoid',`
			`border_mode='same', dim_ordering='tf'))`

			`seq.compile(loss='binary_crossentropy', optimizer='adadelta')`


Style fixes 2016-11-05 20:45:50 +00:00			`# Artificial data generation:`
Fix review 2016-11-02 11:08:31 +00:00			`# Generate movies with 3 to 7 moving squares inside.`
Style fixes 2016-11-05 20:45:50 +00:00			`# The squares are of shape 1x1 or 2x2 pixels,`
			`# which move linearly over time.`
			`# For convenience we first create movies with bigger width and height (80x80)`
			`# and at the end we select a 40x40 window.`
Fix review 2016-11-02 11:08:31 +00:00
			`def generate_movies(n_samples=1200, n_frames=15):`
			`row = 80`
			`col = 80`
			`noisy_movies = np.zeros((n_samples, n_frames, row, col, 1), dtype=np.float)`
			`shifted_movies = np.zeros((n_samples, n_frames, row, col, 1),`
			`dtype=np.float)`

			`for i in range(n_samples):`
Style fixes 2016-11-05 20:45:50 +00:00			`# Add 3 to 7 moving squares`
Fix review 2016-11-02 11:08:31 +00:00			`n = np.random.randint(3, 8)`

			`for j in range(n):`
			`# Initial position`
			`xstart = np.random.randint(20, 60)`
			`ystart = np.random.randint(20, 60)`
			`# Direction of motion`
			`directionx = np.random.randint(0, 3) - 1`
			`directiony = np.random.randint(0, 3) - 1`

			`# Size of the square`
			`w = np.random.randint(2, 4)`

			`for t in range(n_frames):`
			`x_shift = xstart + directionx * t`
			`y_shift = ystart + directiony * t`
			`noisy_movies[i, t, x_shift - w: x_shift + w,`
			`y_shift - w: y_shift + w, 0] += 1`

			`# Make it more robust by adding noise.`
Style fixes 2016-11-05 20:45:50 +00:00			`# The idea is that if during inference,`
Fix review 2016-11-02 11:08:31 +00:00			`# the value of the pixel is not exactly one,`
Style fixes 2016-11-05 20:45:50 +00:00			`# we need to train the network to be robust and still`
			`# consider it as a pixel belonging to a square.`
Fix review 2016-11-02 11:08:31 +00:00			`if np.random.randint(0, 2):`
			`noise_f = (-1)**np.random.randint(0, 2)`
			`noisy_movies[i, t,`
			`x_shift - w - 1: x_shift + w + 1,`
			`y_shift - w - 1: y_shift + w + 1,`
			`0] += noise_f * 0.1`

Style fixes 2016-11-05 20:45:50 +00:00			`# Shift the ground truth by 1`
Fix review 2016-11-02 11:08:31 +00:00			`x_shift = xstart + directionx * (t + 1)`
			`y_shift = ystart + directiony * (t + 1)`
			`shifted_movies[i, t, x_shift - w: x_shift + w,`
			`y_shift - w: y_shift + w, 0] += 1`

Style fixes 2016-11-05 20:45:50 +00:00			`# Cut to a 40x40 window`
Fix review 2016-11-02 11:08:31 +00:00			`noisy_movies = noisy_movies[::, ::, 20:60, 20:60, ::]`
			`shifted_movies = shifted_movies[::, ::, 20:60, 20:60, ::]`
			`noisy_movies[noisy_movies >= 1] = 1`
			`shifted_movies[shifted_movies >= 1] = 1`
			`return noisy_movies, shifted_movies`

			`# Train the network`
			`noisy_movies, shifted_movies = generate_movies(n_samples=1200)`
			`seq.fit(noisy_movies[:1000], shifted_movies[:1000], batch_size=10,`
			`nb_epoch=300, validation_split=0.05)`

			`# Testing the network on one movie`
			`# feed it with the first 7 positions and then`
			`# predict the new positions`
			`which = 1004`
			`track = noisy_movies[which][:7, ::, ::, ::]`

			`for j in range(16):`
			`new_pos = seq.predict(track[np.newaxis, ::, ::, ::, ::])`
			`new = new_pos[::, -1, ::, ::, ::]`
			`track = np.concatenate((track, new), axis=0)`


			`# And then compare the predictions`
			`# to the ground truth`
			`track2 = noisy_movies[which][::, ::, ::, ::]`
			`for i in range(15):`
			`fig = plt.figure(figsize=(10, 5))`

			`ax = fig.add_subplot(121)`

			`if i >= 7:`
			`ax.text(1, 3, 'Predictions !', fontsize=20, color='w')`
			`else:`
			`ax.text(1, 3, 'Inital trajectory', fontsize=20)`

			`toplot = track[i, ::, ::, 0]`

			`plt.imshow(toplot)`
			`ax = fig.add_subplot(122)`
			`plt.text(1, 3, 'Ground truth', fontsize=20)`

			`toplot = track2[i, ::, ::, 0]`
			`if i >= 2:`
			`toplot = shifted_movies[which][i - 1, ::, ::, 0]`

			`plt.imshow(toplot)`
			`plt.savefig('%i_animate.png' % (i + 1))`