diff --git a/examples/conv_lstm.py b/examples/conv_lstm.py
new file mode 100644
index 000000000..d3d245e8e
--- /dev/null
+++ b/examples/conv_lstm.py
@@ -0,0 +1,143 @@
+"""This script demonstrates the use of a convolutional LSTM network.
+This network is used to predict the next frame of an artificially
+generated movie which contains moving squares.
+"""
+from keras.models import Sequential
+from keras.layers.convolutional import Convolution3D
+from keras.layers.convolutional_recurrent import ConvLSTM2D
+from keras.layers.normalization import BatchNormalization
+import numpy as np
+import pylab as plt
+
+# We create a layer which takes as input movies of shape
+# (n_frames, width, height, channels) and returns a movie
+# of identical shape.
+
+seq = Sequential()
+seq.add(ConvLSTM2D(nb_filter=40, nb_row=3, nb_col=3,
+                   input_shape=(None, 40, 40, 1),
+                   border_mode='same', return_sequences=True))
+seq.add(BatchNormalization())
+
+seq.add(ConvLSTM2D(nb_filter=40, nb_row=3, nb_col=3,
+                   border_mode='same', return_sequences=True))
+seq.add(BatchNormalization())
+
+seq.add(ConvLSTM2D(nb_filter=40, nb_row=3, nb_col=3,
+                   border_mode='same', return_sequences=True))
+seq.add(BatchNormalization())
+
+seq.add(ConvLSTM2D(nb_filter=40, nb_row=3, nb_col=3,
+                   border_mode='same', return_sequences=True))
+seq.add(BatchNormalization())
+
+seq.add(Convolution3D(nb_filter=1, kernel_dim1=1, kernel_dim2=3,
+                      kernel_dim3=3, activation='sigmoid',
+                      border_mode='same', dim_ordering='tf'))
+
+seq.compile(loss='binary_crossentropy', optimizer='adadelta')
+
+
+# Generating artificial data:
+# Generate movies with 3 to 7 moving squares inside.
+# The squares are 4x4 or 6x6 pixels in size and
+# they move linearly through time.
+# For convenience we first create movies with bigger width and height (80x80)
+# and at the end we select a 40x40 window.
+
+def generate_movies(n_samples=1200, n_frames=15):
+    row = 80
+    col = 80
+    noisy_movies = np.zeros((n_samples, n_frames, row, col, 1), dtype=np.float)
+    shifted_movies = np.zeros((n_samples, n_frames, row, col, 1),
+                              dtype=np.float)
+
+    for i in range(n_samples):
+
+        # Add from 3 to 7 moving squares
+        n = np.random.randint(3, 8)
+
+        for j in range(n):
+            # Initial position
+            xstart = np.random.randint(20, 60)
+            ystart = np.random.randint(20, 60)
+            # Direction of motion
+            directionx = np.random.randint(0, 3) - 1
+            directiony = np.random.randint(0, 3) - 1
+
+            # Half-width of the square
+            w = np.random.randint(2, 4)
+
+            for t in range(n_frames):
+                x_shift = xstart + directionx * t
+                y_shift = ystart + directiony * t
+                noisy_movies[i, t, x_shift - w: x_shift + w,
+                             y_shift - w: y_shift + w, 0] += 1
+
+                # Make it more robust by adding noise.
+                # The idea is that if during inference
+                # the value of a pixel is not exactly one,
+                # we want the network to be robust and still
+                # consider it as a pixel belonging to a square.
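+                # (With probability 1/2, add or subtract 0.1 over a box one
+                # pixel larger than the square on each side, so pixel values
+                # around the edges are no longer exactly 0 or 1.)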
+                if np.random.randint(0, 2):
+                    noise_f = (-1)**np.random.randint(0, 2)
+                    noisy_movies[i, t,
+                                 x_shift - w - 1: x_shift + w + 1,
+                                 y_shift - w - 1: y_shift + w + 1,
+                                 0] += noise_f * 0.1
+
+                # Shift the ground truth by 1 frame
+                x_shift = xstart + directionx * (t + 1)
+                y_shift = ystart + directiony * (t + 1)
+                shifted_movies[i, t, x_shift - w: x_shift + w,
+                               y_shift - w: y_shift + w, 0] += 1
+
+    # Cut to a 40x40 window
+    noisy_movies = noisy_movies[::, ::, 20:60, 20:60, ::]
+    shifted_movies = shifted_movies[::, ::, 20:60, 20:60, ::]
+    noisy_movies[noisy_movies >= 1] = 1
+    shifted_movies[shifted_movies >= 1] = 1
+    return noisy_movies, shifted_movies
+
+# Train the network
+noisy_movies, shifted_movies = generate_movies(n_samples=1200)
+seq.fit(noisy_movies[:1000], shifted_movies[:1000], batch_size=10,
+        nb_epoch=300, validation_split=0.05)
+
+# Testing the network on one movie:
+# feed it the first 7 positions and then
+# predict the new positions.
+which = 1004
+track = noisy_movies[which][:7, ::, ::, ::]
+
+for j in range(16):
+    new_pos = seq.predict(track[np.newaxis, ::, ::, ::, ::])
+    new = new_pos[::, -1, ::, ::, ::]
+    track = np.concatenate((track, new), axis=0)
+
+
+# And then compare the predictions
+# to the ground truth.
+track2 = noisy_movies[which][::, ::, ::, ::]
+for i in range(15):
+    fig = plt.figure(figsize=(10, 5))
+
+    ax = fig.add_subplot(121)
+
+    if i >= 7:
+        ax.text(1, 3, 'Predictions!', fontsize=20, color='w')
+    else:
+        ax.text(1, 3, 'Initial trajectory', fontsize=20)
+
+    toplot = track[i, ::, ::, 0]
+
+    plt.imshow(toplot)
+    ax = fig.add_subplot(122)
+    plt.text(1, 3, 'Ground truth', fontsize=20)
+
+    toplot = track2[i, ::, ::, 0]
+    if i >= 2:
+        toplot = shifted_movies[which][i - 1, ::, ::, 0]
+
+    plt.imshow(toplot)
+    plt.savefig('%i_animate.png' % (i + 1))
diff --git a/examples/lstm_conv.py b/examples/lstm_conv.py
deleted file mode 100644
index 694217d2e..000000000
--- a/examples/lstm_conv.py
+++ /dev/null
@@ -1,136 +0,0 @@
-from keras.models import Sequential
-from keras.layers.convolutional import Convolution3D
-from keras.layers.recurrent_convolutional import ConvLSTM2D
-from keras.layers.normalization import BatchNormalization
-import numpy as np
-from pylab import *
-
-# We create a layer whose take movies as input
-# of shape (time, width, height, channel) and that return a movie
-# with identical shape.
-
-seq = Sequential()
-seq.add(ConvLSTM2D(nb_filter=40, nb_row=3, nb_col=3,
-                   input_shape=(None, 40, 40, 1),
-                   border_mode="same", return_sequences=True))
-seq.add(BatchNormalization())
-
-seq.add(ConvLSTM2D(nb_filter=40, nb_row=3, nb_col=3,
-                   border_mode="same", return_sequences=True))
-seq.add(BatchNormalization())
-
-seq.add(ConvLSTM2D(nb_filter=40, nb_row=3, nb_col=3,
-                   border_mode="same", return_sequences=True))
-seq.add(BatchNormalization())
-
-seq.add(ConvLSTM2D(nb_filter=40, nb_row=3, nb_col=3,
-                   border_mode="same", return_sequences=True))
-seq.add(BatchNormalization())
-
-seq.add(Convolution3D(nb_filter=1, kernel_dim1=1, kernel_dim2=3,
-                      kernel_dim3=3, activation='sigmoid',
-                      border_mode="same", dim_ordering="tf"))
-
-seq.compile(loss="binary_crossentropy", optimizer="adadelta")
-
-
-# Generating artificial data:
-# We are going to create a movie with
-# square of size one or two by two pixels moving linearly
-# trought time. For convenience we first create
-# a movie with bigger width and height, and at the end
-# we cut it to 40x40
-
-time = 15
-row = 80
-col = 80
-filters = 1
-training = 1200
-train = np.zeros((training, time, row, col, 1), dtype=np.float)
-gt = np.zeros((training, time, row, col, 1), dtype=np.float)
-
-for i in range(training):
-
-    # add from 3 to 7 moving squares
-    n = np.random.randint(3, 8)
-
-    for j in range(n):
-        # Initial position
-        xstart = np.random.randint(20, 60)
-        ystart = np.random.randint(20, 60)
-        # Direction of motion
-        directionx = np.random.randint(0, 3) - 1
-        directiony = np.random.randint(0, 3) - 1
-
-        # Size of the square
-        w = np.random.randint(2, 4)
-
-        for t in range(time):
-            x_shift = xstart + directionx * t
-            y_shift = ystart + directiony * t
-            train[i, t, x_shift - w: x_shift + w,
-                  y_shift - w: y_shift + w, 0] += 1
-
-            # Make it more robust by adding noise.
-            # The idea is that if during predict time,
-            # the value of the pixel is not exactly one,
-            # we need to train the network to be robust and stille
-            # consider it is a pixel belonging to a square.
-            if np.random.randint(0, 2):
-                noise_f = (-1)**np.random.randint(0, 2)
-                train[i, t, x_shift - w - 1: x_shift + w + 1,
-                      y_shift - w - 1: y_shift + w + 1, 0] += noise_f * 0.1
-
-            # Shitf the ground truth by 1
-            x_shift = xstart + directionx * (t + 1)
-            y_shift = ystart + directiony * (t + 1)
-            gt[i, t, x_shift - w: x_shift + w,
-               y_shift - w: y_shift + w, 0] += 1
-
-# Cut to a forty's sized window
-train = train[::, ::, 20:60, 20:60, ::]
-gt = gt[::, ::, 20:60, 20:60, ::]
-train[train >= 1] = 1
-gt[gt >= 1] = 1
-
-# Train the network
-seq.fit(train[:1000], gt[:1000], batch_size=10,
-        nb_epoch=300, validation_split=0.05)
-
-# Testing the network on one movie
-# feed it with the first 7 positions and then
-# predict the new positions
-which = 1004
-track = train[which][:7, ::, ::, ::]
-
-for j in range(16):
-    new_pos = seq.predict(track[np.newaxis, ::, ::, ::, ::])
-    new = new_pos[::, -1, ::, ::, ::]
-    track = np.concatenate((track, new), axis=0)
-
-
-# And then compare the predictions
-# to the ground truth
-track2 = train[which][::, ::, ::, ::]
-for i in range(15):
-    fig = figure(figsize=(10, 5))
-
-    ax = fig.add_subplot(121)
-
-    if i >= 7:
-        ax.text(1, 3, "Predictions !", fontsize=20, color="w")
-    else:
-        ax.text(1, 3, "Inital trajectory", fontsize=20)
-
-    toplot = track[i, ::, ::, 0]
-
-    imshow(toplot)
-    ax = fig.add_subplot(122)
-    text(1, 3, "Ground truth", fontsize=20)
-
-    toplot = track2[i, ::, ::, 0]
-    if i >= 2:
-        toplot = gt[which][i - 1, ::, ::, 0]
-
-    imshow(toplot)
-    savefig("%i_animate.png" % (i + 1))
diff --git a/keras/layers/__init__.py b/keras/layers/__init__.py
index 8e2dd2877..5337e9fba 100644
--- a/keras/layers/__init__.py
+++ b/keras/layers/__init__.py
@@ -10,4 +10,4 @@ from .embeddings import *
 from .noise import *
 from .advanced_activations import *
 from .wrappers import *
-from .recurrent_convolutional import *
+from .convolutional_recurrent import *
diff --git a/keras/layers/recurrent_convolutional.py b/keras/layers/convolutional_recurrent.py
similarity index 97%
rename from keras/layers/recurrent_convolutional.py
rename to keras/layers/convolutional_recurrent.py
index 167ac1016..d9cd40d44 100644
--- a/keras/layers/recurrent_convolutional.py
+++ b/keras/layers/convolutional_recurrent.py
@@ -8,20 +8,20 @@ import warnings
 
 
 class ConvRecurrent2D(Layer):
-    '''Abstract base class for recurrent layers.
+    '''Abstract base class for convolutional recurrent layers.
 
     Do not use in a model -- it's not a functional layer!
 
-    All recurrent layers (GRU, LSTM, SimpleRNN) also
+    ConvLSTM2D follows the specifications of this class and accepts
     the keyword arguments listed below.
 
     # Input shape
-        5D tensor with shape `(nb_samples, timesteps, channels,rows,cols)`.
+        5D tensor with shape `(nb_samples, timesteps, channels, rows, cols)`.
 
     # Output shape
         - if `return_sequences`: 5D tensor with shape
-            `(nb_samples, timesteps, channels,rows,cols)`.
-        - else, 2D tensor with shape `(nb_samples, channels,rows,cols)`.
+            `(nb_samples, timesteps, channels, rows, cols)`.
+        - else, 4D tensor with shape `(nb_samples, channels, rows, cols)`.
 
     # Arguments
         weights: list of numpy arrays to set as initial weights.
@@ -200,7 +200,8 @@ class ConvRecurrent2D(Layer):
 
 
 class ConvLSTM2D(ConvRecurrent2D):
-    '''
+    '''Convolutional LSTM.
+
     # Input shape
         - if dim_ordering='th'
             5D tensor with shape:
@@ -213,17 +214,17 @@ class ConvLSTM2D(ConvRecurrent2D):
         - if `return_sequences`
             - if dim_ordering='th'
                 5D tensor with shape:
-                `(samples, time, nb_filter, o_row, o_col)`
+                `(samples, time, nb_filter, output_row, output_col)`
             - if dim_ordering='tf'
                 5D tensor with shape:
-                `(samples, time, o_row, o_col, nb_filter)`
+                `(samples, time, output_row, output_col, nb_filter)`
         - else
             - if dim_ordering ='th'
                 4D tensor with shape:
-                `(samples, nb_filter, o_row, o_col)`
+                `(samples, nb_filter, output_row, output_col)`
             - if dim_ordering='tf'
                 4D tensor with shape:
-                `(samples, o_row, o_col, nb_filter)`
+                `(samples, output_row, output_col, nb_filter)`
 
         where o_row and o_col depend on the shape of the filter and
         the border_mode
diff --git a/tests/keras/layers/test_recurrent_convolutional.py b/tests/keras/layers/test_convolutional_recurrent.py
similarity index 92%
rename from tests/keras/layers/test_recurrent_convolutional.py
rename to tests/keras/layers/test_convolutional_recurrent.py
index 18000caa3..cb98d84e6 100644
--- a/tests/keras/layers/test_recurrent_convolutional.py
+++ b/tests/keras/layers/test_convolutional_recurrent.py
@@ -4,7 +4,7 @@ from numpy.testing import assert_allclose
 
 from keras import backend as K
 from keras.models import Sequential
-from keras.layers import recurrent_convolutional
+from keras.layers import convolutional_recurrent
 from keras.utils.test_utils import layer_test
 from keras import regularizers
 
@@ -15,10 +15,10 @@ def test_recurrent_convolutional():
     nb_col = 4
     nb_filter = 20
     nb_samples = 5
-    input_channel = 3
-    input_nb_row = 30
-    input_nb_col = 30
-    sequence_len = 10
+    input_channel = 2
+    input_nb_row = 10
+    input_nb_col = 10
+    sequence_len = 2
 
     for dim_ordering in ['th', 'tf']:
         if dim_ordering == 'th':
@@ -32,7 +32,7 @@
     for return_sequences in [True, False]:
 
         # test for ouptput shape:
-        output = layer_test(recurrent_convolutional.ConvLSTM2D,
+        output = layer_test(convolutional_recurrent.ConvLSTM2D,
                             kwargs={'dim_ordering': dim_ordering,
                                     'return_sequences': return_sequences,
                                     'nb_filter': nb_filter,
@@ -67,7 +67,7 @@
                   'stateful': True,
                   'batch_input_shape': input.shape,
                   'border_mode': "same"}
-        layer = recurrent_convolutional.ConvLSTM2D(**kwargs)
+        layer = convolutional_recurrent.ConvLSTM2D(**kwargs)
         model.add(layer)
         model.compile(optimizer='sgd', loss='mse')
@@ -110,13 +110,13 @@
                   'b_regularizer': 'l2',
                   'border_mode': "same"}
 
-        layer = recurrent_convolutional.ConvLSTM2D(**kwargs)
+        layer = convolutional_recurrent.ConvLSTM2D(**kwargs)
         layer.set_input(K.variable(np.ones(input.shape)),
                         shape=input.shape)
         K.eval(layer.output)
 
         # check dropout
-        layer_test(recurrent_convolutional.ConvLSTM2D,
+        layer_test(convolutional_recurrent.ConvLSTM2D,
                    kwargs={'dim_ordering': dim_ordering,
                            'return_sequences': return_sequences,
                            'nb_filter': nb_filter,