Finish updating examples.

2017-03-11 19:44:29 -08:00 · 2017-03-11 19:44:29 -08:00 · aa826d684d
commit aa826d684d
parent 84711475f8
17 changed files with 170 additions and 165 deletions
--- a/examples/babi_memnn.py
+++ b/examples/babi_memnn.py
@ -12,8 +12,8 @@ References:
 Reaches 98.6% accuracy on task 'single_supporting_fact_10k' after 120 epochs.
 Time per epoch: 3s on CPU (core i7).
 '''
-
 from __future__ import print_function
+
 from keras.models import Sequential
 from keras.layers.embeddings import Embedding
 from keras.layers import Activation, Dense, Merge, Permute, Dropout
--- a/examples/conv_filter_visualization.py
+++ b/examples/conv_filter_visualization.py
@ -5,6 +5,7 @@ This script can run on CPU in a few minutes (with the TensorFlow backend).
 Results example: http://i.imgur.com/4nj4KjN.jpg
 '''
 from __future__ import print_function
+
 from scipy.misc import imsave
 import numpy as np
 import time
--- a/examples/deep_dream.py
+++ b/examples/deep_dream.py
@ -15,6 +15,7 @@ If running on CPU, prefer the TensorFlow backend (much faster).
 Example results: http://i.imgur.com/FX6ROg9.jpg
 '''
 from __future__ import print_function
+
 from keras.preprocessing.image import load_img, img_to_array
 import numpy as np
 from scipy.misc import imsave
@ -57,21 +58,19 @@ saved_settings = {
 # the settings we will use in this experiment
 settings = saved_settings['dreamy']

-# util function to open, resize and format pictures into appropriate tensors
-

 def preprocess_image(image_path):
+    # util function to open, resize and format pictures
+    # into appropriate tensors
    img = load_img(image_path, target_size=(img_height, img_width))
    img = img_to_array(img)
    img = np.expand_dims(img, axis=0)
    img = vgg16.preprocess_input(img)
    return img

-# util function to convert a tensor into a valid image
-

 def deprocess_image(x):
-
+    # util function to convert a tensor into a valid image
    if K.image_data_format() == 'channels_first':
        x = x.reshape((3, img_height, img_width))
        x = x.transpose((1, 2, 0))
@ -102,10 +101,9 @@ print('Model loaded.')
 # get the symbolic outputs of each "key" layer (we gave them unique names).
 layer_dict = dict([(layer.name, layer) for layer in model.layers])

-# continuity loss util function
-

 def continuity_loss(x):
+    # continuity loss util function
    assert K.ndim(x) == 4
    if K.image_data_format() == 'channels_first':
        a = K.square(x[:, :, :img_height - 1, :img_width - 1] -
@ -162,15 +160,17 @@ def eval_loss_and_grads(x):
        grad_values = np.array(outs[1:]).flatten().astype('float64')
    return loss_value, grad_values

-# this Evaluator class makes it possible
-# to compute loss and gradients in one pass
-# while retrieving them via two separate functions,
-# "loss" and "grads". This is done because scipy.optimize
-# requires separate functions for loss and gradients,
-# but computing them separately would be inefficient.
-

 class Evaluator(object):
+    """Loss and gradients evaluator.
+
+    This Evaluator class makes it possible
+    to compute loss and gradients in one pass
+    while retrieving them via two separate functions,
+    "loss" and "grads". This is done because scipy.optimize
+    requires separate functions for loss and gradients,
+    but computing them separately would be inefficient.
+    """

    def __init__(self):
        self.loss_value = None
@ -192,22 +192,23 @@ class Evaluator(object):

 evaluator = Evaluator()

-# run scipy-based optimization (L-BFGS) over the pixels of the generated image
+# Run scipy-based optimization (L-BFGS) over the pixels of the generated image
 # so as to minimize the loss
 x = preprocess_image(base_image_path)
 for i in range(5):
    print('Start of iteration', i)
    start_time = time.time()

-    # add a random jitter to the initial image. This will be reverted at decoding time
+    # Add a random jitter to the initial image.
+    # This will be reverted at decoding time
    random_jitter = (settings['jitter'] * 2) * (np.random.random(img_size) - 0.5)
    x += random_jitter

-    # run L-BFGS for 7 steps
+    # Run L-BFGS for 7 steps
    x, min_val, info = fmin_l_bfgs_b(evaluator.loss, x.flatten(),
                                     fprime=evaluator.grads, maxfun=7)
    print('Current loss value:', min_val)
-    # decode the dream and save it
+    # Decode the dream and save it
    x = x.reshape(img_size)
    x -= random_jitter
    img = deprocess_image(np.copy(x))
--- a/examples/imdb_bidirectional_lstm.py
+++ b/examples/imdb_bidirectional_lstm.py
@ -6,7 +6,6 @@ Time per epoch on CPU (Core i7): ~150s.

 from __future__ import print_function
 import numpy as np
-np.random.seed(1337)  # for reproducibility

 from keras.preprocessing import sequence
 from keras.models import Sequential
@ -15,19 +14,21 @@ from keras.datasets import imdb


 max_features = 20000
-maxlen = 100  # cut texts after this number of words (among top max_features most common words)
+# cut texts after this number of words
+# (among top max_features most common words)
+maxlen = 100
 batch_size = 32

 print('Loading data...')
-(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=max_features)
-print(len(X_train), 'train sequences')
-print(len(X_test), 'test sequences')
+(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
+print(len(x_train), 'train sequences')
+print(len(x_test), 'test sequences')

 print("Pad sequences (samples x time)")
-X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
-X_test = sequence.pad_sequences(X_test, maxlen=maxlen)
-print('X_train shape:', X_train.shape)
-print('X_test shape:', X_test.shape)
+x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
+x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
+print('x_train shape:', x_train.shape)
+print('x_test shape:', x_test.shape)
 y_train = np.array(y_train)
 y_test = np.array(y_test)

@ -41,7 +42,7 @@ model.add(Dense(1, activation='sigmoid'))
 model.compile('adam', 'binary_crossentropy', metrics=['accuracy'])

 print('Train...')
-model.fit(X_train, y_train,
+model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=4,
-          validation_data=[X_test, y_test])
+          validation_data=[x_test, y_test])
--- a/examples/imdb_cnn.py
+++ b/examples/imdb_cnn.py
@ -7,8 +7,6 @@ Gets to 0.89 test accuracy after 2 epochs.
 '''

 from __future__ import print_function
-import numpy as np
-np.random.seed(1337)  # for reproducibility

 from keras.preprocessing import sequence
 from keras.models import Sequential
@ -17,7 +15,6 @@ from keras.layers import Embedding
 from keras.layers import Conv1D, GlobalMaxPooling1D
 from keras.datasets import imdb

-
 # set parameters:
 max_features = 5000
 maxlen = 400
@ -29,15 +26,15 @@ hidden_dims = 250
 epochs = 2

 print('Loading data...')
-(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=max_features)
-print(len(X_train), 'train sequences')
-print(len(X_test), 'test sequences')
+(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
+print(len(x_train), 'train sequences')
+print(len(x_test), 'test sequences')

 print('Pad sequences (samples x time)')
-X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
-X_test = sequence.pad_sequences(X_test, maxlen=maxlen)
-print('X_train shape:', X_train.shape)
-print('X_test shape:', X_test.shape)
+x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
+x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
+print('x_train shape:', x_train.shape)
+print('x_test shape:', x_test.shape)

 print('Build model...')
 model = Sequential()
@ -71,7 +68,7 @@ model.add(Activation('sigmoid'))
 model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
-model.fit(X_train, y_train,
+model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
-          validation_data=(X_test, y_test))
+          validation_data=(x_test, y_test))
--- a/examples/imdb_cnn_lstm.py
+++ b/examples/imdb_cnn_lstm.py
@ -4,8 +4,6 @@ classification task.
 Gets to 0.8498 test accuracy after 2 epochs. 41s/epoch on K520 GPU.
 '''
 from __future__ import print_function
-import numpy as np
-np.random.seed(1337)  # for reproducibility

 from keras.preprocessing import sequence
 from keras.models import Sequential
@ -15,7 +13,6 @@ from keras.layers import LSTM
 from keras.layers import Conv1D, MaxPooling1D
 from keras.datasets import imdb

-
 # Embedding
 max_features = 20000
 maxlen = 100
@ -40,15 +37,15 @@ Only 2 epochs are needed as the dataset is very small.
 '''

 print('Loading data...')
-(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=max_features)
-print(len(X_train), 'train sequences')
-print(len(X_test), 'test sequences')
+(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
+print(len(x_train), 'train sequences')
+print(len(x_test), 'test sequences')

 print('Pad sequences (samples x time)')
-X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
-X_test = sequence.pad_sequences(X_test, maxlen=maxlen)
-print('X_train shape:', X_train.shape)
-print('X_test shape:', X_test.shape)
+x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
+x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
+print('x_train shape:', x_train.shape)
+print('x_test shape:', x_test.shape)

 print('Build model...')

@ -70,8 +67,8 @@ model.compile(loss='binary_crossentropy',
              metrics=['accuracy'])

 print('Train...')
-model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs,
-          validation_data=(X_test, y_test))
-score, acc = model.evaluate(X_test, y_test, batch_size=batch_size)
+model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs,
+          validation_data=(x_test, y_test))
+score, acc = model.evaluate(x_test, y_test, batch_size=batch_size)
 print('Test score:', score)
 print('Test accuracy:', acc)
--- a/examples/imdb_fasttext.py
+++ b/examples/imdb_fasttext.py
@ -7,12 +7,11 @@ https://arxiv.org/abs/1607.01759

 Results on IMDB datasets with uni and bi-gram embeddings:
    Uni-gram: 0.8813 test accuracy after 5 epochs. 8s/epoch on i7 cpu.
-    Bi-gram : 0.9056 test accuracy after 5 epochs. 2s/epoch on GTX 980M gpu.
+    Bi-gram : 0.9056 test accuracy after 5 epochs. 2s/epoch on GTX 980M gpu.
 '''

 from __future__ import print_function
 import numpy as np
-np.random.seed(1337)  # for reproducibility

 from keras.preprocessing import sequence
 from keras.models import Sequential
@ -73,17 +72,17 @@ embedding_dims = 50
 epochs = 5

 print('Loading data...')
-(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=max_features)
-print(len(X_train), 'train sequences')
-print(len(X_test), 'test sequences')
-print('Average train sequence length: {}'.format(np.mean(list(map(len, X_train)), dtype=int)))
-print('Average test sequence length: {}'.format(np.mean(list(map(len, X_test)), dtype=int)))
+(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
+print(len(x_train), 'train sequences')
+print(len(x_test), 'test sequences')
+print('Average train sequence length: {}'.format(np.mean(list(map(len, x_train)), dtype=int)))
+print('Average test sequence length: {}'.format(np.mean(list(map(len, x_test)), dtype=int)))

 if ngram_range > 1:
    print('Adding {}-gram features'.format(ngram_range))
    # Create set of unique n-gram from the training set.
    ngram_set = set()
-    for input_list in X_train:
+    for input_list in x_train:
        for i in range(2, ngram_range + 1):
            set_of_ngram = create_ngram_set(input_list, ngram_value=i)
            ngram_set.update(set_of_ngram)
@ -98,17 +97,17 @@ if ngram_range > 1:
    # max_features is the highest integer that could be found in the dataset.
    max_features = np.max(list(indice_token.keys())) + 1

-    # Augmenting X_train and X_test with n-grams features
-    X_train = add_ngram(X_train, token_indice, ngram_range)
-    X_test = add_ngram(X_test, token_indice, ngram_range)
-    print('Average train sequence length: {}'.format(np.mean(list(map(len, X_train)), dtype=int)))
-    print('Average test sequence length: {}'.format(np.mean(list(map(len, X_test)), dtype=int)))
+    # Augmenting x_train and x_test with n-gram features
+    x_train = add_ngram(x_train, token_indice, ngram_range)
+    x_test = add_ngram(x_test, token_indice, ngram_range)
+    print('Average train sequence length: {}'.format(np.mean(list(map(len, x_train)), dtype=int)))
+    print('Average test sequence length: {}'.format(np.mean(list(map(len, x_test)), dtype=int)))

 print('Pad sequences (samples x time)')
-X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
-X_test = sequence.pad_sequences(X_test, maxlen=maxlen)
-print('X_train shape:', X_train.shape)
-print('X_test shape:', X_test.shape)
+x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
+x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
+print('x_train shape:', x_train.shape)
+print('x_test shape:', x_test.shape)

 print('Build model...')
 model = Sequential()
@ -130,7 +129,7 @@ model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

-model.fit(X_train, y_train,
+model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
-          validation_data=(X_test, y_test))
+          validation_data=(x_test, y_test))
--- a/examples/imdb_lstm.py
+++ b/examples/imdb_lstm.py
@ -11,11 +11,10 @@ Some configurations won't converge.
 from what you see with CNNs/MLPs/etc.
 '''
 from __future__ import print_function
-import numpy as np

 from keras.preprocessing import sequence
 from keras.models import Sequential
-from keras.layers import Dense, Activation, Embedding
+from keras.layers import Dense, Embedding
 from keras.layers import LSTM
 from keras.datasets import imdb

--- a/examples/lstm_benchmark.py
+++ b/examples/lstm_benchmark.py
@ -43,9 +43,13 @@ for mode in modes:
    print('Testing mode: implementation={}'.format(mode))

    model = Sequential()
-    model.add(Embedding(max_features, embedding_dim, input_length=max_length))
+    model.add(Embedding(max_features, embedding_dim,
+                        input_length=max_length))
    model.add(Dropout(0.2))
-    model.add(LSTM(embedding_dim, dropout=0.2, recurrent_dropout=0.2, implementation=mode))
+    model.add(LSTM(embedding_dim,
+                   dropout=0.2,
+                   recurrent_dropout=0.2,
+                   implementation=mode))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
--- a/examples/lstm_text_generation.py
+++ b/examples/lstm_text_generation.py
@ -20,7 +20,7 @@ import numpy as np
 import random
 import sys

-path = get_file('nietzsche.txt', origin="https://s3.amazonaws.com/text-datasets/nietzsche.txt")
+path = get_file('nietzsche.txt', origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')
 text = open(path).read().lower()
 print('corpus length:', len(text))

--- a/examples/mnist_acgan.py
+++ b/examples/mnist_acgan.py
@ -6,8 +6,8 @@ MNIST dataset. See https://arxiv.org/abs/1610.09585 for more details.

 You should start to see reasonable images after ~5 epochs, and good images
 by ~15 epochs. You should use a GPU, as the convolution-heavy operations are
-very slow on the CPU. Prefer the TensorFlow backend if you plan on iterating, as
-the compilation time can be a blocker using Theano.
+very slow on the CPU. Prefer the TensorFlow backend if you plan on iterating,
+as the compilation time can be a blocker using Theano.

 Timings:

@ -33,9 +33,10 @@ from six.moves import range

 import keras.backend as K
 from keras.datasets import mnist
-from keras.layers import Input, Dense, Reshape, Flatten, Embedding, merge, Dropout
+from keras import layers
+from keras.layers import Input, Dense, Reshape, Flatten, Embedding, Dropout
 from keras.layers.advanced_activations import LeakyReLU
-from keras.layers.convolutional import UpSampling2D, Convolution2D
+from keras.layers.convolutional import UpSampling2D, Conv2D
 from keras.models import Sequential, Model
 from keras.optimizers import Adam
 from keras.utils.generic_utils import Progbar
@ -57,17 +58,20 @@ def build_generator(latent_size):

    # upsample to (..., 14, 14)
    cnn.add(UpSampling2D(size=(2, 2)))
-    cnn.add(Convolution2D(256, 5, padding='same',
-                          activation='relu', kernel_initializer='glorot_normal'))
+    cnn.add(Conv2D(256, 5, padding='same',
+                   activation='relu',
+                   kernel_initializer='glorot_normal'))

    # upsample to (..., 28, 28)
    cnn.add(UpSampling2D(size=(2, 2)))
-    cnn.add(Convolution2D(128, 5, padding='same',
-                          activation='relu', kernel_initializer='glorot_normal'))
+    cnn.add(Conv2D(128, 5, padding='same',
+                   activation='relu',
+                   kernel_initializer='glorot_normal'))

    # take a channel axis reduction
-    cnn.add(Convolution2D(1, 2, padding='same',
-                          activation='tanh', kernel_initializer='glorot_normal'))
+    cnn.add(Conv2D(1, 2, padding='same',
+                   activation='tanh',
+                   kernel_initializer='glorot_normal'))

    # this is the z space commonly refered to in GAN papers
    latent = Input(shape=(latent_size, ))
@ -80,7 +84,7 @@ def build_generator(latent_size):
                              embeddings_initializer='glorot_normal')(image_class))

    # hadamard product between z-space and a class conditional embedding
-    h = merge([latent, cls], mode='mul')
+    h = layers.multiply([latent, cls])

    fake_image = cnn(h)

@ -92,20 +96,20 @@ def build_discriminator():
    # the reference paper
    cnn = Sequential()

-    cnn.add(Convolution2D(32, 3, padding='same', strides=2,
-                          input_shape=(1, 28, 28)))
+    cnn.add(Conv2D(32, 3, padding='same', strides=2,
+                   input_shape=(1, 28, 28)))
    cnn.add(LeakyReLU())
    cnn.add(Dropout(0.3))

-    cnn.add(Convolution2D(64, 3, padding='same', strides=2))
+    cnn.add(Conv2D(64, 3, padding='same', strides=2))
    cnn.add(LeakyReLU())
    cnn.add(Dropout(0.3))

-    cnn.add(Convolution2D(128, 3, padding='same', strides=2))
+    cnn.add(Conv2D(128, 3, padding='same', strides=2))
    cnn.add(LeakyReLU())
    cnn.add(Dropout(0.3))

-    cnn.add(Convolution2D(256, 3, padding='same', strides=1))
+    cnn.add(Conv2D(256, 3, padding='same', strides=1))
    cnn.add(LeakyReLU())
    cnn.add(Dropout(0.3))

@ -224,7 +228,8 @@ if __name__ == '__main__':
            trick = np.ones(2 * batch_size)

            epoch_gen_loss.append(combined.train_on_batch(
-                [noise, sampled_labels.reshape((-1, 1))], [trick, sampled_labels]))
+                [noise, sampled_labels.reshape((-1, 1))],
+                [trick, sampled_labels]))

        print('\nTesting for epoch {}:'.format(epoch + 1))

--- a/examples/mnist_hierarchical_rnn.py
+++ b/examples/mnist_hierarchical_rnn.py
@ -56,8 +56,8 @@ print(x_train.shape[0], 'train samples')
 print(x_test.shape[0], 'test samples')

 # Converts class vectors to binary class matrices.
-Y_train = keras.utils.to_categorical(y_train, num_classes)
-Y_test = keras.utils.to_categorical(y_test, num_classes)
+y_train = keras.utils.to_categorical(y_train, num_classes)
+y_test = keras.utils.to_categorical(y_test, num_classes)

 row, col, pixel = x_train.shape[1:]

@ -78,10 +78,11 @@ model.compile(loss='categorical_crossentropy',
              metrics=['accuracy'])

 # Training.
-model.fit(x_train, Y_train, batch_size=batch_size, epochs=epochs,
-          verbose=1, validation_data=(x_test, Y_test))
+model.fit(x_train, y_train,
+          batch_size=batch_size, epochs=epochs,
+          verbose=1, validation_data=(x_test, y_test))

 # Evaluation.
-scores = model.evaluate(x_test, Y_test, verbose=0)
+scores = model.evaluate(x_test, y_test, verbose=0)
 print('Test loss:', scores[0])
 print('Test accuracy:', scores[1])
--- a/examples/mnist_siamese_graph.py
+++ b/examples/mnist_siamese_graph.py
@ -13,7 +13,6 @@ Gets to 99.5% test accuracy after 20 epochs.
 from __future__ import absolute_import
 from __future__ import print_function
 import numpy as np
-np.random.seed(1337)  # for reproducibility

 import random
 from keras.datasets import mnist
@ -38,7 +37,8 @@ def contrastive_loss(y_true, y_pred):
    http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf
    '''
    margin = 1
-    return K.mean(y_true * K.square(y_pred) + (1 - y_true) * K.square(K.maximum(margin - y_pred, 0)))
+    return K.mean(y_true * K.square(y_pred) +
+                  (1 - y_true) * K.square(K.maximum(margin - y_pred, 0)))


 def create_pairs(x, digit_indices):
@ -108,7 +108,8 @@ input_b = Input(shape=(input_dim,))
 processed_a = base_network(input_a)
 processed_b = base_network(input_b)

-distance = Lambda(euclidean_distance, output_shape=eucl_dist_output_shape)([processed_a, processed_b])
+distance = Lambda(euclidean_distance,
+                  output_shape=eucl_dist_output_shape)([processed_a, processed_b])

 model = Model([input_a, input_b], distance)

--- a/examples/mnist_sklearn_wrapper.py
+++ b/examples/mnist_sklearn_wrapper.py
@ -9,7 +9,7 @@ import keras
 from keras.datasets import mnist
 from keras.models import Sequential
 from keras.layers import Dense, Dropout, Activation, Flatten
-from keras.layers import Convolution2D, MaxPooling2D
+from keras.layers import Conv2D, MaxPooling2D
 from keras.wrappers.scikit_learn import KerasClassifier
 from keras import backend as K
 from sklearn.grid_search import GridSearchCV
@ -53,11 +53,11 @@ def make_model(dense_layer_sizes, filters, kernel_size, pool_size):
    '''

    model = Sequential()
-    model.add(Convolution2D(filters, kernel_size,
-                            padding='valid',
-                            input_shape=input_shape))
+    model.add(Conv2D(filters, kernel_size,
+                     padding='valid',
+                     input_shape=input_shape))
    model.add(Activation('relu'))
-    model.add(Convolution2D(filters, kernel_size))
+    model.add(Conv2D(filters, kernel_size))
    model.add(Activation('relu'))
    model.add(MaxPooling2D(pool_size=pool_size))
    model.add(Dropout(0.25))
--- a/examples/mnist_swwae.py
+++ b/examples/mnist_swwae.py
@ -2,21 +2,21 @@
 MNIST dataset.  It exemplifies two influential methods that have been developed
 in the past few years.

-The first is the idea of properly "unpooling." During any max pool, the
-exact location (the "where") of the maximal value in a pooled receptive field
+The first is the idea of properly 'unpooling.' During any max pool, the
+exact location (the 'where') of the maximal value in a pooled receptive field
 is lost, however it can be very useful in the overall reconstruction of an
-input image.  Therefore, if the "where" is handed from the encoder
-to the corresponding decoder layer, features being decoded can be "placed" in
+input image.  Therefore, if the 'where' is handed from the encoder
+to the corresponding decoder layer, features being decoded can be 'placed' in
 the right location, allowing for reconstructions of much higher fidelity.

 References:
 [1]
-"Visualizing and Understanding Convolutional Networks"
+'Visualizing and Understanding Convolutional Networks'
 Matthew D Zeiler, Rob Fergus
 https://arxiv.org/abs/1311.2901v3

 [2]
-"Stacked What-Where Auto-encoders"
+'Stacked What-Where Auto-encoders'
 Junbo Zhao, Michael Mathieu, Ross Goroshin, Yann LeCun
 https://arxiv.org/abs/1506.02351v8

@ -34,42 +34,42 @@ applied as a bias because we know the MNIST digits are mapped to [0,1].

 References:
 [3]
-"Deep Residual Learning for Image Recognition"
-Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
+'Deep Residual Learning for Image Recognition'
+Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
 https://arxiv.org/abs/1512.03385v1

 [4]
-"Identity Mappings in Deep Residual Networks"
-Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
+'Identity Mappings in Deep Residual Networks'
+Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
 https://arxiv.org/abs/1603.05027v3

 '''
 from __future__ import print_function
 import numpy as np
-np.random.seed(1337)  # for reproducibility

 from keras.datasets import mnist
 from keras.models import Model
-from keras.layers import Activation, merge
-from keras.layers import UpSampling2D, Convolution2D, MaxPooling2D
+from keras.layers import Activation
+from keras.layers import UpSampling2D, Conv2D, MaxPooling2D
 from keras.layers import Input, BatchNormalization
 import matplotlib.pyplot as plt
 import keras.backend as K
+from keras import layers


 def convresblock(x, nfeats=8, ksize=3, nskipped=2):
    ''' The proposed residual block from [4]'''
-    y0 = Convolution2D(nfeats, ksize, ksize, border_mode='same')(x)
+    y0 = Conv2D(nfeats, ksize, padding='same')(x)
    y = y0
    for i in range(nskipped):
-        y = BatchNormalization(mode=0, axis=1)(y)
+        y = BatchNormalization(axis=1)(y)
        y = Activation('relu')(y)
-        y = Convolution2D(nfeats, ksize, ksize, border_mode='same')(y)
-    return merge([y0, y], mode='sum')
+        y = Conv2D(nfeats, ksize, padding='same')(y)
+    return layers.add([y0, y])


 def getwhere(x):
-    ''' Calculate the "where" mask that contains switches indicating which
+    ''' Calculate the 'where' mask that contains switches indicating which
    index contained the max value when MaxPool2D was applied.  Using the
    gradient of the sum is a nice trick to keep everything high level.'''
    y_prepool, y_postpool = x
@ -89,17 +89,17 @@ K.set_image_data_format('channels_first')
 img_rows, img_cols = 28, 28

 # the data, shuffled and split between train and test sets
-(X_train, _), (X_test, _) = mnist.load_data()
+(x_train, _), (x_test, _) = mnist.load_data()

-X_train = X_train.reshape(X_train.shape[0], 1, img_rows, img_cols)
-X_test = X_test.reshape(X_test.shape[0], 1, img_rows, img_cols)
-X_train = X_train.astype('float32')
-X_test = X_test.astype('float32')
-X_train /= 255
-X_test /= 255
-print('X_train shape:', X_train.shape)
-print(X_train.shape[0], 'train samples')
-print(X_test.shape[0], 'test samples')
+x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
+x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
+x_train = x_train.astype('float32')
+x_test = x_test.astype('float32')
+x_train /= 255
+x_test /= 255
+print('x_train shape:', x_train.shape)
+print(x_train.shape[0], 'train samples')
+print(x_test.shape[0], 'test samples')

 # The size of the kernel used for the MaxPooling2D
 pool_size = 2
@ -116,41 +116,40 @@ batch_size = 128

 if pool_size == 2:
    # if using a 5 layer net of pool_size = 2
-    X_train = np.pad(X_train, [[0, 0], [0, 0], [2, 2], [2, 2]],
+    x_train = np.pad(x_train, [[0, 0], [0, 0], [2, 2], [2, 2]],
                     mode='constant')
-    X_test = np.pad(X_test, [[0, 0], [0, 0], [2, 2], [2, 2]], mode='constant')
+    x_test = np.pad(x_test, [[0, 0], [0, 0], [2, 2], [2, 2]], mode='constant')
    nlayers = 5
 elif pool_size == 3:
    # if using a 3 layer net of pool_size = 3
-    X_train = X_train[:, :, :-1, :-1]
-    X_test = X_test[:, :, :-1, :-1]
+    x_train = x_train[:, :, :-1, :-1]
+    x_test = x_test[:, :, :-1, :-1]
    nlayers = 3
 else:
    import sys
-    sys.exit("Script supports pool_size of 2 and 3.")
+    sys.exit('Script supports pool_size of 2 and 3.')

 # Shape of input to train on (note that model is fully convolutional however)
-input_shape = X_train.shape[1:]
+input_shape = x_train.shape[1:]
 # The final list of the size of axis=1 for all layers, including input
 nfeats_all = [input_shape[0]] + nfeats

-# First build the encoder, all the while keeping track of the "where" masks
+# First build the encoder, all the while keeping track of the 'where' masks
 img_input = Input(shape=input_shape)

-# We push the "where" masks to the following list
+# We push the 'where' masks to the following list
 wheres = [None] * nlayers
 y = img_input
 for i in range(nlayers):
    y_prepool = convresblock(y, nfeats=nfeats_all[i + 1], ksize=ksize)
    y = MaxPooling2D(pool_size=(pool_sizes[i], pool_sizes[i]))(y_prepool)
-    wheres[i] = merge([y_prepool, y], mode=getwhere,
-                      output_shape=lambda x: x[0])
+    wheres[i] = layers.Lambda(getwhere, output_shape=lambda x: x[0])([y_prepool, y])

-# Now build the decoder, and use the stored "where" masks to place the features
+# Now build the decoder, and use the stored 'where' masks to place the features
 for i in range(nlayers):
    ind = nlayers - 1 - i
    y = UpSampling2D(size=(pool_sizes[ind], pool_sizes[ind]))(y)
-    y = merge([y, wheres[ind]], mode='mul')
+    y = layers.multiply([y, wheres[ind]])
    y = convresblock(y, nfeats=nfeats_all[ind], ksize=ksize)

 # Use hard_simgoid to clip range of reconstruction
@ -161,16 +160,16 @@ model = Model(img_input, y)
 model.compile('adam', 'mse')

 # Fit the model
-model.fit(X_train, X_train, validation_data=(X_test, X_test),
+model.fit(x_train, x_train, validation_data=(x_test, x_test),
          batch_size=batch_size, epochs=epochs)

 # Plot
-X_recon = model.predict(X_test[:25])
-X_plot = np.concatenate((X_test[:25], X_recon), axis=1)
-X_plot = X_plot.reshape((5, 10, input_shape[-2], input_shape[-1]))
-X_plot = np.vstack([np.hstack(x) for x in X_plot])
+x_recon = model.predict(x_test[:25])
+x_plot = np.concatenate((x_test[:25], x_recon), axis=1)
+x_plot = x_plot.reshape((5, 10, input_shape[-2], input_shape[-1]))
+x_plot = np.vstack([np.hstack(x) for x in x_plot])
 plt.figure()
 plt.axis('off')
 plt.title('Test Samples: Originals/Reconstructions')
-plt.imshow(X_plot, interpolation='none', cmap='gray')
+plt.imshow(x_plot, interpolation='none', cmap='gray')
 plt.savefig('reconstructions.png')
--- a/examples/neural_doodle.py
+++ b/examples/neural_doodle.py
@ -122,7 +122,7 @@ def kmeans(xs, k):
    assert xs.ndim == 2
    try:
        from sklearn.cluster import k_means
-        _, labels, _ = k_means(xs.astype("float64"), k)
+        _, labels, _ = k_means(xs.astype('float64'), k)
    except ImportError:
        from scipy.cluster.vq import kmeans2
        _, labels = kmeans2(xs, k, missing='raise')
@ -179,8 +179,8 @@ images = K.concatenate([style_image, target_image, content_image], axis=0)

 # Create tensor variables for masks
 raw_style_mask, raw_target_mask = load_mask_labels()
-style_mask = K.variable(raw_style_mask.astype("float32"))
-target_mask = K.variable(raw_target_mask.astype("float32"))
+style_mask = K.variable(raw_style_mask.astype('float32'))
+target_mask = K.variable(raw_target_mask.astype('float32'))
 masks = K.concatenate([style_mask, target_mask], axis=0)

 # index constants for images and tasks variables
@ -191,13 +191,13 @@ STYLE, TARGET, CONTENT = 0, 1, 2
 image_model = vgg19.VGG19(include_top=False, input_tensor=images)

 # mask model as a series of pooling
-mask_input = Input(tensor=masks, shape=(None, None, None), name="mask_input")
+mask_input = Input(tensor=masks, shape=(None, None, None), name='mask_input')
 x = mask_input
 for layer in image_model.layers[1:]:
    name = 'mask_%s' % layer.name
    if 'conv' in layer.name:
        x = AveragePooling2D((3, 3), strides=(
-            1, 1), name=name, border_mode="same")(x)
+            1, 1), name=name, border_mode='same')(x)
    elif 'pool' in layer.name:
        x = AveragePooling2D((2, 2), name=name)(x)
 mask_model = Model(mask_input, x)
--- a/examples/stateful_lstm.py
+++ b/examples/stateful_lstm.py
@ -36,7 +36,7 @@ def gen_cosine_amp(amp=100, period=1000, x0=0, xn=50000, step=1, k=0.0001):
    return cos


-print('Generating Data')
+print('Generating Data...')
 cos = gen_cosine_amp()
 print('Input shape:', cos.shape)

@ -44,13 +44,13 @@ expected_output = np.zeros((len(cos), 1))
 for i in range(len(cos) - lahead):
    expected_output[i, 0] = np.mean(cos[i + 1:i + lahead + 1])

-print('Output shape')
-print(expected_output.shape)
+print('Output shape:', expected_output.shape)

-print('Creating Model')
+print('Creating Model...')
 model = Sequential()
 model.add(LSTM(50,
-               batch_input_shape=(batch_size, tsteps, 1),
+               input_shape=(tsteps, 1),
+               batch_size=batch_size,
               return_sequences=True,
               stateful=True))
 model.add(LSTM(50,