Finish updating examples.
This commit is contained in:
parent
84711475f8
commit
aa826d684d
@@ -12,8 +12,8 @@ References:
|
|||||||
Reaches 98.6% accuracy on task 'single_supporting_fact_10k' after 120 epochs.
|
Reaches 98.6% accuracy on task 'single_supporting_fact_10k' after 120 epochs.
|
||||||
Time per epoch: 3s on CPU (Core i7).
|
Time per epoch: 3s on CPU (Core i7).
|
||||||
'''
|
'''
|
||||||
|
|
||||||
from __future__ import print_function
|
from __future__ import print_function
|
||||||
|
|
||||||
from keras.models import Sequential
|
from keras.models import Sequential
|
||||||
from keras.layers.embeddings import Embedding
|
from keras.layers.embeddings import Embedding
|
||||||
from keras.layers import Activation, Dense, Merge, Permute, Dropout
|
from keras.layers import Activation, Dense, Merge, Permute, Dropout
|
||||||
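Context, not part of the diff: the Merge layer imported here is Keras 1 API; later hunks in this commit migrate such calls to the Keras 2 functional helpers. An illustrative mapping (a and b are hypothetical tensors):

    # Keras 1 (deprecated):  Merge([a, b], mode='mul')
    # Keras 2 helpers:       layers.multiply([a, b]), layers.add([a, b]),
    #                        layers.concatenate([a, b]), layers.dot([a, b], axes=-1)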
|
@@ -5,6 +5,7 @@ This script can run on CPU in a few minutes (with the TensorFlow backend).
|
|||||||
Results example: http://i.imgur.com/4nj4KjN.jpg
|
Results example: http://i.imgur.com/4nj4KjN.jpg
|
||||||
'''
|
'''
|
||||||
from __future__ import print_function
|
from __future__ import print_function
|
||||||
|
|
||||||
from scipy.misc import imsave
|
from scipy.misc import imsave
|
||||||
import numpy as np
|
import numpy as np
|
||||||
import time
|
import time
|
||||||
|
@@ -15,6 +15,7 @@ If running on CPU, prefer the TensorFlow backend (much faster).
|
|||||||
Example results: http://i.imgur.com/FX6ROg9.jpg
|
Example results: http://i.imgur.com/FX6ROg9.jpg
|
||||||
'''
|
'''
|
||||||
from __future__ import print_function
|
from __future__ import print_function
|
||||||
|
|
||||||
from keras.preprocessing.image import load_img, img_to_array
|
from keras.preprocessing.image import load_img, img_to_array
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from scipy.misc import imsave
|
from scipy.misc import imsave
|
||||||
@@ -57,21 +58,19 @@ saved_settings = {
|
|||||||
# the settings we will use in this experiment
|
# the settings we will use in this experiment
|
||||||
settings = saved_settings['dreamy']
|
settings = saved_settings['dreamy']
|
||||||
|
|
||||||
# util function to open, resize and format pictures into appropriate tensors
|
|
||||||
|
|
||||||
|
|
||||||
def preprocess_image(image_path):
|
def preprocess_image(image_path):
|
||||||
|
# util function to open, resize and format pictures
|
||||||
|
# into appropriate tensors
|
||||||
img = load_img(image_path, target_size=(img_height, img_width))
|
img = load_img(image_path, target_size=(img_height, img_width))
|
||||||
img = img_to_array(img)
|
img = img_to_array(img)
|
||||||
img = np.expand_dims(img, axis=0)
|
img = np.expand_dims(img, axis=0)
|
||||||
img = vgg16.preprocess_input(img)
|
img = vgg16.preprocess_input(img)
|
||||||
return img
|
return img
|
||||||
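For context, a minimal usage sketch of the helper above; the file name is a placeholder, and the printed shape assumes the channels_last data format:

    img_tensor = preprocess_image('base.jpg')  # hypothetical path
    print(img_tensor.shape)  # (1, img_height, img_width, 3) under channels_last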
|
|
||||||
# util function to convert a tensor into a valid image
|
|
||||||
|
|
||||||
|
|
||||||
def deprocess_image(x):
|
def deprocess_image(x):
|
||||||
|
# util function to convert a tensor into a valid image
|
||||||
if K.image_data_format() == 'channels_first':
|
if K.image_data_format() == 'channels_first':
|
||||||
x = x.reshape((3, img_height, img_width))
|
x = x.reshape((3, img_height, img_width))
|
||||||
x = x.transpose((1, 2, 0))
|
x = x.transpose((1, 2, 0))
|
||||||
@@ -102,10 +101,9 @@ print('Model loaded.')
|
|||||||
# get the symbolic outputs of each "key" layer (we gave them unique names).
|
# get the symbolic outputs of each "key" layer (we gave them unique names).
|
||||||
layer_dict = dict([(layer.name, layer) for layer in model.layers])
|
layer_dict = dict([(layer.name, layer) for layer in model.layers])
|
||||||
|
|
||||||
# continuity loss util function
|
|
||||||
|
|
||||||
|
|
||||||
def continuity_loss(x):
|
def continuity_loss(x):
|
||||||
|
# continuity loss util function
|
||||||
assert K.ndim(x) == 4
|
assert K.ndim(x) == 4
|
||||||
if K.image_data_format() == 'channels_first':
|
if K.image_data_format() == 'channels_first':
|
||||||
a = K.square(x[:, :, :img_height - 1, :img_width - 1] -
|
a = K.square(x[:, :, :img_height - 1, :img_width - 1] -
|
||||||
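For context (a hedged reconstruction, not part of the diff): this branch presumably pairs the vertical difference `a` above with a horizontal one and returns a 1.25-power total-variation sum:

    b = K.square(x[:, :, :img_height - 1, :img_width - 1] -
                 x[:, :, :img_height - 1, 1:])
    return K.sum(K.pow(a + b, 1.25))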
@@ -162,15 +160,17 @@ def eval_loss_and_grads(x):
|
|||||||
grad_values = np.array(outs[1:]).flatten().astype('float64')
|
grad_values = np.array(outs[1:]).flatten().astype('float64')
|
||||||
return loss_value, grad_values
|
return loss_value, grad_values
|
||||||
|
|
||||||
# this Evaluator class makes it possible
|
|
||||||
# to compute loss and gradients in one pass
|
|
||||||
# while retrieving them via two separate functions,
|
|
||||||
# "loss" and "grads". This is done because scipy.optimize
|
|
||||||
# requires separate functions for loss and gradients,
|
|
||||||
# but computing them separately would be inefficient.
|
|
||||||
|
|
||||||
|
|
||||||
class Evaluator(object):
|
class Evaluator(object):
|
||||||
|
"""Loss and gradients evaluator.
|
||||||
|
|
||||||
|
This Evaluator class makes it possible
|
||||||
|
to compute loss and gradients in one pass
|
||||||
|
while retrieving them via two separate functions,
|
||||||
|
"loss" and "grads". This is done because scipy.optimize
|
||||||
|
requires separate functions for loss and gradients,
|
||||||
|
but computing them separately would be inefficient.
|
||||||
|
"""
|
||||||
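A minimal sketch of the caching pattern the docstring describes, assuming the eval_loss_and_grads function defined above: loss computes and stores both values in one pass, grads returns and clears the cached gradient:

    def loss(self, x):
        assert self.loss_value is None
        loss_value, grad_values = eval_loss_and_grads(x)
        self.loss_value = loss_value
        self.grad_values = grad_values
        return self.loss_value

    def grads(self, x):
        assert self.loss_value is not None
        grad_values = np.copy(self.grad_values)
        self.loss_value = None
        self.grad_values = None
        return grad_values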
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.loss_value = None
|
self.loss_value = None
|
||||||
@@ -192,22 +192,23 @@ class Evaluator(object):
|
|||||||
|
|
||||||
evaluator = Evaluator()
|
evaluator = Evaluator()
|
||||||
|
|
||||||
# run scipy-based optimization (L-BFGS) over the pixels of the generated image
|
# Run scipy-based optimization (L-BFGS) over the pixels of the generated image
|
||||||
# so as to minimize the loss
|
# so as to minimize the loss
|
||||||
x = preprocess_image(base_image_path)
|
x = preprocess_image(base_image_path)
|
||||||
for i in range(5):
|
for i in range(5):
|
||||||
print('Start of iteration', i)
|
print('Start of iteration', i)
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
|
|
||||||
# add a random jitter to the initial image. This will be reverted at decoding time
|
# Add a random jitter to the initial image.
|
||||||
|
# This will be reverted at decoding time
|
||||||
random_jitter = (settings['jitter'] * 2) * (np.random.random(img_size) - 0.5)
|
random_jitter = (settings['jitter'] * 2) * (np.random.random(img_size) - 0.5)
|
||||||
x += random_jitter
|
x += random_jitter
|
||||||
|
|
||||||
# run L-BFGS for 7 steps
|
# Run L-BFGS for 7 steps
|
||||||
x, min_val, info = fmin_l_bfgs_b(evaluator.loss, x.flatten(),
|
x, min_val, info = fmin_l_bfgs_b(evaluator.loss, x.flatten(),
|
||||||
fprime=evaluator.grads, maxfun=7)
|
fprime=evaluator.grads, maxfun=7)
|
||||||
print('Current loss value:', min_val)
|
print('Current loss value:', min_val)
|
||||||
# decode the dream and save it
|
# Decode the dream and save it
|
||||||
x = x.reshape(img_size)
|
x = x.reshape(img_size)
|
||||||
x -= random_jitter
|
x -= random_jitter
|
||||||
img = deprocess_image(np.copy(x))
|
img = deprocess_image(np.copy(x))
|
||||||
|
@@ -6,7 +6,6 @@ Time per epoch on CPU (Core i7): ~150s.
|
|||||||
|
|
||||||
from __future__ import print_function
|
from __future__ import print_function
|
||||||
import numpy as np
|
import numpy as np
|
||||||
np.random.seed(1337) # for reproducibility
|
|
||||||
|
|
||||||
from keras.preprocessing import sequence
|
from keras.preprocessing import sequence
|
||||||
from keras.models import Sequential
|
from keras.models import Sequential
|
||||||
@@ -15,19 +14,21 @@ from keras.datasets import imdb
|
|||||||
|
|
||||||
|
|
||||||
max_features = 20000
|
max_features = 20000
|
||||||
maxlen = 100 # cut texts after this number of words (among top max_features most common words)
|
# cut texts after this number of words
|
||||||
|
# (among top max_features most common words)
|
||||||
|
maxlen = 100
|
||||||
batch_size = 32
|
batch_size = 32
|
||||||
|
|
||||||
print('Loading data...')
|
print('Loading data...')
|
||||||
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=max_features)
|
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
|
||||||
print(len(X_train), 'train sequences')
|
print(len(x_train), 'train sequences')
|
||||||
print(len(X_test), 'test sequences')
|
print(len(x_test), 'test sequences')
|
||||||
|
|
||||||
print("Pad sequences (samples x time)")
|
print("Pad sequences (samples x time)")
|
||||||
X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
|
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
|
||||||
X_test = sequence.pad_sequences(X_test, maxlen=maxlen)
|
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
|
||||||
print('X_train shape:', X_train.shape)
|
print('x_train shape:', x_train.shape)
|
||||||
print('X_test shape:', X_test.shape)
|
print('x_test shape:', x_test.shape)
|
||||||
y_train = np.array(y_train)
|
y_train = np.array(y_train)
|
||||||
y_test = np.array(y_test)
|
y_test = np.array(y_test)
|
||||||
|
|
||||||
@@ -41,7 +42,7 @@ model.add(Dense(1, activation='sigmoid'))
|
|||||||
model.compile('adam', 'binary_crossentropy', metrics=['accuracy'])
|
model.compile('adam', 'binary_crossentropy', metrics=['accuracy'])
|
||||||
|
|
||||||
print('Train...')
|
print('Train...')
|
||||||
model.fit(X_train, y_train,
|
model.fit(x_train, y_train,
|
||||||
batch_size=batch_size,
|
batch_size=batch_size,
|
||||||
epochs=4,
|
epochs=4,
|
||||||
validation_data=[X_test, y_test])
|
validation_data=[x_test, y_test])
|
||||||
|
@@ -7,8 +7,6 @@ Gets to 0.89 test accuracy after 2 epochs.
|
|||||||
'''
|
'''
|
||||||
|
|
||||||
from __future__ import print_function
|
from __future__ import print_function
|
||||||
import numpy as np
|
|
||||||
np.random.seed(1337) # for reproducibility
|
|
||||||
|
|
||||||
from keras.preprocessing import sequence
|
from keras.preprocessing import sequence
|
||||||
from keras.models import Sequential
|
from keras.models import Sequential
|
||||||
@@ -17,7 +15,6 @@ from keras.layers import Embedding
|
|||||||
from keras.layers import Conv1D, GlobalMaxPooling1D
|
from keras.layers import Conv1D, GlobalMaxPooling1D
|
||||||
from keras.datasets import imdb
|
from keras.datasets import imdb
|
||||||
|
|
||||||
|
|
||||||
# set parameters:
|
# set parameters:
|
||||||
max_features = 5000
|
max_features = 5000
|
||||||
maxlen = 400
|
maxlen = 400
|
||||||
@@ -29,15 +26,15 @@ hidden_dims = 250
|
|||||||
epochs = 2
|
epochs = 2
|
||||||
|
|
||||||
print('Loading data...')
|
print('Loading data...')
|
||||||
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=max_features)
|
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
|
||||||
print(len(X_train), 'train sequences')
|
print(len(x_train), 'train sequences')
|
||||||
print(len(X_test), 'test sequences')
|
print(len(x_test), 'test sequences')
|
||||||
|
|
||||||
print('Pad sequences (samples x time)')
|
print('Pad sequences (samples x time)')
|
||||||
X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
|
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
|
||||||
X_test = sequence.pad_sequences(X_test, maxlen=maxlen)
|
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
|
||||||
print('X_train shape:', X_train.shape)
|
print('x_train shape:', x_train.shape)
|
||||||
print('X_test shape:', X_test.shape)
|
print('x_test shape:', x_test.shape)
|
||||||
|
|
||||||
print('Build model...')
|
print('Build model...')
|
||||||
model = Sequential()
|
model = Sequential()
|
||||||
@@ -71,7 +68,7 @@ model.add(Activation('sigmoid'))
|
|||||||
model.compile(loss='binary_crossentropy',
|
model.compile(loss='binary_crossentropy',
|
||||||
optimizer='adam',
|
optimizer='adam',
|
||||||
metrics=['accuracy'])
|
metrics=['accuracy'])
|
||||||
model.fit(X_train, y_train,
|
model.fit(x_train, y_train,
|
||||||
batch_size=batch_size,
|
batch_size=batch_size,
|
||||||
epochs=epochs,
|
epochs=epochs,
|
||||||
validation_data=(X_test, y_test))
|
validation_data=(x_test, y_test))
|
||||||
|
@@ -4,8 +4,6 @@ classification task.
|
|||||||
Gets to 0.8498 test accuracy after 2 epochs. 41s/epoch on K520 GPU.
|
Gets to 0.8498 test accuracy after 2 epochs. 41s/epoch on K520 GPU.
|
||||||
'''
|
'''
|
||||||
from __future__ import print_function
|
from __future__ import print_function
|
||||||
import numpy as np
|
|
||||||
np.random.seed(1337) # for reproducibility
|
|
||||||
|
|
||||||
from keras.preprocessing import sequence
|
from keras.preprocessing import sequence
|
||||||
from keras.models import Sequential
|
from keras.models import Sequential
|
||||||
@@ -15,7 +13,6 @@ from keras.layers import LSTM
|
|||||||
from keras.layers import Conv1D, MaxPooling1D
|
from keras.layers import Conv1D, MaxPooling1D
|
||||||
from keras.datasets import imdb
|
from keras.datasets import imdb
|
||||||
|
|
||||||
|
|
||||||
# Embedding
|
# Embedding
|
||||||
max_features = 20000
|
max_features = 20000
|
||||||
maxlen = 100
|
maxlen = 100
|
||||||
@@ -40,15 +37,15 @@ Only 2 epochs are needed as the dataset is very small.
|
|||||||
'''
|
'''
|
||||||
|
|
||||||
print('Loading data...')
|
print('Loading data...')
|
||||||
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=max_features)
|
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
|
||||||
print(len(X_train), 'train sequences')
|
print(len(x_train), 'train sequences')
|
||||||
print(len(X_test), 'test sequences')
|
print(len(x_test), 'test sequences')
|
||||||
|
|
||||||
print('Pad sequences (samples x time)')
|
print('Pad sequences (samples x time)')
|
||||||
X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
|
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
|
||||||
X_test = sequence.pad_sequences(X_test, maxlen=maxlen)
|
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
|
||||||
print('X_train shape:', X_train.shape)
|
print('x_train shape:', x_train.shape)
|
||||||
print('X_test shape:', X_test.shape)
|
print('x_test shape:', x_test.shape)
|
||||||
|
|
||||||
print('Build model...')
|
print('Build model...')
|
||||||
|
|
||||||
@@ -70,8 +67,8 @@ model.compile(loss='binary_crossentropy',
|
|||||||
metrics=['accuracy'])
|
metrics=['accuracy'])
|
||||||
|
|
||||||
print('Train...')
|
print('Train...')
|
||||||
model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs,
|
model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs,
|
||||||
validation_data=(X_test, y_test))
|
validation_data=(x_test, y_test))
|
||||||
score, acc = model.evaluate(X_test, y_test, batch_size=batch_size)
|
score, acc = model.evaluate(x_test, y_test, batch_size=batch_size)
|
||||||
print('Test score:', score)
|
print('Test score:', score)
|
||||||
print('Test accuracy:', acc)
|
print('Test accuracy:', acc)
|
||||||
|
@@ -7,12 +7,11 @@ https://arxiv.org/abs/1607.01759
|
|||||||
|
|
||||||
Results on IMDB datasets with uni and bi-gram embeddings:
|
Results on IMDB datasets with uni and bi-gram embeddings:
|
||||||
Uni-gram: 0.8813 test accuracy after 5 epochs. 8s/epoch on i7 cpu.
|
Uni-gram: 0.8813 test accuracy after 5 epochs. 8s/epoch on i7 cpu.
|
||||||
Bi-gram : 0.9056 test accuracy after 5 epochs. 2s/epoch on GTX 980M gpu.
|
Bi-gram : 0.9056 test accuracy after 5 epochs. 2s/epoch on GTX 980M gpu.
|
||||||
'''
|
'''
|
||||||
|
|
||||||
from __future__ import print_function
|
from __future__ import print_function
|
||||||
import numpy as np
|
import numpy as np
|
||||||
np.random.seed(1337) # for reproducibility
|
|
||||||
|
|
||||||
from keras.preprocessing import sequence
|
from keras.preprocessing import sequence
|
||||||
from keras.models import Sequential
|
from keras.models import Sequential
|
||||||
@@ -73,17 +72,17 @@ embedding_dims = 50
|
|||||||
epochs = 5
|
epochs = 5
|
||||||
|
|
||||||
print('Loading data...')
|
print('Loading data...')
|
||||||
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=max_features)
|
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
|
||||||
print(len(X_train), 'train sequences')
|
print(len(x_train), 'train sequences')
|
||||||
print(len(X_test), 'test sequences')
|
print(len(x_test), 'test sequences')
|
||||||
print('Average train sequence length: {}'.format(np.mean(list(map(len, X_train)), dtype=int)))
|
print('Average train sequence length: {}'.format(np.mean(list(map(len, x_train)), dtype=int)))
|
||||||
print('Average test sequence length: {}'.format(np.mean(list(map(len, X_test)), dtype=int)))
|
print('Average test sequence length: {}'.format(np.mean(list(map(len, x_test)), dtype=int)))
|
||||||
|
|
||||||
if ngram_range > 1:
|
if ngram_range > 1:
|
||||||
print('Adding {}-gram features'.format(ngram_range))
|
print('Adding {}-gram features'.format(ngram_range))
|
||||||
# Create set of unique n-grams from the training set.
|
# Create set of unique n-grams from the training set.
|
||||||
ngram_set = set()
|
ngram_set = set()
|
||||||
for input_list in X_train:
|
for input_list in x_train:
|
||||||
for i in range(2, ngram_range + 1):
|
for i in range(2, ngram_range + 1):
|
||||||
set_of_ngram = create_ngram_set(input_list, ngram_value=i)
|
set_of_ngram = create_ngram_set(input_list, ngram_value=i)
|
||||||
ngram_set.update(set_of_ngram)
|
ngram_set.update(set_of_ngram)
|
||||||
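For context, the two helpers called in this hunk are defined earlier in the script; a hedged sketch consistent with the calls above:

    def create_ngram_set(input_list, ngram_value=2):
        # e.g. [1, 4, 9, 4] with ngram_value=2 -> {(1, 4), (4, 9), (9, 4)}
        return set(zip(*[input_list[i:] for i in range(ngram_value)]))

    def add_ngram(sequences, token_indice, ngram_range=2):
        # Append the index of every known n-gram to its source sequence.
        new_sequences = []
        for input_list in sequences:
            new_list = list(input_list)
            for ngram_value in range(2, ngram_range + 1):
                for i in range(len(input_list) - ngram_value + 1):
                    ngram = tuple(input_list[i:i + ngram_value])
                    if ngram in token_indice:
                        new_list.append(token_indice[ngram])
            new_sequences.append(new_list)
        return new_sequences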
@@ -98,17 +97,17 @@ if ngram_range > 1:
|
|||||||
# max_features is the highest integer that could be found in the dataset.
|
# max_features is the highest integer that could be found in the dataset.
|
||||||
max_features = np.max(list(indice_token.keys())) + 1
|
max_features = np.max(list(indice_token.keys())) + 1
|
||||||
|
|
||||||
# Augmenting X_train and X_test with n-gram features
|
# Augmenting x_train and x_test with n-gram features
|
||||||
X_train = add_ngram(X_train, token_indice, ngram_range)
|
x_train = add_ngram(x_train, token_indice, ngram_range)
|
||||||
X_test = add_ngram(X_test, token_indice, ngram_range)
|
x_test = add_ngram(x_test, token_indice, ngram_range)
|
||||||
print('Average train sequence length: {}'.format(np.mean(list(map(len, X_train)), dtype=int)))
|
print('Average train sequence length: {}'.format(np.mean(list(map(len, x_train)), dtype=int)))
|
||||||
print('Average test sequence length: {}'.format(np.mean(list(map(len, X_test)), dtype=int)))
|
print('Average test sequence length: {}'.format(np.mean(list(map(len, x_test)), dtype=int)))
|
||||||
|
|
||||||
print('Pad sequences (samples x time)')
|
print('Pad sequences (samples x time)')
|
||||||
X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
|
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
|
||||||
X_test = sequence.pad_sequences(X_test, maxlen=maxlen)
|
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
|
||||||
print('X_train shape:', X_train.shape)
|
print('x_train shape:', x_train.shape)
|
||||||
print('X_test shape:', X_test.shape)
|
print('x_test shape:', x_test.shape)
|
||||||
|
|
||||||
print('Build model...')
|
print('Build model...')
|
||||||
model = Sequential()
|
model = Sequential()
|
||||||
@@ -130,7 +129,7 @@ model.compile(loss='binary_crossentropy',
|
|||||||
optimizer='adam',
|
optimizer='adam',
|
||||||
metrics=['accuracy'])
|
metrics=['accuracy'])
|
||||||
|
|
||||||
model.fit(X_train, y_train,
|
model.fit(x_train, y_train,
|
||||||
batch_size=batch_size,
|
batch_size=batch_size,
|
||||||
epochs=epochs,
|
epochs=epochs,
|
||||||
validation_data=(X_test, y_test))
|
validation_data=(x_test, y_test))
|
||||||
|
@@ -11,11 +11,10 @@ Some configurations won't converge.
|
|||||||
from what you see with CNNs/MLPs/etc.
|
from what you see with CNNs/MLPs/etc.
|
||||||
'''
|
'''
|
||||||
from __future__ import print_function
|
from __future__ import print_function
|
||||||
import numpy as np
|
|
||||||
|
|
||||||
from keras.preprocessing import sequence
|
from keras.preprocessing import sequence
|
||||||
from keras.models import Sequential
|
from keras.models import Sequential
|
||||||
from keras.layers import Dense, Activation, Embedding
|
from keras.layers import Dense, Embedding
|
||||||
from keras.layers import LSTM
|
from keras.layers import LSTM
|
||||||
from keras.datasets import imdb
|
from keras.datasets import imdb
|
||||||
|
|
||||||
|
@@ -43,9 +43,13 @@ for mode in modes:
|
|||||||
print('Testing mode: implementation={}'.format(mode))
|
print('Testing mode: implementation={}'.format(mode))
|
||||||
|
|
||||||
model = Sequential()
|
model = Sequential()
|
||||||
model.add(Embedding(max_features, embedding_dim, input_length=max_length))
|
model.add(Embedding(max_features, embedding_dim,
|
||||||
|
input_length=max_length))
|
||||||
model.add(Dropout(0.2))
|
model.add(Dropout(0.2))
|
||||||
model.add(LSTM(embedding_dim, dropout=0.2, recurrent_dropout=0.2, implementation=mode))
|
model.add(LSTM(embedding_dim,
|
||||||
|
dropout=0.2,
|
||||||
|
recurrent_dropout=0.2,
|
||||||
|
implementation=mode))
|
||||||
model.add(Dense(1, activation='sigmoid'))
|
model.add(Dense(1, activation='sigmoid'))
|
||||||
model.compile(loss='binary_crossentropy',
|
model.compile(loss='binary_crossentropy',
|
||||||
optimizer='adam',
|
optimizer='adam',
|
||||||
|
@@ -20,7 +20,7 @@ import numpy as np
|
|||||||
import random
|
import random
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
path = get_file('nietzsche.txt', origin="https://s3.amazonaws.com/text-datasets/nietzsche.txt")
|
path = get_file('nietzsche.txt', origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt')
|
||||||
text = open(path).read().lower()
|
text = open(path).read().lower()
|
||||||
print('corpus length:', len(text))
|
print('corpus length:', len(text))
|
||||||
|
|
||||||
|
@@ -6,8 +6,8 @@ MNIST dataset. See https://arxiv.org/abs/1610.09585 for more details.
|
|||||||
|
|
||||||
You should start to see reasonable images after ~5 epochs, and good images
|
You should start to see reasonable images after ~5 epochs, and good images
|
||||||
by ~15 epochs. You should use a GPU, as the convolution-heavy operations are
|
by ~15 epochs. You should use a GPU, as the convolution-heavy operations are
|
||||||
very slow on the CPU. Prefer the TensorFlow backend if you plan on iterating, as
|
very slow on the CPU. Prefer the TensorFlow backend if you plan on iterating,
|
||||||
the compilation time can be a blocker using Theano.
|
as the compilation time can be a blocker using Theano.
|
||||||
|
|
||||||
Timings:
|
Timings:
|
||||||
|
|
||||||
@@ -33,9 +33,10 @@ from six.moves import range
|
|||||||
|
|
||||||
import keras.backend as K
|
import keras.backend as K
|
||||||
from keras.datasets import mnist
|
from keras.datasets import mnist
|
||||||
from keras.layers import Input, Dense, Reshape, Flatten, Embedding, merge, Dropout
|
from keras import layers
|
||||||
|
from keras.layers import Input, Dense, Reshape, Flatten, Embedding, Dropout
|
||||||
from keras.layers.advanced_activations import LeakyReLU
|
from keras.layers.advanced_activations import LeakyReLU
|
||||||
from keras.layers.convolutional import UpSampling2D, Convolution2D
|
from keras.layers.convolutional import UpSampling2D, Conv2D
|
||||||
from keras.models import Sequential, Model
|
from keras.models import Sequential, Model
|
||||||
from keras.optimizers import Adam
|
from keras.optimizers import Adam
|
||||||
from keras.utils.generic_utils import Progbar
|
from keras.utils.generic_utils import Progbar
|
||||||
@@ -57,17 +58,20 @@ def build_generator(latent_size):
|
|||||||
|
|
||||||
# upsample to (..., 14, 14)
|
# upsample to (..., 14, 14)
|
||||||
cnn.add(UpSampling2D(size=(2, 2)))
|
cnn.add(UpSampling2D(size=(2, 2)))
|
||||||
cnn.add(Convolution2D(256, 5, padding='same',
|
cnn.add(Conv2D(256, 5, padding='same',
|
||||||
activation='relu', kernel_initializer='glorot_normal'))
|
activation='relu',
|
||||||
|
kernel_initializer='glorot_normal'))
|
||||||
|
|
||||||
# upsample to (..., 28, 28)
|
# upsample to (..., 28, 28)
|
||||||
cnn.add(UpSampling2D(size=(2, 2)))
|
cnn.add(UpSampling2D(size=(2, 2)))
|
||||||
cnn.add(Convolution2D(128, 5, padding='same',
|
cnn.add(Conv2D(128, 5, padding='same',
|
||||||
activation='relu', kernel_initializer='glorot_normal'))
|
activation='relu',
|
||||||
|
kernel_initializer='glorot_normal'))
|
||||||
|
|
||||||
# take a channel axis reduction
|
# take a channel axis reduction
|
||||||
cnn.add(Convolution2D(1, 2, padding='same',
|
cnn.add(Conv2D(1, 2, padding='same',
|
||||||
activation='tanh', kernel_initializer='glorot_normal'))
|
activation='tanh',
|
||||||
|
kernel_initializer='glorot_normal'))
|
||||||
|
|
||||||
# this is the z space commonly referred to in GAN papers
|
# this is the z space commonly referred to in GAN papers
|
||||||
latent = Input(shape=(latent_size, ))
|
latent = Input(shape=(latent_size, ))
|
||||||
@@ -80,7 +84,7 @@ def build_generator(latent_size):
|
|||||||
embeddings_initializer='glorot_normal')(image_class))
|
embeddings_initializer='glorot_normal')(image_class))
|
||||||
|
|
||||||
# hadamard product between z-space and a class conditional embedding
|
# hadamard product between z-space and a class conditional embedding
|
||||||
h = merge([latent, cls], mode='mul')
|
h = layers.multiply([latent, cls])
|
||||||
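The right-hand side is the Keras 2 functional replacement for merge(mode='mul'). A self-contained sketch of the element-wise (Hadamard) product, with illustrative shapes:

    from keras import layers
    from keras.layers import Input

    a = Input(shape=(100,))
    b = Input(shape=(100,))
    h = layers.multiply([a, b])  # element-wise product, output shape (None, 100)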
|
|
||||||
fake_image = cnn(h)
|
fake_image = cnn(h)
|
||||||
|
|
||||||
@@ -92,20 +96,20 @@ def build_discriminator():
|
|||||||
# the reference paper
|
# the reference paper
|
||||||
cnn = Sequential()
|
cnn = Sequential()
|
||||||
|
|
||||||
cnn.add(Convolution2D(32, 3, padding='same', strides=2,
|
cnn.add(Conv2D(32, 3, padding='same', strides=2,
|
||||||
input_shape=(1, 28, 28)))
|
input_shape=(1, 28, 28)))
|
||||||
cnn.add(LeakyReLU())
|
cnn.add(LeakyReLU())
|
||||||
cnn.add(Dropout(0.3))
|
cnn.add(Dropout(0.3))
|
||||||
|
|
||||||
cnn.add(Convolution2D(64, 3, padding='same', strides=2))
|
cnn.add(Conv2D(64, 3, padding='same', strides=2))
|
||||||
cnn.add(LeakyReLU())
|
cnn.add(LeakyReLU())
|
||||||
cnn.add(Dropout(0.3))
|
cnn.add(Dropout(0.3))
|
||||||
|
|
||||||
cnn.add(Convolution2D(128, 3, padding='same', strides=2))
|
cnn.add(Conv2D(128, 3, padding='same', strides=2))
|
||||||
cnn.add(LeakyReLU())
|
cnn.add(LeakyReLU())
|
||||||
cnn.add(Dropout(0.3))
|
cnn.add(Dropout(0.3))
|
||||||
|
|
||||||
cnn.add(Convolution2D(256, 3, padding='same', strides=1))
|
cnn.add(Conv2D(256, 3, padding='same', strides=1))
|
||||||
cnn.add(LeakyReLU())
|
cnn.add(LeakyReLU())
|
||||||
cnn.add(Dropout(0.3))
|
cnn.add(Dropout(0.3))
|
||||||
|
|
||||||
@@ -224,7 +228,8 @@ if __name__ == '__main__':
|
|||||||
trick = np.ones(2 * batch_size)
|
trick = np.ones(2 * batch_size)
|
||||||
|
|
||||||
epoch_gen_loss.append(combined.train_on_batch(
|
epoch_gen_loss.append(combined.train_on_batch(
|
||||||
[noise, sampled_labels.reshape((-1, 1))], [trick, sampled_labels]))
|
[noise, sampled_labels.reshape((-1, 1))],
|
||||||
|
[trick, sampled_labels]))
|
||||||
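For context: trick labels every generated sample as real, the standard non-saturating generator objective (the combined model is assumed to freeze the discriminator's weights during this call):

    # Minimising binary cross-entropy against the all-ones 'trick' labels
    # pushes D(G(z)) toward 1, i.e. trains G to fool D, while the auxiliary
    # head is still trained to recover sampled_labels.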
|
|
||||||
print('\nTesting for epoch {}:'.format(epoch + 1))
|
print('\nTesting for epoch {}:'.format(epoch + 1))
|
||||||
|
|
||||||
|
@@ -56,8 +56,8 @@ print(x_train.shape[0], 'train samples')
|
|||||||
print(x_test.shape[0], 'test samples')
|
print(x_test.shape[0], 'test samples')
|
||||||
|
|
||||||
# Converts class vectors to binary class matrices.
|
# Converts class vectors to binary class matrices.
|
||||||
Y_train = keras.utils.to_categorical(y_train, num_classes)
|
y_train = keras.utils.to_categorical(y_train, num_classes)
|
||||||
Y_test = keras.utils.to_categorical(y_test, num_classes)
|
y_test = keras.utils.to_categorical(y_test, num_classes)
|
||||||
|
|
||||||
row, col, pixel = x_train.shape[1:]
|
row, col, pixel = x_train.shape[1:]
|
||||||
|
|
||||||
@@ -78,10 +78,11 @@ model.compile(loss='categorical_crossentropy',
|
|||||||
metrics=['accuracy'])
|
metrics=['accuracy'])
|
||||||
|
|
||||||
# Training.
|
# Training.
|
||||||
model.fit(x_train, Y_train, batch_size=batch_size, epochs=epochs,
|
model.fit(x_train, y_train,
|
||||||
verbose=1, validation_data=(x_test, Y_test))
|
batch_size=batch_size, epochs=epochs,
|
||||||
|
verbose=1, validation_data=(x_test, y_test))
|
||||||
|
|
||||||
# Evaluation.
|
# Evaluation.
|
||||||
scores = model.evaluate(x_test, Y_test, verbose=0)
|
scores = model.evaluate(x_test, y_test, verbose=0)
|
||||||
print('Test loss:', scores[0])
|
print('Test loss:', scores[0])
|
||||||
print('Test accuracy:', scores[1])
|
print('Test accuracy:', scores[1])
|
||||||
|
@@ -13,7 +13,6 @@ Gets to 99.5% test accuracy after 20 epochs.
|
|||||||
from __future__ import absolute_import
|
from __future__ import absolute_import
|
||||||
from __future__ import print_function
|
from __future__ import print_function
|
||||||
import numpy as np
|
import numpy as np
|
||||||
np.random.seed(1337) # for reproducibility
|
|
||||||
|
|
||||||
import random
|
import random
|
||||||
from keras.datasets import mnist
|
from keras.datasets import mnist
|
||||||
@@ -38,7 +37,8 @@ def contrastive_loss(y_true, y_pred):
|
|||||||
http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf
|
http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf
|
||||||
'''
|
'''
|
||||||
margin = 1
|
margin = 1
|
||||||
return K.mean(y_true * K.square(y_pred) + (1 - y_true) * K.square(K.maximum(margin - y_pred, 0)))
|
return K.mean(y_true * K.square(y_pred) +
|
||||||
|
(1 - y_true) * K.square(K.maximum(margin - y_pred, 0)))
|
||||||
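A quick numeric check of the two loss terms (illustrative values, margin = 1): similar pairs (y_true = 1) are penalised by the squared distance, dissimilar pairs (y_true = 0) only when they fall inside the margin:

    import numpy as np
    d = np.array([0.2, 0.2, 1.5, 1.5])  # predicted distances
    y = np.array([1.0, 0.0, 1.0, 0.0])  # 1 = similar pair, 0 = dissimilar
    print(y * d ** 2 + (1 - y) * np.maximum(1 - d, 0) ** 2)
    # [0.04 0.64 2.25 0.  ]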
|
|
||||||
|
|
||||||
def create_pairs(x, digit_indices):
|
def create_pairs(x, digit_indices):
|
||||||
@@ -108,7 +108,8 @@ input_b = Input(shape=(input_dim,))
|
|||||||
processed_a = base_network(input_a)
|
processed_a = base_network(input_a)
|
||||||
processed_b = base_network(input_b)
|
processed_b = base_network(input_b)
|
||||||
|
|
||||||
distance = Lambda(euclidean_distance, output_shape=eucl_dist_output_shape)([processed_a, processed_b])
|
distance = Lambda(euclidean_distance,
|
||||||
|
output_shape=eucl_dist_output_shape)([processed_a, processed_b])
|
||||||
|
|
||||||
model = Model([input_a, input_b], distance)
|
model = Model([input_a, input_b], distance)
|
||||||
|
|
||||||
|
@@ -9,7 +9,7 @@ import keras
|
|||||||
from keras.datasets import mnist
|
from keras.datasets import mnist
|
||||||
from keras.models import Sequential
|
from keras.models import Sequential
|
||||||
from keras.layers import Dense, Dropout, Activation, Flatten
|
from keras.layers import Dense, Dropout, Activation, Flatten
|
||||||
from keras.layers import Convolution2D, MaxPooling2D
|
from keras.layers import Conv2D, MaxPooling2D
|
||||||
from keras.wrappers.scikit_learn import KerasClassifier
|
from keras.wrappers.scikit_learn import KerasClassifier
|
||||||
from keras import backend as K
|
from keras import backend as K
|
||||||
from sklearn.grid_search import GridSearchCV
|
from sklearn.grid_search import GridSearchCV
|
||||||
@@ -53,11 +53,11 @@ def make_model(dense_layer_sizes, filters, kernel_size, pool_size):
|
|||||||
'''
|
'''
|
||||||
|
|
||||||
model = Sequential()
|
model = Sequential()
|
||||||
model.add(Convolution2D(filters, kernel_size,
|
model.add(Conv2D(filters, kernel_size,
|
||||||
padding='valid',
|
padding='valid',
|
||||||
input_shape=input_shape))
|
input_shape=input_shape))
|
||||||
model.add(Activation('relu'))
|
model.add(Activation('relu'))
|
||||||
model.add(Convolution2D(filters, kernel_size))
|
model.add(Conv2D(filters, kernel_size))
|
||||||
model.add(Activation('relu'))
|
model.add(Activation('relu'))
|
||||||
model.add(MaxPooling2D(pool_size=pool_size))
|
model.add(MaxPooling2D(pool_size=pool_size))
|
||||||
model.add(Dropout(0.25))
|
model.add(Dropout(0.25))
|
||||||
|
@@ -2,21 +2,21 @@
|
|||||||
MNIST dataset. It exemplifies two influential methods that have been developed
|
MNIST dataset. It exemplifies two influential methods that have been developed
|
||||||
in the past few years.
|
in the past few years.
|
||||||
|
|
||||||
The first is the idea of properly "unpooling." During any max pool, the
|
The first is the idea of properly 'unpooling.' During any max pool, the
|
||||||
exact location (the "where") of the maximal value in a pooled receptive field
|
exact location (the 'where') of the maximal value in a pooled receptive field
|
||||||
is lost; however, it can be very useful in the overall reconstruction of an
|
is lost; however, it can be very useful in the overall reconstruction of an
|
||||||
input image. Therefore, if the "where" is handed from the encoder
|
input image. Therefore, if the 'where' is handed from the encoder
|
||||||
to the corresponding decoder layer, features being decoded can be "placed" in
|
to the corresponding decoder layer, features being decoded can be 'placed' in
|
||||||
the right location, allowing for reconstructions of much higher fidelity.
|
the right location, allowing for reconstructions of much higher fidelity.
|
||||||
|
|
||||||
References:
|
References:
|
||||||
[1]
|
[1]
|
||||||
"Visualizing and Understanding Convolutional Networks"
|
'Visualizing and Understanding Convolutional Networks'
|
||||||
Matthew D Zeiler, Rob Fergus
|
Matthew D Zeiler, Rob Fergus
|
||||||
https://arxiv.org/abs/1311.2901v3
|
https://arxiv.org/abs/1311.2901v3
|
||||||
|
|
||||||
[2]
|
[2]
|
||||||
"Stacked What-Where Auto-encoders"
|
'Stacked What-Where Auto-encoders'
|
||||||
Junbo Zhao, Michael Mathieu, Ross Goroshin, Yann LeCun
|
Junbo Zhao, Michael Mathieu, Ross Goroshin, Yann LeCun
|
||||||
https://arxiv.org/abs/1506.02351v8
|
https://arxiv.org/abs/1506.02351v8
|
||||||
|
|
||||||
@@ -34,42 +34,42 @@ applied as a bias because we know the MNIST digits are mapped to [0,1].
|
|||||||
|
|
||||||
References:
|
References:
|
||||||
[3]
|
[3]
|
||||||
"Deep Residual Learning for Image Recognition"
|
'Deep Residual Learning for Image Recognition'
|
||||||
Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
|
Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
|
||||||
https://arxiv.org/abs/1512.03385v1
|
https://arxiv.org/abs/1512.03385v1
|
||||||
|
|
||||||
[4]
|
[4]
|
||||||
"Identity Mappings in Deep Residual Networks"
|
'Identity Mappings in Deep Residual Networks'
|
||||||
Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
|
Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
|
||||||
https://arxiv.org/abs/1603.05027v3
|
https://arxiv.org/abs/1603.05027v3
|
||||||
|
|
||||||
'''
|
'''
|
||||||
from __future__ import print_function
|
from __future__ import print_function
|
||||||
import numpy as np
|
import numpy as np
|
||||||
np.random.seed(1337) # for reproducibility
|
|
||||||
|
|
||||||
from keras.datasets import mnist
|
from keras.datasets import mnist
|
||||||
from keras.models import Model
|
from keras.models import Model
|
||||||
from keras.layers import Activation, merge
|
from keras.layers import Activation
|
||||||
from keras.layers import UpSampling2D, Convolution2D, MaxPooling2D
|
from keras.layers import UpSampling2D, Conv2D, MaxPooling2D
|
||||||
from keras.layers import Input, BatchNormalization
|
from keras.layers import Input, BatchNormalization
|
||||||
import matplotlib.pyplot as plt
|
import matplotlib.pyplot as plt
|
||||||
import keras.backend as K
|
import keras.backend as K
|
||||||
|
from keras import layers
|
||||||
|
|
||||||
|
|
||||||
def convresblock(x, nfeats=8, ksize=3, nskipped=2):
|
def convresblock(x, nfeats=8, ksize=3, nskipped=2):
|
||||||
''' The proposed residual block from [4]'''
|
''' The proposed residual block from [4]'''
|
||||||
y0 = Convolution2D(nfeats, ksize, ksize, border_mode='same')(x)
|
y0 = Conv2D(nfeats, ksize, padding='same')(x)
|
||||||
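The rename throughout this hunk reflects the Keras 2 API: Convolution2D(filters, rows, cols, border_mode=...) becomes Conv2D(filters, kernel_size, padding=...). An illustrative before/after:

    # Keras 1 (deprecated):  Convolution2D(8, 3, 3, border_mode='same')
    # Keras 2 equivalent:    Conv2D(8, (3, 3), padding='same')
    # (an int kernel_size such as 3 is shorthand for (3, 3))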
y = y0
|
y = y0
|
||||||
for i in range(nskipped):
|
for i in range(nskipped):
|
||||||
y = BatchNormalization(mode=0, axis=1)(y)
|
y = BatchNormalization(axis=1)(y)
|
||||||
y = Activation('relu')(y)
|
y = Activation('relu')(y)
|
||||||
y = Convolution2D(nfeats, ksize, ksize, border_mode='same')(y)
|
y = Conv2D(nfeats, ksize, padding='same')(y)
|
||||||
return merge([y0, y], mode='sum')
|
return layers.add([y0, y])
|
||||||
|
|
||||||
|
|
||||||
def getwhere(x):
|
def getwhere(x):
|
||||||
''' Calculate the "where" mask that contains switches indicating which
|
''' Calculate the 'where' mask that contains switches indicating which
|
||||||
index contained the max value when MaxPool2D was applied. Using the
|
index contained the max value when MaxPool2D was applied. Using the
|
||||||
gradient of the sum is a nice trick to keep everything high level.'''
|
gradient of the sum is a nice trick to keep everything high level.'''
|
||||||
y_prepool, y_postpool = x
|
y_prepool, y_postpool = x
|
||||||
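A hedged sketch of how this function presumably ends, consistent with the docstring's gradient trick: d(sum(maxpool(x)))/dx is 1 exactly at each pooling window's argmax and 0 elsewhere, which is the desired mask:

    # Assumed continuation of the body above; with the Theano backend
    # K.gradients returns the tensor directly (TensorFlow returns a list).
    return K.gradients(K.sum(y_postpool), y_prepool)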
@@ -89,17 +89,17 @@ K.set_image_data_format('channels_first')
|
|||||||
img_rows, img_cols = 28, 28
|
img_rows, img_cols = 28, 28
|
||||||
|
|
||||||
# the data, shuffled and split between train and test sets
|
# the data, shuffled and split between train and test sets
|
||||||
(X_train, _), (X_test, _) = mnist.load_data()
|
(x_train, _), (x_test, _) = mnist.load_data()
|
||||||
|
|
||||||
X_train = X_train.reshape(X_train.shape[0], 1, img_rows, img_cols)
|
x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
|
||||||
X_test = X_test.reshape(X_test.shape[0], 1, img_rows, img_cols)
|
x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
|
||||||
X_train = X_train.astype('float32')
|
x_train = x_train.astype('float32')
|
||||||
X_test = X_test.astype('float32')
|
x_test = x_test.astype('float32')
|
||||||
X_train /= 255
|
x_train /= 255
|
||||||
X_test /= 255
|
x_test /= 255
|
||||||
print('X_train shape:', X_train.shape)
|
print('x_train shape:', x_train.shape)
|
||||||
print(X_train.shape[0], 'train samples')
|
print(x_train.shape[0], 'train samples')
|
||||||
print(X_test.shape[0], 'test samples')
|
print(x_test.shape[0], 'test samples')
|
||||||
|
|
||||||
# The size of the kernel used for the MaxPooling2D
|
# The size of the kernel used for the MaxPooling2D
|
||||||
pool_size = 2
|
pool_size = 2
|
||||||
@@ -116,41 +116,40 @@ batch_size = 128
|
|||||||
|
|
||||||
if pool_size == 2:
|
if pool_size == 2:
|
||||||
# if using a 5 layer net of pool_size = 2
|
# if using a 5 layer net of pool_size = 2
|
||||||
X_train = np.pad(X_train, [[0, 0], [0, 0], [2, 2], [2, 2]],
|
x_train = np.pad(x_train, [[0, 0], [0, 0], [2, 2], [2, 2]],
|
||||||
mode='constant')
|
mode='constant')
|
||||||
X_test = np.pad(X_test, [[0, 0], [0, 0], [2, 2], [2, 2]], mode='constant')
|
x_test = np.pad(x_test, [[0, 0], [0, 0], [2, 2], [2, 2]], mode='constant')
|
||||||
nlayers = 5
|
nlayers = 5
|
||||||
elif pool_size == 3:
|
elif pool_size == 3:
|
||||||
# if using a 3 layer net of pool_size = 3
|
# if using a 3 layer net of pool_size = 3
|
||||||
X_train = X_train[:, :, :-1, :-1]
|
x_train = x_train[:, :, :-1, :-1]
|
||||||
X_test = X_test[:, :, :-1, :-1]
|
x_test = x_test[:, :, :-1, :-1]
|
||||||
nlayers = 3
|
nlayers = 3
|
||||||
else:
|
else:
|
||||||
import sys
|
import sys
|
||||||
sys.exit("Script supports pool_size of 2 and 3.")
|
sys.exit('Script supports pool_size of 2 and 3.')
|
||||||
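A quick shape check of the padding branch above (illustrative): padding 28x28 MNIST images to 32x32 makes the side divisible by 2**5, so five pool_size=2 halvings go through cleanly:

    import numpy as np
    x = np.zeros((1, 1, 28, 28))
    x = np.pad(x, [[0, 0], [0, 0], [2, 2], [2, 2]], mode='constant')
    print(x.shape)  # (1, 1, 32, 32); 32 == 2 ** 5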
|
|
||||||
# Shape of input to train on (note that model is fully convolutional however)
|
# Shape of input to train on (note that model is fully convolutional however)
|
||||||
input_shape = X_train.shape[1:]
|
input_shape = x_train.shape[1:]
|
||||||
# The final list of the size of axis=1 for all layers, including input
|
# The final list of the size of axis=1 for all layers, including input
|
||||||
nfeats_all = [input_shape[0]] + nfeats
|
nfeats_all = [input_shape[0]] + nfeats
|
||||||
|
|
||||||
# First build the encoder, all the while keeping track of the "where" masks
|
# First build the encoder, all the while keeping track of the 'where' masks
|
||||||
img_input = Input(shape=input_shape)
|
img_input = Input(shape=input_shape)
|
||||||
|
|
||||||
# We push the "where" masks to the following list
|
# We push the 'where' masks to the following list
|
||||||
wheres = [None] * nlayers
|
wheres = [None] * nlayers
|
||||||
y = img_input
|
y = img_input
|
||||||
for i in range(nlayers):
|
for i in range(nlayers):
|
||||||
y_prepool = convresblock(y, nfeats=nfeats_all[i + 1], ksize=ksize)
|
y_prepool = convresblock(y, nfeats=nfeats_all[i + 1], ksize=ksize)
|
||||||
y = MaxPooling2D(pool_size=(pool_sizes[i], pool_sizes[i]))(y_prepool)
|
y = MaxPooling2D(pool_size=(pool_sizes[i], pool_sizes[i]))(y_prepool)
|
||||||
wheres[i] = merge([y_prepool, y], mode=getwhere,
|
wheres[i] = layers.Lambda(getwhere, output_shape=lambda x: x[0])([y_prepool, y])
|
||||||
output_shape=lambda x: x[0])
|
|
||||||
|
|
||||||
# Now build the decoder, and use the stored "where" masks to place the features
|
# Now build the decoder, and use the stored 'where' masks to place the features
|
||||||
for i in range(nlayers):
|
for i in range(nlayers):
|
||||||
ind = nlayers - 1 - i
|
ind = nlayers - 1 - i
|
||||||
y = UpSampling2D(size=(pool_sizes[ind], pool_sizes[ind]))(y)
|
y = UpSampling2D(size=(pool_sizes[ind], pool_sizes[ind]))(y)
|
||||||
y = merge([y, wheres[ind]], mode='mul')
|
y = layers.multiply([y, wheres[ind]])
|
||||||
y = convresblock(y, nfeats=nfeats_all[ind], ksize=ksize)
|
y = convresblock(y, nfeats=nfeats_all[ind], ksize=ksize)
|
||||||
|
|
||||||
# Use hard_sigmoid to clip range of reconstruction
|
# Use hard_sigmoid to clip range of reconstruction
|
||||||
@@ -161,16 +160,16 @@ model = Model(img_input, y)
|
|||||||
model.compile('adam', 'mse')
|
model.compile('adam', 'mse')
|
||||||
|
|
||||||
# Fit the model
|
# Fit the model
|
||||||
model.fit(X_train, X_train, validation_data=(X_test, X_test),
|
model.fit(x_train, x_train, validation_data=(x_test, x_test),
|
||||||
batch_size=batch_size, epochs=epochs)
|
batch_size=batch_size, epochs=epochs)
|
||||||
|
|
||||||
# Plot
|
# Plot
|
||||||
X_recon = model.predict(X_test[:25])
|
x_recon = model.predict(x_test[:25])
|
||||||
X_plot = np.concatenate((X_test[:25], X_recon), axis=1)
|
x_plot = np.concatenate((x_test[:25], x_recon), axis=1)
|
||||||
X_plot = X_plot.reshape((5, 10, input_shape[-2], input_shape[-1]))
|
x_plot = x_plot.reshape((5, 10, input_shape[-2], input_shape[-1]))
|
||||||
X_plot = np.vstack([np.hstack(x) for x in X_plot])
|
x_plot = np.vstack([np.hstack(x) for x in x_plot])
|
||||||
plt.figure()
|
plt.figure()
|
||||||
plt.axis('off')
|
plt.axis('off')
|
||||||
plt.title('Test Samples: Originals/Reconstructions')
|
plt.title('Test Samples: Originals/Reconstructions')
|
||||||
plt.imshow(X_plot, interpolation='none', cmap='gray')
|
plt.imshow(x_plot, interpolation='none', cmap='gray')
|
||||||
plt.savefig('reconstructions.png')
|
plt.savefig('reconstructions.png')
|
||||||
|
@@ -122,7 +122,7 @@ def kmeans(xs, k):
|
|||||||
assert xs.ndim == 2
|
assert xs.ndim == 2
|
||||||
try:
|
try:
|
||||||
from sklearn.cluster import k_means
|
from sklearn.cluster import k_means
|
||||||
_, labels, _ = k_means(xs.astype("float64"), k)
|
_, labels, _ = k_means(xs.astype('float64'), k)
|
||||||
except ImportError:
|
except ImportError:
|
||||||
from scipy.cluster.vq import kmeans2
|
from scipy.cluster.vq import kmeans2
|
||||||
_, labels = kmeans2(xs, k, missing='raise')
|
_, labels = kmeans2(xs, k, missing='raise')
|
||||||
@@ -179,8 +179,8 @@ images = K.concatenate([style_image, target_image, content_image], axis=0)
|
|||||||
|
|
||||||
# Create tensor variables for masks
|
# Create tensor variables for masks
|
||||||
raw_style_mask, raw_target_mask = load_mask_labels()
|
raw_style_mask, raw_target_mask = load_mask_labels()
|
||||||
style_mask = K.variable(raw_style_mask.astype("float32"))
|
style_mask = K.variable(raw_style_mask.astype('float32'))
|
||||||
target_mask = K.variable(raw_target_mask.astype("float32"))
|
target_mask = K.variable(raw_target_mask.astype('float32'))
|
||||||
masks = K.concatenate([style_mask, target_mask], axis=0)
|
masks = K.concatenate([style_mask, target_mask], axis=0)
|
||||||
|
|
||||||
# index constants for images and tasks variables
|
# index constants for images and tasks variables
|
||||||
@@ -191,13 +191,13 @@ STYLE, TARGET, CONTENT = 0, 1, 2
|
|||||||
image_model = vgg19.VGG19(include_top=False, input_tensor=images)
|
image_model = vgg19.VGG19(include_top=False, input_tensor=images)
|
||||||
|
|
||||||
# mask model as a series of pooling
|
# mask model as a series of pooling
|
||||||
mask_input = Input(tensor=masks, shape=(None, None, None), name="mask_input")
|
mask_input = Input(tensor=masks, shape=(None, None, None), name='mask_input')
|
||||||
x = mask_input
|
x = mask_input
|
||||||
for layer in image_model.layers[1:]:
|
for layer in image_model.layers[1:]:
|
||||||
name = 'mask_%s' % layer.name
|
name = 'mask_%s' % layer.name
|
||||||
if 'conv' in layer.name:
|
if 'conv' in layer.name:
|
||||||
x = AveragePooling2D((3, 3), strides=(
|
x = AveragePooling2D((3, 3), strides=(
|
||||||
1, 1), name=name, border_mode="same")(x)
|
1, 1), name=name, border_mode='same')(x)
|
||||||
elif 'pool' in layer.name:
|
elif 'pool' in layer.name:
|
||||||
x = AveragePooling2D((2, 2), name=name)(x)
|
x = AveragePooling2D((2, 2), name=name)(x)
|
||||||
mask_model = Model(mask_input, x)
|
mask_model = Model(mask_input, x)
|
||||||
|
@@ -36,7 +36,7 @@ def gen_cosine_amp(amp=100, period=1000, x0=0, xn=50000, step=1, k=0.0001):
|
|||||||
return cos
|
return cos
|
||||||
|
|
||||||
|
|
||||||
print('Generating Data')
|
print('Generating Data...')
|
||||||
cos = gen_cosine_amp()
|
cos = gen_cosine_amp()
|
||||||
print('Input shape:', cos.shape)
|
print('Input shape:', cos.shape)
|
||||||
|
|
||||||
@@ -44,13 +44,13 @@ expected_output = np.zeros((len(cos), 1))
|
|||||||
for i in range(len(cos) - lahead):
|
for i in range(len(cos) - lahead):
|
||||||
expected_output[i, 0] = np.mean(cos[i + 1:i + lahead + 1])
|
expected_output[i, 0] = np.mean(cos[i + 1:i + lahead + 1])
|
||||||
|
|
||||||
print('Output shape')
|
print('Output shape:', expected_output.shape)
|
||||||
print(expected_output.shape)
|
|
||||||
|
|
||||||
print('Creating Model')
|
print('Creating Model...')
|
||||||
model = Sequential()
|
model = Sequential()
|
||||||
model.add(LSTM(50,
|
model.add(LSTM(50,
|
||||||
batch_input_shape=(batch_size, tsteps, 1),
|
input_shape=(tsteps, 1),
|
||||||
|
batch_size=batch_size,
|
||||||
return_sequences=True,
|
return_sequences=True,
|
||||||
stateful=True))
|
stateful=True))
|
||||||
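The change here is cosmetic: a stateful RNN still needs a fully specified batch size, because it carries hidden state across batches and must know how many sequences it is tracking. The two declarations are equivalent:

    # LSTM(50, batch_input_shape=(batch_size, tsteps, 1), stateful=True)
    # LSTM(50, input_shape=(tsteps, 1), batch_size=batch_size, stateful=True)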
model.add(LSTM(50,
|
model.add(LSTM(50,
|
||||||
|