Style fixes

Francois Chollet 2016-05-05 11:17:25 -07:00
parent 18504bcc86
commit 3c57aff85b
2 changed files with 57 additions and 48 deletions

@@ -10,8 +10,10 @@ a little longer, but should require less peak memory.
 consume_less='gpu' concatenates the input, output and forget gate's weights
 into one, large matrix, resulting in faster computation time as the GPU can
 utilize more cores, at the expense of reduced regularization because the same
-dropout is shared across the gates. It should require similar memory usage as
-consume_less='mem'.
+dropout is shared across the gates.
+Note that the relative performance of the different `consume_less` modes
+can vary depending on your device, your model and the size of your data.
 '''
 import time
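The docstring above describes the trade-off: 'gpu' mode fuses the gate weights into one large matrix, so its speed advantage depends on the device, the model and the data size. As a minimal sketch of the pattern this example benchmarks, assuming the same Keras 1.0-era API used in the commit (synthetic data, one epoch per mode, not the commit's own code):

# Sketch: time one training epoch of the same LSTM under each consume_less mode.
import time

import numpy as np
from keras.models import Sequential
from keras.layers import Embedding, Dense, LSTM

max_features, max_length, embedding_dim = 1000, 80, 64
X = np.random.randint(max_features, size=(512, max_length))
y = np.random.randint(2, size=(512, 1))

for mode in ['cpu', 'mem', 'gpu']:
    model = Sequential()
    model.add(Embedding(max_features, embedding_dim, input_length=max_length))
    model.add(LSTM(embedding_dim, consume_less=mode))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam')
    start = time.time()
    model.fit(X, y, batch_size=128, nb_epoch=1, verbose=0)
    print(mode, time.time() - start)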
@@ -20,13 +22,13 @@ import matplotlib.pyplot as plt
 from keras.preprocessing import sequence
 from keras.models import Sequential
-from keras.layers import Embedding, BatchNormalization, Dense, LSTM
+from keras.layers import Embedding, Dense, LSTM
 from keras.datasets import imdb

 max_features = 20000
 max_length = 80
-embedding = 400
-batch_size = 256
+embedding_dim = 256
+batch_size = 128
 epochs = 10
 modes = ['cpu', 'mem', 'gpu']
@@ -38,32 +40,36 @@ X_test = sequence.pad_sequences(X_test, max_length)
 # Compile and train different models while measuring performance.
 results = []
 for mode in modes:
-    print("Testing mode: consume_less='{}'".format(mode))
+    print('Testing mode: consume_less="{}"'.format(mode))
     model = Sequential()
-    model.add(Embedding(max_features, embedding, input_length=max_length, dropout=0.2))
-    model.add(BatchNormalization())
-    model.add(LSTM(embedding, dropout_W=0.2, dropout_U=0.2, consume_less=mode))
+    model.add(Embedding(max_features, embedding_dim, input_length=max_length, dropout=0.2))
+    model.add(LSTM(embedding_dim, dropout_W=0.2, dropout_U=0.2, consume_less=mode))
     model.add(Dense(1, activation='sigmoid'))
-    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
+    model.compile(loss='binary_crossentropy',
+                  optimizer='adam',
+                  metrics=['accuracy'])
     start_time = time.time()
-    history = model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=epochs, validation_data=(X_test, y_test))
+    history = model.fit(X_train, y_train,
+                        batch_size=batch_size,
+                        nb_epoch=epochs,
+                        validation_data=(X_test, y_test))
     average_time_per_epoch = (time.time() - start_time) / epochs
     results.append((history, average_time_per_epoch))

 # Compare models' accuracy, loss and elapsed time per epoch.
 plt.style.use('ggplot')
-ax1 = plt.subplot2grid((2,2), (0,0))
+ax1 = plt.subplot2grid((2, 2), (0, 0))
 ax1.set_title('Accuracy')
 ax1.set_ylabel('Validation Accuracy')
 ax1.set_xlabel('Epochs')
-ax2 = plt.subplot2grid((2,2), (1,0))
+ax2 = plt.subplot2grid((2, 2), (1, 0))
 ax2.set_title('Loss')
 ax2.set_ylabel('Validation Loss')
 ax2.set_xlabel('Epochs')
-ax3 = plt.subplot2grid((2,2), (0,1), rowspan=2)
+ax3 = plt.subplot2grid((2, 2), (0, 1), rowspan=2)
 ax3.set_title('Time')
 ax3.set_ylabel('Seconds')
 for mode, result in zip(modes, results):
@@ -71,6 +77,7 @@ for mode, result in zip(modes, results):
     ax2.plot(result[0].epoch, result[0].history['val_loss'], label=mode)
 ax1.legend()
 ax2.legend()
-ax3.bar(np.arange(len(results)), [x[1] for x in results], tick_label=modes, align='center')
+ax3.bar(np.arange(len(results)), [x[1] for x in results],
+        tick_label=modes, align='center')
 plt.tight_layout()
 plt.show()

@@ -81,7 +81,9 @@ class Recurrent(Layer):
             is always unrolled, so this argument does not do anything.
             Unrolling can speed-up a RNN, although it tends to be more memory-intensive.
             Unrolling is only suitable for short sequences.
-        consume_less: one of "cpu", "mem", or "gpu" (LSTM only).
+        consume_less: one of "cpu", "mem", or "gpu".
+            Note that "gpu" mode is only available for LSTM.
             If set to "cpu", the RNN will use
            an implementation that uses fewer, larger matrix products,
            thus running faster on CPU but consuming more memory.
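To make the consume_less docstring concrete, here is a small sketch, assuming the Keras 1.0-era layer attributes: in 'gpu' mode the LSTM builds one fused kernel of shape (input_dim, 4 * output_dim), while the other modes keep separate per-gate matrices.

# Sketch (assumes Keras 1.0-era attributes such as `trainable_weights`).
from keras.models import Sequential
from keras.layers import LSTM

for mode in ['gpu', 'mem']:
    model = Sequential()
    model.add(LSTM(32, input_shape=(10, 16), consume_less=mode))
    lstm = model.layers[0]
    # 'gpu': a single fused W/U/b triple; 'mem'/'cpu': per-gate W_i, U_i, b_i, ...
    print(mode, [w.name for w in lstm.trainable_weights])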
@@ -391,15 +393,15 @@ class SimpleRNN(Recurrent):
         return constants

     def get_config(self):
-        config = {"output_dim": self.output_dim,
-                  "init": self.init.__name__,
-                  "inner_init": self.inner_init.__name__,
-                  "activation": self.activation.__name__,
-                  "W_regularizer": self.W_regularizer.get_config() if self.W_regularizer else None,
-                  "U_regularizer": self.U_regularizer.get_config() if self.U_regularizer else None,
-                  "b_regularizer": self.b_regularizer.get_config() if self.b_regularizer else None,
-                  "dropout_W": self.dropout_W,
-                  "dropout_U": self.dropout_U}
+        config = {'output_dim': self.output_dim,
+                  'init': self.init.__name__,
+                  'inner_init': self.inner_init.__name__,
+                  'activation': self.activation.__name__,
+                  'W_regularizer': self.W_regularizer.get_config() if self.W_regularizer else None,
+                  'U_regularizer': self.U_regularizer.get_config() if self.U_regularizer else None,
+                  'b_regularizer': self.b_regularizer.get_config() if self.b_regularizer else None,
+                  'dropout_W': self.dropout_W,
+                  'dropout_U': self.dropout_U}
         base_config = super(SimpleRNN, self).get_config()
         return dict(list(base_config.items()) + list(config.items()))
@@ -574,16 +576,16 @@ class GRU(Recurrent):
         return constants

     def get_config(self):
-        config = {"output_dim": self.output_dim,
-                  "init": self.init.__name__,
-                  "inner_init": self.inner_init.__name__,
-                  "activation": self.activation.__name__,
-                  "inner_activation": self.inner_activation.__name__,
-                  "W_regularizer": self.W_regularizer.get_config() if self.W_regularizer else None,
-                  "U_regularizer": self.U_regularizer.get_config() if self.U_regularizer else None,
-                  "b_regularizer": self.b_regularizer.get_config() if self.b_regularizer else None,
-                  "dropout_W": self.dropout_W,
-                  "dropout_U": self.dropout_U}
+        config = {'output_dim': self.output_dim,
+                  'init': self.init.__name__,
+                  'inner_init': self.inner_init.__name__,
+                  'activation': self.activation.__name__,
+                  'inner_activation': self.inner_activation.__name__,
+                  'W_regularizer': self.W_regularizer.get_config() if self.W_regularizer else None,
+                  'U_regularizer': self.U_regularizer.get_config() if self.U_regularizer else None,
+                  'b_regularizer': self.b_regularizer.get_config() if self.b_regularizer else None,
+                  'dropout_W': self.dropout_W,
+                  'dropout_U': self.dropout_U}
         base_config = super(GRU, self).get_config()
         return dict(list(base_config.items()) + list(config.items()))
@@ -654,9 +656,9 @@ class LSTM(Recurrent):
             self.states = [None, None]

         if self.consume_less == 'gpu':
-            self.W = self.init((self.input_dim, 4*self.output_dim),
+            self.W = self.init((self.input_dim, 4 * self.output_dim),
                                name='{}_W'.format(self.name))
-            self.U = self.inner_init((self.output_dim, 4*self.output_dim),
+            self.U = self.inner_init((self.output_dim, 4 * self.output_dim),
                                      name='{}_U'.format(self.name))

             self.b = K.variable(np.hstack((np.zeros(self.output_dim),
@@ -814,16 +816,16 @@ class LSTM(Recurrent):
         return constants

     def get_config(self):
-        config = {"output_dim": self.output_dim,
-                  "init": self.init.__name__,
-                  "inner_init": self.inner_init.__name__,
-                  "forget_bias_init": self.forget_bias_init.__name__,
-                  "activation": self.activation.__name__,
-                  "inner_activation": self.inner_activation.__name__,
-                  "W_regularizer": self.W_regularizer.get_config() if self.W_regularizer else None,
-                  "U_regularizer": self.U_regularizer.get_config() if self.U_regularizer else None,
-                  "b_regularizer": self.b_regularizer.get_config() if self.b_regularizer else None,
-                  "dropout_W": self.dropout_W,
-                  "dropout_U": self.dropout_U}
+        config = {'output_dim': self.output_dim,
+                  'init': self.init.__name__,
+                  'inner_init': self.inner_init.__name__,
+                  'forget_bias_init': self.forget_bias_init.__name__,
+                  'activation': self.activation.__name__,
+                  'inner_activation': self.inner_activation.__name__,
+                  'W_regularizer': self.W_regularizer.get_config() if self.W_regularizer else None,
+                  'U_regularizer': self.U_regularizer.get_config() if self.U_regularizer else None,
+                  'b_regularizer': self.b_regularizer.get_config() if self.b_regularizer else None,
+                  'dropout_W': self.dropout_W,
+                  'dropout_U': self.dropout_U}
         base_config = super(LSTM, self).get_config()
         return dict(list(base_config.items()) + list(config.items()))
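The quoting changes in the three get_config methods above are purely stylistic; the serialized config dict is unchanged. A short sketch of the round trip, assuming the stock Layer.get_config/from_config pair in this Keras version:

# Sketch (assumes Layer.from_config simply calls cls(**config)).
from keras.layers import LSTM

layer = LSTM(64, dropout_W=0.2, dropout_U=0.2)
config = layer.get_config()
print(config['output_dim'], config['dropout_W'], config['init'])
restored = LSTM.from_config(config)  # rebuilds an equivalent, unbuilt layer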