diff --git a/examples/lstm_benchmark.py b/examples/lstm_benchmark.py
index eb462c571..91e5fc0eb 100644
--- a/examples/lstm_benchmark.py
+++ b/examples/lstm_benchmark.py
@@ -10,8 +10,10 @@ a little longer, but should require less peak memory.
 consume_less='gpu' concatenates the input, output and forget gate's weights
 into one, large matrix, resulting in faster computation time as the GPU can
 utilize more cores, at the expense of reduced regularization because the same
-dropout is shared across the gates. It should require similar memory usage as
-consume_less='mem'.
+dropout is shared across the gates.
+
+Note that the relative performance of the different `consume_less` modes
+can vary depending on your device, your model and the size of your data.
 '''

 import time
@@ -20,13 +22,13 @@ import matplotlib.pyplot as plt

 from keras.preprocessing import sequence
 from keras.models import Sequential
-from keras.layers import Embedding, BatchNormalization, Dense, LSTM
+from keras.layers import Embedding, Dense, LSTM
 from keras.datasets import imdb

 max_features = 20000
 max_length = 80
-embedding = 400
-batch_size = 256
+embedding_dim = 256
+batch_size = 128
 epochs = 10
 modes = ['cpu', 'mem', 'gpu']

@@ -38,32 +40,36 @@ X_test = sequence.pad_sequences(X_test, max_length)
 # Compile and train different models while meauring performance.
 results = []
 for mode in modes:
-    print("Testing mode: consume_less='{}'".format(mode))
+    print('Testing mode: consume_less="{}"'.format(mode))

     model = Sequential()
-    model.add(Embedding(max_features, embedding, input_length=max_length, dropout=0.2))
-    model.add(BatchNormalization())
-    model.add(LSTM(embedding, dropout_W=0.2, dropout_U=0.2, consume_less=mode))
+    model.add(Embedding(max_features, embedding_dim, input_length=max_length, dropout=0.2))
+    model.add(LSTM(embedding_dim, dropout_W=0.2, dropout_U=0.2, consume_less=mode))
     model.add(Dense(1, activation='sigmoid'))
-    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
+    model.compile(loss='binary_crossentropy',
+                  optimizer='adam',
+                  metrics=['accuracy'])

     start_time = time.time()
-    history = model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=epochs, validation_data=(X_test, y_test))
+    history = model.fit(X_train, y_train,
+                        batch_size=batch_size,
+                        nb_epoch=epochs,
+                        validation_data=(X_test, y_test))
     average_time_per_epoch = (time.time() - start_time) / epochs

     results.append((history, average_time_per_epoch))

 # Compare models' accuracy, loss and elapsed time per epoch.
 plt.style.use('ggplot')
-ax1 = plt.subplot2grid((2,2), (0,0))
+ax1 = plt.subplot2grid((2, 2), (0, 0))
 ax1.set_title('Accuracy')
 ax1.set_ylabel('Validation Accuracy')
 ax1.set_xlabel('Epochs')
-ax2 = plt.subplot2grid((2,2), (1,0))
+ax2 = plt.subplot2grid((2, 2), (1, 0))
 ax2.set_title('Loss')
 ax2.set_ylabel('Validation Loss')
 ax2.set_xlabel('Epochs')
-ax3 = plt.subplot2grid((2,2), (0,1), rowspan=2)
+ax3 = plt.subplot2grid((2, 2), (0, 1), rowspan=2)
 ax3.set_title('Time')
 ax3.set_ylabel('Seconds')
 for mode, result in zip(modes, results):
@@ -71,6 +77,7 @@ for mode, result in zip(modes, results):
     ax2.plot(result[0].epoch, result[0].history['val_loss'], label=mode)
 ax1.legend()
 ax2.legend()
-ax3.bar(np.arange(len(results)), [x[1] for x in results], tick_label=modes, align='center')
+ax3.bar(np.arange(len(results)), [x[1] for x in results],
+        tick_label=modes, align='center')
 plt.tight_layout()
 plt.show()
diff --git a/keras/layers/recurrent.py b/keras/layers/recurrent.py
index d624fdf35..38411f527 100644
--- a/keras/layers/recurrent.py
+++ b/keras/layers/recurrent.py
@@ -81,7 +81,9 @@ class Recurrent(Layer):
             is always unrolled, so this argument does not do anything.
             Unrolling can speed-up a RNN, although it tends to be more memory-intensive.
             Unrolling is only suitable for short sequences.
-        consume_less: one of "cpu", "mem", or "gpu" (LSTM only).
+        consume_less: one of "cpu", "mem", or "gpu".
+            Note that "gpu" mode is only available for LSTM.
+
             If set to "cpu", the RNN will use
             an implementation that uses fewer, larger matrix products,
             thus running faster on CPU but consuming more memory.
@@ -391,15 +393,15 @@
         return constants

     def get_config(self):
-        config = {"output_dim": self.output_dim,
-                  "init": self.init.__name__,
-                  "inner_init": self.inner_init.__name__,
-                  "activation": self.activation.__name__,
-                  "W_regularizer": self.W_regularizer.get_config() if self.W_regularizer else None,
-                  "U_regularizer": self.U_regularizer.get_config() if self.U_regularizer else None,
-                  "b_regularizer": self.b_regularizer.get_config() if self.b_regularizer else None,
-                  "dropout_W": self.dropout_W,
-                  "dropout_U": self.dropout_U}
+        config = {'output_dim': self.output_dim,
+                  'init': self.init.__name__,
+                  'inner_init': self.inner_init.__name__,
+                  'activation': self.activation.__name__,
+                  'W_regularizer': self.W_regularizer.get_config() if self.W_regularizer else None,
+                  'U_regularizer': self.U_regularizer.get_config() if self.U_regularizer else None,
+                  'b_regularizer': self.b_regularizer.get_config() if self.b_regularizer else None,
+                  'dropout_W': self.dropout_W,
+                  'dropout_U': self.dropout_U}
         base_config = super(SimpleRNN, self).get_config()
         return dict(list(base_config.items()) + list(config.items()))

@@ -574,16 +576,16 @@
         return constants

     def get_config(self):
-        config = {"output_dim": self.output_dim,
-                  "init": self.init.__name__,
-                  "inner_init": self.inner_init.__name__,
-                  "activation": self.activation.__name__,
-                  "inner_activation": self.inner_activation.__name__,
-                  "W_regularizer": self.W_regularizer.get_config() if self.W_regularizer else None,
-                  "U_regularizer": self.U_regularizer.get_config() if self.U_regularizer else None,
-                  "b_regularizer": self.b_regularizer.get_config() if self.b_regularizer else None,
-                  "dropout_W": self.dropout_W,
-                  "dropout_U": self.dropout_U}
+        config = {'output_dim': self.output_dim,
+                  'init': self.init.__name__,
+                  'inner_init': self.inner_init.__name__,
+                  'activation': self.activation.__name__,
+                  'inner_activation': self.inner_activation.__name__,
+                  'W_regularizer': self.W_regularizer.get_config() if self.W_regularizer else None,
+                  'U_regularizer': self.U_regularizer.get_config() if self.U_regularizer else None,
+                  'b_regularizer': self.b_regularizer.get_config() if self.b_regularizer else None,
+                  'dropout_W': self.dropout_W,
+                  'dropout_U': self.dropout_U}
         base_config = super(GRU, self).get_config()
         return dict(list(base_config.items()) + list(config.items()))

@@ -654,9 +656,9 @@
             self.states = [None, None]

         if self.consume_less == 'gpu':
-            self.W = self.init((self.input_dim, 4*self.output_dim),
+            self.W = self.init((self.input_dim, 4 * self.output_dim),
                                name='{}_W'.format(self.name))
-            self.U = self.inner_init((self.output_dim, 4*self.output_dim),
+            self.U = self.inner_init((self.output_dim, 4 * self.output_dim),
                                      name='{}_U'.format(self.name))

             self.b = K.variable(np.hstack((np.zeros(self.output_dim),
@@ -814,16 +816,16 @@
         return constants

     def get_config(self):
-        config = {"output_dim": self.output_dim,
-                  "init": self.init.__name__,
-                  "inner_init": self.inner_init.__name__,
-                  "forget_bias_init": self.forget_bias_init.__name__,
-                  "activation": self.activation.__name__,
-                  "inner_activation": self.inner_activation.__name__,
-                  "W_regularizer": self.W_regularizer.get_config() if self.W_regularizer else None,
-                  "U_regularizer": self.U_regularizer.get_config() if self.U_regularizer else None,
-                  "b_regularizer": self.b_regularizer.get_config() if self.b_regularizer else None,
-                  "dropout_W": self.dropout_W,
-                  "dropout_U": self.dropout_U}
+        config = {'output_dim': self.output_dim,
+                  'init': self.init.__name__,
+                  'inner_init': self.inner_init.__name__,
+                  'forget_bias_init': self.forget_bias_init.__name__,
+                  'activation': self.activation.__name__,
+                  'inner_activation': self.inner_activation.__name__,
+                  'W_regularizer': self.W_regularizer.get_config() if self.W_regularizer else None,
+                  'U_regularizer': self.U_regularizer.get_config() if self.U_regularizer else None,
+                  'b_regularizer': self.b_regularizer.get_config() if self.b_regularizer else None,
+                  'dropout_W': self.dropout_W,
+                  'dropout_U': self.dropout_U}
         base_config = super(LSTM, self).get_config()
         return dict(list(base_config.items()) + list(config.items()))
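
Usage note (not part of the patch): a minimal sketch, using only the Keras 1.x API already shown in the benchmark above, of how a consume_less mode is selected when an LSTM layer is built. The vocabulary and layer sizes here are illustrative, not taken from the patch.

    from keras.models import Sequential
    from keras.layers import Embedding, LSTM, Dense

    model = Sequential()
    model.add(Embedding(20000, 256, input_length=80, dropout=0.2))
    # Swap 'gpu' for 'cpu' or 'mem' to trade speed against peak memory,
    # as described in the example's docstring.
    model.add(LSTM(256, dropout_W=0.2, dropout_U=0.2, consume_less='gpu'))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy',
                  optimizer='adam',
                  metrics=['accuracy'])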