Style fixes

Francois Chollet 2016-05-05 11:17:25 -07:00
parent 18504bcc86
commit 3c57aff85b
2 changed files with 57 additions and 48 deletions

@@ -10,8 +10,10 @@ a little longer, but should require less peak memory.
 consume_less='gpu' concatenates the input, output and forget gate's weights
 into one, large matrix, resulting in faster computation time as the GPU can
 utilize more cores, at the expense of reduced regularization because the same
-dropout is shared across the gates. It should require similar memory usage as
-consume_less='mem'.
+dropout is shared across the gates.
+Note that the relative performance of the different `consume_less` modes
+can vary depending on your device, your model and the size of your data.
 '''
 import time
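The docstring above describes the trade-off: 'gpu' mode fuses the gate weights into one large matrix, so its speed advantage depends on the device, the model and the data size. As a minimal sketch of the pattern this example benchmarks, assuming the same Keras 1.0-era API used in the commit (synthetic data, one epoch per mode, not the commit's own code):

# Sketch: time one training epoch of the same LSTM under each consume_less mode.
import time

import numpy as np
from keras.models import Sequential
from keras.layers import Embedding, Dense, LSTM

max_features, max_length, embedding_dim = 1000, 80, 64
X = np.random.randint(max_features, size=(512, max_length))
y = np.random.randint(2, size=(512, 1))

for mode in ['cpu', 'mem', 'gpu']:
    model = Sequential()
    model.add(Embedding(max_features, embedding_dim, input_length=max_length))
    model.add(LSTM(embedding_dim, consume_less=mode))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam')
    start = time.time()
    model.fit(X, y, batch_size=128, nb_epoch=1, verbose=0)
    print(mode, time.time() - start)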
@@ -20,13 +22,13 @@ import matplotlib.pyplot as plt
 from keras.preprocessing import sequence
 from keras.models import Sequential
-from keras.layers import Embedding, BatchNormalization, Dense, LSTM
+from keras.layers import Embedding, Dense, LSTM
 from keras.datasets import imdb

 max_features = 20000
 max_length = 80
-embedding = 400
-batch_size = 256
+embedding_dim = 256
+batch_size = 128
 epochs = 10
 modes = ['cpu', 'mem', 'gpu']
@@ -38,32 +40,36 @@ X_test = sequence.pad_sequences(X_test, max_length)
 # Compile and train different models while measuring performance.
 results = []
 for mode in modes:
-    print("Testing mode: consume_less='{}'".format(mode))
+    print('Testing mode: consume_less="{}"'.format(mode))
     model = Sequential()
-    model.add(Embedding(max_features, embedding, input_length=max_length, dropout=0.2))
-    model.add(BatchNormalization())
-    model.add(LSTM(embedding, dropout_W=0.2, dropout_U=0.2, consume_less=mode))
+    model.add(Embedding(max_features, embedding_dim, input_length=max_length, dropout=0.2))
+    model.add(LSTM(embedding_dim, dropout_W=0.2, dropout_U=0.2, consume_less=mode))
     model.add(Dense(1, activation='sigmoid'))
-    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
+    model.compile(loss='binary_crossentropy',
+                  optimizer='adam',
+                  metrics=['accuracy'])
     start_time = time.time()
-    history = model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=epochs, validation_data=(X_test, y_test))
+    history = model.fit(X_train, y_train,
+                        batch_size=batch_size,
+                        nb_epoch=epochs,
+                        validation_data=(X_test, y_test))
     average_time_per_epoch = (time.time() - start_time) / epochs
     results.append((history, average_time_per_epoch))

 # Compare models' accuracy, loss and elapsed time per epoch.
 plt.style.use('ggplot')
-ax1 = plt.subplot2grid((2,2), (0,0))
+ax1 = plt.subplot2grid((2, 2), (0, 0))
 ax1.set_title('Accuracy')
 ax1.set_ylabel('Validation Accuracy')
 ax1.set_xlabel('Epochs')
-ax2 = plt.subplot2grid((2,2), (1,0))
+ax2 = plt.subplot2grid((2, 2), (1, 0))
 ax2.set_title('Loss')
 ax2.set_ylabel('Validation Loss')
 ax2.set_xlabel('Epochs')
-ax3 = plt.subplot2grid((2,2), (0,1), rowspan=2)
+ax3 = plt.subplot2grid((2, 2), (0, 1), rowspan=2)
 ax3.set_title('Time')
 ax3.set_ylabel('Seconds')
 for mode, result in zip(modes, results):
@@ -71,6 +77,7 @@ for mode, result in zip(modes, results):
     ax2.plot(result[0].epoch, result[0].history['val_loss'], label=mode)
 ax1.legend()
 ax2.legend()
-ax3.bar(np.arange(len(results)), [x[1] for x in results], tick_label=modes, align='center')
+ax3.bar(np.arange(len(results)), [x[1] for x in results],
+        tick_label=modes, align='center')
 plt.tight_layout()
 plt.show()

@@ -81,7 +81,9 @@ class Recurrent(Layer):
             is always unrolled, so this argument does not do anything.
             Unrolling can speed-up a RNN, although it tends to be more memory-intensive.
             Unrolling is only suitable for short sequences.
-        consume_less: one of "cpu", "mem", or "gpu" (LSTM only).
+        consume_less: one of "cpu", "mem", or "gpu".
+            Note that "gpu" mode is only available for LSTM.
             If set to "cpu", the RNN will use
            an implementation that uses fewer, larger matrix products,
            thus running faster on CPU but consuming more memory.
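To make the consume_less docstring concrete, here is a small sketch, assuming the Keras 1.0-era layer attributes: in 'gpu' mode the LSTM builds one fused kernel of shape (input_dim, 4 * output_dim), while the other modes keep separate per-gate matrices.

# Sketch (assumes Keras 1.0-era attributes such as `trainable_weights`).
from keras.models import Sequential
from keras.layers import LSTM

for mode in ['gpu', 'mem']:
    model = Sequential()
    model.add(LSTM(32, input_shape=(10, 16), consume_less=mode))
    lstm = model.layers[0]
    # 'gpu': a single fused W/U/b triple; 'mem'/'cpu': per-gate W_i, U_i, b_i, ...
    print(mode, [w.name for w in lstm.trainable_weights])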
@@ -391,15 +393,15 @@ class SimpleRNN(Recurrent):
         return constants

     def get_config(self):
-        config = {"output_dim": self.output_dim,
-                  "init": self.init.__name__,
-                  "inner_init": self.inner_init.__name__,
-                  "activation": self.activation.__name__,
-                  "W_regularizer": self.W_regularizer.get_config() if self.W_regularizer else None,
-                  "U_regularizer": self.U_regularizer.get_config() if self.U_regularizer else None,
-                  "b_regularizer": self.b_regularizer.get_config() if self.b_regularizer else None,
-                  "dropout_W": self.dropout_W,
-                  "dropout_U": self.dropout_U}
+        config = {'output_dim': self.output_dim,
+                  'init': self.init.__name__,
+                  'inner_init': self.inner_init.__name__,
+                  'activation': self.activation.__name__,
+                  'W_regularizer': self.W_regularizer.get_config() if self.W_regularizer else None,
+                  'U_regularizer': self.U_regularizer.get_config() if self.U_regularizer else None,
+                  'b_regularizer': self.b_regularizer.get_config() if self.b_regularizer else None,
+                  'dropout_W': self.dropout_W,
+                  'dropout_U': self.dropout_U}
         base_config = super(SimpleRNN, self).get_config()
         return dict(list(base_config.items()) + list(config.items()))
@@ -574,16 +576,16 @@ class GRU(Recurrent):
         return constants

     def get_config(self):
-        config = {"output_dim": self.output_dim,
-                  "init": self.init.__name__,
-                  "inner_init": self.inner_init.__name__,
-                  "activation": self.activation.__name__,
-                  "inner_activation": self.inner_activation.__name__,
-                  "W_regularizer": self.W_regularizer.get_config() if self.W_regularizer else None,
-                  "U_regularizer": self.U_regularizer.get_config() if self.U_regularizer else None,
-                  "b_regularizer": self.b_regularizer.get_config() if self.b_regularizer else None,
-                  "dropout_W": self.dropout_W,
-                  "dropout_U": self.dropout_U}
+        config = {'output_dim': self.output_dim,
+                  'init': self.init.__name__,
+                  'inner_init': self.inner_init.__name__,
+                  'activation': self.activation.__name__,
+                  'inner_activation': self.inner_activation.__name__,
+                  'W_regularizer': self.W_regularizer.get_config() if self.W_regularizer else None,
+                  'U_regularizer': self.U_regularizer.get_config() if self.U_regularizer else None,
+                  'b_regularizer': self.b_regularizer.get_config() if self.b_regularizer else None,
+                  'dropout_W': self.dropout_W,
+                  'dropout_U': self.dropout_U}
         base_config = super(GRU, self).get_config()
         return dict(list(base_config.items()) + list(config.items()))
@@ -654,9 +656,9 @@ class LSTM(Recurrent):
             self.states = [None, None]

         if self.consume_less == 'gpu':
-            self.W = self.init((self.input_dim, 4*self.output_dim),
+            self.W = self.init((self.input_dim, 4 * self.output_dim),
                                name='{}_W'.format(self.name))
-            self.U = self.inner_init((self.output_dim, 4*self.output_dim),
+            self.U = self.inner_init((self.output_dim, 4 * self.output_dim),
                                      name='{}_U'.format(self.name))

             self.b = K.variable(np.hstack((np.zeros(self.output_dim),
@@ -814,16 +816,16 @@ class LSTM(Recurrent):
         return constants

     def get_config(self):
-        config = {"output_dim": self.output_dim,
-                  "init": self.init.__name__,
-                  "inner_init": self.inner_init.__name__,
-                  "forget_bias_init": self.forget_bias_init.__name__,
-                  "activation": self.activation.__name__,
-                  "inner_activation": self.inner_activation.__name__,
-                  "W_regularizer": self.W_regularizer.get_config() if self.W_regularizer else None,
-                  "U_regularizer": self.U_regularizer.get_config() if self.U_regularizer else None,
-                  "b_regularizer": self.b_regularizer.get_config() if self.b_regularizer else None,
-                  "dropout_W": self.dropout_W,
-                  "dropout_U": self.dropout_U}
+        config = {'output_dim': self.output_dim,
+                  'init': self.init.__name__,
+                  'inner_init': self.inner_init.__name__,
+                  'forget_bias_init': self.forget_bias_init.__name__,
+                  'activation': self.activation.__name__,
+                  'inner_activation': self.inner_activation.__name__,
+                  'W_regularizer': self.W_regularizer.get_config() if self.W_regularizer else None,
+                  'U_regularizer': self.U_regularizer.get_config() if self.U_regularizer else None,
+                  'b_regularizer': self.b_regularizer.get_config() if self.b_regularizer else None,
+                  'dropout_W': self.dropout_W,
+                  'dropout_U': self.dropout_U}
         base_config = super(LSTM, self).get_config()
         return dict(list(base_config.items()) + list(config.items()))
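The quoting changes in the three get_config methods above are purely stylistic; the serialized config dict is unchanged. A short sketch of the round trip, assuming the stock Layer.get_config/from_config pair in this Keras version:

# Sketch (assumes Layer.from_config simply calls cls(**config)).
from keras.layers import LSTM

layer = LSTM(64, dropout_W=0.2, dropout_U=0.2)
config = layer.get_config()
print(config['output_dim'], config['dropout_W'], config['init'])
restored = LSTM.from_config(config)  # rebuilds an equivalent, unbuilt layer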