Update several examples to work with the new API (#5548)

* Update mnist_transfer_cnn for new API

* Update mnist_siamese_graph.py for new API

* Refactor example a little bit for clarity

* Update mnist_irnn.py for new API

* Fix variable name

* Update mnist_hierarchical_rnn.py for new API

* Fix a few API calls I missed

* Update mnist_acgan.py for new API

* Fix variable name

* Update imdb_cnn for new API

* Update benchmark.py to work with new API

* PEP8 fix

* Change filter_length to kernel_size

* Update imdb_cnn_lstm.py for new API

* PEP8 indentation fix
Hannah Vivian Shaw 2017-02-27 21:53:41 -05:00 committed by François Chollet
parent 38a6dae44a
commit eec61d9d49
8 changed files with 107 additions and 105 deletions

@@ -14,7 +14,7 @@ from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.layers import Embedding
from keras.layers import Convolution1D, GlobalMaxPooling1D
from keras.layers import Conv1D, GlobalMaxPooling1D
from keras.datasets import imdb
@@ -24,7 +24,7 @@ maxlen = 400
batch_size = 32
embedding_dims = 50
filters = 250
filter_length = 3
kernel_size = 3
hidden_dims = 250
epochs = 2
@@ -46,16 +46,16 @@ model = Sequential()
# our vocab indices into embedding_dims dimensions
model.add(Embedding(max_features,
embedding_dims,
input_length=maxlen,
dropout=0.2))
input_length=maxlen))
model.add(Dropout(0.2))
# we add a Convolution1D, which will learn filters
# word group filters of size filter_length:
model.add(Convolution1D(filters=filters,
filter_length=filter_length,
border_mode='valid',
activation='relu',
subsample_length=1))
model.add(Conv1D(filters,
kernel_size,
padding='valid',
activation='relu',
strides=1))
# we use max pooling:
model.add(GlobalMaxPooling1D())

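Pieced together, the migrated front of the imdb_cnn model looks roughly like the sketch below under the Keras 2 API; max_features is not shown in the hunks above and is assumed from the rest of the example.

from keras.models import Sequential
from keras.layers import Embedding, Dropout, Conv1D, GlobalMaxPooling1D

max_features = 5000  # assumed; defined earlier in the example, outside these hunks
maxlen = 400
embedding_dims = 50
filters = 250
kernel_size = 3

model = Sequential()
# the dropout argument of Embedding becomes an explicit Dropout layer
model.add(Embedding(max_features, embedding_dims, input_length=maxlen))
model.add(Dropout(0.2))
# Convolution1D(filter_length=..., border_mode=..., subsample_length=...)
# maps to Conv1D(kernel_size, padding=..., strides=...)
model.add(Conv1D(filters, kernel_size,
                 padding='valid',
                 activation='relu',
                 strides=1))
model.add(GlobalMaxPooling1D())
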
@@ -12,7 +12,7 @@ from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.layers import Embedding
from keras.layers import LSTM
from keras.layers import Convolution1D, MaxPooling1D
from keras.layers import Conv1D, MaxPooling1D
from keras.datasets import imdb
@@ -22,9 +22,9 @@ maxlen = 100
embedding_size = 128
# Convolution
filter_length = 5
kernel_size = 5
filters = 64
pool_length = 4
pool_size = 4
# LSTM
lstm_output_size = 70
@@ -55,12 +55,12 @@ print('Build model...')
model = Sequential()
model.add(Embedding(max_features, embedding_size, input_length=maxlen))
model.add(Dropout(0.25))
model.add(Convolution1D(filters=filters,
filter_length=filter_length,
border_mode='valid',
activation='relu',
subsample_length=1))
model.add(MaxPooling1D(pool_length=pool_length))
model.add(Conv1D(filters,
kernel_size,
padding='valid',
activation='relu',
strides=1))
model.add(MaxPooling1D(pool_size=pool_size))
model.add(LSTM(lstm_output_size))
model.add(Dense(1))
model.add(Activation('sigmoid'))

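For comparison, the migrated imdb_cnn_lstm stack assembled from the hunks above; the values mirror the ones shown there, except max_features, which is assumed from the rest of the example.

from keras.models import Sequential
from keras.layers import Embedding, Dropout, Conv1D, MaxPooling1D, LSTM, Dense, Activation

max_features = 20000  # assumed; defined earlier in the example, outside these hunks
maxlen = 100
embedding_size = 128
kernel_size = 5
filters = 64
pool_size = 4
lstm_output_size = 70

model = Sequential()
model.add(Embedding(max_features, embedding_size, input_length=maxlen))
model.add(Dropout(0.25))
# filter_length/border_mode/subsample_length map to kernel_size/padding/strides
model.add(Conv1D(filters, kernel_size,
                 padding='valid',
                 activation='relu',
                 strides=1))
# pool_length maps to pool_size
model.add(MaxPooling1D(pool_size=pool_size))
model.add(LSTM(lstm_output_size))
model.add(Dense(1))
model.add(Activation('sigmoid'))
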
@@ -1,13 +1,13 @@
'''Compare LSTM implementations on the IMDB sentiment classification task.
consume_less='cpu' preprocesses input to the LSTM which typically results in
implementation=0 preprocesses input to the LSTM which typically results in
faster computations at the expense of increased peak memory usage as the
preprocessed input must be kept in memory.
consume_less='mem' does away with the preprocessing, meaning that it might take
implementation=1 does away with the preprocessing, meaning that it might take
a little longer, but should require less peak memory.
consume_less='gpu' concatenates the input, output and forget gate's weights
implementation=2 concatenates the input, output and forget gate's weights
into one, large matrix, resulting in faster computation time as the GPU can
utilize more cores, at the expense of reduced regularization because the same
dropout is shared across the gates.
@@ -22,7 +22,7 @@ import matplotlib.pyplot as plt
from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Embedding, Dense, LSTM
from keras.layers import Embedding, Dense, LSTM, Dropout
from keras.datasets import imdb
max_features = 20000
@@ -30,7 +30,7 @@ max_length = 80
embedding_dim = 256
batch_size = 128
epochs = 10
modes = ['cpu', 'mem', 'gpu']
modes = [0, 1, 2]
print('Loading data...')
(X_train, y_train), (X_test, y_test) = imdb.load_data(num_words=max_features)
@@ -40,11 +40,12 @@ X_test = sequence.pad_sequences(X_test, max_length)
# Compile and train different models while measuring performance.
results = []
for mode in modes:
print('Testing mode: consume_less="{}"'.format(mode))
print('Testing mode: implementation={}'.format(mode))
model = Sequential()
model.add(Embedding(max_features, embedding_dim, input_length=max_length, dropout=0.2))
model.add(LSTM(embedding_dim, dropout_W=0.2, dropout_U=0.2, consume_less=mode))
model.add(Embedding(max_features, embedding_dim, input_length=max_length))
model.add(Dropout(0.2))
model.add(LSTM(embedding_dim, dropout=0.2, recurrent_dropout=0.2, implementation=mode))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy',
optimizer='adam',

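The renamed LSTM arguments from the hunks above fit together as in this minimal sketch, using the benchmark's own values; note that implementation=0 is only accepted by early Keras 2 releases (later ones allow only 1 and 2).

from keras.models import Sequential
from keras.layers import Embedding, Dropout, Dense, LSTM

max_features = 20000
max_length = 80
embedding_dim = 256

for mode in [0, 1, 2]:
    model = Sequential()
    # Embedding(..., dropout=0.2) becomes a separate Dropout layer
    model.add(Embedding(max_features, embedding_dim, input_length=max_length))
    model.add(Dropout(0.2))
    # dropout_W/dropout_U/consume_less map to dropout/recurrent_dropout/implementation
    model.add(LSTM(embedding_dim, dropout=0.2, recurrent_dropout=0.2,
                   implementation=mode))
    model.add(Dense(1, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
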
@@ -57,17 +57,17 @@ def build_generator(latent_size):
# upsample to (..., 14, 14)
cnn.add(UpSampling2D(size=(2, 2)))
cnn.add(Convolution2D(256, 5, 5, border_mode='same',
activation='relu', init='glorot_normal'))
cnn.add(Convolution2D(256, 5, padding='same',
activation='relu', kernel_initializer='glorot_normal'))
# upsample to (..., 28, 28)
cnn.add(UpSampling2D(size=(2, 2)))
cnn.add(Convolution2D(128, 5, 5, border_mode='same',
activation='relu', init='glorot_normal'))
cnn.add(Convolution2D(128, 5, padding='same',
activation='relu', kernel_initializer='glorot_normal'))
# take a channel axis reduction
cnn.add(Convolution2D(1, 2, 2, border_mode='same',
activation='tanh', init='glorot_normal'))
cnn.add(Convolution2D(1, 2, padding='same',
activation='tanh', kernel_initializer='glorot_normal'))
# this is the z space commonly referred to in GAN papers
latent = Input(shape=(latent_size, ))
@@ -77,14 +77,14 @@ def build_generator(latent_size):
# 10 classes in MNIST
cls = Flatten()(Embedding(10, latent_size,
init='glorot_normal')(image_class))
embeddings_initializer='glorot_normal')(image_class))
# hadamard product between z-space and a class conditional embedding
h = merge([latent, cls], mode='mul')
fake_image = cnn(h)
return Model(input=[latent, image_class], output=fake_image)
return Model([latent, image_class], fake_image)
def build_discriminator():
@@ -92,20 +92,20 @@ def build_discriminator():
# the reference paper
cnn = Sequential()
cnn.add(Convolution2D(32, 3, 3, border_mode='same', subsample=(2, 2),
cnn.add(Convolution2D(32, 3, padding='same', strides=2,
input_shape=(1, 28, 28)))
cnn.add(LeakyReLU())
cnn.add(Dropout(0.3))
cnn.add(Convolution2D(64, 3, 3, border_mode='same', subsample=(1, 1)))
cnn.add(Convolution2D(64, 3, padding='same', strides=2))
cnn.add(LeakyReLU())
cnn.add(Dropout(0.3))
cnn.add(Convolution2D(128, 3, 3, border_mode='same', subsample=(2, 2)))
cnn.add(Convolution2D(128, 3, padding='same', strides=2))
cnn.add(LeakyReLU())
cnn.add(Dropout(0.3))
cnn.add(Convolution2D(256, 3, 3, border_mode='same', subsample=(1, 1)))
cnn.add(Convolution2D(256, 3, padding='same', strides=1))
cnn.add(LeakyReLU())
cnn.add(Dropout(0.3))
@@ -122,12 +122,12 @@ def build_discriminator():
fake = Dense(1, activation='sigmoid', name='generation')(features)
aux = Dense(10, activation='softmax', name='auxiliary')(features)
return Model(input=image, output=[fake, aux])
return Model(image, [fake, aux])
if __name__ == '__main__':
# batch and latent size taken from the paper
epochss = 50
epochs = 50
batch_size = 100
latent_size = 100
@@ -156,7 +156,7 @@ if __name__ == '__main__':
# we only want to be able to train generation for the combined model
discriminator.trainable = False
fake, aux = discriminator(fake)
combined = Model(input=[latent, image_class], output=[fake, aux])
combined = Model([latent, image_class], [fake, aux])
combined.compile(
optimizer=Adam(lr=adam_lr, beta_1=adam_beta_1),
@@ -177,8 +177,8 @@ if __name__ == '__main__':
train_history = defaultdict(list)
test_history = defaultdict(list)
for epoch in range(epochss):
print('Epoch {} of {}'.format(epoch + 1, epochss))
for epoch in range(epochs):
print('Epoch {} of {}'.format(epoch + 1, epochs))
num_batches = int(X_train.shape[0] / batch_size)
progress_bar = Progbar(target=num_batches)

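The ACGAN hunks are mostly layer-argument renames; a condensed sketch of those mappings follows. The Dense stand-in and the use of keras.layers.multiply (in place of the deprecated merge(..., mode='mul') that the diff leaves untouched) are illustrative assumptions, not part of the actual example.

from keras.models import Model
from keras.layers import Input, Dense, Embedding, Flatten, Conv2D, multiply

latent_size = 100

# Convolution2D(256, 5, 5, border_mode='same', subsample=(2, 2), init='glorot_normal')
# maps to Conv2D(256, 5, padding='same', strides=(2, 2), kernel_initializer='glorot_normal')
conv = Conv2D(256, 5, padding='same', strides=(2, 2),
              kernel_initializer='glorot_normal')

latent = Input(shape=(latent_size,))
image_class = Input(shape=(1,), dtype='int32')

# init= on Embedding maps to embeddings_initializer=
cls = Flatten()(Embedding(10, latent_size,
                          embeddings_initializer='glorot_normal')(image_class))

# hadamard product of z-space and class embedding; multiply() assumed available
h = multiply([latent, cls])
out = Dense(1, activation='sigmoid')(h)  # stand-in for the generator/discriminator CNNs

# Model(input=..., output=...) keywords become positional arguments
combined = Model([latent, image_class], out)
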
@@ -35,53 +35,53 @@ from keras.utils import np_utils
# Training parameters.
batch_size = 32
num_classes = 10
epochss = 5
epochs = 5
# Embedding dimensions.
row_hidden = 128
col_hidden = 128
# The data, shuffled and split between train and test sets.
(X_train, y_train), (X_test, y_test) = mnist.load_data()
(x_train, y_train), (x_test, y_test) = mnist.load_data()
# Reshapes data to 4D for Hierarchical RNN.
X_train = X_train.reshape(X_train.shape[0], 28, 28, 1)
X_test = X_test.reshape(X_test.shape[0], 28, 28, 1)
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255
X_test /= 255
print('X_train shape:', X_train.shape)
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')
x_train = x_train.reshape(x_train.shape[0], 28, 28, 1)
x_test = x_test.reshape(x_test.shape[0], 28, 28, 1)
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')
# Converts class vectors to binary class matrices.
Y_train = np_utils.to_categorical(y_train, num_classes)
Y_test = np_utils.to_categorical(y_test, num_classes)
row, col, pixel = X_train.shape[1:]
row, col, pixel = x_train.shape[1:]
# 4D input.
x = Input(shape=(row, col, pixel))
# Encodes a row of pixels using TimeDistributed Wrapper.
encoded_rows = TimeDistributed(LSTM(output_dim=row_hidden))(x)
encoded_rows = TimeDistributed(LSTM(row_hidden))(x)
# Encodes columns of encoded rows.
encoded_columns = LSTM(col_hidden)(encoded_rows)
# Final predictions and model.
prediction = Dense(num_classes, activation='softmax')(encoded_columns)
model = Model(input=x, output=prediction)
model = Model(x, prediction)
model.compile(loss='categorical_crossentropy',
optimizer='rmsprop',
metrics=['accuracy'])
# Training.
model.fit(X_train, Y_train, batch_size=batch_size, epochs=epochss,
verbose=1, validation_data=(X_test, Y_test))
model.fit(x_train, Y_train, batch_size=batch_size, epochs=epochs,
verbose=1, validation_data=(x_test, Y_test))
# Evaluation.
scores = model.evaluate(X_test, Y_test, verbose=0)
scores = model.evaluate(x_test, Y_test, verbose=0)
print('Test loss:', scores[0])
print('Test accuracy:', scores[1])

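The hierarchical-RNN changes boil down to dropping output_dim and the Model keywords; a minimal model-construction sketch with the values from the hunks above:

from keras.models import Model
from keras.layers import Input, Dense, LSTM, TimeDistributed

row_hidden = 128
col_hidden = 128
num_classes = 10
row, col, pixel = 28, 28, 1

x = Input(shape=(row, col, pixel))
# LSTM(output_dim=row_hidden) becomes LSTM(row_hidden)
encoded_rows = TimeDistributed(LSTM(row_hidden))(x)
encoded_columns = LSTM(col_hidden)(encoded_rows)
prediction = Dense(num_classes, activation='softmax')(encoded_columns)
# Model(input=x, output=prediction) becomes Model(x, prediction)
model = Model(x, prediction)
model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])
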
@@ -25,7 +25,7 @@ from keras.utils import np_utils
batch_size = 32
num_classes = 10
epochss = 200
epochs = 200
hidden_units = 100
learning_rate = 1e-6
@@ -50,9 +50,9 @@ Y_test = np_utils.to_categorical(y_test, num_classes)
print('Evaluate IRNN...')
model = Sequential()
model.add(SimpleRNN(output_dim=hidden_units,
init=initializers.RandomNormal(stddev=0.001),
inner_init=initializers.Identity(gain=1.0),
model.add(SimpleRNN(hidden_units,
kernel_initializer=initializers.RandomNormal(stddev=0.001),
recurrent_initializer=initializers.Identity(gain=1.0),
activation='relu',
input_shape=X_train.shape[1:]))
model.add(Dense(num_classes))
@@ -62,7 +62,7 @@ model.compile(loss='categorical_crossentropy',
optimizer=rmsprop,
metrics=['accuracy'])
model.fit(X_train, Y_train, batch_size=batch_size, epochs=epochss,
model.fit(X_train, Y_train, batch_size=batch_size, epochs=epochs,
verbose=1, validation_data=(X_test, Y_test))
scores = model.evaluate(X_test, Y_test, verbose=0)

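The IRNN hunk is the same pattern applied to SimpleRNN; a sketch of just the model definition, where the (784, 1) input shape (one pixel per timestep) is assumed from the rest of the example rather than shown in the hunks:

from keras import initializers
from keras.models import Sequential
from keras.layers import SimpleRNN, Dense, Activation

hidden_units = 100
num_classes = 10

model = Sequential()
# output_dim becomes the positional units argument,
# init/inner_init become kernel_initializer/recurrent_initializer
model.add(SimpleRNN(hidden_units,
                    kernel_initializer=initializers.RandomNormal(stddev=0.001),
                    recurrent_initializer=initializers.Identity(gain=1.0),
                    activation='relu',
                    input_shape=(784, 1)))  # assumed; X_train.shape[1:] in the example
model.add(Dense(num_classes))
model.add(Activation('softmax'))
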
@@ -79,22 +79,22 @@ def compute_accuracy(predictions, labels):
# the data, shuffled and split between train and test sets
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train = X_train.reshape(60000, 784)
X_test = X_test.reshape(10000, 784)
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255
X_test /= 255
(x_train, y_train), (x_test, y_test) = mnist.load_data()
x_train = x_train.reshape(60000, 784)
x_test = x_test.reshape(10000, 784)
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
input_dim = 784
epochs = 20
# create training+test positive and negative pairs
digit_indices = [np.where(y_train == i)[0] for i in range(10)]
tr_pairs, tr_y = create_pairs(X_train, digit_indices)
tr_pairs, tr_y = create_pairs(x_train, digit_indices)
digit_indices = [np.where(y_test == i)[0] for i in range(10)]
te_pairs, te_y = create_pairs(X_test, digit_indices)
te_pairs, te_y = create_pairs(x_test, digit_indices)
# network definition
base_network = create_base_network(input_dim)
@@ -110,7 +110,7 @@ processed_b = base_network(input_b)
distance = Lambda(euclidean_distance, output_shape=eucl_dist_output_shape)([processed_a, processed_b])
model = Model(input=[input_a, input_b], output=distance)
model = Model([input_a, input_b], distance)
# train
rms = RMSprop()

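For the siamese example only the Model call changes; a simplified two-input sketch is below, with a one-layer stand-in for create_base_network() and an inline output_shape lambda standing in for eucl_dist_output_shape.

from keras import backend as K
from keras.models import Model, Sequential
from keras.layers import Input, Dense, Lambda

input_dim = 784

def euclidean_distance(vects):
    x, y = vects
    return K.sqrt(K.sum(K.square(x - y), axis=1, keepdims=True))

# simplified stand-in for the example's deeper Dense/Dropout base network
base_network = Sequential([Dense(128, activation='relu', input_shape=(input_dim,))])

input_a = Input(shape=(input_dim,))
input_b = Input(shape=(input_dim,))
processed_a = base_network(input_a)
processed_b = base_network(input_b)

distance = Lambda(euclidean_distance,
                  output_shape=lambda shapes: (shapes[0][0], 1))([processed_a, processed_b])

# Model(input=[...], output=...) keywords become positional arguments
model = Model([input_a, input_b], distance)
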
@@ -20,7 +20,7 @@ np.random.seed(1337) # for reproducibility
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.layers import Conv2D, MaxPooling2D
from keras.utils import np_utils
from keras import backend as K
@@ -33,7 +33,7 @@ epochs = 5
# input image dimensions
img_rows, img_cols = 28, 28
# number of convolutional filters to use
filterss = 32
filters = 32
# size of pooling area for max pooling
pool_size = 2
# convolution kernel size
@@ -46,61 +46,62 @@ else:
def train_model(model, train, test, num_classes):
X_train = train[0].reshape((train[0].shape[0],) + input_shape)
X_test = test[0].reshape((test[0].shape[0],) + input_shape)
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255
X_test /= 255
print('X_train shape:', X_train.shape)
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')
x_train = train[0].reshape((train[0].shape[0],) + input_shape)
x_test = test[0].reshape((test[0].shape[0],) + input_shape)
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')
# convert class vectors to binary class matrices
Y_train = np_utils.to_categorical(train[1], num_classes)
Y_test = np_utils.to_categorical(test[1], num_classes)
y_train = np_utils.to_categorical(train[1], num_classes)
y_test = np_utils.to_categorical(test[1], num_classes)
model.compile(loss='categorical_crossentropy',
optimizer='adadelta',
metrics=['accuracy'])
t = now()
model.fit(X_train, Y_train,
model.fit(x_train, y_train,
batch_size=batch_size, epochs=epochs,
verbose=1,
validation_data=(X_test, Y_test))
validation_data=(x_test, y_test))
print('Training time: %s' % (now() - t))
score = model.evaluate(X_test, Y_test, verbose=0)
score = model.evaluate(x_test, y_test, verbose=0)
print('Test score:', score[0])
print('Test accuracy:', score[1])
# the data, shuffled and split between train and test sets
(X_train, y_train), (X_test, y_test) = mnist.load_data()
(x_train, y_train), (x_test, y_test) = mnist.load_data()
# create two datasets one with digits below 5 and one with 5 and above
X_train_lt5 = X_train[y_train < 5]
x_train_lt5 = x_train[y_train < 5]
y_train_lt5 = y_train[y_train < 5]
X_test_lt5 = X_test[y_test < 5]
x_test_lt5 = x_test[y_test < 5]
y_test_lt5 = y_test[y_test < 5]
X_train_gte5 = X_train[y_train >= 5]
x_train_gte5 = x_train[y_train >= 5]
y_train_gte5 = y_train[y_train >= 5] - 5 # make classes start at 0 for
X_test_gte5 = X_test[y_test >= 5] # np_utils.to_categorical
x_test_gte5 = x_test[y_test >= 5] # np_utils.to_categorical
y_test_gte5 = y_test[y_test >= 5] - 5
# define two groups of layers: feature (convolutions) and classification (dense)
feature_layers = [
Convolution2D(filterss, kernel_size, kernel_size,
border_mode='valid',
input_shape=input_shape),
Conv2D(filters, kernel_size,
padding='valid',
input_shape=input_shape),
Activation('relu'),
Convolution2D(filterss, kernel_size, kernel_size),
Conv2D(filters, kernel_size),
Activation('relu'),
MaxPooling2D(pool_size=(pool_size, pool_size)),
MaxPooling2D(pool_size=pool_size),
Dropout(0.25),
Flatten(),
]
classification_layers = [
Dense(128),
Activation('relu'),
@@ -114,8 +115,8 @@ model = Sequential(feature_layers + classification_layers)
# train model for 5-digit classification [0..4]
train_model(model,
(X_train_lt5, y_train_lt5),
(X_test_lt5, y_test_lt5), num_classes)
(x_train_lt5, y_train_lt5),
(x_test_lt5, y_test_lt5), num_classes)
# freeze feature layers and rebuild model
for l in feature_layers:
@@ -123,5 +124,5 @@ for l in feature_layers:
# transfer: train dense layers for new classification task [5..9]
train_model(model,
(X_train_gte5, y_train_gte5),
(X_test_gte5, y_test_gte5), num_classes)
(x_train_gte5, y_train_gte5),
(x_test_gte5, y_test_gte5), num_classes)
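
Finally, the Keras 2 spelling of the transfer-CNN layer lists; filters and pool_size come from the hunks above, while kernel_size, num_classes and the channels_last input_shape are assumed so the sketch is self-contained.

from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dense, Dropout, Activation, Flatten

filters = 32
kernel_size = 3          # assumed; not shown in the hunks above
pool_size = 2
num_classes = 5          # assumed; each half of the transfer task has 5 digit classes
input_shape = (28, 28, 1)  # assumed channels_last ordering

# Convolution2D(filters, k, k, border_mode='valid') maps to
# Conv2D(filters, kernel_size, padding='valid'); scalar sizes are accepted
feature_layers = [
    Conv2D(filters, kernel_size,
           padding='valid',
           input_shape=input_shape),
    Activation('relu'),
    Conv2D(filters, kernel_size),
    Activation('relu'),
    MaxPooling2D(pool_size=pool_size),
    Dropout(0.25),
    Flatten(),
]
classification_layers = [
    Dense(128),
    Activation('relu'),
    Dropout(0.5),
    Dense(num_classes),
    Activation('softmax'),
]
model = Sequential(feature_layers + classification_layers)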