Update a number of example scripts.

fchollet 2017-02-19 19:24:32 -08:00
parent e092fa8b57
commit 803e2869c7
7 changed files with 120 additions and 121 deletions

@@ -23,12 +23,10 @@ Four digits inverted:
Five digits inverted:
+ One layer LSTM (128 HN), 550k training examples = 99% train/test accuracy in 30 epochs
'''
from __future__ import print_function
from keras.models import Sequential
from keras.engine.training import slice_X
from keras.layers import Activation, TimeDistributed, Dense, RepeatVector, recurrent
import numpy as np
from six.moves import range
@@ -57,15 +55,15 @@ class CharacterTable(object):
num_rows: Number of rows in the returned one hot encoding. This is
used to keep the number of rows the same for every data entry.
"""
X = np.zeros((num_rows, len(self.chars)))
x = np.zeros((num_rows, len(self.chars)))
for i, c in enumerate(C):
X[i, self.char_indices[c]] = 1
return X
x[i, self.char_indices[c]] = 1
return x
def decode(self, X, calc_argmax=True):
def decode(self, x, calc_argmax=True):
if calc_argmax:
X = X.argmax(axis=-1)
return ''.join(self.indices_char[x] for x in X)
x = x.argmax(axis=-1)
return ''.join(self.indices_char[x] for x in x)
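As a reference for the rename, here is the encoder/decoder round-trip as a minimal self-contained sketch (the character set and example string below are illustrative, not taken from the script):

import numpy as np

class CharacterTable(object):
    # Encode characters to one-hot rows and decode them back.
    def __init__(self, chars):
        self.chars = sorted(set(chars))
        self.char_indices = dict((c, i) for i, c in enumerate(self.chars))
        self.indices_char = dict((i, c) for i, c in enumerate(self.chars))

    def encode(self, C, num_rows):
        # One row per character; trailing rows stay all-zero padding.
        x = np.zeros((num_rows, len(self.chars)))
        for i, c in enumerate(C):
            x[i, self.char_indices[c]] = 1
        return x

    def decode(self, x, calc_argmax=True):
        if calc_argmax:
            x = x.argmax(axis=-1)
        return ''.join(self.indices_char[i] for i in x)

ctable = CharacterTable('0123456789+ ')
one_hot = ctable.encode('12+3', num_rows=7)
assert one_hot.shape == (7, 12)             # 12 distinct characters
assert ctable.decode(one_hot) == '12+3   '  # zero rows decode to spaces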
class colors:
@@ -80,7 +78,7 @@ INVERT = True
# Maximum length of input is 'int + int' (e.g., '345+678'). Maximum length of
# int is DIGITS.
MAXLEN = DIGITS + 1 + DIGITS
# All the numbers, plus sign and space for padding.
chars = '0123456789+ '
@@ -95,14 +93,14 @@ while len(questions) < TRAINING_SIZE:
for i in range(np.random.randint(1, DIGITS + 1))))
a, b = f(), f()
# Skip any addition questions we've already seen
# Also skip any such that X+Y == Y+X (hence the sorting).
# Also skip any such that x+Y == Y+x (hence the sorting).
key = tuple(sorted((a, b)))
if key in seen:
continue
seen.add(key)
# Pad the data with spaces such that it is always MAXLEN.
q = '{}+{}'.format(a, b)
query = q + ' ' * (MAXLEN - len(q))
ans = str(a + b)
# Answers can be of maximum size DIGITS + 1.
ans += ' ' * (DIGITS + 1 - len(ans))
@@ -115,31 +113,31 @@ while len(questions) < TRAINING_SIZE:
print('Total addition questions:', len(questions))
print('Vectorization...')
X = np.zeros((len(questions), MAXLEN, len(chars)), dtype=np.bool)
x = np.zeros((len(questions), MAXLEN, len(chars)), dtype=np.bool)
y = np.zeros((len(questions), DIGITS + 1, len(chars)), dtype=np.bool)
for i, sentence in enumerate(questions):
X[i] = ctable.encode(sentence, MAXLEN)
x[i] = ctable.encode(sentence, MAXLEN)
for i, sentence in enumerate(expected):
y[i] = ctable.encode(sentence, DIGITS + 1)
# Shuffle (X, y) in unison as the later parts of X will almost all be larger
# Shuffle (x, y) in unison as the later parts of x will almost all be larger
# digits.
indices = np.arange(len(y))
np.random.shuffle(indices)
X = X[indices]
x = x[indices]
y = y[indices]
# Explicitly set apart 10% for validation data that we never train over.
split_at = len(X) - len(X) // 10
(X_train, X_val) = (slice_X(X, 0, split_at), slice_X(X, split_at))
(y_train, y_val) = (y[:split_at], y[split_at:])
split_at = len(x) - len(x) // 10
(x_train, x_val) = x[:split_at], x[split_at:]
(y_train, y_val) = y[:split_at], y[split_at:]
print('Training Data:')
print(X_train.shape)
print(x_train.shape)
print(y_train.shape)
print('Validation Data:')
print(X_val.shape)
print(x_val.shape)
print(y_val.shape)
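Dropping slice_X in favor of plain NumPy slicing behaves identically for arrays; a toy sketch of the 90/10 hold-out used above (shapes are made up):

import numpy as np

x = np.random.random((50, 7, 12))   # 50 fake one-hot encoded questions
y = np.random.random((50, 4, 12))

split_at = len(x) - len(x) // 10    # keep the last 10% for validation
(x_train, x_val) = x[:split_at], x[split_at:]
(y_train, y_val) = y[:split_at], y[split_at:]

assert x_train.shape[0] == 45 and x_val.shape[0] == 5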
# Try replacing GRU, or SimpleRNN.
@@ -153,7 +151,7 @@ model = Sequential()
# "Encode" the input sequence using an RNN, producing an output of HIDDEN_SIZE.
# Note: In a situation where your input sequences have a variable length,
# use input_shape=(None, num_feature).
model.add(RNN(HIDDEN_SIZE, input_shape=(MAXLEN, len(chars))))
# As the decoder RNN's input, repeatedly provide the last hidden state of
# the RNN for each time step. Repeat 'DIGITS + 1' times as that's the maximum
# length of output, e.g., when DIGITS=3, max output is 999+999=1998.
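Assembled, the encoder/decoder described by these comments comes out as below; a sketch assuming LSTM for RNN and the Keras 2 layer names used elsewhere in this commit:

from keras.models import Sequential
from keras.layers import LSTM, RepeatVector, TimeDistributed, Dense, Activation

MAXLEN, DIGITS, HIDDEN_SIZE = 7, 3, 128
chars = '0123456789+ '

model = Sequential()
# Encoder: read MAXLEN one-hot characters, emit one HIDDEN_SIZE vector.
model.add(LSTM(HIDDEN_SIZE, input_shape=(MAXLEN, len(chars))))
# Feed that vector to the decoder once per output time step.
model.add(RepeatVector(DIGITS + 1))
# Decoder: return the full sequence, then score every step over the charset.
model.add(LSTM(HIDDEN_SIZE, return_sequences=True))
model.add(TimeDistributed(Dense(len(chars))))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy', optimizer='adam',
              metrics=['accuracy'])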
@@ -181,15 +179,15 @@ for iteration in range(1, 200):
print()
print('-' * 50)
print('Iteration', iteration)
model.fit(X_train, y_train, batch_size=BATCH_SIZE, epochs=1,
validation_data=(X_val, y_val))
model.fit(x_train, y_train, batch_size=BATCH_SIZE, epochs=1,
validation_data=(x_val, y_val))
# Select 10 samples from the validation set at random so we can visualize
# errors.
for i in range(10):
ind = np.random.randint(0, len(X_val))
rowX, rowy = X_val[np.array([ind])], y_val[np.array([ind])]
preds = model.predict_classes(rowX, verbose=0)
q = ctable.decode(rowX[0])
ind = np.random.randint(0, len(x_val))
rowx, rowy = x_val[np.array([ind])], y_val[np.array([ind])]
preds = model.predict_classes(rowx, verbose=0)
q = ctable.decode(rowx[0])
correct = ctable.decode(rowy[0])
guess = ctable.decode(preds[0], calc_argmax=False)
print('Q', q[::-1] if INVERT else q)
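The q[::-1] above undoes the INVERT preprocessing: with INVERT enabled, queries are stored reversed, which tends to shorten the dependency span the encoder has to bridge. A one-line illustration:

q = ' 543+21'        # how '12+345 ' is stored when INVERT is True
print(q[::-1])       # '12+345 ' — the human-readable form printed above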

@@ -52,11 +52,11 @@ class Antirectifier(Layer):
shape[-1] *= 2
return tuple(shape)
def call(self, x, mask=None):
x -= K.mean(x, axis=1, keepdims=True)
x = K.l2_normalize(x, axis=1)
pos = K.relu(x)
neg = K.relu(-x)
def call(self, inputs):
inputs -= K.mean(inputs, axis=1, keepdims=True)
inputs = K.l2_normalize(inputs, axis=1)
pos = K.relu(inputs)
neg = K.relu(-inputs)
return K.concatenate([pos, neg], axis=1)
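The new call signature drops the unused mask argument; the math itself is easy to sanity-check outside Keras. A NumPy sketch of the same computation on made-up data:

import numpy as np

def antirectifier(inputs):
    # Center each sample, L2-normalize it, then concatenate the positive
    # and negative rectifications: the feature axis doubles.
    inputs = inputs - inputs.mean(axis=1, keepdims=True)
    inputs = inputs / np.linalg.norm(inputs, axis=1, keepdims=True)
    return np.concatenate([np.maximum(inputs, 0), np.maximum(-inputs, 0)],
                          axis=1)

batch = np.random.random((4, 8))
assert antirectifier(batch).shape == (4, 16)  # matches shape[-1] *= 2 above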
# global parameters
@@ -65,16 +65,16 @@ num_classes = 10
epochs = 40
# the data, shuffled and split between train and test sets
(X_train, y_train), (X_test, y_test) = mnist.load_data()
(x_train, y_train), (x_test, y_test) = mnist.load_data()
X_train = X_train.reshape(60000, 784)
X_test = X_test.reshape(10000, 784)
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255
X_test /= 255
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')
x_train = x_train.reshape(60000, 784)
x_test = x_test.reshape(10000, 784)
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')
# convert class vectors to binary class matrices
Y_train = np_utils.to_categorical(y_train, num_classes)
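to_categorical maps integer class labels to one-hot rows, which is what the categorical crossentropy loss below expects. For instance:

import numpy as np
from keras.utils import np_utils

print(np_utils.to_categorical(np.array([0, 2, 1]), 3))
# [[ 1.  0.  0.]
#  [ 0.  0.  1.]
#  [ 0.  1.  0.]]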
@@ -97,9 +97,9 @@ model.compile(loss='categorical_crossentropy',
metrics=['accuracy'])
# train the model
model.fit(X_train, Y_train,
model.fit(x_train, Y_train,
batch_size=batch_size, epochs=epochs,
verbose=1, validation_data=(X_test, Y_test))
verbose=1, validation_data=(x_test, Y_test))
# next, compare with an equivalent network
# with 2x bigger Dense layers and ReLU

@@ -12,7 +12,7 @@ QA2 - Two Supporting Facts | 20 | 50.0
QA3 - Three Supporting Facts | 20 | 20.5
QA4 - Two Arg. Relations | 61 | 62.9
QA5 - Three Arg. Relations | 70 | 61.9
QA6 - Yes/No Questions | 48 | 50.7
QA7 - Counting | 49 | 78.9
QA8 - Lists/Sets | 45 | 77.2
QA9 - Simple Negation | 64 | 64.0
@@ -62,13 +62,12 @@ import re
import tarfile
import numpy as np
np.random.seed(1337) # for reproducibility
from keras.utils.data_utils import get_file
from keras.layers.embeddings import Embedding
from keras.layers import Dense, Merge, Dropout, RepeatVector
from keras import layers
from keras.layers import recurrent
from keras.models import Sequential
from keras.models import Model
from keras.preprocessing.sequence import pad_sequences
@@ -125,26 +124,26 @@ def get_stories(f, only_supporting=False, max_length=None):
def vectorize_stories(data, word_idx, story_maxlen, query_maxlen):
X = []
Xq = []
Y = []
xs = []
xqs = []
ys = []
for story, query, answer in data:
x = [word_idx[w] for w in story]
xq = [word_idx[w] for w in query]
y = np.zeros(len(word_idx) + 1) # let's not forget that index 0 is reserved
y[word_idx[answer]] = 1
X.append(x)
Xq.append(xq)
Y.append(y)
return pad_sequences(X, maxlen=story_maxlen), pad_sequences(Xq, maxlen=query_maxlen), np.array(Y)
xs.append(x)
xqs.append(xq)
ys.append(y)
return pad_sequences(xs, maxlen=story_maxlen), pad_sequences(xqs, maxlen=query_maxlen), np.array(ys)
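vectorize_stories relies on pad_sequences to make the ragged index lists rectangular; by default it pads with zeros on the left, which is why index 0 is reserved above. For example:

from keras.preprocessing.sequence import pad_sequences

print(pad_sequences([[1, 2, 3], [4, 5]], maxlen=4))
# [[0 1 2 3]
#  [0 0 4 5]]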
RNN = recurrent.LSTM
EMBED_HIDDEN_SIZE = 50
SENT_HIDDEN_SIZE = 100
QUERY_HIDDEN_SIZE = 100
BATCH_SIZE = 32
EPOCHS = 40
print('RNN / Embed / Sent / Query = {}, {}, {}, {}'.format(RNN, EMBED_HIDDEN_SIZE, SENT_HIDDEN_SIZE, QUERY_HIDDEN_SIZE))
try:
path = get_file('babi-tasks-v1-2.tar.gz', origin='https://s3.amazonaws.com/text-datasets/babi_tasks_1-20_v1-2.tar.gz')
@@ -172,40 +171,38 @@ word_idx = dict((c, i + 1) for i, c in enumerate(vocab))
story_maxlen = max(map(len, (x for x, _, _ in train + test)))
query_maxlen = max(map(len, (x for _, x, _ in train + test)))
X, Xq, Y = vectorize_stories(train, word_idx, story_maxlen, query_maxlen)
tX, tXq, tY = vectorize_stories(test, word_idx, story_maxlen, query_maxlen)
x, xq, y = vectorize_stories(train, word_idx, story_maxlen, query_maxlen)
tx, txq, ty = vectorize_stories(test, word_idx, story_maxlen, query_maxlen)
print('vocab = {}'.format(vocab))
print('X.shape = {}'.format(X.shape))
print('Xq.shape = {}'.format(Xq.shape))
print('Y.shape = {}'.format(Y.shape))
print('x.shape = {}'.format(x.shape))
print('xq.shape = {}'.format(xq.shape))
print('y.shape = {}'.format(y.shape))
print('story_maxlen, query_maxlen = {}, {}'.format(story_maxlen, query_maxlen))
print('Build model...')
sentrnn = Sequential()
sentrnn.add(Embedding(vocab_size, EMBED_HIDDEN_SIZE,
input_length=story_maxlen))
sentrnn.add(Dropout(0.3))
sentence = layers.Input(shape=(story_maxlen,), dtype='int32')
encoded_sentence = layers.Embedding(vocab_size, EMBED_HIDDEN_SIZE)(sentence)
encoded_sentence = layers.Dropout(0.3)(encoded_sentence)
qrnn = Sequential()
qrnn.add(Embedding(vocab_size, EMBED_HIDDEN_SIZE,
input_length=query_maxlen))
qrnn.add(Dropout(0.3))
qrnn.add(RNN(EMBED_HIDDEN_SIZE, return_sequences=False))
qrnn.add(RepeatVector(story_maxlen))
question = layers.Input(shape=(query_maxlen,), dtype='int32')
encoded_question = layers.Embedding(vocab_size, EMBED_HIDDEN_SIZE)(question)
encoded_question = layers.Dropout(0.3)(encoded_question)
encoded_question = RNN(EMBED_HIDDEN_SIZE)(encoded_question)
encoded_question = layers.RepeatVector(story_maxlen)(encoded_question)
model = Sequential()
model.add(Merge([sentrnn, qrnn], mode='sum'))
model.add(RNN(EMBED_HIDDEN_SIZE, return_sequences=False))
model.add(Dropout(0.3))
model.add(Dense(vocab_size, activation='softmax'))
merged = layers.sum([encoded_sentence, encoded_question])
merged = RNN(EMBED_HIDDEN_SIZE)(merged)
merged = layers.Dropout(0.3)(merged)
preds = layers.Dense(vocab_size, activation='softmax')(merged)
model = Model([sentence, question], preds)
model.compile(optimizer='adam',
loss='categorical_crossentropy',
metrics=['accuracy'])
print('Training')
model.fit([X, Xq], Y, batch_size=BATCH_SIZE, epochs=EPOCHS, validation_split=0.05)
loss, acc = model.evaluate([tX, tXq], tY, batch_size=BATCH_SIZE)
model.fit([x, xq], y, batch_size=BATCH_SIZE, epochs=EPOCHS, validation_split=0.05)
loss, acc = model.evaluate([tx, txq], ty, batch_size=BATCH_SIZE)
print('Test loss / test accuracy = {:.4f} / {:.4f}'.format(loss, acc))
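The rewrite above is the standard translation from Sequential plus Merge to the functional API. Stripped to its skeleton, the two-input pattern looks like this (layer sizes are illustrative; in released Keras 2 the merge-by-sum helper is spelled layers.add):

from keras import layers
from keras.models import Model

a = layers.Input(shape=(8,))
b = layers.Input(shape=(8,))
merged = layers.add([a, b])             # replaces Merge(..., mode='sum')
preds = layers.Dense(1, activation='sigmoid')(merged)
model = Model([a, b], preds)
model.compile(optimizer='adam', loss='binary_crossentropy')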

@@ -1,7 +1,7 @@
'''Train a simple deep CNN on the CIFAR10 small images dataset.
GPU run command with Theano backend (with TensorFlow, the GPU is automatically used):
THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32 python cifar10_cnn.py
It gets down to 0.65 test logloss in 25 epochs, and down to 0.55 after 50 epochs.
(it's still underfitting at that point, though).
@@ -12,7 +12,7 @@ from keras.datasets import cifar10
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, MaxPooling2D
from keras.layers import Conv2D, MaxPooling2D
from keras.utils import np_utils
batch_size = 32
@@ -26,28 +26,28 @@ img_rows, img_cols = 32, 32
img_channels = 3
# The data, shuffled and split between train and test sets:
(X_train, y_train), (X_test, y_test) = cifar10.load_data()
print('X_train shape:', X_train.shape)
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
print('x_train shape:', x_train.shape)
print(x_train.shape[0], 'train samples')
print(x_test.shape[0], 'test samples')
# Convert class vectors to binary class matrices.
Y_train = np_utils.to_categorical(y_train, num_classes)
Y_test = np_utils.to_categorical(y_test, num_classes)
y_train = np_utils.to_categorical(y_train, num_classes)
y_test = np_utils.to_categorical(y_test, num_classes)
model = Sequential()
model.add(Convolution2D(32, 3, 3, border_mode='same',
input_shape=X_train.shape[1:]))
model.add(Conv2D(32, (3, 3), padding='same',
input_shape=x_train.shape[1:]))
model.add(Activation('relu'))
model.add(Convolution2D(32, 3, 3))
model.add(Conv2D(32, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
model.add(Convolution2D(64, 3, 3, border_mode='same'))
model.add(Conv2D(64, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(Convolution2D(64, 3, 3))
model.add(Conv2D(64, (3, 3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))
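Every conv change in this file follows the same Keras 1 to Keras 2 translation: the two integer kernel dimensions collapse into a kernel_size tuple and border_mode is renamed padding. Side by side (illustrative):

from keras.layers import Conv2D

# Keras 1: Convolution2D(32, 3, 3, border_mode='same', input_shape=(32, 32, 3))
# Keras 2:
conv = Conv2D(32, (3, 3), padding='same', input_shape=(32, 32, 3))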
@@ -64,17 +64,17 @@ model.compile(loss='categorical_crossentropy',
optimizer='rmsprop',
metrics=['accuracy'])
X_train = X_train.astype('float32')
X_test = X_test.astype('float32')
X_train /= 255
X_test /= 255
x_train = x_train.astype('float32')
x_test = x_test.astype('float32')
x_train /= 255
x_test /= 255
if not data_augmentation:
print('Not using data augmentation.')
model.fit(X_train, Y_train,
model.fit(x_train, y_train,
batch_size=batch_size,
epochs=epochs,
validation_data=(X_test, Y_test),
validation_data=(x_test, y_test),
shuffle=True)
else:
print('Using real-time data augmentation.')
@@ -93,11 +93,11 @@ else:
# Compute quantities required for featurewise normalization
# (std, mean, and principal components if ZCA whitening is applied).
datagen.fit(X_train)
datagen.fit(x_train)
# Fit the model on the batches generated by datagen.flow().
model.fit_generator(datagen.flow(X_train, Y_train,
model.fit_generator(datagen.flow(x_train, y_train,
batch_size=batch_size),
samples_per_epoch=X_train.shape[0],
samples_per_epoch=x_train.shape[0],
epochs=epochs,
validation_data=(X_test, Y_test))
validation_data=(x_test, y_test))

@@ -3,7 +3,7 @@ This network is used to predict the next frame of an artificially
generated movie which contains moving squares.
"""
from keras.models import Sequential
from keras.layers.convolutional import Convolution3D
from keras.layers.convolutional import Conv3D
from keras.layers.convolutional_recurrent import ConvLSTM2D
from keras.layers.normalization import BatchNormalization
import numpy as np
@@ -14,27 +14,26 @@ import pylab as plt
# of identical shape.
seq = Sequential()
seq.add(ConvLSTM2D(filters=40, num_row=3, num_col=3,
seq.add(ConvLSTM2D(filters=40, kernel_size=(3, 3),
input_shape=(None, 40, 40, 1),
border_mode='same', return_sequences=True))
padding='same', return_sequences=True))
seq.add(BatchNormalization())
seq.add(ConvLSTM2D(filters=40, num_row=3, num_col=3,
border_mode='same', return_sequences=True))
seq.add(ConvLSTM2D(filters=40, kernel_size=(3, 3),
padding='same', return_sequences=True))
seq.add(BatchNormalization())
seq.add(ConvLSTM2D(filters=40, num_row=3, num_col=3,
border_mode='same', return_sequences=True))
seq.add(ConvLSTM2D(filters=40, kernel_size=(3, 3),
padding='same', return_sequences=True))
seq.add(BatchNormalization())
seq.add(ConvLSTM2D(filters=40, num_row=3, num_col=3,
border_mode='same', return_sequences=True))
seq.add(ConvLSTM2D(filters=40, kernel_size=(3, 3),
padding='same', return_sequences=True))
seq.add(BatchNormalization())
seq.add(Convolution3D(filters=1, kernel_dim1=1, kernel_dim2=3,
kernel_dim3=3, activation='sigmoid',
border_mode='same', data_format='channels_last'))
seq.add(Conv3D(filters=1, kernel_size=(3, 3, 3),
activation='sigmoid',
padding='same', data_format='channels_last'))
seq.compile(loss='binary_crossentropy', optimizer='adadelta')
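Since every ConvLSTM2D above returns sequences and the closing Conv3D pads over time as well, the stack maps a movie to an equally long movie. A quick shape check against the seq model defined above, with dummy data:

import numpy as np

dummy_movies = np.random.random((2, 10, 40, 40, 1))  # 2 movies, 10 frames each
predicted = seq.predict(dummy_movies)
assert predicted.shape == (2, 10, 40, 40, 1)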

@@ -12,7 +12,6 @@ from what you see with CNNs/MLPs/etc.
'''
from __future__ import print_function
import numpy as np
np.random.seed(1337) # for reproducibility
from keras.preprocessing import sequence
from keras.models import Sequential
@@ -37,10 +36,9 @@ print('x_test shape:', x_test.shape)
print('Build model...')
model = Sequential()
model.add(Embedding(max_features, 128, dropout=0.2))
model.add(LSTM(128, dropout_W=0.2, dropout_U=0.2)) # try using a GRU instead, for fun
model.add(Dense(1))
model.add(Activation('sigmoid'))
model.add(Embedding(max_features, 128))
model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(1, activation='sigmoid'))
# try using different optimizers and different optimizer configs
model.compile(loss='binary_crossentropy',

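The imdb_lstm change above is mostly the dropout rename: dropout now applies to the input transform and recurrent_dropout to the recurrent state (formerly dropout_W and dropout_U), with the separate Activation layer folded into Dense. The updated stack in one runnable sketch (the optimizer here is illustrative):

from keras.models import Sequential
from keras.layers import Embedding, LSTM, Dense

max_features = 20000

model = Sequential()
model.add(Embedding(max_features, 128))
model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
model.add(Dense(1, activation='sigmoid'))
model.compile(loss='binary_crossentropy', optimizer='adam',
              metrics=['accuracy'])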
@@ -33,7 +33,14 @@ def pad_sequences(sequences, maxlen=None, dtype='int32',
ValueError: in case of invalid values for `truncating` or `padding`,
or in case of invalid shape for a `sequences` entry.
"""
lengths = [len(s) for s in sequences]
if not hasattr(sequences, '__len__'):
raise ValueError('`sequences` must be iterable.')
lengths = []
for x in sequences:
if not hasattr(x, '__len__'):
raise ValueError('`sequences` must be a list of iterables. '
'Found non-iterable: ' + str(x))
lengths.append(len(x))
num_samples = len(sequences)
if maxlen is None:
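With the new guard, malformed input now fails fast with a clear message instead of crashing inside len(). A usage sketch against the patched function:

from keras.preprocessing.sequence import pad_sequences

try:
    pad_sequences([3, 5])   # scalars, not sequences
except ValueError as err:
    print(err)  # `sequences` must be a list of iterables. Found non-iterable: 3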