Update a number of example scripts.
This commit is contained in:
parent
e092fa8b57
commit
803e2869c7
@ -23,12 +23,10 @@ Four digits inverted:
|
|||||||
|
|
||||||
Five digits inverted:
|
Five digits inverted:
|
||||||
+ One layer LSTM (128 HN), 550k training examples = 99% train/test accuracy in 30 epochs
|
+ One layer LSTM (128 HN), 550k training examples = 99% train/test accuracy in 30 epochs
|
||||||
|
|
||||||
'''
|
'''
|
||||||
|
|
||||||
from __future__ import print_function
|
from __future__ import print_function
|
||||||
from keras.models import Sequential
|
from keras.models import Sequential
|
||||||
from keras.engine.training import slice_X
|
|
||||||
from keras.layers import Activation, TimeDistributed, Dense, RepeatVector, recurrent
|
from keras.layers import Activation, TimeDistributed, Dense, RepeatVector, recurrent
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from six.moves import range
|
from six.moves import range
|
||||||
@ -57,15 +55,15 @@ class CharacterTable(object):
|
|||||||
num_rows: Number of rows in the returned one hot encoding. This is
|
num_rows: Number of rows in the returned one hot encoding. This is
|
||||||
used to keep the # of rows for each data the same.
|
used to keep the # of rows for each data the same.
|
||||||
"""
|
"""
|
||||||
X = np.zeros((num_rows, len(self.chars)))
|
x = np.zeros((num_rows, len(self.chars)))
|
||||||
for i, c in enumerate(C):
|
for i, c in enumerate(C):
|
||||||
X[i, self.char_indices[c]] = 1
|
x[i, self.char_indices[c]] = 1
|
||||||
return X
|
return x
|
||||||
|
|
||||||
def decode(self, X, calc_argmax=True):
|
def decode(self, x, calc_argmax=True):
|
||||||
if calc_argmax:
|
if calc_argmax:
|
||||||
X = X.argmax(axis=-1)
|
x = x.argmax(axis=-1)
|
||||||
return ''.join(self.indices_char[x] for x in X)
|
return ''.join(self.indices_char[x] for x in x)
|
||||||
|
|
||||||
|
|
||||||
class colors:
|
class colors:
|
||||||
@ -80,7 +78,7 @@ INVERT = True
|
|||||||
|
|
||||||
# Maximum length of input is 'int + int' (e.g., '345+678'). Maximum length of
|
# Maximum length of input is 'int + int' (e.g., '345+678'). Maximum length of
|
||||||
# int is DIGITS.
|
# int is DIGITS.
|
||||||
MAXLEN = DIGITS + 1 + DIGITS
|
MAxLEN = DIGITS + 1 + DIGITS
|
||||||
|
|
||||||
# All the numbers, plus sign and space for padding.
|
# All the numbers, plus sign and space for padding.
|
||||||
chars = '0123456789+ '
|
chars = '0123456789+ '
|
||||||
@ -95,14 +93,14 @@ while len(questions) < TRAINING_SIZE:
|
|||||||
for i in range(np.random.randint(1, DIGITS + 1))))
|
for i in range(np.random.randint(1, DIGITS + 1))))
|
||||||
a, b = f(), f()
|
a, b = f(), f()
|
||||||
# Skip any addition questions we've already seen
|
# Skip any addition questions we've already seen
|
||||||
# Also skip any such that X+Y == Y+X (hence the sorting).
|
# Also skip any such that X+Y == Y+X (hence the sorting).
|
||||||
key = tuple(sorted((a, b)))
|
key = tuple(sorted((a, b)))
|
||||||
if key in seen:
|
if key in seen:
|
||||||
continue
|
continue
|
||||||
seen.add(key)
|
seen.add(key)
|
||||||
# Pad the data with spaces such that it is always MAXLEN.
|
# Pad the data with spaces such that it is always MAxLEN.
|
||||||
q = '{}+{}'.format(a, b)
|
q = '{}+{}'.format(a, b)
|
||||||
query = q + ' ' * (MAXLEN - len(q))
|
query = q + ' ' * (MAxLEN - len(q))
|
||||||
ans = str(a + b)
|
ans = str(a + b)
|
||||||
# Answers can be of maximum size DIGITS + 1.
|
# Answers can be of maximum size DIGITS + 1.
|
||||||
ans += ' ' * (DIGITS + 1 - len(ans))
|
ans += ' ' * (DIGITS + 1 - len(ans))
|
||||||
@ -115,31 +113,31 @@ while len(questions) < TRAINING_SIZE:
|
|||||||
print('Total addition questions:', len(questions))
|
print('Total addition questions:', len(questions))
|
||||||
|
|
||||||
print('Vectorization...')
|
print('Vectorization...')
|
||||||
X = np.zeros((len(questions), MAXLEN, len(chars)), dtype=np.bool)
|
x = np.zeros((len(questions), MAxLEN, len(chars)), dtype=np.bool)
|
||||||
y = np.zeros((len(questions), DIGITS + 1, len(chars)), dtype=np.bool)
|
y = np.zeros((len(questions), DIGITS + 1, len(chars)), dtype=np.bool)
|
||||||
for i, sentence in enumerate(questions):
|
for i, sentence in enumerate(questions):
|
||||||
X[i] = ctable.encode(sentence, MAXLEN)
|
x[i] = ctable.encode(sentence, MAxLEN)
|
||||||
for i, sentence in enumerate(expected):
|
for i, sentence in enumerate(expected):
|
||||||
y[i] = ctable.encode(sentence, DIGITS + 1)
|
y[i] = ctable.encode(sentence, DIGITS + 1)
|
||||||
|
|
||||||
# Shuffle (X, y) in unison as the later parts of X will almost all be larger
|
# Shuffle (x, y) in unison as the later parts of x will almost all be larger
|
||||||
# digits.
|
# digits.
|
||||||
indices = np.arange(len(y))
|
indices = np.arange(len(y))
|
||||||
np.random.shuffle(indices)
|
np.random.shuffle(indices)
|
||||||
X = X[indices]
|
x = x[indices]
|
||||||
y = y[indices]
|
y = y[indices]
|
||||||
|
|
||||||
# Explicitly set apart 10% for validation data that we never train over.
|
# Explicitly set apart 10% for validation data that we never train over.
|
||||||
split_at = len(X) - len(X) // 10
|
split_at = len(x) - len(x) // 10
|
||||||
(X_train, X_val) = (slice_X(X, 0, split_at), slice_X(X, split_at))
|
(x_train, x_val) = x[:split_at], x[split_at:]
|
||||||
(y_train, y_val) = (y[:split_at], y[split_at:])
|
(y_train, y_val) = y[:split_at], y[split_at:]
|
||||||
|
|
||||||
print('Training Data:')
|
print('Training Data:')
|
||||||
print(X_train.shape)
|
print(x_train.shape)
|
||||||
print(y_train.shape)
|
print(y_train.shape)
|
||||||
|
|
||||||
print('Validation Data:')
|
print('Validation Data:')
|
||||||
print(X_val.shape)
|
print(x_val.shape)
|
||||||
print(y_val.shape)
|
print(y_val.shape)
|
||||||
|
|
||||||
# Try replacing GRU, or SimpleRNN.
|
# Try replacing GRU, or SimpleRNN.
|
||||||
@ -153,7 +151,7 @@ model = Sequential()
|
|||||||
# "Encode" the input sequence using an RNN, producing an output of HIDDEN_SIZE.
|
# "Encode" the input sequence using an RNN, producing an output of HIDDEN_SIZE.
|
||||||
# Note: In a situation where your input sequences have a variable length,
|
# Note: In a situation where your input sequences have a variable length,
|
||||||
# use input_shape=(None, num_feature).
|
# use input_shape=(None, num_feature).
|
||||||
model.add(RNN(HIDDEN_SIZE, input_shape=(MAXLEN, len(chars))))
|
model.add(RNN(HIDDEN_SIZE, input_shape=(MAxLEN, len(chars))))
|
||||||
# As the decoder RNN's input, repeatedly provide with the last hidden state of
|
# As the decoder RNN's input, repeatedly provide with the last hidden state of
|
||||||
# RNN for each time step. Repeat 'DIGITS + 1' times as that's the maximum
|
# RNN for each time step. Repeat 'DIGITS + 1' times as that's the maximum
|
||||||
# length of output, e.g., when DIGITS=3, max output is 999+999=1998.
|
# length of output, e.g., when DIGITS=3, max output is 999+999=1998.
|
||||||
@ -181,15 +179,15 @@ for iteration in range(1, 200):
|
|||||||
print()
|
print()
|
||||||
print('-' * 50)
|
print('-' * 50)
|
||||||
print('Iteration', iteration)
|
print('Iteration', iteration)
|
||||||
model.fit(X_train, y_train, batch_size=BATCH_SIZE, epochs=1,
|
model.fit(x_train, y_train, batch_size=BATCH_SIZE, epochs=1,
|
||||||
validation_data=(X_val, y_val))
|
validation_data=(x_val, y_val))
|
||||||
# Select 10 samples from the validation set at random so we can visualize
|
# Select 10 samples from the validation set at random so we can visualize
|
||||||
# errors.
|
# errors.
|
||||||
for i in range(10):
|
for i in range(10):
|
||||||
ind = np.random.randint(0, len(X_val))
|
ind = np.random.randint(0, len(x_val))
|
||||||
rowX, rowy = X_val[np.array([ind])], y_val[np.array([ind])]
|
rowx, rowy = x_val[np.array([ind])], y_val[np.array([ind])]
|
||||||
preds = model.predict_classes(rowX, verbose=0)
|
preds = model.predict_classes(rowx, verbose=0)
|
||||||
q = ctable.decode(rowX[0])
|
q = ctable.decode(rowx[0])
|
||||||
correct = ctable.decode(rowy[0])
|
correct = ctable.decode(rowy[0])
|
||||||
guess = ctable.decode(preds[0], calc_argmax=False)
|
guess = ctable.decode(preds[0], calc_argmax=False)
|
||||||
print('Q', q[::-1] if INVERT else q)
|
print('Q', q[::-1] if INVERT else q)
|
||||||
|
@ -52,11 +52,11 @@ class Antirectifier(Layer):
|
|||||||
shape[-1] *= 2
|
shape[-1] *= 2
|
||||||
return tuple(shape)
|
return tuple(shape)
|
||||||
|
|
||||||
def call(self, x, mask=None):
|
def call(self, inputs):
|
||||||
x -= K.mean(x, axis=1, keepdims=True)
|
inputs -= K.mean(inputs, axis=1, keepdims=True)
|
||||||
x = K.l2_normalize(x, axis=1)
|
inputs = K.l2_normalize(inputs, axis=1)
|
||||||
pos = K.relu(x)
|
pos = K.relu(inputs)
|
||||||
neg = K.relu(-x)
|
neg = K.relu(-inputs)
|
||||||
return K.concatenate([pos, neg], axis=1)
|
return K.concatenate([pos, neg], axis=1)
|
||||||
|
|
||||||
# global parameters
|
# global parameters
|
||||||
@ -65,16 +65,16 @@ num_classes = 10
|
|||||||
epochs = 40
|
epochs = 40
|
||||||
|
|
||||||
# the data, shuffled and split between train and test sets
|
# the data, shuffled and split between train and test sets
|
||||||
(X_train, y_train), (X_test, y_test) = mnist.load_data()
|
(x_train, y_train), (x_test, y_test) = mnist.load_data()
|
||||||
|
|
||||||
X_train = X_train.reshape(60000, 784)
|
x_train = x_train.reshape(60000, 784)
|
||||||
X_test = X_test.reshape(10000, 784)
|
x_test = x_test.reshape(10000, 784)
|
||||||
X_train = X_train.astype('float32')
|
x_train = x_train.astype('float32')
|
||||||
X_test = X_test.astype('float32')
|
x_test = x_test.astype('float32')
|
||||||
X_train /= 255
|
x_train /= 255
|
||||||
X_test /= 255
|
x_test /= 255
|
||||||
print(X_train.shape[0], 'train samples')
|
print(x_train.shape[0], 'train samples')
|
||||||
print(X_test.shape[0], 'test samples')
|
print(x_test.shape[0], 'test samples')
|
||||||
|
|
||||||
# convert class vectors to binary class matrices
|
# convert class vectors to binary class matrices
|
||||||
Y_train = np_utils.to_categorical(y_train, num_classes)
|
Y_train = np_utils.to_categorical(y_train, num_classes)
|
||||||
@ -97,9 +97,9 @@ model.compile(loss='categorical_crossentropy',
|
|||||||
metrics=['accuracy'])
|
metrics=['accuracy'])
|
||||||
|
|
||||||
# train the model
|
# train the model
|
||||||
model.fit(X_train, Y_train,
|
model.fit(x_train, Y_train,
|
||||||
batch_size=batch_size, epochs=epochs,
|
batch_size=batch_size, epochs=epochs,
|
||||||
verbose=1, validation_data=(X_test, Y_test))
|
verbose=1, validation_data=(x_test, Y_test))
|
||||||
|
|
||||||
# next, compare with an equivalent network
|
# next, compare with an equivalent network
|
||||||
# with 2x bigger Dense layers and ReLU
|
# with 2x bigger Dense layers and ReLU
|
||||||
|
@ -12,7 +12,7 @@ QA2 - Two Supporting Facts | 20 | 50.0
|
|||||||
QA3 - Three Supporting Facts | 20 | 20.5
|
QA3 - Three Supporting Facts | 20 | 20.5
|
||||||
QA4 - Two Arg. Relations | 61 | 62.9
|
QA4 - Two Arg. Relations | 61 | 62.9
|
||||||
QA5 - Three Arg. Relations | 70 | 61.9
|
QA5 - Three Arg. Relations | 70 | 61.9
|
||||||
QA6 - Yes/No Questions | 48 | 50.7
|
QA6 - Yes/No Questions | 48 | 50.7
|
||||||
QA7 - Counting | 49 | 78.9
|
QA7 - Counting | 49 | 78.9
|
||||||
QA8 - Lists/Sets | 45 | 77.2
|
QA8 - Lists/Sets | 45 | 77.2
|
||||||
QA9 - Simple Negation | 64 | 64.0
|
QA9 - Simple Negation | 64 | 64.0
|
||||||
@ -62,13 +62,12 @@ import re
|
|||||||
import tarfile
|
import tarfile
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
np.random.seed(1337) # for reproducibility
|
|
||||||
|
|
||||||
from keras.utils.data_utils import get_file
|
from keras.utils.data_utils import get_file
|
||||||
from keras.layers.embeddings import Embedding
|
from keras.layers.embeddings import Embedding
|
||||||
from keras.layers import Dense, Merge, Dropout, RepeatVector
|
from keras import layers
|
||||||
from keras.layers import recurrent
|
from keras.layers import recurrent
|
||||||
from keras.models import Sequential
|
from keras.models import Model
|
||||||
from keras.preprocessing.sequence import pad_sequences
|
from keras.preprocessing.sequence import pad_sequences
|
||||||
|
|
||||||
|
|
||||||
@ -125,26 +124,26 @@ def get_stories(f, only_supporting=False, max_length=None):
|
|||||||
|
|
||||||
|
|
||||||
def vectorize_stories(data, word_idx, story_maxlen, query_maxlen):
|
def vectorize_stories(data, word_idx, story_maxlen, query_maxlen):
|
||||||
X = []
|
xs = []
|
||||||
Xq = []
|
xqs = []
|
||||||
Y = []
|
ys = []
|
||||||
for story, query, answer in data:
|
for story, query, answer in data:
|
||||||
x = [word_idx[w] for w in story]
|
x = [word_idx[w] for w in story]
|
||||||
xq = [word_idx[w] for w in query]
|
xq = [word_idx[w] for w in query]
|
||||||
y = np.zeros(len(word_idx) + 1) # let's not forget that index 0 is reserved
|
y = np.zeros(len(word_idx) + 1) # let's not forget that index 0 is reserved
|
||||||
y[word_idx[answer]] = 1
|
y[word_idx[answer]] = 1
|
||||||
X.append(x)
|
xs.append(x)
|
||||||
Xq.append(xq)
|
xqs.append(xq)
|
||||||
Y.append(y)
|
ys.append(y)
|
||||||
return pad_sequences(X, maxlen=story_maxlen), pad_sequences(Xq, maxlen=query_maxlen), np.array(Y)
|
return pad_sequences(xs, maxlen=story_maxlen), pad_sequences(xqs, maxlen=query_maxlen), np.array(ys)
|
||||||
|
|
||||||
RNN = recurrent.LSTM
|
RNN = recurrent.LSTM
|
||||||
EMBED_HIDDEN_SIZE = 50
|
EMBED_HIDDEN_SIZE = 50
|
||||||
SENT_HIDDEN_SIZE = 100
|
SENT_HIDDEN_SIZE = 100
|
||||||
QUERY_HIDDEN_SIZE = 100
|
QUERy_HIDDEN_SIZE = 100
|
||||||
BATCH_SIZE = 32
|
BATCH_SIZE = 32
|
||||||
EPOCHS = 40
|
EPOCHS = 40
|
||||||
print('RNN / Embed / Sent / Query = {}, {}, {}, {}'.format(RNN, EMBED_HIDDEN_SIZE, SENT_HIDDEN_SIZE, QUERY_HIDDEN_SIZE))
|
print('RNN / Embed / Sent / Query = {}, {}, {}, {}'.format(RNN, EMBED_HIDDEN_SIZE, SENT_HIDDEN_SIZE, QUERy_HIDDEN_SIZE))
|
||||||
|
|
||||||
try:
|
try:
|
||||||
path = get_file('babi-tasks-v1-2.tar.gz', origin='https://s3.amazonaws.com/text-datasets/babi_tasks_1-20_v1-2.tar.gz')
|
path = get_file('babi-tasks-v1-2.tar.gz', origin='https://s3.amazonaws.com/text-datasets/babi_tasks_1-20_v1-2.tar.gz')
|
||||||
@ -172,40 +171,38 @@ word_idx = dict((c, i + 1) for i, c in enumerate(vocab))
|
|||||||
story_maxlen = max(map(len, (x for x, _, _ in train + test)))
|
story_maxlen = max(map(len, (x for x, _, _ in train + test)))
|
||||||
query_maxlen = max(map(len, (x for _, x, _ in train + test)))
|
query_maxlen = max(map(len, (x for _, x, _ in train + test)))
|
||||||
|
|
||||||
X, Xq, Y = vectorize_stories(train, word_idx, story_maxlen, query_maxlen)
|
x, xq, y = vectorize_stories(train, word_idx, story_maxlen, query_maxlen)
|
||||||
tX, tXq, tY = vectorize_stories(test, word_idx, story_maxlen, query_maxlen)
|
tx, txq, ty = vectorize_stories(test, word_idx, story_maxlen, query_maxlen)
|
||||||
|
|
||||||
print('vocab = {}'.format(vocab))
|
print('vocab = {}'.format(vocab))
|
||||||
print('X.shape = {}'.format(X.shape))
|
print('x.shape = {}'.format(x.shape))
|
||||||
print('Xq.shape = {}'.format(Xq.shape))
|
print('xq.shape = {}'.format(xq.shape))
|
||||||
print('Y.shape = {}'.format(Y.shape))
|
print('y.shape = {}'.format(y.shape))
|
||||||
print('story_maxlen, query_maxlen = {}, {}'.format(story_maxlen, query_maxlen))
|
print('story_maxlen, query_maxlen = {}, {}'.format(story_maxlen, query_maxlen))
|
||||||
|
|
||||||
print('Build model...')
|
print('Build model...')
|
||||||
|
|
||||||
sentrnn = Sequential()
|
sentence = layers.Input(shape=(story_maxlen,), dtype='int32')
|
||||||
sentrnn.add(Embedding(vocab_size, EMBED_HIDDEN_SIZE,
|
encoded_sentence = layers.Embedding(vocab_size, EMBED_HIDDEN_SIZE)(sentence)
|
||||||
input_length=story_maxlen))
|
encoded_sentence = layers.Dropout(0.3)(encoded_sentence)
|
||||||
sentrnn.add(Dropout(0.3))
|
|
||||||
|
|
||||||
qrnn = Sequential()
|
question = layers.Input(shape=(query_maxlen,), dtype='int32')
|
||||||
qrnn.add(Embedding(vocab_size, EMBED_HIDDEN_SIZE,
|
encoded_question = layers.Embedding(vocab_size, EMBED_HIDDEN_SIZE)(question)
|
||||||
input_length=query_maxlen))
|
encoded_question = layers.Dropout(0.3)(encoded_question)
|
||||||
qrnn.add(Dropout(0.3))
|
encoded_question = RNN(EMBED_HIDDEN_SIZE)(encoded_question)
|
||||||
qrnn.add(RNN(EMBED_HIDDEN_SIZE, return_sequences=False))
|
encoded_question = layers.RepeatVector(story_maxlen)(encoded_question)
|
||||||
qrnn.add(RepeatVector(story_maxlen))
|
|
||||||
|
|
||||||
model = Sequential()
|
merged = layers.sum([encoded_sentence, encoded_question])
|
||||||
model.add(Merge([sentrnn, qrnn], mode='sum'))
|
merged = RNN(EMBED_HIDDEN_SIZE)(merged)
|
||||||
model.add(RNN(EMBED_HIDDEN_SIZE, return_sequences=False))
|
merged = layers.Dropout(0.3)(merged)
|
||||||
model.add(Dropout(0.3))
|
preds = layers.Dense(vocab_size, activation='softmax')(merged)
|
||||||
model.add(Dense(vocab_size, activation='softmax'))
|
|
||||||
|
|
||||||
|
model = Model([sentence, question], preds)
|
||||||
model.compile(optimizer='adam',
|
model.compile(optimizer='adam',
|
||||||
loss='categorical_crossentropy',
|
loss='categorical_crossentropy',
|
||||||
metrics=['accuracy'])
|
metrics=['accuracy'])
|
||||||
|
|
||||||
print('Training')
|
print('Training')
|
||||||
model.fit([X, Xq], Y, batch_size=BATCH_SIZE, epochs=EPOCHS, validation_split=0.05)
|
model.fit([x, xq], y, batch_size=BATCH_SIZE, epochs=EPOCHS, validation_split=0.05)
|
||||||
loss, acc = model.evaluate([tX, tXq], tY, batch_size=BATCH_SIZE)
|
loss, acc = model.evaluate([tx, txq], ty, batch_size=BATCH_SIZE)
|
||||||
print('Test loss / test accuracy = {:.4f} / {:.4f}'.format(loss, acc))
|
print('Test loss / test accuracy = {:.4f} / {:.4f}'.format(loss, acc))
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
'''Train a simple deep CNN on the CIFAR10 small images dataset.
|
'''Train a simple deep CNN on the CIFAR10 small images dataset.
|
||||||
|
|
||||||
GPU run command with Theano backend (with TensorFlow, the GPU is automatically used):
|
GPU run command with Theano backend (with TensorFlow, the GPU is automatically used):
|
||||||
THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32 python cifar10_cnn.py
|
THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32 python cifar10_cnn.py
|
||||||
|
|
||||||
It gets down to 0.65 test logloss in 25 epochs, and down to 0.55 after 50 epochs.
|
It gets down to 0.65 test logloss in 25 epochs, and down to 0.55 after 50 epochs.
|
||||||
(it's still underfitting at that point, though).
|
(it's still underfitting at that point, though).
|
||||||
@ -12,7 +12,7 @@ from keras.datasets import cifar10
|
|||||||
from keras.preprocessing.image import ImageDataGenerator
|
from keras.preprocessing.image import ImageDataGenerator
|
||||||
from keras.models import Sequential
|
from keras.models import Sequential
|
||||||
from keras.layers import Dense, Dropout, Activation, Flatten
|
from keras.layers import Dense, Dropout, Activation, Flatten
|
||||||
from keras.layers import Convolution2D, MaxPooling2D
|
from keras.layers import Conv2D, MaxPooling2D
|
||||||
from keras.utils import np_utils
|
from keras.utils import np_utils
|
||||||
|
|
||||||
batch_size = 32
|
batch_size = 32
|
||||||
@ -26,28 +26,28 @@ img_rows, img_cols = 32, 32
|
|||||||
img_channels = 3
|
img_channels = 3
|
||||||
|
|
||||||
# The data, shuffled and split between train and test sets:
|
# The data, shuffled and split between train and test sets:
|
||||||
(X_train, y_train), (X_test, y_test) = cifar10.load_data()
|
(x_train, y_train), (x_test, y_test) = cifar10.load_data()
|
||||||
print('X_train shape:', X_train.shape)
|
print('x_train shape:', x_train.shape)
|
||||||
print(X_train.shape[0], 'train samples')
|
print(x_train.shape[0], 'train samples')
|
||||||
print(X_test.shape[0], 'test samples')
|
print(x_test.shape[0], 'test samples')
|
||||||
|
|
||||||
# Convert class vectors to binary class matrices.
|
# Convert class vectors to binary class matrices.
|
||||||
Y_train = np_utils.to_categorical(y_train, num_classes)
|
y_train = np_utils.to_categorical(y_train, num_classes)
|
||||||
Y_test = np_utils.to_categorical(y_test, num_classes)
|
y_test = np_utils.to_categorical(y_test, num_classes)
|
||||||
|
|
||||||
model = Sequential()
|
model = Sequential()
|
||||||
|
|
||||||
model.add(Convolution2D(32, 3, 3, border_mode='same',
|
model.add(Conv2D(32, (3, 3), padding='same',
|
||||||
input_shape=X_train.shape[1:]))
|
input_shape=x_train.shape[1:]))
|
||||||
model.add(Activation('relu'))
|
model.add(Activation('relu'))
|
||||||
model.add(Convolution2D(32, 3, 3))
|
model.add(Conv2D(32, (3, 3)))
|
||||||
model.add(Activation('relu'))
|
model.add(Activation('relu'))
|
||||||
model.add(MaxPooling2D(pool_size=(2, 2)))
|
model.add(MaxPooling2D(pool_size=(2, 2)))
|
||||||
model.add(Dropout(0.25))
|
model.add(Dropout(0.25))
|
||||||
|
|
||||||
model.add(Convolution2D(64, 3, 3, border_mode='same'))
|
model.add(Conv2D(64, (3, 3), padding='same'))
|
||||||
model.add(Activation('relu'))
|
model.add(Activation('relu'))
|
||||||
model.add(Convolution2D(64, 3, 3))
|
model.add(Conv2D(64, (3, 3)))
|
||||||
model.add(Activation('relu'))
|
model.add(Activation('relu'))
|
||||||
model.add(MaxPooling2D(pool_size=(2, 2)))
|
model.add(MaxPooling2D(pool_size=(2, 2)))
|
||||||
model.add(Dropout(0.25))
|
model.add(Dropout(0.25))
|
||||||
@ -64,17 +64,17 @@ model.compile(loss='categorical_crossentropy',
|
|||||||
optimizer='rmsprop',
|
optimizer='rmsprop',
|
||||||
metrics=['accuracy'])
|
metrics=['accuracy'])
|
||||||
|
|
||||||
X_train = X_train.astype('float32')
|
x_train = x_train.astype('float32')
|
||||||
X_test = X_test.astype('float32')
|
x_test = x_test.astype('float32')
|
||||||
X_train /= 255
|
x_train /= 255
|
||||||
X_test /= 255
|
x_test /= 255
|
||||||
|
|
||||||
if not data_augmentation:
|
if not data_augmentation:
|
||||||
print('Not using data augmentation.')
|
print('Not using data augmentation.')
|
||||||
model.fit(X_train, Y_train,
|
model.fit(x_train, y_train,
|
||||||
batch_size=batch_size,
|
batch_size=batch_size,
|
||||||
epochs=epochs,
|
epochs=epochs,
|
||||||
validation_data=(X_test, Y_test),
|
validation_data=(x_test, y_test),
|
||||||
shuffle=True)
|
shuffle=True)
|
||||||
else:
|
else:
|
||||||
print('Using real-time data augmentation.')
|
print('Using real-time data augmentation.')
|
||||||
@ -93,11 +93,11 @@ else:
|
|||||||
|
|
||||||
# Compute quantities required for featurewise normalization
|
# Compute quantities required for featurewise normalization
|
||||||
# (std, mean, and principal components if ZCA whitening is applied).
|
# (std, mean, and principal components if ZCA whitening is applied).
|
||||||
datagen.fit(X_train)
|
datagen.fit(x_train)
|
||||||
|
|
||||||
# Fit the model on the batches generated by datagen.flow().
|
# Fit the model on the batches generated by datagen.flow().
|
||||||
model.fit_generator(datagen.flow(X_train, Y_train,
|
model.fit_generator(datagen.flow(x_train, y_train,
|
||||||
batch_size=batch_size),
|
batch_size=batch_size),
|
||||||
samples_per_epoch=X_train.shape[0],
|
samples_per_epoch=x_train.shape[0],
|
||||||
epochs=epochs,
|
epochs=epochs,
|
||||||
validation_data=(X_test, Y_test))
|
validation_data=(x_test, y_test))
|
||||||
|
@ -3,7 +3,7 @@ This network is used to predict the next frame of an artificially
|
|||||||
generated movie which contains moving squares.
|
generated movie which contains moving squares.
|
||||||
"""
|
"""
|
||||||
from keras.models import Sequential
|
from keras.models import Sequential
|
||||||
from keras.layers.convolutional import Convolution3D
|
from keras.layers.convolutional import Conv3D
|
||||||
from keras.layers.convolutional_recurrent import ConvLSTM2D
|
from keras.layers.convolutional_recurrent import ConvLSTM2D
|
||||||
from keras.layers.normalization import BatchNormalization
|
from keras.layers.normalization import BatchNormalization
|
||||||
import numpy as np
|
import numpy as np
|
||||||
@ -14,27 +14,26 @@ import pylab as plt
|
|||||||
# of identical shape.
|
# of identical shape.
|
||||||
|
|
||||||
seq = Sequential()
|
seq = Sequential()
|
||||||
seq.add(ConvLSTM2D(filters=40, num_row=3, num_col=3,
|
seq.add(ConvLSTM2D(filters=40, kernel_size=(3, 3),
|
||||||
input_shape=(None, 40, 40, 1),
|
input_shape=(None, 40, 40, 1),
|
||||||
border_mode='same', return_sequences=True))
|
padding='same', return_sequences=True))
|
||||||
seq.add(BatchNormalization())
|
seq.add(BatchNormalization())
|
||||||
|
|
||||||
seq.add(ConvLSTM2D(filters=40, num_row=3, num_col=3,
|
seq.add(ConvLSTM2D(filters=40, kernel_size=(3, 3),
|
||||||
border_mode='same', return_sequences=True))
|
padding='same', return_sequences=True))
|
||||||
seq.add(BatchNormalization())
|
seq.add(BatchNormalization())
|
||||||
|
|
||||||
seq.add(ConvLSTM2D(filters=40, num_row=3, num_col=3,
|
seq.add(ConvLSTM2D(filters=40, kernel_size=(3, 3),
|
||||||
border_mode='same', return_sequences=True))
|
padding='same', return_sequences=True))
|
||||||
seq.add(BatchNormalization())
|
seq.add(BatchNormalization())
|
||||||
|
|
||||||
seq.add(ConvLSTM2D(filters=40, num_row=3, num_col=3,
|
seq.add(ConvLSTM2D(filters=40, kernel_size=(3, 3),
|
||||||
border_mode='same', return_sequences=True))
|
padding='same', return_sequences=True))
|
||||||
seq.add(BatchNormalization())
|
seq.add(BatchNormalization())
|
||||||
|
|
||||||
seq.add(Convolution3D(filters=1, kernel_dim1=1, kernel_dim2=3,
|
seq.add(Conv3D(filters=1, kernel_size=(3, 3, 3),
|
||||||
kernel_dim3=3, activation='sigmoid',
|
activation='sigmoid',
|
||||||
border_mode='same', data_format='channels_last'))
|
padding='same', data_format='channels_last'))
|
||||||
|
|
||||||
seq.compile(loss='binary_crossentropy', optimizer='adadelta')
|
seq.compile(loss='binary_crossentropy', optimizer='adadelta')
|
||||||
|
|
||||||
|
|
||||||
|
@ -12,7 +12,6 @@ from what you see with CNNs/MLPs/etc.
|
|||||||
'''
|
'''
|
||||||
from __future__ import print_function
|
from __future__ import print_function
|
||||||
import numpy as np
|
import numpy as np
|
||||||
np.random.seed(1337) # for reproducibility
|
|
||||||
|
|
||||||
from keras.preprocessing import sequence
|
from keras.preprocessing import sequence
|
||||||
from keras.models import Sequential
|
from keras.models import Sequential
|
||||||
@ -37,10 +36,9 @@ print('x_test shape:', x_test.shape)
|
|||||||
|
|
||||||
print('Build model...')
|
print('Build model...')
|
||||||
model = Sequential()
|
model = Sequential()
|
||||||
model.add(Embedding(max_features, 128, dropout=0.2))
|
model.add(Embedding(max_features, 128))
|
||||||
model.add(LSTM(128, dropout_W=0.2, dropout_U=0.2)) # try using a GRU instead, for fun
|
model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
|
||||||
model.add(Dense(1))
|
model.add(Dense(1, activation='sigmoid'))
|
||||||
model.add(Activation('sigmoid'))
|
|
||||||
|
|
||||||
# try using different optimizers and different optimizer configs
|
# try using different optimizers and different optimizer configs
|
||||||
model.compile(loss='binary_crossentropy',
|
model.compile(loss='binary_crossentropy',
|
||||||
|
@ -33,7 +33,14 @@ def pad_sequences(sequences, maxlen=None, dtype='int32',
|
|||||||
ValueError: in case of invalid values for `truncating` or `padding`,
|
ValueError: in case of invalid values for `truncating` or `padding`,
|
||||||
or in case of invalid shape for a `sequences` entry.
|
or in case of invalid shape for a `sequences` entry.
|
||||||
"""
|
"""
|
||||||
lengths = [len(s) for s in sequences]
|
if not hasattr(sequences, '__len__'):
|
||||||
|
raise ValueError('`sequences` must be iterable.')
|
||||||
|
lengths = []
|
||||||
|
for x in sequences:
|
||||||
|
if not hasattr(x, '__len__'):
|
||||||
|
raise ValueError('`sequences` must be a list of iterables. '
|
||||||
|
'Found non-iterable: ' + str(x))
|
||||||
|
lengths.append(len(x))
|
||||||
|
|
||||||
num_samples = len(sequences)
|
num_samples = len(sequences)
|
||||||
if maxlen is None:
|
if maxlen is None:
|
||||||
|
Loading…
Reference in New Issue
Block a user