keras/tests/manual/check_masked_recurrent.py
2015-06-26 15:21:45 -07:00

131 lines
5.1 KiB
Python

# Dummy test data as input to RNN. This input is 3 timesteps long where the third timestep always matches the
# first. Without masking it should be able to learn it, with masking it should fail.
import numpy as np
from keras.utils.theano_utils import sharedX
from keras.models import Sequential
from keras.layers.core import Dense, Activation, Merge, Dropout, TimeDistributedDense
from keras.layers.embeddings import Embedding
from keras.layers.recurrent import SimpleRNN, SimpleDeepRNN, LSTM, GRU
import theano
theano.config.exception_verbosity = 'high'
# (nb_samples, timesteps, dimensions)
X = np.random.random_integers(1, 4, size=(500000, 15))
print("About to compile the first model")
model = Sequential()
model.add(Embedding(5, 4, mask_zero=True))
model.add(TimeDistributedDense(4, 4)) # obviously this is redundant. Just testing.
model.add(SimpleRNN(4, 4, activation='relu', return_sequences=True))
model.add(Dropout(0.5))
model.add(SimpleDeepRNN(4, 4, depth=2, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(4, 4, activation='softmax'))
model.compile(loss='categorical_crossentropy',
optimizer='rmsprop', theano_mode=theano.compile.mode.FAST_RUN)
print("Compiled model")
W = model.get_weights() # We'll save these so we can reset it later
X[:, : 10] = 0
Xmask0 = X.copy()
Xmask0[:, 10] = 0
Xmask12 = X.copy()
Xmask12[:, 11] = 0
Xmask12[:, 12] = 0
X0_onehot = np.zeros((X.shape[0], 4))
X1_onehot = np.zeros((X.shape[0], 4))
for i, row in enumerate(X):
X0_onehot[i, row[10] - 1] = 1
X1_onehot[i, row[11] - 1] = 1
# Uniform score: 4 options = ln(4) nats (2 bits)
# we should not do better than this when we mask out the part of the input
# that gives us the correct answer
uniform_score = np.log(4)
batch_size=512
# Train it to guess 0th dim
model.fit(X, X0_onehot, nb_epoch=1, batch_size=batch_size)
score = model.evaluate(X, X0_onehot, batch_size=batch_size)
if score > uniform_score * 0.9:
raise Exception('Failed to learn to copy timestep 0, score %f' % score)
model.set_weights(W)
# Train without showing it the 0th dim to learn 1st dim
model.fit(X[: , 1:], X1_onehot, nb_epoch=1, batch_size=batch_size)
score = model.evaluate(X[:, 1:], X1_onehot, batch_size=batch_size)
if score > uniform_score * 0.9:
raise Exception('Failed to learn to copy timestep 1, score %f' % score)
model.set_weights(W)
# Train to guess 0th dim when 0th dim has been masked (should fail)
model.fit(Xmask0, X0_onehot, nb_epoch=1, batch_size=batch_size)
score = model.evaluate(Xmask0, X0_onehot, batch_size=batch_size)
if score < uniform_score * 0.9:
raise Exception('Somehow learned to copy timestep 0 despite mask, score %f' % score)
model.set_weights(W)
# Train to guess 1st dim when 0th dim has been masked (should succeed)
model.fit(Xmask0, X1_onehot, nb_epoch=1, batch_size=batch_size)
score = model.evaluate(Xmask0, X1_onehot, batch_size=batch_size)
if score > uniform_score * 0.9:
raise Exception('Failed to learn to copy timestep 1 in masked model, score %f' % score)
model.set_weights(W)
# Finally, make sure the mask is actually blocking input, mask out timesteps 1 and 2, and see if
# it can learn timestep 0 (should fail)
model.fit(Xmask12, X0_onehot, nb_epoch=1, batch_size=batch_size)
score = model.evaluate(Xmask12, X0_onehot, batch_size=batch_size)
if score < uniform_score * 0.9:
raise Exception('Somehow learned to copy timestep 0 despite masking 1, score %f' % score)
# Another testing approach, just initialize models and make sure that prepending zeros doesn't affect
# their output
print("About to compile the second model")
model2 = Sequential()
model2.add(Embedding(5, 4, mask_zero=True))
model2.add(TimeDistributedDense(4, 4))
model2.add(Activation('time_distributed_softmax'))
model2.add(LSTM(4, 4, return_sequences=True))
model2.add(Activation('tanh'))
model2.add(GRU(4, 4, activation='softmax', return_sequences=True))
model2.add(SimpleDeepRNN(4, 4, depth=2, activation='relu', return_sequences=True))
model2.add(SimpleRNN(4, 4, activation='relu', return_sequences=True))
model2.compile(loss='categorical_crossentropy',
optimizer='rmsprop', theano_mode=theano.compile.mode.FAST_RUN)
print("Compiled model2")
X2 = np.random.random_integers(1, 4, size=(2, 5))
y2 = np.random.random((X2.shape[0], X2.shape[1], 4))
ref = model2.predict(X2)
ref_eval = model2.evaluate(X2, y2)
mask = np.ones((y2.shape[0], y2.shape[1], 1))
for pre_zeros in range(1, 10):
padded_X2 = np.concatenate((np.zeros((X2.shape[0], pre_zeros)), X2), axis=1)
padded_mask = np.concatenate((np.zeros((mask.shape[0], pre_zeros, mask.shape[2])), mask), axis=1)
padded_y2 = np.concatenate((np.zeros((y2.shape[0], pre_zeros, y2.shape[2])), y2), axis=1)
pred = model2.predict(padded_X2)
if not np.allclose(ref[:, -1, :], pred[:, -1, :]):
raise Exception("Different result after left-padding %d zeros. Ref: %s, Pred: %s" % (pre_zeros, ref, pred))
pad_eval = model2.evaluate(padded_X2, padded_y2, weights=padded_mask)
if not np.allclose([pad_eval], [ref_eval]):
raise Exception("Got dissimilar categorical_crossentropy after left-padding %d zeros. Ref: %f, Pred %f" %\
(pref_eval, pred_val))