diff --git a/examples/addition_rnn.py b/examples/addition_rnn.py
index bf74d8949..c7133a0e2 100644
--- a/examples/addition_rnn.py
+++ b/examples/addition_rnn.py
@@ -23,12 +23,10 @@ Four digits inverted:
 Five digits inverted:
 + One layer LSTM (128 HN), 550k training examples = 99% train/test accuracy in 30 epochs
-
 '''
 from __future__ import print_function
 from keras.models import Sequential
-from keras.engine.training import slice_X
 from keras.layers import Activation, TimeDistributed, Dense, RepeatVector, recurrent
 import numpy as np
 from six.moves import range
@@ -57,15 +55,15 @@ class CharacterTable(object):
             num_rows: Number of rows in the returned one hot encoding. This is
                 used to keep the # of rows for each data the same.
         """
-        X = np.zeros((num_rows, len(self.chars)))
+        x = np.zeros((num_rows, len(self.chars)))
         for i, c in enumerate(C):
-            X[i, self.char_indices[c]] = 1
-        return X
+            x[i, self.char_indices[c]] = 1
+        return x
-    def decode(self, X, calc_argmax=True):
+    def decode(self, x, calc_argmax=True):
         if calc_argmax:
-            X = X.argmax(axis=-1)
-        return ''.join(self.indices_char[x] for x in X)
+            x = x.argmax(axis=-1)
+        return ''.join(self.indices_char[x] for x in x)
 class colors:
@@ -80,7 +78,7 @@ INVERT = True
 # Maximum length of input is 'int + int' (e.g., '345+678'). Maximum length of
 # int is DIGITS.
 MAXLEN = DIGITS + 1 + DIGITS
 # All the numbers, plus sign and space for padding.
 chars = '0123456789+ '
@@ -95,14 +93,14 @@ while len(questions) < TRAINING_SIZE:
                     for i in range(np.random.randint(1, DIGITS + 1))))
     a, b = f(), f()
     # Skip any addition questions we've already seen
-    # Also skip any such that X+Y == Y+X (hence the sorting).
+    # Also skip any such that x+y == y+x (hence the sorting).
     key = tuple(sorted((a, b)))
     if key in seen:
         continue
     seen.add(key)
     # Pad the data with spaces such that it is always MAXLEN.
     q = '{}+{}'.format(a, b)
     query = q + ' ' * (MAXLEN - len(q))
     ans = str(a + b)
     # Answers can be of maximum size DIGITS + 1.
     ans += ' ' * (DIGITS + 1 - len(ans))
@@ -115,31 +113,31 @@ while len(questions) < TRAINING_SIZE:
 print('Total addition questions:', len(questions))
 print('Vectorization...')
-X = np.zeros((len(questions), MAXLEN, len(chars)), dtype=np.bool)
+x = np.zeros((len(questions), MAXLEN, len(chars)), dtype=np.bool)
 y = np.zeros((len(questions), DIGITS + 1, len(chars)), dtype=np.bool)
 for i, sentence in enumerate(questions):
-    X[i] = ctable.encode(sentence, MAXLEN)
+    x[i] = ctable.encode(sentence, MAXLEN)
 for i, sentence in enumerate(expected):
     y[i] = ctable.encode(sentence, DIGITS + 1)
-# Shuffle (X, y) in unison as the later parts of X will almost all be larger
+# Shuffle (x, y) in unison as the later parts of x will almost all be larger
 # digits.
 indices = np.arange(len(y))
 np.random.shuffle(indices)
-X = X[indices]
+x = x[indices]
 y = y[indices]
 # Explicitly set apart 10% for validation data that we never train over.
-split_at = len(X) - len(X) // 10
-(X_train, X_val) = (slice_X(X, 0, split_at), slice_X(X, split_at))
-(y_train, y_val) = (y[:split_at], y[split_at:])
+split_at = len(x) - len(x) // 10
+(x_train, x_val) = x[:split_at], x[split_at:]
+(y_train, y_val) = y[:split_at], y[split_at:]
 print('Training Data:')
-print(X_train.shape)
+print(x_train.shape)
 print(y_train.shape)
 print('Validation Data:')
-print(X_val.shape)
+print(x_val.shape)
 print(y_val.shape)
 # Try replacing GRU, or SimpleRNN.
@@ -153,7 +151,7 @@ model = Sequential()
 # "Encode" the input sequence using an RNN, producing an output of HIDDEN_SIZE.
 # Note: In a situation where your input sequences have a variable length,
 # use input_shape=(None, num_feature).
 model.add(RNN(HIDDEN_SIZE, input_shape=(MAXLEN, len(chars))))
 # As the decoder RNN's input, repeatedly provide with the last hidden state of
 # RNN for each time step. Repeat 'DIGITS + 1' times as that's the maximum
 # length of output, e.g., when DIGITS=3, max output is 999+999=1998.
@@ -181,15 +179,15 @@ for iteration in range(1, 200):
     print()
     print('-' * 50)
     print('Iteration', iteration)
-    model.fit(X_train, y_train, batch_size=BATCH_SIZE, epochs=1,
-              validation_data=(X_val, y_val))
+    model.fit(x_train, y_train, batch_size=BATCH_SIZE, epochs=1,
+              validation_data=(x_val, y_val))
     # Select 10 samples from the validation set at random so we can visualize
     # errors.
     for i in range(10):
-        ind = np.random.randint(0, len(X_val))
-        rowX, rowy = X_val[np.array([ind])], y_val[np.array([ind])]
-        preds = model.predict_classes(rowX, verbose=0)
-        q = ctable.decode(rowX[0])
+        ind = np.random.randint(0, len(x_val))
+        rowx, rowy = x_val[np.array([ind])], y_val[np.array([ind])]
+        preds = model.predict_classes(rowx, verbose=0)
+        q = ctable.decode(rowx[0])
         correct = ctable.decode(rowy[0])
         guess = ctable.decode(preds[0], calc_argmax=False)
         print('Q', q[::-1] if INVERT else q)
diff --git a/examples/antirectifier.py b/examples/antirectifier.py
index 398175eb4..4a4269b62 100644
--- a/examples/antirectifier.py
+++ b/examples/antirectifier.py
@@ -52,11 +52,11 @@ class Antirectifier(Layer):
         shape[-1] *= 2
         return tuple(shape)
-    def call(self, x, mask=None):
-        x -= K.mean(x, axis=1, keepdims=True)
-        x = K.l2_normalize(x, axis=1)
-        pos = K.relu(x)
-        neg = K.relu(-x)
+    def call(self, inputs):
+        inputs -= K.mean(inputs, axis=1, keepdims=True)
+        inputs = K.l2_normalize(inputs, axis=1)
+        pos = K.relu(inputs)
+        neg = K.relu(-inputs)
         return K.concatenate([pos, neg], axis=1)
 # global parameters
@@ -65,16 +65,16 @@ num_classes = 10
 epochs = 40
 # the data, shuffled and split between train and test sets
-(X_train, y_train), (X_test, y_test) = mnist.load_data()
+(x_train, y_train), (x_test, y_test) = mnist.load_data()
-X_train = X_train.reshape(60000, 784)
-X_test = X_test.reshape(10000, 784)
-X_train = X_train.astype('float32')
-X_test = X_test.astype('float32')
-X_train /= 255
-X_test /= 255
-print(X_train.shape[0], 'train samples')
-print(X_test.shape[0], 'test samples')
+x_train = x_train.reshape(60000, 784)
+x_test = x_test.reshape(10000, 784)
+x_train = x_train.astype('float32')
+x_test = x_test.astype('float32')
+x_train /= 255
+x_test /= 255
+print(x_train.shape[0], 'train samples')
+print(x_test.shape[0], 'test samples')
 # convert class vectors to binary class matrices
 Y_train = np_utils.to_categorical(y_train, num_classes)
@@ -97,9 +97,9 @@ model.compile(loss='categorical_crossentropy',
               metrics=['accuracy'])
 # train the model
-model.fit(X_train, Y_train,
+model.fit(x_train, Y_train,
           batch_size=batch_size, epochs=epochs,
-          verbose=1, validation_data=(X_test, Y_test))
+          verbose=1, validation_data=(x_test, Y_test))
 # next, compare with an equivalent network
 # with 2x bigger Dense layers and ReLU
diff --git a/examples/babi_rnn.py b/examples/babi_rnn.py
index 1c11a8445..c04db8250 100644
--- a/examples/babi_rnn.py
+++ b/examples/babi_rnn.py
@@ -12,7 +12,7 @@ QA2 - Two Supporting Facts | 20 | 50.0
 QA3 - Three Supporting Facts | 20 | 20.5
 QA4 - Two Arg. Relations | 61 | 62.9
 QA5 - Three Arg. Relations | 70 | 61.9
 QA6 - Yes/No Questions | 48 | 50.7
 QA7 - Counting | 49 | 78.9
 QA8 - Lists/Sets | 45 | 77.2
 QA9 - Simple Negation | 64 | 64.0
@@ -62,13 +62,12 @@ import re
 import tarfile
 import numpy as np
-np.random.seed(1337)  # for reproducibility
 from keras.utils.data_utils import get_file
 from keras.layers.embeddings import Embedding
-from keras.layers import Dense, Merge, Dropout, RepeatVector
+from keras import layers
 from keras.layers import recurrent
-from keras.models import Sequential
+from keras.models import Model
 from keras.preprocessing.sequence import pad_sequences
@@ -125,26 +124,26 @@ def get_stories(f, only_supporting=False, max_length=None):
 def vectorize_stories(data, word_idx, story_maxlen, query_maxlen):
-    X = []
-    Xq = []
-    Y = []
+    xs = []
+    xqs = []
+    ys = []
     for story, query, answer in data:
         x = [word_idx[w] for w in story]
         xq = [word_idx[w] for w in query]
         y = np.zeros(len(word_idx) + 1)  # let's not forget that index 0 is reserved
         y[word_idx[answer]] = 1
-        X.append(x)
-        Xq.append(xq)
-        Y.append(y)
-    return pad_sequences(X, maxlen=story_maxlen), pad_sequences(Xq, maxlen=query_maxlen), np.array(Y)
+        xs.append(x)
+        xqs.append(xq)
+        ys.append(y)
+    return pad_sequences(xs, maxlen=story_maxlen), pad_sequences(xqs, maxlen=query_maxlen), np.array(ys)
 RNN = recurrent.LSTM
 EMBED_HIDDEN_SIZE = 50
 SENT_HIDDEN_SIZE = 100
 QUERY_HIDDEN_SIZE = 100
 BATCH_SIZE = 32
 EPOCHS = 40
 print('RNN / Embed / Sent / Query = {}, {}, {}, {}'.format(RNN, EMBED_HIDDEN_SIZE, SENT_HIDDEN_SIZE, QUERY_HIDDEN_SIZE))
 try:
     path = get_file('babi-tasks-v1-2.tar.gz', origin='https://s3.amazonaws.com/text-datasets/babi_tasks_1-20_v1-2.tar.gz')
@@ -172,40 +171,38 @@ word_idx = dict((c, i + 1) for i, c in enumerate(vocab))
 story_maxlen = max(map(len, (x for x, _, _ in train + test)))
 query_maxlen = max(map(len, (x for _, x, _ in train + test)))
-X, Xq, Y = vectorize_stories(train, word_idx, story_maxlen, query_maxlen)
-tX, tXq, tY = vectorize_stories(test, word_idx, story_maxlen, query_maxlen)
+x, xq, y = vectorize_stories(train, word_idx, story_maxlen, query_maxlen)
+tx, txq, ty = vectorize_stories(test, word_idx, story_maxlen, query_maxlen)
 print('vocab = {}'.format(vocab))
-print('X.shape = {}'.format(X.shape))
-print('Xq.shape = {}'.format(Xq.shape))
-print('Y.shape = {}'.format(Y.shape))
+print('x.shape = {}'.format(x.shape))
+print('xq.shape = {}'.format(xq.shape))
+print('y.shape = {}'.format(y.shape))
 print('story_maxlen, query_maxlen = {}, {}'.format(story_maxlen, query_maxlen))
 print('Build model...')
-sentrnn = Sequential()
-sentrnn.add(Embedding(vocab_size, EMBED_HIDDEN_SIZE,
-                      input_length=story_maxlen))
-sentrnn.add(Dropout(0.3))
+sentence = layers.Input(shape=(story_maxlen,), dtype='int32')
+encoded_sentence = layers.Embedding(vocab_size, EMBED_HIDDEN_SIZE)(sentence)
+encoded_sentence = layers.Dropout(0.3)(encoded_sentence)
-qrnn = Sequential()
-qrnn.add(Embedding(vocab_size, EMBED_HIDDEN_SIZE,
-                   input_length=query_maxlen))
-qrnn.add(Dropout(0.3))
-qrnn.add(RNN(EMBED_HIDDEN_SIZE, return_sequences=False))
-qrnn.add(RepeatVector(story_maxlen))
+question = layers.Input(shape=(query_maxlen,), dtype='int32')
+encoded_question = layers.Embedding(vocab_size, EMBED_HIDDEN_SIZE)(question)
+encoded_question = layers.Dropout(0.3)(encoded_question)
+encoded_question = RNN(EMBED_HIDDEN_SIZE)(encoded_question)
+encoded_question = layers.RepeatVector(story_maxlen)(encoded_question)
-model = Sequential()
-model.add(Merge([sentrnn, qrnn], mode='sum'))
-model.add(RNN(EMBED_HIDDEN_SIZE, return_sequences=False))
-model.add(Dropout(0.3))
-model.add(Dense(vocab_size, activation='softmax'))
+merged = layers.add([encoded_sentence, encoded_question])
+merged = RNN(EMBED_HIDDEN_SIZE)(merged)
+merged = layers.Dropout(0.3)(merged)
+preds = layers.Dense(vocab_size, activation='softmax')(merged)
+model = Model([sentence, question], preds)
 model.compile(optimizer='adam',
               loss='categorical_crossentropy',
               metrics=['accuracy'])
 print('Training')
-model.fit([X, Xq], Y, batch_size=BATCH_SIZE, epochs=EPOCHS, validation_split=0.05)
-loss, acc = model.evaluate([tX, tXq], tY, batch_size=BATCH_SIZE)
+model.fit([x, xq], y, batch_size=BATCH_SIZE, epochs=EPOCHS, validation_split=0.05)
+loss, acc = model.evaluate([tx, txq], ty, batch_size=BATCH_SIZE)
 print('Test loss / test accuracy = {:.4f} / {:.4f}'.format(loss, acc))
diff --git a/examples/cifar10_cnn.py b/examples/cifar10_cnn.py
index 92a3ed308..f27265bd3 100644
--- a/examples/cifar10_cnn.py
+++ b/examples/cifar10_cnn.py
@@ -1,7 +1,7 @@
 '''Train a simple deep CNN on the CIFAR10 small images dataset.
 GPU run command with Theano backend (with TensorFlow, the GPU is automatically used):
     THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32 python cifar10_cnn.py
 It gets down to 0.65 test logloss in 25 epochs, and down to 0.55 after 50 epochs.
 (it's still underfitting at that point, though).
@@ -12,7 +12,7 @@ from keras.datasets import cifar10
 from keras.preprocessing.image import ImageDataGenerator
 from keras.models import Sequential
 from keras.layers import Dense, Dropout, Activation, Flatten
-from keras.layers import Convolution2D, MaxPooling2D
+from keras.layers import Conv2D, MaxPooling2D
 from keras.utils import np_utils
 batch_size = 32
@@ -26,28 +26,28 @@ img_rows, img_cols = 32, 32
 img_channels = 3
 # The data, shuffled and split between train and test sets:
-(X_train, y_train), (X_test, y_test) = cifar10.load_data()
-print('X_train shape:', X_train.shape)
-print(X_train.shape[0], 'train samples')
-print(X_test.shape[0], 'test samples')
+(x_train, y_train), (x_test, y_test) = cifar10.load_data()
+print('x_train shape:', x_train.shape)
+print(x_train.shape[0], 'train samples')
+print(x_test.shape[0], 'test samples')
 # Convert class vectors to binary class matrices.
-Y_train = np_utils.to_categorical(y_train, num_classes)
-Y_test = np_utils.to_categorical(y_test, num_classes)
+y_train = np_utils.to_categorical(y_train, num_classes)
+y_test = np_utils.to_categorical(y_test, num_classes)
 model = Sequential()
-model.add(Convolution2D(32, 3, 3, border_mode='same',
-                        input_shape=X_train.shape[1:]))
+model.add(Conv2D(32, (3, 3), padding='same',
+                 input_shape=x_train.shape[1:]))
 model.add(Activation('relu'))
-model.add(Convolution2D(32, 3, 3))
+model.add(Conv2D(32, (3, 3)))
 model.add(Activation('relu'))
 model.add(MaxPooling2D(pool_size=(2, 2)))
 model.add(Dropout(0.25))
-model.add(Convolution2D(64, 3, 3, border_mode='same'))
+model.add(Conv2D(64, (3, 3), padding='same'))
 model.add(Activation('relu'))
-model.add(Convolution2D(64, 3, 3))
+model.add(Conv2D(64, (3, 3)))
 model.add(Activation('relu'))
 model.add(MaxPooling2D(pool_size=(2, 2)))
 model.add(Dropout(0.25))
@@ -64,17 +64,17 @@ model.compile(loss='categorical_crossentropy',
               optimizer='rmsprop',
               metrics=['accuracy'])
-X_train = X_train.astype('float32')
-X_test = X_test.astype('float32')
-X_train /= 255
-X_test /= 255
+x_train = x_train.astype('float32')
+x_test = x_test.astype('float32')
+x_train /= 255
+x_test /= 255
 if not data_augmentation:
     print('Not using data augmentation.')
-    model.fit(X_train, Y_train,
+    model.fit(x_train, y_train,
              batch_size=batch_size, epochs=epochs,
-              validation_data=(X_test, Y_test),
+              validation_data=(x_test, y_test),
              shuffle=True)
 else:
     print('Using real-time data augmentation.')
@@ -93,11 +93,11 @@ else:
     # Compute quantities required for featurewise normalization
     # (std, mean, and principal components if ZCA whitening is applied).
-    datagen.fit(X_train)
+    datagen.fit(x_train)
     # Fit the model on the batches generated by datagen.flow().
-    model.fit_generator(datagen.flow(X_train, Y_train,
+    model.fit_generator(datagen.flow(x_train, y_train,
                                      batch_size=batch_size),
-                        samples_per_epoch=X_train.shape[0],
+                        steps_per_epoch=x_train.shape[0] // batch_size,
                         epochs=epochs,
-                        validation_data=(X_test, Y_test))
+                        validation_data=(x_test, y_test))
diff --git a/examples/conv_lstm.py b/examples/conv_lstm.py
index 9b4e758ab..653f75481 100644
--- a/examples/conv_lstm.py
+++ b/examples/conv_lstm.py
@@ -3,7 +3,7 @@ This network is used to predict the next frame of an artificially
 generated movie which contains moving squares.
 """
 from keras.models import Sequential
-from keras.layers.convolutional import Convolution3D
+from keras.layers.convolutional import Conv3D
 from keras.layers.convolutional_recurrent import ConvLSTM2D
 from keras.layers.normalization import BatchNormalization
 import numpy as np
@@ -14,27 +14,26 @@ import pylab as plt
 # of identical shape.
 seq = Sequential()
-seq.add(ConvLSTM2D(filters=40, num_row=3, num_col=3,
+seq.add(ConvLSTM2D(filters=40, kernel_size=(3, 3),
                    input_shape=(None, 40, 40, 1),
-                   border_mode='same', return_sequences=True))
+                   padding='same', return_sequences=True))
 seq.add(BatchNormalization())
-seq.add(ConvLSTM2D(filters=40, num_row=3, num_col=3,
-                   border_mode='same', return_sequences=True))
+seq.add(ConvLSTM2D(filters=40, kernel_size=(3, 3),
+                   padding='same', return_sequences=True))
 seq.add(BatchNormalization())
-seq.add(ConvLSTM2D(filters=40, num_row=3, num_col=3,
-                   border_mode='same', return_sequences=True))
+seq.add(ConvLSTM2D(filters=40, kernel_size=(3, 3),
+                   padding='same', return_sequences=True))
 seq.add(BatchNormalization())
-seq.add(ConvLSTM2D(filters=40, num_row=3, num_col=3,
-                   border_mode='same', return_sequences=True))
+seq.add(ConvLSTM2D(filters=40, kernel_size=(3, 3),
+                   padding='same', return_sequences=True))
 seq.add(BatchNormalization())
-seq.add(Convolution3D(filters=1, kernel_dim1=1, kernel_dim2=3,
-                      kernel_dim3=3, activation='sigmoid',
-                      border_mode='same', data_format='channels_last'))
-
+seq.add(Conv3D(filters=1, kernel_size=(3, 3, 3),
+               activation='sigmoid',
+               padding='same', data_format='channels_last'))
 seq.compile(loss='binary_crossentropy', optimizer='adadelta')
diff --git a/examples/imdb_lstm.py b/examples/imdb_lstm.py
index a5f8ddeae..c83d5c6c7 100644
--- a/examples/imdb_lstm.py
+++ b/examples/imdb_lstm.py
@@ -12,7 +12,6 @@ from what you see with CNNs/MLPs/etc.
 '''
 from __future__ import print_function
 import numpy as np
-np.random.seed(1337)  # for reproducibility
 from keras.preprocessing import sequence
 from keras.models import Sequential
@@ -37,10 +36,9 @@ print('x_test shape:', x_test.shape)
 print('Build model...')
 model = Sequential()
-model.add(Embedding(max_features, 128, dropout=0.2))
-model.add(LSTM(128, dropout_W=0.2, dropout_U=0.2))  # try using a GRU instead, for fun
-model.add(Dense(1))
-model.add(Activation('sigmoid'))
+model.add(Embedding(max_features, 128))
+model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2))
+model.add(Dense(1, activation='sigmoid'))
 # try using different optimizers and different optimizer configs
 model.compile(loss='binary_crossentropy',
diff --git a/keras/preprocessing/sequence.py b/keras/preprocessing/sequence.py
index 2b5064f66..ac26c9f53 100644
--- a/keras/preprocessing/sequence.py
+++ b/keras/preprocessing/sequence.py
@@ -33,7 +33,14 @@ def pad_sequences(sequences, maxlen=None, dtype='int32',
         ValueError: in case of invalid values for `truncating` or `padding`,
             or in case of invalid shape for a `sequences` entry.
     """
-    lengths = [len(s) for s in sequences]
+    if not hasattr(sequences, '__len__'):
+        raise ValueError('`sequences` must be iterable.')
+    lengths = []
+    for x in sequences:
+        if not hasattr(x, '__len__'):
+            raise ValueError('`sequences` must be a list of iterables. '
+                             'Found non-iterable: ' + str(x))
+        lengths.append(len(x))
     num_samples = len(sequences)
     if maxlen is None:
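
Note on the `pad_sequences` change above (illustration only, not part of the diff): a minimal sketch of the expected behavior, assuming a Keras build that includes this branch. Valid input is handled exactly as before; an entry that is not itself a sequence now fails fast with a descriptive ValueError instead of an opaque TypeError from `len()`.

    from keras.preprocessing.sequence import pad_sequences

    # Normal usage is unchanged: lists of lists are padded (pre-padded by default) to equal length.
    print(pad_sequences([[1, 2, 3], [4, 5]], maxlen=4))
    # [[0 1 2 3]
    #  [0 0 4 5]]

    # A non-iterable entry is now rejected up front with a clear message.
    try:
        pad_sequences([[1, 2, 3], 4])
    except ValueError as e:
        print(e)  # `sequences` must be a list of iterables. Found non-iterable: 4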