Fix up a few examples

2016-07-16 17:47:52 -07:00 · 2016-07-16 17:47:52 -07:00 · 01d5e7bc47
commit 01d5e7bc47
parent f3e7245910
3 changed files with 14 additions and 10 deletions
--- a/examples/imdb_cnn_lstm.py
+++ b/examples/imdb_cnn_lstm.py
@@ -22,9 +22,9 @@ maxlen = 100
 embedding_size = 128

 # Convolution
-filter_length = 3
+filter_length = 5
 nb_filter = 64
-pool_length = 2
+pool_length = 4

 # LSTM
 lstm_output_size = 70
--- a/examples/lstm_text_generation.py
+++ b/examples/lstm_text_generation.py
@@ -14,6 +14,7 @@ from __future__ import print_function
 from keras.models import Sequential
 from keras.layers import Dense, Activation, Dropout
 from keras.layers import LSTM
+from keras.optimizers import RMSprop
 from keras.utils.data_utils import get_file
 import numpy as np
 import random
@@ -50,20 +51,22 @@ for i, sentence in enumerate(sentences):
 # build the model: 2 stacked LSTM
 print('Build model...')
 model = Sequential()
-model.add(LSTM(512, return_sequences=True, input_shape=(maxlen, len(chars))))
-model.add(LSTM(512, return_sequences=False))
-model.add(Dropout(0.2))
+model.add(LSTM(128, input_shape=(maxlen, len(chars))))
 model.add(Dense(len(chars)))
 model.add(Activation('softmax'))

-model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
+optimizer = RMSprop(lr=0.01)
+model.compile(loss='categorical_crossentropy', optimizer=optimizer)


-def sample(a, temperature=1.0):
+def sample(preds, temperature=1.0):
    # helper function to sample an index from a probability array
-    a = np.log(a) / temperature
-    a = np.exp(a) / np.sum(np.exp(a))
-    return np.argmax(np.random.multinomial(1, a, 1))
+    preds = np.asarray(preds).astype('float64')
+    preds = np.log(preds) / temperature
+    exp_preds = np.exp(preds)
+    preds = exp_preds / np.sum(exp_preds)
+    probas = np.random.multinomial(1, preds, 1)
+    return np.argmax(probas)

 # train the model, output generated text after each iteration
 for iteration in range(1, 60):
--- a/keras/preprocessing/text.py
+++ b/keras/preprocessing/text.py
@@ -99,6 +99,7 @@ class Tokenizer(object):
        wcounts = list(self.word_counts.items())
        wcounts.sort(key=lambda x: x[1], reverse=True)
        sorted_voc = [wc[0] for wc in wcounts]
+        # note that index 0 is reserved, never assigned to an existing word
        self.word_index = dict(list(zip(sorted_voc, list(range(1, len(sorted_voc) + 1)))))

        self.index_docs = {}