Bidirectional LSTM example added.

Daniele Bonadiman 2015-11-08 23:19:14 +01:00
parent 10852b2529
commit ca360b0d15

@@ -0,0 +1,64 @@
from __future__ import absolute_import
from __future__ import print_function
import numpy as np
np.random.seed(1337) # for reproducibility
from keras.preprocessing import sequence
from keras.utils.np_utils import accuracy
from keras.models import Graph
from keras.layers.core import Dense, Dropout
from keras.layers.embeddings import Embedding
from keras.layers.recurrent import LSTM
from keras.datasets import imdb
'''
Train a Bidirectional LSTM on the IMDB sentiment classification task.
The dataset is actually too small for a bidirectional LSTM to be of any
advantage compared to simpler, much faster methods such as TF-IDF + LogReg
(a rough baseline is sketched below), and a bidirectional LSTM may not be
well suited to a simple text classification task.
Notes:
- RNNs are tricky, and bidirectional RNNs in particular: experiment with
different recurrent layers and different parameters to find the best
configuration for your task (a GRU variant is sketched below).
GPU command:
THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32 python imdb_bidirectional_lstm.py
Output after 4 epochs on CPU: ~0.8146
'''
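# A rough sketch of the TF-IDF + LogReg baseline mentioned above (hedged:
# assumes scikit-learn, plus raw review texts `docs` with labels `labels`,
# which imdb.load_data does not provide, since it returns word indices):
#
#   from sklearn.feature_extraction.text import TfidfVectorizer
#   from sklearn.linear_model import LogisticRegression
#   tfidf = TfidfVectorizer(max_features=20000)
#   clf = LogisticRegression()
#   clf.fit(tfidf.fit_transform(docs), labels)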
max_features = 20000
maxlen = 100 # cut texts after this number of words (among top max_features most common words)
batch_size = 32
print("Loading data...")
(X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features, test_split=0.2)
print(len(X_train), 'train sequences')
print(len(X_test), 'test sequences')
print("Pad sequences (samples x time)")
X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
X_test = sequence.pad_sequences(X_test, maxlen=maxlen)
print('X_train shape:', X_train.shape)
print('X_test shape:', X_test.shape)
y_train = np.array(y_train)
y_test = np.array(y_test)
print('Build model...')
model = Graph()
model.add_input(name='input', input_shape=(maxlen,), dtype=int)  # each sample is a padded sequence of maxlen word indices
model.add_node(Embedding(max_features, 128, input_length=maxlen), name='embedding', input='input')
model.add_node(LSTM(64), name='forward', input='embedding')  # try swapping these two layers for GRU
model.add_node(LSTM(64, go_backwards=True), name='backward', input='embedding')
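# A hedged alternative for the note above: the same two nodes with GRU
# (assumes GRU accepts the same go_backwards argument as LSTM, since both
# share the Recurrent base class):
# from keras.layers.recurrent import GRU
# model.add_node(GRU(64), name='forward', input='embedding')
# model.add_node(GRU(64, go_backwards=True), name='backward', input='embedding')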
model.add_node(Dropout(0.5), name='dropout', inputs=['forward', 'backward'])  # the two directions are concatenated by default
model.add_node(Dense(1, activation='sigmoid'), name='sigmoid', input='dropout')
model.add_output(name='output', input='sigmoid')
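# Resulting graph (sketch): the forward and backward passes read the same
# embedding, and their outputs are joined before the classifier:
#
#   input -> embedding -> forward (LSTM)  \
#                                          concat -> dropout -> sigmoid -> output
#                      -> backward (LSTM) /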
# try using different optimizers and different optimizer configs
model.compile('adam', {'output': 'binary_crossentropy'})
print("Train...")
model.fit({'input': X_train, 'output': y_train}, batch_size=batch_size, nb_epoch=4)
pred = np.round(np.array(model.predict({'input': X_test}, batch_size=batch_size)['output']))
acc = accuracy(y_test, pred)
print('Test accuracy:', acc)
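# Optionally persist the trained weights (a sketch, assuming the Graph
# model's save_weights method; overwrite=True replaces an existing file):
# model.save_weights('imdb_bidirectional_lstm_weights.h5', overwrite=True)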