Implement a fasttext example (#3446)
* Upload examples/imdb_fasttext.py, which implements the fasttext model
* Remove Dropout and unnecessary imports
This commit is contained in:
parent
2c510530b1
commit
46d5b197e0
69
examples/imdb_fasttext.py
Normal file
@@ -0,0 +1,69 @@
'''This example demonstrates the use of fasttext for text classification

Based on Joulin et al.'s paper:

Bags of Tricks for Efficient Text Classification
https://arxiv.org/abs/1607.01759

Can achieve an accuracy of around 88% after 5 epochs in about 70 seconds.

'''

from __future__ import print_function
import numpy as np
np.random.seed(1337)  # for reproducibility

from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense, Flatten
from keras.layers import Embedding
from keras.layers import AveragePooling1D
from keras.datasets import imdb


# set parameters:
max_features = 20000
maxlen = 400
batch_size = 32
embedding_dims = 20
nb_epoch = 5

print('Loading data...')
(X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features)
print(len(X_train), 'train sequences')
print(len(X_test), 'test sequences')

print('Pad sequences (samples x time)')
X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
X_test = sequence.pad_sequences(X_test, maxlen=maxlen)
print('X_train shape:', X_train.shape)
print('X_test shape:', X_test.shape)

print('Build model...')
model = Sequential()

# we start off with an efficient embedding layer which maps
# our vocab indices into embedding_dims dimensions
model.add(Embedding(max_features,
                    embedding_dims,
                    input_length=maxlen))
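# at this point the model output has shape (batch_size, maxlen, embedding_dims)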

# we add an AveragePooling1D, which will average the embeddings
# of all words in the document
model.add(AveragePooling1D(pool_length=model.output_shape[1]))

# We flatten the output of the AveragePooling1D layer
model.add(Flatten())
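# taken together, pooling over the full sequence and flattening reduce each
# document to the mean of its word embeddings, i.e. a single vector of size
# embedding_dims (a GlobalAveragePooling1D layer, where available, computes
# the same thing in one step)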

# We project onto a single unit output layer, and squash it with a sigmoid:
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

model.fit(X_train, y_train,
          batch_size=batch_size,
          nb_epoch=nb_epoch,
          validation_data=(X_test, y_test))
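
# not part of the committed file: a minimal sketch of scoring the trained model
# on the held-out test set, assuming the Keras 1.x API and the variables above
score, acc = model.evaluate(X_test, y_test, batch_size=batch_size)
print('Test loss:', score)
print('Test accuracy:', acc)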