Implement a fasttext example (#3446)
* Upload examples/imdb_fasttext.py, which implements the fasttext model
* Remove Dropout and unnecessary imports
This commit is contained in:
parent
2c510530b1
commit
46d5b197e0
69
examples/imdb_fasttext.py
Normal file
@@ -0,0 +1,69 @@
'''This example demonstrates the use of fasttext for text classification

Based on Joulin et al.'s paper:

Bags of Tricks for Efficient Text Classification
https://arxiv.org/abs/1607.01759

Can achieve an accuracy of around 88% after 5 epochs in about 70 seconds.

'''

from __future__ import print_function
import numpy as np
np.random.seed(1337)  # for reproducibility

from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense, Flatten
from keras.layers import Embedding
from keras.layers import AveragePooling1D
from keras.datasets import imdb


# set parameters:
max_features = 20000
maxlen = 400
batch_size = 32
embedding_dims = 20
nb_epoch = 5

print('Loading data...')
(X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features)
print(len(X_train), 'train sequences')
print(len(X_test), 'test sequences')

print('Pad sequences (samples x time)')
X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
X_test = sequence.pad_sequences(X_test, maxlen=maxlen)
print('X_train shape:', X_train.shape)
print('X_test shape:', X_test.shape)

print('Build model...')
model = Sequential()

# we start off with an efficient embedding layer which maps
# our vocab indices into embedding_dims dimensions
model.add(Embedding(max_features,
                    embedding_dims,
                    input_length=maxlen))
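# at this point the model output has shape (batch_size, maxlen, embedding_dims)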

# we add an AveragePooling1D, which will average the embeddings
# of all words in the document
model.add(AveragePooling1D(pool_length=model.output_shape[1]))

# We flatten the output of the AveragePooling1D layer
model.add(Flatten())
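# taken together, pooling over the full sequence and flattening reduce each
# document to the mean of its word embeddings, i.e. a single vector of size
# embedding_dims (a GlobalAveragePooling1D layer, where available, computes
# the same thing in one step)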

# We project onto a single unit output layer, and squash it with a sigmoid:
model.add(Dense(1, activation='sigmoid'))

model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

model.fit(X_train, y_train,
          batch_size=batch_size,
          nb_epoch=nb_epoch,
          validation_data=(X_test, y_test))
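
# not part of the committed file: a minimal sketch of scoring the trained model
# on the held-out test set, assuming the Keras 1.x API and the variables above
score, acc = model.evaluate(X_test, y_test, batch_size=batch_size)
print('Test loss:', score)
print('Test accuracy:', acc)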