'''This example demonstrates the use of Conv1D for text classification.

Gets to 0.89 test accuracy after 2 epochs.
90s/epoch on Intel i5 2.4Ghz CPU.
10s/epoch on Tesla K40 GPU.
'''

from __future__ import print_function

from keras.preprocessing import sequence
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.layers import Embedding
from keras.layers import Conv1D, GlobalMaxPooling1D
from keras.datasets import imdb

# set parameters:
# max_features: passed as num_words to imdb.load_data and as the first
#               (input dimension) argument of the Embedding layer.
max_features = 5000
# maxlen: passed as maxlen to pad_sequences and as input_length of Embedding.
maxlen = 400
# batch_size / epochs: forwarded unchanged to model.fit.
batch_size = 32
# embedding_dims: second argument (output dimension) of the Embedding layer.
embedding_dims = 50
# filters / kernel_size: first two arguments of the Conv1D layer.
filters = 250
kernel_size = 3
# hidden_dims: number of units of the Dense hidden layer.
hidden_dims = 250
epochs = 2

# Load the IMDB dataset as (train, test) pairs of (inputs, labels);
# num_words is limited to max_features.
print('Loading data...')
(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
print(len(x_train), 'train sequences')
print(len(x_test), 'test sequences')

# Run both splits through pad_sequences with the same maxlen so they share
# one (samples x time) layout; the resulting shapes are printed as a check.
print('Pad sequences (samples x time)')
x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
print('x_train shape:', x_train.shape)
print('x_test shape:', x_test.shape)

print('Build model...')
model = Sequential()

# we start off with an efficient embedding layer which maps
# our vocab indices into embedding_dims dimensions
model.add(Embedding(max_features,
                    embedding_dims,
                    input_length=maxlen))
model.add(Dropout(0.2))

# we add a Conv1D layer, which will learn `filters`
# word group filters of size kernel_size:
model.add(Conv1D(filters,
                 kernel_size,
                 padding='valid',
                 activation='relu',
                 strides=1))
# we use max pooling:
model.add(GlobalMaxPooling1D())

# We add a vanilla hidden layer:
model.add(Dense(hidden_dims))
model.add(Dropout(0.2))
model.add(Activation('relu'))

# We project onto a single unit output layer, and squash it with a sigmoid:
model.add(Dense(1))
model.add(Activation('sigmoid'))

# Compile with binary cross-entropy loss and the Adam optimizer, tracking
# accuracy as the reported metric.
model.compile(loss='binary_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])
# Train on the training split; note that the test split is passed as
# validation_data, so it is what gets evaluated after each epoch.
model.fit(x_train, y_train,
          batch_size=batch_size,
          epochs=epochs,
          validation_data=(x_test, y_test))