Add support for model merging

This commit is contained in:
fchollet 2015-05-15 20:22:59 -07:00
parent c8a104b8fa
commit e82ab0ca50
4 changed files with 406 additions and 80 deletions

@ -42,6 +42,63 @@ class Layer(object):
return {"name":self.__class__.__name__}
class Merge(object):
    def __init__(self, models, mode='sum'):
        ''' Merge the output of a list of models into a single tensor.
        mode: {'sum', 'concat'}
        '''
        if len(models) < 2:
            raise Exception("Please specify two or more input models to merge")
        self.mode = mode
        self.models = models
        # Pool the trainable state of every branch so the optimizer
        # sees one flat list of params/regularizers/constraints.
        self.params = []
        self.regularizers = []
        self.constraints = []
        for model in models:
            self.params.extend(model.params)
            self.regularizers.extend(model.regularizers)
            self.constraints.extend(model.constraints)

    def get_output(self, train=False):
        '''Combined output tensor of all branches, per self.mode.'''
        if self.mode == 'sum':
            # Element-wise sum of every branch output.
            total = self.models[0].get_output(train)
            for model in self.models[1:]:
                total = total + model.get_output(train)
            return total
        elif self.mode == 'concat':
            # Join branch outputs along the last axis.
            branch_outputs = [model.get_output(train) for model in self.models]
            return T.concatenate(branch_outputs, axis=-1)
        else:
            raise Exception('Unknown merge mode')

    def get_input(self, train=False):
        '''Flatten the inputs of every branch into a single list.'''
        inputs = []
        for model in self.models:
            branch_input = model.get_input(train)
            if type(branch_input) == list:
                inputs += branch_input
            else:
                inputs.append(branch_input)
        return inputs

    def get_weights(self):
        '''Weights of all branches, concatenated in branch order.'''
        weights = []
        for model in self.models:
            weights += model.get_weights()
        return weights

    def set_weights(self, weights):
        '''Distribute a flat weight list across the branches; each branch
        consumes len(branch.params) arrays, in order.'''
        for model in self.models:
            nb_param = len(model.params)
            model.set_weights(weights[:nb_param])
            weights = weights[nb_param:]

    def get_config(self):
        return {"name": self.__class__.__name__,
                "models": [model.get_config() for model in self.models],
                "mode": self.mode}
class Dropout(Layer):
'''
Hinton's dropout.

@ -10,7 +10,7 @@ from . import objectives
from . import regularizers
from . import constraints
import time, copy
from .utils.generic_utils import Progbar
from .utils.generic_utils import Progbar, printv
from six.moves import range
def standardize_y(y):
@ -33,12 +33,94 @@ def ndim_tensor(ndim):
return T.tensor4()
return T.matrix()
class Sequential(object):
def standardize_X(X):
    '''Normalize model input to a list of arrays: a list passes through
    unchanged, anything else is wrapped in a one-element list.'''
    return X if type(X) == list else [X]
def slice_X(X, start=None, stop=None):
    '''Slice an array, or every array in a list of arrays.

    If `start` is a collection (anything with __len__, e.g. an index
    array), it is used for fancy indexing and `stop` is ignored;
    otherwise plain start:stop slicing is applied.
    '''
    fancy = hasattr(start, '__len__')
    if type(X) == list:
        return [x[start] for x in X] if fancy else [x[start:stop] for x in X]
    return X[start] if fancy else X[start:stop]
class Model(object):
    '''Base class holding the compile step shared by all model types.

    Subclasses must implement `get_output(train)` and `get_input(train)`
    and provide `params`, `regularizers` and `constraints` attributes.
    '''
    def get_output(self, train):
        raise NotImplementedError

    def get_input(self, train):
        raise NotImplementedError

    def compile(self, optimizer, loss, class_mode="categorical", theano_mode=None):
        '''Build the Theano train/test/predict functions.

        class_mode: 'categorical' or 'binary'; controls how accuracy
        is computed from the targets and predictions.
        '''
        self.optimizer = optimizers.get(optimizer)
        self.loss = objectives.get(loss)

        # input of model
        self.X_train = self.get_input(train=True)
        self.X_test = self.get_input(train=False)

        # output of model
        # (bug fix: these two calls were duplicated, rebuilding the
        # output graph twice for no effect)
        self.y_train = self.get_output(train=True)
        self.y_test = self.get_output(train=False)

        # target of model: a symbolic placeholder shaped like the output
        self.y = T.zeros_like(self.y_train)

        train_loss = self.loss(self.y, self.y_train)
        test_score = self.loss(self.y, self.y_test)

        if class_mode == "categorical":
            train_accuracy = T.mean(T.eq(T.argmax(self.y, axis=-1), T.argmax(self.y_train, axis=-1)))
            test_accuracy = T.mean(T.eq(T.argmax(self.y, axis=-1), T.argmax(self.y_test, axis=-1)))
        elif class_mode == "binary":
            train_accuracy = T.mean(T.eq(self.y, T.round(self.y_train)))
            test_accuracy = T.mean(T.eq(self.y, T.round(self.y_test)))
        else:
            raise Exception("Invalid class mode:" + str(class_mode))
        self.class_mode = class_mode

        updates = self.optimizer.get_updates(self.params, self.regularizers, self.constraints, train_loss)

        # Multi-input models (e.g. via Merge) return a list of inputs.
        if type(self.X_train) == list:
            train_ins = self.X_train + [self.y]
            test_ins = self.X_test + [self.y]
            predict_ins = self.X_test
        else:
            train_ins = [self.X_train, self.y]
            test_ins = [self.X_test, self.y]
            predict_ins = [self.X_test]

        self._train = theano.function(train_ins, train_loss,
            updates=updates, allow_input_downcast=True, mode=theano_mode)
        self._train_with_acc = theano.function(train_ins, [train_loss, train_accuracy],
            updates=updates, allow_input_downcast=True, mode=theano_mode)
        self._predict = theano.function(predict_ins, self.y_test,
            allow_input_downcast=True, mode=theano_mode)
        self._test = theano.function(test_ins, test_score,
            allow_input_downcast=True, mode=theano_mode)
        self._test_with_acc = theano.function(test_ins, [test_score, test_accuracy],
            allow_input_downcast=True, mode=theano_mode)
class Sequential(Model):
    def __init__(self):
        '''Linear stack of layers; trainable state is accumulated as
        layers are added via `add`.'''
        # bug fix: each of params/regularizers/constraints was
        # initialized twice; keep a single initialization each.
        self.layers = []
        self.params = []        # learnable
        self.regularizers = []  # same size as params
        self.constraints = []   # same size as params
def add(self, layer):
self.layers.append(layer)
@ -68,74 +150,45 @@ class Sequential(object):
else:
self.constraints += [constraints.identity for _ in range(len(layer.params))]
def get_output(self, train=False):
    '''Output tensor of the model: the output of the last layer.'''
    return self.layers[-1].get_output(train)

def get_input(self, train=False):
    '''Input tensor(s) of the model.

    If the first layer has no `input` attribute yet, infer the required
    tensor ndim from the first layer that does define one and attach a
    matching symbolic placeholder to the first layer.

    (Cleanup: removed the stale pre-refactor `compile` body that was
    interleaved here; `compile` now lives on the Model base class.)
    '''
    if not hasattr(self.layers[0], 'input'):
        for l in self.layers:
            if hasattr(l, 'input'):
                break
        # NOTE(review): if no layer defines `input`, `l` ends up as the
        # last layer and the next line raises AttributeError — confirm
        # that is the intended failure mode.
        ndim = l.input.ndim
        self.layers[0].input = ndim_tensor(ndim)
    return self.layers[0].get_input(train)
def train(self, X, y, accuracy=False):
    '''Run one gradient update on a single batch.

    X may be an array or a list of arrays (multi-input models).
    Returns the loss, or [loss, accuracy] when accuracy=True.

    (Cleanup: removed the stale pre-refactor return lines that still
    called self._train(X, y) with the old calling convention.)
    '''
    X = standardize_X(X)
    y = standardize_y(y)
    ins = X + [y]
    if accuracy:
        return self._train_with_acc(*ins)
    else:
        return self._train(*ins)
def test(self, X, y, accuracy=False):
    '''Evaluate loss (and optionally accuracy) on one batch without
    updating any weights.

    (Cleanup: removed the stale pre-refactor return lines that still
    called self._test(X, y) with the old calling convention.)
    '''
    X = standardize_X(X)
    y = standardize_y(y)
    ins = X + [y]
    if accuracy:
        return self._test_with_acc(*ins)
    else:
        return self._test(*ins)
def fit(self, X, y, batch_size=128, nb_epoch=100, verbose=1,
validation_split=0., validation_data=None, shuffle=True, show_accuracy=False):
X = standardize_X(X)
y = standardize_y(y)
do_validation = False
@ -143,8 +196,10 @@ class Sequential(object):
try:
X_val, y_val = validation_data
except:
raise Exception("Invalid format for validation data; provide a tuple (X_val, y_val).")
raise Exception("Invalid format for validation data; provide a tuple (X_val, y_val). \
X_val may be a numpy array or a list of numpy arrays depending on your model input.")
do_validation = True
X_val = standardize_X(X_val)
y_val = standardize_y(y_val)
if verbose:
print("Train on %d samples, validate on %d samples" % (len(y), len(y_val)))
@ -154,8 +209,8 @@ class Sequential(object):
# then split X into smaller X and X_val,
# and split y into smaller y and y_val.
do_validation = True
split_at = int(len(X) * (1 - validation_split))
(X, X_val) = (X[0:split_at], X[split_at:])
split_at = int(len(y) * (1 - validation_split))
(X, X_val) = (slice_X(X, 0, split_at), slice_X(X, split_at))
(y, y_val) = (y[0:split_at], y[split_at:])
if verbose:
print("Train on %d samples, validate on %d samples" % (len(y), len(y_val)))
@ -168,11 +223,11 @@ class Sequential(object):
if show_accuracy:
history['val_acc'] = []
index_array = np.arange(len(X))
index_array = np.arange(len(y))
for epoch in range(nb_epoch):
if verbose:
print('Epoch', epoch)
progbar = Progbar(target=len(X), verbose=verbose)
progbar = Progbar(target=len(y), verbose=verbose)
if shuffle:
np.random.shuffle(index_array)
@ -180,20 +235,21 @@ class Sequential(object):
av_acc = 0.
seen = 0
batches = make_batches(len(X), batch_size)
batches = make_batches(len(y), batch_size)
for batch_index, (batch_start, batch_end) in enumerate(batches):
batch_ids = index_array[batch_start:batch_end]
seen += len(batch_ids)
X_batch = X[batch_ids]
X_batch = slice_X(X, batch_ids)
y_batch = y[batch_ids]
ins = X_batch + [y_batch]
if show_accuracy:
loss, acc = self._train_with_acc(X_batch, y_batch)
loss, acc = self._train_with_acc(*ins)
log_values = [('loss', loss), ('acc.', acc)]
av_loss += loss * len(batch_ids)
av_acc += acc * len(batch_ids)
else:
loss = self._train(X_batch, y_batch)
loss = self._train(*ins)
log_values = [('loss', loss)]
av_loss += loss * len(batch_ids)
@ -221,15 +277,16 @@ class Sequential(object):
return history
def predict(self, X, batch_size=128, verbose=1):
batches = make_batches(len(X), batch_size)
X = standardize_X(X)
batches = make_batches(len(X[0]), batch_size)
if verbose==1:
progbar = Progbar(target=len(X))
progbar = Progbar(target=len(X[0]))
for batch_index, (batch_start, batch_end) in enumerate(batches):
X_batch = X[batch_start:batch_end]
batch_preds = self._predict(X_batch)
X_batch = slice_X(X, batch_start, batch_end)
batch_preds = self._predict(*X_batch)
if batch_index == 0:
shape = (len(X),) + batch_preds.shape[1:]
shape = (len(X[0]),) + batch_preds.shape[1:]
preds = np.zeros(shape)
preds[batch_start:batch_end] = batch_preds
@ -246,7 +303,7 @@ class Sequential(object):
def predict_classes(self, X, batch_size=128, verbose=1):
proba = self.predict_proba(X, batch_size=batch_size, verbose=verbose)
proba = self.predict(X, batch_size=batch_size, verbose=verbose)
if self.class_mode == "categorical":
return proba.argmax(axis=-1)
else:
@ -254,25 +311,27 @@ class Sequential(object):
def evaluate(self, X, y, batch_size=128, show_accuracy=False, verbose=1):
X = standardize_X(X)
y = standardize_y(y)
if show_accuracy:
tot_acc = 0.
tot_score = 0.
batches = make_batches(len(X), batch_size)
batches = make_batches(len(y), batch_size)
if verbose:
progbar = Progbar(target=len(X), verbose=verbose)
progbar = Progbar(target=len(y), verbose=verbose)
for batch_index, (batch_start, batch_end) in enumerate(batches):
X_batch = X[batch_start:batch_end]
X_batch = slice_X(X, batch_start, batch_end)
y_batch = y[batch_start:batch_end]
ins = X_batch + [y_batch]
if show_accuracy:
loss, acc = self._test_with_acc(X_batch, y_batch)
loss, acc = self._test_with_acc(*ins)
tot_acc += acc
log_values = [('loss', loss), ('acc.', acc)]
else:
loss = self._test(X_batch, y_batch)
loss = self._test(*ins)
log_values = [('loss', loss)]
tot_score += loss
@ -285,18 +344,27 @@ class Sequential(object):
else:
return tot_score/len(batches)
def get_config(self, verbose=0):
    '''Return the configuration of every layer as a list of dicts.

    When verbose, pretty-print the collected configs via printv.

    (Cleanup: removed the stale `describe` signature and its manual
    verbose-printing body that were interleaved here from before the
    rename.)
    '''
    layers = [l.get_config() for l in self.layers]
    if verbose:
        printv(layers)
    return layers
def get_weights(self):
    '''Collect the weights of every layer into one flat list.'''
    weights = []
    for layer in self.layers:
        weights.extend(layer.get_weights())
    return weights
def set_weights(self, weights):
    '''Distribute a flat list of weight arrays across the layers;
    each layer consumes len(layer.params) entries, in order.'''
    offset = 0
    for layer in self.layers:
        nb_param = len(layer.params)
        layer.set_weights(weights[offset:offset + nb_param])
        offset += nb_param
def save_weights(self, filepath):
# Save weights from all layers to HDF5
import h5py
@ -309,11 +377,8 @@ class Sequential(object):
g.attrs['nb_params'] = len(weights)
for n, param in enumerate(weights):
param_name = 'param_{}'.format(n)
param_dset = g.create_dataset(param_name, param.shape, dtype='float64')
param_dset = g.create_dataset(param_name, param.shape, dtype=param.dtype)
param_dset[:] = param
for k, v in l.get_config().items():
if v is not None:
g.attrs[k] = v
f.flush()
f.close()
@ -326,5 +391,4 @@ class Sequential(object):
weights = [g['param_{}'.format(p)] for p in range(g.attrs['nb_params'])]
self.layers[k].set_weights(weights)
f.close()

@ -17,6 +17,27 @@ def get_from_module(identifier, module_params, module_name, instantiate=False):
def make_tuple(*args):
    '''Return the positional arguments packed as a tuple.'''
    return args
def printv(v, prefix=''):
    '''Recursively pretty-print a nested dict/list structure.

    A dict carrying a 'name' key prints it as a '#name' header; nesting
    is shown by appending '...' to the prefix at each level.

    Bug fix: the original deleted the 'name' key from the caller's dict
    in place, corrupting config dicts after printing them; we now work
    on a shallow copy.
    '''
    if type(v) == dict:
        v = dict(v)  # shallow copy so the caller's dict is not mutated
        if 'name' in v:
            print(prefix + '#' + v['name'])
            del v['name']
        prefix += '...'
        for nk, nv in v.items():
            if type(nv) in [dict, list]:
                print(prefix + nk + ':')
                printv(nv, prefix)
            else:
                print(prefix + nk + ':' + str(nv))
    elif type(v) == list:
        prefix += '...'
        for i, nv in enumerate(v):
            print(prefix + '#' + str(i))
            printv(nv, prefix)
    else:
        prefix += '...'
        print(prefix + str(v))
class Progbar(object):
def __init__(self, target, width=30, verbose=1):
'''

184
test/test_models.py Normal file

@ -0,0 +1,184 @@
# Smoke tests for Sequential models and the new Merge layer:
# one cheap epoch on a small MNIST subset per configuration.
from __future__ import absolute_import
from __future__ import print_function
from keras.datasets import mnist
from keras.models import Sequential
from keras.layers.core import Dense, Activation, Merge
from keras.utils import np_utils
import numpy as np
# test hyper-parameters
nb_classes = 10
batch_size = 128
nb_epoch = 1
max_train_samples = 5000
max_test_samples = 1000
np.random.seed(1337) # for reproducibility
# the data, shuffled and split between train and test sets
(X_train, y_train), (X_test, y_test) = mnist.load_data()
X_train = X_train.reshape(60000,784)[:max_train_samples]
X_test = X_test.reshape(10000,784)[:max_test_samples]
X_train = X_train.astype("float32")
X_test = X_test.astype("float32")
X_train /= 255
X_test /= 255
# convert class vectors to binary class matrices
Y_train = np_utils.to_categorical(y_train, nb_classes)[:max_train_samples]
Y_test = np_utils.to_categorical(y_test, nb_classes)[:max_test_samples]
#########################
# sequential model test #
#########################
print('Test sequential')
model = Sequential()
model.add(Dense(784, 50))
model.add(Activation('relu'))
model.add(Dense(50, 10))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
# exercise every fit() code path: accuracy on/off, explicit validation
# data, validation split, no validation, and no shuffling
model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch, show_accuracy=True, verbose=0, validation_data=(X_test, Y_test))
model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch, show_accuracy=False, verbose=0, validation_data=(X_test, Y_test))
model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch, show_accuracy=True, verbose=0, validation_split=0.1)
model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch, show_accuracy=False, verbose=0, validation_split=0.1)
model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch, verbose=0)
model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch, verbose=0, shuffle=False)
score = model.evaluate(X_train, Y_train, verbose=0)
print('score:', score)
# NOTE(review): `score` is the loss returned by evaluate(), so raising
# when it is *low* looks inverted — confirm the intended direction of
# this sanity check (same pattern repeats below).
if score < 0.25:
    raise Exception('Score too low, learning issue.')
preds = model.predict(X_test, verbose=0)
classes = model.predict_classes(X_test, verbose=0)
model.get_config(verbose=1)
###################
# merge test: sum #
###################
print('Test merge: sum')
left = Sequential()
left.add(Dense(784, 50))
left.add(Activation('relu'))
right = Sequential()
right.add(Dense(784, 50))
right.add(Activation('relu'))
model = Sequential()
model.add(Merge([left, right], mode='sum'))
model.add(Dense(50, 10))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
# the same input is fed to both branches
model.fit([X_train, X_train], Y_train, batch_size=batch_size, nb_epoch=nb_epoch, show_accuracy=True, verbose=0, validation_data=([X_test, X_test], Y_test))
model.fit([X_train, X_train], Y_train, batch_size=batch_size, nb_epoch=nb_epoch, show_accuracy=False, verbose=0, validation_data=([X_test, X_test], Y_test))
model.fit([X_train, X_train], Y_train, batch_size=batch_size, nb_epoch=nb_epoch, show_accuracy=True, verbose=0, validation_split=0.1)
model.fit([X_train, X_train], Y_train, batch_size=batch_size, nb_epoch=nb_epoch, show_accuracy=False, verbose=0, validation_split=0.1)
model.fit([X_train, X_train], Y_train, batch_size=batch_size, nb_epoch=nb_epoch, verbose=0)
model.fit([X_train, X_train], Y_train, batch_size=batch_size, nb_epoch=nb_epoch, verbose=0, shuffle=False)
score = model.evaluate([X_train, X_train], Y_train, verbose=0)
print('score:', score)
if score < 0.22:
    raise Exception('Score too low, learning issue.')
preds = model.predict([X_test, X_test], verbose=0)
classes = model.predict_classes([X_test, X_test], verbose=0)
model.get_config(verbose=1)
######################
# merge test: concat #
######################
print('Test merge: concat')
left = Sequential()
left.add(Dense(784, 50))
left.add(Activation('relu'))
right = Sequential()
right.add(Dense(784, 50))
right.add(Activation('relu'))
model = Sequential()
model.add(Merge([left, right], mode='concat'))
# concat doubles the feature dimension, hence 50*2 inputs here
model.add(Dense(50*2, 10))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
model.fit([X_train, X_train], Y_train, batch_size=batch_size, nb_epoch=nb_epoch, show_accuracy=True, verbose=0, validation_data=([X_test, X_test], Y_test))
model.fit([X_train, X_train], Y_train, batch_size=batch_size, nb_epoch=nb_epoch, show_accuracy=False, verbose=0, validation_data=([X_test, X_test], Y_test))
model.fit([X_train, X_train], Y_train, batch_size=batch_size, nb_epoch=nb_epoch, show_accuracy=True, verbose=0, validation_split=0.1)
model.fit([X_train, X_train], Y_train, batch_size=batch_size, nb_epoch=nb_epoch, show_accuracy=False, verbose=0, validation_split=0.1)
model.fit([X_train, X_train], Y_train, batch_size=batch_size, nb_epoch=nb_epoch, verbose=0)
model.fit([X_train, X_train], Y_train, batch_size=batch_size, nb_epoch=nb_epoch, verbose=0, shuffle=False)
score = model.evaluate([X_train, X_train], Y_train, verbose=0)
print('score:', score)
if score < 0.22:
    raise Exception('Score too low, learning issue.')
preds = model.predict([X_test, X_test], verbose=0)
classes = model.predict_classes([X_test, X_test], verbose=0)
model.get_config(verbose=1)
##########################
# test merge recursivity #
##########################
# a Merge output feeding another Merge (three total input branches)
print('Test merge recursivity')
left = Sequential()
left.add(Dense(784, 50))
left.add(Activation('relu'))
right = Sequential()
right.add(Dense(784, 50))
right.add(Activation('relu'))
righter = Sequential()
righter.add(Dense(784, 50))
righter.add(Activation('relu'))
intermediate = Sequential()
intermediate.add(Merge([left, right], mode='sum'))
intermediate.add(Dense(50, 50))
intermediate.add(Activation('relu'))
model = Sequential()
model.add(Merge([intermediate, righter], mode='sum'))
model.add(Dense(50, 10))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
model.fit([X_train, X_train, X_train], Y_train, batch_size=batch_size, nb_epoch=nb_epoch, show_accuracy=True, verbose=0, validation_data=([X_test, X_test, X_test], Y_test))
model.fit([X_train, X_train, X_train], Y_train, batch_size=batch_size, nb_epoch=nb_epoch, show_accuracy=False, verbose=0, validation_data=([X_test, X_test, X_test], Y_test))
model.fit([X_train, X_train, X_train], Y_train, batch_size=batch_size, nb_epoch=nb_epoch, show_accuracy=True, verbose=0, validation_split=0.1)
model.fit([X_train, X_train, X_train], Y_train, batch_size=batch_size, nb_epoch=nb_epoch, show_accuracy=False, verbose=0, validation_split=0.1)
model.fit([X_train, X_train, X_train], Y_train, batch_size=batch_size, nb_epoch=nb_epoch, verbose=0)
model.fit([X_train, X_train, X_train], Y_train, batch_size=batch_size, nb_epoch=nb_epoch, verbose=0, shuffle=False)
score = model.evaluate([X_train, X_train, X_train], Y_train, verbose=0)
print('score:', score)
if score < 0.19:
    raise Exception('Score too low, learning issue.')
preds = model.predict([X_test, X_test, X_test], verbose=0)
classes = model.predict_classes([X_test, X_test, X_test], verbose=0)
model.get_config(verbose=1)
# check that weights survive a save/load round trip through HDF5
model.save_weights('temp.h5')
model.load_weights('temp.h5')
score = model.evaluate([X_train, X_train, X_train], Y_train, verbose=0)
print('score:', score)