Fixes in loss weighting with validation data

fchollet 2015-08-27 15:38:26 -07:00
parent 1eb2e6e3f2
commit 7a86ff7f5b
3 changed files with 70 additions and 35 deletions
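In short, Sequential.fit now accepts validation data either as (X_val, y_val) or as (X_val, y_val, sample_weight_val), and the validation_split handling moves into fit itself, where sample weights are split and standardized alongside X and y. A minimal usage sketch of the new behaviour follows; the model definition and the random arrays are hypothetical placeholders (the Dense(input_dim, output_dim) constructor assumes the 2015-era Keras API), not part of this commit.

    import numpy as np
    from keras.models import Sequential
    from keras.layers.core import Dense

    # Hypothetical model and data, for illustration only.
    model = Sequential()
    model.add(Dense(20, 1))
    model.compile(loss='mse', optimizer='sgd')

    X_train = np.random.random((100, 20))
    y_train = np.random.random((100, 1))
    X_val = np.random.random((30, 20))
    y_val = np.random.random((30, 1))
    w_train = np.random.uniform(size=(100,))
    w_val = np.random.uniform(size=(30,))

    # Validation data may now carry its own sample weights as a third element.
    model.fit(X_train, y_train, sample_weight=w_train, nb_epoch=2, verbose=0,
              validation_data=(X_val, y_val, w_val))

    # With validation_split, X, y and sample_weight are sliced consistently in fit.
    model.fit(X_train, y_train, sample_weight=w_train, nb_epoch=2, verbose=0,
              validation_split=0.1)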

@@ -64,12 +64,7 @@ def slice_X(X, start=None, stop=None):
 def weighted_objective(fn):
     def weighted(y_true, y_pred, weights, mask=None):
-        '''
-            y_true dimension: (sample, timestep, dims)
-            y_pred dimension: (sample, timestep, dims)
-            weights dimension: (sample, timestep, 1)
-        '''
-        # it's important that 0 * Inf == 0, not NaN, so I need to filter
+        # it's important that 0 * Inf == 0, not NaN, so we need to filter
         # those out first
         filtered_y_true = y_true[weights.nonzero()[:-1]]
         filtered_y_pred = y_pred[weights.nonzero()[:-1]]
@@ -92,7 +87,7 @@ def standardize_weights(y, sample_weight=None, class_weight=None):
         if len(y.shape) > 3:
             raise Exception('class_weight not supported for 4+ dimensional targets.')
         yshape = y.shape
-        y = np.reshape(y, (-1,yshape[-1])) # for time-distributed data, collapse time and sample
+        y = np.reshape(y, (-1, yshape[-1])) # for time-distributed data, collapse time and sample
         if y.shape[1] > 1:
             y_classes = y.argmax(axis=1)
         elif y.shape[1] == 1:
@@ -100,7 +95,7 @@ def standardize_weights(y, sample_weight=None, class_weight=None):
         else:
             y_classes = y
         class_weights = np.asarray([class_weight[cls] for cls in y_classes])
-        return np.reshape(class_weights, yshape[:-1] + (1,)) # uncollapse initial dimensions
+        return np.reshape(class_weights, yshape[:-1] + (1,))  # uncollapse initial dimensions
     else:
         return np.ones(y.shape[:-1] + (1,))
@@ -160,7 +155,7 @@ def get_function_name(o):
 class Model(object):
     def _fit(self, f, ins, out_labels=[], batch_size=128, nb_epoch=100, verbose=1, callbacks=[],
-             validation_split=0., val_f=None, val_ins=None, shuffle=True, metrics=[]):
+             val_f=None, val_ins=None, shuffle=True, metrics=[]):
         '''
             Abstract fit function for f(*ins). Assume that f returns a list, labelled by out_labels.
         '''
@@ -169,13 +164,6 @@ class Model(object):
             do_validation = True
             if verbose:
                 print("Train on %d samples, validate on %d samples" % (len(ins[0]), len(val_ins[0])))
-        else:
-            if 0 < validation_split < 1:
-                do_validation = True
-                split_at = int(len(ins[0]) * (1 - validation_split))
-                (ins, val_ins) = (slice_X(ins, 0, split_at), slice_X(ins, split_at))
-                if verbose:
-                    print("Train on %d samples, validate on %d samples" % (len(ins[0]), len(val_ins[0])))

         nb_train_sample = len(ins[0])
         index_array = np.arange(nb_train_sample)
@@ -451,7 +439,6 @@ class Sequential(Model, containers.Sequential):
         X = standardize_X(X)
         y = standardize_y(y)
-        sample_weight = standardize_weights(y, class_weight=class_weight, sample_weight=sample_weight)

         val_f = None
         val_ins = None
@@ -461,14 +448,29 @@ class Sequential(Model, containers.Sequential):
             else:
                 val_f = self._test
         if validation_data:
-            try:
+            if len(validation_data) == 2:
                 X_val, y_val = validation_data
-            except:
-                raise Exception("Invalid format for validation data; provide a tuple (X_val, y_val). \
+                sample_weight_val = np.ones(y_val.shape[:-1] + (1,))
+            elif len(validation_data) == 3:
+                X_val, y_val, sample_weight_val = validation_data
+            else:
+                raise Exception("Invalid format for validation data; provide a tuple (X_val, y_val) or (X_val, y_val, sample_weight). \
                     X_val may be a numpy array or a list of numpy arrays depending on your model input.")
             X_val = standardize_X(X_val)
             y_val = standardize_y(y_val)
-            val_ins = X_val + [y_val, np.ones(y_val.shape[:-1] + (1,))]
+            sample_weight_val = standardize_weights(y_val, sample_weight=sample_weight_val)
+            val_ins = X_val + [y_val, sample_weight_val]
+        elif 0 < validation_split < 1:
+            split_at = int(len(X[0]) * (1 - validation_split))
+            X, X_val = (slice_X(X, 0, split_at), slice_X(X, split_at))
+            y, y_val = (slice_X(y, 0, split_at), slice_X(y, split_at))
+            if sample_weight is not None:
+                sample_weight, sample_weight_val = (slice_X(sample_weight, 0, split_at), slice_X(sample_weight, split_at))
+                sample_weight_val = standardize_weights(y_val, sample_weight=sample_weight_val)
+            else:
+                sample_weight_val = np.ones(y_val.shape[:-1] + (1,))
+            val_ins = X_val + [y_val, sample_weight_val]

         if show_accuracy:
             f = self._train_with_acc
@@ -477,11 +479,12 @@ class Sequential(Model, containers.Sequential):
             f = self._train
             out_labels = ['loss']

+        sample_weight = standardize_weights(y, class_weight=class_weight, sample_weight=sample_weight)
         ins = X + [y, sample_weight]
         metrics = ['loss', 'acc', 'val_loss', 'val_acc']
         return self._fit(f, ins, out_labels=out_labels, batch_size=batch_size, nb_epoch=nb_epoch,
                          verbose=verbose, callbacks=callbacks,
-                         validation_split=validation_split, val_f=val_f, val_ins=val_ins,
+                         val_f=val_f, val_ins=val_ins,
                          shuffle=shuffle, metrics=metrics)

     def predict(self, X, batch_size=128, verbose=0):
@@ -624,8 +627,8 @@ class Graph(Model, containers.Graph):
     def test_on_batch(self, data, sample_weight={}):
         # data is a dictionary mapping input names to arrays
-        sample_weight = [standardize_weights(data[name]) for name in self.output_order]
+        sample_weight = [standardize_weights(data[name],
+                                             sample_weight=sample_weight.get(name)) for name in self.output_order]
         ins = [data[name] for name in self.input_order] + [standardize_y(data[name]) for name in self.output_order] + sample_weight
         return self._test(*ins)
@@ -636,30 +639,46 @@ class Graph(Model, containers.Graph):
     def fit(self, data, batch_size=128, nb_epoch=100, verbose=1, callbacks=[],
             validation_split=0., validation_data=None, shuffle=True, class_weight={}, sample_weight={}):
-        sample_weight = [standardize_weights(data[name],
-                                             sample_weight=sample_weight.get(name),
-                                             class_weight=class_weight.get(name)) for name in self.output_order]
-        ins = [data[name] for name in self.input_order] + [standardize_y(data[name]) for name in self.output_order] + sample_weight
+        X = [data[name] for name in self.input_order]
+        y = [standardize_y(data[name]) for name in self.output_order]
+        sample_weight_list = [standardize_weights(data[name],
+                                                  sample_weight=sample_weight.get(name)) for name in self.output_order]
+        class_weight_list = [class_weight.get(name) for name in self.output_order]

         val_f = None
         val_ins = None
         if validation_data or validation_split:
             val_f = self._test
         if validation_data:
             # can't use sample weights with validation data at this point
             sample_weight = [standardize_weights(validation_data[name]) for name in self.output_order]
             val_ins = [validation_data[name] for name in self.input_order] + [standardize_y(validation_data[name]) for name in self.output_order] + sample_weight
+        elif 0 < validation_split < 1:
+            split_at = int(len(X[0]) * (1 - validation_split))
+            X, X_val = (slice_X(X, 0, split_at), slice_X(X, split_at))
+            y, y_val = (slice_X(y, 0, split_at), slice_X(y, split_at))
+            sample_weight_list, sample_weight_list_val = (slice_X(sample_weight_list, 0, split_at), slice_X(sample_weight_list, split_at))
+            val_ins = X_val + y_val + sample_weight_list_val

         f = self._train
         out_labels = ['loss']
         metrics = ['loss', 'val_loss']
+        sample_weight_list = [standardize_weights(y[i],
+                                                  sample_weight=sample_weight_list[i],
+                                                  class_weight=class_weight_list[i]) for i in range(len(self.output_order))]
+        ins = X + y + sample_weight_list

         history = self._fit(f, ins, out_labels=out_labels, batch_size=batch_size, nb_epoch=nb_epoch,
                             verbose=verbose, callbacks=callbacks,
-                            validation_split=validation_split, val_f=val_f, val_ins=val_ins,
+                            val_f=val_f, val_ins=val_ins,
                             shuffle=shuffle, metrics=metrics)
         return history

     def evaluate(self, data, batch_size=128, verbose=0, sample_weight={}):
-        sample_weight = [standardize_weights(data[name], sample_weight.get(name)) for name in self.output_order]
+        sample_weight = [standardize_weights(data[name],
+                                             sample_weight=sample_weight.get(name)) for name in self.output_order]
         ins = [data[name] for name in self.input_order] + [standardize_y(data[name]) for name in self.output_order] + sample_weight
         outs = self._test_loop(self._test, ins, batch_size, verbose)

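The Graph changes above thread the per-output sample_weight and class_weight dictionaries through standardize_weights in fit, test_on_batch and evaluate, and give Graph.fit its own validation_split branch that slices the weight lists along with the inputs and targets. A rough call sketch, assuming `graph` is an already compiled Graph model with input 'input1' and outputs 'output1' and 'output2' as in the test hunks below; the arrays are placeholders, not taken from this diff:

    import numpy as np

    # Placeholder data for a hypothetical two-output graph.
    X_train = np.random.random((64, 32))
    y_train = np.random.random((64, 4))
    y2_train = np.random.random((64, 4))
    weights1 = np.random.uniform(size=y_train.shape[0])
    weights2 = np.random.uniform(size=y2_train.shape[0])

    # Per-output sample weights are passed as dicts keyed by output name.
    graph.fit({'input1': X_train, 'output1': y_train, 'output2': y2_train},
              nb_epoch=2, verbose=0, validation_split=0.1,
              sample_weight={'output1': weights1, 'output2': weights2})
    graph.evaluate({'input1': X_train, 'output1': y_train, 'output2': y2_train},
                   sample_weight={'output1': weights1, 'output2': weights2})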
@@ -144,6 +144,8 @@ class TestGraph(unittest.TestCase):
         weights1 = np.random.uniform(size=y_train.shape[0])
         weights2 = np.random.uniform(size=y2_train.shape[0])
+        weights1_test = np.random.uniform(size=y_test.shape[0])
+        weights2_test = np.random.uniform(size=y2_test.shape[0])

         graph.compile('rmsprop', {'output1': 'mse', 'output2': 'mse'})
@@ -153,7 +155,7 @@ class TestGraph(unittest.TestCase):
         assert(type(out == dict))
         assert(len(out) == 2)
         loss = graph.test_on_batch({'input1': X_test, 'output1': y_test, 'output2': y2_test},
-                                    sample_weight={'output1': weights1, 'output2': weights2})
+                                    sample_weight={'output1': weights1_test, 'output2': weights2_test})
         loss = graph.train_on_batch({'input1': X_train, 'output1': y_train, 'output2': y2_train},
                                     sample_weight={'output1': weights1, 'output2': weights2})
         loss = graph.evaluate({'input1': X_train, 'output1': y_train, 'output2': y2_train},

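The loss-weighting test below now runs fit with validation_split=0.1 and, when sample weights are supplied, with a three-element validation_data tuple. For context, the weighting scheme that test exercises looks roughly like this (a sketch reusing the test's constants; the label array is a random placeholder rather than the test's real data):

    import numpy as np

    nb_classes = 10
    weighted_class = 9
    standard_weight = 1
    high_weight = 5

    # Placeholder integer labels standing in for the test's dataset.
    y_train = np.random.randint(0, nb_classes, size=(1000,))

    # class_weight: dict mapping class index -> weight.
    class_weight = dict([(i, standard_weight) for i in range(nb_classes)])
    class_weight[weighted_class] = high_weight

    # Equivalent per-sample weights for the sample_weight path.
    sample_weight = np.ones((y_train.shape[0],)) * standard_weight
    sample_weight[y_train == weighted_class] = high_weight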
@@ -12,7 +12,7 @@ import unittest
 nb_classes = 10
 batch_size = 128
-nb_epoch = 5
+nb_epoch = 8
 weighted_class = 9
 standard_weight = 1
 high_weight = 5
@@ -59,8 +59,19 @@ def create_graph_model():
 def _test_weights_sequential(model, class_weight=None, sample_weight=None):
-    model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch, verbose=0,
-              class_weight=class_weight, sample_weight=sample_weight)
+    if sample_weight is not None:
+        model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch // 3, verbose=0,
+                  class_weight=class_weight, sample_weight=sample_weight)
+        model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch // 3, verbose=0,
+                  class_weight=class_weight, sample_weight=sample_weight, validation_split=0.1)
+        model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch // 3, verbose=0,
+                  class_weight=class_weight, sample_weight=sample_weight, validation_data=(X_train, Y_train, sample_weight))
+    else:
+        model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch // 2, verbose=0,
+                  class_weight=class_weight, sample_weight=sample_weight)
+        model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch // 2, verbose=0,
+                  class_weight=class_weight, sample_weight=sample_weight, validation_split=0.1)
     model.train_on_batch(X_train[:32], Y_train[:32],
                          class_weight=class_weight, sample_weight=sample_weight[:32] if sample_weight is not None else None)
     model.test_on_batch(X_train[:32], Y_train[:32],
@@ -70,8 +81,11 @@ def _test_weights_sequential(model, class_weight=None, sample_weight=None):
 def _test_weights_graph(model, class_weight=None, sample_weight=None):
-    model.fit({'input': X_train, 'output': Y_train}, batch_size=batch_size, nb_epoch=nb_epoch, verbose=0,
+    model.fit({'input': X_train, 'output': Y_train}, batch_size=batch_size, nb_epoch=nb_epoch // 2, verbose=0,
               class_weight={'output': class_weight}, sample_weight={'output': sample_weight})
+    model.fit({'input': X_train, 'output': Y_train}, batch_size=batch_size, nb_epoch=nb_epoch // 2, verbose=0,
+              class_weight={'output': class_weight}, sample_weight={'output': sample_weight}, validation_split=0.1)
     model.train_on_batch({'input': X_train[:32], 'output': Y_train[:32]},
                          class_weight={'output': class_weight}, sample_weight={'output': sample_weight[:32] if sample_weight is not None else None})
     model.test_on_batch({'input': X_train[:32], 'output': Y_train[:32]},