From 7a86ff7f5bcbf8bb56cdab90fd873c2f2b561b3e Mon Sep 17 00:00:00 2001
From: fchollet
Date: Thu, 27 Aug 2015 15:38:26 -0700
Subject: [PATCH] Fixes in loss weighting with validation data

---
 keras/models.py                   | 79 +++++++++++++++++++------------
 tests/auto/test_graph_model.py    |  4 +-
 tests/auto/test_loss_weighting.py | 22 +++++++--
 3 files changed, 70 insertions(+), 35 deletions(-)

diff --git a/keras/models.py b/keras/models.py
index 4d770ce41..39d9715b8 100644
--- a/keras/models.py
+++ b/keras/models.py
@@ -64,12 +64,7 @@ def slice_X(X, start=None, stop=None):

 def weighted_objective(fn):
     def weighted(y_true, y_pred, weights, mask=None):
-        '''
-            y_true dimension: (sample, timestep, dims)
-            y_pred dimension: (sample, timestep, dims)
-            weights dimension: (sample, timestep, 1)
-        '''
-        # it's important that 0 * Inf == 0, not NaN, so I need to filter
+        # it's important that 0 * Inf == 0, not NaN, so we need to filter
         # those out first
         filtered_y_true = y_true[weights.nonzero()[:-1]]
         filtered_y_pred = y_pred[weights.nonzero()[:-1]]
@@ -92,7 +87,7 @@ def standardize_weights(y, sample_weight=None, class_weight=None):
         if len(y.shape) > 3:
             raise Exception('class_weight not supported for 4+ dimensional targets.')
         yshape = y.shape
-        y = np.reshape(y, (-1,yshape[-1])) # for time-distributed data, collapse time and sample
+        y = np.reshape(y, (-1, yshape[-1]))  # for time-distributed data, collapse time and sample
         if y.shape[1] > 1:
             y_classes = y.argmax(axis=1)
         elif y.shape[1] == 1:
@@ -100,7 +95,7 @@ def standardize_weights(y, sample_weight=None, class_weight=None):
         else:
             y_classes = y
         class_weights = np.asarray([class_weight[cls] for cls in y_classes])
-        return np.reshape(class_weights, yshape[:-1] + (1,)) # uncollapse initial dimensions
+        return np.reshape(class_weights, yshape[:-1] + (1,))  # uncollapse initial dimensions
     else:
         return np.ones(y.shape[:-1] + (1,))

@@ -160,7 +155,7 @@ def get_function_name(o):

 class Model(object):
     def _fit(self, f, ins, out_labels=[], batch_size=128, nb_epoch=100, verbose=1, callbacks=[],
-             validation_split=0., val_f=None, val_ins=None, shuffle=True, metrics=[]):
+             val_f=None, val_ins=None, shuffle=True, metrics=[]):
         '''
             Abstract fit function for f(*ins). Assume that f returns a list, labelled by out_labels.
         '''
@@ -169,13 +164,6 @@ class Model(object):
             do_validation = True
             if verbose:
                 print("Train on %d samples, validate on %d samples" % (len(ins[0]), len(val_ins[0])))
-        else:
-            if 0 < validation_split < 1:
-                do_validation = True
-                split_at = int(len(ins[0]) * (1 - validation_split))
-                (ins, val_ins) = (slice_X(ins, 0, split_at), slice_X(ins, split_at))
-                if verbose:
-                    print("Train on %d samples, validate on %d samples" % (len(ins[0]), len(val_ins[0])))

         nb_train_sample = len(ins[0])
         index_array = np.arange(nb_train_sample)
@@ -451,7 +439,6 @@ class Sequential(Model, containers.Sequential):

         X = standardize_X(X)
         y = standardize_y(y)
-        sample_weight = standardize_weights(y, class_weight=class_weight, sample_weight=sample_weight)

         val_f = None
         val_ins = None
@@ -461,14 +448,29 @@ class Sequential(Model, containers.Sequential):
             else:
                 val_f = self._test
             if validation_data:
-                try:
+                if len(validation_data) == 2:
                     X_val, y_val = validation_data
-                except:
-                    raise Exception("Invalid format for validation data; provide a tuple (X_val, y_val). \
+                    sample_weight_val = np.ones(y_val.shape[:-1] + (1,))
+                elif len(validation_data) == 3:
+                    X_val, y_val, sample_weight_val = validation_data
+                else:
+                    raise Exception("Invalid format for validation data; provide a tuple (X_val, y_val) or (X_val, y_val, sample_weight). \
                         X_val may be a numpy array or a list of numpy arrays depending on your model input.")
                 X_val = standardize_X(X_val)
                 y_val = standardize_y(y_val)
-                val_ins = X_val + [y_val, np.ones(y_val.shape[:-1] + (1,))]
+                sample_weight_val = standardize_weights(y_val, sample_weight=sample_weight_val)
+                val_ins = X_val + [y_val, sample_weight_val]
+
+            elif 0 < validation_split < 1:
+                split_at = int(len(X[0]) * (1 - validation_split))
+                X, X_val = (slice_X(X, 0, split_at), slice_X(X, split_at))
+                y, y_val = (slice_X(y, 0, split_at), slice_X(y, split_at))
+                if sample_weight is not None:
+                    sample_weight, sample_weight_val = (slice_X(sample_weight, 0, split_at), slice_X(sample_weight, split_at))
+                    sample_weight_val = standardize_weights(y_val, sample_weight=sample_weight_val)
+                else:
+                    sample_weight_val = np.ones(y_val.shape[:-1] + (1,))
+                val_ins = X_val + [y_val, sample_weight_val]

         if show_accuracy:
             f = self._train_with_acc
@@ -477,11 +479,12 @@ class Sequential(Model, containers.Sequential):
             f = self._train
             out_labels = ['loss']

+        sample_weight = standardize_weights(y, class_weight=class_weight, sample_weight=sample_weight)
         ins = X + [y, sample_weight]
         metrics = ['loss', 'acc', 'val_loss', 'val_acc']
         return self._fit(f, ins, out_labels=out_labels, batch_size=batch_size, nb_epoch=nb_epoch,
                          verbose=verbose, callbacks=callbacks,
-                         validation_split=validation_split, val_f=val_f, val_ins=val_ins,
+                         val_f=val_f, val_ins=val_ins,
                          shuffle=shuffle, metrics=metrics)

     def predict(self, X, batch_size=128, verbose=0):
@@ -624,8 +627,8 @@ class Graph(Model, containers.Graph):

     def test_on_batch(self, data, sample_weight={}):
         # data is a dictionary mapping input names to arrays
-        sample_weight = [standardize_weights(data[name]) for name in self.output_order]
-
+        sample_weight = [standardize_weights(data[name],
+                                             sample_weight=sample_weight.get(name)) for name in self.output_order]
         ins = [data[name] for name in self.input_order] + [standardize_y(data[name]) for name in self.output_order] + sample_weight
         return self._test(*ins)

@@ -636,30 +639,46 @@ class Graph(Model, containers.Graph):

     def fit(self, data, batch_size=128, nb_epoch=100, verbose=1, callbacks=[],
             validation_split=0., validation_data=None, shuffle=True, class_weight={}, sample_weight={}):
-        sample_weight = [standardize_weights(data[name],
-                                             sample_weight=sample_weight.get(name),
-                                             class_weight=class_weight.get(name)) for name in self.output_order]
-        ins = [data[name] for name in self.input_order] + [standardize_y(data[name]) for name in self.output_order] + sample_weight
+        X = [data[name] for name in self.input_order]
+        y = [standardize_y(data[name]) for name in self.output_order]
+        sample_weight_list = [standardize_weights(data[name],
+                                                  sample_weight=sample_weight.get(name)) for name in self.output_order]
+        class_weight_list = [class_weight.get(name) for name in self.output_order]

         val_f = None
         val_ins = None
         if validation_data or validation_split:
             val_f = self._test
         if validation_data:
+            # can't use sample weights with validation data at this point
             sample_weight = [standardize_weights(validation_data[name]) for name in self.output_order]
             val_ins = [validation_data[name] for name in self.input_order] + [standardize_y(validation_data[name]) for name in self.output_order] + sample_weight
+        elif 0 < validation_split < 1:
+            split_at = int(len(X[0]) * (1 - validation_split))
+            X, X_val = (slice_X(X, 0, split_at), slice_X(X, split_at))
+            y, y_val = (slice_X(y, 0, split_at), slice_X(y, split_at))
+            sample_weight_list, sample_weight_list_val = (slice_X(sample_weight_list, 0, split_at), slice_X(sample_weight_list, split_at))
+            val_ins = X_val + y_val + sample_weight_list_val
+
         f = self._train
         out_labels = ['loss']
         metrics = ['loss', 'val_loss']
+
+        sample_weight_list = [standardize_weights(y[i],
+                                                  sample_weight=sample_weight_list[i],
+                                                  class_weight=class_weight_list[i]) for i in range(len(self.output_order))]
+        ins = X + y + sample_weight_list
+
         history = self._fit(f, ins, out_labels=out_labels, batch_size=batch_size, nb_epoch=nb_epoch,
                             verbose=verbose, callbacks=callbacks,
-                            validation_split=validation_split, val_f=val_f, val_ins=val_ins,
+                            val_f=val_f, val_ins=val_ins,
                             shuffle=shuffle, metrics=metrics)
         return history

     def evaluate(self, data, batch_size=128, verbose=0, sample_weight={}):
-        sample_weight = [standardize_weights(data[name], sample_weight.get(name)) for name in self.output_order]
+        sample_weight = [standardize_weights(data[name],
+                                             sample_weight=sample_weight.get(name)) for name in self.output_order]
         ins = [data[name] for name in self.input_order] + [standardize_y(data[name]) for name in self.output_order] + sample_weight
         outs = self._test_loop(self._test, ins, batch_size, verbose)
diff --git a/tests/auto/test_graph_model.py b/tests/auto/test_graph_model.py
index a7fce2e7a..a33ff2b82 100644
--- a/tests/auto/test_graph_model.py
+++ b/tests/auto/test_graph_model.py
@@ -144,6 +144,8 @@ class TestGraph(unittest.TestCase):

         weights1 = np.random.uniform(size=y_train.shape[0])
         weights2 = np.random.uniform(size=y2_train.shape[0])
+        weights1_test = np.random.uniform(size=y_test.shape[0])
+        weights2_test = np.random.uniform(size=y2_test.shape[0])

         graph.compile('rmsprop', {'output1': 'mse', 'output2': 'mse'})

@@ -153,7 +155,7 @@ class TestGraph(unittest.TestCase):
         assert(type(out == dict))
         assert(len(out) == 2)
         loss = graph.test_on_batch({'input1': X_test, 'output1': y_test, 'output2': y2_test},
-                                   sample_weight={'output1': weights1, 'output2': weights2})
+                                   sample_weight={'output1': weights1_test, 'output2': weights2_test})
         loss = graph.train_on_batch({'input1': X_train, 'output1': y_train, 'output2': y2_train},
                                     sample_weight={'output1': weights1, 'output2': weights2})
         loss = graph.evaluate({'input1': X_train, 'output1': y_train, 'output2': y2_train},
diff --git a/tests/auto/test_loss_weighting.py b/tests/auto/test_loss_weighting.py
index 01867730d..bab802ca6 100644
--- a/tests/auto/test_loss_weighting.py
+++ b/tests/auto/test_loss_weighting.py
@@ -12,7 +12,7 @@ import unittest

 nb_classes = 10
 batch_size = 128
-nb_epoch = 5
+nb_epoch = 8
 weighted_class = 9
 standard_weight = 1
 high_weight = 5
@@ -59,8 +59,19 @@ def create_graph_model():


 def _test_weights_sequential(model, class_weight=None, sample_weight=None):
-    model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch, verbose=0,
-              class_weight=class_weight, sample_weight=sample_weight)
+    if sample_weight is not None:
+        model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch // 3, verbose=0,
+                  class_weight=class_weight, sample_weight=sample_weight)
+        model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch // 3, verbose=0,
+                  class_weight=class_weight, sample_weight=sample_weight, validation_split=0.1)
+        model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch // 3, verbose=0,
+                  class_weight=class_weight, sample_weight=sample_weight, validation_data=(X_train, Y_train, sample_weight))
+    else:
+        model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch // 2, verbose=0,
+                  class_weight=class_weight, sample_weight=sample_weight)
+        model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch // 2, verbose=0,
+                  class_weight=class_weight, sample_weight=sample_weight, validation_split=0.1)
+
     model.train_on_batch(X_train[:32], Y_train[:32],
                          class_weight=class_weight, sample_weight=sample_weight[:32] if sample_weight is not None else None)
     model.test_on_batch(X_train[:32], Y_train[:32],
@@ -70,8 +81,11 @@ def _test_weights_sequential(model, class_weight=None, sample_weight=None):


 def _test_weights_graph(model, class_weight=None, sample_weight=None):
-    model.fit({'input': X_train, 'output': Y_train}, batch_size=batch_size, nb_epoch=nb_epoch, verbose=0,
+    model.fit({'input': X_train, 'output': Y_train}, batch_size=batch_size, nb_epoch=nb_epoch // 2, verbose=0,
               class_weight={'output': class_weight}, sample_weight={'output': sample_weight})
+    model.fit({'input': X_train, 'output': Y_train}, batch_size=batch_size, nb_epoch=nb_epoch // 2, verbose=0,
+              class_weight={'output': class_weight}, sample_weight={'output': sample_weight}, validation_split=0.1)
+
     model.train_on_batch({'input': X_train[:32], 'output': Y_train[:32]},
                          class_weight={'output': class_weight}, sample_weight={'output': sample_weight[:32] if sample_weight is not None else None})
     model.test_on_batch({'input': X_train[:32], 'output': Y_train[:32]},
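
A minimal usage sketch of the patched Sequential.fit behaviour (not part of the diff): sample_weight can now be combined with validation_split, and validation_data may be a 3-tuple carrying validation-set weights, as exercised in tests/auto/test_loss_weighting.py. The toy data, layer sizes, and epoch counts below are illustrative assumptions, and the sketch assumes the 2015-era Keras interfaces (Dense(input_dim, output_dim), string optimizer/loss names in compile).

import numpy as np
from keras.models import Sequential
from keras.layers.core import Dense, Activation

# toy data: 1000 samples, 20 features, 10 one-hot classes (shapes are illustrative)
X_train = np.random.random((1000, 20))
Y_train = np.zeros((1000, 10))
Y_train[np.arange(1000), np.random.randint(0, 10, size=1000)] = 1.
sample_weight = np.random.uniform(size=1000)  # one weight per training sample

model = Sequential()
model.add(Dense(20, 32))
model.add(Activation('relu'))
model.add(Dense(32, 10))
model.add(Activation('softmax'))
model.compile(optimizer='rmsprop', loss='categorical_crossentropy')

# weighted training with an automatic 10% validation split
model.fit(X_train, Y_train, batch_size=128, nb_epoch=2, verbose=0,
          sample_weight=sample_weight, validation_split=0.1)

# weighted training with explicit validation data plus validation-set weights,
# i.e. validation_data=(X_val, y_val, sample_weight_val)
model.fit(X_train, Y_train, batch_size=128, nb_epoch=2, verbose=0,
          sample_weight=sample_weight,
          validation_data=(X_train, Y_train, sample_weight))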