Fixes in loss weighting with validation data
This commit is contained in:
parent
1eb2e6e3f2
commit
7a86ff7f5b
@ -64,12 +64,7 @@ def slice_X(X, start=None, stop=None):
|
||||
|
||||
def weighted_objective(fn):
|
||||
def weighted(y_true, y_pred, weights, mask=None):
|
||||
'''
|
||||
y_true dimension: (sample, timestep, dims)
|
||||
y_pred dimension: (sample, timestep, dims)
|
||||
weights dimension: (sample, timestep, 1)
|
||||
'''
|
||||
# it's important that 0 * Inf == 0, not NaN, so I need to filter
|
||||
# it's important that 0 * Inf == 0, not NaN, so we need to filter
|
||||
# those out first
|
||||
filtered_y_true = y_true[weights.nonzero()[:-1]]
|
||||
filtered_y_pred = y_pred[weights.nonzero()[:-1]]
|
||||
@ -92,7 +87,7 @@ def standardize_weights(y, sample_weight=None, class_weight=None):
|
||||
if len(y.shape) > 3:
|
||||
raise Exception('class_weight not supported for 4+ dimensional targets.')
|
||||
yshape = y.shape
|
||||
y = np.reshape(y, (-1,yshape[-1])) # for time-distributed data, collapse time and sample
|
||||
y = np.reshape(y, (-1, yshape[-1])) # for time-distributed data, collapse time and sample
|
||||
if y.shape[1] > 1:
|
||||
y_classes = y.argmax(axis=1)
|
||||
elif y.shape[1] == 1:
|
||||
@ -160,7 +155,7 @@ def get_function_name(o):
|
||||
|
||||
class Model(object):
|
||||
def _fit(self, f, ins, out_labels=[], batch_size=128, nb_epoch=100, verbose=1, callbacks=[],
|
||||
validation_split=0., val_f=None, val_ins=None, shuffle=True, metrics=[]):
|
||||
val_f=None, val_ins=None, shuffle=True, metrics=[]):
|
||||
'''
|
||||
Abstract fit function for f(*ins). Assume that f returns a list, labelled by out_labels.
|
||||
'''
|
||||
@ -169,13 +164,6 @@ class Model(object):
|
||||
do_validation = True
|
||||
if verbose:
|
||||
print("Train on %d samples, validate on %d samples" % (len(ins[0]), len(val_ins[0])))
|
||||
else:
|
||||
if 0 < validation_split < 1:
|
||||
do_validation = True
|
||||
split_at = int(len(ins[0]) * (1 - validation_split))
|
||||
(ins, val_ins) = (slice_X(ins, 0, split_at), slice_X(ins, split_at))
|
||||
if verbose:
|
||||
print("Train on %d samples, validate on %d samples" % (len(ins[0]), len(val_ins[0])))
|
||||
|
||||
nb_train_sample = len(ins[0])
|
||||
index_array = np.arange(nb_train_sample)
|
||||
@ -451,7 +439,6 @@ class Sequential(Model, containers.Sequential):
|
||||
|
||||
X = standardize_X(X)
|
||||
y = standardize_y(y)
|
||||
sample_weight = standardize_weights(y, class_weight=class_weight, sample_weight=sample_weight)
|
||||
|
||||
val_f = None
|
||||
val_ins = None
|
||||
@ -461,14 +448,29 @@ class Sequential(Model, containers.Sequential):
|
||||
else:
|
||||
val_f = self._test
|
||||
if validation_data:
|
||||
try:
|
||||
if len(validation_data) == 2:
|
||||
X_val, y_val = validation_data
|
||||
except:
|
||||
raise Exception("Invalid format for validation data; provide a tuple (X_val, y_val). \
|
||||
sample_weight_val = np.ones(y_val.shape[:-1] + (1,))
|
||||
elif len(validation_data) == 3:
|
||||
X_val, y_val, sample_weight_val = validation_data
|
||||
else:
|
||||
raise Exception("Invalid format for validation data; provide a tuple (X_val, y_val) or (X_val, y_val, sample_weight). \
|
||||
X_val may be a numpy array or a list of numpy arrays depending on your model input.")
|
||||
X_val = standardize_X(X_val)
|
||||
y_val = standardize_y(y_val)
|
||||
val_ins = X_val + [y_val, np.ones(y_val.shape[:-1] + (1,))]
|
||||
sample_weight_val = standardize_weights(y_val, sample_weight=sample_weight_val)
|
||||
val_ins = X_val + [y_val, sample_weight_val]
|
||||
|
||||
elif 0 < validation_split < 1:
|
||||
split_at = int(len(X[0]) * (1 - validation_split))
|
||||
X, X_val = (slice_X(X, 0, split_at), slice_X(X, split_at))
|
||||
y, y_val = (slice_X(y, 0, split_at), slice_X(y, split_at))
|
||||
if sample_weight is not None:
|
||||
sample_weight, sample_weight_val = (slice_X(sample_weight, 0, split_at), slice_X(sample_weight, split_at))
|
||||
sample_weight_val = standardize_weights(y_val, sample_weight=sample_weight_val)
|
||||
else:
|
||||
sample_weight_val = np.ones(y_val.shape[:-1] + (1,))
|
||||
val_ins = X_val + [y_val, sample_weight_val]
|
||||
|
||||
if show_accuracy:
|
||||
f = self._train_with_acc
|
||||
@ -477,11 +479,12 @@ class Sequential(Model, containers.Sequential):
|
||||
f = self._train
|
||||
out_labels = ['loss']
|
||||
|
||||
sample_weight = standardize_weights(y, class_weight=class_weight, sample_weight=sample_weight)
|
||||
ins = X + [y, sample_weight]
|
||||
metrics = ['loss', 'acc', 'val_loss', 'val_acc']
|
||||
return self._fit(f, ins, out_labels=out_labels, batch_size=batch_size, nb_epoch=nb_epoch,
|
||||
verbose=verbose, callbacks=callbacks,
|
||||
validation_split=validation_split, val_f=val_f, val_ins=val_ins,
|
||||
val_f=val_f, val_ins=val_ins,
|
||||
shuffle=shuffle, metrics=metrics)
|
||||
|
||||
def predict(self, X, batch_size=128, verbose=0):
|
||||
@ -624,8 +627,8 @@ class Graph(Model, containers.Graph):
|
||||
|
||||
def test_on_batch(self, data, sample_weight={}):
|
||||
# data is a dictionary mapping input names to arrays
|
||||
sample_weight = [standardize_weights(data[name]) for name in self.output_order]
|
||||
|
||||
sample_weight = [standardize_weights(data[name],
|
||||
sample_weight=sample_weight.get(name)) for name in self.output_order]
|
||||
ins = [data[name] for name in self.input_order] + [standardize_y(data[name]) for name in self.output_order] + sample_weight
|
||||
return self._test(*ins)
|
||||
|
||||
@ -636,30 +639,46 @@ class Graph(Model, containers.Graph):
|
||||
|
||||
def fit(self, data, batch_size=128, nb_epoch=100, verbose=1, callbacks=[],
|
||||
validation_split=0., validation_data=None, shuffle=True, class_weight={}, sample_weight={}):
|
||||
sample_weight = [standardize_weights(data[name],
|
||||
sample_weight=sample_weight.get(name),
|
||||
class_weight=class_weight.get(name)) for name in self.output_order]
|
||||
ins = [data[name] for name in self.input_order] + [standardize_y(data[name]) for name in self.output_order] + sample_weight
|
||||
X = [data[name] for name in self.input_order]
|
||||
y = [standardize_y(data[name]) for name in self.output_order]
|
||||
sample_weight_list = [standardize_weights(data[name],
|
||||
sample_weight=sample_weight.get(name)) for name in self.output_order]
|
||||
class_weight_list = [class_weight.get(name) for name in self.output_order]
|
||||
|
||||
val_f = None
|
||||
val_ins = None
|
||||
if validation_data or validation_split:
|
||||
val_f = self._test
|
||||
if validation_data:
|
||||
# can't use sample weights with validation data at this point
|
||||
sample_weight = [standardize_weights(validation_data[name]) for name in self.output_order]
|
||||
val_ins = [validation_data[name] for name in self.input_order] + [standardize_y(validation_data[name]) for name in self.output_order] + sample_weight
|
||||
|
||||
elif 0 < validation_split < 1:
|
||||
split_at = int(len(X[0]) * (1 - validation_split))
|
||||
X, X_val = (slice_X(X, 0, split_at), slice_X(X, split_at))
|
||||
y, y_val = (slice_X(y, 0, split_at), slice_X(y, split_at))
|
||||
sample_weight_list, sample_weight_list_val = (slice_X(sample_weight_list, 0, split_at), slice_X(sample_weight_list, split_at))
|
||||
val_ins = X_val + y_val + sample_weight_list_val
|
||||
|
||||
f = self._train
|
||||
out_labels = ['loss']
|
||||
metrics = ['loss', 'val_loss']
|
||||
|
||||
sample_weight_list = [standardize_weights(y[i],
|
||||
sample_weight=sample_weight_list[i],
|
||||
class_weight=class_weight_list[i]) for i in range(len(self.output_order))]
|
||||
ins = X + y + sample_weight_list
|
||||
|
||||
history = self._fit(f, ins, out_labels=out_labels, batch_size=batch_size, nb_epoch=nb_epoch,
|
||||
verbose=verbose, callbacks=callbacks,
|
||||
validation_split=validation_split, val_f=val_f, val_ins=val_ins,
|
||||
val_f=val_f, val_ins=val_ins,
|
||||
shuffle=shuffle, metrics=metrics)
|
||||
return history
|
||||
|
||||
def evaluate(self, data, batch_size=128, verbose=0, sample_weight={}):
|
||||
sample_weight = [standardize_weights(data[name], sample_weight.get(name)) for name in self.output_order]
|
||||
sample_weight = [standardize_weights(data[name],
|
||||
sample_weight=sample_weight.get(name)) for name in self.output_order]
|
||||
|
||||
ins = [data[name] for name in self.input_order] + [standardize_y(data[name]) for name in self.output_order] + sample_weight
|
||||
outs = self._test_loop(self._test, ins, batch_size, verbose)
|
||||
|
@ -144,6 +144,8 @@ class TestGraph(unittest.TestCase):
|
||||
|
||||
weights1 = np.random.uniform(size=y_train.shape[0])
|
||||
weights2 = np.random.uniform(size=y2_train.shape[0])
|
||||
weights1_test = np.random.uniform(size=y_test.shape[0])
|
||||
weights2_test = np.random.uniform(size=y2_test.shape[0])
|
||||
|
||||
graph.compile('rmsprop', {'output1': 'mse', 'output2': 'mse'})
|
||||
|
||||
@ -153,7 +155,7 @@ class TestGraph(unittest.TestCase):
|
||||
assert(type(out == dict))
|
||||
assert(len(out) == 2)
|
||||
loss = graph.test_on_batch({'input1': X_test, 'output1': y_test, 'output2': y2_test},
|
||||
sample_weight={'output1': weights1, 'output2': weights2})
|
||||
sample_weight={'output1': weights1_test, 'output2': weights2_test})
|
||||
loss = graph.train_on_batch({'input1': X_train, 'output1': y_train, 'output2': y2_train},
|
||||
sample_weight={'output1': weights1, 'output2': weights2})
|
||||
loss = graph.evaluate({'input1': X_train, 'output1': y_train, 'output2': y2_train},
|
||||
|
@ -12,7 +12,7 @@ import unittest
|
||||
|
||||
nb_classes = 10
|
||||
batch_size = 128
|
||||
nb_epoch = 5
|
||||
nb_epoch = 8
|
||||
weighted_class = 9
|
||||
standard_weight = 1
|
||||
high_weight = 5
|
||||
@ -59,8 +59,19 @@ def create_graph_model():
|
||||
|
||||
|
||||
def _test_weights_sequential(model, class_weight=None, sample_weight=None):
|
||||
model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch, verbose=0,
|
||||
if sample_weight is not None:
|
||||
model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch // 3, verbose=0,
|
||||
class_weight=class_weight, sample_weight=sample_weight)
|
||||
model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch // 3, verbose=0,
|
||||
class_weight=class_weight, sample_weight=sample_weight, validation_split=0.1)
|
||||
model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch // 3, verbose=0,
|
||||
class_weight=class_weight, sample_weight=sample_weight, validation_data=(X_train, Y_train, sample_weight))
|
||||
else:
|
||||
model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch // 2, verbose=0,
|
||||
class_weight=class_weight, sample_weight=sample_weight)
|
||||
model.fit(X_train, Y_train, batch_size=batch_size, nb_epoch=nb_epoch // 2, verbose=0,
|
||||
class_weight=class_weight, sample_weight=sample_weight, validation_split=0.1)
|
||||
|
||||
model.train_on_batch(X_train[:32], Y_train[:32],
|
||||
class_weight=class_weight, sample_weight=sample_weight[:32] if sample_weight is not None else None)
|
||||
model.test_on_batch(X_train[:32], Y_train[:32],
|
||||
@ -70,8 +81,11 @@ def _test_weights_sequential(model, class_weight=None, sample_weight=None):
|
||||
|
||||
|
||||
def _test_weights_graph(model, class_weight=None, sample_weight=None):
|
||||
model.fit({'input': X_train, 'output': Y_train}, batch_size=batch_size, nb_epoch=nb_epoch, verbose=0,
|
||||
model.fit({'input': X_train, 'output': Y_train}, batch_size=batch_size, nb_epoch=nb_epoch // 2, verbose=0,
|
||||
class_weight={'output': class_weight}, sample_weight={'output': sample_weight})
|
||||
model.fit({'input': X_train, 'output': Y_train}, batch_size=batch_size, nb_epoch=nb_epoch // 2, verbose=0,
|
||||
class_weight={'output': class_weight}, sample_weight={'output': sample_weight}, validation_split=0.1)
|
||||
|
||||
model.train_on_batch({'input': X_train[:32], 'output': Y_train[:32]},
|
||||
class_weight={'output': class_weight}, sample_weight={'output': sample_weight[:32] if sample_weight is not None else None})
|
||||
model.test_on_batch({'input': X_train[:32], 'output': Y_train[:32]},
|
||||
|
Loading…
Reference in New Issue
Block a user