Refactor regularizers and add add_weight method. (#4703)
* Refactor regularizers, introduce layer.add_weight * Fix BN add_update syntax * Fix eigenvalue regularizer * Style fixes.
This commit is contained in:
parent
2b336756b6
commit
ff62eb251b
@ -57,7 +57,7 @@ def to_dense(tensor):
|
||||
|
||||
|
||||
def variable(value, dtype=_FLOATX, name=None):
|
||||
'''Instantiate a tensor variable.
|
||||
'''Instantiates a variable.
|
||||
'''
|
||||
if hasattr(value, 'tocoo'):
|
||||
_assert_sparse_module()
|
||||
|
@ -13,6 +13,7 @@ import inspect
|
||||
from six.moves import zip
|
||||
|
||||
from .. import backend as K
|
||||
from .. import initializations
|
||||
from ..utils.io_utils import ask_to_proceed_with_overwrite
|
||||
from ..utils.generic_utils import func_dump, func_load
|
||||
|
||||
@ -28,6 +29,11 @@ def to_list(x):
|
||||
return [x]
|
||||
|
||||
|
||||
def object_list_uid(object_list):
|
||||
object_list = to_list(object_list)
|
||||
return ', '.join([str(abs(id(x))) for x in object_list])
|
||||
|
||||
|
||||
class InputSpec(object):
|
||||
'''This specifies the ndim, dtype and shape of every input to a layer.
|
||||
Every layer should expose (if appropriate) an `input_spec` attribute:
|
||||
@ -239,7 +245,6 @@ class Layer(object):
|
||||
non_trainable_weights: List of variables.
|
||||
weights: The concatenation of the lists trainable_weights and
|
||||
non_trainable_weights (in this order).
|
||||
regularizers: List of regularizers.
|
||||
constraints: Dict mapping weights to constraints.
|
||||
|
||||
# Methods
|
||||
@ -294,8 +299,8 @@ class Layer(object):
|
||||
self.trainable_weights = []
|
||||
if not hasattr(self, 'non_trainable_weights'):
|
||||
self.non_trainable_weights = []
|
||||
if not hasattr(self, 'regularizers'):
|
||||
self.regularizers = []
|
||||
if not hasattr(self, 'losses'):
|
||||
self.losses = []
|
||||
if not hasattr(self, 'constraints'):
|
||||
self.constraints = {} # dict {tensor: constraint instance}
|
||||
self.built = False
|
||||
@ -354,6 +359,19 @@ class Layer(object):
|
||||
def non_trainable_weights(self, weights):
|
||||
self._non_trainable_weights = weights
|
||||
|
||||
@property
|
||||
def regularizers(self):
|
||||
warnings.warn('The `regularizers` property of layers/models is deprecated. '
|
||||
'Regularization losses are now managed via the `losses` '
|
||||
'layer/model property.')
|
||||
return []
|
||||
|
||||
@regularizers.setter
|
||||
def regularizers(self, _):
|
||||
warnings.warn('The `regularizers` property of layers/models is deprecated. '
|
||||
'Regularization losses are now managed via the `losses` '
|
||||
'layer/model property.')
|
||||
|
||||
def create_input_layer(self, batch_input_shape,
|
||||
input_dtype=None, name=None):
|
||||
if not name:
|
||||
@ -373,6 +391,32 @@ class Layer(object):
|
||||
# to the input layer we just created.
|
||||
self(x)
|
||||
|
||||
def add_weight(self, shape, initializer, name=None,
|
||||
trainable=True,
|
||||
regularizer=None,
|
||||
constraint=None):
|
||||
'''Adds a weight variable to the layer.
|
||||
|
||||
# Arguments:
|
||||
shape: The shape tuple of the weight.
|
||||
initializer: An Initializer instance (callable).
|
||||
trainable: A boolean, whether the weight should
|
||||
be trained via backprop or not (assuming
|
||||
that the layer itself is also trainable).
|
||||
regularizer: An optional Regularizer instance.
|
||||
'''
|
||||
initializer = initializations.get(initializer)
|
||||
weight = initializer(shape, name=name)
|
||||
if regularizer is not None:
|
||||
self.add_loss(regularizer(weight))
|
||||
if constraint is not None:
|
||||
self.constraints[weight] = constraint
|
||||
if trainable:
|
||||
self.trainable_weights.append(weight)
|
||||
else:
|
||||
self.non_trainable_weights.append(weight)
|
||||
return weight
|
||||
|
||||
def assert_input_compatibility(self, input):
|
||||
'''This checks that the tensor(s) `input`
|
||||
verify the input assumptions of the layer
|
||||
@ -519,15 +563,21 @@ class Layer(object):
|
||||
self.add_inbound_node(inbound_layers, node_indices, tensor_indices)
|
||||
# Outputs were already computed when calling self.add_inbound_node.
|
||||
outputs = self.inbound_nodes[-1].output_tensors
|
||||
# If single output tensor: return it,
|
||||
# else return a list (at least 2 elements).
|
||||
if len(outputs) == 1:
|
||||
return outputs[0]
|
||||
else:
|
||||
return outputs
|
||||
else:
|
||||
# This case appears if the input was not a Keras tensor.
|
||||
return self.call(x, mask)
|
||||
outputs = to_list(self.call(x, mask))
|
||||
|
||||
# Apply activity regularizer if any:
|
||||
if hasattr(self, 'activity_regularizer') and self.activity_regularizer is not None:
|
||||
regularization_losses = [self.activity_regularizer(x) for x in outputs]
|
||||
self.add_loss(regularization_losses, input_tensors)
|
||||
|
||||
# If single output tensor: return it,
|
||||
# else return a list (at least 2 elements).
|
||||
if len(outputs) == 1:
|
||||
return outputs[0]
|
||||
else:
|
||||
return outputs
|
||||
|
||||
def add_inbound_node(self, inbound_layers,
|
||||
node_indices=None, tensor_indices=None):
|
||||
@ -806,20 +856,58 @@ class Layer(object):
|
||||
'ill-defined for the layer. ' +
|
||||
'Use `get_output_shape_at(node_index)` instead.')
|
||||
|
||||
def add_updates(self, updates, inputs):
|
||||
def add_loss(self, losses, inputs=None):
|
||||
if losses is None:
|
||||
return
|
||||
# Update self.losses
|
||||
losses = to_list(losses)
|
||||
if not hasattr(self, 'losses'):
|
||||
self.losses = []
|
||||
try:
|
||||
self.losses += losses
|
||||
except AttributeError:
|
||||
# In case self.losses isn't settable
|
||||
# (i.e. it's a getter method).
|
||||
# In that case the `losses` property is
|
||||
# auto-computed and shouldn't be set.
|
||||
pass
|
||||
# Update self._per_input_updates
|
||||
if not hasattr(self, '_per_input_losses'):
|
||||
self._per_input_losses = {}
|
||||
if inputs is not None:
|
||||
inputs_hash = object_list_uid(inputs)
|
||||
else:
|
||||
# Updates indexed by None are unconditional
|
||||
# rather than input-dependent
|
||||
inputs_hash = None
|
||||
if inputs_hash not in self._per_input_losses:
|
||||
self._per_input_losses[inputs_hash] = []
|
||||
self._per_input_losses[inputs_hash] += losses
|
||||
|
||||
def add_update(self, updates, inputs=None):
|
||||
if updates is None:
|
||||
return
|
||||
# Update self.updates
|
||||
updates = to_list(updates)
|
||||
if not hasattr(self, 'updates'):
|
||||
self.updates = []
|
||||
try:
|
||||
self.updates += updates
|
||||
except AttributeError:
|
||||
# In case self.updates isn't settable
|
||||
# (i.e. it's a getter method).
|
||||
# In that case the `updates` property is
|
||||
# auto-computed and shouldn't be set.
|
||||
pass
|
||||
# Update self._per_input_updates
|
||||
if not hasattr(self, '_per_input_updates'):
|
||||
self._per_input_updates = {}
|
||||
inputs = to_list(inputs)
|
||||
updates = to_list(updates)
|
||||
inputs_hash = ', '.join([str(abs(id(x))) for x in inputs])
|
||||
if inputs is not None:
|
||||
inputs_hash = object_list_uid(inputs)
|
||||
else:
|
||||
# Updates indexed by None are unconditional
|
||||
# rather than input-dependent
|
||||
inputs_hash = None
|
||||
if inputs_hash not in self._per_input_updates:
|
||||
self._per_input_updates[inputs_hash] = []
|
||||
self._per_input_updates[inputs_hash] += updates
|
||||
@ -827,12 +915,19 @@ class Layer(object):
|
||||
def get_updates_for(self, inputs):
|
||||
if not hasattr(self, '_per_input_updates'):
|
||||
return []
|
||||
inputs = to_list(inputs)
|
||||
inputs_hash = ', '.join([str(abs(id(x))) for x in inputs])
|
||||
inputs_hash = object_list_uid(inputs)
|
||||
if inputs_hash in self._per_input_updates:
|
||||
return self._per_input_updates[inputs_hash]
|
||||
return []
|
||||
|
||||
def get_losses_for(self, inputs):
|
||||
if not hasattr(self, '_per_input_losses'):
|
||||
return []
|
||||
inputs_hash = object_list_uid(inputs)
|
||||
if inputs_hash in self._per_input_losses:
|
||||
return self._per_input_losses[inputs_hash]
|
||||
return []
|
||||
|
||||
@property
|
||||
def weights(self):
|
||||
return self.trainable_weights + self.non_trainable_weights
|
||||
@ -950,7 +1045,6 @@ class InputLayer(Layer):
|
||||
|
||||
self.trainable_weights = []
|
||||
self.non_trainable_weights = []
|
||||
self.regularizers = []
|
||||
self.constraints = {}
|
||||
|
||||
self.sparse = sparse
|
||||
@ -1151,7 +1245,6 @@ class Merge(Layer):
|
||||
self.inbound_nodes = []
|
||||
self.outbound_nodes = []
|
||||
self.constraints = {}
|
||||
self.regularizers = []
|
||||
self.trainable_weights = []
|
||||
self.non_trainable_weights = []
|
||||
self.supports_masking = True
|
||||
@ -1587,7 +1680,6 @@ class Container(Layer):
|
||||
supports_masking (boolean)
|
||||
trainable_weights (list of variables)
|
||||
non_trainable_weights (list of variables)
|
||||
regularizers (list of regularizers)
|
||||
constraints (list of tuples (weight, constraint))
|
||||
|
||||
# Methods
|
||||
@ -1901,7 +1993,6 @@ class Container(Layer):
|
||||
self.supports_masking = False
|
||||
# The following are implemented as property functions:
|
||||
# self.constraints
|
||||
# self.regularizers
|
||||
# self.trainable_weights
|
||||
# self.non_trainable_weights
|
||||
# self.input_spec
|
||||
@ -1946,14 +2037,38 @@ class Container(Layer):
|
||||
if len(layer.inbound_nodes) == 1:
|
||||
updates += layer.updates
|
||||
else:
|
||||
# Collect updates that are dependent on inputs
|
||||
# that are part of the model.
|
||||
for node_index, node in enumerate(layer.inbound_nodes):
|
||||
node_key = layer.name + '_ib-' + str(node_index)
|
||||
if node_key in self.container_nodes:
|
||||
# The model owns this layer node.
|
||||
inputs = node.input_tensors
|
||||
updates += layer.get_updates_for(inputs)
|
||||
# Collect unconditional updates.
|
||||
updates += layer.get_updates_for(None)
|
||||
return updates
|
||||
|
||||
@property
|
||||
def losses(self):
|
||||
losses = []
|
||||
for layer in self.layers:
|
||||
if hasattr(layer, 'losses'):
|
||||
if len(layer.inbound_nodes) == 1:
|
||||
losses += layer.losses
|
||||
else:
|
||||
# Collect losses that are dependent on inputs
|
||||
# that are part of the model.
|
||||
for node_index, node in enumerate(layer.inbound_nodes):
|
||||
node_key = layer.name + '_ib-' + str(node_index)
|
||||
if node_key in self.container_nodes:
|
||||
# The model owns this layer node.
|
||||
inputs = node.input_tensors
|
||||
losses += layer.get_losses_for(inputs)
|
||||
# Collect unconditional losses.
|
||||
losses += layer.get_losses_for(None)
|
||||
return losses
|
||||
|
||||
@property
|
||||
def stateful(self):
|
||||
return any([(hasattr(layer, 'stateful') and layer.stateful) for layer in self.layers])
|
||||
@ -1990,10 +2105,13 @@ class Container(Layer):
|
||||
|
||||
@property
|
||||
def regularizers(self):
|
||||
regs = []
|
||||
for layer in self.layers:
|
||||
regs += layer.regularizers
|
||||
return regs
|
||||
warnings.warn('The `regularizers` attribute of layers/models '
|
||||
'is deprecated. '
|
||||
'Regularization losses are now managed via the `losses` '
|
||||
'layer/model property.\n'
|
||||
'The `regularizers` attribute will be removed '
|
||||
'after 06/2017.')
|
||||
return []
|
||||
|
||||
@property
|
||||
def trainable_weights(self):
|
||||
@ -2061,8 +2179,7 @@ class Container(Layer):
|
||||
'''True if any layer in the graph uses it.
|
||||
'''
|
||||
layers_learning_phase = any([layer.uses_learning_phase for layer in self.layers])
|
||||
regs_learning_phase = any([reg.uses_learning_phase for reg in self.regularizers])
|
||||
return layers_learning_phase or regs_learning_phase
|
||||
return layers_learning_phase
|
||||
|
||||
def call(self, input, mask=None):
|
||||
'''`call` just reapplies all ops in the graph to the new inputs
|
||||
@ -2239,9 +2356,16 @@ class Container(Layer):
|
||||
output_tensors = to_list(layer.call(computed_tensors, computed_masks))
|
||||
output_masks = to_list(layer.compute_mask(computed_tensors, computed_masks))
|
||||
|
||||
# update model updates
|
||||
# Update model updates and losses:
|
||||
layer_inputs = [x[0] for x in computed_data]
|
||||
self.add_updates(layer.get_updates_for(layer_inputs), inputs)
|
||||
# Keep track of updates that depend on the inputs (e.g. BN updates).
|
||||
self.add_update(layer.get_updates_for(layer_inputs), inputs)
|
||||
# Keep track of unconditional updates (e.g. a counter).
|
||||
self.add_update(layer.get_updates_for(None), None)
|
||||
# Keep track of losses that depend on the inputs (e.g. activity regularizers).
|
||||
self.add_loss(layer.get_losses_for(layer_inputs), inputs)
|
||||
# Keep track of unconditional losses (e.g. weight regularizers).
|
||||
self.add_loss(layer.get_losses_for(None), None)
|
||||
|
||||
# Update _keras_shape.
|
||||
if all([hasattr(x, '_keras_shape') for x in computed_tensors]):
|
||||
|
@ -611,9 +611,10 @@ class Model(Container):
|
||||
else:
|
||||
total_loss += loss_weight * output_loss
|
||||
|
||||
# add regularization penalties to the loss
|
||||
for r in self.regularizers:
|
||||
total_loss = r(total_loss)
|
||||
# add regularization penalties
|
||||
# and other layer-specific losses
|
||||
for loss_tensor in self.losses:
|
||||
total_loss += loss_tensor
|
||||
|
||||
# list of same size as output_names.
|
||||
# contains tuples (metrics for output, names of metrics)
|
||||
|
@ -113,31 +113,20 @@ class Convolution1D(Layer):
|
||||
def build(self, input_shape):
|
||||
input_dim = input_shape[2]
|
||||
self.W_shape = (self.filter_length, 1, input_dim, self.nb_filter)
|
||||
self.W = self.init(self.W_shape, name='{}_W'.format(self.name))
|
||||
|
||||
self.W = self.add_weight(self.W_shape,
|
||||
initializer=self.init,
|
||||
name='{}_W'.format(self.name),
|
||||
regularizer=self.W_regularizer,
|
||||
constraint=self.W_constraint)
|
||||
if self.bias:
|
||||
self.b = K.zeros((self.nb_filter,), name='{}_b'.format(self.name))
|
||||
self.trainable_weights = [self.W, self.b]
|
||||
self.b = self.add_weight((self.nb_filter,),
|
||||
initializer='zero',
|
||||
name='{}_b'.format(self.name),
|
||||
regularizer=self.b_regularizer,
|
||||
constraint=self.b_constraint)
|
||||
else:
|
||||
self.trainable_weights = [self.W]
|
||||
self.regularizers = []
|
||||
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
|
||||
if self.bias and self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
if self.activity_regularizer:
|
||||
self.activity_regularizer.set_layer(self)
|
||||
self.regularizers.append(self.activity_regularizer)
|
||||
|
||||
self.constraints = {}
|
||||
if self.W_constraint:
|
||||
self.constraints[self.W] = self.W_constraint
|
||||
if self.bias and self.b_constraint:
|
||||
self.constraints[self.b] = self.b_constraint
|
||||
self.b = None
|
||||
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
@ -406,32 +395,20 @@ class Convolution2D(Layer):
|
||||
stack_size = input_shape[3]
|
||||
self.W_shape = (self.nb_row, self.nb_col, stack_size, self.nb_filter)
|
||||
else:
|
||||
raise ValueError('Invalid dim_ordering:', self.dim_ordering)
|
||||
self.W = self.init(self.W_shape, name='{}_W'.format(self.name))
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
self.W = self.add_weight(self.W_shape,
|
||||
initializer=self.init,
|
||||
name='{}_W'.format(self.name),
|
||||
regularizer=self.W_regularizer,
|
||||
constraint=self.W_constraint)
|
||||
if self.bias:
|
||||
self.b = K.zeros((self.nb_filter,), name='{}_b'.format(self.name))
|
||||
self.trainable_weights = [self.W, self.b]
|
||||
self.b = self.add_weight((self.nb_filter,),
|
||||
initializer='zero',
|
||||
name='{}_b'.format(self.name),
|
||||
regularizer=self.b_regularizer,
|
||||
constraint=self.b_constraint)
|
||||
else:
|
||||
self.trainable_weights = [self.W]
|
||||
self.regularizers = []
|
||||
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
|
||||
if self.bias and self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
if self.activity_regularizer:
|
||||
self.activity_regularizer.set_layer(self)
|
||||
self.regularizers.append(self.activity_regularizer)
|
||||
|
||||
self.constraints = {}
|
||||
if self.W_constraint:
|
||||
self.constraints[self.W] = self.W_constraint
|
||||
if self.bias and self.b_constraint:
|
||||
self.constraints[self.b] = self.b_constraint
|
||||
self.b = None
|
||||
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
@ -957,40 +934,26 @@ class SeparableConvolution2D(Layer):
|
||||
depthwise_shape = (self.nb_row, self.nb_col, stack_size, self.depth_multiplier)
|
||||
pointwise_shape = (1, 1, self.depth_multiplier * stack_size, self.nb_filter)
|
||||
else:
|
||||
raise ValueError('Invalid dim_ordering:', self.dim_ordering)
|
||||
self.depthwise_kernel = self.init(depthwise_shape,
|
||||
name='{}_depthwise_kernel'.format(self.name))
|
||||
self.pointwise_kernel = self.init(pointwise_shape,
|
||||
name='{}_pointwise_kernel'.format(self.name))
|
||||
if self.bias:
|
||||
self.b = K.zeros((self.nb_filter,), name='{}_b'.format(self.name))
|
||||
self.trainable_weights = [self.depthwise_kernel,
|
||||
self.pointwise_kernel,
|
||||
self.b]
|
||||
else:
|
||||
self.trainable_weights = [self.depthwise_kernel,
|
||||
self.pointwise_kernel]
|
||||
self.regularizers = []
|
||||
if self.depthwise_regularizer:
|
||||
self.depthwise_regularizer.set_param(self.depthwise_kernel)
|
||||
self.regularizers.append(self.depthwise_regularizer)
|
||||
if self.pointwise_regularizer:
|
||||
self.pointwise_regularizer.set_param(self.pointwise_kernel)
|
||||
self.regularizers.append(self.pointwise_regularizer)
|
||||
if self.bias and self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
if self.activity_regularizer:
|
||||
self.activity_regularizer.set_layer(self)
|
||||
self.regularizers.append(self.activity_regularizer)
|
||||
raise Exception('Invalid dim_ordering: ' + self.dim_ordering)
|
||||
|
||||
self.constraints = {}
|
||||
if self.depthwise_constraint:
|
||||
self.constraints[self.depthwise_kernel] = self.depthwise_constraint
|
||||
if self.pointwise_constraint:
|
||||
self.constraints[self.pointwise_kernel] = self.pointwise_constraint
|
||||
if self.bias and self.b_constraint:
|
||||
self.constraints[self.b] = self.b_constraint
|
||||
self.depthwise_kernel = self.add_weight(depthwise_shape,
|
||||
initializer=self.init,
|
||||
regularizer=self.depthwise_regularizer,
|
||||
constraint=self.depthwise_constraint,
|
||||
name='{}_depthwise_kernel'.format(self.name))
|
||||
self.pointwise_kernel = self.add_weight(pointwise_shape,
|
||||
initializer=self.init,
|
||||
regularizer=self.pointwise_regularizer,
|
||||
constraint=self.pointwise_constraint,
|
||||
name='{}_pointwise_kernel'.format(self.name))
|
||||
if self.bias:
|
||||
self.b = self.add_weight((self.nb_filter,),
|
||||
initializer='zero',
|
||||
name='{}_b'.format(self.name),
|
||||
regularizer=self.b_regularizer,
|
||||
constraint=self.b_constraint)
|
||||
else:
|
||||
self.b = None
|
||||
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
@ -1165,31 +1128,19 @@ class Convolution3D(Layer):
|
||||
else:
|
||||
raise ValueError('Invalid dim_ordering:', self.dim_ordering)
|
||||
|
||||
self.W = self.init(self.W_shape, name='{}_W'.format(self.name))
|
||||
self.W = self.add_weight(self.W_shape,
|
||||
initializer=self.init,
|
||||
name='{}_W'.format(self.name),
|
||||
regularizer=self.W_regularizer,
|
||||
constraint=self.W_constraint)
|
||||
if self.bias:
|
||||
self.b = K.zeros((self.nb_filter,), name='{}_b'.format(self.name))
|
||||
self.trainable_weights = [self.W, self.b]
|
||||
self.b = self.add_weight((self.nb_filter,),
|
||||
initializer='zero',
|
||||
name='{}_b'.format(self.name),
|
||||
regularizer=self.b_regularizer,
|
||||
constraint=self.b_constraint)
|
||||
else:
|
||||
self.trainable_weights = [self.W]
|
||||
|
||||
self.regularizers = []
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
|
||||
if self.bias and self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
if self.activity_regularizer:
|
||||
self.activity_regularizer.set_layer(self)
|
||||
self.regularizers.append(self.activity_regularizer)
|
||||
|
||||
self.constraints = {}
|
||||
if self.W_constraint:
|
||||
self.constraints[self.W] = self.W_constraint
|
||||
if self.bias and self.b_constraint:
|
||||
self.constraints[self.b] = self.b_constraint
|
||||
self.b = None
|
||||
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
|
@ -125,8 +125,8 @@ class SpatialDropout1D(Dropout):
|
||||
input_shape = K.shape(x)
|
||||
noise_shape = (input_shape[0], 1, input_shape[2])
|
||||
return noise_shape
|
||||
|
||||
|
||||
|
||||
|
||||
class SpatialDropout2D(Dropout):
|
||||
'''This version performs the same function as Dropout, however it drops
|
||||
entire 2D feature maps instead of individual elements. If adjacent pixels
|
||||
@ -728,33 +728,19 @@ class Dense(Layer):
|
||||
self.input_spec = [InputSpec(dtype=K.floatx(),
|
||||
shape=(None, input_dim))]
|
||||
|
||||
self.W = self.init((input_dim, self.output_dim),
|
||||
name='{}_W'.format(self.name))
|
||||
self.W = self.add_weight((input_dim, self.output_dim),
|
||||
initializer=self.init,
|
||||
name='{}_W'.format(self.name),
|
||||
regularizer=self.W_regularizer,
|
||||
constraint=self.W_constraint)
|
||||
if self.bias:
|
||||
self.b = K.zeros((self.output_dim,),
|
||||
name='{}_b'.format(self.name))
|
||||
self.trainable_weights = [self.W, self.b]
|
||||
self.b = self.add_weight((self.output_dim,),
|
||||
initializer='zero',
|
||||
name='{}_b'.format(self.name),
|
||||
regularizer=self.b_regularizer,
|
||||
constraint=self.b_constraint)
|
||||
else:
|
||||
self.trainable_weights = [self.W]
|
||||
|
||||
self.regularizers = []
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
|
||||
if self.bias and self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
if self.activity_regularizer:
|
||||
self.activity_regularizer.set_layer(self)
|
||||
self.regularizers.append(self.activity_regularizer)
|
||||
|
||||
self.constraints = {}
|
||||
if self.W_constraint:
|
||||
self.constraints[self.W] = self.W_constraint
|
||||
if self.bias and self.b_constraint:
|
||||
self.constraints[self.b] = self.b_constraint
|
||||
self.b = None
|
||||
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
@ -808,9 +794,8 @@ class ActivityRegularization(Layer):
|
||||
self.l2 = l2
|
||||
|
||||
super(ActivityRegularization, self).__init__(**kwargs)
|
||||
activity_regularizer = ActivityRegularizer(l1=l1, l2=l2)
|
||||
activity_regularizer.set_layer(self)
|
||||
self.regularizers = [activity_regularizer]
|
||||
self.activity_regularizer = regularizers.L1L2Regularizer(l1=l1, l2=l2)
|
||||
self.regularizers = [self.activity_regularizer]
|
||||
|
||||
def get_config(self):
|
||||
config = {'l1': self.l1,
|
||||
@ -897,33 +882,19 @@ class MaxoutDense(Layer):
|
||||
self.input_spec = [InputSpec(dtype=K.floatx(),
|
||||
shape=(None, input_dim))]
|
||||
|
||||
self.W = self.init((self.nb_feature, input_dim, self.output_dim),
|
||||
name='{}_W'.format(self.name))
|
||||
self.W = self.add_weight((self.nb_feature, input_dim, self.output_dim),
|
||||
initializer=self.init,
|
||||
name='{}_W'.format(self.name),
|
||||
regularizer=self.W_regularizer,
|
||||
constraint=self.W_constraint)
|
||||
if self.bias:
|
||||
self.b = K.zeros((self.nb_feature, self.output_dim),
|
||||
name='{}_b'.format(self.name))
|
||||
self.trainable_weights = [self.W, self.b]
|
||||
self.b = self.add_weight((self.nb_feature, self.output_dim,),
|
||||
initializer='zero',
|
||||
name='{}_b'.format(self.name),
|
||||
regularizer=self.b_regularizer,
|
||||
constraint=self.b_constraint)
|
||||
else:
|
||||
self.trainable_weights = [self.W]
|
||||
|
||||
self.regularizers = []
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
|
||||
if self.bias and self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
if self.activity_regularizer:
|
||||
self.activity_regularizer.set_layer(self)
|
||||
self.regularizers.append(self.activity_regularizer)
|
||||
|
||||
self.constraints = {}
|
||||
if self.W_constraint:
|
||||
self.constraints[self.W] = self.W_constraint
|
||||
if self.bias and self.b_constraint:
|
||||
self.constraints[self.b] = self.b_constraint
|
||||
self.b = None
|
||||
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
@ -1030,38 +1001,25 @@ class Highway(Layer):
|
||||
self.input_spec = [InputSpec(dtype=K.floatx(),
|
||||
shape=(None, input_dim))]
|
||||
|
||||
self.W = self.init((input_dim, input_dim),
|
||||
name='{}_W'.format(self.name))
|
||||
self.W_carry = self.init((input_dim, input_dim),
|
||||
name='{}_W_carry'.format(self.name))
|
||||
|
||||
self.W = self.add_weight((input_dim, input_dim),
|
||||
initializer=self.init,
|
||||
name='{}_W'.format(self.name),
|
||||
regularizer=self.W_regularizer,
|
||||
constraint=self.W_constraint)
|
||||
self.W_carry = self.add_weight((input_dim, input_dim),
|
||||
initializer=self.init,
|
||||
name='{}_W_carry'.format(self.name))
|
||||
if self.bias:
|
||||
self.b = K.zeros((input_dim,), name='{}_b'.format(self.name))
|
||||
# initialize with a vector of values `transform_bias`
|
||||
self.b_carry = K.variable(np.ones((input_dim,)) * self.transform_bias,
|
||||
name='{}_b_carry'.format(self.name))
|
||||
self.trainable_weights = [self.W, self.b, self.W_carry, self.b_carry]
|
||||
self.b = self.add_weight((input_dim,),
|
||||
initializer='zero',
|
||||
name='{}_b'.format(self.name),
|
||||
regularizer=self.b_regularizer,
|
||||
constraint=self.b_constraint)
|
||||
self.b_carry = self.add_weight((input_dim,),
|
||||
initializer='one',
|
||||
name='{}_b_carry'.format(self.name))
|
||||
else:
|
||||
self.trainable_weights = [self.W, self.W_carry]
|
||||
|
||||
self.regularizers = []
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
|
||||
if self.bias and self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
if self.activity_regularizer:
|
||||
self.activity_regularizer.set_layer(self)
|
||||
self.regularizers.append(self.activity_regularizer)
|
||||
|
||||
self.constraints = {}
|
||||
if self.W_constraint:
|
||||
self.constraints[self.W] = self.W_constraint
|
||||
if self.bias and self.b_constraint:
|
||||
self.constraints[self.b] = self.b_constraint
|
||||
self.b_carry = None
|
||||
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
@ -1178,31 +1136,19 @@ class TimeDistributedDense(Layer):
|
||||
shape=(None,) + input_shape[1:])]
|
||||
input_dim = input_shape[2]
|
||||
|
||||
self.W = self.init((input_dim, self.output_dim),
|
||||
name='{}_W'.format(self.name))
|
||||
self.W = self.add_weight((input_dim, self.output_dim),
|
||||
initializer=self.init,
|
||||
name='{}_W'.format(self.name),
|
||||
regularizer=self.W_regularizer,
|
||||
constraint=self.W_constraint)
|
||||
if self.bias:
|
||||
self.b = K.zeros((self.output_dim,),
|
||||
name='{}_b'.format(self.name))
|
||||
self.trainable_weights = [self.W, self.b]
|
||||
self.regularizers = []
|
||||
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
|
||||
if self.bias and self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
if self.activity_regularizer:
|
||||
self.activity_regularizer.set_layer(self)
|
||||
self.regularizers.append(self.activity_regularizer)
|
||||
|
||||
self.constraints = {}
|
||||
if self.W_constraint:
|
||||
self.constraints[self.W] = self.W_constraint
|
||||
if self.bias and self.b_constraint:
|
||||
self.constraints[self.b] = self.b_constraint
|
||||
self.b = self.add_weight((self.output_dim,),
|
||||
initializer='zero',
|
||||
name='{}_b'.format(self.name),
|
||||
regularizer=self.b_regularizer,
|
||||
constraint=self.b_constraint)
|
||||
else:
|
||||
self.b = None
|
||||
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
|
@ -91,22 +91,11 @@ class Embedding(Layer):
|
||||
super(Embedding, self).__init__(**kwargs)
|
||||
|
||||
def build(self, input_shape):
|
||||
self.W = self.init((self.input_dim, self.output_dim),
|
||||
name='{}_W'.format(self.name))
|
||||
self.trainable_weights = [self.W]
|
||||
|
||||
self.constraints = {}
|
||||
if self.W_constraint:
|
||||
self.constraints[self.W] = self.W_constraint
|
||||
|
||||
self.regularizers = []
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
|
||||
if self.activity_regularizer:
|
||||
self.activity_regularizer.set_layer(self)
|
||||
self.regularizers.append(self.activity_regularizer)
|
||||
self.W = self.add_weight((self.input_dim, self.output_dim),
|
||||
initializer=self.init,
|
||||
name='{}_W'.format(self.name),
|
||||
regularizer=self.W_regularizer,
|
||||
constraint=self.W_constraint)
|
||||
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
|
@ -110,31 +110,21 @@ class LocallyConnected1D(Layer):
|
||||
def build(self, input_shape):
|
||||
input_dim = input_shape[2]
|
||||
_, output_length, nb_filter = self.get_output_shape_for(input_shape)
|
||||
|
||||
self.W_shape = (output_length, self.filter_length * input_dim, nb_filter)
|
||||
self.W = self.init(self.W_shape, name='{}_W'.format(self.name))
|
||||
|
||||
self.W = self.add_weight(self.W_shape,
|
||||
initializer=self.init,
|
||||
name='{}_W'.format(self.name),
|
||||
regularizer=self.W_regularizer,
|
||||
constraint=self.W_constraint)
|
||||
if self.bias:
|
||||
self.b = K.zeros((output_length, self.nb_filter), name='{}_b'.format(self.name))
|
||||
self.trainable_weights = [self.W, self.b]
|
||||
self.b = self.add_weight((output_length, self.nb_filter),
|
||||
initializer='zero',
|
||||
name='{}_b'.format(self.name),
|
||||
regularizer=self.b_regularizer,
|
||||
constraint=self.b_constraint)
|
||||
else:
|
||||
self.trainable_weights = [self.W]
|
||||
|
||||
self.regularizers = []
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
if self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
if self.activity_regularizer:
|
||||
self.activity_regularizer.set_layer(self)
|
||||
self.regularizers.append(self.activity_regularizer)
|
||||
|
||||
self.constraints = {}
|
||||
if self.W_constraint:
|
||||
self.constraints[self.W] = self.W_constraint
|
||||
if self.b_constraint:
|
||||
self.constraints[self.b] = self.b_constraint
|
||||
self.b = None
|
||||
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
@ -306,30 +296,20 @@ class LocallyConnected2D(Layer):
|
||||
self.output_row = output_row
|
||||
self.output_col = output_col
|
||||
self.W_shape = (output_row * output_col, self.nb_row * self.nb_col * input_filter, nb_filter)
|
||||
self.W = self.init(self.W_shape, name='{}_W'.format(self.name))
|
||||
|
||||
self.W = self.add_weight(self.W_shape,
|
||||
initializer=self.init,
|
||||
name='{}_W'.format(self.name),
|
||||
regularizer=self.W_regularizer,
|
||||
constraint=self.W_constraint)
|
||||
if self.bias:
|
||||
self.b = K.zeros((output_row, output_col, nb_filter), name='{}_b'.format(self.name))
|
||||
self.trainable_weights = [self.W, self.b]
|
||||
self.b = self.add_weight((output_row, output_col, nb_filter),
|
||||
initializer='zero',
|
||||
name='{}_b'.format(self.name),
|
||||
regularizer=self.b_regularizer,
|
||||
constraint=self.b_constraint)
|
||||
else:
|
||||
self.trainable_weights = [self.W]
|
||||
|
||||
self.regularizers = []
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
if self.bias and self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
if self.activity_regularizer:
|
||||
self.activity_regularizer.set_layer(self)
|
||||
self.regularizers.append(self.activity_regularizer)
|
||||
|
||||
self.constraints = {}
|
||||
if self.W_constraint:
|
||||
self.constraints[self.W] = self.W_constraint
|
||||
if self.bias and self.b_constraint:
|
||||
self.constraints[self.b] = self.b_constraint
|
||||
self.b = None
|
||||
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
|
@ -82,24 +82,20 @@ class BatchNormalization(Layer):
|
||||
self.input_spec = [InputSpec(shape=input_shape)]
|
||||
shape = (input_shape[self.axis],)
|
||||
|
||||
self.gamma = self.gamma_init(shape, name='{}_gamma'.format(self.name))
|
||||
self.beta = self.beta_init(shape, name='{}_beta'.format(self.name))
|
||||
self.trainable_weights = [self.gamma, self.beta]
|
||||
|
||||
self.regularizers = []
|
||||
if self.gamma_regularizer:
|
||||
self.gamma_regularizer.set_param(self.gamma)
|
||||
self.regularizers.append(self.gamma_regularizer)
|
||||
|
||||
if self.beta_regularizer:
|
||||
self.beta_regularizer.set_param(self.beta)
|
||||
self.regularizers.append(self.beta_regularizer)
|
||||
|
||||
self.running_mean = K.zeros(shape,
|
||||
name='{}_running_mean'.format(self.name))
|
||||
self.running_std = K.ones(shape,
|
||||
name='{}_running_std'.format(self.name))
|
||||
self.non_trainable_weights = [self.running_mean, self.running_std]
|
||||
self.gamma = self.add_weight(shape,
|
||||
initializer=self.gamma_init,
|
||||
regularizer=self.gamma_regularizer,
|
||||
name='{}_gamma'.format(self.name))
|
||||
self.beta = self.add_weight(shape,
|
||||
initializer=self.beta_init,
|
||||
regularizer=self.beta_regularizer,
|
||||
name='{}_beta'.format(self.name))
|
||||
self.running_mean = self.add_weight(shape, initializer='zero',
|
||||
name='{}_running_mean'.format(self.name),
|
||||
trainable=False)
|
||||
self.running_std = self.add_weight(shape, initializer='one',
|
||||
name='{}_running_std'.format(self.name),
|
||||
trainable=False)
|
||||
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
@ -121,8 +117,8 @@ class BatchNormalization(Layer):
|
||||
epsilon=self.epsilon)
|
||||
|
||||
if self.mode == 0:
|
||||
self.add_updates([K.moving_average_update(self.running_mean, mean, self.momentum),
|
||||
K.moving_average_update(self.running_std, std, self.momentum)], x)
|
||||
self.add_update([K.moving_average_update(self.running_mean, mean, self.momentum),
|
||||
K.moving_average_update(self.running_std, std, self.momentum)], x)
|
||||
|
||||
if sorted(reduction_axes) == range(K.ndim(x))[:-1]:
|
||||
x_normed_running = K.batch_normalization(
|
||||
|
@ -229,7 +229,7 @@ class Recurrent(Layer):
|
||||
updates = []
|
||||
for i in range(len(states)):
|
||||
updates.append((self.states[i], states[i]))
|
||||
self.add_updates(updates, x)
|
||||
self.add_update(updates, x)
|
||||
|
||||
if self.return_sequences:
|
||||
return outputs
|
||||
@ -288,7 +288,8 @@ class SimpleRNN(Recurrent):
|
||||
self.W_regularizer = regularizers.get(W_regularizer)
|
||||
self.U_regularizer = regularizers.get(U_regularizer)
|
||||
self.b_regularizer = regularizers.get(b_regularizer)
|
||||
self.dropout_W, self.dropout_U = dropout_W, dropout_U
|
||||
self.dropout_W = dropout_W
|
||||
self.dropout_U = dropout_U
|
||||
|
||||
if self.dropout_W or self.dropout_U:
|
||||
self.uses_learning_phase = True
|
||||
@ -304,24 +305,18 @@ class SimpleRNN(Recurrent):
|
||||
input_dim = input_shape[2]
|
||||
self.input_dim = input_dim
|
||||
|
||||
self.W = self.init((input_dim, self.output_dim),
|
||||
name='{}_W'.format(self.name))
|
||||
self.U = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U'.format(self.name))
|
||||
self.b = K.zeros((self.output_dim,), name='{}_b'.format(self.name))
|
||||
|
||||
self.regularizers = []
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
if self.U_regularizer:
|
||||
self.U_regularizer.set_param(self.U)
|
||||
self.regularizers.append(self.U_regularizer)
|
||||
if self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
self.trainable_weights = [self.W, self.U, self.b]
|
||||
self.W = self.add_weight((input_dim, self.output_dim),
|
||||
initializer=self.init,
|
||||
name='{}_W'.format(self.name),
|
||||
regularizer=self.W_regularizer)
|
||||
self.U = self.add_weight((self.output_dim, self.output_dim),
|
||||
initializer=self.inner_init,
|
||||
name='{}_U'.format(self.name),
|
||||
regularizer=self.U_regularizer)
|
||||
self.b = self.add_weight((self.output_dim,),
|
||||
initializer='zero',
|
||||
name='{}_b'.format(self.name),
|
||||
regularizer=self.b_regularizer)
|
||||
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
@ -446,7 +441,8 @@ class GRU(Recurrent):
|
||||
self.W_regularizer = regularizers.get(W_regularizer)
|
||||
self.U_regularizer = regularizers.get(U_regularizer)
|
||||
self.b_regularizer = regularizers.get(b_regularizer)
|
||||
self.dropout_W, self.dropout_U = dropout_W, dropout_U
|
||||
self.dropout_W = dropout_W
|
||||
self.dropout_U = dropout_U
|
||||
|
||||
if self.dropout_W or self.dropout_U:
|
||||
self.uses_learning_phase = True
|
||||
@ -463,57 +459,59 @@ class GRU(Recurrent):
|
||||
self.states = [None]
|
||||
|
||||
if self.consume_less == 'gpu':
|
||||
|
||||
self.W = self.init((self.input_dim, 3 * self.output_dim),
|
||||
name='{}_W'.format(self.name))
|
||||
self.U = self.inner_init((self.output_dim, 3 * self.output_dim),
|
||||
name='{}_U'.format(self.name))
|
||||
|
||||
self.b = K.variable(np.hstack((np.zeros(self.output_dim),
|
||||
np.zeros(self.output_dim),
|
||||
np.zeros(self.output_dim))),
|
||||
name='{}_b'.format(self.name))
|
||||
|
||||
self.trainable_weights = [self.W, self.U, self.b]
|
||||
self.W = self.add_weight((self.input_dim, 3 * self.output_dim),
|
||||
initializer=self.init,
|
||||
name='{}_W'.format(self.name),
|
||||
regularizer=self.W_regularizer)
|
||||
self.U = self.add_weight((self.output_dim, 3 * self.output_dim),
|
||||
initializer=self.inner_init,
|
||||
name='{}_U'.format(self.name),
|
||||
regularizer=self.U_regularizer)
|
||||
self.b = self.add_weight((self.output_dim * 3,),
|
||||
initializer='zero',
|
||||
name='{}_b'.format(self.name),
|
||||
regularizer=self.b_regularizer)
|
||||
else:
|
||||
|
||||
self.W_z = self.init((self.input_dim, self.output_dim),
|
||||
name='{}_W_z'.format(self.name))
|
||||
self.U_z = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U_z'.format(self.name))
|
||||
self.b_z = K.zeros((self.output_dim,), name='{}_b_z'.format(self.name))
|
||||
|
||||
self.W_r = self.init((self.input_dim, self.output_dim),
|
||||
name='{}_W_r'.format(self.name))
|
||||
self.U_r = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U_r'.format(self.name))
|
||||
self.b_r = K.zeros((self.output_dim,), name='{}_b_r'.format(self.name))
|
||||
|
||||
self.W_h = self.init((self.input_dim, self.output_dim),
|
||||
name='{}_W_h'.format(self.name))
|
||||
self.U_h = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U_h'.format(self.name))
|
||||
self.b_h = K.zeros((self.output_dim,), name='{}_b_h'.format(self.name))
|
||||
|
||||
self.trainable_weights = [self.W_z, self.U_z, self.b_z,
|
||||
self.W_r, self.U_r, self.b_r,
|
||||
self.W_h, self.U_h, self.b_h]
|
||||
|
||||
self.W_z = self.add_weight((self.input_dim, self.output_dim),
|
||||
initializer=self.init,
|
||||
name='{}_W_z'.format(self.name),
|
||||
regularizer=self.W_regularizer)
|
||||
self.U_z = self.add_weight((self.output_dim, self.output_dim),
|
||||
initializer=self.init,
|
||||
name='{}_U_z'.format(self.name),
|
||||
regularizer=self.W_regularizer)
|
||||
self.b_z = self.add_weight((self.output_dim,),
|
||||
initializer='zero',
|
||||
name='{}_b_z'.format(self.name),
|
||||
regularizer=self.b_regularizer)
|
||||
self.W_r = self.add_weight((self.input_dim, self.output_dim),
|
||||
initializer=self.init,
|
||||
name='{}_W_r'.format(self.name),
|
||||
regularizer=self.W_regularizer)
|
||||
self.U_r = self.add_weight((self.output_dim, self.output_dim),
|
||||
initializer=self.init,
|
||||
name='{}_U_r'.format(self.name),
|
||||
regularizer=self.W_regularizer)
|
||||
self.b_r = self.add_weight((self.output_dim,),
|
||||
initializer='zero',
|
||||
name='{}_b_r'.format(self.name),
|
||||
regularizer=self.b_regularizer)
|
||||
self.W_h = self.add_weight((self.input_dim, self.output_dim),
|
||||
initializer=self.init,
|
||||
name='{}_W_h'.format(self.name),
|
||||
regularizer=self.W_regularizer)
|
||||
self.U_h = self.add_weight((self.output_dim, self.output_dim),
|
||||
initializer=self.init,
|
||||
name='{}_U_h'.format(self.name),
|
||||
regularizer=self.W_regularizer)
|
||||
self.b_h = self.add_weight((self.output_dim,),
|
||||
initializer='zero',
|
||||
name='{}_b_h'.format(self.name),
|
||||
regularizer=self.b_regularizer)
|
||||
self.W = K.concatenate([self.W_z, self.W_r, self.W_h])
|
||||
self.U = K.concatenate([self.U_z, self.U_r, self.U_h])
|
||||
self.b = K.concatenate([self.b_z, self.b_r, self.b_h])
|
||||
|
||||
self.regularizers = []
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
if self.U_regularizer:
|
||||
self.U_regularizer.set_param(self.U)
|
||||
self.regularizers.append(self.U_regularizer)
|
||||
if self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
del self.initial_weights
|
||||
@ -671,7 +669,8 @@ class LSTM(Recurrent):
|
||||
self.W_regularizer = regularizers.get(W_regularizer)
|
||||
self.U_regularizer = regularizers.get(U_regularizer)
|
||||
self.b_regularizer = regularizers.get(b_regularizer)
|
||||
self.dropout_W, self.dropout_U = dropout_W, dropout_U
|
||||
self.dropout_W = dropout_W
|
||||
self.dropout_U = dropout_U
|
||||
|
||||
if self.dropout_W or self.dropout_U:
|
||||
self.uses_learning_phase = True
|
||||
@ -688,63 +687,83 @@ class LSTM(Recurrent):
|
||||
self.states = [None, None]
|
||||
|
||||
if self.consume_less == 'gpu':
|
||||
self.W = self.init((self.input_dim, 4 * self.output_dim),
|
||||
name='{}_W'.format(self.name))
|
||||
self.U = self.inner_init((self.output_dim, 4 * self.output_dim),
|
||||
name='{}_U'.format(self.name))
|
||||
self.W = self.add_weight((self.input_dim, 4 * self.output_dim),
|
||||
initializer=self.init,
|
||||
name='{}_W'.format(self.name),
|
||||
regularizer=self.W_regularizer)
|
||||
self.U = self.add_weight((self.output_dim, 4 * self.output_dim),
|
||||
initializer=self.inner_init,
|
||||
name='{}_U'.format(self.name),
|
||||
regularizer=self.U_regularizer)
|
||||
|
||||
self.b = K.variable(np.hstack((np.zeros(self.output_dim),
|
||||
K.get_value(self.forget_bias_init((self.output_dim,))),
|
||||
np.zeros(self.output_dim),
|
||||
np.zeros(self.output_dim))),
|
||||
name='{}_b'.format(self.name))
|
||||
self.trainable_weights = [self.W, self.U, self.b]
|
||||
def b_reg(shape, name=None):
|
||||
return K.variable(np.hstack((np.zeros(self.output_dim),
|
||||
K.get_value(self.forget_bias_init((self.output_dim,))),
|
||||
np.zeros(self.output_dim),
|
||||
np.zeros(self.output_dim))),
|
||||
name='{}_b'.format(self.name))
|
||||
self.b = self.add_weight((self.output_dim * 4,),
|
||||
initializer=b_reg,
|
||||
name='{}_b'.format(self.name),
|
||||
regularizer=self.b_regularizer)
|
||||
else:
|
||||
self.W_i = self.init((self.input_dim, self.output_dim),
|
||||
name='{}_W_i'.format(self.name))
|
||||
self.U_i = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U_i'.format(self.name))
|
||||
self.b_i = K.zeros((self.output_dim,), name='{}_b_i'.format(self.name))
|
||||
|
||||
self.W_f = self.init((self.input_dim, self.output_dim),
|
||||
name='{}_W_f'.format(self.name))
|
||||
self.U_f = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U_f'.format(self.name))
|
||||
self.b_f = self.forget_bias_init((self.output_dim,),
|
||||
name='{}_b_f'.format(self.name))
|
||||
|
||||
self.W_c = self.init((self.input_dim, self.output_dim),
|
||||
name='{}_W_c'.format(self.name))
|
||||
self.U_c = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U_c'.format(self.name))
|
||||
self.b_c = K.zeros((self.output_dim,), name='{}_b_c'.format(self.name))
|
||||
|
||||
self.W_o = self.init((self.input_dim, self.output_dim),
|
||||
name='{}_W_o'.format(self.name))
|
||||
self.U_o = self.inner_init((self.output_dim, self.output_dim),
|
||||
name='{}_U_o'.format(self.name))
|
||||
self.b_o = K.zeros((self.output_dim,), name='{}_b_o'.format(self.name))
|
||||
self.W_i = self.add_weight((self.input_dim, self.output_dim),
|
||||
initializer=self.init,
|
||||
name='{}_W_i'.format(self.name),
|
||||
regularizer=self.W_regularizer)
|
||||
self.U_i = self.add_weight((self.output_dim, self.output_dim),
|
||||
initializer=self.init,
|
||||
name='{}_U_i'.format(self.name),
|
||||
regularizer=self.W_regularizer)
|
||||
self.b_i = self.add_weight((self.output_dim,),
|
||||
initializer='zero',
|
||||
name='{}_b_i'.format(self.name),
|
||||
regularizer=self.b_regularizer)
|
||||
self.W_f = self.add_weight((self.input_dim, self.output_dim),
|
||||
initializer=self.init,
|
||||
name='{}_W_f'.format(self.name),
|
||||
regularizer=self.W_regularizer)
|
||||
self.U_f = self.add_weight((self.output_dim, self.output_dim),
|
||||
initializer=self.init,
|
||||
name='{}_U_f'.format(self.name),
|
||||
regularizer=self.W_regularizer)
|
||||
self.b_f = self.add_weight((self.output_dim,),
|
||||
initializer=self.forget_bias_init,
|
||||
name='{}_b_f'.format(self.name),
|
||||
regularizer=self.b_regularizer)
|
||||
self.W_c = self.add_weight((self.input_dim, self.output_dim),
|
||||
initializer=self.init,
|
||||
name='{}_W_c'.format(self.name),
|
||||
regularizer=self.W_regularizer)
|
||||
self.U_c = self.add_weight((self.output_dim, self.output_dim),
|
||||
initializer=self.init,
|
||||
name='{}_U_c'.format(self.name),
|
||||
regularizer=self.W_regularizer)
|
||||
self.b_c = self.add_weight((self.output_dim,),
|
||||
initializer='zero',
|
||||
name='{}_b_c'.format(self.name),
|
||||
regularizer=self.b_regularizer)
|
||||
self.W_o = self.add_weight((self.input_dim, self.output_dim),
|
||||
initializer=self.init,
|
||||
name='{}_W_o'.format(self.name),
|
||||
regularizer=self.W_regularizer)
|
||||
self.U_o = self.add_weight((self.output_dim, self.output_dim),
|
||||
initializer=self.init,
|
||||
name='{}_U_o'.format(self.name),
|
||||
regularizer=self.W_regularizer)
|
||||
self.b_o = self.add_weight((self.output_dim,),
|
||||
initializer='zero',
|
||||
name='{}_b_o'.format(self.name),
|
||||
regularizer=self.b_regularizer)
|
||||
|
||||
self.trainable_weights = [self.W_i, self.U_i, self.b_i,
|
||||
self.W_c, self.U_c, self.b_c,
|
||||
self.W_f, self.U_f, self.b_f,
|
||||
self.W_o, self.U_o, self.b_o]
|
||||
|
||||
self.W = K.concatenate([self.W_i, self.W_f, self.W_c, self.W_o])
|
||||
self.U = K.concatenate([self.U_i, self.U_f, self.U_c, self.U_o])
|
||||
self.b = K.concatenate([self.b_i, self.b_f, self.b_c, self.b_o])
|
||||
|
||||
self.regularizers = []
|
||||
if self.W_regularizer:
|
||||
self.W_regularizer.set_param(self.W)
|
||||
self.regularizers.append(self.W_regularizer)
|
||||
if self.U_regularizer:
|
||||
self.U_regularizer.set_param(self.U)
|
||||
self.regularizers.append(self.U_regularizer)
|
||||
if self.b_regularizer:
|
||||
self.b_regularizer.set_param(self.b)
|
||||
self.regularizers.append(self.b_regularizer)
|
||||
|
||||
if self.initial_weights is not None:
|
||||
self.set_weights(self.initial_weights)
|
||||
del self.initial_weights
|
||||
|
@ -17,7 +17,7 @@ class Wrapper(Layer):
|
||||
self.trainable_weights = getattr(self.layer, 'trainable_weights', [])
|
||||
self.non_trainable_weights = getattr(self.layer, 'non_trainable_weights', [])
|
||||
self.updates = getattr(self.layer, 'updates', [])
|
||||
self.regularizers = getattr(self.layer, 'regularizers', [])
|
||||
self.losses = getattr(self.layer, 'losses', [])
|
||||
self.constraints = getattr(self.layer, 'constraints', {})
|
||||
|
||||
# properly attribute the current layer to
|
||||
@ -130,6 +130,11 @@ class TimeDistributed(Wrapper):
|
||||
# (nb_samples, timesteps, ...)
|
||||
output_shape = self.get_output_shape_for(input_shape)
|
||||
y = K.reshape(y, (-1, input_length) + output_shape[2:])
|
||||
|
||||
# Apply activity regularizer if any:
|
||||
if hasattr(self.layer, 'activity_regularizer') and self.layer.activity_regularizer is not None:
|
||||
regularization_loss = self.layer.activity_regularizer(y)
|
||||
self.add_loss(regularization_loss, X)
|
||||
return y
|
||||
|
||||
|
||||
@ -246,9 +251,9 @@ class Bidirectional(Wrapper):
|
||||
return []
|
||||
|
||||
@property
|
||||
def regularizers(self):
|
||||
if hasattr(self.forward_layer, 'regularizers'):
|
||||
return self.forward_layer.regularizers + self.backward_layer.regularizers
|
||||
def losses(self):
|
||||
if hasattr(self.forward_layer, 'losses'):
|
||||
return self.forward_layer.losses + self.backward_layer.losses
|
||||
return []
|
||||
|
||||
@property
|
||||
|
@ -497,6 +497,13 @@ class Sequential(Model):
|
||||
def get_updates_for(self, inputs):
|
||||
return self.model.get_updates_for(inputs)
|
||||
|
||||
@property
|
||||
def losses(self):
|
||||
return self.model.losses
|
||||
|
||||
def get_losses_for(self, inputs):
|
||||
return self.model.get_losses_for(inputs)
|
||||
|
||||
@property
|
||||
def regularizers(self):
|
||||
# support for legacy behavior
|
||||
|
@ -1,22 +1,27 @@
|
||||
from __future__ import absolute_import
|
||||
from . import backend as K
|
||||
from .utils.generic_utils import get_from_module
|
||||
import warnings
|
||||
|
||||
|
||||
class Regularizer(object):
|
||||
|
||||
def set_param(self, p):
|
||||
self.p = p
|
||||
|
||||
def set_layer(self, layer):
|
||||
self.layer = layer
|
||||
|
||||
def __call__(self, loss):
|
||||
return loss
|
||||
def __call__(self, x):
|
||||
return 0
|
||||
|
||||
def get_config(self):
|
||||
return {'name': self.__class__.__name__}
|
||||
|
||||
def set_param(self, _):
|
||||
warnings.warn('The `set_param` method on regularizers is deprecated. '
|
||||
'It no longer does anything, '
|
||||
'and it will be removed after 06/2017.')
|
||||
|
||||
def set_layer(self, _):
|
||||
warnings.warn('The `set_layer` method on regularizers is deprecated. '
|
||||
'It no longer does anything, '
|
||||
'and it will be removed after 06/2017.')
|
||||
|
||||
|
||||
class EigenvalueRegularizer(Regularizer):
|
||||
'''This takes a constant that controls
|
||||
@ -28,71 +33,43 @@ class EigenvalueRegularizer(Regularizer):
|
||||
'''
|
||||
def __init__(self, k):
|
||||
self.k = k
|
||||
self.uses_learning_phase = True
|
||||
|
||||
def set_param(self, p):
|
||||
if hasattr(self, 'p'):
|
||||
raise Exception('Regularizers cannot be reused. '
|
||||
'Instantiate one regularizer per layer.')
|
||||
self.p = p
|
||||
def __call__(self, x):
|
||||
if K.ndim(x) > 2:
|
||||
raise Exception('EigenvalueRegularizer '
|
||||
'is only available for tensors of rank 2.')
|
||||
covariance = K.dot(K.transpose(x), x)
|
||||
dim1, dim2 = K.eval(K.shape(covariance))
|
||||
|
||||
def __call__(self, loss):
|
||||
power = 9 # number of iterations of the power method
|
||||
W = self.p
|
||||
if K.ndim(W) > 2:
|
||||
raise Exception('Eigenvalue Decay regularizer '
|
||||
'is only available for dense '
|
||||
'and embedding layers.')
|
||||
WW = K.dot(K.transpose(W), W)
|
||||
dim1, dim2 = K.eval(K.shape(WW)) # number of neurons in the layer
|
||||
|
||||
# power method for approximating the dominant eigenvector:
|
||||
o = K.ones([dim1, 1]) # initial values for the dominant eigenvector
|
||||
main_eigenvect = K.dot(WW, o)
|
||||
# Power method for approximating the dominant eigenvector:
|
||||
power = 9 # Number of iterations of the power method.
|
||||
o = K.ones([dim1, 1]) # Initial values for the dominant eigenvector.
|
||||
main_eigenvect = K.dot(covariance, o)
|
||||
for n in range(power - 1):
|
||||
main_eigenvect = K.dot(WW, main_eigenvect)
|
||||
main_eigenvect = K.dot(covariance, main_eigenvect)
|
||||
covariance_d = K.dot(covariance, main_eigenvect)
|
||||
|
||||
WWd = K.dot(WW, main_eigenvect)
|
||||
|
||||
# the corresponding dominant eigenvalue:
|
||||
main_eigenval = (K.dot(K.transpose(WWd), main_eigenvect) /
|
||||
# The corresponding dominant eigenvalue:
|
||||
main_eigenval = (K.dot(K.transpose(covariance_d), main_eigenvect) /
|
||||
K.dot(K.transpose(main_eigenvect), main_eigenvect))
|
||||
# multiplied by the given regularization gain
|
||||
regularized_loss = loss + (main_eigenval ** 0.5) * self.k
|
||||
|
||||
return K.in_train_phase(regularized_loss[0, 0], loss)
|
||||
# Multiply by the given regularization gain.
|
||||
regularization = (main_eigenval ** 0.5) * self.k
|
||||
return K.sum(regularization)
|
||||
|
||||
|
||||
class WeightRegularizer(Regularizer):
|
||||
class L1L2Regularizer(Regularizer):
|
||||
|
||||
def __init__(self, l1=0., l2=0.):
|
||||
self.l1 = K.cast_to_floatx(l1)
|
||||
self.l2 = K.cast_to_floatx(l2)
|
||||
self.uses_learning_phase = True
|
||||
self.p = None
|
||||
|
||||
def set_param(self, p):
|
||||
if self.p is not None:
|
||||
raise Exception('Regularizers cannot be reused. '
|
||||
'Instantiate one regularizer per layer.')
|
||||
self.p = p
|
||||
|
||||
def __call__(self, loss):
|
||||
if self.p is None:
|
||||
raise Exception('Need to call `set_param` on '
|
||||
'WeightRegularizer instance '
|
||||
'before calling the instance. '
|
||||
'Check that you are not passing '
|
||||
'a WeightRegularizer instead of an '
|
||||
'ActivityRegularizer '
|
||||
'(i.e. activity_regularizer="l2" instead '
|
||||
'of activity_regularizer="activity_l2".')
|
||||
regularized_loss = loss
|
||||
def __call__(self, x):
|
||||
regularization = 0
|
||||
if self.l1:
|
||||
regularized_loss += K.sum(self.l1 * K.abs(self.p))
|
||||
regularization += K.sum(self.l1 * K.abs(x))
|
||||
if self.l2:
|
||||
regularized_loss += K.sum(self.l2 * K.square(self.p))
|
||||
return K.in_train_phase(regularized_loss, loss)
|
||||
regularization += K.sum(self.l2 * K.square(x))
|
||||
return regularization
|
||||
|
||||
def get_config(self):
|
||||
return {'name': self.__class__.__name__,
|
||||
@ -100,61 +77,34 @@ class WeightRegularizer(Regularizer):
|
||||
'l2': float(self.l2)}
|
||||
|
||||
|
||||
class ActivityRegularizer(Regularizer):
|
||||
# Aliases.
|
||||
|
||||
def __init__(self, l1=0., l2=0.):
|
||||
self.l1 = K.cast_to_floatx(l1)
|
||||
self.l2 = K.cast_to_floatx(l2)
|
||||
self.uses_learning_phase = True
|
||||
self.layer = None
|
||||
|
||||
def set_layer(self, layer):
|
||||
if self.layer is not None:
|
||||
raise Exception('Regularizers cannot be reused')
|
||||
self.layer = layer
|
||||
|
||||
def __call__(self, loss):
|
||||
if self.layer is None:
|
||||
raise Exception('Need to call `set_layer` on '
|
||||
'ActivityRegularizer instance '
|
||||
'before calling the instance.')
|
||||
regularized_loss = loss
|
||||
for i in range(len(self.layer.inbound_nodes)):
|
||||
output = self.layer.get_output_at(i)
|
||||
if self.l1:
|
||||
regularized_loss += K.sum(self.l1 * K.abs(output))
|
||||
if self.l2:
|
||||
regularized_loss += K.sum(self.l2 * K.square(output))
|
||||
return K.in_train_phase(regularized_loss, loss)
|
||||
|
||||
def get_config(self):
|
||||
return {'name': self.__class__.__name__,
|
||||
'l1': float(self.l1),
|
||||
'l2': float(self.l2)}
|
||||
WeightRegularizer = L1L2Regularizer
|
||||
ActivityRegularizer = L1L2Regularizer
|
||||
|
||||
|
||||
def l1(l=0.01):
|
||||
return WeightRegularizer(l1=l)
|
||||
return L1L2Regularizer(l1=l)
|
||||
|
||||
|
||||
def l2(l=0.01):
|
||||
return WeightRegularizer(l2=l)
|
||||
return L1L2Regularizer(l2=l)
|
||||
|
||||
|
||||
def l1l2(l1=0.01, l2=0.01):
|
||||
return WeightRegularizer(l1=l1, l2=l2)
|
||||
return L1L2Regularizer(l1=l1, l2=l2)
|
||||
|
||||
|
||||
def activity_l1(l=0.01):
|
||||
return ActivityRegularizer(l1=l)
|
||||
return L1L2Regularizer(l1=l)
|
||||
|
||||
|
||||
def activity_l2(l=0.01):
|
||||
return ActivityRegularizer(l2=l)
|
||||
return L1L2Regularizer(l2=l)
|
||||
|
||||
|
||||
def activity_l1l2(l1=0.01, l2=0.01):
|
||||
return ActivityRegularizer(l1=l1, l2=l2)
|
||||
return L1L2Regularizer(l1=l1, l2=l2)
|
||||
|
||||
|
||||
def get(identifier, kwargs=None):
|
||||
|
@ -132,6 +132,12 @@ def test_regularizer(layer_class):
|
||||
layer.build(shape)
|
||||
output = layer(K.variable(np.ones(shape)))
|
||||
K.eval(output)
|
||||
if layer_class == recurrent.SimpleRNN:
|
||||
assert len(layer.losses) == 3
|
||||
if layer_class == recurrent.GRU:
|
||||
assert len(layer.losses) == 9
|
||||
if layer_class == recurrent.LSTM:
|
||||
assert len(layer.losses) == 12
|
||||
|
||||
|
||||
@keras_test
|
||||
|
@ -76,6 +76,15 @@ def test_TimeDistributed():
|
||||
outer_model.fit(np.random.random((10, 3, 2)), np.random.random((10, 3, 3)), nb_epoch=1, batch_size=10)
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_regularizers():
|
||||
model = Sequential()
|
||||
model.add(wrappers.TimeDistributed(core.Dense(2, W_regularizer='l1'), input_shape=(3, 4)))
|
||||
model.add(core.Activation('relu'))
|
||||
model.compile(optimizer='rmsprop', loss='mse')
|
||||
assert len(model.losses) == 1
|
||||
|
||||
|
||||
@keras_test
|
||||
def test_Bidirectional():
|
||||
rnn = recurrent.SimpleRNN
|
||||
|
@ -67,6 +67,7 @@ def test_W_reg():
|
||||
regularizers.l1l2()]:
|
||||
model = create_model(weight_reg=reg)
|
||||
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
|
||||
assert len(model.losses) == 1
|
||||
model.fit(X_train, Y_train, batch_size=batch_size,
|
||||
nb_epoch=nb_epoch, verbose=0)
|
||||
model.evaluate(X_test[test_ids, :], Y_test[test_ids, :], verbose=0)
|
||||
@ -77,6 +78,7 @@ def test_A_reg():
|
||||
for reg in [regularizers.activity_l1(), regularizers.activity_l2()]:
|
||||
model = create_model(activity_reg=reg)
|
||||
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
|
||||
assert len(model.losses) == 1
|
||||
model.fit(X_train, Y_train, batch_size=batch_size,
|
||||
nb_epoch=nb_epoch, verbose=0)
|
||||
model.evaluate(X_test[test_ids, :], Y_test[test_ids, :], verbose=0)
|
||||
|
Loading…
Reference in New Issue
Block a user