* allow sharing of activation parameters along specified axes
* add tests
* change to shared_axes and remove TF dummy broadcast function
* update tests to shared_axes
* Update docstrings in advanced activations
parent 79406f111b
commit 5f0e0d6c38
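Below is a quick usage sketch of what this change enables. It is not part of the diff; it assumes a Keras 1.x-style API with channels-last dim ordering and simply follows the example given in the updated docstrings.

from keras.models import Sequential
from keras.layers import Convolution2D
from keras.layers.advanced_activations import PReLU

model = Sequential()
# 2D convolution with channels-last output shape (batch, height, width, channels)
model.add(Convolution2D(32, 3, 3, input_shape=(32, 32, 3)))
# share the learnable alphas across the spatial axes so that each of the
# 32 filters learns a single alpha instead of one alpha per activation
model.add(PReLU(shared_axes=[1, 2]))
model.compile(loss='mse', optimizer='sgd')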
@@ -831,6 +831,9 @@ def reverse(x, axes):
     return x[slices]


+def pattern_broadcast(x, broadcastable):
+    return T.patternbroadcast(x, broadcastable)
+
 # VALUE MANIPULATION
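The helper added above exists because Theano, unlike NumPy or TensorFlow, does not broadcast a size-1 dimension unless the variable is explicitly tagged as broadcastable on that axis. A minimal sketch of the problem and of what T.patternbroadcast does (assumes Theano is installed; the variable names are illustrative only):

import numpy as np
import theano
import theano.tensor as T

# a shared variable of shape (1, 4) is created with broadcastable=(False, False),
# so multiplying it with a (batch, 4) tensor would raise a dimension-mismatch error
alphas = theano.shared(np.zeros((1, 4), dtype='float32'))

x = T.matrix('x')
alphas_b = T.patternbroadcast(alphas, (True, False))   # re-tag axis 0 as broadcastable
f = theano.function([x], alphas_b * x)
print(f(np.ones((2, 4), dtype='float32')).shape)       # (2, 4)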
@@ -52,18 +52,37 @@ class PReLU(Layer):

     # Arguments
         init: initialization function for the weights.
         weights: initial weights, as a list of a single Numpy array.
+        shared_axes: the axes along which to share learnable
+            parameters for the activation function.
+            For example, if the incoming feature maps
+            are from a 2D convolution
+            with output shape `(batch, height, width, channels)`,
+            and you wish to share parameters across space
+            so that each filter only has one set of parameters,
+            set `shared_axes=[1, 2]`.

     # References
         - [Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification](http://arxiv.org/pdf/1502.01852v1.pdf)
     '''
-    def __init__(self, init='zero', weights=None, **kwargs):
+    def __init__(self, init='zero', weights=None, shared_axes=None, **kwargs):
         self.supports_masking = True
         self.init = initializations.get(init)
         self.initial_weights = weights
+        if type(shared_axes) is not list and type(shared_axes) is not tuple:
+            self.shared_axes = [shared_axes]
+        else:
+            self.shared_axes = list(shared_axes)
         super(PReLU, self).__init__(**kwargs)

     def build(self, input_shape):
-        self.alphas = self.init(input_shape[1:],
+        param_shape = list(input_shape[1:])
+        self.param_broadcast = [False] * len(param_shape)
+        if self.shared_axes[0] is not None:
+            for i in self.shared_axes:
+                param_shape[i] = 1
+                self.param_broadcast[i] = True
+
+        self.alphas = self.init(param_shape,
                                 name='{}_alphas'.format(self.name))
         self.trainable_weights = [self.alphas]
@@ -73,6 +92,9 @@ class PReLU(Layer):

     def call(self, x, mask=None):
         pos = K.relu(x)
-        neg = self.alphas * (x - abs(x)) * 0.5
+        if K.backend() == 'theano':
+            neg = K.pattern_broadcast(self.alphas, self.param_broadcast) * (x - abs(x)) * 0.5
+        else:
+            neg = self.alphas * (x - abs(x)) * 0.5
         return pos + neg
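To make the build() logic above concrete, here is a small plain-Python sketch (not part of the diff) of what param_shape and param_broadcast come out as for the shared_axes=1 case exercised by the new tests, where the layer input has shape (batch=2, 3, 4). Note that, as written in this diff, shared_axes indexes into input_shape[1:].

input_shape = (2, 3, 4)        # (batch, dim1, dim2), as in the new tests
shared_axes = [1]              # shared_axes=1 is normalized to [1] in __init__

param_shape = list(input_shape[1:])            # [3, 4]
param_broadcast = [False] * len(param_shape)   # [False, False]
if shared_axes[0] is not None:
    for i in shared_axes:
        param_shape[i] = 1
        param_broadcast[i] = True

print(param_shape)       # [3, 1] -> the alphas are shared along the last axis
print(param_broadcast)   # [False, True] -> Theano broadcast pattern for that axis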
@@ -131,23 +153,41 @@ class ParametricSoftplus(Layer):
         alpha_init: float. Initial value of the alpha weights.
         beta_init: float. Initial values of the beta weights.
         weights: initial weights, as a list of 2 numpy arrays.
+        shared_axes: the axes along which to share learnable
+            parameters for the activation function.
+            For example, if the incoming feature maps
+            are from a 2D convolution
+            with output shape `(batch, height, width, channels)`,
+            and you wish to share parameters across space
+            so that each filter only has one set of parameters,
+            set `shared_axes=[1, 2]`.

     # References
         - [Inferring Nonlinear Neuronal Computation Based on Physiologically Plausible Inputs](http://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1003143)
     '''
     def __init__(self, alpha_init=0.2, beta_init=5.0,
-                 weights=None, **kwargs):
+                 weights=None, shared_axes=None, **kwargs):
         self.supports_masking = True
         self.alpha_init = K.cast_to_floatx(alpha_init)
         self.beta_init = K.cast_to_floatx(beta_init)
         self.initial_weights = weights
+        if type(shared_axes) is not list and type(shared_axes) is not tuple:
+            self.shared_axes = [shared_axes]
+        else:
+            self.shared_axes = list(shared_axes)
         super(ParametricSoftplus, self).__init__(**kwargs)

     def build(self, input_shape):
-        input_shape = input_shape[1:]
-        self.alphas = K.variable(self.alpha_init * np.ones(input_shape),
+        param_shape = list(input_shape[1:])
+        self.param_broadcast = [False] * len(param_shape)
+        if self.shared_axes[0] is not None:
+            for i in self.shared_axes:
+                param_shape[i] = 1
+                self.param_broadcast[i] = True
+
+        self.alphas = K.variable(self.alpha_init * np.ones(param_shape),
                                  name='{}_alphas'.format(self.name))
-        self.betas = K.variable(self.beta_init * np.ones(input_shape),
+        self.betas = K.variable(self.beta_init * np.ones(param_shape),
                                 name='{}_betas'.format(self.name))
         self.trainable_weights = [self.alphas, self.betas]
@@ -156,6 +196,9 @@ class ParametricSoftplus(Layer):
             del self.initial_weights

     def call(self, x, mask=None):
-        return K.softplus(self.betas * x) * self.alphas
+        if K.backend() == 'theano':
+            return K.softplus(K.pattern_broadcast(self.betas, self.param_broadcast) * x) * K.pattern_broadcast(self.alphas, self.param_broadcast)
+        else:
+            return K.softplus(self.betas * x) * self.alphas

     def get_config(self):
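For reference, the forward pass above computes f(x) = alpha * softplus(beta * x) = alpha * log(1 + exp(beta * x)). A hedged NumPy sketch (not part of the commit), where ordinary broadcasting plays the role that K.pattern_broadcast plays on the Theano backend:

import numpy as np

def parametric_softplus(x, alphas, betas):
    # softplus(z) = log(1 + exp(z)); alphas and betas have shape param_shape,
    # e.g. (3, 1) when shared_axes=1 and the per-sample input shape is (3, 4)
    return alphas * np.log1p(np.exp(betas * x))

x = np.random.randn(2, 3, 4)
alphas = 0.2 * np.ones((3, 1))
betas = 5.0 * np.ones((3, 1))
print(parametric_softplus(x, alphas, betas).shape)   # (2, 3, 4)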
@@ -214,34 +257,51 @@ class SReLU(Layer):
         a_left_init: initialization function for the left part slope
         t_right_init: initialization function for the right part intercept
         a_right_init: initialization function for the right part slope
+        shared_axes: the axes along which to share learnable
+            parameters for the activation function.
+            For example, if the incoming feature maps
+            are from a 2D convolution
+            with output shape `(batch, height, width, channels)`,
+            and you wish to share parameters across space
+            so that each filter only has one set of parameters,
+            set `shared_axes=[1, 2]`.

     # References
         - [Deep Learning with S-shaped Rectified Linear Activation Units](http://arxiv.org/abs/1512.07030)
     '''
     def __init__(self, t_left_init='zero', a_left_init='glorot_uniform',
-                 t_right_init='glorot_uniform', a_right_init='one', **kwargs):
+                 t_right_init='glorot_uniform', a_right_init='one', shared_axes=None, **kwargs):
         self.supports_masking = True
         self.t_left_init = t_left_init
         self.a_left_init = a_left_init
         self.t_right_init = t_right_init
         self.a_right_init = a_right_init
+        if type(shared_axes) is not list and type(shared_axes) is not tuple:
+            self.shared_axes = [shared_axes]
+        else:
+            self.shared_axes = list(shared_axes)
         super(SReLU, self).__init__(**kwargs)

     def build(self, input_shape):
-        input_shape = input_shape[1:]
+        param_shape = list(input_shape[1:])
+        self.param_broadcast = [False] * len(param_shape)
+        if self.shared_axes[0] is not None:
+            for i in self.shared_axes:
+                param_shape[i] = 1
+                self.param_broadcast[i] = True
+
         t_left_init = initializations.get(self.t_left_init)
         a_left_init = initializations.get(self.a_left_init)
         t_right_init = initializations.get(self.t_right_init)
         a_right_init = initializations.get(self.a_right_init)

-        self.t_left = t_left_init(input_shape,
+        self.t_left = t_left_init(param_shape,
                                   name='{}_t_left'.format(self.name))
-        self.a_left = a_left_init(input_shape,
+        self.a_left = a_left_init(param_shape,
                                   name='{}_a_left'.format(self.name))
-        self.t_right = t_right_init(input_shape,
+        self.t_right = t_right_init(param_shape,
                                     name='{}_t_right'.format(self.name))
-        self.a_right = a_right_init(input_shape,
+        self.a_right = a_right_init(param_shape,
                                     name='{}_a_right'.format(self.name))
         # ensure that the right part is always to the right of the left
         self.t_right_actual = self.t_left + abs(self.t_right)
@@ -249,10 +309,21 @@ class SReLU(Layer):
                                   self.t_right, self.a_right]

     def call(self, x, mask=None):
-        Y_left_and_center = self.t_left + K.relu(x - self.t_left,
-                                                 self.a_left,
-                                                 self.t_right_actual - self.t_left)
-        Y_right = K.relu(x - self.t_right_actual) * self.a_right
+        if K.backend() == 'theano':
+            t_left = K.pattern_broadcast(self.t_left, self.param_broadcast)
+            a_left = K.pattern_broadcast(self.a_left, self.param_broadcast)
+            a_right = K.pattern_broadcast(self.a_right, self.param_broadcast)
+            t_right_actual = K.pattern_broadcast(self.t_right_actual, self.param_broadcast)
+        else:
+            t_left = self.t_left
+            a_left = self.a_left
+            a_right = self.a_right
+            t_right_actual = self.t_right_actual
+
+        Y_left_and_center = t_left + K.relu(x - t_left,
+                                            a_left,
+                                            t_right_actual - t_left)
+        Y_right = K.relu(x - t_right_actual) * a_right
         return Y_left_and_center + Y_right

     def get_config(self):
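The SReLU forward pass above is piecewise linear with two learnable knees. A NumPy sketch of the same computation (not part of the commit; K.relu(x, alpha, max_value) is assumed to mean a leaky slope of alpha below zero and a ceiling at max_value):

import numpy as np

def relu(x, alpha=0.0, max_value=None):
    y = np.where(x > 0.0, x, alpha * x)
    return y if max_value is None else np.minimum(y, max_value)

def srelu(x, t_left, a_left, t_right, a_right):
    # keep the right knee to the right of the left one, as in build()
    t_right_actual = t_left + np.abs(t_right)
    y_left_and_center = t_left + relu(x - t_left, a_left, t_right_actual - t_left)
    y_right = relu(x - t_right_actual) * a_right
    return y_left_and_center + y_right

x = np.linspace(-3.0, 3.0, 7)
print(srelu(x, t_left=0.0, a_left=0.2, t_right=1.0, a_right=1.0))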
@@ -17,6 +17,13 @@ def test_prelu():
                input_shape=(2, 3, 4))


+@keras_test
+def test_prelu_share():
+    from keras.layers.advanced_activations import PReLU
+    layer_test(PReLU, kwargs={'shared_axes': 1},
+               input_shape=(2, 3, 4))
+
+
 @keras_test
 def test_elu():
     from keras.layers.advanced_activations import ELU
@@ -28,13 +35,22 @@ def test_elu():
 @keras_test
 def test_parametric_softplus():
     from keras.layers.advanced_activations import ParametricSoftplus
     for alpha in [0., .5, -1.]:
         layer_test(ParametricSoftplus,
                    kwargs={'alpha_init': 1.,
                            'beta_init': -1},
                    input_shape=(2, 3, 4))


+@keras_test
+def test_parametric_softplus_share():
+    from keras.layers.advanced_activations import ParametricSoftplus
+    layer_test(ParametricSoftplus,
+               kwargs={'shared_axes': 1,
+                       'alpha_init': 1.,
+                       'beta_init': -1},
+               input_shape=(2, 3, 4))
+
+
 @keras_test
 def test_thresholded_relu():
     from keras.layers.advanced_activations import ThresholdedReLU
@@ -49,5 +65,12 @@ def test_srelu():
                input_shape=(2, 3, 4))


+@keras_test
+def test_srelu_share():
+    from keras.layers.advanced_activations import SReLU
+    layer_test(SReLU, kwargs={'shared_axes': 1},
+               input_shape=(2, 3, 4))
+
+
 if __name__ == '__main__':
     pytest.main([__file__])
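The hunks above add the corresponding *_share cases to the advanced-activations test suite. A hedged sketch of an equivalent manual check (not part of the commit; the expected weight shape follows the indexing convention used in this diff):

import numpy as np
from keras.models import Sequential
from keras.layers.advanced_activations import PReLU

model = Sequential()
model.add(PReLU(shared_axes=1, input_shape=(3, 4)))
model.compile(loss='mse', optimizer='sgd')

# the alpha weight should have the collapsed param_shape
print(model.layers[0].get_weights()[0].shape)          # expected: (3, 1)
print(model.predict(np.random.randn(2, 3, 4)).shape)   # (2, 3, 4)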