From 5f0e0d6c385d75090350eabbe27c124c767c00f1 Mon Sep 17 00:00:00 2001
From: Michael Oliver
Date: Fri, 16 Dec 2016 17:07:10 -0800
Subject: [PATCH] Fix issue #3568: allow sharing of activation function parameters along specified axes (#4141)

* allow ability to share activation parameters along specified axes
* add tests
* change to shared_axes and remove TF dummy broadcast function
* update tests to shared_axes
* Update docstrings in advanced activations
---
 keras/backend/theano_backend.py                    |   3 +
 keras/layers/advanced_activations.py               | 107 +++++++++++++++---
 .../keras/layers/test_advanced_activations.py      |  33 +++++-
 3 files changed, 120 insertions(+), 23 deletions(-)

diff --git a/keras/backend/theano_backend.py b/keras/backend/theano_backend.py
index 5985bc0b7..904c9917d 100644
--- a/keras/backend/theano_backend.py
+++ b/keras/backend/theano_backend.py
@@ -831,6 +831,9 @@ def reverse(x, axes):
     return x[slices]
 
 
+def pattern_broadcast(x, broadcastable):
+    return T.patternbroadcast(x, broadcastable)
+
 
 # VALUE MANIPULATION
 
diff --git a/keras/layers/advanced_activations.py b/keras/layers/advanced_activations.py
index ad5ce8162..e522c19d5 100644
--- a/keras/layers/advanced_activations.py
+++ b/keras/layers/advanced_activations.py
@@ -52,18 +52,37 @@ class PReLU(Layer):
     # Arguments
         init: initialization function for the weights.
         weights: initial weights, as a list of a single Numpy array.
+        shared_axes: the axes along which to share learnable
+            parameters for the activation function.
+            For example, if the incoming feature maps
+            are from a 2D convolution
+            with output shape `(batch, height, width, channels)`,
+            and you wish to share parameters across space
+            so that each filter only has one set of parameters,
+            set `shared_axes=[1, 2]`.
 
     # References
         - [Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification](http://arxiv.org/pdf/1502.01852v1.pdf)
     '''
-    def __init__(self, init='zero', weights=None, **kwargs):
+    def __init__(self, init='zero', weights=None, shared_axes=None, **kwargs):
         self.supports_masking = True
         self.init = initializations.get(init)
         self.initial_weights = weights
+        if type(shared_axes) is not list and type(shared_axes) is not tuple:
+            self.shared_axes = [shared_axes]
+        else:
+            self.shared_axes = list(shared_axes)
         super(PReLU, self).__init__(**kwargs)
 
     def build(self, input_shape):
-        self.alphas = self.init(input_shape[1:],
+        param_shape = list(input_shape[1:])
+        self.param_broadcast = [False] * len(param_shape)
+        if self.shared_axes[0] is not None:
+            for i in self.shared_axes:
+                param_shape[i] = 1
+                self.param_broadcast[i] = True
+
+        self.alphas = self.init(param_shape,
                                 name='{}_alphas'.format(self.name))
         self.trainable_weights = [self.alphas]
@@ -73,7 +92,10 @@ class PReLU(Layer):
 
     def call(self, x, mask=None):
         pos = K.relu(x)
-        neg = self.alphas * (x - abs(x)) * 0.5
+        if K.backend() == 'theano':
+            neg = K.pattern_broadcast(self.alphas, self.param_broadcast) * (x - abs(x)) * 0.5
+        else:
+            neg = self.alphas * (x - abs(x)) * 0.5
         return pos + neg
 
     def get_config(self):
@@ -131,23 +153,41 @@ class ParametricSoftplus(Layer):
         alpha_init: float. Initial value of the alpha weights.
         beta_init: float. Initial values of the beta weights.
         weights: initial weights, as a list of 2 numpy arrays.
+        shared_axes: the axes along which to share learnable
+            parameters for the activation function.
+            For example, if the incoming feature maps
+            are from a 2D convolution
+            with output shape `(batch, height, width, channels)`,
+            and you wish to share parameters across space
+            so that each filter only has one set of parameters,
+            set `shared_axes=[1, 2]`.
 
     # References
         - [Inferring Nonlinear Neuronal Computation Based on Physiologically Plausible Inputs](http://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1003143)
     '''
     def __init__(self, alpha_init=0.2, beta_init=5.0,
-                 weights=None, **kwargs):
+                 weights=None, shared_axes=None, **kwargs):
         self.supports_masking = True
         self.alpha_init = K.cast_to_floatx(alpha_init)
         self.beta_init = K.cast_to_floatx(beta_init)
         self.initial_weights = weights
+        if type(shared_axes) is not list and type(shared_axes) is not tuple:
+            self.shared_axes = [shared_axes]
+        else:
+            self.shared_axes = list(shared_axes)
         super(ParametricSoftplus, self).__init__(**kwargs)
 
     def build(self, input_shape):
-        input_shape = input_shape[1:]
-        self.alphas = K.variable(self.alpha_init * np.ones(input_shape),
+        param_shape = list(input_shape[1:])
+        self.param_broadcast = [False] * len(param_shape)
+        if self.shared_axes[0] is not None:
+            for i in self.shared_axes:
+                param_shape[i] = 1
+                self.param_broadcast[i] = True
+
+        self.alphas = K.variable(self.alpha_init * np.ones(param_shape),
                                  name='{}_alphas'.format(self.name))
-        self.betas = K.variable(self.beta_init * np.ones(input_shape),
+        self.betas = K.variable(self.beta_init * np.ones(param_shape),
                                 name='{}_betas'.format(self.name))
         self.trainable_weights = [self.alphas, self.betas]
@@ -156,7 +196,10 @@
         del self.initial_weights
 
     def call(self, x, mask=None):
-        return K.softplus(self.betas * x) * self.alphas
+        if K.backend() == 'theano':
+            return K.softplus(K.pattern_broadcast(self.betas, self.param_broadcast) * x) * K.pattern_broadcast(self.alphas, self.param_broadcast)
+        else:
+            return K.softplus(self.betas * x) * self.alphas
 
     def get_config(self):
         config = {'alpha_init': float(self.alpha_init),
@@ -214,34 +257,51 @@ class SReLU(Layer):
         a_left_init: initialization function for the left part slope
         t_right_init: initialization function for the right part intercept
         a_right_init: initialization function for the right part slope
+        shared_axes: the axes along which to share learnable
+            parameters for the activation function.
+            For example, if the incoming feature maps
+            are from a 2D convolution
+            with output shape `(batch, height, width, channels)`,
+            and you wish to share parameters across space
+            so that each filter only has one set of parameters,
+            set `shared_axes=[1, 2]`.
 
     # References
         - [Deep Learning with S-shaped Rectified Linear Activation Units](http://arxiv.org/abs/1512.07030)
     '''
     def __init__(self, t_left_init='zero', a_left_init='glorot_uniform',
-                 t_right_init='glorot_uniform', a_right_init='one', **kwargs):
+                 t_right_init='glorot_uniform', a_right_init='one', shared_axes=None, **kwargs):
         self.supports_masking = True
         self.t_left_init = t_left_init
         self.a_left_init = a_left_init
         self.t_right_init = t_right_init
         self.a_right_init = a_right_init
+        if type(shared_axes) is not list and type(shared_axes) is not tuple:
+            self.shared_axes = [shared_axes]
+        else:
+            self.shared_axes = list(shared_axes)
         super(SReLU, self).__init__(**kwargs)
 
     def build(self, input_shape):
-        input_shape = input_shape[1:]
+        param_shape = list(input_shape[1:])
+        self.param_broadcast = [False] * len(param_shape)
+        if self.shared_axes[0] is not None:
+            for i in self.shared_axes:
+                param_shape[i] = 1
+                self.param_broadcast[i] = True
         t_left_init = initializations.get(self.t_left_init)
         a_left_init = initializations.get(self.a_left_init)
         t_right_init = initializations.get(self.t_right_init)
         a_right_init = initializations.get(self.a_right_init)
 
-        self.t_left = t_left_init(input_shape,
+        self.t_left = t_left_init(param_shape,
                                   name='{}_t_left'.format(self.name))
-        self.a_left = a_left_init(input_shape,
+        self.a_left = a_left_init(param_shape,
                                   name='{}_a_left'.format(self.name))
-        self.t_right = t_right_init(input_shape,
+        self.t_right = t_right_init(param_shape,
                                     name='{}_t_right'.format(self.name))
-        self.a_right = a_right_init(input_shape,
+        self.a_right = a_right_init(param_shape,
                                     name='{}_a_right'.format(self.name))
         # ensure the the right part is always to the right of the left
         self.t_right_actual = self.t_left + abs(self.t_right)
@@ -249,10 +309,21 @@
                                   self.t_right, self.a_right]
 
     def call(self, x, mask=None):
-        Y_left_and_center = self.t_left + K.relu(x - self.t_left,
-                                                 self.a_left,
-                                                 self.t_right_actual - self.t_left)
-        Y_right = K.relu(x - self.t_right_actual) * self.a_right
+        if K.backend() == 'theano':
+            t_left = K.pattern_broadcast(self.t_left, self.param_broadcast)
+            a_left = K.pattern_broadcast(self.a_left, self.param_broadcast)
+            a_right = K.pattern_broadcast(self.a_right, self.param_broadcast)
+            t_right_actual = K.pattern_broadcast(self.t_right_actual, self.param_broadcast)
+        else:
+            t_left = self.t_left
+            a_left = self.a_left
+            a_right = self.a_right
+            t_right_actual = self.t_right_actual
+
+        Y_left_and_center = t_left + K.relu(x - t_left,
+                                            a_left,
+                                            t_right_actual - t_left)
+        Y_right = K.relu(x - t_right_actual) * a_right
         return Y_left_and_center + Y_right
 
     def get_config(self):
diff --git a/tests/keras/layers/test_advanced_activations.py b/tests/keras/layers/test_advanced_activations.py
index 369195d12..7a3fe7714 100644
--- a/tests/keras/layers/test_advanced_activations.py
+++ b/tests/keras/layers/test_advanced_activations.py
@@ -17,6 +17,13 @@ def test_prelu():
                input_shape=(2, 3, 4))
 
 
+@keras_test
+def test_prelu_share():
+    from keras.layers.advanced_activations import PReLU
+    layer_test(PReLU, kwargs={'shared_axes': 1},
+               input_shape=(2, 3, 4))
+
+
 @keras_test
 def test_elu():
     from keras.layers.advanced_activations import ELU
@@ -28,11 +35,20 @@ def test_elu():
 @keras_test
 def test_parametric_softplus():
     from keras.layers.advanced_activations import ParametricSoftplus
-    for alpha in [0., .5, -1.]:
-        layer_test(ParametricSoftplus,
-                   kwargs={'alpha_init': 1.,
-                           'beta_init': -1},
-                   input_shape=(2, 3, 4))
+    layer_test(ParametricSoftplus,
+               kwargs={'alpha_init': 1.,
+                       'beta_init': -1},
+               input_shape=(2, 3, 4))
+
+
+@keras_test
+def test_parametric_softplus_share():
+    from keras.layers.advanced_activations import ParametricSoftplus
+    layer_test(ParametricSoftplus,
+               kwargs={'shared_axes': 1,
+                       'alpha_init': 1.,
+                       'beta_init': -1},
+               input_shape=(2, 3, 4))
 
 
 @keras_test
@@ -49,5 +65,12 @@ def test_srelu():
                input_shape=(2, 3, 4))
 
 
+@keras_test
+def test_srelu_share():
+    from keras.layers.advanced_activations import SReLU
+    layer_test(SReLU, kwargs={'shared_axes': 1},
+               input_shape=(2, 3, 4))
+
+
 if __name__ == '__main__':
     pytest.main([__file__])
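
For anyone trying the change out, the snippet below is a minimal usage sketch, not part of the patch itself. It assumes Keras 1.x with this diff applied and mirrors the configuration of the new test_prelu_share test (inputs of shape `(batch, 3, 4)` and a single shared axis):

    from keras import backend as K
    from keras.models import Sequential
    from keras.layers.advanced_activations import PReLU

    # Same setup as the added test_prelu_share test: shared_axes is passed
    # straight to the layer constructor.
    model = Sequential()
    model.add(PReLU(shared_axes=1, input_shape=(3, 4)))

    # The shared axis of the parameter shape is collapsed to length 1, so the
    # layer learns 3 slopes instead of 3 * 4 (and, on the Theano backend, that
    # axis is marked broadcastable via K.pattern_broadcast in call).
    print(K.get_value(model.layers[0].alphas).shape)  # -> (3, 1)

In the patch's build methods, `shared_axes` indexes into the per-sample parameter shape `input_shape[1:]`, and the saved `param_broadcast` pattern is what makes the size-1 axes broadcast correctly under Theano.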