From 5f0e0d6c385d75090350eabbe27c124c767c00f1 Mon Sep 17 00:00:00 2001
From: Michael Oliver
Date: Fri, 16 Dec 2016 17:07:10 -0800
Subject: [PATCH] Fix issue #3568: allow sharing of activation function parameters along specified axes (#4141)

* allow ability to share activation parameters along specified axes
* add tests
* change to shared_axes and remove TF dummy broadcast function
* update tests to shared_axes
* Update docstrings in advanced activations
---
 keras/backend/theano_backend.py                    |   3 +
 keras/layers/advanced_activations.py               | 107 +++++++++++++++---
 .../keras/layers/test_advanced_activations.py      |  33 +++++-
 3 files changed, 120 insertions(+), 23 deletions(-)

diff --git a/keras/backend/theano_backend.py b/keras/backend/theano_backend.py
index 5985bc0b7..904c9917d 100644
--- a/keras/backend/theano_backend.py
+++ b/keras/backend/theano_backend.py
@@ -831,6 +831,9 @@ def reverse(x, axes):
     return x[slices]
 
 
+def pattern_broadcast(x, broadcastable):
+    return T.patternbroadcast(x, broadcastable)
+
 
 # VALUE MANIPULATION
 
diff --git a/keras/layers/advanced_activations.py b/keras/layers/advanced_activations.py
index ad5ce8162..e522c19d5 100644
--- a/keras/layers/advanced_activations.py
+++ b/keras/layers/advanced_activations.py
@@ -52,18 +52,37 @@ class PReLU(Layer):
     # Arguments
         init: initialization function for the weights.
         weights: initial weights, as a list of a single Numpy array.
+        shared_axes: the axes along which to share learnable
+            parameters for the activation function.
+            For example, if the incoming feature maps
+            are from a 2D convolution
+            with output shape `(batch, height, width, channels)`,
+            and you wish to share parameters across space
+            so that each filter only has one set of parameters,
+            set `shared_axes=[1, 2]`.
 
     # References
         - [Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification](http://arxiv.org/pdf/1502.01852v1.pdf)
     '''
-    def __init__(self, init='zero', weights=None, **kwargs):
+    def __init__(self, init='zero', weights=None, shared_axes=None, **kwargs):
         self.supports_masking = True
         self.init = initializations.get(init)
         self.initial_weights = weights
+        if type(shared_axes) is not list and type(shared_axes) is not tuple:
+            self.shared_axes = [shared_axes]
+        else:
+            self.shared_axes = list(shared_axes)
         super(PReLU, self).__init__(**kwargs)
 
     def build(self, input_shape):
-        self.alphas = self.init(input_shape[1:],
+        param_shape = list(input_shape[1:])
+        self.param_broadcast = [False] * len(param_shape)
+        if self.shared_axes[0] is not None:
+            for i in self.shared_axes:
+                param_shape[i] = 1
+                self.param_broadcast[i] = True
+
+        self.alphas = self.init(param_shape,
                                 name='{}_alphas'.format(self.name))
         self.trainable_weights = [self.alphas]
@@ -73,7 +92,10 @@ class PReLU(Layer):
 
     def call(self, x, mask=None):
         pos = K.relu(x)
-        neg = self.alphas * (x - abs(x)) * 0.5
+        if K.backend() == 'theano':
+            neg = K.pattern_broadcast(self.alphas, self.param_broadcast) * (x - abs(x)) * 0.5
+        else:
+            neg = self.alphas * (x - abs(x)) * 0.5
         return pos + neg
 
     def get_config(self):
@@ -131,23 +153,41 @@ class ParametricSoftplus(Layer):
         alpha_init: float. Initial value of the alpha weights.
         beta_init: float. Initial values of the beta weights.
         weights: initial weights, as a list of 2 numpy arrays.
+        shared_axes: the axes along which to share learnable
+            parameters for the activation function.
+            For example, if the incoming feature maps
+            are from a 2D convolution
+            with output shape `(batch, height, width, channels)`,
+            and you wish to share parameters across space
+            so that each filter only has one set of parameters,
+            set `shared_axes=[1, 2]`.
 
     # References
         - [Inferring Nonlinear Neuronal Computation Based on Physiologically Plausible Inputs](http://journals.plos.org/ploscompbiol/article?id=10.1371/journal.pcbi.1003143)
     '''
     def __init__(self, alpha_init=0.2, beta_init=5.0,
-                 weights=None, **kwargs):
+                 weights=None, shared_axes=None, **kwargs):
         self.supports_masking = True
         self.alpha_init = K.cast_to_floatx(alpha_init)
         self.beta_init = K.cast_to_floatx(beta_init)
         self.initial_weights = weights
+        if type(shared_axes) is not list and type(shared_axes) is not tuple:
+            self.shared_axes = [shared_axes]
+        else:
+            self.shared_axes = list(shared_axes)
         super(ParametricSoftplus, self).__init__(**kwargs)
 
     def build(self, input_shape):
-        input_shape = input_shape[1:]
-        self.alphas = K.variable(self.alpha_init * np.ones(input_shape),
+        param_shape = list(input_shape[1:])
+        self.param_broadcast = [False] * len(param_shape)
+        if self.shared_axes[0] is not None:
+            for i in self.shared_axes:
+                param_shape[i] = 1
+                self.param_broadcast[i] = True
+
+        self.alphas = K.variable(self.alpha_init * np.ones(param_shape),
                                  name='{}_alphas'.format(self.name))
-        self.betas = K.variable(self.beta_init * np.ones(input_shape),
+        self.betas = K.variable(self.beta_init * np.ones(param_shape),
                                 name='{}_betas'.format(self.name))
         self.trainable_weights = [self.alphas, self.betas]
@@ -156,7 +196,10 @@
         del self.initial_weights
 
     def call(self, x, mask=None):
-        return K.softplus(self.betas * x) * self.alphas
+        if K.backend() == 'theano':
+            return K.softplus(K.pattern_broadcast(self.betas, self.param_broadcast) * x) * K.pattern_broadcast(self.alphas, self.param_broadcast)
+        else:
+            return K.softplus(self.betas * x) * self.alphas
 
     def get_config(self):
         config = {'alpha_init': float(self.alpha_init),
@@ -214,34 +257,51 @@ class SReLU(Layer):
         a_left_init: initialization function for the left part slope
         t_right_init: initialization function for the right part intercept
         a_right_init: initialization function for the right part slope
+        shared_axes: the axes along which to share learnable
+            parameters for the activation function.
+            For example, if the incoming feature maps
+            are from a 2D convolution
+            with output shape `(batch, height, width, channels)`,
+            and you wish to share parameters across space
+            so that each filter only has one set of parameters,
+            set `shared_axes=[1, 2]`.
 
     # References
         - [Deep Learning with S-shaped Rectified Linear Activation Units](http://arxiv.org/abs/1512.07030)
     '''
     def __init__(self, t_left_init='zero', a_left_init='glorot_uniform',
-                 t_right_init='glorot_uniform', a_right_init='one', **kwargs):
+                 t_right_init='glorot_uniform', a_right_init='one', shared_axes=None, **kwargs):
         self.supports_masking = True
         self.t_left_init = t_left_init
         self.a_left_init = a_left_init
         self.t_right_init = t_right_init
         self.a_right_init = a_right_init
+        if type(shared_axes) is not list and type(shared_axes) is not tuple:
+            self.shared_axes = [shared_axes]
+        else:
+            self.shared_axes = list(shared_axes)
         super(SReLU, self).__init__(**kwargs)
 
     def build(self, input_shape):
-        input_shape = input_shape[1:]
+        param_shape = list(input_shape[1:])
+        self.param_broadcast = [False] * len(param_shape)
+        if self.shared_axes[0] is not None:
+            for i in self.shared_axes:
+                param_shape[i] = 1
+                self.param_broadcast[i] = True
         t_left_init = initializations.get(self.t_left_init)
         a_left_init = initializations.get(self.a_left_init)
         t_right_init = initializations.get(self.t_right_init)
         a_right_init = initializations.get(self.a_right_init)
 
-        self.t_left = t_left_init(input_shape,
+        self.t_left = t_left_init(param_shape,
                                   name='{}_t_left'.format(self.name))
-        self.a_left = a_left_init(input_shape,
+        self.a_left = a_left_init(param_shape,
                                   name='{}_a_left'.format(self.name))
-        self.t_right = t_right_init(input_shape,
+        self.t_right = t_right_init(param_shape,
                                     name='{}_t_right'.format(self.name))
-        self.a_right = a_right_init(input_shape,
+        self.a_right = a_right_init(param_shape,
                                     name='{}_a_right'.format(self.name))
         # ensure the the right part is always to the right of the left
         self.t_right_actual = self.t_left + abs(self.t_right)
@@ -249,10 +309,21 @@
                                   self.t_right, self.a_right]
 
     def call(self, x, mask=None):
-        Y_left_and_center = self.t_left + K.relu(x - self.t_left,
-                                                 self.a_left,
-                                                 self.t_right_actual - self.t_left)
-        Y_right = K.relu(x - self.t_right_actual) * self.a_right
+        if K.backend() == 'theano':
+            t_left = K.pattern_broadcast(self.t_left, self.param_broadcast)
+            a_left = K.pattern_broadcast(self.a_left, self.param_broadcast)
+            a_right = K.pattern_broadcast(self.a_right, self.param_broadcast)
+            t_right_actual = K.pattern_broadcast(self.t_right_actual, self.param_broadcast)
+        else:
+            t_left = self.t_left
+            a_left = self.a_left
+            a_right = self.a_right
+            t_right_actual = self.t_right_actual
+
+        Y_left_and_center = t_left + K.relu(x - t_left,
+                                            a_left,
+                                            t_right_actual - t_left)
+        Y_right = K.relu(x - t_right_actual) * a_right
         return Y_left_and_center + Y_right
 
     def get_config(self):
diff --git a/tests/keras/layers/test_advanced_activations.py b/tests/keras/layers/test_advanced_activations.py
index 369195d12..7a3fe7714 100644
--- a/tests/keras/layers/test_advanced_activations.py
+++ b/tests/keras/layers/test_advanced_activations.py
@@ -17,6 +17,13 @@ def test_prelu():
                input_shape=(2, 3, 4))
 
 
+@keras_test
+def test_prelu_share():
+    from keras.layers.advanced_activations import PReLU
+    layer_test(PReLU, kwargs={'shared_axes': 1},
+               input_shape=(2, 3, 4))
+
+
 @keras_test
 def test_elu():
     from keras.layers.advanced_activations import ELU
@@ -28,11 +35,20 @@ def test_elu():
 @keras_test
 def test_parametric_softplus():
     from keras.layers.advanced_activations import ParametricSoftplus
-    for alpha in [0., .5, -1.]:
-        layer_test(ParametricSoftplus,
-                   kwargs={'alpha_init': 1.,
-                           'beta_init': -1},
-                   input_shape=(2, 3, 4))
+    layer_test(ParametricSoftplus,
+               kwargs={'alpha_init': 1.,
+                       'beta_init': -1},
+               input_shape=(2, 3, 4))
+
+
+@keras_test
+def test_parametric_softplus_share():
+    from keras.layers.advanced_activations import ParametricSoftplus
+    layer_test(ParametricSoftplus,
+               kwargs={'shared_axes': 1,
+                       'alpha_init': 1.,
+                       'beta_init': -1},
+               input_shape=(2, 3, 4))
 
 
 @keras_test
@@ -49,5 +65,12 @@ def test_srelu():
                input_shape=(2, 3, 4))
 
 
+@keras_test
+def test_srelu_share():
+    from keras.layers.advanced_activations import SReLU
+    layer_test(SReLU, kwargs={'shared_axes': 1},
+               input_shape=(2, 3, 4))
+
+
 if __name__ == '__main__':
     pytest.main([__file__])
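
For anyone trying the change out, the snippet below is a minimal usage sketch, not part of the patch itself. It assumes Keras 1.x with this diff applied and mirrors the configuration of the new test_prelu_share test (inputs of shape `(batch, 3, 4)` and a single shared axis):

    from keras import backend as K
    from keras.models import Sequential
    from keras.layers.advanced_activations import PReLU

    # Same setup as the added test_prelu_share test: shared_axes is passed
    # straight to the layer constructor.
    model = Sequential()
    model.add(PReLU(shared_axes=1, input_shape=(3, 4)))

    # The shared axis of the parameter shape is collapsed to length 1, so the
    # layer learns 3 slopes instead of 3 * 4 (and, on the Theano backend, that
    # axis is marked broadcastable via K.pattern_broadcast in call).
    print(K.get_value(model.layers[0].alphas).shape)  # -> (3, 1)

In the patch's build methods, `shared_axes` indexes into the per-sample parameter shape `input_shape[1:]`, and the saved `param_broadcast` pattern is what makes the size-1 axes broadcast correctly under Theano.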