From 039d15bb55b03984c6515f1ada736bd02edd1347 Mon Sep 17 00:00:00 2001 From: olegsinyavskiy Date: Sat, 12 Dec 2015 11:21:15 -0800 Subject: [PATCH 001/145] Split unit and integration tests: - separate job on travis for IT tests - refactor and document IT tests (test_tasks.py) - char generation test with stacked LSTM --- .travis.yml | 8 +- integration_tests/test_image_data_tasks.py | 46 ++++++ integration_tests/test_temporal_data_tasks.py | 131 ++++++++++++++++++ integration_tests/test_vector_data_tasks.py | 63 +++++++++ tests/test_tasks.py | 129 ----------------- 5 files changed, 247 insertions(+), 130 deletions(-) create mode 100644 integration_tests/test_image_data_tasks.py create mode 100644 integration_tests/test_temporal_data_tasks.py create mode 100644 integration_tests/test_vector_data_tasks.py delete mode 100644 tests/test_tasks.py diff --git a/.travis.yml b/.travis.yml index c5a512dd6..748279b9f 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,6 +11,8 @@ matrix: env: KERAS_BACKEND=theano - python: 2.7 env: KERAS_BACKEND=tensorflow + - python: 2.7 + env: KERAS_BACKEND=theano INTEGRATION_TESTS=true install: # code below is taken from http://conda.pydata.org/docs/travis.html # We do this conditionally because it saves us some downloading if the @@ -47,6 +49,10 @@ script: # set up keras backend - sed -i -e 's/"backend":[[:space:]]*"[^"]*/"backend":\ "'$KERAS_BACKEND'/g' ~/.keras/keras.json; - echo -e "Running tests with the following config:\n$(cat ~/.keras/keras.json)" - - PYTHONPATH=$PWD:$PYTHONPATH py.test tests/ + - if [[ "$INTEGRATION_TESTS" == "true" ]]; then + PYTHONPATH=$PWD:$PYTHONPATH py.test integration_tests/; + else + PYTHONPATH=$PWD:$PYTHONPATH py.test tests/; + fi after_success: - coveralls diff --git a/integration_tests/test_image_data_tasks.py b/integration_tests/test_image_data_tasks.py new file mode 100644 index 000000000..24b7b86de --- /dev/null +++ b/integration_tests/test_image_data_tasks.py @@ -0,0 +1,46 @@ +from __future__ import print_function +import numpy as np +import pytest + +from keras.utils.test_utils import get_test_data +from keras.models import Sequential +from keras.layers.core import Dense, Flatten, Activation +from keras.layers.convolutional import Convolution2D, MaxPooling2D +from keras.utils.np_utils import to_categorical + + +def test_image_classification(): + ''' + Classify random 16x16 color images into several classes using logistic regression + with convolutional hidden layer. 
+    '''
+    np.random.seed(1337)
+    input_shape = (3, 16, 16)
+    (X_train, y_train), (X_test, y_test) = get_test_data(nb_train=500,
+                                                         nb_test=200,
+                                                         input_shape=input_shape,
+                                                         classification=True,
+                                                         nb_class=4)
+    y_train = to_categorical(y_train)
+    y_test = to_categorical(y_test)
+    # convolution kernel size
+    nb_conv = 3
+    # size of pooling area for max pooling
+    nb_pool = 2
+
+    model = Sequential([
+        Convolution2D(nb_filter=8, nb_row=nb_conv, nb_col=nb_conv, input_shape=input_shape),
+        MaxPooling2D(pool_size=(nb_pool, nb_pool)),
+        Flatten(),
+        Activation('relu'),
+        Dense(y_test.shape[-1], activation='softmax')
+    ])
+    model.compile(loss='categorical_crossentropy', optimizer='sgd')
+    history = model.fit(X_train, y_train, nb_epoch=10, batch_size=16,
+                        validation_data=(X_test, y_test),
+                        show_accuracy=True, verbose=0)
+    assert(history.history['val_acc'][-1] > 0.9)
+
+
+if __name__ == '__main__':
+    pytest.main([__file__])
diff --git a/integration_tests/test_temporal_data_tasks.py b/integration_tests/test_temporal_data_tasks.py
new file mode 100644
index 000000000..5acc21853
--- /dev/null
+++ b/integration_tests/test_temporal_data_tasks.py
@@ -0,0 +1,131 @@
+from __future__ import print_function
+import numpy as np
+import pytest
+import string
+
+from keras.utils.test_utils import get_test_data
+from keras.models import Sequential
+from keras.layers.core import TimeDistributedDense, Dropout, Dense
+from keras.layers.recurrent import GRU, LSTM
+from keras.utils.np_utils import to_categorical
+
+
+def test_temporal_classification():
+    '''
+    Classify temporal sequences of float numbers of length 3 into 2 classes, using
+    a single layer of GRU units and a softmax applied to the last activations of the units.
+    '''
+    np.random.seed(1337)
+    (X_train, y_train), (X_test, y_test) = get_test_data(nb_train=500,
+                                                         nb_test=200,
+                                                         input_shape=(3, 5),
+                                                         classification=True,
+                                                         nb_class=2)
+    y_train = to_categorical(y_train)
+    y_test = to_categorical(y_test)
+
+    model = Sequential()
+    model.add(GRU(y_train.shape[-1],
+                  input_shape=(X_train.shape[1], X_train.shape[2]),
+                  activation='softmax'))
+    model.compile(loss='categorical_crossentropy', optimizer='adadelta')
+    history = model.fit(X_train, y_train, nb_epoch=5, batch_size=16,
+                        validation_data=(X_test, y_test),
+                        show_accuracy=True, verbose=0)
+    assert(history.history['val_acc'][-1] > 0.9)
+
+
+def test_temporal_regression():
+    '''
+    Predict float numbers (regression) from sequences of float numbers of length 3,
+    using a single layer of GRU units.
+    '''
+    np.random.seed(1337)
+    (X_train, y_train), (X_test, y_test) = get_test_data(nb_train=500,
+                                                         nb_test=200,
+                                                         input_shape=(3, 5),
+                                                         output_shape=(2,),
+                                                         classification=False)
+    model = Sequential()
+    model.add(GRU(y_train.shape[-1],
+                  input_shape=(X_train.shape[1], X_train.shape[2])))
+    model.compile(loss='hinge', optimizer='adam')
+    history = model.fit(X_train, y_train, nb_epoch=5, batch_size=16,
+                        validation_data=(X_test, y_test), verbose=0)
+    assert(history.history['val_loss'][-1] < 0.75)
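As a reference for the sequence-to-sequence test that follows, here is a minimal NumPy-only
sketch (not part of the patch series; shapes and names are illustrative) of the computation
TimeDistributedDense performs: one shared weight matrix applied independently at every timestep.

import numpy as np

# Illustrative shapes: 4 samples, 3 timesteps, 5 input dims, 5 output dims.
nb_samples, timesteps, input_dim, output_dim = 4, 3, 5, 5
X = np.random.randn(nb_samples, timesteps, input_dim)
W = np.random.randn(input_dim, output_dim)
b = np.zeros(output_dim)

# The same W and b are applied to every timestep slice X[:, t, :].
Y = np.einsum('ntd,do->nto', X, W) + b
assert Y.shape == (nb_samples, timesteps, output_dim)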
+
+
+def test_sequence_to_sequence():
+    '''
+    Apply the same Dense layer to each element of the time dimension of the input
+    and make predictions of the output sequence elements.
+    This does not make use of the temporal structure of the sequence
+    (see TimeDistributedDense for more details)
+    '''
+    np.random.seed(1337)
+    (X_train, y_train), (X_test, y_test) = get_test_data(nb_train=500,
+                                                         nb_test=200,
+                                                         input_shape=(3, 5),
+                                                         output_shape=(3, 5),
+                                                         classification=False)
+
+    model = Sequential()
+    model.add(TimeDistributedDense(y_train.shape[-1],
+                                   input_shape=(X_train.shape[1], X_train.shape[2])))
+    model.compile(loss='hinge', optimizer='rmsprop')
+    history = model.fit(X_train, y_train, nb_epoch=20, batch_size=16,
+                        validation_data=(X_test, y_test), verbose=0)
+    assert(history.history['val_loss'][-1] < 0.8)
+
+
+def test_stacked_lstm_char_prediction():
+    '''
+    Learn the alphabetical char sequence with a stacked LSTM.
+    Predict the whole alphabet based on the first two letters ('ab' -> 'ab...z').
+    See a non-toy example in examples/lstm_text_generation.py
+    '''
+    np.random.seed(1336)
+    # generate alphabet: http://stackoverflow.com/questions/16060899/alphabet-range-python
+    alphabet = string.ascii_lowercase
+    number_of_chars = len(alphabet)
+
+    # generate char sequences of length 'sequence_length' out of alphabet and store the next char as label (e.g. 'ab'->'c')
+    sequence_length = 2
+    sentences = [alphabet[i: i + sequence_length] for i in range(len(alphabet) - sequence_length)]
+    next_chars = [alphabet[i + sequence_length] for i in range(len(alphabet) - sequence_length)]
+
+    # Transform sequences and labels into 'one-hot' encoding
+    X = np.zeros((len(sentences), sequence_length, number_of_chars), dtype=np.bool)
+    y = np.zeros((len(sentences), number_of_chars), dtype=np.bool)
+    for i, sentence in enumerate(sentences):
+        for t, char in enumerate(sentence):
+            X[i, t, ord(char)-ord('a')] = 1
+        y[i, ord(next_chars[i])-ord('a')] = 1
+
+    # learn the alphabet with stacked LSTM
+    model = Sequential([
+        LSTM(16, return_sequences=True, input_shape=(sequence_length, number_of_chars)),
+        LSTM(16, return_sequences=False),
+        Dense(number_of_chars, activation='softmax')
+    ])
+    model.compile(loss='categorical_crossentropy', optimizer='adam')
+    model.fit(X, y, batch_size=1, nb_epoch=60, verbose=1)
+
+    # prime the model with 'ab' sequence and let it generate the learned alphabet
+    sentence = alphabet[:sequence_length]
+    generated = sentence
+    for iteration in range(number_of_chars-sequence_length):
+        x = np.zeros((1, sequence_length, number_of_chars))
+        for t, char in enumerate(sentence):
+            x[0, t, ord(char) - ord('a')] = 1.
+        preds = model.predict(x, verbose=0)[0]
+        next_char = chr(np.argmax(preds) + ord('a'))
+        generated += next_char
+        sentence = sentence[1:] + next_char
+
+    # check that it did generate the alphabet correctly
+    assert(generated == alphabet)
+
+
+if __name__ == '__main__':
+    pytest.main([__file__])
diff --git a/integration_tests/test_vector_data_tasks.py b/integration_tests/test_vector_data_tasks.py
new file mode 100644
index 000000000..f290bde6e
--- /dev/null
+++ b/integration_tests/test_vector_data_tasks.py
@@ -0,0 +1,63 @@
+from __future__ import print_function
+import numpy as np
+import pytest
+
+from keras.utils.test_utils import get_test_data
+from keras.models import Sequential
+from keras.layers.core import Dense
+from keras.utils.np_utils import to_categorical
+
+
+def test_vector_classification():
+    '''
+    Classify random float vectors into 2 classes with logistic regression,
+    using a 2-layer neural network with ReLU hidden units.
+ ''' + np.random.seed(1337) + nb_hidden = 10 + + (X_train, y_train), (X_test, y_test) = get_test_data(nb_train=500, + nb_test=200, + input_shape=(20,), + classification=True, + nb_class=2) + y_train = to_categorical(y_train) + y_test = to_categorical(y_test) + + model = Sequential([ + Dense(nb_hidden, input_shape=(X_train.shape[-1],), activation='relu'), + Dense(y_train.shape[-1], activation='softmax') + ]) + model.compile(loss='categorical_crossentropy', optimizer='rmsprop') + history = model.fit(X_train, y_train, nb_epoch=15, batch_size=16, + validation_data=(X_test, y_test), + show_accuracy=True, verbose=0) + assert(history.history['val_acc'][-1] > 0.8) + + +def test_vector_regression(): + ''' + Perform float data prediction (regression) using 2 layer MLP + with tanh and sigmoid activations. + ''' + np.random.seed(1337) + nb_hidden = 10 + (X_train, y_train), (X_test, y_test) = get_test_data(nb_train=500, + nb_test=200, + input_shape=(20,), + output_shape=(2,), + classification=False) + + model = Sequential([ + Dense(nb_hidden, input_shape=(X_train.shape[-1],), activation='tanh'), + Dense(y_train.shape[-1]) + ]) + + model.compile(loss='hinge', optimizer='adagrad') + history = model.fit(X_train, y_train, nb_epoch=20, batch_size=16, + validation_data=(X_test, y_test), verbose=0) + assert (history.history['val_loss'][-1] < 0.9) + + +if __name__ == '__main__': + pytest.main([__file__]) diff --git a/tests/test_tasks.py b/tests/test_tasks.py deleted file mode 100644 index 197904029..000000000 --- a/tests/test_tasks.py +++ /dev/null @@ -1,129 +0,0 @@ -from __future__ import print_function -import numpy as np -import pytest -np.random.seed(1337) - -from keras.utils.test_utils import get_test_data -from keras.models import Sequential -from keras.layers.core import Dense, TimeDistributedDense, Flatten -from keras.layers.recurrent import GRU -from keras.layers.convolutional import Convolution2D -from keras.utils.np_utils import to_categorical - - -def test_vector_classification(): - nb_hidden = 10 - - (X_train, y_train), (X_test, y_test) = get_test_data(nb_train=500, - nb_test=200, - input_shape=(20,), - classification=True, - nb_class=2) - y_train = to_categorical(y_train) - y_test = to_categorical(y_test) - - model = Sequential([ - Dense(nb_hidden, input_shape=(X_train.shape[-1],), activation='relu'), - Dense(y_train.shape[-1], activation='softmax') - ]) - model.compile(loss='categorical_crossentropy', optimizer='rmsprop') - history = model.fit(X_train, y_train, nb_epoch=15, batch_size=16, - validation_data=(X_test, y_test), - show_accuracy=True, verbose=0) - assert(history.history['val_acc'][-1] > 0.8) - - -def test_vector_regression(): - nb_hidden = 10 - (X_train, y_train), (X_test, y_test) = get_test_data(nb_train=500, - nb_test=200, - input_shape=(20,), - output_shape=(2,), - classification=False) - - model = Sequential([ - Dense(nb_hidden, input_shape=(X_train.shape[-1],), activation='tanh'), - Dense(y_train.shape[-1]) - ]) - - model.compile(loss='hinge', optimizer='adagrad') - history = model.fit(X_train, y_train, nb_epoch=20, batch_size=16, - validation_data=(X_test, y_test), verbose=0) - assert (history.history['val_loss'][-1] < 0.9) - - -def test_temporal_classification(): - np.random.seed(1337) - (X_train, y_train), (X_test, y_test) = get_test_data(nb_train=500, - nb_test=200, - input_shape=(3, 5), - classification=True, - nb_class=2) - y_train = to_categorical(y_train) - y_test = to_categorical(y_test) - - model = Sequential() - model.add(GRU(y_train.shape[-1], - 
input_shape=(X_train.shape[1], X_train.shape[2]), - activation='softmax')) - model.compile(loss='categorical_crossentropy', optimizer='adadelta') - history = model.fit(X_train, y_train, nb_epoch=20, batch_size=16, - validation_data=(X_test, y_test), - show_accuracy=True, verbose=0) - assert(history.history['val_acc'][-1] > 0.9) - - -def test_temporal_regression(): - (X_train, y_train), (X_test, y_test) = get_test_data(nb_train=500, - nb_test=200, - input_shape=(3, 5), - output_shape=(2,), - classification=False) - model = Sequential() - model.add(GRU(y_train.shape[-1], - input_shape=(X_train.shape[1], X_train.shape[2]))) - model.compile(loss='hinge', optimizer='adam') - history = model.fit(X_train, y_train, nb_epoch=20, batch_size=16, - validation_data=(X_test, y_test), verbose=0) - assert(history.history['val_loss'][-1] < 0.8) - - -def test_sequence_to_sequence(): - (X_train, y_train), (X_test, y_test) = get_test_data(nb_train=500, - nb_test=200, - input_shape=(3, 5), - output_shape=(3, 5), - classification=False) - - model = Sequential() - model.add(TimeDistributedDense(y_train.shape[-1], - input_shape=(X_train.shape[1], X_train.shape[2]))) - model.compile(loss='hinge', optimizer='rmsprop') - history = model.fit(X_train, y_train, nb_epoch=20, batch_size=16, - validation_data=(X_test, y_test), verbose=0) - assert(history.history['val_loss'][-1] < 0.8) - - -def test_image_classification(): - (X_train, y_train), (X_test, y_test) = get_test_data(nb_train=500, - nb_test=200, - input_shape=(3, 8, 8), - classification=True, - nb_class=2) - y_train = to_categorical(y_train) - y_test = to_categorical(y_test) - - model = Sequential([ - Convolution2D(8, 8, 8, input_shape=(3, 8, 8), activation='sigmoid'), - Flatten(), - Dense(y_test.shape[-1], activation='softmax') - ]) - model.compile(loss='categorical_crossentropy', optimizer='sgd') - history = model.fit(X_train, y_train, nb_epoch=20, batch_size=16, - validation_data=(X_test, y_test), - show_accuracy=True, verbose=0) - assert(history.history['val_acc'][-1] > 0.9) - - -if __name__ == '__main__': - pytest.main([__file__]) From efff160cea608891b9b5ed25b6c5f56869964f63 Mon Sep 17 00:00:00 2001 From: jeffzhengye Date: Sun, 13 Dec 2015 15:56:34 -0500 Subject: [PATCH 002/145] correct masking: switch for each example in a batch --- keras/backend/theano_backend.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/keras/backend/theano_backend.py b/keras/backend/theano_backend.py index 3cd239108..360625cb9 100644 --- a/keras/backend/theano_backend.py +++ b/keras/backend/theano_backend.py @@ -412,7 +412,7 @@ def rnn(step_function, inputs, initial_states, if masking: # if all-zero input timestep, return # all-zero output and unchanged states - switch = T.any(input) + switch = T.any(input, axis=-1, keepdims=True) output = T.switch(switch, output, 0. 
* output)
             return_states = []
             for state, new_state in zip(states, new_states):

From bab230c0d6debd6b1ab2c521f21d133841c1a7f5 Mon Sep 17 00:00:00 2001
From: jeffzhengye
Date: Sun, 13 Dec 2015 19:14:50 -0500
Subject: [PATCH 003/145] add a test for simple rnn

---
 tests/keras/layers/test_simplernn.py | 69 ++++++++++++++++++++++++++++
 1 file changed, 69 insertions(+)
 create mode 100644 tests/keras/layers/test_simplernn.py

diff --git a/tests/keras/layers/test_simplernn.py b/tests/keras/layers/test_simplernn.py
new file mode 100644
index 000000000..7d27adf12
--- /dev/null
+++ b/tests/keras/layers/test_simplernn.py
@@ -0,0 +1,69 @@
+import theano
+import unittest
+from numpy.testing import assert_allclose
+import numpy as np
+from keras.layers.recurrent import SimpleRNN
+from mock import Mock
+
+floatX = theano.config.floatX
+
+__author__ = "Jeff Ye"
+
+
+class TestSimpleRNN(unittest.TestCase):
+    left_padding_data = np.array(
+        [
+            [  # batch 1
+                [0], [1], [2], [3]
+            ],
+            [  # batch 2
+                [0], [0], [1], [2]
+            ]
+        ], dtype=floatX)
+    left_padding_mask = np.array(  # n_sample x n_time
+        [
+            [  # batch 1
+                0, 1, 1, 1
+            ],
+            [  # batch 2
+                0, 0, 1, 1
+            ]
+        ], dtype=np.int32)
+
+    def setUp(self):
+        W = np.array([[1]], dtype=floatX)
+        U = np.array([[1]], dtype=floatX)
+        b = np.array([0], dtype=floatX)
+        weights = [W, U, b]
+        self.forward = SimpleRNN(output_dim=1, activation='linear', weights=weights)
+        self.backward = SimpleRNN(output_dim=1, activation='linear', weights=weights)
+
+        previous = Mock()
+        previous.nb_input = 1
+        previous.nb_output = 1
+        previous.output_shape = self.left_padding_data.shape
+        previous.get_output_mask = Mock()
+        self.previous = previous
+
+    def test_left_padding(self):
+        forward = self.forward
+        forward.go_backwards = False
+        forward.return_sequences = True
+        self.previous.get_output.return_value = theano.shared(value=self.left_padding_data)
+        self.previous.get_output_mask.return_value = theano.shared(value=self.left_padding_mask)
+        forward.set_previous(self.previous)
+        np.testing.assert_allclose(forward.get_output().eval(),
+                                   np.array([
+                                       [[0], [1], [3], [6]],
+                                       [[0], [0], [1], [3]]]))
+
+        backward = self.backward
+        backward.go_backwards = True
+        backward.return_sequences = True
+        self.previous.get_output.return_value = theano.shared(value=self.left_padding_data)
+        self.previous.get_output_mask.return_value = theano.shared(value=self.left_padding_mask)
+        backward.set_previous(self.previous)
+        np.testing.assert_allclose(backward.get_output().eval(),
+                                   np.array([
+                                       [[3], [5], [6], [0]],
+                                       [[2], [3], [0], [0]]]))
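To make the fix in PATCH 002 concrete: a scalar T.any() computes one switch for the whole
batch, so a single live sample unmasks every padded sample in that timestep; reducing over
the feature axis with keepdims=True yields one switch per example that broadcasts against
the output. A NumPy sketch (not part of the patch series; shapes are illustrative):

import numpy as np

# One timestep of a batch of 2 samples: sample 0 is padding, sample 1 is live.
step_input = np.array([[0., 0., 0.],
                       [1., 2., 3.]])
step_output = np.ones((2, 4))

batch_switch = np.any(step_input)                           # scalar True: padding leaks through
sample_switch = np.any(step_input, axis=-1, keepdims=True)  # [[False], [True]], one per sample
masked = np.where(sample_switch, step_output, 0. * step_output)  # row 0 zeroed, row 1 kept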
From 52976242f0c58a178805f882fb8142c3fb1db832 Mon Sep 17 00:00:00 2001
From: lukedeo
Date: Sun, 13 Dec 2015 18:03:03 -0500
Subject: [PATCH 004/145] adding Highway Network layers (regular and time-distr.)

---
 keras/layers/core.py            | 190 ++++++++++++++++++++++++++++++++
 tests/keras/layers/test_core.py |  10 ++
 2 files changed, 200 insertions(+)

diff --git a/keras/layers/core.py b/keras/layers/core.py
index d6c8225a3..878e45596 100644
--- a/keras/layers/core.py
+++ b/keras/layers/core.py
@@ -1048,6 +1048,107 @@ class TimeDistributedDense(MaskedLayer):
         return dict(list(base_config.items()) + list(config.items()))
 
 
+class TimeDistributedHighway(MaskedLayer):
+    '''
+    Apply the same Highway layer to each element of dimension[1] (the time dimension) of the input.
+    Especially useful after a recurrent network with 'return_sequences=True'.
+
+    Tensor input dimensions: (nb_sample, time_dimension, input_dim)
+    Tensor output dimensions: (nb_sample, time_dimension, input_dim)
+
+    NOTE that this layer carries its input dimension, so an output dimension
+    isn't necessary.
+
+    '''
+    input_ndim = 3
+
+    def __init__(self,
+                 init='glorot_uniform', transform_bias=-1, activation='linear', weights=None,
+                 W_regularizer=None, b_regularizer=None, activity_regularizer=None,
+                 W_constraint=None, b_constraint=None,
+                 input_dim=None, input_length=None, **kwargs):
+        self.init = initializations.get(init)
+        self.transform_bias = transform_bias
+        self.activation = activations.get(activation)
+
+        self.W_regularizer = regularizers.get(W_regularizer)
+        self.b_regularizer = regularizers.get(b_regularizer)
+        self.activity_regularizer = regularizers.get(activity_regularizer)
+
+        self.W_constraint = constraints.get(W_constraint)
+        self.b_constraint = constraints.get(b_constraint)
+        self.constraints = [self.W_constraint, self.b_constraint]
+
+        self.initial_weights = weights
+
+        self.input_dim = input_dim
+        self.input_length = input_length
+        if self.input_dim:
+            kwargs['input_shape'] = (self.input_length, self.input_dim)
+        self.input = K.placeholder(ndim=3)
+        super(TimeDistributedHighway, self).__init__(**kwargs)
+
+    def build(self):
+        input_dim = self.input_shape[2]
+
+        self.W = self.init((input_dim, input_dim))
+        self.b = K.zeros((input_dim))
+
+        self.W_carry = self.init((input_dim, input_dim))
+        self.b_carry = K.variable(np.ones((input_dim)) * self.transform_bias)
+
+        self.params = [self.W, self.b, self.W_carry, self.b_carry]
+        self.regularizers = []
+
+        if self.W_regularizer:
+            self.W_regularizer.set_param(self.W)
+            self.regularizers.append(self.W_regularizer)
+
+        if self.b_regularizer:
+            self.b_regularizer.set_param(self.b)
+            self.regularizers.append(self.b_regularizer)
+
+        if self.activity_regularizer:
+            self.activity_regularizer.set_layer(self)
+            self.regularizers.append(self.activity_regularizer)
+
+        if self.initial_weights is not None:
+            self.set_weights(self.initial_weights)
+            del self.initial_weights
+
+    @property
+    def output_shape(self):
+        input_shape = self.input_shape
+        return (input_shape[0], input_shape[1], input_shape[2])
+
+    def get_output(self, train=False):
+        X = self.get_input(train)
+
+        def step(x, states):
+            output = self.activation(K.dot(x, self.W) + self.b)
+            transform_weight = activations.sigmoid(K.dot(x, self.W_carry) + self.b_carry)
+            output *= transform_weight
+            output = output + (1 - transform_weight) * x
+            return output, []
+
+        last_output, outputs, states = K.rnn(step, X, [], masking=False)
+        return outputs
+
+    def get_config(self):
+        config = {"name": self.__class__.__name__,
+                  "init": self.init.__name__,
+                  "transform_bias": self.transform_bias,
+                  "activation": self.activation.__name__,
+                  "W_regularizer": self.W_regularizer.get_config() if self.W_regularizer else None,
+                  "b_regularizer": self.b_regularizer.get_config() if self.b_regularizer else None,
+                  "activity_regularizer": self.activity_regularizer.get_config() if self.activity_regularizer else None,
+                  "W_constraint": self.W_constraint.get_config() if self.W_constraint else None,
+                  "b_constraint": self.b_constraint.get_config() if self.b_constraint else None,
+                  "input_dim": self.input_dim,
+                  "input_length": self.input_length}
+        base_config = super(TimeDistributedHighway, self).get_config()
+        return dict(list(base_config.items()) + list(config.items()))
+
+
 class AutoEncoder(Layer):
     '''A customizable autoencoder model.
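Both layers added by this patch compute the same gated mix, y = T(x) * H(x) + (1 - T(x)) * x,
where the transform gate T(x) = sigmoid(x.W_carry + b_carry) starts near zero because of the
negative transform_bias, so the layer initially passes its input through almost unchanged.
A standalone NumPy sketch of one step (not part of the patch; weight shapes are illustrative):

import numpy as np

def sigmoid(z):
    return 1. / (1. + np.exp(-z))

def highway_step(x, W, b, W_carry, b_carry, activation=lambda z: z):
    # activation defaults to linear, matching the layers above
    transform = sigmoid(np.dot(x, W_carry) + b_carry)  # gate T(x), close to 0 at init
    h = activation(np.dot(x, W) + b)                   # candidate H(x)
    return transform * h + (1. - transform) * x        # gated mix with the identity path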
@@ -1694,3 +1795,92 @@ def add_shared_layer(layer, inputs): sh = SiameseHead(i) inputs[i].add(s) inputs[i].add(sh) + +class Highway(Layer): + '''Densely connected highway network, + a natural extension of LSTMs to feedforward networks + + This layer has identical output shape to input shape, and + thus doesn't need an output_dim specified + + cite: http://arxiv.org/pdf/1505.00387v2.pdf + ''' + input_ndim = 2 + + def __init__(self, init='glorot_uniform', transform_bias=-2, activation='linear', weights=None, + W_regularizer=None, b_regularizer=None, activity_regularizer=None, + W_constraint=None, b_constraint=None, input_dim=None, **kwargs): + self.init = initializations.get(init) + self.transform_bias = transform_bias + self.activation = activations.get(activation) + + self.W_regularizer = regularizers.get(W_regularizer) + self.b_regularizer = regularizers.get(b_regularizer) + self.activity_regularizer = regularizers.get(activity_regularizer) + + self.W_constraint = constraints.get(W_constraint) + self.b_constraint = constraints.get(b_constraint) + self.constraints = [self.W_constraint, self.b_constraint] + + self.initial_weights = weights + + self.input_dim = input_dim + if self.input_dim: + kwargs['input_shape'] = (self.input_dim,) + self.input = K.placeholder(ndim=2) + super(Highway, self).__init__(**kwargs) + + def build(self): + input_dim = self.input_shape[1] + + self.W = self.init((input_dim, input_dim)) + self.W_carry = self.init((input_dim, input_dim)) + + self.b = K.zeros((input_dim,)) + # -- initialize with a vector of values `transform_bias` + self.b_carry = K.variable(np.ones((input_dim,)) * self.transform_bias) + + self.params = [self.W, self.b, self.W_carry, self.b_carry] + + self.regularizers = [] + if self.W_regularizer: + self.W_regularizer.set_param(self.W) + self.regularizers.append(self.W_regularizer) + + if self.b_regularizer: + self.b_regularizer.set_param(self.b) + self.regularizers.append(self.b_regularizer) + + if self.activity_regularizer: + self.activity_regularizer.set_layer(self) + self.regularizers.append(self.activity_regularizer) + + if self.initial_weights is not None: + self.set_weights(self.initial_weights) + del self.initial_weights + + @property + def output_shape(self): + return (self.input_shape[0], self.input_shape[1]) + + def get_output(self, train=False): + X = self.get_input(train) + transform_weight = activations.sigmoid(K.dot(X, self.W_carry) + self.b_carry) + act = self.activation(K.dot(X, self.W) + self.b) + act *= transform_weight + output = act + (1 - transform_weight) * X + return output + + def get_config(self): + config = {"name": self.__class__.__name__, + "init": self.init.__name__, + "transform_bias": self.transform_bias, + "activation": self.activation.__name__, + "W_regularizer": self.W_regularizer.get_config() if self.W_regularizer else None, + "b_regularizer": self.b_regularizer.get_config() if self.b_regularizer else None, + "activity_regularizer": self.activity_regularizer.get_config() if self.activity_regularizer else None, + "W_constraint": self.W_constraint.get_config() if self.W_constraint else None, + "b_constraint": self.b_constraint.get_config() if self.b_constraint else None, + "input_dim": self.input_dim} + base_config = super(Highway, self).get_config() + return dict(list(base_config.items()) + list(config.items())) diff --git a/tests/keras/layers/test_core.py b/tests/keras/layers/test_core.py index dcc9d6058..2edb3452b 100644 --- a/tests/keras/layers/test_core.py +++ b/tests/keras/layers/test_core.py @@ -100,6 +100,16 
@@ def test_time_dist_merge(): _runner(layer) +def test_time_dist_highway(): + layer = core.TimeDistributedHighway(input_shape=(None, 10)) + _runner(layer) + + +def test_highway(): + layer = core.Highway(input_shape=(10,)) + _runner(layer) + + def test_autoencoder(): layer_1 = core.Layer() layer_2 = core.Layer() From 090ac0d1387a9f370f18afbc25314eeec2568d0c Mon Sep 17 00:00:00 2001 From: jeffzhengye Date: Tue, 15 Dec 2015 01:46:04 -0500 Subject: [PATCH 005/145] remove incompatible tests --- tests/keras/layers/test_simplernn.py | 69 ---------------------------- 1 file changed, 69 deletions(-) delete mode 100644 tests/keras/layers/test_simplernn.py diff --git a/tests/keras/layers/test_simplernn.py b/tests/keras/layers/test_simplernn.py deleted file mode 100644 index 7d27adf12..000000000 --- a/tests/keras/layers/test_simplernn.py +++ /dev/null @@ -1,69 +0,0 @@ -import theano -import unittest -from numpy.testing import assert_allclose -import numpy as np -from keras.layers.recurrent import SimpleRNN -from mock import Mock - -floatX = theano.config.floatX - -__author__ = "Jeff Ye" - - -class TestSimpleRNN(unittest.TestCase): - left_padding_data = np.array( - [ - [ # batch 1 - [0], [1], [2], [3] - ], - [ # batch 2 - [0], [0], [1], [2] - ] - ], dtype=floatX) - left_padding_mask = np.array( # n_sample x n_time - [ - [ # batch 1 - 0, 1, 1, 1 - ], - [ # batch 2 - 0, 0, 1, 1 - ] - ], dtype=np.int32) - - def setUp(self): - W = np.array([[1]], dtype=floatX) - U = np.array([[1]], dtype=floatX) - b = np.array([0], dtype=floatX) - weights = [W, U, b] - self.forward = SimpleRNN(output_dim=1, activation='linear', weights=weights) - self.backward = SimpleRNN(output_dim=1, activation='linear', weights=weights) - - previous = Mock() - previous.nb_input = 1 - previous.nb_output = 1 - previous.output_shape = self.left_padding_data.shape - previous.get_output_mask = Mock() - self.previous = previous - - def test_left_padding(self): - forward = self.forward - forward.go_backwards = False - forward.return_sequences = True - self.previous.get_output.return_value = theano.shared(value=self.left_padding_data) - self.previous.get_output_mask.return_value = theano.shared(value=self.left_padding_mask) - forward.set_previous(self.previous) - np.testing.assert_allclose(forward.get_output().eval(), - np.array([ - [[0], [1], [3], [6]], - [[0], [0], [1], [3]]])) - - backward = self.backward - backward.go_backwards = True - backward.return_sequences = True - self.previous.get_output.return_value = theano.shared(value=self.left_padding_data) - self.previous.get_output_mask.return_value = theano.shared(value=self.left_padding_mask) - backward.set_previous(self.previous) - np.testing.assert_allclose(backward.get_output().eval(), - np.array([ - [[3], [5], [6], [0]], - [[2], [3], [0], [0]]])) From f0cba6ec83f802c6984bbaf90c4286048215aaa5 Mon Sep 17 00:00:00 2001 From: Fariz Rahman Date: Tue, 15 Dec 2015 13:16:36 +0530 Subject: [PATCH 006/145] Add mask arg --- keras/layers/core.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/keras/layers/core.py b/keras/layers/core.py index d6c8225a3..d6e9a3a48 100644 --- a/keras/layers/core.py +++ b/keras/layers/core.py @@ -47,13 +47,19 @@ class Layer(object): if not hasattr(self, 'params'): self.params = [] - def __call__(self, X, train=False): - # set temporary input - tmp = self.get_input + def __call__(self, X, mask=None, train=False): + # set temporary input and mask + tmp_input = self.get_input + tmp_mask = None + if hasattr(self, 'get_input_mask'): + 
tmp_mask = self.get_input_mask + self.get_input_mask = lambda _: mask self.get_input = lambda _: X Y = self.get_output(train=train) - # return input to what it was - self.get_input = tmp + # return input and mask to what it was + self.get_input = tmp_input + if hasattr(self, 'get_input_mask'): + self.get_input_mask = tmp_mask return Y def set_previous(self, layer, connection_map={}): From 4d3c6c9bbe2cddd650aa32559eff1f03774a2d47 Mon Sep 17 00:00:00 2001 From: Fariz Rahman Date: Tue, 15 Dec 2015 13:18:03 +0530 Subject: [PATCH 007/145] Support nested models --- keras/layers/containers.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/keras/layers/containers.py b/keras/layers/containers.py index c667bdc43..58e8b85f5 100644 --- a/keras/layers/containers.py +++ b/keras/layers/containers.py @@ -24,13 +24,23 @@ class Sequential(Layer): for layer in layers: self.add(layer) - def __call__(self, X, train=False): + def __call__(self, X, mask=None, train=False): + #recursively search for a layer which is not a Sequential model + layer = self + while issubclass(layer.__class__, Sequential): + layer = layer.layers[0] # set temporary input to first layer - tmp = self.layers[0].get_input - self.layers[0].get_input = lambda _: X + tmp_input = layer.get_input + tmp_mask = None + layer.get_input = lambda _: X + if hasattr(layer, 'get_input_mask'): + tmp_mask = layer.get_input_mask + layer.get_input_mask = lambda _: mask Y = self.get_output(train=train) - # return input to first layer to what it was - self.layers[0].get_input = tmp + # return input from first layer to what it was + layer.get_input = tmp_input + if hasattr(layer, 'get_input_mask'): + layer.get_input_mask = tmp_mask return Y def set_previous(self, layer): From f51982359577e726730e3c94aa382df2d63247f2 Mon Sep 17 00:00:00 2001 From: Fariz Rahman Date: Tue, 15 Dec 2015 13:22:20 +0530 Subject: [PATCH 008/145] Tests --- tests/keras/layers/test_call.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/tests/keras/layers/test_call.py b/tests/keras/layers/test_call.py index af250ddb9..a57d46fd5 100644 --- a/tests/keras/layers/test_call.py +++ b/tests/keras/layers/test_call.py @@ -31,6 +31,7 @@ def test_sequential_call(): model.add(Dense(output_dim=output_dim, input_dim=input_dim)) model.compile('sgd', 'mse') + # test flat model X = K.placeholder(ndim=2) Y = model(X) F = K.function([X], [Y]) @@ -41,6 +42,18 @@ def test_sequential_call(): # results of __call__ should match model.predict assert_allclose(y1, y2) + # test nested model + model2 = Sequential() + model2.add(model) + model2.compile('sgd', 'mse') + + Y2 = model2(X) + F = K.function([X], [Y2]) + + y1 = F([x])[0].astype(K.floatx()) + y2 = model2.predict(x) + # results of __call__ should match model.predict + assert_allclose(y1, y2) if __name__ == '__main__': pytest.main([__file__]) From 1cf7036d1cf44a0514cee24ceda896e8f4038863 Mon Sep 17 00:00:00 2001 From: Fariz Rahman Date: Tue, 15 Dec 2015 15:16:09 +0530 Subject: [PATCH 009/145] Fix add_shared_node() --- keras/layers/containers.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/keras/layers/containers.py b/keras/layers/containers.py index 58e8b85f5..e752df1db 100644 --- a/keras/layers/containers.py +++ b/keras/layers/containers.py @@ -385,9 +385,7 @@ class Graph(Layer): dot_axes: Same meaning as `dot_axes` argument of `add_node()` outputs: Used when `merge_mode=None`. Names for the output nodes. create_output: Same meaning as `create_output` argument of `add_node()`. 
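Stepping back to the __call__ support added in PATCH 006 and PATCH 007 and exercised by
test_call.py above: it lets a layer, or a whole (possibly nested) Sequential model, be applied
to a fresh symbolic tensor. A usage sketch in the 0.3-era API shown in these patches (layer
sizes are illustrative):

import numpy as np
from keras import backend as K
from keras.models import Sequential
from keras.layers.core import Dense

model = Sequential()
model.add(Dense(output_dim=8, input_dim=4))
model.compile('sgd', 'mse')

X = K.placeholder(ndim=2)  # fresh symbolic input
Y = model(X)               # reuse the model's weights on X
F = K.function([X], [Y])

x = np.random.random((2, 4)).astype(K.floatx())
assert np.allclose(F([x])[0], model.predict(x))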
- When creating an output, `merge_mode` must be specified. ''' - layer.layer_cache = self.layer_cache if name in self.namespace: raise Exception('Duplicate node identifier: ' + name) for o in outputs: @@ -418,7 +416,8 @@ class Graph(Layer): raise Exception('Unknown identifier: ' + input) s = Siamese(layer, layers, merge_mode, concat_axis=concat_axis, - dot_axes=dot_axes) + dot_axes=dot_axes, + is_graph=True) self.namespace.add(name) self.nodes[name] = s self.node_config.append({'name': name, From 060fcd3ed072eb81a768d9e8da97ecb7b95c4702 Mon Sep 17 00:00:00 2001 From: Fariz Rahman Date: Tue, 15 Dec 2015 15:18:13 +0530 Subject: [PATCH 010/145] Fix Siamese layer --- keras/layers/core.py | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/keras/layers/core.py b/keras/layers/core.py index d6e9a3a48..81a9febe0 100644 --- a/keras/layers/core.py +++ b/keras/layers/core.py @@ -1450,7 +1450,7 @@ class Siamese(Layer): dot_axes: Same meaning as `dot_axes` argument of Merge layer ''' def __init__(self, layer, inputs, merge_mode='concat', - concat_axis=1, dot_axes=-1): + concat_axis=1, dot_axes=-1, is_graph=False): if merge_mode not in ['sum', 'mul', 'concat', 'ave', 'join', 'cos', 'dot', None]: raise Exception('Invalid merge mode: ' + str(merge_mode)) @@ -1460,17 +1460,19 @@ class Siamese(Layer): raise Exception(merge_mode + ' merge takes exactly 2 layers') self.layer = layer + self.trainable = layer.trainable + self.is_graph = is_graph self.inputs = inputs - self.params = [] + self.layer.set_previous(inputs[0]) self.merge_mode = merge_mode self.concat_axis = concat_axis self.dot_axes = dot_axes - layer.set_previous(inputs[0]) + self.params = [] self.regularizers = [] self.constraints = [] self.updates = [] layers = [layer] - if merge_mode: + if merge_mode and not is_graph: layers += inputs for l in layers: params, regs, consts, updates = l.get_params() @@ -1518,15 +1520,18 @@ class Siamese(Layer): def get_params(self): return self.params, self.regularizers, self.constraints, self.updates - def set_layer_input(self, index): - l = self.layer - while not hasattr(l, 'previous'): - l = l.layers[0] - l.previous = self.inputs[index] + def set_layer_input(self, head): + layer = self.layer + from ..layers.containers import Sequential + while issubclass(layer.__class__, Sequential): + layer = layer.layers[0] + layer.previous = self.inputs[head] def get_output_at(self, head, train=False): - self.set_layer_input(head) - return self.layer.get_output(train) + X = self.inputs[head].get_output(train) + mask = self.inputs[head].get_output_mask(train) + Y = self.layer(X), mask) + return Y def get_output_shape(self, head, train=False): self.set_layer_input(head) @@ -1627,7 +1632,7 @@ class Siamese(Layer): def get_weights(self): weights = self.layer.get_weights() - if self.merge_mode: + if self.merge_mode and not self.is_graph: for m in self.inputs: weights += m.get_weights() return weights @@ -1636,7 +1641,7 @@ class Siamese(Layer): nb_param = len(self.layer.params) self.layer.set_weights(weights[:nb_param]) weights = weights[nb_param:] - if self.merge_mode: + if self.merge_mode and not self.is_graph: for i in range(len(self.inputs)): nb_param = len(self.inputs[i].params) self.inputs[i].set_weights(weights[:nb_param]) @@ -1648,7 +1653,8 @@ class Siamese(Layer): 'inputs': [m.get_config() for m in self.inputs], 'merge_mode': self.merge_mode, 'concat_axis': self.concat_axis, - 'dot_axes': self.dot_axes} + 'dot_axes': self.dot_axes, + 'is_graph': self.is_graph} base_config 
= super(Siamese, self).get_config() return dict(list(base_config.items()) + list(config.items())) From 652d61be4675cb1e2244988b0262b17a924aaca6 Mon Sep 17 00:00:00 2001 From: Fariz Rahman Date: Tue, 15 Dec 2015 15:18:54 +0530 Subject: [PATCH 011/145] Update core.py --- keras/layers/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/keras/layers/core.py b/keras/layers/core.py index 81a9febe0..71705f498 100644 --- a/keras/layers/core.py +++ b/keras/layers/core.py @@ -1530,7 +1530,7 @@ class Siamese(Layer): def get_output_at(self, head, train=False): X = self.inputs[head].get_output(train) mask = self.inputs[head].get_output_mask(train) - Y = self.layer(X), mask) + Y = self.layer(X, mask) return Y def get_output_shape(self, head, train=False): From 810cdc4a33aa2641dbb00b92a1b1da538c1e90b9 Mon Sep 17 00:00:00 2001 From: Fariz Rahman Date: Tue, 15 Dec 2015 15:24:32 +0530 Subject: [PATCH 012/145] Test sequential --- tests/keras/test_models.py | 116 ++++++++++++++++++++++++++++++++++++- 1 file changed, 115 insertions(+), 1 deletion(-) diff --git a/tests/keras/test_models.py b/tests/keras/test_models.py index 8bcaa74b6..115bfcec6 100644 --- a/tests/keras/test_models.py +++ b/tests/keras/test_models.py @@ -6,7 +6,7 @@ np.random.seed(1337) from keras import backend as K from keras.models import Graph, Sequential, model_from_json, model_from_yaml -from keras.layers.core import Dense, Activation, Merge, Lambda, LambdaMerge +from keras.layers.core import Dense, Activation, Merge, Lambda, LambdaMerge, Siamese, add_shared_layer from keras.layers import containers from keras.utils import np_utils from keras.utils.test_utils import get_test_data @@ -395,6 +395,120 @@ def test_sequential_count_params(): assert(n == model.count_params()) +def test_siamese_1(): + left = Sequential() + left.add(Dense(nb_hidden, input_shape=(input_dim,))) + left.add(Activation('relu')) + + right = Sequential() + right.add(Dense(nb_hidden, input_shape=(input_dim,))) + right.add(Activation('relu')) + + model = Sequential() + model.add(Siamese(Dense(nb_hidden), [left, right], merge_mode='sum')) + model.add(Dense(nb_class)) + model.add(Activation('softmax')) + model.compile(loss='categorical_crossentropy', optimizer='rmsprop') + + model.fit([X_train, X_train], y_train, batch_size=batch_size, nb_epoch=nb_epoch, show_accuracy=True, verbose=0, validation_data=([X_test, X_test], y_test)) + model.fit([X_train, X_train], y_train, batch_size=batch_size, nb_epoch=nb_epoch, show_accuracy=False, verbose=0, validation_data=([X_test, X_test], y_test)) + model.fit([X_train, X_train], y_train, batch_size=batch_size, nb_epoch=nb_epoch, show_accuracy=True, verbose=0, validation_split=0.1) + model.fit([X_train, X_train], y_train, batch_size=batch_size, nb_epoch=nb_epoch, show_accuracy=False, verbose=0, validation_split=0.1) + model.fit([X_train, X_train], y_train, batch_size=batch_size, nb_epoch=nb_epoch, verbose=0) + model.fit([X_train, X_train], y_train, batch_size=batch_size, nb_epoch=nb_epoch, verbose=0, shuffle=False) + + loss = model.evaluate([X_train, X_train], y_train, verbose=0) + assert(loss < 0.7) + + model.predict([X_test, X_test], verbose=0) + model.predict_classes([X_test, X_test], verbose=0) + model.predict_proba([X_test, X_test], verbose=0) + model.get_config(verbose=0) + + # test weight saving + fname = 'test_merge_sum_temp.h5' + model.save_weights(fname, overwrite=True) + left = Sequential() + left.add(Dense(nb_hidden, input_shape=(input_dim,))) + left.add(Activation('relu')) + right = Sequential() + 
right.add(Dense(nb_hidden, input_shape=(input_dim,))) + right.add(Activation('relu')) + model = Sequential() + model.add(Merge([left, right], mode='sum')) + model.add(Dense(nb_class)) + model.add(Activation('softmax')) + model.load_weights(fname) + os.remove(fname) + model.compile(loss='categorical_crossentropy', optimizer='rmsprop') + + nloss = model.evaluate([X_train, X_train], y_train, verbose=0) + assert(loss == nloss) + + +@pytest.mark.skipif(K._BACKEND == 'tensorflow', + reason='currently not working with TensorFlow') + +def test_siamese_2(): + left = Sequential() + left.add(Dense(nb_hidden, input_shape=(input_dim,))) + left.add(Activation('relu')) + + right = Sequential() + right.add(Dense(nb_hidden, input_shape=(input_dim,))) + right.add(Activation('relu')) + + add_shared_layer(Dense(nb_hidden), [left, right]) + + left.add(Dense(nb_hidden)) + right.add(Dense(nb_hidden)) + + add_shared_layer(Dense(nb_hidden), [left, right]) + + model = Sequential() + model.add(Merge([left, right], mode='sum')) + model.add(Dense(nb_class)) + model.add(Activation('softmax')) + model.compile(loss='categorical_crossentropy', optimizer='rmsprop') + + model.fit([X_train, X_train], y_train, batch_size=batch_size, nb_epoch=nb_epoch, show_accuracy=True, verbose=0, validation_data=([X_test, X_test], y_test)) + model.fit([X_train, X_train], y_train, batch_size=batch_size, nb_epoch=nb_epoch, show_accuracy=False, verbose=0, validation_data=([X_test, X_test], y_test)) + model.fit([X_train, X_train], y_train, batch_size=batch_size, nb_epoch=nb_epoch, show_accuracy=True, verbose=0, validation_split=0.1) + model.fit([X_train, X_train], y_train, batch_size=batch_size, nb_epoch=nb_epoch, show_accuracy=False, verbose=0, validation_split=0.1) + model.fit([X_train, X_train], y_train, batch_size=batch_size, nb_epoch=nb_epoch, verbose=0) + model.fit([X_train, X_train], y_train, batch_size=batch_size, nb_epoch=nb_epoch, verbose=0, shuffle=False) + + loss = model.evaluate([X_train, X_train], y_train, verbose=0) + assert(loss < 0.7) + + model.predict([X_test, X_test], verbose=0) + model.predict_classes([X_test, X_test], verbose=0) + model.predict_proba([X_test, X_test], verbose=0) + model.get_config(verbose=0) + + # test weight saving + fname = 'test_merge_sum_temp.h5' + model.save_weights(fname, overwrite=True) + left = Sequential() + left.add(Dense(nb_hidden, input_shape=(input_dim,))) + left.add(Activation('relu')) + right = Sequential() + right.add(Dense(nb_hidden, input_shape=(input_dim,))) + right.add(Activation('relu')) + model = Sequential() + model.add(Merge([left, right], mode='sum')) + model.add(Dense(nb_class)) + model.add(Activation('softmax')) + model.load_weights(fname) + os.remove(fname) + model.compile(loss='categorical_crossentropy', optimizer='rmsprop') + + nloss = model.evaluate([X_train, X_train], y_train, verbose=0) + assert(loss == nloss) + + +@pytest.mark.skipif(K._BACKEND == 'tensorflow', + reason='currently not working with TensorFlow') ############### # GRAPH TEST # From 0855541280de2264dc758ef8e7b9b0c41b5c901b Mon Sep 17 00:00:00 2001 From: Fariz Rahman Date: Tue, 15 Dec 2015 15:37:08 +0530 Subject: [PATCH 013/145] Siamese graph tests --- tests/keras/test_models.py | 79 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 79 insertions(+) diff --git a/tests/keras/test_models.py b/tests/keras/test_models.py index 115bfcec6..6a84deb0c 100644 --- a/tests/keras/test_models.py +++ b/tests/keras/test_models.py @@ -620,7 +620,86 @@ def test_1o_2i(): graph.get_config(verbose=1) +def test_siamese_3(): + 
graph = Graph() + graph.add_input(name='input1', input_shape=(32,)) + graph.add_input(name='input2', input_shape=(32,)) + graph.add_shared_node(Dense(16), name='shared', inputs=['input1', 'input2'], merge_mode='sum') + graph.add_node(Dense(4), name='dense1', input='shared') + graph.add_node(Dense(4), name='dense2', input='dense1') + + graph.add_output(name='output1', input='dense2') + graph.compile('rmsprop', {'output1': 'mse'}) + + graph.fit({'input1': X_train_graph, 'input2': X2_train_graph, 'output1': y_train_graph}, + nb_epoch=10) + out = graph.predict({'input1': X_test_graph, 'input2': X2_test_graph}) + assert(type(out == dict)) + assert(len(out) == 1) + + loss = graph.test_on_batch({'input1': X_test_graph, 'input2': X2_test_graph, 'output1': y_test_graph}) + loss = graph.train_on_batch({'input1': X_test_graph, 'input2': X2_test_graph, 'output1': y_test_graph}) + loss = graph.evaluate({'input1': X_test_graph, 'input2': X2_test_graph, 'output1': y_test_graph}) + assert(loss < 3.0) + + graph.get_config(verbose=1) + +def test_siamese_4(): + graph = Graph() + graph.add_input(name='input1', input_shape=(32,)) + graph.add_input(name='input2', input_shape=(32,)) + + graph.add_shared_node(Dense(16), name='shared1', inputs['input1', 'input2']) + graph.add_shared_node(Dense(4), name='shared2', inputs=['shared1']) + graph.add_shared_node(Dense(4), name='shared3', inputs=['shared2'], merge_mode='sum') + graph.add_node(Dense(4), name='dense', input='shared3') + + graph.add_output(name='output1', inputs=['dense2', 'dense3'], + merge_mode='sum') + graph.compile('rmsprop', {'output1': 'mse'}) + + graph.fit({'input1': X_train_graph, 'input2': X2_train_graph, 'output1': y_train_graph}, + nb_epoch=10) + out = graph.predict({'input1': X_test_graph, 'input2': X2_test_graph}) + assert(type(out == dict)) + assert(len(out) == 1) + + loss = graph.test_on_batch({'input1': X_test_graph, 'input2': X2_test_graph, 'output1': y_test_graph}) + loss = graph.train_on_batch({'input1': X_test_graph, 'input2': X2_test_graph, 'output1': y_test_graph}) + loss = graph.evaluate({'input1': X_test_graph, 'input2': X2_test_graph, 'output1': y_test_graph}) + assert(loss < 3.0) + + graph.get_config(verbose=1) + +def test_siamese_5(): + graph = Graph() + graph.add_input(name='input1', input_shape=(32,)) + graph.add_input(name='input2', input_shape=(32,)) + + graph.add_shared_node(Dense(16), name='shared1', inputs['input1', 'input2']) + graph.add_shared_node(Dense(4), name='shared2', inputs=['shared1']) + graph.add_shared_node(Dense(4), name='shared3', inputs=['shared2'], outputs=['shared_output1','shared_output2']) + graph.add_node(Dense(4), name='dense1', input='shared_output1') + graph.add_node(Dense(4), name='dense2', input='shared_output2') + + graph.add_output(name='output1', inputs=['dense2', 'dense3'], + merge_mode='sum') + graph.compile('rmsprop', {'output1': 'mse'}) + + graph.fit({'input1': X_train_graph, 'input2': X2_train_graph, 'output1': y_train_graph}, + nb_epoch=10) + out = graph.predict({'input1': X_test_graph, 'input2': X2_test_graph}) + assert(type(out == dict)) + assert(len(out) == 1) + + loss = graph.test_on_batch({'input1': X_test_graph, 'input2': X2_test_graph, 'output1': y_test_graph}) + loss = graph.train_on_batch({'input1': X_test_graph, 'input2': X2_test_graph, 'output1': y_test_graph}) + loss = graph.evaluate({'input1': X_test_graph, 'input2': X2_test_graph, 'output1': y_test_graph}) + assert(loss < 3.0) + + graph.get_config(verbose=1) + def test_2o_1i_weights(): # test a non-sequential graph with 1 
input and 2 outputs graph = Graph() From 07723ccff44661ea3fd457eaf81803c643471312 Mon Sep 17 00:00:00 2001 From: Fariz Rahman Date: Tue, 15 Dec 2015 15:39:41 +0530 Subject: [PATCH 014/145] Update test_models.py --- tests/keras/test_models.py | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) diff --git a/tests/keras/test_models.py b/tests/keras/test_models.py index 6a84deb0c..5a186d3fe 100644 --- a/tests/keras/test_models.py +++ b/tests/keras/test_models.py @@ -427,17 +427,19 @@ def test_siamese_1(): # test weight saving fname = 'test_merge_sum_temp.h5' - model.save_weights(fname, overwrite=True) left = Sequential() left.add(Dense(nb_hidden, input_shape=(input_dim,))) left.add(Activation('relu')) + right = Sequential() right.add(Dense(nb_hidden, input_shape=(input_dim,))) right.add(Activation('relu')) + model = Sequential() - model.add(Merge([left, right], mode='sum')) + model.add(Siamese(Dense(nb_hidden), [left, right], merge_mode='sum')) model.add(Dense(nb_class)) model.add(Activation('softmax')) + model.load_weights(fname) os.remove(fname) model.compile(loss='categorical_crossentropy', optimizer='rmsprop') @@ -492,13 +494,23 @@ def test_siamese_2(): left = Sequential() left.add(Dense(nb_hidden, input_shape=(input_dim,))) left.add(Activation('relu')) + right = Sequential() right.add(Dense(nb_hidden, input_shape=(input_dim,))) right.add(Activation('relu')) + + add_shared_layer(Dense(nb_hidden), [left, right]) + + left.add(Dense(nb_hidden)) + right.add(Dense(nb_hidden)) + + add_shared_layer(Dense(nb_hidden), [left, right]) + model = Sequential() model.add(Merge([left, right], mode='sum')) model.add(Dense(nb_class)) model.add(Activation('softmax')) + model.load_weights(fname) os.remove(fname) model.compile(loss='categorical_crossentropy', optimizer='rmsprop') From 92f2717c99ed771767b2943c76ba47b1ec2d7c53 Mon Sep 17 00:00:00 2001 From: Fariz Rahman Date: Tue, 15 Dec 2015 16:02:44 +0530 Subject: [PATCH 015/145] Update test_models.py --- tests/keras/test_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/keras/test_models.py b/tests/keras/test_models.py index 5a186d3fe..cb10d5b9d 100644 --- a/tests/keras/test_models.py +++ b/tests/keras/test_models.py @@ -667,7 +667,7 @@ def test_siamese_4(): graph.add_shared_node(Dense(4), name='shared3', inputs=['shared2'], merge_mode='sum') graph.add_node(Dense(4), name='dense', input='shared3') - graph.add_output(name='output1', inputs=['dense2', 'dense3'], + graph.add_output(name='output1', input='dense', merge_mode='sum') graph.compile('rmsprop', {'output1': 'mse'}) From 91965e8f851144472974e1ec1ed64d7341975c66 Mon Sep 17 00:00:00 2001 From: Fariz Rahman Date: Tue, 15 Dec 2015 16:08:50 +0530 Subject: [PATCH 016/145] Update test_models.py --- tests/keras/test_models.py | 7 ------- 1 file changed, 7 deletions(-) diff --git a/tests/keras/test_models.py b/tests/keras/test_models.py index cb10d5b9d..19d616f5a 100644 --- a/tests/keras/test_models.py +++ b/tests/keras/test_models.py @@ -447,10 +447,6 @@ def test_siamese_1(): nloss = model.evaluate([X_train, X_train], y_train, verbose=0) assert(loss == nloss) - -@pytest.mark.skipif(K._BACKEND == 'tensorflow', - reason='currently not working with TensorFlow') - def test_siamese_2(): left = Sequential() left.add(Dense(nb_hidden, input_shape=(input_dim,))) @@ -519,9 +515,6 @@ def test_siamese_2(): assert(loss == nloss) -@pytest.mark.skipif(K._BACKEND == 'tensorflow', - reason='currently not working with TensorFlow') - ############### # GRAPH TEST # ############### 
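A usage sketch of the shared-layer API exercised by the Siamese tests above (0.3-era API,
mirroring test_siamese_2; sizes are illustrative). A single Dense instance is shared across
two input branches, so both branches train the same weights:

from keras.models import Sequential
from keras.layers.core import Dense, Activation, Merge, add_shared_layer

left = Sequential()
left.add(Dense(16, input_shape=(32,)))
left.add(Activation('relu'))

right = Sequential()
right.add(Dense(16, input_shape=(32,)))
right.add(Activation('relu'))

add_shared_layer(Dense(16), [left, right])  # one layer instance, two branches

model = Sequential()
model.add(Merge([left, right], mode='sum'))
model.add(Dense(4, activation='softmax'))
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')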
From 3dcff62578799ac92e62d2deccf55b5a54f9ecb9 Mon Sep 17 00:00:00 2001 From: Fariz Rahman Date: Tue, 15 Dec 2015 16:21:12 +0530 Subject: [PATCH 017/145] add arg cache_enabled --- keras/layers/core.py | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/keras/layers/core.py b/keras/layers/core.py index 71705f498..332902572 100644 --- a/keras/layers/core.py +++ b/keras/layers/core.py @@ -35,7 +35,8 @@ class Layer(object): def __init__(self, **kwargs): allowed_kwargs = {'input_shape', 'trainable', - 'batch_input_shape'} + 'batch_input_shape', + 'cache_enabled'} for kwarg in kwargs: assert kwarg in allowed_kwargs, "Keyword argument not understood: " + kwarg if 'input_shape' in kwargs: @@ -46,20 +47,17 @@ class Layer(object): self._trainable = kwargs['trainable'] if not hasattr(self, 'params'): self.params = [] + self.cache_enabled = True + if 'cache_enabled' in kwargs: + self.cache_enabled = kwargs['cache_enabled'] - def __call__(self, X, mask=None, train=False): - # set temporary input and mask - tmp_input = self.get_input - tmp_mask = None - if hasattr(self, 'get_input_mask'): - tmp_mask = self.get_input_mask - self.get_input_mask = lambda _: mask + def __call__(self, X, train=False): + # set temporary input + tmp = self.get_input self.get_input = lambda _: X Y = self.get_output(train=train) - # return input and mask to what it was - self.get_input = tmp_input - if hasattr(self, 'get_input_mask'): - self.get_input_mask = tmp_mask + # return input to what it was + self.get_input = tmp return Y def set_previous(self, layer, connection_map={}): @@ -138,12 +136,12 @@ class Layer(object): if hasattr(self, 'previous'): # to avoid redundant computations, # layer outputs are cached when possible. - if hasattr(self, 'layer_cache'): + if hasattr(self, 'layer_cache') and self.cache_enabled: previous_layer_id = '%s_%s' % (id(self.previous), train) if previous_layer_id in self.layer_cache: return self.layer_cache[previous_layer_id] previous_output = self.previous.get_output(train=train) - if hasattr(self, 'layer_cache'): + if hasattr(self, 'layer_cache') and self.cache_enabled: previous_layer_id = '%s_%s' % (id(self.previous), train) self.layer_cache[previous_layer_id] = previous_output return previous_output @@ -218,6 +216,7 @@ class Layer(object): config['input_shape'] = self._input_shape[1:] if hasattr(self, '_trainable'): config['trainable'] = self._trainable + config['cache_enabled'] = self.cache_enabled return config def get_params(self): From 9620a497c7335053a75a59c5515e683813047991 Mon Sep 17 00:00:00 2001 From: Fariz Rahman Date: Tue, 15 Dec 2015 16:25:23 +0530 Subject: [PATCH 018/145] Update core.py --- keras/layers/core.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/keras/layers/core.py b/keras/layers/core.py index 332902572..debf8156d 100644 --- a/keras/layers/core.py +++ b/keras/layers/core.py @@ -49,7 +49,15 @@ class Layer(object): self.params = [] self.cache_enabled = True if 'cache_enabled' in kwargs: - self.cache_enabled = kwargs['cache_enabled'] + self._cache_enabled = kwargs['cache_enabled'] + + @property + def cache_enabled(self): + return self._cache_enabled + + @cache_enabled.setter + def cache_enabled(self, value): + self._cache_enabled = value def __call__(self, X, train=False): # set temporary input @@ -250,7 +258,6 @@ class Layer(object): ''' return sum([K.count_params(p) for p in self.params]) - class MaskedLayer(Layer): '''If your layer trivially supports masking (by simply copying the input mask to 
the output), From 7fb4f4b073e34745f75b67ebd8e51e44a045ec5c Mon Sep 17 00:00:00 2001 From: Fariz Rahman Date: Tue, 15 Dec 2015 16:30:10 +0530 Subject: [PATCH 019/145] Update core.py --- keras/layers/core.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/keras/layers/core.py b/keras/layers/core.py index debf8156d..7cd5a9899 100644 --- a/keras/layers/core.py +++ b/keras/layers/core.py @@ -59,13 +59,19 @@ class Layer(object): def cache_enabled(self, value): self._cache_enabled = value - def __call__(self, X, train=False): + def __call__(self, X, mask, train=False): # set temporary input - tmp = self.get_input + tmp_input = self.get_input + tmp_mask = None + if hasattr(self, 'get_input_mask'): + tmp_mask = self.get_input_mask + self.get_input_mask = lambda _: mask self.get_input = lambda _: X Y = self.get_output(train=train) # return input to what it was - self.get_input = tmp + if hasattr(self, 'get_input_mask'): + self.get_input_mask = tmp_mask + self.get_input = tmp_input return Y def set_previous(self, layer, connection_map={}): From 080a8199f408eef9698330e67a2000d388e58cac Mon Sep 17 00:00:00 2001 From: Fariz Rahman Date: Tue, 15 Dec 2015 16:35:06 +0530 Subject: [PATCH 020/145] Fix bug regarding layer cache --- keras/layers/containers.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/keras/layers/containers.py b/keras/layers/containers.py index e752df1db..99fbc5a88 100644 --- a/keras/layers/containers.py +++ b/keras/layers/containers.py @@ -25,6 +25,9 @@ class Sequential(Layer): self.add(layer) def __call__(self, X, mask=None, train=False): + #turn off layer cache temporarily + tmp_cache_enabled = self.cache_enabled + self.cache_enabled = False #recursively search for a layer which is not a Sequential model layer = self while issubclass(layer.__class__, Sequential): @@ -41,8 +44,15 @@ class Sequential(Layer): layer.get_input = tmp_input if hasattr(layer, 'get_input_mask'): layer.get_input_mask = tmp_mask + self.cache_enabled = tmp_cache_enabled return Y + @cache_enabled.setter + def cache_enabled(self, value): + self._cache_enabled = value + for l in self.layers: + l.cache_enabled = value + def set_previous(self, layer): self.layers[0].previous = layer From f5d56fa2f721ede5efdb78750711080405579dc8 Mon Sep 17 00:00:00 2001 From: Fariz Rahman Date: Tue, 15 Dec 2015 16:40:49 +0530 Subject: [PATCH 021/145] Update test_models.py --- tests/keras/test_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/keras/test_models.py b/tests/keras/test_models.py index 19d616f5a..8240a9c64 100644 --- a/tests/keras/test_models.py +++ b/tests/keras/test_models.py @@ -655,7 +655,7 @@ def test_siamese_4(): graph.add_input(name='input1', input_shape=(32,)) graph.add_input(name='input2', input_shape=(32,)) - graph.add_shared_node(Dense(16), name='shared1', inputs['input1', 'input2']) + graph.add_shared_node(Dense(16), name='shared1', inputs=['input1', 'input2']) graph.add_shared_node(Dense(4), name='shared2', inputs=['shared1']) graph.add_shared_node(Dense(4), name='shared3', inputs=['shared2'], merge_mode='sum') graph.add_node(Dense(4), name='dense', input='shared3') From 5c83c699363e5fc59e3045d06e1d3fc31c9b673e Mon Sep 17 00:00:00 2001 From: Fariz Rahman Date: Tue, 15 Dec 2015 17:27:40 +0530 Subject: [PATCH 022/145] Update containers.py --- keras/layers/containers.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/keras/layers/containers.py b/keras/layers/containers.py index 99fbc5a88..af6dc0d9e 100644 --- 
a/keras/layers/containers.py +++ b/keras/layers/containers.py @@ -47,6 +47,10 @@ class Sequential(Layer): self.cache_enabled = tmp_cache_enabled return Y + @propery + def cache_enabled(self): + return self._cache_enabled + @cache_enabled.setter def cache_enabled(self, value): self._cache_enabled = value From f20383b7f7baf3b82451a90fceef18af9bcb2cdc Mon Sep 17 00:00:00 2001 From: Fariz Rahman Date: Tue, 15 Dec 2015 17:45:55 +0530 Subject: [PATCH 023/145] Update containers.py --- keras/layers/containers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/keras/layers/containers.py b/keras/layers/containers.py index af6dc0d9e..24886f494 100644 --- a/keras/layers/containers.py +++ b/keras/layers/containers.py @@ -47,7 +47,7 @@ class Sequential(Layer): self.cache_enabled = tmp_cache_enabled return Y - @propery + @property def cache_enabled(self): return self._cache_enabled From e0d8c0919931358d349eedc7a649c3a72f6f5c47 Mon Sep 17 00:00:00 2001 From: Fariz Rahman Date: Tue, 15 Dec 2015 17:58:36 +0530 Subject: [PATCH 024/145] default arg for mask --- keras/layers/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/keras/layers/core.py b/keras/layers/core.py index 7cd5a9899..d0c87ac1e 100644 --- a/keras/layers/core.py +++ b/keras/layers/core.py @@ -59,7 +59,7 @@ class Layer(object): def cache_enabled(self, value): self._cache_enabled = value - def __call__(self, X, mask, train=False): + def __call__(self, X, mask=None, train=False): # set temporary input tmp_input = self.get_input tmp_mask = None From 3e19b0252fd7db4b245e12f6a644f734df5f23bb Mon Sep 17 00:00:00 2001 From: Fariz Rahman Date: Tue, 15 Dec 2015 18:08:55 +0530 Subject: [PATCH 025/145] Update test_models.py --- tests/keras/test_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/keras/test_models.py b/tests/keras/test_models.py index 8240a9c64..c2397cbb2 100644 --- a/tests/keras/test_models.py +++ b/tests/keras/test_models.py @@ -682,7 +682,7 @@ def test_siamese_5(): graph.add_input(name='input1', input_shape=(32,)) graph.add_input(name='input2', input_shape=(32,)) - graph.add_shared_node(Dense(16), name='shared1', inputs['input1', 'input2']) + graph.add_shared_node(Dense(16), name='shared1', inputs=['input1', 'input2']) graph.add_shared_node(Dense(4), name='shared2', inputs=['shared1']) graph.add_shared_node(Dense(4), name='shared3', inputs=['shared2'], outputs=['shared_output1','shared_output2']) graph.add_node(Dense(4), name='dense1', input='shared_output1') From 775e91c57309b7849818c61ce52a5b067a4a1fe7 Mon Sep 17 00:00:00 2001 From: Fariz Rahman Date: Tue, 15 Dec 2015 18:09:39 +0530 Subject: [PATCH 026/145] Update core.py --- keras/layers/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/keras/layers/core.py b/keras/layers/core.py index d0c87ac1e..979c17c2f 100644 --- a/keras/layers/core.py +++ b/keras/layers/core.py @@ -47,7 +47,7 @@ class Layer(object): self._trainable = kwargs['trainable'] if not hasattr(self, 'params'): self.params = [] - self.cache_enabled = True + self._cache_enabled = True if 'cache_enabled' in kwargs: self._cache_enabled = kwargs['cache_enabled'] From c2220bff6e3a1c323c86df49cd616e8406732b93 Mon Sep 17 00:00:00 2001 From: Fariz Rahman Date: Tue, 15 Dec 2015 18:48:23 +0530 Subject: [PATCH 027/145] call base constructor in Merge and Siamese --- keras/layers/core.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/keras/layers/core.py b/keras/layers/core.py index 979c17c2f..0540c0679 100644 --- a/keras/layers/core.py 
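As context for the `cache_enabled` plumbing in the patches above: a layer memoizes the output of its upstream layer in `layer_cache` under a key built from the upstream layer's identity and the train/test flag, and the new kwarg makes that memoization switchable. A minimal sketch of the pattern (illustrative class and attribute names, not the actual Keras code):

```python
class Upstream(object):
    calls = 0

    def get_output(self, train=False):
        Upstream.calls += 1
        return 'output(train=%s)' % train


class CachingNode(object):
    def __init__(self, previous, cache_enabled=True):
        self.previous = previous
        self.cache_enabled = cache_enabled
        self.layer_cache = {}

    def get_input(self, train=False):
        # key on upstream identity plus the train/test flag, so the
        # train-time and test-time expressions are cached independently
        key = '%s_%s' % (id(self.previous), train)
        if self.cache_enabled and key in self.layer_cache:
            return self.layer_cache[key]
        out = self.previous.get_output(train=train)
        if self.cache_enabled:
            self.layer_cache[key] = out
        return out


node = CachingNode(Upstream())
node.get_input()
node.get_input()              # served from the cache
print(Upstream.calls)         # 1
node.cache_enabled = False
node.get_input()              # recomputed while caching is off
print(Upstream.calls)         # 2
```

This is also why `Sequential.__call__` above switches the cache off while it temporarily rewires `get_input`: a result computed against the temporary input must not be served later as if it came from the real upstream layer.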
+++ b/keras/layers/core.py @@ -476,6 +476,7 @@ class Merge(Layer): if p not in self.params: self.params.append(p) self.constraints.append(c) + super(Merge, self).__init__() @property def output_shape(self): @@ -1495,6 +1496,7 @@ class Siamese(Layer): if p not in self.params: self.params.append(p) self.constraints.append(c) + super(Siamese, self).__init__() @property def output_shape(self): From 3adbb2bd4f2dce51df35a376de8a1fef75964e78 Mon Sep 17 00:00:00 2001 From: Fariz Rahman Date: Tue, 15 Dec 2015 18:50:07 +0530 Subject: [PATCH 028/145] Fix tests --- tests/keras/test_models.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/keras/test_models.py b/tests/keras/test_models.py index c2397cbb2..47c845d1f 100644 --- a/tests/keras/test_models.py +++ b/tests/keras/test_models.py @@ -418,7 +418,7 @@ def test_siamese_1(): model.fit([X_train, X_train], y_train, batch_size=batch_size, nb_epoch=nb_epoch, verbose=0, shuffle=False) loss = model.evaluate([X_train, X_train], y_train, verbose=0) - assert(loss < 0.7) + assert(loss < 0.8) model.predict([X_test, X_test], verbose=0) model.predict_classes([X_test, X_test], verbose=0) @@ -477,7 +477,7 @@ def test_siamese_2(): model.fit([X_train, X_train], y_train, batch_size=batch_size, nb_epoch=nb_epoch, verbose=0, shuffle=False) loss = model.evaluate([X_train, X_train], y_train, verbose=0) - assert(loss < 0.7) + assert(loss < 0.8) model.predict([X_test, X_test], verbose=0) model.predict_classes([X_test, X_test], verbose=0) @@ -688,7 +688,7 @@ def test_siamese_5(): graph.add_node(Dense(4), name='dense1', input='shared_output1') graph.add_node(Dense(4), name='dense2', input='shared_output2') - graph.add_output(name='output1', inputs=['dense2', 'dense3'], + graph.add_output(name='output1', inputs=['dense1', 'dense2'], merge_mode='sum') graph.compile('rmsprop', {'output1': 'mse'}) From 534d81fc100c699c94060baa581ebcb605ba1681 Mon Sep 17 00:00:00 2001 From: Fariz Rahman Date: Tue, 15 Dec 2015 20:25:29 +0530 Subject: [PATCH 029/145] call base constructor in SiameseHead --- keras/layers/core.py | 1 + 1 file changed, 1 insertion(+) diff --git a/keras/layers/core.py b/keras/layers/core.py index 0540c0679..73f5f5055 100644 --- a/keras/layers/core.py +++ b/keras/layers/core.py @@ -1687,6 +1687,7 @@ class SiameseHead(Layer): def __init__(self, head): self.head = head self.params = [] + super(SiameseHead, self).__init__() def get_output(self, train=False): return self.get_input(train) From 794c083f6b1a48538e5b9f23752fd729785ab0a2 Mon Sep 17 00:00:00 2001 From: Fariz Rahman Date: Tue, 15 Dec 2015 20:29:04 +0530 Subject: [PATCH 030/145] Update containers.py --- keras/layers/containers.py | 1 + 1 file changed, 1 insertion(+) diff --git a/keras/layers/containers.py b/keras/layers/containers.py index 24886f494..a3da3bb0c 100644 --- a/keras/layers/containers.py +++ b/keras/layers/containers.py @@ -23,6 +23,7 @@ class Sequential(Layer): self.layer_cache = {} for layer in layers: self.add(layer) + self._cache_enabled = True def __call__(self, X, mask=None, train=False): #turn off layer cache temporarily From 97ba6aaaa1084d5c09911be3e63b5b99cbdfe880 Mon Sep 17 00:00:00 2001 From: Fariz Rahman Date: Tue, 15 Dec 2015 20:32:45 +0530 Subject: [PATCH 031/145] call base constructor in Lambda layers --- keras/layers/core.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/keras/layers/core.py b/keras/layers/core.py index 73f5f5055..36abbc043 100644 --- a/keras/layers/core.py +++ b/keras/layers/core.py @@ -1304,6 +1304,7 @@ class Lambda(Layer): 
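For the `marshal.dumps` calls in the hunk that follows: `Lambda` persists user-supplied functions by serializing their code objects (`__code__` on Python 3, `func_code` on Python 2). A small sketch of that round trip, assuming CPython (`marshal` bytes are interpreter-version specific, and the code object captures neither globals nor closures):

```python
import marshal
import types


def double(x):
    return 2 * x

# serialize only the function's code object
payload = marshal.dumps(double.__code__)

# rebuild a callable from the stored bytes
restored = types.FunctionType(marshal.loads(payload), globals())
assert restored(21) == 42
```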
self._output_shape = marshal.dumps(output_shape.__code__) else: self._output_shape = marshal.dumps(output_shape.func_code) + super(Lambda, self).__init__ @property def output_shape(self): @@ -1378,6 +1379,7 @@ class LambdaMerge(Lambda): self._output_shape = marshal.dumps(output_shape.__code__) else: self._output_shape = marshal.dumps(output_shape.func_code) + super(Lambda, self).__init__() @property def output_shape(self): From b2ab55611bbb01875cdac7a58717291ebccb1d4d Mon Sep 17 00:00:00 2001 From: Fariz Rahman Date: Tue, 15 Dec 2015 21:14:42 +0530 Subject: [PATCH 032/145] Fix weight save --- tests/keras/test_models.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/keras/test_models.py b/tests/keras/test_models.py index 47c845d1f..1bf484c2a 100644 --- a/tests/keras/test_models.py +++ b/tests/keras/test_models.py @@ -426,7 +426,8 @@ def test_siamese_1(): model.get_config(verbose=0) # test weight saving - fname = 'test_merge_sum_temp.h5' + fname = 'test_siamese_1.h5' + model.save_weights(fname, overwrite=True) left = Sequential() left.add(Dense(nb_hidden, input_shape=(input_dim,))) left.add(Activation('relu')) @@ -485,7 +486,7 @@ def test_siamese_2(): model.get_config(verbose=0) # test weight saving - fname = 'test_merge_sum_temp.h5' + fname = 'test_siamese_2.h5' model.save_weights(fname, overwrite=True) left = Sequential() left.add(Dense(nb_hidden, input_shape=(input_dim,))) From 3d51a26749937cb1a1aec40c20bc505e82809dce Mon Sep 17 00:00:00 2001 From: Fariz Rahman Date: Tue, 15 Dec 2015 22:48:51 +0530 Subject: [PATCH 033/145] Fix json serializing --- keras/layers/containers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/keras/layers/containers.py b/keras/layers/containers.py index a3da3bb0c..3674867bc 100644 --- a/keras/layers/containers.py +++ b/keras/layers/containers.py @@ -449,7 +449,7 @@ class Graph(Layer): self.namespace.add(sh_name) self.nodes[sh_name] = sh self.node_config.append({'name': sh_name, - 'inputs': [s], + 'inputs': [name], 'create_output': create_output}) if create_output: self.add_output(sh_name, input=sh_name) From 54d332bf639bb77be39c46dbeac511c9d6582f8b Mon Sep 17 00:00:00 2001 From: Fariz Rahman Date: Tue, 15 Dec 2015 23:02:20 +0530 Subject: [PATCH 034/145] Quotes for default string values in docs --- docs/autogen.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/docs/autogen.py b/docs/autogen.py index c62df2f9f..3b99b854d 100644 --- a/docs/autogen.py +++ b/docs/autogen.py @@ -80,6 +80,10 @@ def get_method_signature(method): for a in args: st += str(a) + ', ' for a, v in kwargs: + if type(v) == str: + v = '\'' + v + '\'' + elif type(v) == unicode: + v = 'u\'' + v + '\'' st += str(a) + '=' + str(v) + ', ' if kwargs or args: return st[:-2] + ')' From 85ba9aa8842bdd7bf867f057aa2960e87c282571 Mon Sep 17 00:00:00 2001 From: Fariz Rahman Date: Tue, 15 Dec 2015 23:44:35 +0530 Subject: [PATCH 035/145] Fix test_batchnorm_config() --- tests/keras/test_normalization.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/keras/test_normalization.py b/tests/keras/test_normalization.py index 41f85fb0b..4355b34a3 100644 --- a/tests/keras/test_normalization.py +++ b/tests/keras/test_normalization.py @@ -83,6 +83,7 @@ def test_batchnorm_config(): norm = normalization.BatchNormalization(input_shape=(10, 10), mode=1, epsilon=0.1, momentum=0.9) conf = norm.get_config() + del conf['cache_enabled'] conf_target = {"input_shape": (10, 10), "name": normalization.BatchNormalization.__name__, "epsilon": 0.1, "mode": 1, "momentum": 
0.9} From 515448f8596e2f9f1d0ffe91957c3da1a37dda23 Mon Sep 17 00:00:00 2001 From: Fariz Rahman Date: Tue, 15 Dec 2015 23:56:30 +0530 Subject: [PATCH 036/145] Add is_graph to docstring --- keras/layers/core.py | 1 + 1 file changed, 1 insertion(+) diff --git a/keras/layers/core.py b/keras/layers/core.py index 36abbc043..0a5e1786d 100644 --- a/keras/layers/core.py +++ b/keras/layers/core.py @@ -1463,6 +1463,7 @@ class Siamese(Layer): merge_mode: Same meaning as `mode` argument of Merge layer concat_axis: Same meaning as `concat_axis` argument of Merge layer dot_axes: Same meaning as `dot_axes` argument of Merge layer + is_graph: Should be set to True when used inside `Graph` ''' def __init__(self, layer, inputs, merge_mode='concat', concat_axis=1, dot_axes=-1, is_graph=False): From 3c8618bb39a18d75c97e58421d867d70006e373f Mon Sep 17 00:00:00 2001 From: Fariz Rahman Date: Wed, 16 Dec 2015 00:05:26 +0530 Subject: [PATCH 037/145] Update core.py --- keras/layers/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/keras/layers/core.py b/keras/layers/core.py index 0a5e1786d..3805ac3b6 100644 --- a/keras/layers/core.py +++ b/keras/layers/core.py @@ -1304,7 +1304,7 @@ class Lambda(Layer): self._output_shape = marshal.dumps(output_shape.__code__) else: self._output_shape = marshal.dumps(output_shape.func_code) - super(Lambda, self).__init__ + super(Lambda, self).__init__() @property def output_shape(self): From 1872e00beaee2c685384e0e3e1410ecce5945f19 Mon Sep 17 00:00:00 2001 From: lukedeo Date: Tue, 15 Dec 2015 19:46:19 +0100 Subject: [PATCH 038/145] adding better documentation to highway layers --- keras/layers/core.py | 267 ++++++++++++++++++++++++++----------------- 1 file changed, 163 insertions(+), 104 deletions(-) diff --git a/keras/layers/core.py b/keras/layers/core.py index 878e45596..b8d1a839a 100644 --- a/keras/layers/core.py +++ b/keras/layers/core.py @@ -1048,107 +1048,6 @@ class TimeDistributedDense(MaskedLayer): return dict(list(base_config.items()) + list(config.items())) -class TimeDistributedHighway(MaskedLayer): - ''' - Apply a same Highway layer for each dimension[1] (time_dimension) input. - Especially useful after a recurrent network with 'return_sequence=True'. - Tensor input dimensions: (nb_sample, time_dimension, input_dim) - Tensor output dimensions: (nb_sample, time_dimension, input_dim) - - NOTE that this layer carries it's input dimension, so an output dimension - isn't necessary. 
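The tiny fix in patch 037 above is worth spelling out: `super(Lambda, self).__init__` without parentheses merely looks up the bound method and discards it, so the base constructor never runs and attributes it would set (such as `_cache_enabled`) are missing. For example:

```python
class Base(object):
    def __init__(self):
        self.ready = True


class Broken(Base):
    def __init__(self):
        super(Broken, self).__init__   # looks up the method, never calls it


class Fixed(Base):
    def __init__(self):
        super(Fixed, self).__init__()  # actually runs Base.__init__


print(hasattr(Broken(), 'ready'))  # False
print(hasattr(Fixed(), 'ready'))   # True
```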
- - ''' - input_ndim = 3 - - def __init__(self, - init='glorot_uniform', transform_bias=-1, activation='linear', weights=None, - W_regularizer=None, b_regularizer=None, activity_regularizer=None, - W_constraint=None, b_constraint=None, - input_dim=None, input_length=None, **kwargs): - self.init = initializations.get(init) - self.transform_bias = transform_bias - self.activation = activations.get(activation) - - self.W_regularizer = regularizers.get(W_regularizer) - self.b_regularizer = regularizers.get(b_regularizer) - self.activity_regularizer = regularizers.get(activity_regularizer) - - self.W_constraint = constraints.get(W_constraint) - self.b_constraint = constraints.get(b_constraint) - self.constraints = [self.W_constraint, self.b_constraint] - - self.initial_weights = weights - - self.input_dim = input_dim - self.input_length = input_length - if self.input_dim: - kwargs['input_shape'] = (self.input_length, self.input_dim) - self.input = K.placeholder(ndim=3) - super(TimeDistributedHighway, self).__init__(**kwargs) - - def build(self): - input_dim = self.input_shape[2] - - self.W = self.init((input_dim, input_dim)) - self.b = K.zeros((input_dim)) - - self.W_carry = self.init((input_dim, input_dim)) - self.b_carry = K.variable(np.ones((input_dim)) * self.transform_bias) - - self.params = [self.W, self.b, self.W_carry, self.b_carry] - self.regularizers = [] - - if self.W_regularizer: - self.W_regularizer.set_param(self.W) - self.regularizers.append(self.W_regularizer) - - if self.b_regularizer: - self.b_regularizer.set_param(self.b) - self.regularizers.append(self.b_regularizer) - - if self.activity_regularizer: - self.activity_regularizer.set_layer(self) - self.regularizers.append(self.activity_regularizer) - - if self.initial_weights is not None: - self.set_weights(self.initial_weights) - del self.initial_weights - - @property - def output_shape(self): - input_shape = self.input_shape - return (input_shape[0], input_shape[1], input_shape[2]) - - def get_output(self, train=False): - X = self.get_input(train) - - def step(x, states): - output = self.activation(K.dot(x, self.W) + self.b) - transform_weight = activations.sigmoid(K.dot(x, self.W_carry) + self.b_carry) - output *= transform_weight - output = output + (1 - transform_weight) * x - return output, [] - - last_output, outputs, states = K.rnn(step, X, [], masking=False) - return outputs - - def get_config(self): - config = {"name": self.__class__.__name__, - "init": self.init.__name__, - "transform_bias": self.transform_bias, - "activation": self.activation.__name__, - "W_regularizer": self.W_regularizer.get_config() if self.W_regularizer else None, - "b_regularizer": self.b_regularizer.get_config() if self.b_regularizer else None, - "activity_regularizer": self.activity_regularizer.get_config() if self.activity_regularizer else None, - "W_constraint": self.W_constraint.get_config() if self.W_constraint else None, - "b_constraint": self.b_constraint.get_config() if self.b_constraint else None, - "input_dim": self.input_dim, - "input_length": self.input_length} - base_config = super(TimeDistributedHighway, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - class AutoEncoder(Layer): '''A customizable autoencoder model. 
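The Highway documentation added in the next hunk describes a gated skip connection: with transform gate `t = sigmoid(dot(x, W_carry) + b_carry)`, the output is `t * activation(dot(x, W) + b) + (1 - t) * x`, exactly the `step` function shown later in the patch. A numpy sketch of one forward pass (shapes and initial values are illustrative):

```python
import numpy as np


def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))


def highway_forward(x, W, b, W_carry, b_carry, activation=np.tanh):
    h = activation(np.dot(x, W) + b)            # candidate activation
    t = sigmoid(np.dot(x, W_carry) + b_carry)   # transform gate in (0, 1)
    return t * h + (1.0 - t) * x                # blend activation with raw input


dim = 4
rng = np.random.RandomState(0)
x = rng.randn(2, dim)
W = 0.05 * rng.randn(dim, dim)
W_carry = 0.05 * rng.randn(dim, dim)
b = np.zeros(dim)
b_carry = -2.0 * np.ones(dim)  # negative transform_bias: start close to the identity map
print(highway_forward(x, W, b, W_carry, b_carry).shape)  # (2, 4)
```

Because the gate blends the raw input with the activation elementwise, input and output dimensionality must match, which is why neither Highway variant takes an `output_dim`.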
@@ -1800,10 +1699,41 @@ class Highway(Layer): '''Densely connected highway network, a natural extension of LSTMs to feedforward networks - This layer has identical output shape to input shape, and - thus doesn't need an output_dim specified - cite: http://arxiv.org/pdf/1505.00387v2.pdf + + # Input shape + 2D tensor with shape: `(nb_samples, input_dim)`. + + # Output shape + 2D tensor with shape: `(nb_samples, input_dim)`. + + # Arguments + init: name of initialization function for the weights of the layer + (see [initializations](../initializations.md)), + or alternatively, Theano function to use for weights + initialization. This parameter is only relevant + if you don't pass a `weights` argument. + transform_bias: value for the bias to take on initially (default -2) + activation: name of activation function to use + (see [activations](../activations.md)), + or alternatively, elementwise Theano function. + If you don't specify anything, no activation is applied + (ie. "linear" activation: a(x) = x). + weights: list of numpy arrays to set as initial weights. + The list should have 1 element, of shape `(input_dim, output_dim)`. + W_regularizer: instance of [WeightRegularizer](../regularizers.md) + (eg. L1 or L2 regularization), applied to the main weights matrix. + b_regularizer: instance of [WeightRegularizer](../regularizers.md), + applied to the bias. + activity_regularizer: instance of [ActivityRegularizer](../regularizers.md), + applied to the network output. + W_constraint: instance of the [constraints](../constraints.md) module + (eg. maxnorm, nonneg), applied to the main weights matrix. + b_constraint: instance of the [constraints](../constraints.md) module, + applied to the bias. + input_dim: dimensionality of the input (integer). + This argument (or alternatively, the keyword argument `input_shape`) + is required when using this layer as the first layer in a model. ''' input_ndim = 2 @@ -1884,3 +1814,132 @@ class Highway(Layer): "input_dim": self.input_dim} base_config = super(Highway, self).get_config() return dict(list(base_config.items()) + list(config.items())) + + +class TimeDistributedHighway(MaskedLayer): + '''Apply a same Highway layer for each dimension[1] (time_dimension) input. + Especially useful after a recurrent network with 'return_sequence=True'. + + # Input shape + 3D tensor with shape `(nb_sample, time_dimension, input_dim)`. + + # Output shape + 3D tensor with shape `(nb_sample, time_dimension, input_dim)`. + + # Arguments + init: name of initialization function for the weights of the layer + (see [initializations](../initializations.md)), + or alternatively, Theano function to use for weights + initialization. This parameter is only relevant + if you don't pass a `weights` argument. + transform_bias: value for the bias to take on initially (default -2) + activation: name of activation function to use + (see [activations](../activations.md)), + or alternatively, elementwise Theano function. + If you don't specify anything, no activation is applied + (ie. "linear" activation: a(x) = x). + weights: list of numpy arrays to set as initial weights. + The list should have 1 element, of shape `(input_dim, output_dim)`. + W_regularizer: instance of [WeightRegularizer](../regularizers.md) + (eg. L1 or L2 regularization), applied to the main weights matrix. + b_regularizer: instance of [WeightRegularizer](../regularizers.md), + applied to the bias. + activity_regularizer: instance of [ActivityRegularizer](../regularizers.md), + applied to the network output. 
+ W_constraint: instance of the [constraints](../constraints.md) module + (eg. maxnorm, nonneg), applied to the main weights matrix. + b_constraint: instance of the [constraints](../constraints.md) module, + applied to the bias. + input_dim: dimensionality of the input (integer). + This argument (or alternatively, the keyword argument `input_shape`) + is required when using this layer as the first layer in a model. + ''' + input_ndim = 3 + + def __init__(self, + init='glorot_uniform', transform_bias=-1, activation='linear', weights=None, + W_regularizer=None, b_regularizer=None, activity_regularizer=None, + W_constraint=None, b_constraint=None, + input_dim=None, input_length=None, **kwargs): + self.init = initializations.get(init) + self.transform_bias = transform_bias + self.activation = activations.get(activation) + + self.W_regularizer = regularizers.get(W_regularizer) + self.b_regularizer = regularizers.get(b_regularizer) + self.activity_regularizer = regularizers.get(activity_regularizer) + + self.W_constraint = constraints.get(W_constraint) + self.b_constraint = constraints.get(b_constraint) + self.constraints = [self.W_constraint, self.b_constraint] + + self.initial_weights = weights + + self.input_dim = input_dim + self.input_length = input_length + if self.input_dim: + kwargs['input_shape'] = (self.input_length, self.input_dim) + self.input = K.placeholder(ndim=3) + super(TimeDistributedHighway, self).__init__(**kwargs) + + def build(self): + input_dim = self.input_shape[2] + + self.W = self.init((input_dim, input_dim)) + self.b = K.zeros((input_dim)) + + self.W_carry = self.init((input_dim, input_dim)) + self.b_carry = K.variable(np.ones((input_dim)) * self.transform_bias) + + self.params = [self.W, self.b, self.W_carry, self.b_carry] + self.regularizers = [] + + if self.W_regularizer: + self.W_regularizer.set_param(self.W) + self.regularizers.append(self.W_regularizer) + + if self.b_regularizer: + self.b_regularizer.set_param(self.b) + self.regularizers.append(self.b_regularizer) + + if self.activity_regularizer: + self.activity_regularizer.set_layer(self) + self.regularizers.append(self.activity_regularizer) + + if self.initial_weights is not None: + self.set_weights(self.initial_weights) + del self.initial_weights + + @property + def output_shape(self): + input_shape = self.input_shape + return (input_shape[0], input_shape[1], input_shape[2]) + + def get_output(self, train=False): + X = self.get_input(train) + + def step(x, states): + output = self.activation(K.dot(x, self.W) + self.b) + transform_weight = activations.sigmoid(K.dot(x, self.W_carry) + self.b_carry) + output *= transform_weight + output = output + (1 - transform_weight) * x + return output, [] + + last_output, outputs, states = K.rnn(step, X, [], masking=False) + return outputs + + def get_config(self): + config = {"name": self.__class__.__name__, + "init": self.init.__name__, + "transform_bias": self.transform_bias, + "activation": self.activation.__name__, + "W_regularizer": self.W_regularizer.get_config() if self.W_regularizer else None, + "b_regularizer": self.b_regularizer.get_config() if self.b_regularizer else None, + "activity_regularizer": self.activity_regularizer.get_config() if self.activity_regularizer else None, + "W_constraint": self.W_constraint.get_config() if self.W_constraint else None, + "b_constraint": self.b_constraint.get_config() if self.b_constraint else None, + "input_dim": self.input_dim, + "input_length": self.input_length} + base_config = super(TimeDistributedHighway, 
self).get_config() + return dict(list(base_config.items()) + list(config.items())) + From 4dcb2c04e396ee812a5fee981b45f723efee68f7 Mon Sep 17 00:00:00 2001 From: EderSantana Date: Tue, 15 Dec 2015 15:51:17 -0300 Subject: [PATCH 039/145] Add sequences tests and fix sequences docs --- keras/preprocessing/sequence.py | 53 ++++++++++++++++----- tests/keras/preprocessing/test_sequences.py | 25 ++++++++++ 2 files changed, 65 insertions(+), 13 deletions(-) create mode 100644 tests/keras/preprocessing/test_sequences.py diff --git a/keras/preprocessing/sequence.py b/keras/preprocessing/sequence.py index ef5f1e67a..ce9bc76a7 100644 --- a/keras/preprocessing/sequence.py +++ b/keras/preprocessing/sequence.py @@ -6,7 +6,7 @@ from six.moves import range def pad_sequences(sequences, maxlen=None, dtype='int32', padding='pre', truncating='pre', value=0.): """ - Pad each sequence to the same length: + Pad each sequence to the same length: the length of the longest sequence. If maxlen is provided, any sequence longer @@ -15,6 +15,19 @@ def pad_sequences(sequences, maxlen=None, dtype='int32', padding='pre', truncati Supports post-padding and pre-padding (default). + Parameters: + ----------- + sequences: list of lists where each element is a sequence + maxlen: int, maximum length + dtype: type to cast the resulting sequence. + padding: 'pre' or 'post', pad either before or after each sequence. + truncating: 'pre' or 'post', remove values from sequences larger than + maxlen either in the beginning or in the end of the sequence + value: float, value to pad the sequences to the desired value. + + Returns: + x: numpy array with dimensions (number_of_sequences, maxlen) + """ lengths = [len(s) for s in sequences] @@ -47,39 +60,53 @@ def make_sampling_table(size, sampling_factor=1e-5): This generates an array where the ith element is the probability that a word of rank i would be sampled, according to the sampling distribution used in word2vec. - + The word2vec formula is: p(word) = min(1, sqrt(word.frequency/sampling_factor) / (word.frequency/sampling_factor)) - We assume that the word frequencies follow Zipf's law (s=1) to derive + We assume that the word frequencies follow Zipf's law (s=1) to derive a numerical approximation of frequency(rank): frequency(rank) ~ 1/(rank * (log(rank) + gamma) + 1/2 - 1/(12*rank)) where gamma is the Euler-Mascheroni constant. + + Parameters: + ----------- + size: int, number of possible words to sample. ''' gamma = 0.577 rank = np.array(list(range(size))) rank[0] = 1 inv_fq = rank * (np.log(rank) + gamma) + 0.5 - 1./(12.*rank) f = sampling_factor * inv_fq + return np.minimum(1., f / np.sqrt(f)) -def skipgrams(sequence, vocabulary_size, - window_size=4, negative_samples=1., shuffle=True, - categorical=False, sampling_table=None): - ''' - Take a sequence (list of indexes of words), +def skipgrams(sequence, vocabulary_size, + window_size=4, negative_samples=1., shuffle=True, + categorical=False, sampling_table=None): + ''' + Take a sequence (list of indexes of words), returns couples of [word_index, other_word index] and labels (1s or 0s), where label = 1 if 'other_word' belongs to the context of 'word', and label=0 if 'other_word' is ramdomly sampled - @param vocabulary_size: int. maximum possible word index + 1 - @param window_size: int. actually half-window. The window of a word wi will be [i-window_size, i+window_size+1] - @param negative_samples: float >= 0. 0 for no negative (=random) samples. 1 for same number as positive samples. etc. - @param categorical: bool. 
if False, labels will be integers (eg. [0, 1, 1 .. ]), + Paramaters: + ----------- + vocabulary_size: int. maximum possible word index + 1 + window_size: int. actually half-window. The window of a word wi will be [i-window_size, i+window_size+1] + negative_samples: float >= 0. 0 for no negative (=random) samples. 1 for same number as positive samples. etc. + categorical: bool. if False, labels will be integers (eg. [0, 1, 1 .. ]), if True labels will be categorical eg. [[1,0],[0,1],[0,1] .. ] - Note: by convention, index 0 in the vocabulary is a non-word and will be skipped. + Returns: + -------- + couples, lables: where `couples` are int pairs and + `labels` are either 0 or 1. + + Notes: + ------ + By convention, index 0 in the vocabulary is a non-word and will be skipped. ''' couples = [] labels = [] diff --git a/tests/keras/preprocessing/test_sequences.py b/tests/keras/preprocessing/test_sequences.py new file mode 100644 index 000000000..542dfc3a0 --- /dev/null +++ b/tests/keras/preprocessing/test_sequences.py @@ -0,0 +1,25 @@ +import numpy as np +from numpy.testing import assert_allclose + +from keras.preprocessing.sequences import (pad_sequences, make_sampling_table, + skipgrams) + + +def test_pad_sequences(): + a = [np.arange(i) for i in np.arange(2, 5, 1)] + b = pad_sequences(a, maxlen=3) + assert_allclose(b, [[0, 0, 1], [0, 1, 2], [1, 2, 3]]) + + +def test_make_sampling_table(): + a = make_sampling_table(3) + assert_allclose(a, np.asarray([0.00315225, 0.00315225, 0.00547597]), + rtol=.1) + + +def test_skipgrams(): + couples, labels = skipgrams(np.arange(3), 3) + for couple in couples: + assert couple[0] in [0, 1, 2] and couple[1] in [0, 1, 2] + + assert 0 in labels and 1 in labels From 3dddabebc4759a51a0e6bc32012fc10c8de3a85b Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Tue, 15 Dec 2015 11:22:29 -0800 Subject: [PATCH 040/145] Fix style, flaky test --- keras/layers/containers.py | 6 +++--- keras/layers/core.py | 1 + tests/keras/layers/test_call.py | 17 +++++++++-------- tests/keras/test_models.py | 12 ++++++++---- 4 files changed, 21 insertions(+), 15 deletions(-) diff --git a/keras/layers/containers.py b/keras/layers/containers.py index 3674867bc..a7e9c8898 100644 --- a/keras/layers/containers.py +++ b/keras/layers/containers.py @@ -26,10 +26,10 @@ class Sequential(Layer): self._cache_enabled = True def __call__(self, X, mask=None, train=False): - #turn off layer cache temporarily + # turn off layer cache temporarily tmp_cache_enabled = self.cache_enabled self.cache_enabled = False - #recursively search for a layer which is not a Sequential model + # recursively search for a layer which is not a Sequential model layer = self while issubclass(layer.__class__, Sequential): layer = layer.layers[0] @@ -50,7 +50,7 @@ class Sequential(Layer): @property def cache_enabled(self): - return self._cache_enabled + return self._cache_enabled @cache_enabled.setter def cache_enabled(self, value): diff --git a/keras/layers/core.py b/keras/layers/core.py index 3805ac3b6..9fb01ccd5 100644 --- a/keras/layers/core.py +++ b/keras/layers/core.py @@ -264,6 +264,7 @@ class Layer(object): ''' return sum([K.count_params(p) for p in self.params]) + class MaskedLayer(Layer): '''If your layer trivially supports masking (by simply copying the input mask to the output), diff --git a/tests/keras/layers/test_call.py b/tests/keras/layers/test_call.py index a57d46fd5..e86ec91a6 100644 --- a/tests/keras/layers/test_call.py +++ b/tests/keras/layers/test_call.py @@ -16,10 +16,10 @@ def test_layer_call(): W = 
np.asarray(K.eval(layer.W)).astype(K.floatx()) X = K.placeholder(ndim=2) Y = layer(X) - F = K.function([X], [Y]) + f = K.function([X], [Y]) x = np.ones((nb_samples, input_dim)).astype(K.floatx()) - y = F([x])[0].astype(K.floatx()) + y = f([x])[0].astype(K.floatx()) t = np.dot(x, W).astype(K.floatx()) assert_allclose(t, y, rtol=.2) @@ -34,10 +34,10 @@ def test_sequential_call(): # test flat model X = K.placeholder(ndim=2) Y = model(X) - F = K.function([X], [Y]) + f = K.function([X], [Y]) x = np.ones((nb_samples, input_dim)).astype(K.floatx()) - y1 = F([x])[0].astype(K.floatx()) + y1 = f([x])[0].astype(K.floatx()) y2 = model.predict(x) # results of __call__ should match model.predict assert_allclose(y1, y2) @@ -46,14 +46,15 @@ def test_sequential_call(): model2 = Sequential() model2.add(model) model2.compile('sgd', 'mse') - - Y2 = model2(X) - F = K.function([X], [Y2]) - y1 = F([x])[0].astype(K.floatx()) + Y2 = model2(X) + f = K.function([X], [Y2]) + + y1 = f([x])[0].astype(K.floatx()) y2 = model2.predict(x) # results of __call__ should match model.predict assert_allclose(y1, y2) + if __name__ == '__main__': pytest.main([__file__]) diff --git a/tests/keras/test_models.py b/tests/keras/test_models.py index 1bf484c2a..9779542c2 100644 --- a/tests/keras/test_models.py +++ b/tests/keras/test_models.py @@ -56,7 +56,7 @@ def test_sequential(): model.train_on_batch(X_train[:32], y_train[:32]) loss = model.evaluate(X_train, y_train, verbose=0) - assert(loss < 0.7) + assert(loss < 0.8) model.predict(X_test, verbose=0) model.predict_classes(X_test, verbose=0) @@ -388,13 +388,12 @@ def test_sequential_count_params(): model.add(Dense(nb_units)) model.add(Dense(nb_classes)) model.add(Activation('softmax')) - assert(n == model.count_params()) model.compile('sgd', 'binary_crossentropy') - assert(n == model.count_params()) + def test_siamese_1(): left = Sequential() left.add(Dense(nb_hidden, input_shape=(input_dim,))) @@ -448,6 +447,7 @@ def test_siamese_1(): nloss = model.evaluate([X_train, X_train], y_train, verbose=0) assert(loss == nloss) + def test_siamese_2(): left = Sequential() left.add(Dense(nb_hidden, input_shape=(input_dim,))) @@ -626,6 +626,7 @@ def test_1o_2i(): graph.get_config(verbose=1) + def test_siamese_3(): graph = Graph() graph.add_input(name='input1', input_shape=(32,)) @@ -651,6 +652,7 @@ def test_siamese_3(): graph.get_config(verbose=1) + def test_siamese_4(): graph = Graph() graph.add_input(name='input1', input_shape=(32,)) @@ -678,6 +680,7 @@ def test_siamese_4(): graph.get_config(verbose=1) + def test_siamese_5(): graph = Graph() graph.add_input(name='input1', input_shape=(32,)) @@ -705,7 +708,8 @@ def test_siamese_5(): assert(loss < 3.0) graph.get_config(verbose=1) - + + def test_2o_1i_weights(): # test a non-sequential graph with 1 input and 2 outputs graph = Graph() From 2e29ef31a7d61a84720a437801d2035b61d264fc Mon Sep 17 00:00:00 2001 From: EderSantana Date: Tue, 15 Dec 2015 18:28:17 -0300 Subject: [PATCH 041/145] rename to and add more complete tests --- tests/keras/preprocessing/test_sequence.py | 55 ++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 tests/keras/preprocessing/test_sequence.py diff --git a/tests/keras/preprocessing/test_sequence.py b/tests/keras/preprocessing/test_sequence.py new file mode 100644 index 000000000..c62f3b6b5 --- /dev/null +++ b/tests/keras/preprocessing/test_sequence.py @@ -0,0 +1,55 @@ +import numpy as np +from numpy.testing import assert_allclose + +import pytest + +from keras.preprocessing.sequence import 
(pad_sequences, make_sampling_table, + skipgrams) + + +def test_pad_sequences(): + a = [[1], [1, 2], [1, 2, 3]] + + # test padding + b = pad_sequences(a, maxlen=3, padding='pre') + assert_allclose(b, [[0, 0, 1], [0, 1, 2], [1, 2, 3]]) + b = pad_sequences(a, maxlen=3, padding='post') + assert_allclose(b, [[1, 0, 0], [1, 2, 0], [1, 2, 3]]) + + # test truncating + b = pad_sequences(a, maxlen=2, truncating='pre') + assert_allclose(b, [[0, 1], [1, 2], [2, 3]]) + b = pad_sequences(a, maxlen=2, truncating='post') + assert_allclose(b, [[0, 1], [1, 2], [1, 2]]) + + # test value + b = pad_sequences(a, maxlen=3, value=1) + assert_allclose(b, [[1, 1, 1], [1, 1, 2], [1, 2, 3]]) + + +def test_make_sampling_table(): + a = make_sampling_table(3) + assert_allclose(a, np.asarray([0.00315225, 0.00315225, 0.00547597]), + rtol=.1) + + +def test_skipgrams(): + # test with no window size and binary labels + couples, labels = skipgrams(np.arange(3), vocabulary_size=3) + for couple in couples: + assert couple[0] in [0, 1, 2] and couple[1] in [0, 1, 2] + + # test window size and categorical labels + couples, labels = skipgrams(np.arange(5), vocabulary_size=5, window_size=1, + categorical=True) + for couple in couples: + assert couple[0] - couple[1] < 3 + for l in labels: + assert len(l) == 2 + + # test categorical laels + + + +if __name__ == '__main__': + pytest.main([__file__]) From 54d3b9e67355be8eb41aab22ba6f481b7d07c65c Mon Sep 17 00:00:00 2001 From: EderSantana Date: Tue, 15 Dec 2015 18:30:01 -0300 Subject: [PATCH 042/145] rename test_sequences to test_sequence.py --- tests/keras/preprocessing/test_sequences.py | 25 --------------------- 1 file changed, 25 deletions(-) delete mode 100644 tests/keras/preprocessing/test_sequences.py diff --git a/tests/keras/preprocessing/test_sequences.py b/tests/keras/preprocessing/test_sequences.py deleted file mode 100644 index 542dfc3a0..000000000 --- a/tests/keras/preprocessing/test_sequences.py +++ /dev/null @@ -1,25 +0,0 @@ -import numpy as np -from numpy.testing import assert_allclose - -from keras.preprocessing.sequences import (pad_sequences, make_sampling_table, - skipgrams) - - -def test_pad_sequences(): - a = [np.arange(i) for i in np.arange(2, 5, 1)] - b = pad_sequences(a, maxlen=3) - assert_allclose(b, [[0, 0, 1], [0, 1, 2], [1, 2, 3]]) - - -def test_make_sampling_table(): - a = make_sampling_table(3) - assert_allclose(a, np.asarray([0.00315225, 0.00315225, 0.00547597]), - rtol=.1) - - -def test_skipgrams(): - couples, labels = skipgrams(np.arange(3), 3) - for couple in couples: - assert couple[0] in [0, 1, 2] and couple[1] in [0, 1, 2] - - assert 0 in labels and 1 in labels From e27587334b2327cfd145c8d0957ac896e54427cf Mon Sep 17 00:00:00 2001 From: EderSantana Date: Tue, 15 Dec 2015 18:31:19 -0300 Subject: [PATCH 043/145] fix typo --- tests/keras/preprocessing/test_sequence.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/keras/preprocessing/test_sequence.py b/tests/keras/preprocessing/test_sequence.py index c62f3b6b5..3fe8f3fae 100644 --- a/tests/keras/preprocessing/test_sequence.py +++ b/tests/keras/preprocessing/test_sequence.py @@ -47,9 +47,6 @@ def test_skipgrams(): for l in labels: assert len(l) == 2 - # test categorical laels - - if __name__ == '__main__': pytest.main([__file__]) From 42b3d37a54545882699283b5764cf3c997f8d9cd Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Tue, 15 Dec 2015 16:47:41 -0800 Subject: [PATCH 044/145] Fix flaky test in preprocessing --- tests/keras/preprocessing/test_sequence.py | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/tests/keras/preprocessing/test_sequence.py b/tests/keras/preprocessing/test_sequence.py index 3fe8f3fae..7a4dcd5fb 100644 --- a/tests/keras/preprocessing/test_sequence.py +++ b/tests/keras/preprocessing/test_sequence.py @@ -43,7 +43,7 @@ def test_skipgrams(): couples, labels = skipgrams(np.arange(5), vocabulary_size=5, window_size=1, categorical=True) for couple in couples: - assert couple[0] - couple[1] < 3 + assert couple[0] - couple[1] <= 3 for l in labels: assert len(l) == 2 From 2f754c79f10190b7fef105bae39c43e140cd41ec Mon Sep 17 00:00:00 2001 From: olegsinyavskiy Date: Tue, 15 Dec 2015 19:05:47 -0800 Subject: [PATCH 045/145] move tests into tests --- .../integration_tests}/test_image_data_tasks.py | 0 .../integration_tests}/test_temporal_data_tasks.py | 0 .../integration_tests}/test_vector_data_tasks.py | 0 3 files changed, 0 insertions(+), 0 deletions(-) rename {integration_tests => tests/integration_tests}/test_image_data_tasks.py (100%) rename {integration_tests => tests/integration_tests}/test_temporal_data_tasks.py (100%) rename {integration_tests => tests/integration_tests}/test_vector_data_tasks.py (100%) diff --git a/integration_tests/test_image_data_tasks.py b/tests/integration_tests/test_image_data_tasks.py similarity index 100% rename from integration_tests/test_image_data_tasks.py rename to tests/integration_tests/test_image_data_tasks.py diff --git a/integration_tests/test_temporal_data_tasks.py b/tests/integration_tests/test_temporal_data_tasks.py similarity index 100% rename from integration_tests/test_temporal_data_tasks.py rename to tests/integration_tests/test_temporal_data_tasks.py diff --git a/integration_tests/test_vector_data_tasks.py b/tests/integration_tests/test_vector_data_tasks.py similarity index 100% rename from integration_tests/test_vector_data_tasks.py rename to tests/integration_tests/test_vector_data_tasks.py From 5e06aa5ef1cfb33b56cdea446dd1c8530bc99a14 Mon Sep 17 00:00:00 2001 From: olegsinyavskiy Date: Tue, 15 Dec 2015 19:07:24 -0800 Subject: [PATCH 046/145] update the job --- .travis.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.travis.yml b/.travis.yml index 53b4033f0..51d6d7c9d 100644 --- a/.travis.yml +++ b/.travis.yml @@ -58,9 +58,9 @@ script: - sed -i -e 's/"backend":[[:space:]]*"[^"]*/"backend":\ "'$KERAS_BACKEND'/g' ~/.keras/keras.json; - echo -e "Running tests with the following config:\n$(cat ~/.keras/keras.json)" - if [[ "$INTEGRATION_TESTS" == "true" ]]; then - PYTHONPATH=$PWD:$PYTHONPATH py.test integration_tests/; + PYTHONPATH=$PWD:$PYTHONPATH py.test tests/integration_tests; else - PYTHONPATH=$PWD:$PYTHONPATH py.test tests/; + PYTHONPATH=$PWD:$PYTHONPATH py.test tests/ --ignore=tests/integration_tests; fi after_success: - coveralls From d04cac6526171af608baf38f68f9767af62b0555 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Wed, 16 Dec 2015 08:17:16 -0800 Subject: [PATCH 047/145] Fix GRU activation --- keras/layers/recurrent.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/keras/layers/recurrent.py b/keras/layers/recurrent.py index 57536b5d3..6b89c6800 100644 --- a/keras/layers/recurrent.py +++ b/keras/layers/recurrent.py @@ -326,7 +326,7 @@ class GRU(Recurrent): z = self.inner_activation(x_z + K.dot(h_tm1, self.U_z)) r = self.inner_activation(x_r + K.dot(h_tm1, self.U_r)) - hh = self.inner_activation(x_h + K.dot(r * h_tm1, self.U_h)) + hh = self.activation(x_h + K.dot(r * h_tm1, self.U_h)) h = z * h_tm1 + (1 - z) * hh return h, [h] From 
69cabdf6bc501c1f3c58fe71ce9a93c92bfd3d13 Mon Sep 17 00:00:00 2001 From: Luke de Oliveira Date: Wed, 16 Dec 2015 19:10:39 +0100 Subject: [PATCH 048/145] Remove TimeDistributedHighway [WIP] will be added after `TimeDistributed` generic layer. --- keras/layers/core.py | 128 ------------------------------------------- 1 file changed, 128 deletions(-) diff --git a/keras/layers/core.py b/keras/layers/core.py index b8d1a839a..ed0318123 100644 --- a/keras/layers/core.py +++ b/keras/layers/core.py @@ -1815,131 +1815,3 @@ class Highway(Layer): base_config = super(Highway, self).get_config() return dict(list(base_config.items()) + list(config.items())) - -class TimeDistributedHighway(MaskedLayer): - '''Apply a same Highway layer for each dimension[1] (time_dimension) input. - Especially useful after a recurrent network with 'return_sequence=True'. - - # Input shape - 3D tensor with shape `(nb_sample, time_dimension, input_dim)`. - - # Output shape - 3D tensor with shape `(nb_sample, time_dimension, input_dim)`. - - # Arguments - init: name of initialization function for the weights of the layer - (see [initializations](../initializations.md)), - or alternatively, Theano function to use for weights - initialization. This parameter is only relevant - if you don't pass a `weights` argument. - transform_bias: value for the bias to take on initially (default -2) - activation: name of activation function to use - (see [activations](../activations.md)), - or alternatively, elementwise Theano function. - If you don't specify anything, no activation is applied - (ie. "linear" activation: a(x) = x). - weights: list of numpy arrays to set as initial weights. - The list should have 1 element, of shape `(input_dim, output_dim)`. - W_regularizer: instance of [WeightRegularizer](../regularizers.md) - (eg. L1 or L2 regularization), applied to the main weights matrix. - b_regularizer: instance of [WeightRegularizer](../regularizers.md), - applied to the bias. - activity_regularizer: instance of [ActivityRegularizer](../regularizers.md), - applied to the network output. - W_constraint: instance of the [constraints](../constraints.md) module - (eg. maxnorm, nonneg), applied to the main weights matrix. - b_constraint: instance of the [constraints](../constraints.md) module, - applied to the bias. - input_dim: dimensionality of the input (integer). - This argument (or alternatively, the keyword argument `input_shape`) - is required when using this layer as the first layer in a model. 
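Returning to patch 047 ("Fix GRU activation") above: the fix is subtle but important. The update and reset gates `z` and `r` go through `inner_activation` (a sigmoid-like squashing to (0, 1)), while the candidate state `hh` must go through the layer's main `activation` (tanh by default); the old code routed `hh` through the gate nonlinearity as well. A numpy sketch of the corrected step, with a plain sigmoid standing in for the default hard sigmoid and biases folded into the `x_*` inputs:

```python
import numpy as np


def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))


def gru_step(x_z, x_r, x_h, h_tm1, U_z, U_r, U_h):
    z = sigmoid(x_z + np.dot(h_tm1, U_z))        # update gate: inner_activation
    r = sigmoid(x_r + np.dot(h_tm1, U_r))        # reset gate: inner_activation
    hh = np.tanh(x_h + np.dot(r * h_tm1, U_h))   # candidate state: the fixed line
    return z * h_tm1 + (1 - z) * hh


dim = 3
rng = np.random.RandomState(0)
h_prev = rng.randn(dim)
U_z, U_r, U_h = (0.1 * np.eye(dim) for _ in range(3))
x_z, x_r, x_h = (rng.randn(dim) for _ in range(3))
print(gru_step(x_z, x_r, x_h, h_prev, U_z, U_r, U_h))
```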
- ''' - input_ndim = 3 - - def __init__(self, - init='glorot_uniform', transform_bias=-1, activation='linear', weights=None, - W_regularizer=None, b_regularizer=None, activity_regularizer=None, - W_constraint=None, b_constraint=None, - input_dim=None, input_length=None, **kwargs): - self.init = initializations.get(init) - self.transform_bias = transform_bias - self.activation = activations.get(activation) - - self.W_regularizer = regularizers.get(W_regularizer) - self.b_regularizer = regularizers.get(b_regularizer) - self.activity_regularizer = regularizers.get(activity_regularizer) - - self.W_constraint = constraints.get(W_constraint) - self.b_constraint = constraints.get(b_constraint) - self.constraints = [self.W_constraint, self.b_constraint] - - self.initial_weights = weights - - self.input_dim = input_dim - self.input_length = input_length - if self.input_dim: - kwargs['input_shape'] = (self.input_length, self.input_dim) - self.input = K.placeholder(ndim=3) - super(TimeDistributedHighway, self).__init__(**kwargs) - - def build(self): - input_dim = self.input_shape[2] - - self.W = self.init((input_dim, input_dim)) - self.b = K.zeros((input_dim)) - - self.W_carry = self.init((input_dim, input_dim)) - self.b_carry = K.variable(np.ones((input_dim)) * self.transform_bias) - - self.params = [self.W, self.b, self.W_carry, self.b_carry] - self.regularizers = [] - - if self.W_regularizer: - self.W_regularizer.set_param(self.W) - self.regularizers.append(self.W_regularizer) - - if self.b_regularizer: - self.b_regularizer.set_param(self.b) - self.regularizers.append(self.b_regularizer) - - if self.activity_regularizer: - self.activity_regularizer.set_layer(self) - self.regularizers.append(self.activity_regularizer) - - if self.initial_weights is not None: - self.set_weights(self.initial_weights) - del self.initial_weights - - @property - def output_shape(self): - input_shape = self.input_shape - return (input_shape[0], input_shape[1], input_shape[2]) - - def get_output(self, train=False): - X = self.get_input(train) - - def step(x, states): - output = self.activation(K.dot(x, self.W) + self.b) - transform_weight = activations.sigmoid(K.dot(x, self.W_carry) + self.b_carry) - output *= transform_weight - output = output + (1 - transform_weight) * x - return output, [] - - last_output, outputs, states = K.rnn(step, X, [], masking=False) - return outputs - - def get_config(self): - config = {"name": self.__class__.__name__, - "init": self.init.__name__, - "transform_bias": self.transform_bias, - "activation": self.activation.__name__, - "W_regularizer": self.W_regularizer.get_config() if self.W_regularizer else None, - "b_regularizer": self.b_regularizer.get_config() if self.b_regularizer else None, - "activity_regularizer": self.activity_regularizer.get_config() if self.activity_regularizer else None, - "W_constraint": self.W_constraint.get_config() if self.W_constraint else None, - "b_constraint": self.b_constraint.get_config() if self.b_constraint else None, - "input_dim": self.input_dim, - "input_length": self.input_length} - base_config = super(TimeDistributedHighway, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - From ec663aeda11910be5c334a207faa5308d1ea8871 Mon Sep 17 00:00:00 2001 From: Luke de Oliveira Date: Wed, 16 Dec 2015 19:26:11 +0100 Subject: [PATCH 049/145] Remove TimeDistributedHighway tests --- tests/keras/layers/test_core.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/tests/keras/layers/test_core.py b/tests/keras/layers/test_core.py 
index 2edb3452b..db75f85c7 100644 --- a/tests/keras/layers/test_core.py +++ b/tests/keras/layers/test_core.py @@ -100,11 +100,6 @@ def test_time_dist_merge(): _runner(layer) -def test_time_dist_highway(): - layer = core.TimeDistributedHighway(input_shape=(None, 10)) - _runner(layer) - - def test_highway(): layer = core.Highway(input_shape=(10,)) _runner(layer) From 490ba423c4fd2f5417b4f18e088402587c936784 Mon Sep 17 00:00:00 2001 From: olegsinyavskiy Date: Wed, 16 Dec 2015 11:23:27 -0800 Subject: [PATCH 050/145] do not share data between tests --- tests/keras/test_models.py | 37 +++++++++++++++++++++++++++---------- 1 file changed, 27 insertions(+), 10 deletions(-) diff --git a/tests/keras/test_models.py b/tests/keras/test_models.py index 9779542c2..e9e170a68 100644 --- a/tests/keras/test_models.py +++ b/tests/keras/test_models.py @@ -12,7 +12,7 @@ from keras.utils import np_utils from keras.utils.test_utils import get_test_data import os -from keras.utils.layer_utils import model_summary + input_dim = 32 nb_hidden = 16 @@ -20,16 +20,21 @@ nb_class = 4 batch_size = 32 nb_epoch = 1 -train_samples = 2000 -test_samples = 500 -(X_train, y_train), (X_test, y_test) = get_test_data(nb_train=train_samples, - nb_test=test_samples, - input_shape=(input_dim,), - classification=True, - nb_class=4) -y_test = np_utils.to_categorical(y_test) -y_train = np_utils.to_categorical(y_train) +def _get_test_data(): + np.random.seed(1234) + + train_samples = 2000 + test_samples = 500 + + (X_train, y_train), (X_test, y_test) = get_test_data(nb_train=train_samples, + nb_test=test_samples, + input_shape=(input_dim,), + classification=True, + nb_class=4) + y_test = np_utils.to_categorical(y_test) + y_train = np_utils.to_categorical(y_train) + return (X_train, y_train), (X_test, y_test) #################### @@ -38,6 +43,8 @@ y_train = np_utils.to_categorical(y_train) def test_sequential(): + (X_train, y_train), (X_test, y_test) = _get_test_data() + model = Sequential() model.add(Dense(nb_hidden, input_shape=(input_dim,))) model.add(Activation('relu')) @@ -87,6 +94,7 @@ def test_sequential(): def test_merge_sum(): + (X_train, y_train), (X_test, y_test) = _get_test_data() left = Sequential() left.add(Dense(nb_hidden, input_shape=(input_dim,))) left.add(Activation('relu')) @@ -140,6 +148,8 @@ def test_merge_sum(): @pytest.mark.skipif(K._BACKEND == 'tensorflow', reason='currently not working with TensorFlow') def test_merge_dot(): + (X_train, y_train), (X_test, y_test) = _get_test_data() + left = Sequential() left.add(Dense(input_dim=input_dim, output_dim=nb_hidden)) left.add(Activation('relu')) @@ -172,6 +182,8 @@ def test_merge_dot(): def test_merge_concat(): + (X_train, y_train), (X_test, y_test) = _get_test_data() + left = Sequential() left.add(Dense(nb_hidden, input_shape=(input_dim,))) left.add(Activation('relu')) @@ -226,6 +238,7 @@ def test_merge_concat(): def test_merge_recursivity(): + (X_train, y_train), (X_test, y_test) = _get_test_data() left = Sequential() left.add(Dense(nb_hidden, input_shape=(input_dim,))) left.add(Activation('relu')) @@ -274,6 +287,7 @@ def test_merge_recursivity(): def test_merge_overlap(): + (X_train, y_train), (X_test, y_test) = _get_test_data() left = Sequential() left.add(Dense(nb_hidden, input_shape=(input_dim,))) left.add(Activation('relu')) @@ -310,6 +324,7 @@ def test_merge_overlap(): def test_lambda(): + (X_train, y_train), (X_test, y_test) = _get_test_data() def func(X): s = X[0] for i in range(1, len(X)): @@ -395,6 +410,7 @@ def test_sequential_count_params(): def 
test_siamese_1(): + (X_train, y_train), (X_test, y_test) = _get_test_data() left = Sequential() left.add(Dense(nb_hidden, input_shape=(input_dim,))) left.add(Activation('relu')) @@ -449,6 +465,7 @@ def test_siamese_1(): def test_siamese_2(): + (X_train, y_train), (X_test, y_test) = _get_test_data() left = Sequential() left.add(Dense(nb_hidden, input_shape=(input_dim,))) left.add(Activation('relu')) From 827a6323b0d68c804f3b64498f2c42bf63f1d60f Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Wed, 16 Dec 2015 12:15:09 -0800 Subject: [PATCH 051/145] Style fixes --- keras/layers/core.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/keras/layers/core.py b/keras/layers/core.py index b5992cd8e..86988d5ae 100644 --- a/keras/layers/core.py +++ b/keras/layers/core.py @@ -1726,8 +1726,9 @@ def add_shared_layer(layer, inputs): inputs[i].add(s) inputs[i].add(sh) + class Highway(Layer): - '''Densely connected highway network, + '''Densely connected highway network, a natural extension of LSTMs to feedforward networks cite: http://arxiv.org/pdf/1505.00387v2.pdf @@ -1768,7 +1769,8 @@ class Highway(Layer): ''' input_ndim = 2 - def __init__(self, init='glorot_uniform', transform_bias=-2, activation='linear', weights=None, + def __init__(self, init='glorot_uniform', transform_bias=-2, + activation='linear', weights=None, W_regularizer=None, b_regularizer=None, activity_regularizer=None, W_constraint=None, b_constraint=None, input_dim=None, **kwargs): self.init = initializations.get(init) @@ -1796,9 +1798,9 @@ class Highway(Layer): self.W = self.init((input_dim, input_dim)) self.W_carry = self.init((input_dim, input_dim)) - + self.b = K.zeros((input_dim,)) - # -- initialize with a vector of values `transform_bias` + # initialize with a vector of values `transform_bias` self.b_carry = K.variable(np.ones((input_dim,)) * self.transform_bias) self.params = [self.W, self.b, self.W_carry, self.b_carry] @@ -1845,4 +1847,3 @@ class Highway(Layer): "input_dim": self.input_dim} base_config = super(Highway, self).get_config() return dict(list(base_config.items()) + list(config.items())) - From 8c914f793b868d5ab1534b61a0358b195686a35d Mon Sep 17 00:00:00 2001 From: Julien Rebetez Date: Thu, 17 Dec 2015 11:40:43 +0100 Subject: [PATCH 052/145] Fix docs/autogen.py to create subdirectories for autogenerated MODULES doc Without this, docs/autogen.py fails with a no such file or directory 'sources/layers/convolutional.md' --- docs/autogen.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/autogen.py b/docs/autogen.py index 3b99b854d..129c5acaf 100644 --- a/docs/autogen.py +++ b/docs/autogen.py @@ -250,4 +250,7 @@ for module, module_name in MODULES: print('...inserting autogenerated content into template:', path) else: print('...creating new page with autogenerated content:', path) + subdir = os.path.dirname(path) + if not os.path.exists(subdir): + os.makedirs(subdir) open(path, 'w').write(module_page) From ed92c141851f85eace4ebc2eb3c69b9a11706a30 Mon Sep 17 00:00:00 2001 From: Julien Rebetez Date: Thu, 17 Dec 2015 11:45:52 +0100 Subject: [PATCH 053/145] Allows choosing the backend by setting the KERAS_BACKEND environment variable --- docs/templates/backend.md | 9 +++++++++ keras/backend/__init__.py | 5 +++++ 2 files changed, 14 insertions(+) diff --git a/docs/templates/backend.md b/docs/templates/backend.md index 72c0da164..9e40a19c5 100644 --- a/docs/templates/backend.md +++ b/docs/templates/backend.md @@ -23,6 +23,15 @@ It probably looks like this: Simply change the field 
`backend` to either `"theano"` or `"tensorflow"`, and Keras will use the new configuration next time you run any Keras code. +You can also define the environment variable ``KERAS_BACKEND`` and this will +override what is defined in your config file : + +```bash +KERAS_BACKEND=tensorflow python -c "from keras import backend; print backend._BACKEND" +Using TensorFlow backend. +tensorflow +``` + ## Using the abstract Keras backend to write new code If you want the Keras modules you write to be compatible with both Theano and TensorFlow, you have to write them via the abstract Keras backend API. Here's an intro. diff --git a/keras/backend/__init__.py b/keras/backend/__init__.py index be33bde10..ad038c683 100644 --- a/keras/backend/__init__.py +++ b/keras/backend/__init__.py @@ -31,6 +31,11 @@ else: # add new line in order for bash 'cat' display the content correctly f.write(json.dumps(_config) + '\n') +if 'KERAS_BACKEND' in os.environ: + _backend = os.environ['KERAS_BACKEND'] + assert _backend in {'theano', 'tensorflow'} + _BACKEND = _backend + if _BACKEND == 'theano': print('Using Theano backend.') from .theano_backend import * From 554ed5bfc80f126595cfa0fa76b9c04d55f1d161 Mon Sep 17 00:00:00 2001 From: Julien Rebetez Date: Thu, 17 Dec 2015 13:37:57 +0100 Subject: [PATCH 054/145] Add a K.repeat_elements function which works like np.repeat --- keras/backend/tensorflow_backend.py | 13 +++++++++++++ keras/backend/theano_backend.py | 8 ++++++++ tests/keras/backend/test_backends.py | 20 ++++++++++++++++++++ 3 files changed, 41 insertions(+) diff --git a/keras/backend/tensorflow_backend.py b/keras/backend/tensorflow_backend.py index 90b60fe08..b1819932d 100644 --- a/keras/backend/tensorflow_backend.py +++ b/keras/backend/tensorflow_backend.py @@ -235,6 +235,19 @@ def permute_dimensions(x, pattern): ''' return tf.transpose(x, perm=pattern) +def repeat_elements(x, rep, axis): + '''Repeats the elements of a tensor along an axis, like np.repeat + + If x has shape (s1, s2, s3) and axis=1, the output + will have shape (s1, s2 * rep, s3) + ''' + x_shape = x.get_shape().as_list() + # slices along the repeat axis + splits = tf.split(axis, x_shape[axis], x) + # repeat each slice the given number of reps + x_rep = [s for s in splits for i in xrange(rep)] + return tf.concat(axis, x_rep) + def repeat(x, n): '''Repeat a 2D tensor: diff --git a/keras/backend/theano_backend.py b/keras/backend/theano_backend.py index 360625cb9..67911e0c3 100644 --- a/keras/backend/theano_backend.py +++ b/keras/backend/theano_backend.py @@ -243,6 +243,14 @@ def permute_dimensions(x, pattern): return x.dimshuffle(pattern) +def repeat_elements(x, rep, axis): + '''Repeats the elements of a tensor along an axis, like np.repeat + + If x has shape (s1, s2, s3) and axis=1, the output + will have shape (s1, s2 * rep, s3) + ''' + return T.repeat(x, rep, axis=axis) + def repeat(x, n): '''Repeat a 2D tensor: diff --git a/tests/keras/backend/test_backends.py b/tests/keras/backend/test_backends.py index 682d632a3..cb394bb3d 100644 --- a/tests/keras/backend/test_backends.py +++ b/tests/keras/backend/test_backends.py @@ -64,6 +64,26 @@ class TestBackend(object): check_single_tensor_operation('expand_dims', (4, 3, 2), dim=1) check_single_tensor_operation('squeeze', (4, 3, 1), axis=2) + def test_repeat_elements(self): + reps = 3 + for ndims in [1, 2, 3]: + shape = np.arange(2, 2+ndims) + arr = np.arange(np.prod(shape)).reshape(shape) + arr_th = KTH.variable(arr) + arr_tf = KTF.variable(arr) + + for rep_axis in xrange(ndims): + np_rep = 
np.repeat(arr, reps, axis=rep_axis) + th_rep = KTH.eval( + KTH.repeat_elements(arr_th, reps, axis=rep_axis)) + tf_rep = KTF.eval( + KTF.repeat_elements(arr_tf, reps, axis=rep_axis)) + + assert th_rep.shape == np_rep.shape + assert tf_rep.shape == np_rep.shape + assert_allclose(np_rep, th_rep, atol=1e-05) + assert_allclose(np_rep, tf_rep, atol=1e-05) + def test_value_manipulation(self): val = np.random.random((4, 2)) xth = KTH.variable(val) From 8715c70a7413dd9fc0be1f7581cc53c7c913f694 Mon Sep 17 00:00:00 2001 From: Julien Rebetez Date: Thu, 17 Dec 2015 14:47:04 +0100 Subject: [PATCH 055/145] Modify UpSampling1D/2D to turn [0, 1] into [0, 0, 1, 1] instead of [0, 1, 0, 1] --- keras/layers/convolutional.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/keras/layers/convolutional.py b/keras/layers/convolutional.py index fb4f8f541..c5a979f56 100644 --- a/keras/layers/convolutional.py +++ b/keras/layers/convolutional.py @@ -609,7 +609,7 @@ class UpSampling1D(Layer): def get_output(self, train=False): X = self.get_input(train) - output = K.concatenate([X] * self.length, axis=1) + output = K.repeat_elements(X, self.length, axis=1) return output def get_config(self): @@ -669,11 +669,11 @@ class UpSampling2D(Layer): def get_output(self, train=False): X = self.get_input(train) if self.dim_ordering == 'th': - output = K.concatenate([X] * self.size[0], axis=2) - output = K.concatenate([output] * self.size[1], axis=3) + output = K.repeat_elements(X, self.size[0], axis=2) + output = K.repeat_elements(output, self.size[1], axis=3) elif self.dim_ordering == 'tf': - output = K.concatenate([X] * self.size[0], axis=1) - output = K.concatenate([output] * self.size[1], axis=2) + output = K.repeat_elements(X, self.size[0], axis=1) + output = K.repeat_elements(output, self.size[1], axis=2) else: raise Exception('Invalid dim_ordering: ' + self.dim_ordering) return output From 50d3fddead17b0fb8ffe315a2b3a585d7bcb3055 Mon Sep 17 00:00:00 2001 From: Julien Rebetez Date: Thu, 17 Dec 2015 14:48:04 +0100 Subject: [PATCH 056/145] Remove hardcoded fontname in visualize_util --- keras/utils/visualize_util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/keras/utils/visualize_util.py b/keras/utils/visualize_util.py index 9560d3f59..e5445d68b 100644 --- a/keras/utils/visualize_util.py +++ b/keras/utils/visualize_util.py @@ -120,7 +120,7 @@ class ModelToDot(object): self.g = pydot.Dot() self.g.set('rankdir', 'TB') self.g.set('concentrate', True) - self.g.set_node_defaults(shape='record', fontname="Fira Mono") + self.g.set_node_defaults(shape='record') if hasattr(model, 'outputs'): # Graph From 5d685f44474286a7a05e46dde333488a520f07aa Mon Sep 17 00:00:00 2001 From: Julien Rebetez Date: Thu, 17 Dec 2015 16:32:48 +0100 Subject: [PATCH 057/145] Use range instead of xrange to pass py35 tests --- keras/backend/tensorflow_backend.py | 2 +- tests/keras/backend/test_backends.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/keras/backend/tensorflow_backend.py b/keras/backend/tensorflow_backend.py index b1819932d..61afaae3b 100644 --- a/keras/backend/tensorflow_backend.py +++ b/keras/backend/tensorflow_backend.py @@ -245,7 +245,7 @@ def repeat_elements(x, rep, axis): # slices along the repeat axis splits = tf.split(axis, x_shape[axis], x) # repeat each slice the given number of reps - x_rep = [s for s in splits for i in xrange(rep)] + x_rep = [s for s in splits for i in range(rep)] return tf.concat(axis, x_rep) diff --git a/tests/keras/backend/test_backends.py 
b/tests/keras/backend/test_backends.py index cb394bb3d..c867ad6c1 100644 --- a/tests/keras/backend/test_backends.py +++ b/tests/keras/backend/test_backends.py @@ -72,7 +72,7 @@ class TestBackend(object): arr_th = KTH.variable(arr) arr_tf = KTF.variable(arr) - for rep_axis in xrange(ndims): + for rep_axis in range(ndims): np_rep = np.repeat(arr, reps, axis=rep_axis) th_rep = KTH.eval( KTH.repeat_elements(arr_th, reps, axis=rep_axis)) From c2a7ccd1cc5b4a3b3a1a0f920c8a29073d32ce42 Mon Sep 17 00:00:00 2001 From: Fariz Rahman Date: Fri, 18 Dec 2015 00:32:40 +0530 Subject: [PATCH 058/145] Remove unnecessary apology :) --- keras/backend/tensorflow_backend.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/keras/backend/tensorflow_backend.py b/keras/backend/tensorflow_backend.py index 61afaae3b..bccbedb1f 100644 --- a/keras/backend/tensorflow_backend.py +++ b/keras/backend/tensorflow_backend.py @@ -287,9 +287,6 @@ def squeeze(x, axis): def temporal_padding(x, padding=1): '''Pad the middle dimension of a 3D tensor with "padding" zeros left and right. - - Appologies for the inane API, but Theano makes this - really hard. ''' pattern = [[0, 0], [padding, padding], [0, 0]] return tf.pad(x, pattern) From 58fb2b8af56c76c332cc7dfeada7496687d65163 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Thu, 17 Dec 2015 12:40:07 -0800 Subject: [PATCH 059/145] Improve BatchNorm documentation --- keras/layers/normalization.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/keras/layers/normalization.py b/keras/layers/normalization.py index 5e1aec718..db6e20398 100644 --- a/keras/layers/normalization.py +++ b/keras/layers/normalization.py @@ -4,7 +4,9 @@ from .. import backend as K class BatchNormalization(Layer): - '''Normalize the activations of the previous layer at each batch. + '''Normalize the activations of the previous layer at each batch, + i.e. applies a transformation that maintains the mean activation + close to 0. and the activation standard deviation close to 1. # Input shape Arbitrary. Use the keyword argument `input_shape` @@ -18,7 +20,13 @@ class BatchNormalization(Layer): epsilon: small float > 0. Fuzz parameter. mode: integer, 0 or 1. - 0: feature-wise normalization. - - 1: sample-wise normalization. + If the input has multiple feature dimensions, + each will be normalized separately + (e.g. for an image input with shape + `(channels, rows, cols)`, + each combination of a channel, row and column + will be normalized separately). + - 1: sample-wise normalization. This mode assumes a 2D input. momentum: momentum in the computation of the exponential average of the mean and standard deviation of the data, for feature-wise normalization. 
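
The feature-wise vs. sample-wise distinction documented above is easiest to see in a short usage sketch. This is a minimal sketch against the Keras 0.x `Sequential` API used throughout this series; the input shapes and layer sizes are illustrative assumptions, not part of the patch:

```python
# Sketch only: the two BatchNormalization modes described in the docstring.
from keras.models import Sequential
from keras.layers.core import Dense, Flatten
from keras.layers.normalization import BatchNormalization

# mode=0 (feature-wise): each of the 3 * 16 * 16 positions of the
# (channels, rows, cols) input is normalized separately across the batch;
# running mean/std are tracked via `momentum` for use at test time.
feature_wise = Sequential()
feature_wise.add(BatchNormalization(mode=0, input_shape=(3, 16, 16)))
feature_wise.add(Flatten())
feature_wise.add(Dense(10, activation='softmax'))

# mode=1 (sample-wise): assumes a 2D input; each sample's vector is
# normalized on its own, with no batch statistics involved.
sample_wise = Sequential()
sample_wise.add(BatchNormalization(mode=1, input_shape=(64,)))
sample_wise.add(Dense(10, activation='softmax'))
```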
From 5255b5df548860362de29dec9a8c91ae1a7ec4a9 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Thu, 17 Dec 2015 12:40:33 -0800 Subject: [PATCH 060/145] Style normalization in layers.core --- keras/layers/core.py | 97 +++++++++++++++++++++++--------------------- 1 file changed, 51 insertions(+), 46 deletions(-) diff --git a/keras/layers/core.py b/keras/layers/core.py index 86988d5ae..c5e2c13da 100644 --- a/keras/layers/core.py +++ b/keras/layers/core.py @@ -38,7 +38,7 @@ class Layer(object): 'batch_input_shape', 'cache_enabled'} for kwarg in kwargs: - assert kwarg in allowed_kwargs, "Keyword argument not understood: " + kwarg + assert kwarg in allowed_kwargs, 'Keyword argument not understood: ' + kwarg if 'input_shape' in kwargs: self.set_input_shape((None,) + tuple(kwargs['input_shape'])) if 'batch_input_shape' in kwargs: @@ -77,12 +77,14 @@ class Layer(object): def set_previous(self, layer, connection_map={}): '''Connect a layer to its parent in the computational graph. ''' - assert self.nb_input == layer.nb_output == 1, "Cannot connect layers: input count and output count should be 1." + assert self.nb_input == layer.nb_output == 1, 'Cannot connect layers: input count and output count should be 1.' if hasattr(self, 'input_ndim'): - assert self.input_ndim == len(layer.output_shape), "Incompatible shapes: layer expected input with ndim=" +\ - str(self.input_ndim) + " but previous layer has output_shape " + str(layer.output_shape) + assert self.input_ndim == len(layer.output_shape), ('Incompatible shapes: layer expected input with ndim=' + + str(self.input_ndim) + + ' but previous layer has output_shape ' + + str(layer.output_shape)) if layer.get_output_mask() is not None: - assert self.supports_masked_input(), "Cannot connect non-masking layer to layer with masked output" + assert self.supports_masked_input(), 'Cannot connect non-masking layer to layer with masked output' self.previous = layer self.build() @@ -206,11 +208,12 @@ class Layer(object): of the layer (i.e. it should match the output of `get_weights`). ''' - assert len(self.params) == len(weights), 'Provided weight array does not match layer weights (' + \ - str(len(self.params)) + ' layer params vs. ' + str(len(weights)) + ' provided weights)' + assert len(self.params) == len(weights), ('Provided weight array does not match layer weights (' + + str(len(self.params)) + ' layer params vs. ' + + str(len(weights)) + ' provided weights)') for p, w in zip(self.params, weights): if K.get_value(p).shape != w.shape: - raise Exception("Layer shape %s not compatible with weight shape %s." % (K.get_value(p).shape, w.shape)) + raise Exception('Layer shape %s not compatible with weight shape %s.' % (K.get_value(p).shape, w.shape)) K.set_value(p, w) def get_weights(self): @@ -225,7 +228,7 @@ class Layer(object): def get_config(self): '''Return the parameters of the layer, as a dictionary. ''' - config = {"name": self.__class__.__name__} + config = {'name': self.__class__.__name__} if hasattr(self, '_input_shape'): config['input_shape'] = self._input_shape[1:] if hasattr(self, '_trainable'): @@ -304,8 +307,8 @@ class Masking(MaskedLayer): self.input = K.placeholder(ndim=3) def get_output_mask(self, train=False): - if K._BACKEND == "tensorflow": - raise Exception("Masking is Theano-only for the time being.") + if K._BACKEND == 'tensorflow': + raise Exception('Masking is Theano-only for the time being.') X = self.get_input(train) return K.any(K.ones_like(X) * (1. 
- K.equal(X, self.mask_value)), axis=-1) @@ -316,8 +319,8 @@ class Masking(MaskedLayer): axis=-1, keepdims=True) def get_config(self): - config = {"name": self.__class__.__name__, - "mask_value": self.mask_value} + config = {'name': self.__class__.__name__, + 'mask_value': self.mask_value} base_config = super(Masking, self).get_config() return dict(list(base_config.items()) + list(config.items())) @@ -363,8 +366,8 @@ class TimeDistributedMerge(Layer): raise Exception('Unknown merge mode') def get_config(self): - config = {"name": self.__class__.__name__, - "mode": self.mode} + config = {'name': self.__class__.__name__, + 'mode': self.mode} base_config = super(TimeDistributedMerge, self).get_config() return dict(list(base_config.items()) + list(config.items())) @@ -792,8 +795,8 @@ class RepeatVector(Layer): return K.repeat(X, self.n) def get_config(self): - config = {"name": self.__class__.__name__, - "n": self.n} + config = {'name': self.__class__.__name__, + 'n': self.n} base_config = super(RepeatVector, self).get_config() return dict(list(base_config.items()) + list(config.items())) @@ -938,9 +941,9 @@ class ActivityRegularization(Layer): return self.get_input(train) def get_config(self): - config = {"name": self.__class__.__name__, - "l1": self.l1, - "l2": self.l2} + config = {'name': self.__class__.__name__, + 'l1': self.l1, + 'l2': self.l2} base_config = super(ActivityRegularization, self).get_config() return dict(list(base_config.items()) + list(config.items())) @@ -1053,17 +1056,17 @@ class TimeDistributedDense(MaskedLayer): return outputs def get_config(self): - config = {"name": self.__class__.__name__, - "output_dim": self.output_dim, - "init": self.init.__name__, - "activation": self.activation.__name__, - "W_regularizer": self.W_regularizer.get_config() if self.W_regularizer else None, - "b_regularizer": self.b_regularizer.get_config() if self.b_regularizer else None, - "activity_regularizer": self.activity_regularizer.get_config() if self.activity_regularizer else None, - "W_constraint": self.W_constraint.get_config() if self.W_constraint else None, - "b_constraint": self.b_constraint.get_config() if self.b_constraint else None, - "input_dim": self.input_dim, - "input_length": self.input_length} + config = {'name': self.__class__.__name__, + 'output_dim': self.output_dim, + 'init': self.init.__name__, + 'activation': self.activation.__name__, + 'W_regularizer': self.W_regularizer.get_config() if self.W_regularizer else None, + 'b_regularizer': self.b_regularizer.get_config() if self.b_regularizer else None, + 'activity_regularizer': self.activity_regularizer.get_config() if self.activity_regularizer else None, + 'W_constraint': self.W_constraint.get_config() if self.W_constraint else None, + 'b_constraint': self.b_constraint.get_config() if self.b_constraint else None, + 'input_dim': self.input_dim, + 'input_length': self.input_length} base_config = super(TimeDistributedDense, self).get_config() return dict(list(base_config.items()) + list(config.items())) @@ -1168,10 +1171,10 @@ class AutoEncoder(Layer): return self.decoder.get_output(train) def get_config(self): - return {"name": self.__class__.__name__, - "encoder_config": self.encoder.get_config(), - "decoder_config": self.decoder.get_config(), - "output_reconstruction": self.output_reconstruction} + return {'name': self.__class__.__name__, + 'encoder_config': self.encoder.get_config(), + 'decoder_config': self.decoder.get_config(), + 'output_reconstruction': self.output_reconstruction} class MaxoutDense(Layer): @@ -1295,6 
+1298,8 @@ class Lambda(Layer): if py3: self.function = marshal.dumps(function.__code__) else: + assert hasattr(function, 'func_code'), ('The Lambda layer "function"' + ' argument must be a Python function.') self.function = marshal.dumps(function.func_code) if output_shape is None: self._output_shape = None @@ -1318,7 +1323,7 @@ class Lambda(Layer): output_shape_func = types.FunctionType(output_shape_func, globals()) shape = output_shape_func(self.previous.output_shape) if type(shape) not in {list, tuple}: - raise Exception("output_shape function must return a tuple") + raise Exception('output_shape function must return a tuple') return tuple(shape) def get_output(self, train=False): @@ -1835,15 +1840,15 @@ class Highway(Layer): return output def get_config(self): - config = {"name": self.__class__.__name__, - "init": self.init.__name__, - "transform_bias": self.transform_bias, - "activation": self.activation.__name__, - "W_regularizer": self.W_regularizer.get_config() if self.W_regularizer else None, - "b_regularizer": self.b_regularizer.get_config() if self.b_regularizer else None, - "activity_regularizer": self.activity_regularizer.get_config() if self.activity_regularizer else None, - "W_constraint": self.W_constraint.get_config() if self.W_constraint else None, - "b_constraint": self.b_constraint.get_config() if self.b_constraint else None, - "input_dim": self.input_dim} + config = {'name': self.__class__.__name__, + 'init': self.init.__name__, + 'transform_bias': self.transform_bias, + 'activation': self.activation.__name__, + 'W_regularizer': self.W_regularizer.get_config() if self.W_regularizer else None, + 'b_regularizer': self.b_regularizer.get_config() if self.b_regularizer else None, + 'activity_regularizer': self.activity_regularizer.get_config() if self.activity_regularizer else None, + 'W_constraint': self.W_constraint.get_config() if self.W_constraint else None, + 'b_constraint': self.b_constraint.get_config() if self.b_constraint else None, + 'input_dim': self.input_dim} base_config = super(Highway, self).get_config() return dict(list(base_config.items()) + list(config.items())) From 097e46837cd48fe53e59971e8b5237e242797b0c Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Thu, 17 Dec 2015 22:14:35 -0800 Subject: [PATCH 061/145] Callback robustness fix --- keras/callbacks.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/keras/callbacks.py b/keras/callbacks.py index 37ba1718c..d6050eb2f 100644 --- a/keras/callbacks.py +++ b/keras/callbacks.py @@ -49,6 +49,8 @@ class CallbackList(object): self._t_enter_batch = time.time() def on_batch_end(self, batch, logs={}): + if not hasattr(self, '_t_enter_batch'): + self._t_enter_batch = time.time() self._delta_t_batch = time.time() - self._t_enter_batch t_before_callbacks = time.time() for callback in self.callbacks: From 47d074fec3836206450facbbbc0965acd482f69c Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Thu, 17 Dec 2015 22:32:44 -0800 Subject: [PATCH 062/145] Add fit_generator methods in models --- keras/models.py | 390 +++++++++++++++++++++++++++++++++++++ tests/keras/test_models.py | 93 +++++++-- 2 files changed, 466 insertions(+), 17 deletions(-) diff --git a/keras/models.py b/keras/models.py index 4a4d5b92d..cc8ec3336 100644 --- a/keras/models.py +++ b/keras/models.py @@ -5,6 +5,12 @@ import warnings import pprint from six.moves import range import six +import time +import threading +try: + import queue +except ImportError: + import Queue as queue from . import backend as K from . 
import optimizers
@@ -713,6 +719,199 @@ class Sequential(Model, containers.Sequential):
                     self.layers[k].set_weights(weights)
         f.close()
 
+    def fit_generator(self, generator, samples_per_epoch, nb_epoch,
+                      verbose=1, show_accuracy=False, callbacks=[],
+                      validation_data=None, class_weight=None, nb_worker=1):
+        '''Fit a model on data generated batch-by-batch by a Python generator.
+        The generator is run in parallel to the model, for efficiency,
+        and can be run by multiple workers at the same time.
+        For instance, this allows you to do real-time data augmentation
+        on images on CPU in parallel to training your model on GPU.
+
+        # Arguments
+            generator: a Python generator,
+                yielding either (X, y) or (X, y, sample_weight).
+                The generator is expected to loop over its data
+                indefinitely. An epoch finishes when `samples_per_epoch`
+                samples have been seen by the model.
+                The output of the generator must be a tuple of either 2 or 3
+                numpy arrays.
+                If the output tuple has two elements, they are assumed to be
+                (input_data, target_data).
+                If it has three elements, they are assumed to be
+                (input_data, target_data, sample_weight).
+                All arrays should contain the same number of samples.
+            samples_per_epoch: integer, number of samples to process before
+                starting a new epoch.
+            nb_epoch: integer, total number of iterations on the data.
+            verbose: verbosity mode, 0, 1, or 2.
+            show_accuracy: boolean. Whether to display accuracy (only relevant
+                for classification problems).
+            callbacks: list of callbacks to be called during training.
+            validation_data: tuple of 2 or 3 numpy arrays. If 2 elements,
+                they are assumed to be (input_data, target_data);
+                if 3 elements, they are assumed to be
+                (input_data, target_data, sample weights).
+            class_weight: dictionary mapping class indices to a weight
+                for the class.
+            nb_worker: integer, number of workers to use for running
+                the generator (in parallel to model training).
+                If using multiple workers, the processing order of batches
+                generated by the generator will be non-deterministic.
+                If using multiple workers, make sure to protect
+                any thread-unsafe operation done by the generator
+                using a Python mutex.
+
+        # Returns
+
+            A `History` object.
+ + # Examples + + ```python + def generate_arrays_from_file(path): + while 1: + f = open(path) + for line in f: + # create numpy arrays of input data + # and labels, from each line in the file + x, y = process_line(line) + yield x, y + f.close() + + model.fit_generator(generate_arrays_from_file('/my_file.txt'), + samples_per_epoch=10000, nb_epoch=10) + ``` + ''' + max_queue_size = 10 # maximum number of batches in queue + wait_time = 0.05 # in seconds + epoch = 0 + do_validation = bool(validation_data) + if show_accuracy: + out_labels = ['loss', 'acc'] + else: + out_labels = ['loss'] + metrics = ['loss', 'acc', 'val_loss', 'val_acc'] + + # prepare callbacks + history = cbks.History() + if verbose: + callbacks = [history, cbks.BaseLogger()] + callbacks + else: + callbacks = [history] + callbacks + callbacks = cbks.CallbackList(callbacks) + + callbacks._set_model(self) + callbacks._set_params({ + 'nb_epoch': nb_epoch, + 'nb_sample': samples_per_epoch, + 'verbose': verbose, + 'do_validation': do_validation, + 'metrics': metrics, + }) + callbacks.on_train_begin() + + # util function to validate the batches produced + # by the generator + def input_validation(generator_output): + if not hasattr(generator_output, '__len__'): + _stop.set() + raise Exception('The generator output must be a tuple.') + if len(generator_output) == 2: + X, y = generator_output + sample_weight = None + elif len(generator_output) == 3: + X, y, sample_weight = generator_output + else: + _stop.set() + raise Exception('The generator output tuple must have 2 or 3 elements.') + return X, y, sample_weight + + # start generator thread storing batches into a queue + generator_queue = queue.Queue() + _stop = threading.Event() + + def generator_task(): + i = 0 + while not _stop.is_set(): + try: + if generator_queue.qsize() < max_queue_size: + generator_output = generator.next() + generator_queue.put(generator_output) + i += 1 + else: + time.sleep(wait_time) + except KeyboardInterrupt: + _stop.set() + return + + generator_threads = [threading.Thread(target=generator_task) for _ in range(nb_worker)] + for thread in generator_threads: + thread.start() + + self.stop_training = False + while epoch < nb_epoch: + callbacks.on_epoch_begin(epoch) + samples_seen = 0 + batch_index = 0 + while samples_seen < samples_per_epoch: + while not _stop.is_set(): + if not generator_queue.empty(): + generator_output = generator_queue.get() + break + else: + time.sleep(wait_time) + + X, y, sample_weight = input_validation(generator_output) + + batch_logs = {} + batch_size = len(X[0]) + batch_logs['batch'] = batch_index + batch_logs['size'] = batch_size + callbacks.on_batch_begin(batch_index, batch_logs) + outs = self.train_on_batch(X, y, + accuracy=show_accuracy, + sample_weight=sample_weight, + class_weight=class_weight) + if type(outs) != list: + outs = [outs] + for l, o in zip(out_labels, outs): + batch_logs[l] = o + + callbacks.on_batch_end(batch_index, batch_logs) + + # construct epoch logs + epoch_logs = {} + batch_index += 1 + samples_seen += batch_size + if samples_seen >= samples_per_epoch: # epoch finished + if do_validation: + if hasattr(validation_data, 'next'): + # assumed to be generator + # TODO: call self.evaluate_generator() + _stop.set() + raise NotImplementedError() + else: + # input validation + X, y, sample_weight = input_validation(validation_data) + val_outs = self.evaluate(X, y, + show_accuracy=show_accuracy, + sample_weight=sample_weight, + verbose=0) + if type(val_outs) != list: + val_outs = [val_outs] + # same labels assumed 
+                        for l, o in zip(out_labels, val_outs):
+                            epoch_logs['val_' + l] = o
+
+            callbacks.on_epoch_end(epoch, epoch_logs)
+            epoch += 1
+            if self.stop_training:
+                break
+        _stop.set()
+        callbacks.on_train_end()
+        return history
+
 
 class Graph(Model, containers.Graph):
     '''Arbitrary connection graph.
@@ -938,3 +1137,194 @@ class Graph(Model, containers.Graph):
             weights = [g['param_{}'.format(p)] for p in range(g.attrs['nb_params'])]
         self.set_weights(weights)
         f.close()
+
+    def fit_generator(self, generator, samples_per_epoch, nb_epoch,
+                      verbose=1, callbacks=[],
+                      validation_data=None, class_weight={}, nb_worker=1):
+        '''Fit a model on data generated batch-by-batch by a Python generator.
+        The generator is run in parallel to the model, for efficiency,
+        and can be run by multiple workers at the same time.
+        For instance, this allows you to do real-time data augmentation
+        on images on CPU in parallel to training your model on GPU.
+
+        # Arguments
+            generator: a generator.
+                The output of the generator must be either a dictionary
+                mapping input and output names to numpy arrays, or
+                a tuple of dictionaries (data, sample_weight).
+                All arrays should contain the same number of samples.
+                The generator is expected to loop over its data
+                indefinitely. An epoch finishes when `samples_per_epoch`
+                samples have been seen by the model.
+            samples_per_epoch: integer, number of samples to process before
+                going to the next epoch.
+            nb_epoch: integer, total number of iterations on the data.
+            verbose: verbosity mode, 0, 1, or 2.
+            callbacks: list of callbacks to be called during training.
+            validation_data: dictionary mapping input and output names
+                to appropriate numpy arrays to be used as
+                held-out validation data.
+                All arrays should contain the same number of samples.
+            class_weight: dictionary mapping class indices to a weight
+                for the class.
+            nb_worker: integer, number of workers to use for running
+                the generator (in parallel to model training).
+                If using multiple workers, the processing order of batches
+                generated by the generator will be non-deterministic.
+                If using multiple workers, make sure to protect
+                any thread-unsafe operation done by the generator
+                using a Python mutex.
+
+        # Returns
+
+            A `History` object.
+ + # Examples + + ```python + def generate_arrays_from_file(path): + while 1: + f = open(path) + for line in f: + # create numpy arrays of input data + # and labels, from each line in the file + x1, x2, y = process_line(line) + yield {'input_1': x1, 'input_2': x2, 'output': y} + f.close() + + graph.fit_generator(generate_arrays_from_file('/my_file.txt'), + samples_per_epoch=10000, nb_epoch=10) + ``` + ''' + max_queue_size = 10 # maximum number of batches in queue + wait_time = 0.05 # in seconds + epoch = 0 + do_validation = bool(validation_data) + out_labels = ['loss'] + metrics = ['loss', 'val_loss'] + if not class_weight: + class_weight = {} + + # prepare callbacks + history = cbks.History() + if verbose: + callbacks = [history, cbks.BaseLogger()] + callbacks + else: + callbacks = [history] + callbacks + callbacks = cbks.CallbackList(callbacks) + + callbacks._set_model(self) + callbacks._set_params({ + 'nb_epoch': nb_epoch, + 'nb_sample': samples_per_epoch, + 'verbose': verbose, + 'do_validation': do_validation, + 'metrics': metrics, + }) + callbacks.on_train_begin() + + # util function to validate the batches produced + # by the generator + def input_validation(generator_output): + if type(generator_output) in [list, tuple]: + if len(generator_output) == 2: + data, sample_weight = generator_output + else: + _stop.set() + raise Exception('The generator output tuple must have ' + '2 dictionary elements: ' + '(data, sample_weight).') + elif type(generator_output) == dict: + data = generator_output + sample_weight = {} + else: + _stop.set() + raise Exception('The generator output must be ' + 'a data dictionary or a tuple ' + '(data, sample_weight).') + assert type(data) == dict + assert type(sample_weight) == dict + return data, sample_weight + + # start generator thread storing batches into a queue + generator_queue = queue.Queue() + _stop = threading.Event() + + def generator_task(): + i = 0 + while not _stop.is_set(): + try: + if generator_queue.qsize() < max_queue_size: + generator_output = generator.next() + generator_queue.put(generator_output) + i += 1 + else: + time.sleep(wait_time) + except KeyboardInterrupt: + _stop.set() + return + + generator_threads = [threading.Thread(target=generator_task) for _ in range(nb_worker)] + for thread in generator_threads: + thread.start() + + self.stop_training = False + while epoch < nb_epoch: + callbacks.on_epoch_begin(epoch) + samples_seen = 0 + batch_index = 0 + while samples_seen < samples_per_epoch: + while not _stop.is_set(): + if not generator_queue.empty(): + generator_output = generator_queue.get() + break + else: + time.sleep(wait_time) + + data, sample_weight = input_validation(generator_output) + + batch_logs = {} + batch_size = len(data[data.keys()[0]]) + batch_logs['batch'] = batch_index + batch_logs['size'] = batch_size + callbacks.on_batch_begin(batch_index, batch_logs) + outs = self.train_on_batch(data, + sample_weight=sample_weight, + class_weight=class_weight) + if type(outs) != list: + outs = [outs] + for l, o in zip(out_labels, outs): + batch_logs[l] = o + + callbacks.on_batch_end(batch_index, batch_logs) + + # construct epoch logs + epoch_logs = {} + batch_index += 1 + samples_seen += batch_size + if samples_seen >= samples_per_epoch: # epoch finished + if do_validation: + if hasattr(validation_data, 'next'): + # assumed to be generator + # TODO: call self.evaluate_generator() + _stop.set() + raise NotImplementedError() + else: + # input validation + data, sample_weight = input_validation(validation_data) + val_outs = 
self.evaluate(data, + sample_weight=sample_weight, + verbose=0) + if type(val_outs) != list: + val_outs = [val_outs] + # same labels assumed + for l, o in zip(out_labels, val_outs): + epoch_logs['val_' + l] = o + + callbacks.on_epoch_end(epoch, epoch_logs) + epoch += 1 + if self.stop_training: + break + _stop.set() + callbacks.on_train_end() + return history diff --git a/tests/keras/test_models.py b/tests/keras/test_models.py index 9779542c2..8cf7302c4 100644 --- a/tests/keras/test_models.py +++ b/tests/keras/test_models.py @@ -36,6 +36,36 @@ y_train = np_utils.to_categorical(y_train) # SEQUENTIAL TEST # #################### +def test_sequential_fit_generator(): + def data_generator(train): + if train: + max_batch_index = len(X_train) // batch_size + else: + max_batch_index = len(X_test) // batch_size + i = 0 + while 1: + if train: + yield (X_train[i * batch_size: (i + 1) * batch_size], y_train[i * batch_size: (i + 1) * batch_size]) + else: + yield (X_test[i * batch_size: (i + 1) * batch_size], y_test[i * batch_size: (i + 1) * batch_size]) + i += 1 + i = i % max_batch_index + + model = Sequential() + model.add(Dense(nb_hidden, input_shape=(input_dim,))) + model.add(Activation('relu')) + model.add(Dense(nb_class)) + model.add(Activation('softmax')) + model.compile(loss='categorical_crossentropy', optimizer='rmsprop') + + model.fit_generator(data_generator(True), len(X_train), nb_epoch, show_accuracy=False) + model.fit_generator(data_generator(True), len(X_train), nb_epoch, show_accuracy=True) + model.fit_generator(data_generator(True), len(X_train), nb_epoch, show_accuracy=False, validation_data=(X_test, y_test)) + model.fit_generator(data_generator(True), len(X_train), nb_epoch, show_accuracy=True, validation_data=(X_test, y_test)) + + loss = model.evaluate(X_train, y_train, verbose=0) + assert(loss < 0.8) + def test_sequential(): model = Sequential() @@ -55,7 +85,7 @@ def test_sequential(): model.train_on_batch(X_train[:32], y_train[:32]) - loss = model.evaluate(X_train, y_train, verbose=0) + loss = model.evaluate(X_test, y_test, verbose=0) assert(loss < 0.8) model.predict(X_test, verbose=0) @@ -74,7 +104,7 @@ def test_sequential(): model.load_weights(fname) os.remove(fname) - nloss = model.evaluate(X_train, y_train, verbose=0) + nloss = model.evaluate(X_test, y_test, verbose=0) assert(loss == nloss) # test json serialization @@ -108,7 +138,7 @@ def test_merge_sum(): model.fit([X_train, X_train], y_train, batch_size=batch_size, nb_epoch=nb_epoch, verbose=0) model.fit([X_train, X_train], y_train, batch_size=batch_size, nb_epoch=nb_epoch, verbose=0, shuffle=False) - loss = model.evaluate([X_train, X_train], y_train, verbose=0) + loss = model.evaluate([X_test, X_test], y_test, verbose=0) assert(loss < 0.7) model.predict([X_test, X_test], verbose=0) @@ -133,7 +163,7 @@ def test_merge_sum(): os.remove(fname) model.compile(loss='categorical_crossentropy', optimizer='rmsprop') - nloss = model.evaluate([X_train, X_train], y_train, verbose=0) + nloss = model.evaluate([X_test, X_test], y_test, verbose=0) assert(loss == nloss) @@ -193,7 +223,7 @@ def test_merge_concat(): model.fit([X_train, X_train], y_train, batch_size=batch_size, nb_epoch=nb_epoch, verbose=0) model.fit([X_train, X_train], y_train, batch_size=batch_size, nb_epoch=nb_epoch, verbose=0, shuffle=False) - loss = model.evaluate([X_train, X_train], y_train, verbose=0) + loss = model.evaluate([X_test, X_test], y_test, verbose=0) assert(loss < 0.7) model.predict([X_test, X_test], verbose=0) @@ -221,7 +251,7 @@ def test_merge_concat(): 
model.load_weights(fname) os.remove(fname) - nloss = model.evaluate([X_train, X_train], y_train, verbose=0) + nloss = model.evaluate([X_test, X_test], y_test, verbose=0) assert(loss == nloss) @@ -256,7 +286,7 @@ def test_merge_recursivity(): model.fit([X_train, X_train, X_train], y_train, batch_size=batch_size, nb_epoch=nb_epoch, verbose=0) model.fit([X_train, X_train, X_train], y_train, batch_size=batch_size, nb_epoch=nb_epoch, verbose=0, shuffle=False) - loss = model.evaluate([X_train, X_train, X_train], y_train, verbose=0) + loss = model.evaluate([X_test, X_test, X_test], y_test, verbose=0) assert(loss < 0.7) model.predict([X_test, X_test, X_test], verbose=0) @@ -269,7 +299,7 @@ def test_merge_recursivity(): model.load_weights(fname) os.remove(fname) - nloss = model.evaluate([X_train, X_train, X_train], y_train, verbose=0) + nloss = model.evaluate([X_test, X_test, X_test], y_test, verbose=0) assert(loss == nloss) @@ -293,7 +323,7 @@ def test_merge_overlap(): model.train_on_batch(X_train[:32], y_train[:32]) - loss = model.evaluate(X_train, y_train, verbose=0) + loss = model.evaluate(X_test, y_test, verbose=0) assert(loss < 0.9) model.predict(X_test, verbose=0) model.predict_classes(X_test, verbose=0) @@ -305,7 +335,7 @@ def test_merge_overlap(): model.load_weights(fname) os.remove(fname) - nloss = model.evaluate(X_train, y_train, verbose=0) + nloss = model.evaluate(X_test, y_test, verbose=0) assert(loss == nloss) @@ -344,7 +374,7 @@ def test_lambda(): model.fit([X_train, X_train], y_train, batch_size=batch_size, nb_epoch=nb_epoch, verbose=0) model.fit([X_train, X_train], y_train, batch_size=batch_size, nb_epoch=nb_epoch, verbose=0, shuffle=False) - loss = model.evaluate([X_train, X_train], y_train, verbose=0) + loss = model.evaluate([X_test, X_test], y_test, verbose=0) assert(loss < 0.7) model.predict([X_test, X_test], verbose=0) @@ -370,7 +400,7 @@ def test_lambda(): model.compile(loss='categorical_crossentropy', optimizer='rmsprop') os.remove(fname) - nloss = model.evaluate([X_train, X_train], y_train, verbose=0) + nloss = model.evaluate([X_test, X_test], y_test, verbose=0) assert(loss == nloss) @@ -416,7 +446,7 @@ def test_siamese_1(): model.fit([X_train, X_train], y_train, batch_size=batch_size, nb_epoch=nb_epoch, verbose=0) model.fit([X_train, X_train], y_train, batch_size=batch_size, nb_epoch=nb_epoch, verbose=0, shuffle=False) - loss = model.evaluate([X_train, X_train], y_train, verbose=0) + loss = model.evaluate([X_test, X_test], y_test, verbose=0) assert(loss < 0.8) model.predict([X_test, X_test], verbose=0) @@ -444,7 +474,7 @@ def test_siamese_1(): os.remove(fname) model.compile(loss='categorical_crossentropy', optimizer='rmsprop') - nloss = model.evaluate([X_train, X_train], y_train, verbose=0) + nloss = model.evaluate([X_test, X_test], y_test, verbose=0) assert(loss == nloss) @@ -477,7 +507,7 @@ def test_siamese_2(): model.fit([X_train, X_train], y_train, batch_size=batch_size, nb_epoch=nb_epoch, verbose=0) model.fit([X_train, X_train], y_train, batch_size=batch_size, nb_epoch=nb_epoch, verbose=0, shuffle=False) - loss = model.evaluate([X_train, X_train], y_train, verbose=0) + loss = model.evaluate([X_test, X_test], y_test, verbose=0) assert(loss < 0.8) model.predict([X_test, X_test], verbose=0) @@ -512,7 +542,7 @@ def test_siamese_2(): os.remove(fname) model.compile(loss='categorical_crossentropy', optimizer='rmsprop') - nloss = model.evaluate([X_train, X_train], y_train, verbose=0) + nloss = model.evaluate([X_test, X_test], y_test, verbose=0) assert(loss == nloss) @@ -532,6 
+562,35 @@ def test_siamese_2(): output_shape=(1,)) +def test_graph_fit_generator(): + def data_generator_graph(train): + while 1: + if train: + yield {'input1': X_train_graph, 'output1': y_train_graph} + else: + yield {'input1': X_test_graph, 'output1': y_test_graph} + + graph = Graph() + graph.add_input(name='input1', input_shape=(32,)) + + graph.add_node(Dense(16), name='dense1', input='input1') + graph.add_node(Dense(4), name='dense2', input='input1') + graph.add_node(Dense(4), name='dense3', input='dense1') + + graph.add_output(name='output1', + inputs=['dense2', 'dense3'], + merge_mode='sum') + graph.compile('rmsprop', {'output1': 'mse'}) + + graph.fit_generator(data_generator_graph(True), 1000, nb_epoch=4) + graph.fit_generator(data_generator_graph(True), 1000, nb_epoch=4) + graph.fit_generator(data_generator_graph(True), 1000, nb_epoch=4, validation_data={'input1': X_test_graph, 'output1': y_test_graph}) + graph.fit_generator(data_generator_graph(True), 1000, nb_epoch=4, validation_data={'input1': X_test_graph, 'output1': y_test_graph}) + + loss = graph.evaluate({'input1': X_test_graph, 'output1': y_test_graph}, verbose=0) + assert(loss < 3.) + + def test_1o_1i(): # test a non-sequential graph with 1 input and 1 output np.random.seed(1337) @@ -555,7 +614,7 @@ def test_1o_1i(): assert(len(out) == 1) loss = graph.test_on_batch({'input1': X_test_graph, 'output1': y_test_graph}) loss = graph.train_on_batch({'input1': X_test_graph, 'output1': y_test_graph}) - loss = graph.evaluate({'input1': X_test_graph, 'output1': y_test_graph}) + loss = graph.evaluate({'input1': X_test_graph, 'output1': y_test_graph}, verbose=0) assert(loss < 2.5) # test validation split From f9911c10b46df0ffaf93cc90a9c008f00026d23d Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Thu, 17 Dec 2015 22:32:57 -0800 Subject: [PATCH 063/145] Style fixes in datasets --- keras/datasets/cifar10.py | 1 - keras/datasets/imdb.py | 12 ++++++------ keras/datasets/mnist.py | 1 - keras/datasets/reuters.py | 18 ++++++------------ 4 files changed, 12 insertions(+), 20 deletions(-) diff --git a/keras/datasets/cifar10.py b/keras/datasets/cifar10.py index 92ead6b16..35773c40c 100644 --- a/keras/datasets/cifar10.py +++ b/keras/datasets/cifar10.py @@ -10,7 +10,6 @@ def load_data(): origin = "http://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz" path = get_file(dirname, origin=origin, untar=True) - nb_test_samples = 10000 nb_train_samples = 50000 X_train = np.zeros((nb_train_samples, 3, 32, 32), dtype="uint8") diff --git a/keras/datasets/imdb.py b/keras/datasets/imdb.py index 0c20b34be..4afae2fcd 100644 --- a/keras/datasets/imdb.py +++ b/keras/datasets/imdb.py @@ -2,12 +2,12 @@ from __future__ import absolute_import from six.moves import cPickle import gzip from .data_utils import get_file -import random from six.moves import zip import numpy as np -def load_data(path="imdb.pkl", nb_words=None, skip_top=0, maxlen=None, test_split=0.2, seed=113, +def load_data(path="imdb.pkl", nb_words=None, skip_top=0, + maxlen=None, test_split=0.2, seed=113, start_char=1, oov_char=2, index_from=3): path = get_file(path, origin="https://s3.amazonaws.com/text-datasets/imdb.pkl") @@ -57,10 +57,10 @@ def load_data(path="imdb.pkl", nb_words=None, skip_top=0, maxlen=None, test_spli nX.append(nx) X = nX - X_train = X[:int(len(X)*(1-test_split))] - y_train = labels[:int(len(X)*(1-test_split))] + X_train = X[:int(len(X) * (1 - test_split))] + y_train = labels[:int(len(X) * (1 - test_split))] - X_test = X[int(len(X)*(1-test_split)):] - y_test = 
labels[int(len(X)*(1-test_split)):] + X_test = X[int(len(X) * (1 - test_split)):] + y_test = labels[int(len(X) * (1 - test_split)):] return (X_train, y_train), (X_test, y_test) diff --git a/keras/datasets/mnist.py b/keras/datasets/mnist.py index 0cdc119b8..c493acf59 100644 --- a/keras/datasets/mnist.py +++ b/keras/datasets/mnist.py @@ -19,5 +19,4 @@ def load_data(path="mnist.pkl.gz"): data = cPickle.load(f, encoding="bytes") f.close() - return data # (X_train, y_train), (X_test, y_test) diff --git a/keras/datasets/reuters.py b/keras/datasets/reuters.py index 8c41a2dea..1ff7b74c0 100644 --- a/keras/datasets/reuters.py +++ b/keras/datasets/reuters.py @@ -1,18 +1,17 @@ # -*- coding: utf-8 -*- from __future__ import absolute_import from .data_utils import get_file -import random from six.moves import cPickle from six.moves import zip import numpy as np -def load_data(path="reuters.pkl", nb_words=None, skip_top=0, maxlen=None, test_split=0.2, seed=113, +def load_data(path="reuters.pkl", nb_words=None, skip_top=0, + maxlen=None, test_split=0.2, seed=113, start_char=1, oov_char=2, index_from=3): path = get_file(path, origin="https://s3.amazonaws.com/text-datasets/reuters.pkl") f = open(path, 'rb') - X, labels = cPickle.load(f) f.close() @@ -53,11 +52,11 @@ def load_data(path="reuters.pkl", nb_words=None, skip_top=0, maxlen=None, test_s nX.append(nx) X = nX - X_train = X[:int(len(X)*(1-test_split))] - y_train = labels[:int(len(X)*(1-test_split))] + X_train = X[:int(len(X) * (1 - test_split))] + y_train = labels[:int(len(X) * (1 - test_split))] - X_test = X[int(len(X)*(1-test_split)):] - y_test = labels[int(len(X)*(1-test_split)):] + X_test = X[int(len(X) * (1 - test_split)):] + y_test = labels[int(len(X) * (1 - test_split)):] return (X_train, y_train), (X_test, y_test) @@ -66,8 +65,3 @@ def get_word_index(path="reuters_word_index.pkl"): path = get_file(path, origin="https://s3.amazonaws.com/text-datasets/reuters_word_index.pkl") f = open(path, 'rb') return cPickle.load(f) - - -if __name__ == "__main__": - make_reuters_dataset() - (X_train, y_train), (X_test, y_test) = load_data() From 3f67168c44f8c7d34e5266a99bbdd90669a8eaa6 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Thu, 17 Dec 2015 23:03:23 -0800 Subject: [PATCH 064/145] Fix flaky test --- tests/keras/test_models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/keras/test_models.py b/tests/keras/test_models.py index 8cf7302c4..65984186b 100644 --- a/tests/keras/test_models.py +++ b/tests/keras/test_models.py @@ -64,7 +64,7 @@ def test_sequential_fit_generator(): model.fit_generator(data_generator(True), len(X_train), nb_epoch, show_accuracy=True, validation_data=(X_test, y_test)) loss = model.evaluate(X_train, y_train, verbose=0) - assert(loss < 0.8) + assert(loss < 0.9) def test_sequential(): From f9325e8fe5fe002f9720b7cb1c6d3614948a10ea Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Thu, 17 Dec 2015 23:39:12 -0800 Subject: [PATCH 065/145] Fix py3 compatibility. 
--- keras/models.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/keras/models.py b/keras/models.py index cc8ec3336..2ed5662a9 100644 --- a/keras/models.py +++ b/keras/models.py @@ -836,7 +836,7 @@ class Sequential(Model, containers.Sequential): while not _stop.is_set(): try: if generator_queue.qsize() < max_queue_size: - generator_output = generator.next() + generator_output = next(generator) generator_queue.put(generator_output) i += 1 else: @@ -1255,7 +1255,7 @@ class Graph(Model, containers.Graph): while not _stop.is_set(): try: if generator_queue.qsize() < max_queue_size: - generator_output = generator.next() + generator_output = next(generator) generator_queue.put(generator_output) i += 1 else: @@ -1284,7 +1284,7 @@ class Graph(Model, containers.Graph): data, sample_weight = input_validation(generator_output) batch_logs = {} - batch_size = len(data[data.keys()[0]]) + batch_size = len(data[list(data.keys())[0]]) batch_logs['batch'] = batch_index batch_logs['size'] = batch_size callbacks.on_batch_begin(batch_index, batch_logs) From 93b01aff157253ed28fe67b0ffc5ff3308dc3eba Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Fri, 18 Dec 2015 00:01:23 -0800 Subject: [PATCH 066/145] dem flaky tests --- tests/keras/test_models.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/keras/test_models.py b/tests/keras/test_models.py index 65984186b..b241d2420 100644 --- a/tests/keras/test_models.py +++ b/tests/keras/test_models.py @@ -139,7 +139,7 @@ def test_merge_sum(): model.fit([X_train, X_train], y_train, batch_size=batch_size, nb_epoch=nb_epoch, verbose=0, shuffle=False) loss = model.evaluate([X_test, X_test], y_test, verbose=0) - assert(loss < 0.7) + assert(loss < 0.8) model.predict([X_test, X_test], verbose=0) model.predict_classes([X_test, X_test], verbose=0) @@ -224,7 +224,7 @@ def test_merge_concat(): model.fit([X_train, X_train], y_train, batch_size=batch_size, nb_epoch=nb_epoch, verbose=0, shuffle=False) loss = model.evaluate([X_test, X_test], y_test, verbose=0) - assert(loss < 0.7) + assert(loss < 0.8) model.predict([X_test, X_test], verbose=0) model.predict_classes([X_test, X_test], verbose=0) @@ -287,7 +287,7 @@ def test_merge_recursivity(): model.fit([X_train, X_train, X_train], y_train, batch_size=batch_size, nb_epoch=nb_epoch, verbose=0, shuffle=False) loss = model.evaluate([X_test, X_test, X_test], y_test, verbose=0) - assert(loss < 0.7) + assert(loss < 0.8) model.predict([X_test, X_test, X_test], verbose=0) model.predict_classes([X_test, X_test, X_test], verbose=0) @@ -375,7 +375,7 @@ def test_lambda(): model.fit([X_train, X_train], y_train, batch_size=batch_size, nb_epoch=nb_epoch, verbose=0, shuffle=False) loss = model.evaluate([X_test, X_test], y_test, verbose=0) - assert(loss < 0.7) + assert(loss < 0.8) model.predict([X_test, X_test], verbose=0) model.predict_classes([X_test, X_test], verbose=0) From e5d3abdf09d8c281ca8817b6292a044673ba3007 Mon Sep 17 00:00:00 2001 From: "gw0 [http://gw.tnode.com/]" Date: Sat, 12 Dec 2015 11:09:28 +0100 Subject: [PATCH 067/145] Import pydot-ng with pydot as fallback when visualizing. 
--- keras/utils/visualize_util.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/keras/utils/visualize_util.py b/keras/utils/visualize_util.py index e5445d68b..3074bdf1a 100644 --- a/keras/utils/visualize_util.py +++ b/keras/utils/visualize_util.py @@ -1,10 +1,17 @@ -import pydot -# old pydot will not work with python3, must use one -# that works with python3 such as pydot2 or pydot import itertools from keras.layers.containers import Graph, Sequential from keras.layers.core import Merge +try: + # pydot-ng is a fork of pydot that is better maintained + import pydot_ng as pydot +except ImportError: + # fall back on pydot if necessary + import pydot +if not pydot.find_graphviz(): + raise RuntimeError("Failed to import pydot. You must install pydot" + " and graphviz for `pydotprint` to work.") + def layer_typename(layer): return type(layer).__module__ + "." + type(layer).__name__ From bdf084e35ef015eae45d3eed79758f652c717056 Mon Sep 17 00:00:00 2001 From: "gw0 [http://gw.tnode.com/]" Date: Fri, 18 Dec 2015 12:04:00 +0100 Subject: [PATCH 068/145] Fix shapes should be tuples. --- keras/layers/core.py | 2 +- keras/layers/normalization.py | 4 ++-- keras/layers/recurrent.py | 10 +++++----- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/keras/layers/core.py b/keras/layers/core.py index c5e2c13da..19a3386fa 100644 --- a/keras/layers/core.py +++ b/keras/layers/core.py @@ -1018,7 +1018,7 @@ class TimeDistributedDense(MaskedLayer): input_dim = self.input_shape[2] self.W = self.init((input_dim, self.output_dim)) - self.b = K.zeros((self.output_dim)) + self.b = K.zeros((self.output_dim,)) self.params = [self.W, self.b] self.regularizers = [] diff --git a/keras/layers/normalization.py b/keras/layers/normalization.py index db6e20398..e85ad941f 100644 --- a/keras/layers/normalization.py +++ b/keras/layers/normalization.py @@ -50,12 +50,12 @@ class BatchNormalization(Layer): input_shape = self.input_shape # starts with samples axis input_shape = input_shape[1:] - self.gamma = self.init((input_shape)) + self.gamma = self.init(input_shape) self.beta = K.zeros(input_shape) self.params = [self.gamma, self.beta] self.running_mean = K.zeros(input_shape) - self.running_std = K.ones((input_shape)) + self.running_std = K.ones(input_shape) # initialize self.updates: batch mean/std computation X = self.get_input(train=True) diff --git a/keras/layers/recurrent.py b/keras/layers/recurrent.py index 6b89c6800..13b9ceee1 100644 --- a/keras/layers/recurrent.py +++ b/keras/layers/recurrent.py @@ -205,7 +205,7 @@ class SimpleRNN(Recurrent): self.W = self.init((input_dim, self.output_dim)) self.U = self.inner_init((self.output_dim, self.output_dim)) - self.b = K.zeros((self.output_dim)) + self.b = K.zeros((self.output_dim,)) self.params = [self.W, self.U, self.b] if self.initial_weights is not None: @@ -391,19 +391,19 @@ class LSTM(Recurrent): self.W_i = self.init((input_dim, self.output_dim)) self.U_i = self.inner_init((self.output_dim, self.output_dim)) - self.b_i = K.zeros((self.output_dim)) + self.b_i = K.zeros((self.output_dim,)) self.W_f = self.init((input_dim, self.output_dim)) self.U_f = self.inner_init((self.output_dim, self.output_dim)) - self.b_f = self.forget_bias_init((self.output_dim)) + self.b_f = self.forget_bias_init((self.output_dim,)) self.W_c = self.init((input_dim, self.output_dim)) self.U_c = self.inner_init((self.output_dim, self.output_dim)) - self.b_c = K.zeros((self.output_dim)) + self.b_c = K.zeros((self.output_dim,)) self.W_o = self.init((input_dim, 
self.output_dim)) self.U_o = self.inner_init((self.output_dim, self.output_dim)) - self.b_o = K.zeros((self.output_dim)) + self.b_o = K.zeros((self.output_dim,)) self.params = [self.W_i, self.U_i, self.b_i, self.W_c, self.U_c, self.b_c, From 80096798fc3ecbd51e8e7a9109d6344d808d03a7 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Fri, 18 Dec 2015 10:09:53 -0800 Subject: [PATCH 069/145] Fix borked merge in test_models --- tests/keras/test_models.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/keras/test_models.py b/tests/keras/test_models.py index d53beefe6..7f1a75d48 100644 --- a/tests/keras/test_models.py +++ b/tests/keras/test_models.py @@ -42,6 +42,8 @@ def _get_test_data(): #################### def test_sequential_fit_generator(): + (X_train, y_train), (X_test, y_test) = _get_test_data() + def data_generator(train): if train: max_batch_index = len(X_train) // batch_size From dd58103a3cdee9cf57c3def74de22032c7765348 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Fri, 18 Dec 2015 15:10:52 -0800 Subject: [PATCH 070/145] Better MemNN example --- examples/babi_memnn.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/examples/babi_memnn.py b/examples/babi_memnn.py index 31f933e58..ba45bbabc 100644 --- a/examples/babi_memnn.py +++ b/examples/babi_memnn.py @@ -9,7 +9,7 @@ References: "End-To-End Memory Networks", http://arxiv.org/abs/1503.08895 -Reaches 93% accuracy on task 'single_supporting_fact_10k' after 70 epochs. +Reaches 98.6% accuracy on task 'single_supporting_fact_10k' after 120 epochs. Time per epoch: 3s on CPU (core i7). ''' @@ -153,12 +153,14 @@ input_encoder_m = Sequential() input_encoder_m.add(Embedding(input_dim=vocab_size, output_dim=64, input_length=story_maxlen)) +input_encoder_m.add(Dropout(0.3)) # output: (samples, story_maxlen, embedding_dim) # embed the question into a sequence of vectors question_encoder = Sequential() question_encoder.add(Embedding(input_dim=vocab_size, output_dim=64, input_length=query_maxlen)) +question_encoder.add(Dropout(0.3)) # output: (samples, query_maxlen, embedding_dim) # compute a 'match' between input sequence elements (which are vectors) # and the question vector sequence @@ -172,6 +174,7 @@ input_encoder_c = Sequential() input_encoder_c.add(Embedding(input_dim=vocab_size, output_dim=query_maxlen, input_length=story_maxlen)) +input_encoder_c.add(Dropout(0.3)) # output: (samples, story_maxlen, query_maxlen) # sum the match vector with the input vector: response = Sequential() @@ -185,9 +188,9 @@ answer = Sequential() answer.add(Merge([response, question_encoder], mode='concat', concat_axis=-1)) # the original paper uses a matrix multiplication for this reduction step. # we choose to use a RNN instead. -answer.add(LSTM(64)) +answer.add(LSTM(32)) # one regularization layer -- more would probably be needed. 
-answer.add(Dropout(0.25))
+answer.add(Dropout(0.3))
 answer.add(Dense(vocab_size))
 # we output a probability distribution over the vocabulary
 answer.add(Activation('softmax'))
@@ -196,6 +199,6 @@ answer.compile(optimizer='rmsprop', loss='categorical_crossentropy')
 # Note: you could use a Graph model to avoid repeating the input twice
 answer.fit([inputs_train, queries_train, inputs_train], answers_train,
            batch_size=32,
-           nb_epoch=70,
+           nb_epoch=120,
            show_accuracy=True,
            validation_data=([inputs_test, queries_test, inputs_test], answers_test))

From 8d3b8ff62795733ba8adcedfe454728a6f2a1181 Mon Sep 17 00:00:00 2001
From: fchollet
Date: Sat, 19 Dec 2015 19:07:50 -0800
Subject: [PATCH 071/145] Improve callback functionality

---
 keras/callbacks.py | 33 +++++++++++++++++++++++++++++----
 1 file changed, 29 insertions(+), 4 deletions(-)

diff --git a/keras/callbacks.py b/keras/callbacks.py
index d6050eb2f..2cb351cc9 100644
--- a/keras/callbacks.py
+++ b/keras/callbacks.py
@@ -8,6 +8,7 @@ import warnings
 from collections import deque
 
 from .utils.generic_utils import Progbar
+from keras import backend as K
 
 
 class CallbackList(object):
@@ -301,23 +302,46 @@ class EarlyStopping(Callback):
         patience: number of epochs with no improvement
             after which training will be stopped.
         verbose: verbosity mode.
+        mode: one of {auto, min, max}. In 'min' mode,
+            training will stop when the quantity
+            monitored has stopped decreasing; in 'max'
+            mode it will stop when the quantity
+            monitored has stopped increasing.
     '''
-    def __init__(self, monitor='val_loss', patience=0, verbose=0):
+    def __init__(self, monitor='val_loss', patience=0, verbose=0, mode='auto'):
         super(Callback, self).__init__()
 
         self.monitor = monitor
         self.patience = patience
         self.verbose = verbose
-        self.best = np.Inf
         self.wait = 0
 
+        if mode not in ['auto', 'min', 'max']:
+            warnings.warn('EarlyStopping mode %s is unknown, '
+                          'fallback to auto mode' % (mode), RuntimeWarning)
+            mode = 'auto'
+
+        if mode == 'min':
+            self.monitor_op = np.less
+            self.best = np.Inf
+        elif mode == 'max':
+            self.monitor_op = np.greater
+            self.best = -np.Inf
+        else:
+            if 'acc' in self.monitor:
+                self.monitor_op = np.greater
+                self.best = -np.Inf
+            else:
+                self.monitor_op = np.less
+                self.best = np.Inf
+
     def on_epoch_end(self, epoch, logs={}):
         current = logs.get(self.monitor)
         if current is None:
             warnings.warn('Early stopping requires %s available!' %
                           (self.monitor), RuntimeWarning)
 
-        if current < self.best:
+        if self.monitor_op(current, self.best):
             self.best = current
             self.wait = 0
         else:
@@ -380,4 +404,5 @@ class LearningRateScheduler(Callback):
         self.schedule = schedule
 
     def on_epoch_begin(self, epoch, logs={}):
-        self.model.optimizer.lr.set_value(self.schedule(epoch))
+        assert hasattr(self.model.optimizer, 'lr'), 'Optimizer must have a "lr" attribute.'
+ K.set_value(self.model.optimizer.lr, self.schedule(epoch)) From e34f9e6debf3b39da77c72e8a4c75cf7ccd94ef9 Mon Sep 17 00:00:00 2001 From: fchollet Date: Sat, 19 Dec 2015 19:08:03 -0800 Subject: [PATCH 072/145] Add tests for callbacks --- tests/keras/test_callbacks.py | 117 ++++++++++++++++++++++++++++++++++ 1 file changed, 117 insertions(+) create mode 100644 tests/keras/test_callbacks.py diff --git a/tests/keras/test_callbacks.py b/tests/keras/test_callbacks.py new file mode 100644 index 000000000..71e55baf5 --- /dev/null +++ b/tests/keras/test_callbacks.py @@ -0,0 +1,117 @@ +import pytest +import os +import numpy as np +np.random.seed(1337) + +from keras import callbacks +from keras.models import Graph, Sequential +from keras.layers.core import Dense +from keras.utils.test_utils import get_test_data +from keras import backend as K + +input_dim = 2 +nb_hidden = 4 +nb_class = 2 +batch_size = 5 +train_samples = 20 +test_samples = 20 + + +def test_ModelCheckpoint(): + filepath = 'checkpoint.h5' + (X_train, y_train), (X_test, y_test) = get_test_data(nb_train=train_samples, + nb_test=test_samples, + input_shape=(input_dim,), + classification=True, + nb_class=nb_class) + # case 1 + monitor = 'val_loss' + save_best_only = False + mode = 'auto' + + model = Sequential() + model.add(Dense(nb_hidden, input_dim=input_dim, activation='relu')) + model.add(Dense(nb_class, activation='softmax')) + model.compile(loss='categorical_crossentropy', optimizer='rmsprop') + + cbks = [callbacks.ModelCheckpoint(filepath, monitor=monitor, + save_best_only=save_best_only, mode=mode)] + model.fit(X_train, y_train, batch_size=batch_size, show_accuracy=True, + validation_data=(X_test, y_test), callbacks=cbks, nb_epoch=1) + assert os.path.exists(filepath) + os.remove(filepath) + + # case 2 + mode = 'min' + cbks = [callbacks.ModelCheckpoint(filepath, monitor=monitor, + save_best_only=save_best_only, mode=mode)] + model.fit(X_train, y_train, batch_size=batch_size, show_accuracy=True, + validation_data=(X_test, y_test), callbacks=cbks, nb_epoch=1) + assert os.path.exists(filepath) + os.remove(filepath) + + # case 3 + mode = 'max' + monitor = 'val_acc' + cbks = [callbacks.ModelCheckpoint(filepath, monitor=monitor, + save_best_only=save_best_only, mode=mode)] + model.fit(X_train, y_train, batch_size=batch_size, show_accuracy=True, + validation_data=(X_test, y_test), callbacks=cbks, nb_epoch=1) + assert os.path.exists(filepath) + os.remove(filepath) + + # case 4 + save_best_only = True + cbks = [callbacks.ModelCheckpoint(filepath, monitor=monitor, + save_best_only=save_best_only, mode=mode)] + model.fit(X_train, y_train, batch_size=batch_size, show_accuracy=True, + validation_data=(X_test, y_test), callbacks=cbks, nb_epoch=1) + assert os.path.exists(filepath) + os.remove(filepath) + + +def test_EarlyStopping(): + (X_train, y_train), (X_test, y_test) = get_test_data(nb_train=train_samples, + nb_test=test_samples, + input_shape=(input_dim,), + classification=True, + nb_class=nb_class) + model = Sequential() + model.add(Dense(nb_hidden, input_dim=input_dim, activation='relu')) + model.add(Dense(nb_class, activation='softmax')) + model.compile(loss='categorical_crossentropy', optimizer='rmsprop') + + mode = 'max' + monitor = 'val_acc' + patience = 0 + cbks = [callbacks.EarlyStopping(patience=patience, monitor=monitor, mode=mode)] + history = model.fit(X_train, y_train, batch_size=batch_size, show_accuracy=True, + validation_data=(X_test, y_test), callbacks=cbks, nb_epoch=20) + + mode = 'auto' + monitor = 'val_acc' + patience = 2 
+ cbks = [callbacks.EarlyStopping(patience=patience, monitor=monitor, mode=mode)] + history = model.fit(X_train, y_train, batch_size=batch_size, show_accuracy=True, + validation_data=(X_test, y_test), callbacks=cbks, nb_epoch=20) + + +def test_LearningRateScheduler(): + (X_train, y_train), (X_test, y_test) = get_test_data(nb_train=train_samples, + nb_test=test_samples, + input_shape=(input_dim,), + classification=True, + nb_class=nb_class) + model = Sequential() + model.add(Dense(nb_hidden, input_dim=input_dim, activation='relu')) + model.add(Dense(nb_class, activation='softmax')) + model.compile(loss='categorical_crossentropy', optimizer='sgd') + + cbks = [callbacks.LearningRateScheduler(lambda x: 1. / (1. + x))] + model.fit(X_train, y_train, batch_size=batch_size, show_accuracy=True, + validation_data=(X_test, y_test), callbacks=cbks, nb_epoch=5) + assert (float(K.get_value(model.optimizer.lr)) - 0.2) < K.epsilon() + + +if __name__ == '__main__': + pytest.main([__file__]) From 6ec1f7a498097c8176598e8c60c50b103353b0ca Mon Sep 17 00:00:00 2001 From: fchollet Date: Sat, 19 Dec 2015 19:46:57 -0800 Subject: [PATCH 073/145] Fix callback tests --- tests/keras/test_callbacks.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/keras/test_callbacks.py b/tests/keras/test_callbacks.py index 71e55baf5..e836d0fea 100644 --- a/tests/keras/test_callbacks.py +++ b/tests/keras/test_callbacks.py @@ -8,6 +8,7 @@ from keras.models import Graph, Sequential from keras.layers.core import Dense from keras.utils.test_utils import get_test_data from keras import backend as K +from keras.utils import np_utils input_dim = 2 nb_hidden = 4 @@ -24,6 +25,8 @@ def test_ModelCheckpoint(): input_shape=(input_dim,), classification=True, nb_class=nb_class) + y_test = np_utils.to_categorical(y_test) + y_train = np_utils.to_categorical(y_train) # case 1 monitor = 'val_loss' save_best_only = False @@ -76,6 +79,8 @@ def test_EarlyStopping(): input_shape=(input_dim,), classification=True, nb_class=nb_class) + y_test = np_utils.to_categorical(y_test) + y_train = np_utils.to_categorical(y_train) model = Sequential() model.add(Dense(nb_hidden, input_dim=input_dim, activation='relu')) model.add(Dense(nb_class, activation='softmax')) @@ -102,6 +107,8 @@ def test_LearningRateScheduler(): input_shape=(input_dim,), classification=True, nb_class=nb_class) + y_test = np_utils.to_categorical(y_test) + y_train = np_utils.to_categorical(y_train) model = Sequential() model.add(Dense(nb_hidden, input_dim=input_dim, activation='relu')) model.add(Dense(nb_class, activation='softmax')) @@ -115,3 +122,4 @@ def test_LearningRateScheduler(): if __name__ == '__main__': pytest.main([__file__]) + From 49c343f83648cea3432cf18f3e7f8e3134d2328d Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Mon, 21 Dec 2015 10:26:31 -0800 Subject: [PATCH 074/145] Update CONTRIBUTING with info wrt commit squashing --- CONTRIBUTING.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 73ae79972..17affa401 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -49,7 +49,7 @@ We love pull requests. Here's a quick guide: - with the Theano backend, on Python 2.7 and Python 3.5 - with the TensorFlow backend, on Python 2.7 -7. When committing, use appropriate, descriptive commit messages. Make sure that your branch history is not a string of "bug fix", "fix", "oops", etc. 
When submitting your PR, squash your commit history into 1-3 easy to follow commits, to make sure the project history stays clean and readable. +7. When committing, use appropriate, descriptive commit messages. Make sure that your branch history is not a string of "bug fix", "fix", "oops", etc. When submitting your PR, squash your commits into a single commit with an appropriate commit message, to make sure the project history stays clean and readable. See ['rebase and squash'](http://rebaseandsqua.sh/) for technical help on how to squash your commits. 8. Update the documentation. If introducing new functionality, make sure you include code snippets demonstrating the usage of your new feature. From 80eab1bc022d88d202d5aec87b4d6f446cf5d77a Mon Sep 17 00:00:00 2001 From: tboquet Date: Mon, 21 Dec 2015 11:58:23 -0800 Subject: [PATCH 075/145] Add TensorBoard visualization callback. --- keras/callbacks.py | 104 +++++++++++++++++++++++++++++++--- tests/keras/test_callbacks.py | 82 +++++++++++++++++++++++++++ 2 files changed, 179 insertions(+), 7 deletions(-) diff --git a/keras/callbacks.py b/keras/callbacks.py index 2cb351cc9..a5cc1bbcd 100644 --- a/keras/callbacks.py +++ b/keras/callbacks.py @@ -8,6 +8,7 @@ import warnings from collections import deque from .utils.generic_utils import Progbar +from .backend import _BACKEND from keras import backend as K @@ -44,9 +45,11 @@ class CallbackList(object): callback.on_batch_begin(batch, logs) self._delta_ts_batch_begin.append(time.time() - t_before_callbacks) delta_t_median = np.median(self._delta_ts_batch_begin) - if self._delta_t_batch > 0. and delta_t_median > 0.95 * self._delta_t_batch and delta_t_median > 0.1: + if self._delta_t_batch > 0. and delta_t_median > 0.95 * \ + self._delta_t_batch and delta_t_median > 0.1: warnings.warn('Method on_batch_begin() is slow compared ' - 'to the batch update (%f). Check your callbacks.' % delta_t_median) + 'to the batch update (%f). Check your callbacks.' + % delta_t_median) self._t_enter_batch = time.time() def on_batch_end(self, batch, logs={}): @@ -58,9 +61,11 @@ class CallbackList(object): callback.on_batch_end(batch, logs) self._delta_ts_batch_end.append(time.time() - t_before_callbacks) delta_t_median = np.median(self._delta_ts_batch_end) - if self._delta_t_batch > 0. and delta_t_median > 0.95 * self._delta_t_batch and delta_t_median > 0.1: + if self._delta_t_batch > 0. and delta_t_median > 0.95 * \ + self._delta_t_batch and delta_t_median > 0.1: warnings.warn('Method on_batch_end() is slow compared ' - 'to the batch update (%f). Check your callbacks.' % delta_t_median) + 'to the batch update (%f). Check your callbacks.' 
+ % delta_t_median) def on_train_begin(self, logs={}): for callback in self.callbacks: @@ -252,7 +257,8 @@ class ModelCheckpoint(Callback): if mode not in ['auto', 'min', 'max']: warnings.warn('ModelCheckpoint mode %s is unknown, ' - 'fallback to auto mode' % (self.mode), RuntimeWarning) + 'fallback to auto mode' % (self.mode), + RuntimeWarning) mode = 'auto' if mode == 'min': @@ -279,7 +285,8 @@ class ModelCheckpoint(Callback): else: if self.monitor_op(current, self.best): if self.verbose > 0: - print('Epoch %05d: %s improved from %0.5f to %0.5f, saving model to %s' + print('Epoch %05d: %s improved from %0.5f to %0.5f,' + ' saving model to %s' % (epoch, self.monitor, self.best, current, filepath)) self.best = current @@ -404,5 +411,88 @@ class LearningRateScheduler(Callback): self.schedule = schedule def on_epoch_begin(self, epoch, logs={}): - assert hasattr(self.model.optimizer, 'lr'), 'Optimizer must have a "lr" attribute.' + assert hasattr(self.model.optimizer, 'lr'), \ + 'Optimizer must have a "lr" attribute.' K.set_value(self.model.optimizer.lr, self.schedule(epoch)) + + +class TensorBoard(Callback): + ''' Tensorboard basic visualizations. + + This callback writes a log usable with TensorBoard. + TensorBoard is a visualization tools provided with TensorFlow. + + If you have installed TensorFlow with pip, you should be able + to launch TensorBoard from the command line: + ``` + tensorboard --logdir=/full_path_to_your_logs + ``` + You could find more information at: + https://www.tensorflow.org/versions/master/how_tos/summaries_and_tensorboard/index.html + + # Arguments + model: a keras model linked to a tensorflow session + feed: a dictionnary mapping tensors (inputs, outputs, weigths) + from the model._test keras function i.e. model._test.inputs + to the corresponding arrays. 
+ freq: the frequency at which the callback will output + parameters and metrics to the log + log_dir: the path of the directory where to save the log + files to be parsed by tensorboard + ''' + def __init__(self, model, feed, freq=2, log_dir='./logs', + show_accuracy=False): + super(Callback, self).__init__() + assert _BACKEND == 'tensorflow', \ + 'TensorBoard callback only works with the tensorflow backend' + import tensorflow as tf + import keras.backend.tensorflow_backend as KTF + + self.model = model + self.freq = freq + self.log_dir = log_dir + self.sess = KTF._get_session() + self.feed = feed + mod_type = self.model.get_config()['name'] + if mod_type == 'Sequential': + layers = {l.get_config()['name']: l for l in self.model.layers} + elif mod_type == 'Graph': + layers = self.model.nodes + else: + raise Exception('Unrecognized model:', + self.model.get_config()['name']) + for l in layers: + cur_layer = layers[l] + if hasattr(cur_layer, 'W'): + tf.histogram_summary('{}_W'.format(l), cur_layer.W) + if hasattr(cur_layer, 'b'): + tf.histogram_summary('{}_b'.format(l), cur_layer.b) + if hasattr(cur_layer, 'get_output'): + tf.histogram_summary('{}_out'.format(l), + cur_layer.get_output()) + f_output = self.model._test + if mod_type == 'Sequential': + if show_accuracy is True: + f_output = self.model._test_with_acc + tf.scalar_summary('Accuracy', + f_output.outputs[1]) + tf.scalar_summary('Loss', + f_output.outputs[0]) + else: + losses = [self.model.loss[loss] for loss in self.model.loss] + if len(losses) > 1: + l_name = " + ".join(losses) + else: + l_name = losses[0] + tf.scalar_summary(l_name, + f_output.outputs[0]) + self.merged = tf.merge_all_summaries() + self.writer = tf.train.SummaryWriter(self.log_dir, + self.sess.graph_def) + + def on_epoch_end(self, epoch, logs={}): + if epoch % self.freq == 0: + result = self.sess.run([self.merged], + feed_dict=self.feed) + summary_str = result[0] + self.writer.add_summary(summary_str, epoch) diff --git a/tests/keras/test_callbacks.py b/tests/keras/test_callbacks.py index e836d0fea..aafda6ed3 100644 --- a/tests/keras/test_callbacks.py +++ b/tests/keras/test_callbacks.py @@ -9,6 +9,7 @@ from keras.layers.core import Dense from keras.utils.test_utils import get_test_data from keras import backend as K from keras.utils import np_utils +from keras.callbacks import _BACKEND input_dim = 2 nb_hidden = 4 @@ -120,6 +121,87 @@ def test_LearningRateScheduler(): assert (float(K.get_value(model.optimizer.lr)) - 0.2) < K.epsilon() +@pytest.mark.skipif(_BACKEND != 'tensorflow', + reason="Requires tensorflow backend") +def test_TensorBoard(): + import shutil + import tensorflow as tf + import keras.backend.tensorflow_backend as KTF + old_session = KTF._get_session() + filepath = './logs' + (X_train, y_train), (X_test, y_test) = get_test_data(nb_train=train_samples, + nb_test=test_samples, + input_shape=(input_dim,), + classification=True, + nb_class=nb_class) + y_test = np_utils.to_categorical(y_test) + y_train = np_utils.to_categorical(y_train) + # case 1 Sequential wo accuracy + with tf.Graph().as_default(): + session = tf.Session('') + KTF._set_session(session) + model = Sequential() + model.add(Dense(nb_hidden, input_dim=input_dim, activation='relu')) + model.add(Dense(nb_class, activation='softmax')) + model.compile(loss='categorical_crossentropy', optimizer='sgd') + + feed = {model._test.inputs[0]: X_train, model._test.inputs[1]: y_train, + model._test.inputs[2]: np.ones(train_samples)} + tsb = callbacks.TensorBoard(model=model, feed=feed, 
log_dir=filepath, + show_accuracy=False) + cbks = [tsb] + model.fit(X_train, y_train, batch_size=batch_size, show_accuracy=True, + validation_data=(X_test, y_test), callbacks=cbks, nb_epoch=5) + assert os.path.exists(filepath) + shutil.rmtree(filepath) + + # case 2 Sequential w accuracy + with tf.Graph().as_default(): + session = tf.Session('') + KTF._set_session(session) + model = Sequential() + model.add(Dense(nb_hidden, input_dim=input_dim, activation='relu')) + model.add(Dense(nb_class, activation='softmax')) + model.compile(loss='categorical_crossentropy', optimizer='sgd') + + feed = {model._test.inputs[0]: X_train, model._test.inputs[1]: y_train, + model._test.inputs[2]: np.ones(train_samples)} + tsb = callbacks.TensorBoard(model=model, feed=feed, log_dir=filepath, + show_accuracy=False) + cbks = [tsb] + model.fit(X_train, y_train, batch_size=batch_size, show_accuracy=True, + validation_data=(X_test, y_test), callbacks=cbks, nb_epoch=5) + assert os.path.exists(filepath) + shutil.rmtree(filepath) + + # case 3 Graph + with tf.Graph().as_default(): + session = tf.Session('') + KTF._set_session(session) + model = Graph() + model.add_input(name='X_vars', input_shape=(input_dim, )) + + model.add_node(Dense(nb_hidden, activation="sigmoid"), + name='Dense1', input='X_vars') + model.add_node(Dense(nb_class, activation="softmax"), + name='last_dense', + input='Dense1') + model.add_output(name='output', input='last_dense') + model.compile(optimizer='sgd', loss={'output': 'mse'}) + + feed = {model._test.inputs[0]: X_train, model._test.inputs[1]: y_train, + model._test.inputs[2]: np.ones(train_samples)} + tsb = callbacks.TensorBoard(model=model, feed=feed, log_dir=filepath, + show_accuracy=False) + cbks = [tsb] + model.fit({'X_vars': X_train, 'output': y_train}, batch_size=batch_size, + validation_data={'X_vars': X_test, 'output': y_test}, + callbacks=cbks, nb_epoch=5) + assert os.path.exists(filepath) + shutil.rmtree(filepath) + + KTF._set_session(old_session) + if __name__ == '__main__': pytest.main([__file__]) From b602a93e17964cfcedda09eec90f140665246431 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Mon, 21 Dec 2015 13:52:47 -0800 Subject: [PATCH 076/145] Update TensorBoard callback --- keras/backend/tensorflow_backend.py | 1 + keras/callbacks.py | 115 ++++++++++++++++------------ keras/models.py | 2 + tests/keras/test_callbacks.py | 28 +++---- 4 files changed, 76 insertions(+), 70 deletions(-) diff --git a/keras/backend/tensorflow_backend.py b/keras/backend/tensorflow_backend.py index bccbedb1f..fcd574dc1 100644 --- a/keras/backend/tensorflow_backend.py +++ b/keras/backend/tensorflow_backend.py @@ -235,6 +235,7 @@ def permute_dimensions(x, pattern): ''' return tf.transpose(x, perm=pattern) + def repeat_elements(x, rep, axis): '''Repeats the elements of a tensor along an axis, like np.repeat diff --git a/keras/callbacks.py b/keras/callbacks.py index a5cc1bbcd..79c16d7c3 100644 --- a/keras/callbacks.py +++ b/keras/callbacks.py @@ -8,7 +8,6 @@ import warnings from collections import deque from .utils.generic_utils import Progbar -from .backend import _BACKEND from keras import backend as K @@ -431,68 +430,82 @@ class TensorBoard(Callback): https://www.tensorflow.org/versions/master/how_tos/summaries_and_tensorboard/index.html # Arguments - model: a keras model linked to a tensorflow session - feed: a dictionnary mapping tensors (inputs, outputs, weigths) - from the model._test keras function i.e. model._test.inputs - to the corresponding arrays. 
- freq: the frequency at which the callback will output - parameters and metrics to the log log_dir: the path of the directory where to save the log files to be parsed by tensorboard + histogram_freq: frequency (in epochs) at which to compute activation + histograms for the layers of the model. If set to 0, + histograms won't be computed. ''' - def __init__(self, model, feed, freq=2, log_dir='./logs', - show_accuracy=False): + def __init__(self, log_dir='./logs', histogram_freq=0): super(Callback, self).__init__() - assert _BACKEND == 'tensorflow', \ - 'TensorBoard callback only works with the tensorflow backend' + if K._BACKEND != 'tensorflow': + raise Exception('TensorBoard callback only works ' + 'with the TensorFlow backend') + self.log_dir = log_dir + self.histogram_freq = histogram_freq + + def _set_model(self, model): import tensorflow as tf import keras.backend.tensorflow_backend as KTF self.model = model - self.freq = freq - self.log_dir = log_dir self.sess = KTF._get_session() - self.feed = feed - mod_type = self.model.get_config()['name'] - if mod_type == 'Sequential': - layers = {l.get_config()['name']: l for l in self.model.layers} - elif mod_type == 'Graph': - layers = self.model.nodes - else: - raise Exception('Unrecognized model:', - self.model.get_config()['name']) - for l in layers: - cur_layer = layers[l] - if hasattr(cur_layer, 'W'): - tf.histogram_summary('{}_W'.format(l), cur_layer.W) - if hasattr(cur_layer, 'b'): - tf.histogram_summary('{}_b'.format(l), cur_layer.b) - if hasattr(cur_layer, 'get_output'): - tf.histogram_summary('{}_out'.format(l), - cur_layer.get_output()) - f_output = self.model._test - if mod_type == 'Sequential': - if show_accuracy is True: - f_output = self.model._test_with_acc - tf.scalar_summary('Accuracy', - f_output.outputs[1]) - tf.scalar_summary('Loss', - f_output.outputs[0]) - else: - losses = [self.model.loss[loss] for loss in self.model.loss] - if len(losses) > 1: - l_name = " + ".join(losses) + if self.histogram_freq: + mod_type = self.model.get_config()['name'] + if mod_type == 'Sequential': + layers = {l.get_config()['name']: l for l in self.model.layers} + elif mod_type == 'Graph': + layers = self.model.nodes else: - l_name = losses[0] - tf.scalar_summary(l_name, - f_output.outputs[0]) + raise Exception('Unrecognized model:', + self.model.get_config()['name']) + for l in layers: + cur_layer = layers[l] + if hasattr(cur_layer, 'W'): + tf.histogram_summary('{}_W'.format(l), cur_layer.W) + if hasattr(cur_layer, 'b'): + tf.histogram_summary('{}_b'.format(l), cur_layer.b) + if hasattr(cur_layer, 'get_output'): + tf.histogram_summary('{}_out'.format(l), + cur_layer.get_output()) self.merged = tf.merge_all_summaries() self.writer = tf.train.SummaryWriter(self.log_dir, self.sess.graph_def) + def on_epoch_begin(self, epoch, logs={}): + self.seen = 0 + self.totals = {} + + def on_batch_end(self, batch, logs={}): + batch_size = logs.get('size', 0) + self.seen += batch_size + for k, v in logs.items(): + if k in self.totals: + self.totals[k] += v * batch_size + else: + self.totals[k] = v * batch_size + def on_epoch_end(self, epoch, logs={}): - if epoch % self.freq == 0: - result = self.sess.run([self.merged], - feed_dict=self.feed) - summary_str = result[0] - self.writer.add_summary(summary_str, epoch) + import tensorflow as tf + + if self.model.validation_data and self.histogram_freq: + if epoch % self.histogram_freq == 0: + if self.params.get('show_accuracy'): + test_function = self.model._test_with_acc + else: + test_function = self.model._test 
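To make the size-weighted accumulation in `on_batch_end` above concrete, here is a small self-contained illustration; the final division into a per-epoch average is my addition for clarity, not something the callback does itself:

```python
totals, seen = {}, 0
for batch_logs in ({'size': 32, 'loss': 0.9}, {'size': 32, 'loss': 0.7}):
    size = batch_logs.get('size', 0)
    seen += size
    for key, value in batch_logs.items():
        if key in ('batch', 'size'):
            continue
        # weight each metric by the number of samples in the batch
        totals[key] = totals.get(key, 0.) + value * size

epoch_average = {key: total / seen for key, total in totals.items()}
assert abs(epoch_average['loss'] - 0.8) < 1e-12
```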
+ names = [v.name for v in test_function.inputs] + feed_dict = dict(zip(names, self.model.validation_data)) + result = self.sess.run([self.merged], feed_dict=feed_dict) + summary_str = result[0] + self.writer.add_summary(summary_str, epoch) + + for name, value in self.totals.items() + logs.items(): + if name in ['batch', 'size']: + continue + summary = tf.Summary() + summary_value = summary.value.add() + summary_value.simple_value = value + summary_value.tag = name + self.writer.add_summary(summary, epoch) + self.writer.flush() diff --git a/keras/models.py b/keras/models.py index 2ed5662a9..85ab2c844 100644 --- a/keras/models.py +++ b/keras/models.py @@ -185,6 +185,8 @@ class Model(object): Abstract fit function for f(ins). Assume that f returns a list, labelled by out_labels. ''' + self.training_data = ins + self.validation_data = val_ins do_validation = False if val_f and val_ins: do_validation = True diff --git a/tests/keras/test_callbacks.py b/tests/keras/test_callbacks.py index aafda6ed3..905705693 100644 --- a/tests/keras/test_callbacks.py +++ b/tests/keras/test_callbacks.py @@ -9,7 +9,6 @@ from keras.layers.core import Dense from keras.utils.test_utils import get_test_data from keras import backend as K from keras.utils import np_utils -from keras.callbacks import _BACKEND input_dim = 2 nb_hidden = 4 @@ -121,7 +120,7 @@ def test_LearningRateScheduler(): assert (float(K.get_value(model.optimizer.lr)) - 0.2) < K.epsilon() -@pytest.mark.skipif(_BACKEND != 'tensorflow', +@pytest.mark.skipif(K._BACKEND != 'tensorflow', reason="Requires tensorflow backend") def test_TensorBoard(): import shutil @@ -145,13 +144,10 @@ def test_TensorBoard(): model.add(Dense(nb_class, activation='softmax')) model.compile(loss='categorical_crossentropy', optimizer='sgd') - feed = {model._test.inputs[0]: X_train, model._test.inputs[1]: y_train, - model._test.inputs[2]: np.ones(train_samples)} - tsb = callbacks.TensorBoard(model=model, feed=feed, log_dir=filepath, - show_accuracy=False) + tsb = callbacks.TensorBoard(log_dir=filepath, histogram_freq=1) cbks = [tsb] model.fit(X_train, y_train, batch_size=batch_size, show_accuracy=True, - validation_data=(X_test, y_test), callbacks=cbks, nb_epoch=5) + validation_data=(X_test, y_test), callbacks=cbks, nb_epoch=2) assert os.path.exists(filepath) shutil.rmtree(filepath) @@ -164,13 +160,10 @@ def test_TensorBoard(): model.add(Dense(nb_class, activation='softmax')) model.compile(loss='categorical_crossentropy', optimizer='sgd') - feed = {model._test.inputs[0]: X_train, model._test.inputs[1]: y_train, - model._test.inputs[2]: np.ones(train_samples)} - tsb = callbacks.TensorBoard(model=model, feed=feed, log_dir=filepath, - show_accuracy=False) + tsb = callbacks.TensorBoard(log_dir=filepath, histogram_freq=1) cbks = [tsb] model.fit(X_train, y_train, batch_size=batch_size, show_accuracy=True, - validation_data=(X_test, y_test), callbacks=cbks, nb_epoch=5) + validation_data=(X_test, y_test), callbacks=cbks, nb_epoch=2) assert os.path.exists(filepath) shutil.rmtree(filepath) @@ -189,14 +182,12 @@ def test_TensorBoard(): model.add_output(name='output', input='last_dense') model.compile(optimizer='sgd', loss={'output': 'mse'}) - feed = {model._test.inputs[0]: X_train, model._test.inputs[1]: y_train, - model._test.inputs[2]: np.ones(train_samples)} - tsb = callbacks.TensorBoard(model=model, feed=feed, log_dir=filepath, - show_accuracy=False) + tsb = callbacks.TensorBoard(log_dir=filepath, histogram_freq=1) cbks = [tsb] - model.fit({'X_vars': X_train, 'output': y_train}, 
batch_size=batch_size, + model.fit({'X_vars': X_train, 'output': y_train}, + batch_size=batch_size, validation_data={'X_vars': X_test, 'output': y_test}, - callbacks=cbks, nb_epoch=5) + callbacks=cbks, nb_epoch=2) assert os.path.exists(filepath) shutil.rmtree(filepath) @@ -204,4 +195,3 @@ def test_TensorBoard(): if __name__ == '__main__': pytest.main([__file__]) - From fd632b70c57d7e94f587105c829c2f316093d85f Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Mon, 21 Dec 2015 14:14:41 -0800 Subject: [PATCH 077/145] Fix callback tests --- tests/keras/test_callbacks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/keras/test_callbacks.py b/tests/keras/test_callbacks.py index 905705693..ef408cba4 100644 --- a/tests/keras/test_callbacks.py +++ b/tests/keras/test_callbacks.py @@ -191,7 +191,7 @@ def test_TensorBoard(): assert os.path.exists(filepath) shutil.rmtree(filepath) - KTF._set_session(old_session) + KTF._set_session(old_session) if __name__ == '__main__': pytest.main([__file__]) From 13df0bf32accd57a7288a29fa09e4badf1d0b780 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Mon, 21 Dec 2015 14:21:57 -0800 Subject: [PATCH 078/145] Skip tensorboard test if py3 --- tests/keras/test_callbacks.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/tests/keras/test_callbacks.py b/tests/keras/test_callbacks.py index ef408cba4..b02d9d390 100644 --- a/tests/keras/test_callbacks.py +++ b/tests/keras/test_callbacks.py @@ -1,5 +1,6 @@ import pytest import os +import sys import numpy as np np.random.seed(1337) @@ -120,7 +121,7 @@ def test_LearningRateScheduler(): assert (float(K.get_value(model.optimizer.lr)) - 0.2) < K.epsilon() -@pytest.mark.skipif(K._BACKEND != 'tensorflow', +@pytest.mark.skipif((K._BACKEND != 'tensorflow') or (sys.version_info[0] == 3), reason="Requires tensorflow backend") def test_TensorBoard(): import shutil From 908d86655831e153bb11aa8ce2c9bdaf821b1154 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Mon, 21 Dec 2015 17:55:26 -0800 Subject: [PATCH 079/145] Improve unit test coverage --- keras/datasets/imdb.py | 5 +++- tests/keras/datasets/test_datasets.py | 2 ++ tests/keras/layers/test_core.py | 1 - tests/keras/layers/test_noise.py | 41 +++++++++++++++++++++++++++ 4 files changed, 47 insertions(+), 2 deletions(-) create mode 100644 tests/keras/layers/test_noise.py diff --git a/keras/datasets/imdb.py b/keras/datasets/imdb.py index 4afae2fcd..cd1e925b8 100644 --- a/keras/datasets/imdb.py +++ b/keras/datasets/imdb.py @@ -39,7 +39,10 @@ def load_data(path="imdb.pkl", nb_words=None, skip_top=0, new_labels.append(y) X = new_X labels = new_labels - + if not X: + raise Exception('After filtering for sequences shorter than maxlen=' + + str(maxlen) + ', no sequence was kept. 
' + 'Increase maxlen.') if not nb_words: nb_words = max([max(x) for x in X]) diff --git a/tests/keras/datasets/test_datasets.py b/tests/keras/datasets/test_datasets.py index 8239b1ea4..2ce00f2b4 100644 --- a/tests/keras/datasets/test_datasets.py +++ b/tests/keras/datasets/test_datasets.py @@ -11,6 +11,7 @@ def test_cifar(): def test_reuters(): (X_train, y_train), (X_test, y_test) = reuters.load_data() + (X_train, y_train), (X_test, y_test) = reuters.load_data(maxlen=10) def test_mnist(): @@ -19,6 +20,7 @@ def test_mnist(): def test_imdb(): (X_train, y_train), (X_test, y_test) = imdb.load_data() + (X_train, y_train), (X_test, y_test) = imdb.load_data(maxlen=40) if __name__ == '__main__': diff --git a/tests/keras/layers/test_core.py b/tests/keras/layers/test_core.py index db75f85c7..a16dc71a2 100644 --- a/tests/keras/layers/test_core.py +++ b/tests/keras/layers/test_core.py @@ -182,4 +182,3 @@ def _runner(layer): if __name__ == '__main__': pytest.main([__file__]) - diff --git a/tests/keras/layers/test_noise.py b/tests/keras/layers/test_noise.py new file mode 100644 index 000000000..5741647d4 --- /dev/null +++ b/tests/keras/layers/test_noise.py @@ -0,0 +1,41 @@ +import pytest +import numpy as np +from keras import backend as K +from keras.layers import core +from keras.layers import noise + +input_shape = (10, 10) +batch_input_shape = (10, 10, 10) + + +def test_GaussianNoise(): + layer = noise.GaussianNoise(sigma=1., input_shape=input_shape) + _runner(layer) + + +def test_GaussianDropout(): + layer = noise.GaussianDropout(p=0.2, input_shape=input_shape) + _runner(layer) + + +def _runner(layer): + assert isinstance(layer, core.Layer) + layer.build() + conf = layer.get_config() + assert (type(conf) == dict) + + param = layer.get_params() + # Typically a list or a tuple, but may be any iterable + assert hasattr(param, '__iter__') + layer.input = K.variable(np.random.random(batch_input_shape)) + output = layer.get_output(train=False) + output_np = K.eval(output) + assert output_np.shape == batch_input_shape + + output = layer.get_output(train=True) + output_np = K.eval(output) + assert output_np.shape == batch_input_shape + + +if __name__ == '__main__': + pytest.main([__file__]) From d8e83cc773a0f7bd7ed550c784bacda4fa4da53b Mon Sep 17 00:00:00 2001 From: cyc Date: Mon, 21 Dec 2015 22:33:35 -0500 Subject: [PATCH 080/145] Fix the wrong link Fix the wrong link for "Towards AI-Complete Question Answering: A Set of Prerequisite Toy Tasks" --- examples/babi_memnn.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/babi_memnn.py b/examples/babi_memnn.py index ba45bbabc..4c7026812 100644 --- a/examples/babi_memnn.py +++ b/examples/babi_memnn.py @@ -3,7 +3,7 @@ References: - Jason Weston, Antoine Bordes, Sumit Chopra, Tomas Mikolov, Alexander M. 
Rush, "Towards AI-Complete Question Answering: A Set of Prerequisite Toy Tasks", - http://arxiv.org/abs/1503.08895 + http://arxiv.org/abs/1502.05698 - Sainbayar Sukhbaatar, Arthur Szlam, Jason Weston, Rob Fergus, "End-To-End Memory Networks", From 2a0f3e3dfc02233461b70b640878b3ee5acf2bc1 Mon Sep 17 00:00:00 2001 From: Thomas McColgan Date: Tue, 22 Dec 2015 13:20:46 +0100 Subject: [PATCH 081/145] further test of siamese layer --- tests/keras/layers/test_core.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/tests/keras/layers/test_core.py b/tests/keras/layers/test_core.py index a16dc71a2..b0749dd9e 100644 --- a/tests/keras/layers/test_core.py +++ b/tests/keras/layers/test_core.py @@ -180,5 +180,24 @@ def _runner(layer): layer.trainable = True layer.trainable = False +def test_siamese(): + right_input_layer = core.Dense(7, input_dim=3) + left_input_layer = core.Dense(7, input_dim=3) + + shared_layer = core.Dense(5,input_dim=7) + for mode in ['sum', 'mul', 'ave', 'concat']: + siamese_layer = core.Siamese(shared_layer, [left_input_layer, right_input_layer], merge_mode=mode) + siamese_layer.output_shape + siamese_layer.get_output() + + # Merge modes 'dot' and 'cos' requires a different call signature + for mode in ['dot', 'cos']: + siamese_layer = core.Siamese(shared_layer, [left_input_layer, right_input_layer], merge_mode=mode, + dot_axes=([1], [1])) + siamese_layer.output_shape + siamese_layer.get_output() + + + if __name__ == '__main__': pytest.main([__file__]) From 3d109c6ebe07af3c235a112d689aa8f5af3f691a Mon Sep 17 00:00:00 2001 From: Thomas McColgan Date: Tue, 22 Dec 2015 13:42:04 +0100 Subject: [PATCH 082/145] split out theano only part --- tests/keras/layers/test_core.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/tests/keras/layers/test_core.py b/tests/keras/layers/test_core.py index b0749dd9e..3f81e4999 100644 --- a/tests/keras/layers/test_core.py +++ b/tests/keras/layers/test_core.py @@ -180,7 +180,7 @@ def _runner(layer): layer.trainable = True layer.trainable = False -def test_siamese(): +def test_siamese_all(): right_input_layer = core.Dense(7, input_dim=3) left_input_layer = core.Dense(7, input_dim=3) @@ -190,14 +190,19 @@ def test_siamese(): siamese_layer.output_shape siamese_layer.get_output() - # Merge modes 'dot' and 'cos' requires a different call signature +@pytest.mark.skipif(K._BACKEND == 'tensorflow', + reason='currently not working with TensorFlow') +def test_siamese_theano_only(): + right_input_layer = core.Dense(7, input_dim=3) + left_input_layer = core.Dense(7, input_dim=3) + + shared_layer = core.Dense(5,input_dim=7) + for mode in ['dot', 'cos']: siamese_layer = core.Siamese(shared_layer, [left_input_layer, right_input_layer], merge_mode=mode, dot_axes=([1], [1])) siamese_layer.output_shape siamese_layer.get_output() - - if __name__ == '__main__': pytest.main([__file__]) From f2f4f4ec48a3ae49f3f345f5e0eda60084d606a8 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Tue, 22 Dec 2015 07:53:21 -0800 Subject: [PATCH 083/145] Add helpful error message in Flatten --- keras/layers/core.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/keras/layers/core.py b/keras/layers/core.py index 19a3386fa..a4fdef9a6 100644 --- a/keras/layers/core.py +++ b/keras/layers/core.py @@ -762,6 +762,13 @@ class Flatten(Layer): @property def output_shape(self): input_shape = self.input_shape + if not all(input_shape[1:]): + raise Exception('The shape of the input to "Flatten" ' + 'is not fully defined ' + '(got ' + 
str(input_shape[1:]) + '. ' + 'Make sure to pass a complete "input_shape" ' + 'or "batch_input_shape" argument to the first ' + 'layer in your model.') return (input_shape[0], np.prod(input_shape[1:])) def get_output(self, train=False): From d870e45eb0e4b3d6a8c8441d797becde2d17ab4d Mon Sep 17 00:00:00 2001 From: fchollet Date: Tue, 22 Dec 2015 08:09:55 -0800 Subject: [PATCH 084/145] Fix flaky test --- tests/keras/layers/test_embeddings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/keras/layers/test_embeddings.py b/tests/keras/layers/test_embeddings.py index 86c64eb0c..a9c0bd3a8 100644 --- a/tests/keras/layers/test_embeddings.py +++ b/tests/keras/layers/test_embeddings.py @@ -24,7 +24,7 @@ def test_unitnorm_constraint(): class_mode='binary') lookup.train_on_batch(X1, np.array([[1], [0]], dtype='int32')) norm = np.linalg.norm(K.get_value(lookup.params[0]), axis=1) - assert_allclose(norm, np.ones_like(norm).astype('float32')) + assert_allclose(norm, np.ones_like(norm).astype('float32'), rtol=1e-05) if __name__ == '__main__': From c8176fd3bc791bfa38cd1db35ec5d89b37d28287 Mon Sep 17 00:00:00 2001 From: fchollet Date: Tue, 22 Dec 2015 08:10:18 -0800 Subject: [PATCH 085/145] Update FAQ in documentation --- docs/templates/faq.md | 56 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 53 insertions(+), 3 deletions(-) diff --git a/docs/templates/faq.md b/docs/templates/faq.md index 2210a8cfe..39ab1fd12 100644 --- a/docs/templates/faq.md +++ b/docs/templates/faq.md @@ -20,6 +20,8 @@ [How can I record the training / validation loss / accuracy at each epoch?](#how-can-i-record-the-training-validation-loss-accuracy-at-each-epoch) +[How can I use stateful RNNs?](#how-can-i-use-stateful-rnns) + --- ### How can I run Keras on GPU? @@ -105,7 +107,7 @@ You can build a Theano function that will return the output of a certain layer g ```python # with a Sequential model -get_3rd_layer_output = theano.function([model.layers[0].input], +get_3rd_layer_output = theano.function([model.layers[0].input], model.layers[3].get_output(train=False)) layer_output = get_3rd_layer_output(X) @@ -120,7 +122,7 @@ conv_output = get_conv_output(input_data_dict) ### Isn't there a bug with Merge or Graph related to input concatenation? -Yes, there was a known bug with tensor concatenation in Thenao that was fixed early 2015. +Yes, there was a known bug with tensor concatenation in Thenao that was fixed early 2015. Please upgrade to the latest version of Theano: ```bash @@ -176,4 +178,52 @@ hist = model.fit(X, y, validation_split=0.2) print(hist.history) ``` ---- \ No newline at end of file +--- + +### How can I use stateful RNNs? + +Making a RNN stateful means that the states for the samples of each batch will be reused as initial states for the samples in the next batch. + +When using stateful RNNs, it is therefore assumed that: + +- all batches have the same number of samples +- If `X1` and `X2` are successive batches of samples, then `X2[i]` is the follow-up sequence to `X1[i]`, for every `i`. + +To use statefulness in RNNs, you need to: + +- explicitely specify the batch size you are using, by passing a `batch_input_shape` argument to the first layer in your model. It should be a tuple of integers, e.g. `(32, 10, 16)` for a 32-samples batch of sequences of 10 timesteps with 16 features per timestep. +- set `stateful=True` in your RNN layer(s). 
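A practical consequence of the two assumptions listed earlier is that a long recording has to be cut into aligned windows, so that sample `i` of each batch is continued by sample `i` of the next one. A minimal NumPy sketch of such a split (the array name and shapes here are hypothetical):

```python
import numpy as np

# hypothetical data: 32 independent streams, 30 timesteps, 16 features
data = np.random.random((32, 30, 16))

window = 10
# batch k holds timesteps [k * window, (k + 1) * window) of every stream,
# so each batch is the follow-up of the previous one, sample by sample
batches = [data[:, start:start + window, :]
           for start in range(0, data.shape[1], window)]
assert all(batch.shape == (32, window, 16) for batch in batches)
```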
+
+To reset the states accumulated:
+
+- use `model.reset_states()` to reset the states of all layers in the model
+- use `layer.reset_states()` to reset the states of a specific stateful RNN layer
+
+Example:
+
+```python
+
+X  # this is our input data, of shape (32, 21, 16)
+# we will feed it to our model in sequences of length 10
+
+model = Sequential()
+model.add(LSTM(32, batch_input_shape=(32, 10, 16), stateful=True))
+model.add(Dense(16, activation='softmax'))
+
+model.compile(optimizer='rmsprop', loss='categorical_crossentropy')
+
+# we train the network to predict the 11th timestep given the first 10:
+model.train_on_batch(X[:, :10, :], np.reshape(X[:, 10, :], (32, 16)))
+
+# the state of the network has changed. We can feed the follow-up sequences:
+model.train_on_batch(X[:, 10:20, :], np.reshape(X[:, 20, :], (32, 16)))
+
+# let's reset the states of the LSTM layer:
+model.reset_states()
+
+# another way to do it in this case:
+model.layers[0].reset_states()
+```
+
+Note that the methods `predict`, `fit`, `train_on_batch`, `predict_classes`, etc. will *all* update the states of the stateful layers in a model. This allows you to do not only stateful training, but also stateful prediction.
+
From d5fb5d1f15a94d43bb803be257342c613f882087 Mon Sep 17 00:00:00 2001
From: Francois Chollet
Date: Tue, 22 Dec 2015 09:33:06 -0800
Subject: [PATCH 086/145] Improve callbacks docs.

---
 keras/callbacks.py | 29 +++++++++++++++++++++--------
 1 file changed, 21 insertions(+), 8 deletions(-)

diff --git a/keras/callbacks.py b/keras/callbacks.py
index 79c16d7c3..7ce81d32c 100644
--- a/keras/callbacks.py
+++ b/keras/callbacks.py
@@ -359,9 +359,16 @@ class EarlyStopping(Callback):
 
 
 class RemoteMonitor(Callback):
-    '''Experimental callback used to stream events to a server.
+    '''Callback used to stream events to a server.
 
     Requires the `requests` library.
+
+    # Arguments
+        root: root url to which the events will be send (at the end
+            of every epoch). Events are sent to
+            `root + '/publish/epoch/end/'`. Calls are HTTP POST,
+            with a `data` argument which is a JSON-encoded dictionary
+            of event data.
     '''
     def __init__(self, root='http://localhost:9000'):
         self.root = root
@@ -401,9 +408,9 @@ class LearningRateScheduler(Callback):
     '''Learning rate scheduler.
 
     # Arguments
-        schedule: a function that gets an epoch index as input
+        schedule: a function that takes an epoch index as input
             (integer, indexed from 0) and returns a new
-            learning rate as output.
+            learning rate as output (float).
     '''
     def __init__(self, schedule):
         super(LearningRateScheduler, self).__init__()
@@ -412,22 +419,28 @@ class LearningRateScheduler(Callback):
     def on_epoch_begin(self, epoch, logs={}):
         assert hasattr(self.model.optimizer, 'lr'), \
             'Optimizer must have a "lr" attribute.'
-        K.set_value(self.model.optimizer.lr, self.schedule(epoch))
+        lr = self.schedule(epoch)
+        assert type(lr) == float, 'The output of the "schedule" function should be float.'
+        K.set_value(self.model.optimizer.lr, lr)
 
 
 class TensorBoard(Callback):
     ''' Tensorboard basic visualizations.
 
-    This callback writes a log usable with TensorBoard.
-    TensorBoard is a visualization tools provided with TensorFlow.
+    This callback writes a log for TensorBoard, which allows
+    you to visualize dynamic graphs of your training and test
+    metrics, as well as activation histograms for the different
+    layers in your model.
+
+    TensorBoard is a visualization tool provided with TensorFlow.
If you have installed TensorFlow with pip, you should be able to launch TensorBoard from the command line: ``` tensorboard --logdir=/full_path_to_your_logs ``` - You could find more information at: - https://www.tensorflow.org/versions/master/how_tos/summaries_and_tensorboard/index.html + You can find more information about TensorBoard + [here](https://www.tensorflow.org/versions/master/how_tos/summaries_and_tensorboard/index.html). # Arguments log_dir: the path of the directory where to save the log From 485d451b62600d01bd728b2ae301ef233b5c4d7b Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Tue, 22 Dec 2015 09:33:22 -0800 Subject: [PATCH 087/145] Remove no-longer used util function. --- keras/utils/generic_utils.py | 28 ++++------------------------ 1 file changed, 4 insertions(+), 24 deletions(-) diff --git a/keras/utils/generic_utils.py b/keras/utils/generic_utils.py index f9e3c357b..8ca6b00bf 100644 --- a/keras/utils/generic_utils.py +++ b/keras/utils/generic_utils.py @@ -5,11 +5,13 @@ import sys import six -def get_from_module(identifier, module_params, module_name, instantiate=False, kwargs=None): +def get_from_module(identifier, module_params, module_name, + instantiate=False, kwargs=None): if isinstance(identifier, six.string_types): res = module_params.get(identifier) if not res: - raise Exception('Invalid ' + str(module_name) + ': ' + str(identifier)) + raise Exception('Invalid ' + str(module_name) + ': ' + + str(identifier)) if instantiate and not kwargs: return res() elif instantiate and kwargs: @@ -23,28 +25,6 @@ def make_tuple(*args): return args -def printv(v, prefix=''): - if type(v) == dict: - if 'name' in v: - print(prefix + '#' + v['name']) - del v['name'] - prefix += '...' - for nk, nv in v.items(): - if type(nv) in [dict, list]: - print(prefix + nk + ':') - printv(nv, prefix) - else: - print(prefix + nk + ':' + str(nv)) - elif type(v) == list: - prefix += '...' - for i, nv in enumerate(v): - print(prefix + '#' + str(i)) - printv(nv, prefix) - else: - prefix += '...' - print(prefix + str(v)) - - class Progbar(object): def __init__(self, target, width=30, verbose=1): ''' From 18d52e634d75074983cba62f51c3fc6737828d97 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Tue, 22 Dec 2015 10:36:59 -0800 Subject: [PATCH 088/145] Add text preprocessing tests --- keras/preprocessing/text.py | 30 ++++++++++++++++++++-- tests/keras/preprocessing/test_sequence.py | 5 ++-- 2 files changed, 31 insertions(+), 4 deletions(-) diff --git a/keras/preprocessing/text.py b/keras/preprocessing/text.py index 5eee564d8..9e014c70d 100644 --- a/keras/preprocessing/text.py +++ b/keras/preprocessing/text.py @@ -39,7 +39,30 @@ def one_hot(text, n, filters=base_filter(), lower=True, split=" "): class Tokenizer(object): - def __init__(self, nb_words=None, filters=base_filter(), lower=True, split=" "): + def __init__(self, nb_words=None, filters=base_filter(), + lower=True, split=' '): + '''The class allows to vectorize a text corpus, by turning each + text into either a sequence of integers (each integer being the index + of a token in a dictionary) or into a vector where the coefficient + for each token could be binary, based on word count, based on tf-idf... + + # Arguments + nb_words: the maximum number of words to keep, based + on word frequency. Only the most common `nb_words` words will + be kept. + filters: a string where each element is a character that will be + filtered from the texts. The default is all punctuation, plus + tabs and line breaks, minus the `'` character. 
+ lower: boolean. Whether to convert the texts to lowercase. + split: character or string to use for token splitting. + + By default, all punctuation is removed, turning the texts into + space-separated sequences of words + (words maybe include the `'` character). These sequences are then + splits into lists of tokens. They will then be indexed or vectorized. + + `0` is a reserved index that won't be assigned to any word. + ''' self.word_counts = {} self.word_docs = {} self.filters = filters @@ -51,7 +74,10 @@ class Tokenizer(object): def fit_on_texts(self, texts): ''' required before using texts_to_sequences or texts_to_matrix - @param texts: can be a list or a generator (for memory-efficiency) + + # Arguments + texts: can be a list of strings, + or a generator of strings (for memory-efficiency) ''' self.document_count = 0 for text in texts: diff --git a/tests/keras/preprocessing/test_sequence.py b/tests/keras/preprocessing/test_sequence.py index 7a4dcd5fb..88724481b 100644 --- a/tests/keras/preprocessing/test_sequence.py +++ b/tests/keras/preprocessing/test_sequence.py @@ -3,8 +3,9 @@ from numpy.testing import assert_allclose import pytest -from keras.preprocessing.sequence import (pad_sequences, make_sampling_table, - skipgrams) +from keras.preprocessing.sequence import pad_sequences +from keras.preprocessing.sequence import make_sampling_table +from keras.preprocessing.sequence import skipgrams def test_pad_sequences(): From 7f3cd093c096c03d9710940dc5cd700c2b7142a6 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Tue, 22 Dec 2015 10:37:09 -0800 Subject: [PATCH 089/145] Fix flaky test --- tests/integration_tests/test_image_data_tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/integration_tests/test_image_data_tasks.py b/tests/integration_tests/test_image_data_tasks.py index 24b7b86de..d914e4a4d 100644 --- a/tests/integration_tests/test_image_data_tasks.py +++ b/tests/integration_tests/test_image_data_tasks.py @@ -39,7 +39,7 @@ def test_image_classification(): history = model.fit(X_train, y_train, nb_epoch=10, batch_size=16, validation_data=(X_test, y_test), show_accuracy=True, verbose=0) - assert(history.history['val_acc'][-1] > 0.9) + assert(history.history['val_acc'][-1] > 0.85) if __name__ == '__main__': From 7f85541785da1183796d1d5fc6494f938ea77b5b Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Tue, 22 Dec 2015 11:26:25 -0800 Subject: [PATCH 090/145] Add text preprocessing test --- tests/keras/preprocessing/test_text.py | 34 ++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 tests/keras/preprocessing/test_text.py diff --git a/tests/keras/preprocessing/test_text.py b/tests/keras/preprocessing/test_text.py new file mode 100644 index 000000000..145d3665f --- /dev/null +++ b/tests/keras/preprocessing/test_text.py @@ -0,0 +1,34 @@ +from keras.preprocessing.text import Tokenizer, one_hot +import pytest +import numpy as np + + +def test_one_hot(): + text = 'The cat sat on the mat.' 
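For reference, a short usage sketch of the `Tokenizer` API documented above, covering the sequence and matrix paths that these tests exercise (the texts and the `nb_words` value are arbitrary):

```python
from keras.preprocessing.text import Tokenizer

sample_texts = ['The cat sat on the mat.',
                'The dog sat on the log.']

tokenizer = Tokenizer(nb_words=10)
tokenizer.fit_on_texts(sample_texts)

# index 0 is reserved, so word indices start at 1
sequences = tokenizer.texts_to_sequences(sample_texts)

# one row per text, nb_words columns; 'binary' marks word presence
matrix = tokenizer.texts_to_matrix(sample_texts, mode='binary')
assert matrix.shape == (2, 10)
```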
+ encoded = one_hot(text, 5) + assert len(encoded) == 6 + assert np.max(encoded) <= 4 + assert np.min(encoded) >= 0 + + +def test_tokenizer(): + texts = ['The cat sat on the mat.', + 'The dog sat on the log.', + 'Dogs and cats living together.'] + tokenizer = Tokenizer(nb_words=20) + tokenizer.fit_on_texts(texts) + + sequences = [] + for seq in tokenizer.texts_to_sequences_generator(texts): + sequences.append(seq) + assert np.max(np.max(sequences)) == 12 + assert np.min(np.min(sequences)) == 1 + + tokenizer.fit_on_sequences(sequences) + + for mode in ['binary', 'count', 'tfidf', 'freq']: + matrix = tokenizer.texts_to_matrix(texts, mode) + + +if __name__ == '__main__': + pytest.main([__file__]) From 69932604f9b9ecd6bf63af60aaf4b2b854759d56 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Tue, 22 Dec 2015 12:03:28 -0800 Subject: [PATCH 091/145] Fix text preprocessing test --- tests/keras/preprocessing/test_text.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/keras/preprocessing/test_text.py b/tests/keras/preprocessing/test_text.py index 145d3665f..53ac7e65d 100644 --- a/tests/keras/preprocessing/test_text.py +++ b/tests/keras/preprocessing/test_text.py @@ -15,13 +15,13 @@ def test_tokenizer(): texts = ['The cat sat on the mat.', 'The dog sat on the log.', 'Dogs and cats living together.'] - tokenizer = Tokenizer(nb_words=20) + tokenizer = Tokenizer(nb_words=10) tokenizer.fit_on_texts(texts) sequences = [] for seq in tokenizer.texts_to_sequences_generator(texts): sequences.append(seq) - assert np.max(np.max(sequences)) == 12 + assert np.max(np.max(sequences)) < 10 assert np.min(np.min(sequences)) == 1 tokenizer.fit_on_sequences(sequences) From eda1a9e0a4eb786128a117409f26c5bf072ea172 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Tue, 22 Dec 2015 17:57:04 -0800 Subject: [PATCH 092/145] Add tests for initializations --- keras/initializations.py | 5 +- tests/keras/test_initializations.py | 88 +++++++++++++++++++++++++++++ 2 files changed, 91 insertions(+), 2 deletions(-) create mode 100644 tests/keras/test_initializations.py diff --git a/keras/initializations.py b/keras/initializations.py index a19b37320..e1ae7a222 100644 --- a/keras/initializations.py +++ b/keras/initializations.py @@ -14,7 +14,7 @@ def uniform(shape, scale=0.05): def normal(shape, scale=0.05): - return K.variable(np.random.randn(*shape) * scale) + return K.variable(np.random.normal(loc=0.0, scale=scale, size=shape)) def lecun_uniform(shape): @@ -68,7 +68,8 @@ def orthogonal(shape, scale=1.1): def identity(shape, scale=1): if len(shape) != 2 or shape[0] != shape[1]: - raise Exception("Identity matrix initialization can only be used for 2D square matrices") + raise Exception('Identity matrix initialization can only be used ' + 'for 2D square matrices') else: return K.variable(scale * np.identity(shape[0])) diff --git a/tests/keras/test_initializations.py b/tests/keras/test_initializations.py new file mode 100644 index 000000000..8ad849471 --- /dev/null +++ b/tests/keras/test_initializations.py @@ -0,0 +1,88 @@ +import pytest +import numpy as np + +from keras import initializations +from keras import backend as K + +SHAPE = (100, 100) + + +def _runner(init, shape, target_mean=None, target_std=None, + target_max=None, target_min=None): + variable = init(shape) + output = K.get_value(variable) + print target_std + print output.std() + print output.mean() + lim = 1e-2 + if target_std is not None: + assert abs(output.std() - target_std) < lim + if target_mean is not None: + assert 
abs(output.mean() - target_mean) < lim + if target_max is not None: + assert abs(output.max() - target_max) < lim + if target_min is not None: + assert abs(output.min() - target_min) < lim + + +def test_uniform(): + _runner(initializations.uniform, SHAPE, target_mean=0., + target_max=0.05, target_min=-0.05) + + +def test_normal(): + _runner(initializations.normal, SHAPE, target_mean=0., target_std=0.05) + + +def test_lecun_uniform(): + scale = np.sqrt(3. / SHAPE[0]) + _runner(initializations.lecun_uniform, SHAPE, + target_mean=0., target_max=scale, target_min=-scale) + + +def test_glorot_uniform(): + scale = np.sqrt(6. / (SHAPE[0] + SHAPE[1])) + _runner(initializations.glorot_uniform, SHAPE, target_mean=0., + target_max=scale, target_min=-scale) + + +def test_glorot_normal(): + scale = np.sqrt(2. / (SHAPE[0] + SHAPE[1])) + _runner(initializations.glorot_normal, SHAPE, + target_mean=0., target_std=scale) + + +def test_he_uniform(): + scale = np.sqrt(6. / SHAPE[0]) + _runner(initializations.he_uniform, SHAPE, target_mean=0., + target_max=scale, target_min=-scale) + + +def test_he_normal(): + scale = np.sqrt(2. / SHAPE[0]) + _runner(initializations.he_normal, SHAPE, + target_mean=0., target_std=scale) + + +def test_orthogonal(): + _runner(initializations.orthogonal, SHAPE, + target_mean=0.) + + +def test_identity(): + _runner(initializations.identity, SHAPE, + target_mean=1./SHAPE[0], target_max=1.) + + +def test_zero(): + _runner(initializations.zero, SHAPE, + target_mean=0., target_max=0.) + + +def test_one(): + _runner(initializations.one, SHAPE, + target_mean=1., target_max=1.) + + +if __name__ == '__main__': + pytest.main([__file__]) From 29e60ab372e1a123be2f2884a6818f9a2508bf68 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Tue, 22 Dec 2015 18:09:40 -0800 Subject: [PATCH 093/145] Remove print statement in test --- tests/keras/test_initializations.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/keras/test_initializations.py b/tests/keras/test_initializations.py index 8ad849471..faf2bc6ce 100644 --- a/tests/keras/test_initializations.py +++ b/tests/keras/test_initializations.py @@ -11,9 +11,6 @@ def _runner(init, shape, target_mean=None, target_std=None, target_max=None, target_min=None): variable = init(shape) output = K.get_value(variable) - print target_std - print output.std() - print output.mean() lim = 1e-2 if target_std is not None: assert abs(output.std() - target_std) < lim From 19290c07fdd3ece4903de31325153f6ce5731d08 Mon Sep 17 00:00:00 2001 From: sjebbara Date: Wed, 23 Dec 2015 09:48:19 +0100 Subject: [PATCH 094/145] return outputs of predict_on_batch function of the Graph model as a dictionary --- keras/models.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/keras/models.py b/keras/models.py index 85ab2c844..0010695a1 100644 --- a/keras/models.py +++ b/keras/models.py @@ -1097,7 +1097,8 @@ class Graph(Model, containers.Graph): '''Generate predictions for a single batch of samples. ''' ins = [data[name] for name in self.input_order] - return self._predict(ins) + outs = self._predict(ins) + return dict(zip(self.output_order, outs)) def save_weights(self, filepath, overwrite=False): '''Save weights from all layers to a HDF5 files. 
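With the change above, `Graph.predict_on_batch` returns its outputs keyed by name instead of as a bare list. A sketch of the calling pattern this enables, using a toy graph modeled on the one in the callback tests (layer names and sizes are made up):

```python
import numpy as np
from keras.models import Graph
from keras.layers.core import Dense

model = Graph()
model.add_input(name='X_vars', input_shape=(2,))
model.add_node(Dense(4, activation='sigmoid'), name='hidden', input='X_vars')
model.add_node(Dense(2, activation='softmax'), name='out_layer', input='hidden')
model.add_output(name='output', input='out_layer')
model.compile(optimizer='sgd', loss={'output': 'mse'})

predictions = model.predict_on_batch({'X_vars': np.random.random((5, 2))})
# outputs are now addressed by output name rather than by position
assert predictions['output'].shape == (5, 2)
```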
From 0695b82f7476329bd1dcba1baf482986b357c0c0 Mon Sep 17 00:00:00 2001 From: Bin Wang Date: Wed, 23 Dec 2015 17:14:51 +0800 Subject: [PATCH 095/145] fix iteration shadowed in loop --- examples/lstm_text_generation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/lstm_text_generation.py b/examples/lstm_text_generation.py index 4e1fa3876..388272d24 100644 --- a/examples/lstm_text_generation.py +++ b/examples/lstm_text_generation.py @@ -85,7 +85,7 @@ for iteration in range(1, 60): print('----- Generating with seed: "' + sentence + '"') sys.stdout.write(generated) - for iteration in range(400): + for i in range(400): x = np.zeros((1, maxlen, len(chars))) for t, char in enumerate(sentence): x[0, t, char_indices[char]] = 1. From 85e51a0f8f6bfbb2ca8c0b5e07f87e843313c3e9 Mon Sep 17 00:00:00 2001 From: rpinsler Date: Wed, 23 Dec 2015 13:06:03 +0100 Subject: [PATCH 096/145] Fix typos and minor inconsistencies. --- docs/templates/faq.md | 6 +++--- docs/templates/index.md | 8 ++++---- keras/activations.py | 2 +- keras/backend/theano_backend.py | 16 ++++++++-------- keras/callbacks.py | 10 +++++----- keras/initializations.py | 2 +- keras/layers/containers.py | 4 ++-- keras/layers/convolutional.py | 4 ++-- keras/layers/core.py | 24 ++++++++++++------------ keras/layers/embeddings.py | 6 +++--- keras/layers/normalization.py | 2 +- keras/layers/recurrent.py | 6 +++--- keras/models.py | 8 ++++---- keras/objectives.py | 2 +- keras/preprocessing/text.py | 6 +++--- keras/utils/np_utils.py | 2 +- keras/utils/visualize_util.py | 4 ++-- 17 files changed, 56 insertions(+), 56 deletions(-) diff --git a/docs/templates/faq.md b/docs/templates/faq.md index 39ab1fd12..d3f2ac33c 100644 --- a/docs/templates/faq.md +++ b/docs/templates/faq.md @@ -122,7 +122,7 @@ conv_output = get_conv_output(input_data_dict) ### Isn't there a bug with Merge or Graph related to input concatenation? -Yes, there was a known bug with tensor concatenation in Thenao that was fixed early 2015. +Yes, there was a known bug with tensor concatenation in Theano that was fixed early 2015. Please upgrade to the latest version of Theano: ```bash @@ -155,7 +155,7 @@ Find out more in the [callbacks documentation](callbacks.md). ### How is the validation split computed? -If you set the `validation_split` arugment in `model.fit` to e.g. 0.1, then the validation data used will be the *last 10%* of the data. If you set it to 0.25, it will be the last 25% of the data, etc. +If you set the `validation_split` argument in `model.fit` to e.g. 0.1, then the validation data used will be the *last 10%* of the data. If you set it to 0.25, it will be the last 25% of the data, etc. --- @@ -191,7 +191,7 @@ When using stateful RNNs, it is therefore assumed that: To use statefulness in RNNs, you need to: -- explicitely specify the batch size you are using, by passing a `batch_input_shape` argument to the first layer in your model. It should be a tuple of integers, e.g. `(32, 10, 16)` for a 32-samples batch of sequences of 10 timesteps with 16 features per timestep. +- explicitly specify the batch size you are using, by passing a `batch_input_shape` argument to the first layer in your model. It should be a tuple of integers, e.g. `(32, 10, 16)` for a 32-samples batch of sequences of 10 timesteps with 16 features per timestep. - set `stateful=True` in your RNN layer(s). 
To reset the states accumulated: diff --git a/docs/templates/index.md b/docs/templates/index.md index d8ef1fb19..563e0b911 100644 --- a/docs/templates/index.md +++ b/docs/templates/index.md @@ -2,7 +2,7 @@ ## You have just found Keras. -Keras is a minimalist, highly modular neural networks library, written in Python and capable of running either on top of either [TensorFlow](https://github.com/tensorflow/tensorflow) or [Theano](https://github.com/Theano/Theano). It was developed with a focus on enabling fast experimentation. Being able to go from idea to result with the least possible delay is key to doing good research. +Keras is a minimalist, highly modular neural networks library, written in Python and capable of running on top of either [TensorFlow](https://github.com/tensorflow/tensorflow) or [Theano](https://github.com/Theano/Theano). It was developed with a focus on enabling fast experimentation. Being able to go from idea to result with the least possible delay is key to doing good research. Use Keras if you need a deep learning library that: - allows for easy and fast prototyping (through total modularity, minimalism, and extensibility). @@ -26,7 +26,7 @@ Keras is compatible with: __Python 2.7-3.5__. - __Easy extensibility.__ New modules are dead simple to add (as new classes and functions), and existing modules provide ample examples. To be able to easily create new modules allows for total expressiveness, making Keras suitable for advanced research. -- __Work with Python__. No separate models configuration files in a declarative format. Models are described in Python code, which is compact, easier to debug, and allows for ease of extensibility. +- __Work with Python__. No separate model configuration files in a declarative format. Models are described in Python code, which is compact, easier to debug, and allows for ease of extensibility. ------------------ @@ -34,7 +34,7 @@ Keras is compatible with: __Python 2.7-3.5__. ## Getting started: 30 seconds to Keras -The core datastructure of Keras is a __model__, a way to organize layers. There are two types of models: [`Sequential`](/models/#sequential) and [`Graph`](/models/#graph). +The core data structure of Keras is a __model__, a way to organize layers. There are two types of models: [`Sequential`](/models/#sequential) and [`Graph`](/models/#graph). Here's the `Sequential` model (a linear pile of layers): @@ -157,4 +157,4 @@ Keras was initially developed as part of the research effort of project ONEIROS >_"Oneiroi are beyond our unravelling --who can be sure what tale they tell? Not all that men look for comes to pass. Two gates there are that give passage to fleeting Oneiroi; one is made of horn, one of ivory. The Oneiroi that pass through sawn ivory are deceitful, bearing a message that will not be fulfilled; those that come out through polished horn have truth behind them, to be accomplished for men who see them."_ Homer, Odyssey 19. 562 ff (Shewring translation). ------------------- \ No newline at end of file +------------------ diff --git a/keras/activations.py b/keras/activations.py index 93f37b580..e92e03046 100644 --- a/keras/activations.py +++ b/keras/activations.py @@ -39,7 +39,7 @@ def hard_sigmoid(x): def linear(x): ''' - The function returns the variable that is passed in, so all types work + The function returns the variable that is passed in, so all types work. 
''' return x diff --git a/keras/backend/theano_backend.py b/keras/backend/theano_backend.py index 67911e0c3..ad9f7d27a 100644 --- a/keras/backend/theano_backend.py +++ b/keras/backend/theano_backend.py @@ -11,7 +11,7 @@ theano.config.floatX = _FLOATX def _on_gpu(): - '''Returns whether the session is set to + '''Return whether the session is set to run on GPU or not (i.e. on CPU). ''' return theano.config.device[:3] == 'gpu' @@ -19,7 +19,7 @@ def _on_gpu(): if _on_gpu(): '''Import cuDNN only if running on GPU: - not having Cuda install should not + not having Cuda installed should not prevent from running the present code. ''' from theano.sandbox.cuda import dnn @@ -244,18 +244,18 @@ def permute_dimensions(x, pattern): def repeat_elements(x, rep, axis): - '''Repeats the elements of a tensor along an axis, like np.repeat + '''Repeat the elements of a tensor along an axis, like np.repeat. If x has shape (s1, s2, s3) and axis=1, the output - will have shape (s1, s2 * rep, s3) + will have shape (s1, s2 * rep, s3). ''' return T.repeat(x, rep, axis=axis) def repeat(x, n): - '''Repeat a 2D tensor: + '''Repeat a 2D tensor. - if x has shape (samples, dim) and n=2, - the output will have shape (samples, 2, dim) + If x has shape (samples, dim) and n=2, + the output will have shape (samples, 2, dim). ''' tensors = [x] * n stacked = T.stack(*tensors) @@ -377,7 +377,7 @@ def gradients(loss, variables): def rnn(step_function, inputs, initial_states, go_backwards=False, masking=True): - '''Iterates over the time dimension of a tensor. + '''Iterate over the time dimension of a tensor. Parameters ---------- diff --git a/keras/callbacks.py b/keras/callbacks.py index 7ce81d32c..59ae993be 100644 --- a/keras/callbacks.py +++ b/keras/callbacks.py @@ -256,7 +256,7 @@ class ModelCheckpoint(Callback): if mode not in ['auto', 'min', 'max']: warnings.warn('ModelCheckpoint mode %s is unknown, ' - 'fallback to auto mode' % (self.mode), + 'fallback to auto mode.' % (self.mode), RuntimeWarning) mode = 'auto' @@ -311,7 +311,7 @@ class EarlyStopping(Callback): mode: one of {auto, min, max}. In 'min' mode, training will stop when the quantity monitored has stopped decreasing; in 'max' - mode it will stopped when the quantity + mode it will stop when the quantity monitored has stopped increasing. ''' def __init__(self, monitor='val_loss', patience=0, verbose=0, mode='auto'): @@ -324,7 +324,7 @@ class EarlyStopping(Callback): if mode not in ['auto', 'min', 'max']: warnings.warn('EarlyStopping mode %s is unknown, ' - 'fallback to auto mode' % (self.mode), RuntimeWarning) + 'fallback to auto mode.' % (self.mode), RuntimeWarning) mode = 'auto' if mode == 'min': @@ -364,7 +364,7 @@ class RemoteMonitor(Callback): Requires the `requests` library. # Arguments - root: root url to which the events will be send (at the end + root: root url to which the events will be sent (at the end of every epoch). Events are sent to `root + '/publish/epoch/end/'`. 
Calls are HTTP POST, with a `data` argument which is a JSON-encoded dictionary @@ -453,7 +453,7 @@ class TensorBoard(Callback): super(Callback, self).__init__() if K._BACKEND != 'tensorflow': raise Exception('TensorBoard callback only works ' - 'with the TensorFlow backend') + 'with the TensorFlow backend.') self.log_dir = log_dir self.histogram_freq = histogram_freq diff --git a/keras/initializations.py b/keras/initializations.py index e1ae7a222..d0afff97c 100644 --- a/keras/initializations.py +++ b/keras/initializations.py @@ -69,7 +69,7 @@ def orthogonal(shape, scale=1.1): def identity(shape, scale=1): if len(shape) != 2 or shape[0] != shape[1]: raise Exception('Identity matrix initialization can only be used ' - 'for 2D square matrices') + 'for 2D square matrices.') else: return K.variable(scale * np.identity(shape[0])) diff --git a/keras/layers/containers.py b/keras/layers/containers.py index a7e9c8898..bde77d98d 100644 --- a/keras/layers/containers.py +++ b/keras/layers/containers.py @@ -104,7 +104,7 @@ class Sequential(Layer): @property def state_updates(self): """ - Returns the `updates` from all layers in the sequence that are + Return the `updates` from all layers in the sequence that are stateful. This is useful for separating _training_ updates and _prediction_ updates for when we need to update a layers internal state during a stateful prediction. @@ -232,7 +232,7 @@ class Graph(Layer): @property def state_updates(self): """ - Returns the `updates` from all nodes in that graph for nodes that are + Return the `updates` from all nodes in that graph for nodes that are stateful. This is useful for separating _training_ updates and _prediction_ updates for when we need to update a layers internal state during a stateful prediction. diff --git a/keras/layers/convolutional.py b/keras/layers/convolutional.py index c5a979f56..35dc8cf15 100644 --- a/keras/layers/convolutional.py +++ b/keras/layers/convolutional.py @@ -584,7 +584,7 @@ class AveragePooling2D(_Pooling2D): class UpSampling1D(Layer): - '''Repeats each temporal step `length` times along the time axis. + '''Repeat each temporal step `length` times along the time axis. # Input shape 3D tensor with shape: `(samples, steps, features)`. @@ -620,7 +620,7 @@ class UpSampling1D(Layer): class UpSampling2D(Layer): - '''Repeats the rows and columns of the data + '''Repeat the rows and columns of the data by size[0] and size[1] respectively. # Input shape diff --git a/keras/layers/core.py b/keras/layers/core.py index a4fdef9a6..bab51740b 100644 --- a/keras/layers/core.py +++ b/keras/layers/core.py @@ -84,7 +84,7 @@ class Layer(object): ' but previous layer has output_shape ' + str(layer.output_shape)) if layer.get_output_mask() is not None: - assert self.supports_masked_input(), 'Cannot connect non-masking layer to layer with masked output' + assert self.supports_masked_input(), 'Cannot connect non-masking layer to layer with masked output.' 
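        # Editorial gloss, not from the patch: the output mask checked here
        # typically comes from an Embedding with mask_zero=True; only layers
        # that report supports_masked_input() (MaskedLayer subclasses) may
        # consume it, otherwise the mask would be silently dropped.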
self.previous = layer self.build() @@ -530,7 +530,7 @@ class Merge(Layer): for i in range(len(self.layers)): X = self.layers[i].get_output(train) if X.name is None: - raise ValueError('merge_mode="join" only works with named inputs') + raise ValueError('merge_mode="join" only works with named inputs.') else: inputs[X.name] = X return inputs @@ -560,7 +560,7 @@ class Merge(Layer): output = output.dimshuffle((0, 'x')) return output else: - raise Exception('Unknown merge mode') + raise Exception('Unknown merge mode.') def get_input(self, train=False): res = [] @@ -748,7 +748,7 @@ class Flatten(Layer): '''Flatten the input. Does not affect the batch size. # Input shape - Arbitrary, although all dimensions in the input shaped must be fixed. + Arbitrary, although all dimensions in the input shape must be fixed. Use the keyword argument `input_shape` (tuple of integers, does not include the samples axis) when using this layer as the first layer in a model. @@ -1363,7 +1363,7 @@ class LambdaMerge(Lambda): def __init__(self, layers, function, output_shape=None): if len(layers) < 2: raise Exception('Please specify two or more input layers ' - '(or containers) to merge') + '(or containers) to merge.') self.layers = layers self.params = [] self.regularizers = [] @@ -1406,7 +1406,7 @@ class LambdaMerge(Lambda): output_shape_func = types.FunctionType(output_shape_func, globals()) shape = output_shape_func(input_shapes) if type(shape) not in {list, tuple}: - raise Exception('output_shape function must return a tuple') + raise Exception('output_shape function must return a tuple.') return tuple(shape) def get_params(self): @@ -1486,7 +1486,7 @@ class Siamese(Layer): if merge_mode in {'cos', 'dot'}: if len(inputs) > 2: - raise Exception(merge_mode + ' merge takes exactly 2 layers') + raise Exception(merge_mode + ' merge takes exactly 2 layers.') self.layer = layer self.trainable = layer.trainable @@ -1573,7 +1573,7 @@ class Siamese(Layer): X = self.get_output_at(i, train) if X.name is None: raise ValueError('merge_mode="join" ' - 'only works with named inputs') + 'only works with named inputs.') o[X.name] = X return o @@ -1691,10 +1691,10 @@ class Siamese(Layer): class SiameseHead(Layer): '''This layer should be added only on top of a Siamese layer - with merge_mode = None + with merge_mode = None. Outputs the output of the Siamese layer at a given index, - specified by the head argument + specified by the head argument. # Arguments head: The index at which the output of the Siamese layer @@ -1729,7 +1729,7 @@ class SiameseHead(Layer): def add_shared_layer(layer, inputs): '''Use this function to add a shared layer across - multiple Sequential models without merging the outputs + multiple Sequential models without merging the outputs. ''' input_layers = [l.layers[-1] for l in inputs] s = Siamese(layer, input_layers, merge_mode=None) @@ -1741,7 +1741,7 @@ def add_shared_layer(layer, inputs): class Highway(Layer): '''Densely connected highway network, - a natural extension of LSTMs to feedforward networks + a natural extension of LSTMs to feedforward networks. cite: http://arxiv.org/pdf/1505.00387v2.pdf diff --git a/keras/layers/embeddings.py b/keras/layers/embeddings.py index 9d03c3a0c..8e089f14b 100644 --- a/keras/layers/embeddings.py +++ b/keras/layers/embeddings.py @@ -8,10 +8,10 @@ from ..constraints import unitnorm class Embedding(Layer): - '''Turn positive integers (indexes) into denses vectors of fixed size. + '''Turn positive integers (indexes) into dense vectors of fixed size. eg. 
[[4], [20]] -> [[0.25, 0.1], [0.6, -0.2]] - This layer can only be used as the first layer in a model. + This layer can only be used as the first layer in a model. # Input shape 2D tensor with shape: `(nb_samples, sequence_length)`. @@ -38,7 +38,7 @@ class Embedding(Layer): This is useful for [recurrent layers](recurrent.md) which may take variable length input. If this is `True` then all subsequent layers in the model need to support masking or an exception will be raised. - input_length: Length of input sequences, when it is constantself. + input_length: Length of input sequences, when it is constant. This argument is required if you are going to connect `Flatten` then `Dense` layers upstream (without it, the shape of the dense outputs cannot be computed). diff --git a/keras/layers/normalization.py b/keras/layers/normalization.py index e85ad941f..b4ceb4c94 100644 --- a/keras/layers/normalization.py +++ b/keras/layers/normalization.py @@ -6,7 +6,7 @@ from .. import backend as K class BatchNormalization(Layer): '''Normalize the activations of the previous layer at each batch, i.e. applies a transformation that maintains the mean activation - close to 0. and the activation standard deviation close to 1. + close to 0 and the activation standard deviation close to 1. # Input shape Arbitrary. Use the keyword argument `input_shape` diff --git a/keras/layers/recurrent.py b/keras/layers/recurrent.py index 13b9ceee1..90aa3c4ec 100644 --- a/keras/layers/recurrent.py +++ b/keras/layers/recurrent.py @@ -30,7 +30,7 @@ class Recurrent(MaskedLayer): return_sequences: Boolean. Whether to return the last output in the output sequence, or the full sequence. go_backwards: Boolean (default False). - If True, rocess the input sequence backwards. + If True, process the input sequence backwards. stateful: Boolean (default False). If True, the last state for each sample at index i in a batch will be used as initial state for the sample of index i in the following batch. @@ -43,7 +43,7 @@ class Recurrent(MaskedLayer): `Flatten` then `Dense` layers upstream (without it, the shape of the dense outputs cannot be computed). Note that if the recurrent layer is not the first layer - in your model, you would need to specify the input Length + in your model, you would need to specify the input length at the level of the first layer (e.g. via the `input_shape` argument) @@ -129,7 +129,7 @@ class Recurrent(MaskedLayer): if K._BACKEND == 'tensorflow': if not self.input_shape[1]: raise Exception('When using TensorFlow, you should define ' + - 'explicitely the number of timesteps of ' + + 'explicitly the number of timesteps of ' + 'your sequences. Make sure the first layer ' + 'has a "batch_input_shape" argument ' + 'including the samples axis.') diff --git a/keras/models.py b/keras/models.py index 85ab2c844..98e2ac4f9 100644 --- a/keras/models.py +++ b/keras/models.py @@ -59,7 +59,7 @@ def slice_X(X, start=None, stop=None): ''' if type(X) == list: if hasattr(start, '__len__'): - # hdf5 dataset only support list object as indices + # hdf5 datasets only support list objects as indices if hasattr(start, 'shape'): start = start.tolist() return [x[start] for x in X] @@ -84,7 +84,7 @@ def weighted_objective(fn): # mask should have the same shape as score_array score_array *= mask # the loss per batch should be proportional - # to the number of unmasked sampled. + # to the number of unmasked samples. 
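        # Illustrative note, not from the patch: if a (2, 3) mask keeps 4 of
        # its 6 entries, K.mean(mask) is 4/6, so the division below rescales
        # the mean loss as if it were averaged over unmasked samples only.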
score_array /= K.mean(mask) # reduce score_array to 1D @@ -461,7 +461,7 @@ class Sequential(Model, containers.Sequential): show_accuracy=False, class_weight=None, sample_weight=None): '''Train the model for a fixed number of epochs. - Returns a history object. It `history` attribute is a record of + Returns a history object. Its `history` attribute is a record of training loss values at successive epochs, as well as validation loss values (if applicable). @@ -984,7 +984,7 @@ class Graph(Model, containers.Graph): class_weight={}, sample_weight={}): '''Train the model for a fixed number of epochs. - Returns a history object. It `history` attribute is a record of + Returns a history object. Its `history` attribute is a record of training loss values at successive epochs, as well as validation loss values (if applicable). diff --git a/keras/objectives.py b/keras/objectives.py index 6719c6414..510ae5def 100644 --- a/keras/objectives.py +++ b/keras/objectives.py @@ -35,7 +35,7 @@ def hinge(y_true, y_pred): def categorical_crossentropy(y_true, y_pred): - '''Expects a binary class matrix instead of a vector of scalar classes + '''Expects a binary class matrix instead of a vector of scalar classes. ''' return K.mean(K.categorical_crossentropy(y_pred, y_true), axis=-1) diff --git a/keras/preprocessing/text.py b/keras/preprocessing/text.py index 9e014c70d..83d693bfc 100644 --- a/keras/preprocessing/text.py +++ b/keras/preprocessing/text.py @@ -59,7 +59,7 @@ class Tokenizer(object): By default, all punctuation is removed, turning the texts into space-separated sequences of words (words maybe include the `'` character). These sequences are then - splits into lists of tokens. They will then be indexed or vectorized. + split into lists of tokens. They will then be indexed or vectorized. `0` is a reserved index that won't be assigned to any word. ''' @@ -167,12 +167,12 @@ class Tokenizer(object): if self.word_index: nb_words = len(self.word_index) + 1 else: - raise Exception("Specify a dimension (nb_words argument), or fit on some text data first") + raise Exception("Specify a dimension (nb_words argument), or fit on some text data first.") else: nb_words = self.nb_words if mode == "tfidf" and not self.document_count: - raise Exception("Fit the Tokenizer on some data before using tfidf mode") + raise Exception("Fit the Tokenizer on some data before using tfidf mode.") X = np.zeros((len(sequences), nb_words)) for i, seq in enumerate(sequences): diff --git a/keras/utils/np_utils.py b/keras/utils/np_utils.py index e2da74231..a5e5f3d98 100644 --- a/keras/utils/np_utils.py +++ b/keras/utils/np_utils.py @@ -7,7 +7,7 @@ from six.moves import zip def to_categorical(y, nb_classes=None): '''Convert class vector (integers from 0 to nb_classes) - to binary class matrix, for use with categorical_crossentropy + to binary class matrix, for use with categorical_crossentropy. 
''' y = np.asarray(y, dtype='int32') if not nb_classes: diff --git a/keras/utils/visualize_util.py b/keras/utils/visualize_util.py index 3074bdf1a..c0d6972a5 100644 --- a/keras/utils/visualize_util.py +++ b/keras/utils/visualize_util.py @@ -143,8 +143,8 @@ class ModelToDot(object): def to_graph(model, **kwargs): """ - `recursive` controls wether we recursively explore container layers - `show_shape` controls wether the shape is shown in the graph + `recursive` controls whether we recursively explore container layers + `show_shape` controls whether the shape is shown in the graph """ return ModelToDot()(model, **kwargs) From 16675b98c0d47c1f58f42ada534ff52721aa085e Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Wed, 23 Dec 2015 13:55:13 -0800 Subject: [PATCH 097/145] Better input validation in Sequential & Graph. --- keras/models.py | 103 ++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 99 insertions(+), 4 deletions(-) diff --git a/keras/models.py b/keras/models.py index 6f6ed43a1..c5a608313 100644 --- a/keras/models.py +++ b/keras/models.py @@ -498,6 +498,20 @@ class Sequential(Model, containers.Sequential): output timesteps, which is useful in sequence to sequence learning. ''' + if type(X) == list: + if len(set([len(a) for a in X] + [len(y)])) != 1: + raise Exception('All input arrays and the target array must ' + 'have the same number of samples.') + else: + if len(X) != len(y): + raise Exception('The input data tensor (X) and ' + 'the target tensor (y) must have ' + 'the same number of samples. Found: ' + 'len(X) = {}, len(y) = {}'.format(len(X), len(y))) + if sample_weight is not None: + assert len(sample_weight) == len(y), ('"sample_weight" must have ' + 'the same number of samples ' + 'as X and y.') X = standardize_X(X) y = standardize_y(y) @@ -511,11 +525,20 @@ class Sequential(Model, containers.Sequential): if validation_data: if len(validation_data) == 2: X_val, y_val = validation_data + if type(X_val) == list: + assert len(set([len(a) for a in X_val] + [len(y_val)])) == 1 + else: + assert len(X_val) == len(y_val) X_val = standardize_X(X_val) y_val = standardize_y(y_val) sample_weight_val = standardize_weights(y_val) elif len(validation_data) == 3: X_val, y_val, sample_weight_val = validation_data + if type(X_val) == list: + assert len(set([len(a) for a in X_val] + + [len(y_val), len(sample_weight_val)])) == 1 + else: + assert len(X_val) == len(y_val) == len(sample_weight_val) X_val = standardize_X(X_val) y_val = standardize_y(y_val) sample_weight_val = standardize_weights(y_val, @@ -619,6 +642,20 @@ class Sequential(Model, containers.Sequential): verbose: verbosity mode, 0 or 1. sample_weight: sample weights, as a numpy array. ''' + if type(X) == list: + if len(set([len(a) for a in X] + [len(y)])) != 1: + raise Exception('All input arrays and the target array must ' + 'have the same number of samples.') + else: + if len(X) != len(y): + raise Exception('The input data tensor (X) and ' + 'the target tensor (y) must have ' + 'the same number of samples. Found: ' + 'len(X) = {}, len(y) = {}'.format(len(X), len(y))) + if sample_weight is not None: + assert len(sample_weight) == len(y), ('"sample_weight" must have ' + 'the same number of samples ' + 'as X and y.') X = standardize_X(X) y = standardize_y(y) sample_weight = standardize_weights(y, sample_weight=sample_weight) @@ -643,6 +680,20 @@ class Sequential(Model, containers.Sequential): Arguments: see `fit` method. 
''' + if type(X) == list: + if len(set([len(a) for a in X] + [len(y)])) != 1: + raise Exception('All input arrays and the target array must ' + 'have the same number of samples.') + else: + if len(X) != len(y): + raise Exception('The input data tensor (X) and ' + 'the target tensor (y) must have ' + 'the same number of samples. Found: ' + 'len(X) = {}, len(y) = {}'.format(len(X), len(y))) + if sample_weight is not None: + assert len(sample_weight) == len(y), ('"sample_weight" must have ' + 'the same number of samples ' + 'as X and y.') X = standardize_X(X) y = standardize_y(y) sample_weight = standardize_weights(y, class_weight=class_weight, @@ -659,6 +710,20 @@ class Sequential(Model, containers.Sequential): Arguments: see `fit` method. ''' + if type(X) == list: + if len(set([len(a) for a in X] + [len(y)])) != 1: + raise Exception('All input arrays and the target array must ' + 'have the same number of samples.') + else: + if len(X) != len(y): + raise Exception('The input data tensor (X) and ' + 'the target tensor (y) must have ' + 'the same number of samples. Found: ' + 'len(X) = {}, len(y) = {}'.format(len(X), len(y))) + if sample_weight is not None: + assert len(sample_weight) == len(y), ('"sample_weight" must have ' + 'the same number of samples ' + 'as X and y.') X = standardize_X(X) y = standardize_y(y) sample_weight = standardize_weights(y, sample_weight=sample_weight) @@ -821,12 +886,21 @@ class Sequential(Model, containers.Sequential): raise Exception('The generator output must be a tuple.') if len(generator_output) == 2: X, y = generator_output + if type(X) == list: + assert len(set([len(a) for a in X] + [len(y)])) == 1 + else: + assert len(X) == len(y) sample_weight = None elif len(generator_output) == 3: X, y, sample_weight = generator_output + if type(X) == list: + assert len(set([len(a) for a in X] + [len(y), len(sample_weight)])) == 1 + else: + assert len(X) == len(y) == len(sample_weight) else: _stop.set() - raise Exception('The generator output tuple must have 2 or 3 elements.') + raise Exception('The generator output tuple must have ' + '2 or 3 elements.') return X, y, sample_weight # start generator thread storing batches into a queue @@ -843,7 +917,7 @@ class Sequential(Model, containers.Sequential): i += 1 else: time.sleep(wait_time) - except KeyboardInterrupt: + except: _stop.set() return @@ -1013,6 +1087,9 @@ class Graph(Model, containers.Graph): ''' X = [data[name] for name in self.input_order] y = [standardize_y(data[name]) for name in self.output_order] + if len(set([len(a) for a in X] + [len(a) for a in y])) != 1: + raise Exception('All input arrays and target arrays must have ' + 'the same number of samples.') sample_weight_list = [standardize_weights(y[i], sample_weight=sample_weight.get(self.output_order[i])) for i in range(len(self.output_order))] @@ -1057,8 +1134,10 @@ class Graph(Model, containers.Graph): ''' sample_weight = [standardize_weights(data[name], sample_weight=sample_weight.get(name)) for name in self.output_order] - ins = [data[name] for name in self.input_order] + [standardize_y(data[name]) for name in self.output_order] + sample_weight + if len(set([len(a) for a in ins])) != 1: + raise Exception('All input arrays and target arrays must have ' + 'the same number of samples.') outs = self._test_loop(self._test, ins, batch_size, verbose) return outs[0] @@ -1069,6 +1148,9 @@ class Graph(Model, containers.Graph): Arguments: see `fit` method. 
''' ins = [data[name] for name in self.input_order] + if len(set([len(a) for a in ins])) != 1: + raise Exception('All input arrays and target arrays must have ' + 'the same number of samples.') outs = self._predict_loop(self._predict, ins, batch_size, verbose) return dict(zip(self.output_order, outs)) @@ -1081,6 +1163,9 @@ class Graph(Model, containers.Graph): sample_weight=sample_weight.get(name), class_weight=class_weight.get(name)) for name in self.output_order] ins = [data[name] for name in self.input_order] + [standardize_y(data[name]) for name in self.output_order] + sample_weight + if len(set([len(a) for a in ins])) != 1: + raise Exception('All input arrays and target arrays must have ' + 'the same number of samples.') return self._train(ins) def test_on_batch(self, data, sample_weight={}): @@ -1091,12 +1176,18 @@ class Graph(Model, containers.Graph): sample_weight = [standardize_weights(data[name], sample_weight=sample_weight.get(name)) for name in self.output_order] ins = [data[name] for name in self.input_order] + [standardize_y(data[name]) for name in self.output_order] + sample_weight + if len(set([len(a) for a in ins])) != 1: + raise Exception('All input arrays and target arrays must have ' + 'the same number of samples.') return self._test(ins) def predict_on_batch(self, data): '''Generate predictions for a single batch of samples. ''' ins = [data[name] for name in self.input_order] + if len(set([len(a) for a in ins])) != 1: + raise Exception('All input arrays and target arrays must have ' + 'the same number of samples.') outs = self._predict(ins) return dict(zip(self.output_order, outs)) @@ -1247,6 +1338,10 @@ class Graph(Model, containers.Graph): '(data, sample_weight).') assert type(data) == dict assert type(sample_weight) == dict + if len(set([len(data[name]) for name in data.keys()] + + [len(sample_weight[name]) for name in sample_weight.keys()])) != 1: + raise Exception('All input arrays and target arrays must have ' + 'the same number of samples.') return data, sample_weight # start generator thread storing batches into a queue @@ -1263,7 +1358,7 @@ class Graph(Model, containers.Graph): i += 1 else: time.sleep(wait_time) - except KeyboardInterrupt: + except: _stop.set() return From 58ed77b0d2f1a0223d551c779e624c55a10df296 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Fri, 25 Dec 2015 18:07:20 +0800 Subject: [PATCH 098/145] Check keras_dir writing permission. 
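The pattern being introduced here, as later refined by PATCH 100 below (which tests the existing home directory rather than the possibly not-yet-created `~/.keras`), can be sketched in isolation; `writable_keras_dir` is a hypothetical helper name, not the actual Keras code:

```python
import os

def writable_keras_dir():
    # fall back to /tmp when the home directory is not writable,
    # e.g. on read-only home mounts or locked-down CI machines
    base = os.path.expanduser('~')
    if not os.access(base, os.W_OK):
        base = '/tmp'
    path = os.path.join(base, '.keras')
    if not os.path.exists(path):
        os.makedirs(path)
    return path
```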
--- keras/backend/__init__.py | 4 +++- keras/datasets/data_utils.py | 2 ++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/keras/backend/__init__.py b/keras/backend/__init__.py index ad038c683..640004112 100644 --- a/keras/backend/__init__.py +++ b/keras/backend/__init__.py @@ -5,11 +5,13 @@ import json from .common import epsilon, floatx, set_epsilon, set_floatx _keras_dir = os.path.expanduser(os.path.join('~', '.keras')) +if not os.access(_keras_dir, os.W_OK): + _keras_dir = os.path.join('/tmp', '.keras') if not os.path.exists(_keras_dir): os.makedirs(_keras_dir) _BACKEND = 'theano' -_config_path = os.path.expanduser(os.path.join('~', '.keras', 'keras.json')) +_config_path = os.path.expanduser(os.path.join(_keras_dir, 'keras.json')) if os.path.exists(_config_path): _config = json.load(open(_config_path)) _floatx = _config.get('floatx', floatx()) diff --git a/keras/datasets/data_utils.py b/keras/datasets/data_utils.py index f01b58b91..1cf9e8406 100644 --- a/keras/datasets/data_utils.py +++ b/keras/datasets/data_utils.py @@ -15,6 +15,8 @@ class ParanoidURLopener(FancyURLopener): def get_file(fname, origin, untar=False): datadir = os.path.expanduser(os.path.join('~', '.keras', 'datasets')) + if not os.access(datadir, os.W_OK): + datadir = os.path.join('/tmp', '.keras', 'datasets') if not os.path.exists(datadir): os.makedirs(datadir) From 186d95ae9ce7ce3eb70ecdb715cc510717abaf2d Mon Sep 17 00:00:00 2001 From: Kevin Loney Date: Fri, 25 Dec 2015 11:14:01 -0700 Subject: [PATCH 099/145] Fixed handling of negative dimensions in Reshape.output_shape and Reshape.get_output --- keras/layers/core.py | 34 ++++++++++++++++++++++++++++++++-- tests/test_shape_inference.py | 11 ++++++++++- 2 files changed, 42 insertions(+), 3 deletions(-) diff --git a/keras/layers/core.py b/keras/layers/core.py index bab51740b..82f7364b3 100644 --- a/keras/layers/core.py +++ b/keras/layers/core.py @@ -685,13 +685,43 @@ class Reshape(Layer): super(Reshape, self).__init__(**kwargs) self.dims = tuple(dims) + def _fix_unknown_dimension(self, input_shape, output_shape): + """ + A near direct port of the internal numpy function _fix_unknown_dimension + in numpy/core/src/multiarray/shape.c + """ + + output_shape = list(output_shape) + + msg = 'total size of new array must be unchanged' + + known, unknown = 1, None + for index, dim in enumerate(output_shape): + if dim < 0: + if unknown is None: + unknown = index + else: + raise ValueError('can only specify one unknown dimension') + else: + known *= dim + + original = np.prod(input_shape, dtype=int) + if not unknown is None: + if known == 0 or original % known != 0: + raise ValueError(msg) + output_shape[unknown] = original // known + elif original != known: + raise ValueError(msg) + + return tuple(output_shape) + @property def output_shape(self): - return (self.input_shape[0],) + self.dims + return (self.input_shape[0],) + self._fix_unknown_dimension(self.input_shape[1:], self.dims) def get_output(self, train=False): X = self.get_input(train) - return K.reshape(X, (-1,) + self.dims) + return K.reshape(X, (-1,) + self.output_shape[1:]) def get_config(self): config = {'name': self.__class__.__name__, diff --git a/tests/test_shape_inference.py b/tests/test_shape_inference.py index a448e61ca..3026b5c96 100644 --- a/tests/test_shape_inference.py +++ b/tests/test_shape_inference.py @@ -22,10 +22,19 @@ def check_layer_output_shape(layer, input_data): # Core # ######## def test_Reshape(): - layer = Reshape(dims=(2, 3)) input_data = np.random.random((2, 6)) + + layer = 
Reshape(dims=(2, 3))
     check_layer_output_shape(layer, input_data)

+    layer = Reshape(dims=(-1,))
+    check_layer_output_shape(layer, input_data)
+
+    layer = Reshape(dims=(-1, 2))
+    check_layer_output_shape(layer, input_data)
+
+    layer = Reshape(dims=(2, -1))
+    check_layer_output_shape(layer, input_data)

 def test_Permute():
     layer = Permute(dims=(1, 3, 2))

From b4eb1d9491cb96d6608fa83869aace06146ae793 Mon Sep 17 00:00:00 2001
From: Liang-Chi Hsieh
Date: Sat, 26 Dec 2015 09:11:16 +0800
Subject: [PATCH 100/145] Check base dir.

---
 keras/backend/__init__.py | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/keras/backend/__init__.py b/keras/backend/__init__.py
index 640004112..1097165c9 100644
--- a/keras/backend/__init__.py
+++ b/keras/backend/__init__.py
@@ -4,9 +4,11 @@ import os
 import json
 from .common import epsilon, floatx, set_epsilon, set_floatx

-_keras_dir = os.path.expanduser(os.path.join('~', '.keras'))
-if not os.access(_keras_dir, os.W_OK):
-    _keras_dir = os.path.join('/tmp', '.keras')
+_keras_base_dir = os.path.expanduser('~')
+if not os.access(_keras_base_dir, os.W_OK):
+    _keras_base_dir = '/tmp'
+
+_keras_dir = os.path.join(_keras_base_dir, '.keras')
 if not os.path.exists(_keras_dir):
     os.makedirs(_keras_dir)

From a98eec34f7b7df642252733af31f42ec67209071 Mon Sep 17 00:00:00 2001
From: Liang-Chi Hsieh
Date: Sat, 26 Dec 2015 14:46:21 +0800
Subject: [PATCH 101/145] Check basedir for dataset path.

---
 keras/datasets/data_utils.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/keras/datasets/data_utils.py b/keras/datasets/data_utils.py
index 1cf9e8406..b3f3f9dcd 100644
--- a/keras/datasets/data_utils.py
+++ b/keras/datasets/data_utils.py
@@ -14,9 +14,10 @@ class ParanoidURLopener(FancyURLopener):


 def get_file(fname, origin, untar=False):
-    datadir = os.path.expanduser(os.path.join('~', '.keras', 'datasets'))
-    if not os.access(datadir, os.W_OK):
-        datadir = os.path.join('/tmp', '.keras', 'datasets')
+    datadir_base = os.path.expanduser(os.path.join('~', '.keras'))
+    if not os.access(datadir_base, os.W_OK):
+        datadir_base = os.path.join('/tmp', '.keras')
+    datadir = os.path.join(datadir_base, 'datasets')
     if not os.path.exists(datadir):
         os.makedirs(datadir)

From 03cd7bf493d5b0c33280ab4be001f4f3cf21514c Mon Sep 17 00:00:00 2001
From: Kevin Loney
Date: Mon, 28 Dec 2015 23:59:44 -0700
Subject: [PATCH 102/145] Fixed some stylistic issues and expanded the doc
 string for Reshape._fix_unknown_dimension

---
 keras/layers/core.py | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/keras/layers/core.py b/keras/layers/core.py
index 82f7364b3..4e96d21ba 100644
--- a/keras/layers/core.py
+++ b/keras/layers/core.py
@@ -686,10 +686,26 @@ class Reshape(Layer):
         self.dims = tuple(dims)

     def _fix_unknown_dimension(self, input_shape, output_shape):
-        """
+        '''Find and replace a single missing dimension in an output shape
+        given an input shape.
+
         A near direct port of the internal numpy function _fix_unknown_dimension
         in numpy/core/src/multiarray/shape.c
-        """
+
+        # Arguments
+            input_shape: shape of array being reshaped
+
+            output_shape: desired shape of the array with at most
+                a single -1 which indicates a dimension that should be
+                derived from the input shape.
+
+        # Returns
+            The new output shape with a -1 replaced with its computed value.
+
+            Raises a ValueError if the total array size of the output_shape is
+            different than the input_shape, or more than one unknown dimension
+            is specified.
+ ''' output_shape = list(output_shape) @@ -706,7 +722,7 @@ class Reshape(Layer): known *= dim original = np.prod(input_shape, dtype=int) - if not unknown is None: + if unknown is not None: if known == 0 or original % known != 0: raise ValueError(msg) output_shape[unknown] = original // known From c95f5d10c222c0e596135f6d32ee70f18f2958c6 Mon Sep 17 00:00:00 2001 From: Liang-Chi Hsieh Date: Tue, 29 Dec 2015 18:24:57 +0800 Subject: [PATCH 103/145] Minor: remove duplicate code. --- keras/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/keras/models.py b/keras/models.py index c5a608313..f2653ad4e 100644 --- a/keras/models.py +++ b/keras/models.py @@ -399,7 +399,7 @@ class Sequential(Model, containers.Sequential): self.optimizer = optimizers.get(optimizer) self.loss = objectives.get(loss) - weighted_loss = weighted_objective(objectives.get(loss)) + weighted_loss = weighted_objective(self.loss) # input of model self.X_train = self.get_input(train=True) From 643961723c6affe3d561375a2791fb1c7df851a5 Mon Sep 17 00:00:00 2001 From: PiranjaF Date: Tue, 29 Dec 2015 23:18:03 +0100 Subject: [PATCH 104/145] Update layer_utils.py --- keras/utils/layer_utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/keras/utils/layer_utils.py b/keras/utils/layer_utils.py index e228b9975..069b3fa5f 100644 --- a/keras/utils/layer_utils.py +++ b/keras/utils/layer_utils.py @@ -26,12 +26,14 @@ def container_from_config(original_layer_dict, custom_objects={}): if name == 'Merge': mode = layer_dict.get('mode') + concat_axis = layer_dict.get('concat_axis') + dot_axes = layer_dict.get('dot_axes') layers = layer_dict.get('layers') layer_list = [] for layer in layers: init_layer = container_from_config(layer) layer_list.append(init_layer) - merge_layer = Merge(layer_list, mode) + merge_layer = Merge(layer_list, mode, concat_axis, dot_axes) return merge_layer elif name == 'Sequential': From 729f0765da577a4ebd879331f651b6d241a10632 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Tue, 29 Dec 2015 16:00:39 -0800 Subject: [PATCH 105/145] Progbar: scientific notation only for small values --- keras/utils/generic_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/keras/utils/generic_utils.py b/keras/utils/generic_utils.py index 8ca6b00bf..552463bcf 100644 --- a/keras/utils/generic_utils.py +++ b/keras/utils/generic_utils.py @@ -90,7 +90,7 @@ class Progbar(object): info += ' - %s:' % k if type(self.sum_values[k]) is list: avg = self.sum_values[k][0] / max(1, self.sum_values[k][1]) - if avg > 1e-3: + if abs(avg) > 1e-3: info += ' %.4f' % avg else: info += ' %.4e' % avg From d49baf1bfb303e56d29d48847342b2bbfd7ce207 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Tue, 29 Dec 2015 16:00:56 -0800 Subject: [PATCH 106/145] Fix example in FAQ --- docs/templates/faq.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/templates/faq.md b/docs/templates/faq.md index d3f2ac33c..800e9ebb5 100644 --- a/docs/templates/faq.md +++ b/docs/templates/faq.md @@ -113,9 +113,9 @@ layer_output = get_3rd_layer_output(X) # with a Graph model get_conv_layer_output = theano.function([model.inputs[i].input for i in model.input_order], - model.outputs['conv'].get_output(train=False), + model.nodes['conv'].get_output(train=False), on_unused_input='ignore') -conv_output = get_conv_output(input_data_dict) +conv_output = get_conv_layer_output([input_data_dict[i] for i in model.input_order]) ``` --- From be9f7bc62fcada746e446115e6b7c40608f33ebf Mon Sep 17 
00:00:00 2001 From: Francois Chollet Date: Wed, 30 Dec 2015 13:09:16 -0800 Subject: [PATCH 107/145] Documentation fixes --- README.md | 5 ++++- docs/templates/index.md | 7 +++++-- 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 59cb64777..36c8392ff 100644 --- a/README.md +++ b/README.md @@ -4,9 +4,10 @@ ## You have just found Keras. -Keras is a minimalist, highly modular neural networks library, written in Python and capable of running either on top of either [TensorFlow](https://github.com/tensorflow/tensorflow) or [Theano](https://github.com/Theano/Theano). It was developed with a focus on enabling fast experimentation. Being able to go from idea to result with the least possible delay is key to doing good research. +Keras is a minimalist, highly modular neural networks library, written in Python and capable of running on top of either [TensorFlow](https://github.com/tensorflow/tensorflow) or [Theano](https://github.com/Theano/Theano). It was developed with a focus on enabling fast experimentation. Being able to go from idea to result with the least possible delay is key to doing good research. Use Keras if you need a deep learning library that: + - allows for easy and fast prototyping (through total modularity, minimalism, and extensibility). - supports both convolutional networks and recurrent networks, as well as combinations of the two. - supports arbitrary connectivity schemes (including multi-input and multi-output training). @@ -109,6 +110,7 @@ Keras uses the following dependencies: - Optional but recommended if you use CNNs: cuDNN. *When using the Theano backend:* + - Theano - [See installation instructions](http://deeplearning.net/software/theano/install.html#install). @@ -118,6 +120,7 @@ sudo pip install git+git://github.com/Theano/Theano.git ``` *When using the TensorFlow backend:* + - TensorFlow - [See installation instructions](https://github.com/tensorflow/tensorflow#download-and-setup). diff --git a/docs/templates/index.md b/docs/templates/index.md index 563e0b911..fcb86581c 100644 --- a/docs/templates/index.md +++ b/docs/templates/index.md @@ -5,6 +5,7 @@ Keras is a minimalist, highly modular neural networks library, written in Python and capable of running on top of either [TensorFlow](https://github.com/tensorflow/tensorflow) or [Theano](https://github.com/Theano/Theano). It was developed with a focus on enabling fast experimentation. Being able to go from idea to result with the least possible delay is key to doing good research. Use Keras if you need a deep learning library that: + - allows for easy and fast prototyping (through total modularity, minimalism, and extensibility). - supports both convolutional networks and recurrent networks, as well as combinations of the two. - supports arbitrary connectivity schemes (including multi-input and multi-output training). @@ -26,7 +27,7 @@ Keras is compatible with: __Python 2.7-3.5__. - __Easy extensibility.__ New modules are dead simple to add (as new classes and functions), and existing modules provide ample examples. To be able to easily create new modules allows for total expressiveness, making Keras suitable for advanced research. -- __Work with Python__. No separate model configuration files in a declarative format. Models are described in Python code, which is compact, easier to debug, and allows for ease of extensibility. +- __Work with Python__. No separate models configuration files in a declarative format. 
Models are described in Python code, which is compact, easier to debug, and allows for ease of extensibility.

 ------------------

@@ -34,7 +35,7 @@ Keras is compatible with: __Python 2.7-3.5__.

 ## Getting started: 30 seconds to Keras

-The core data structure of Keras is a __model__, a way to organize layers. There are two types of models: [`Sequential`](/models/#sequential) and [`Graph`](/models/#graph).
+The core datastructure of Keras is a __model__, a way to organize layers. There are two types of models: [`Sequential`](http://keras.io/models/#sequential) and [`Graph`](http://keras.io/models/#graph).

 Here's the `Sequential` model (a linear pile of layers):

@@ -107,6 +108,7 @@ Keras uses the following dependencies:
 - Optional but recommended if you use CNNs: cuDNN.

 *When using the Theano backend:*
+
 - Theano
     - [See installation instructions](http://deeplearning.net/software/theano/install.html#install).

@@ -116,6 +118,7 @@
 sudo pip install git+git://github.com/Theano/Theano.git
 ```

 *When using the TensorFlow backend:*
+
 - TensorFlow
     - [See installation instructions](https://github.com/tensorflow/tensorflow#download-and-setup).

From c00cf10ef84273d473bec5cc2bba7d647b2689c7 Mon Sep 17 00:00:00 2001
From: tboquet
Date: Wed, 30 Dec 2015 22:01:49 -0500
Subject: [PATCH 108/145] * deleted custom padding/replaced by a slice

---
 keras/backend/theano_backend.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/keras/backend/theano_backend.py b/keras/backend/theano_backend.py
index ad9f7d27a..c234b3176 100644
--- a/keras/backend/theano_backend.py
+++ b/keras/backend/theano_backend.py
@@ -548,12 +548,14 @@ def conv2d(x, kernel, strides=(1, 1), border_mode='valid', dim_ordering='th',
     if _on_gpu() and dnn.dnn_available():
         if border_mode == 'same':
             assert(strides == (1, 1))
-            np_kernel = kernel.eval()
-            pad_x = (np_kernel.shape[2] - strides[0]) // 2
-            pad_y = (np_kernel.shape[3] - strides[1]) // 2
             conv_out = dnn.dnn_conv(img=x,
                                     kerns=kernel,
-                                    border_mode=(pad_x, pad_y))
+                                    border_mode='full')
+            shift_x = (kernel.shape[2] - 1) // 2
+            shift_y = (kernel.shape[3] - 1) // 2
+            conv_out = conv_out[:, :,
+                                shift_x:x.shape[2] + shift_x,
+                                shift_y:x.shape[3] + shift_y]
         else:
             conv_out = dnn.dnn_conv(img=x,
                                     kerns=kernel,

From 579a219614557b11d3d20fd2ddce42aff1abf63b Mon Sep 17 00:00:00 2001
From: berleon
Date: Thu, 31 Dec 2015 16:54:21 +0100
Subject: [PATCH 109/145] [AutoEncoder] set_previous triggers build

The `params`, `regularizers`, `constraints` and `updates` members of the
AutoEncoder were set in the `__init__` method. When set_previous was called,
the mentioned members were not updated. This behavior resulted in a
DisconnectedInputError. Now the mentioned members are set in the `build`
method and the `set_previous` method calls the `build` method every time the
input changes. This commit fixes issue #1275.
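Schematically (hypothetical wrapper classes, not the patch itself), the failure mode is parameter collection frozen at construction time; the fix defers collection to `build`, which `set_previous` re-triggers:

```python
from keras.layers.core import Layer

class BrokenWrapper(Layer):
    def __init__(self, inner):
        self.inner = inner
        self.params = list(inner.params)  # frozen at construction time

class FixedWrapper(Layer):
    def __init__(self, inner):
        self.inner = inner

    def build(self):
        # re-collected whenever the input connection changes
        self.params = list(self.inner.params)

    def set_previous(self, node, connection_map={}):
        self.inner.set_previous(node, connection_map)
        # Layer.set_previous calls self.build(), refreshing the params
        super(FixedWrapper, self).set_previous(node, connection_map)
```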
--- keras/layers/core.py | 12 +++++++-----
 tests/keras/layers/test_core.py | 12 ++++++++++++
 2 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/keras/layers/core.py b/keras/layers/core.py
index 4e96d21ba..611ff2c7a 100644
--- a/keras/layers/core.py
+++ b/keras/layers/core.py
@@ -1166,6 +1166,10 @@ class AutoEncoder(Layer):

         self.decoder.set_previous(self.encoder)

+        if weights is not None:
+            self.set_weights(weights)
+
+    def build(self):
         self.params = []
         self.regularizers = []
         self.constraints = []
@@ -1179,11 +1183,9 @@ class AutoEncoder(Layer):
                 self.params.append(p)
                 self.constraints.append(c)

-        if weights is not None:
-            self.set_weights(weights)
-
-    def set_previous(self, node):
-        self.encoder.set_previous(node)
+    def set_previous(self, node, connection_map={}):
+        self.encoder.set_previous(node, connection_map)
+        super(AutoEncoder, self).set_previous(node, connection_map)

     def get_weights(self):
         weights = []

diff --git a/tests/keras/layers/test_core.py b/tests/keras/layers/test_core.py
index 3f81e4999..965d5b7be 100644
--- a/tests/keras/layers/test_core.py
+++ b/tests/keras/layers/test_core.py
@@ -1,5 +1,6 @@
 import pytest
 import numpy as np
+from keras.models import Sequential
 from numpy.testing import assert_allclose

 from keras import backend as K
@@ -113,6 +114,17 @@ def test_autoencoder():
     _runner(layer)


+def test_autoencoder_second_layer():
+    # regression test for issue #1275
+    encoder = core.Dense(input_dim=10, output_dim=2)
+    decoder = core.Dense(input_dim=2, output_dim=10)
+    model = Sequential()
+    model.add(core.Dense(input_dim=20, output_dim=10))
+    model.add(core.AutoEncoder(encoder=encoder, decoder=decoder,
+                               output_reconstruction=False))
+    model.compile(loss='mse', optimizer='sgd')
+
+
 def test_maxout_dense():
     layer = core.MaxoutDense(10, 10, input_shape=(20,))
     _runner(layer)

From 177f7b6b6eb65d783097c78f1cc05b65c202d580 Mon Sep 17 00:00:00 2001
From: berleon
Date: Thu, 31 Dec 2015 17:04:20 +0100
Subject: [PATCH 110/145] [BatchNormalization] set updates in get_output

This commit fixes the DisconnectedInputError described in issue #1386 by
setting `updates` in the `get_output` method. Before this commit, the
`updates` member could use another input than the one used by `get_output`,
if the input was changed.
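The mode-0 computation this patch moves into `get_output` is a plain exponential moving average of batch statistics. A numpy illustration of the same formulas (default `momentum` and `epsilon` values assumed):

```python
import numpy as np

def update_running_stats(X, running_mean, running_std,
                         momentum=0.9, epsilon=1e-6):
    # statistics of the current batch input
    m = X.mean(axis=0)
    std = np.sqrt(((X - m) ** 2 + epsilon).mean(axis=0))
    # exponential moving averages, as in the patched get_output
    new_mean = momentum * running_mean + (1 - momentum) * m
    new_std = momentum * running_std + (1 - momentum) * std
    return new_mean, new_std
```

Because the statistics are now derived from the input fetched inside `get_output`, the update expressions and the output expression are guaranteed to share the same input tensor.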
--- keras/layers/normalization.py | 17 +++++++---------- tests/keras/test_normalization.py | 25 ++++++++++++++++++++++++- 2 files changed, 31 insertions(+), 11 deletions(-) diff --git a/keras/layers/normalization.py b/keras/layers/normalization.py index b4ceb4c94..28d9bee26 100644 --- a/keras/layers/normalization.py +++ b/keras/layers/normalization.py @@ -57,16 +57,6 @@ class BatchNormalization(Layer): self.running_mean = K.zeros(input_shape) self.running_std = K.ones(input_shape) - # initialize self.updates: batch mean/std computation - X = self.get_input(train=True) - m = K.mean(X, axis=0) - std = K.mean(K.square(X - m) + self.epsilon, axis=0) - std = K.sqrt(std) - mean_update = self.momentum * self.running_mean + (1-self.momentum) * m - std_update = self.momentum * self.running_std + (1-self.momentum) * std - self.updates = [(self.running_mean, mean_update), - (self.running_std, std_update)] - if self.initial_weights is not None: self.set_weights(self.initial_weights) del self.initial_weights @@ -84,6 +74,13 @@ class BatchNormalization(Layer): def get_output(self, train): X = self.get_input(train) if self.mode == 0: + m = K.mean(X, axis=0) + std = K.mean(K.square(X - m) + self.epsilon, axis=0) + std = K.sqrt(std) + mean_update = self.momentum * self.running_mean + (1-self.momentum) * m + std_update = self.momentum * self.running_std + (1-self.momentum) * std + self.updates = [(self.running_mean, mean_update), + (self.running_std, std_update)] X_normed = ((X - self.running_mean) / (self.running_std + self.epsilon)) elif self.mode == 1: diff --git a/tests/keras/test_normalization.py b/tests/keras/test_normalization.py index 4355b34a3..f38e70068 100644 --- a/tests/keras/test_normalization.py +++ b/tests/keras/test_normalization.py @@ -1,9 +1,10 @@ import pytest import numpy as np +from keras.layers.core import Dense, Activation from numpy.testing import assert_allclose from keras.layers import normalization -from keras.models import Sequential +from keras.models import Sequential, Graph from keras import backend as K @@ -98,5 +99,27 @@ def test_batchnorm_save_weights(): norm.set_weights(weights) +def test_batchnorm_nested(): + # regression test for issue #1386 + g = Graph() + g.add_input("input", input_shape=[20]) + g.add_node(Dense(10), "dense", "input") + g.add_node(normalization.BatchNormalization(), "bn", "dense") + g.add_node(Activation('relu'), "activ", "bn") + g.add_output("output", "activ") + + g2 = Graph() + g2.add_input("input", input_shape=[10]) + g2.add_node(Dense(15), "dense", "input") + g2.add_node(normalization.BatchNormalization(), "bn", "dense") + g2.add_node(Activation('relu'), "activ", "bn") + g2.add_output("output", "activ") + + model = Sequential() + model.add(g) + model.add(g2) + model.compile(loss="mse", optimizer="adadelta") + + if __name__ == '__main__': pytest.main([__file__]) From 582dfc42336dcb0ac5c5d96ad5b52a6b181dc695 Mon Sep 17 00:00:00 2001 From: Kyle McDonald Date: Thu, 31 Dec 2015 14:53:11 -0800 Subject: [PATCH 111/145] typo in doc: batch_input_size => batch_input_shape --- keras/layers/recurrent.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/keras/layers/recurrent.py b/keras/layers/recurrent.py index 90aa3c4ec..a27bc9b1a 100644 --- a/keras/layers/recurrent.py +++ b/keras/layers/recurrent.py @@ -73,7 +73,7 @@ class Recurrent(MaskedLayer): To enable statefulness: - specify `stateful=True` in the layer constructor. - specify a fixed batch size for your model, by passing - a `batch_input_size=(...)` to the first layer in your model. 
+ a `batch_input_shape=(...)` to the first layer in your model. This is the expected shape of your inputs *including the batch size*. It should be a tuple of integers, e.g. `(32, 10, 100)`. From 421a2cdf04b636a582a1b952332553339e9d70be Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Fri, 1 Jan 2016 11:07:19 -0800 Subject: [PATCH 112/145] Move batch norm tests to tests/keras/layers/ --- tests/keras/{ => layers}/test_normalization.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename tests/keras/{ => layers}/test_normalization.py (100%) diff --git a/tests/keras/test_normalization.py b/tests/keras/layers/test_normalization.py similarity index 100% rename from tests/keras/test_normalization.py rename to tests/keras/layers/test_normalization.py From d401bb46dd23c8c9dac93c3aded1205da8ce4300 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Fri, 1 Jan 2016 22:31:25 -0800 Subject: [PATCH 113/145] Doc fixes --- keras/layers/core.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/keras/layers/core.py b/keras/layers/core.py index 611ff2c7a..534f12194 100644 --- a/keras/layers/core.py +++ b/keras/layers/core.py @@ -1791,8 +1791,6 @@ class Highway(Layer): '''Densely connected highway network, a natural extension of LSTMs to feedforward networks. - cite: http://arxiv.org/pdf/1505.00387v2.pdf - # Input shape 2D tensor with shape: `(nb_samples, input_dim)`. @@ -1826,6 +1824,9 @@ class Highway(Layer): input_dim: dimensionality of the input (integer). This argument (or alternatively, the keyword argument `input_shape`) is required when using this layer as the first layer in a model. + + # References + - [Highway Networks](http://arxiv.org/pdf/1505.00387v2.pdf) ''' input_ndim = 2 From 5c72e14034eb055482fbbb08e2bc5b48bd93c8c1 Mon Sep 17 00:00:00 2001 From: Kashif Rasul Date: Fri, 1 Jan 2016 19:12:22 +0100 Subject: [PATCH 114/145] adamax optimizer --- CONTRIBUTING.md | 4 +-- README.md | 2 +- keras/optimizers.py | 54 ++++++++++++++++++++++++++++++++++ tests/keras/test_optimizers.py | 6 +++- 4 files changed, 62 insertions(+), 4 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 17affa401..3070f52ca 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -22,13 +22,13 @@ The more information you provide, the easier it is for us to validate that there ## Requesting a Feature -You can also use Github issues to request features you would like to see in Keras, or changes in the Keras API. +You can also use Github issues to request features you would like to see in Keras, or changes in the Keras API. 1. Provide a clear and detailed explanation of the feature you want and why it's important to add. Keep in mind that we want features that will be useful to the majority of our users and not just a small subset. If you're just targeting a minority of users, consider writing an add-on library for Keras. It is crucial for Keras to avoid bloating the API and codebase. 2. Provide code snippets demonstrating the API you have in mind and illustrating the use cases of your feature. Of course, you don't need to write any real code at this point! -3. After disussing the feature you may choose to attempt a Pull Request. If you're at all able, start writing some code. We always have more work to do than time to do it. If you can write some code then that will speed the process along. +3. After discussing the feature you may choose to attempt a Pull Request. If you're at all able, start writing some code. We always have more work to do than time to do it. 
If you can write some code then that will speed the process along. ## Pull Requests diff --git a/README.md b/README.md index 36c8392ff..1a1db1527 100644 --- a/README.md +++ b/README.md @@ -37,7 +37,7 @@ Keras is compatible with: __Python 2.7-3.5__. ## Getting started: 30 seconds to Keras -The core datastructure of Keras is a __model__, a way to organize layers. There are two types of models: [`Sequential`](http://keras.io/models/#sequential) and [`Graph`](http://keras.io/models/#graph). +The core data structure of Keras is a __model__, a way to organize layers. There are two types of models: [`Sequential`](http://keras.io/models/#sequential) and [`Graph`](http://keras.io/models/#graph). Here's the `Sequential` model (a linear pile of layers): diff --git a/keras/optimizers.py b/keras/optimizers.py index ce4cc1efd..503e2e915 100644 --- a/keras/optimizers.py +++ b/keras/optimizers.py @@ -275,12 +275,66 @@ class Adam(Optimizer): "beta_2": float(K.get_value(self.beta_2)), "epsilon": self.epsilon} +class Adamax(Optimizer): + '''Adamax optimizer from Adam paper's Section 7. It is a variant + of Adam based on the infinity norm. + + Default parameters follow those provided in the paper. + + # Arguments + lr: float >= 0. Learning rate. + beta_1/beta_2: floats, 0 < beta < 1. Generally close to 1. + epsilon: float >= 0. Fuzz factor. + + # References + - [Adam - A Method for Stochastic Optimization](http://arxiv.org/abs/1412.6980v8) + ''' + def __init__(self, lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=1e-8, + *args, **kwargs): + super(Adamax, self).__init__(**kwargs) + self.__dict__.update(locals()) + self.iterations = K.variable(0) + self.lr = K.variable(lr) + self.beta_1 = K.variable(beta_1) + self.beta_2 = K.variable(beta_2) + + def get_updates(self, params, constraints, loss): + grads = self.get_gradients(loss, params) + self.updates = [(self.iterations, self.iterations+1.)] + + t = self.iterations + 1 + lr_t = self.lr / (1 - K.pow(self.beta_1, t)) + + for p, g, c in zip(params, grads, constraints): + # zero init of 1st moment + m = K.variable(np.zeros(K.get_value(p).shape)) + # zero init of exponentially weighted infinity norm + u = K.variable(np.zeros(K.get_value(p).shape)) + + m_t = (self.beta_1 * m) + (1 - self.beta_1) * g + u_t = K.maximum(self.beta_2 * u, K.abs(g)) + p_t = p - lr_t * m_t / (u_t + self.epsilon) + + self.updates.append((m, m_t)) + self.updates.append((u, u_t)) + self.updates.append((p, c(p_t))) # apply constraints + return self.updates + + def get_config(self): + return {"name": self.__class__.__name__, + "lr": float(K.get_value(self.lr)), + "beta_1": float(K.get_value(self.beta_1)), + "beta_2": float(K.get_value(self.beta_2)), + "epsilon": self.epsilon} + + # aliases sgd = SGD rmsprop = RMSprop adagrad = Adagrad adadelta = Adadelta adam = Adam +adamax = Adamax def get(identifier, kwargs=None): diff --git a/tests/keras/test_optimizers.py b/tests/keras/test_optimizers.py index 2829084fa..81fcd0a14 100644 --- a/tests/keras/test_optimizers.py +++ b/tests/keras/test_optimizers.py @@ -2,7 +2,7 @@ from __future__ import print_function import pytest from keras.utils.test_utils import get_test_data -from keras.optimizers import SGD, RMSprop, Adagrad, Adadelta, Adam +from keras.optimizers import SGD, RMSprop, Adagrad, Adadelta, Adam, Adamax from keras.models import Sequential from keras.layers.core import Dense, Activation from keras.utils.np_utils import to_categorical @@ -56,5 +56,9 @@ def test_adam(): assert(_test_optimizer(Adam())) +def test_adamax(): + 
assert(_test_optimizer(Adamax())) + + if __name__ == '__main__': pytest.main([__file__]) From 9d15c9611570bd3ecab52ed924c69a60ac3b2784 Mon Sep 17 00:00:00 2001 From: Julien Rebetez Date: Sun, 3 Jan 2016 12:37:14 +0100 Subject: [PATCH 115/145] Add K.resize_images backend op. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This allows to take advantage of tensorflow’s resize_images operator in UpSampling2D. --- keras/backend/tensorflow_backend.py | 18 +++++++++ keras/backend/theano_backend.py | 17 +++++++++ keras/layers/convolutional.py | 12 +++--- tests/keras/layers/test_convolutional.py | 47 +++++++++++++++++++----- 4 files changed, 77 insertions(+), 17 deletions(-) diff --git a/keras/backend/tensorflow_backend.py b/keras/backend/tensorflow_backend.py index fcd574dc1..a24592e82 100644 --- a/keras/backend/tensorflow_backend.py +++ b/keras/backend/tensorflow_backend.py @@ -236,6 +236,24 @@ def permute_dimensions(x, pattern): return tf.transpose(x, perm=pattern) +def resize_images(X, height, width, height_factor, width_factor, dim_ordering): + '''Resize the images contained in a 4D tensor of shape + - [batch, channels, height, width] (for 'th' dim_ordering) + - [batch, height, width, channels] (for 'tf' dim_ordering) + by a factor of (height_factor, width_factor) + ''' + new_height = height * height_factor + new_width = width * width_factor + if dim_ordering == 'th': + X = permute_dimensions(X, [0, 2, 3, 1]) + X = tf.image.resize_nearest_neighbor(X, (new_height, new_width)) + return permute_dimensions(X, [0, 3, 1, 2]) + elif dim_ordering == 'tf': + return tf.image.resize_nearest_neighbor(X, (new_height, new_width)) + else: + raise Exception('Invalid dim_ordering: ' + dim_ordering) + + def repeat_elements(x, rep, axis): '''Repeats the elements of a tensor along an axis, like np.repeat diff --git a/keras/backend/theano_backend.py b/keras/backend/theano_backend.py index c234b3176..36cb514fb 100644 --- a/keras/backend/theano_backend.py +++ b/keras/backend/theano_backend.py @@ -251,6 +251,23 @@ def repeat_elements(x, rep, axis): ''' return T.repeat(x, rep, axis=axis) + +def resize_images(X, height, width, height_factor, width_factor, dim_ordering): + '''Resize the images contained in a 4D tensor of shape + - [batch, channels, height, width] (for 'th' dim_ordering) + - [batch, height, width, channels] (for 'tf' dim_ordering) + by a factor of (height_factor, width_factor) + ''' + if self.dim_ordering == 'th': + output = K.repeat_elements(X, height_factor, axis=2) + output = K.repeat_elements(output, width_factor, axis=3) + elif self.dim_ordering == 'tf': + output = K.repeat_elements(X, height_factor, axis=1) + output = K.repeat_elements(output, width_factor, axis=2) + else: + raise Exception('Invalid dim_ordering: ' + dim_ordering) + + def repeat(x, n): '''Repeat a 2D tensor. 
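Both backends implement the same contract: integer-factor nearest-neighbor upsampling, i.e. duplicating rows and columns. A numpy sketch of that contract for 'th' ordering (an illustration, not backend code):

```python
import numpy as np

def resize_images_np(X, height_factor, width_factor):
    # X has shape (batch, channels, height, width); duplicate each row
    # height_factor times and each column width_factor times
    out = np.repeat(X, height_factor, axis=2)
    return np.repeat(out, width_factor, axis=3)

X = np.arange(4.).reshape(1, 1, 2, 2)
assert resize_images_np(X, 2, 2).shape == (1, 1, 4, 4)
```

Note that the Theano version as committed here still refers to `self.dim_ordering` and `K.repeat_elements` inside a module-level function and returns nothing; PATCH 119 at the end of this series (Fix theano_backend.resize_images) revises these references.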
diff --git a/keras/layers/convolutional.py b/keras/layers/convolutional.py index 35dc8cf15..1722ba632 100644 --- a/keras/layers/convolutional.py +++ b/keras/layers/convolutional.py @@ -668,15 +668,13 @@ class UpSampling2D(Layer): def get_output(self, train=False): X = self.get_input(train) + input_shape = self.input_shape if self.dim_ordering == 'th': - output = K.repeat_elements(X, self.size[0], axis=2) - output = K.repeat_elements(output, self.size[1], axis=3) + height, width = input_shape[2], input_shape[3] elif self.dim_ordering == 'tf': - output = K.repeat_elements(X, self.size[0], axis=1) - output = K.repeat_elements(output, self.size[1], axis=2) - else: - raise Exception('Invalid dim_ordering: ' + self.dim_ordering) - return output + height, width = input_shape[1], input_shape[2] + return K.resize_images(X, height, width, self.size[0], self.size[1], + self.dim_ordering) def get_config(self): config = {'name': self.__class__.__name__, diff --git a/tests/keras/layers/test_convolutional.py b/tests/keras/layers/test_convolutional.py index e67ca5c66..0623dd6fb 100644 --- a/tests/keras/layers/test_convolutional.py +++ b/tests/keras/layers/test_convolutional.py @@ -188,17 +188,44 @@ def test_upsampling_2d(): input_nb_row = 11 input_nb_col = 12 - input = np.ones((nb_samples, stack_size, input_nb_row, input_nb_col)) - for length_row in [2, 3, 9]: - for length_col in [2, 3, 9]: - layer = convolutional.UpSampling2D(size=(length_row, length_col)) - layer.input = K.variable(input) - for train in [True, False]: - out = K.eval(layer.get_output(train)) - assert out.shape[2] == length_row * input_nb_row - assert out.shape[3] == length_col * input_nb_col - layer.get_config() + for dim_ordering in ['th', 'tf']: + if dim_ordering == 'th': + input = np.random.rand(nb_samples, stack_size, input_nb_row, + input_nb_col) + else: # tf + input = np.random.rand(nb_samples, input_nb_row, input_nb_col, + stack_size) + + for length_row in [2, 3, 9]: + for length_col in [2, 3, 9]: + layer = convolutional.UpSampling2D( + size=(length_row, length_col), + input_shape=input.shape[1:], + dim_ordering=dim_ordering) + layer.input = K.variable(input) + for train in [True, False]: + out = K.eval(layer.get_output(train)) + if dim_ordering == 'th': + assert out.shape[2] == length_row * input_nb_row + assert out.shape[3] == length_col * input_nb_col + else: # tf + assert out.shape[1] == length_row * input_nb_row + assert out.shape[2] == length_col * input_nb_col + + # compare with numpy + if dim_ordering == 'th': + expected_out = np.repeat(input, length_row, axis=2) + expected_out = np.repeat(expected_out, length_col, + axis=3) + else: # tf + expected_out = np.repeat(input, length_row, axis=1) + expected_out = np.repeat(expected_out, length_col, + axis=2) + + assert_allclose(out, expected_out) + + layer.get_config() if __name__ == '__main__': From b17e4c5edfa8a82c82054d0ba3c92d568b1f4d20 Mon Sep 17 00:00:00 2001 From: Steven Xu Date: Sun, 3 Jan 2016 15:56:31 +1100 Subject: [PATCH 116/145] input_shape fix --- keras/layers/containers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/keras/layers/containers.py b/keras/layers/containers.py index bde77d98d..7e1416585 100644 --- a/keras/layers/containers.py +++ b/keras/layers/containers.py @@ -313,7 +313,7 @@ class Graph(Layer): if dtype == 'float': layer.input = K.placeholder(shape=layer.input_shape, name=name) else: - if len(input_shape) == 1: + if (input_shape and len(input_shape) == 1) or (batch_input_shape and len(batch_input_shape) == 2): layer.input = 
K.placeholder(shape=layer.input_shape, dtype='int32', name=name) From 69e19b1e03492c54179bf98ea3cab7c7d032cf2b Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Sun, 3 Jan 2016 09:38:02 -0800 Subject: [PATCH 117/145] Improve optimizer tests --- tests/keras/test_optimizers.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/tests/keras/test_optimizers.py b/tests/keras/test_optimizers.py index 81fcd0a14..08fa4243b 100644 --- a/tests/keras/test_optimizers.py +++ b/tests/keras/test_optimizers.py @@ -32,32 +32,34 @@ def _test_optimizer(optimizer, target=0.9): history = model.fit(X_train, y_train, nb_epoch=12, batch_size=16, validation_data=(X_test, y_test), show_accuracy=True, verbose=2) - return history.history['val_acc'][-1] > target + config = optimizer.get_config() + assert type(config) == dict + assert history.history['val_acc'][-1] > target def test_sgd(): sgd = SGD(lr=0.01, momentum=0.9, nesterov=True) - assert(_test_optimizer(sgd)) + _test_optimizer(sgd) def test_rmsprop(): - assert(_test_optimizer(RMSprop())) + _test_optimizer(RMSprop()) def test_adagrad(): - assert(_test_optimizer(Adagrad())) + _test_optimizer(Adagrad()) def test_adadelta(): - assert(_test_optimizer(Adadelta())) + _test_optimizer(Adadelta()) def test_adam(): - assert(_test_optimizer(Adam())) + _test_optimizer(Adam()) def test_adamax(): - assert(_test_optimizer(Adamax())) + _test_optimizer(Adamax()) if __name__ == '__main__': From f447644900fd1c7653ce4c9a3728ce20ba20f610 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Sun, 3 Jan 2016 09:41:04 -0800 Subject: [PATCH 118/145] Update PyPi release to 0.3.1 --- keras/__init__.py | 2 +- setup.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/keras/__init__.py b/keras/__init__.py index 282d73faa..55a8be192 100644 --- a/keras/__init__.py +++ b/keras/__init__.py @@ -1 +1 @@ -__version__ = '0.3.0' +__version__ = '0.3.1' diff --git a/setup.py b/setup.py index e29dd89bc..f6598ef3c 100644 --- a/setup.py +++ b/setup.py @@ -3,12 +3,12 @@ from setuptools import find_packages setup(name='Keras', - version='0.3.0', + version='0.3.1', description='Theano-based Deep Learning library', author='Francois Chollet', author_email='francois.chollet@gmail.com', url='https://github.com/fchollet/keras', - download_url='https://github.com/fchollet/keras/tarball/0.3.0', + download_url='https://github.com/fchollet/keras/tarball/0.3.1', license='MIT', install_requires=['theano', 'pyyaml', 'six'], extras_require={ From 4330fd78e96ee4974713fcf6988ae8c4c0d96530 Mon Sep 17 00:00:00 2001 From: Julien Rebetez Date: Sun, 3 Jan 2016 19:04:49 +0100 Subject: [PATCH 119/145] Fix theano_backend.resize_images --- keras/backend/theano_backend.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/keras/backend/theano_backend.py b/keras/backend/theano_backend.py index 36cb514fb..8bfc90857 100644 --- a/keras/backend/theano_backend.py +++ b/keras/backend/theano_backend.py @@ -258,12 +258,14 @@ def resize_images(X, height, width, height_factor, width_factor, dim_ordering): - [batch, height, width, channels] (for 'tf' dim_ordering) by a factor of (height_factor, width_factor) ''' - if self.dim_ordering == 'th': - output = K.repeat_elements(X, height_factor, axis=2) - output = K.repeat_elements(output, width_factor, axis=3) - elif self.dim_ordering == 'tf': - output = K.repeat_elements(X, height_factor, axis=1) - output = K.repeat_elements(output, width_factor, axis=2) + if dim_ordering == 'th': + output = repeat_elements(X, 
height_factor, axis=2) + output = repeat_elements(output, width_factor, axis=3) + return output + elif dim_ordering == 'tf': + output = repeat_elements(X, height_factor, axis=1) + output = repeat_elements(output, width_factor, axis=2) + return output else: raise Exception('Invalid dim_ordering: ' + dim_ordering) From 6445d385eeee56e77728ea4a5ee6c810e5ff6dc6 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Sun, 3 Jan 2016 13:17:36 -0800 Subject: [PATCH 120/145] Update CONTRIBUTING.md --- CONTRIBUTING.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 3070f52ca..b2c7f9f82 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -53,8 +53,8 @@ We love pull requests. Here's a quick guide: 8. Update the documentation. If introducing new functionality, make sure you include code snippets demonstrating the usage of your new feature. -9. Submit your PR. If your changes have been approved in a previous discussion, and if you have have complete (and passing) unit tests, your PR is likely to be merged promptly. Otherwise, well... +9. Submit your PR. If your changes have been approved in a previous discussion, and if you have complete (and passing) unit tests, your PR is likely to be merged promptly. Otherwise, well... ## Adding new examples -Even if you don't contribute to the Keras source code, if you have an application of Keras that is concise and powerful, please consider adding it to our collection of examples. Existing examples show idiomatic Keras code: make sure to keep your own script in the same spirit. +Even if you don't contribute to the Keras source code, if you have an application of Keras that is concise and powerful, please consider adding it to our collection of examples. [Existing examples](https://github.com/fchollet/keras/tree/master/examples) show idiomatic Keras code: make sure to keep your own script in the same spirit. From 01395f13ed75b9b66f8dde757f9865d2a1d4ff87 Mon Sep 17 00:00:00 2001 From: Julien Rebetez Date: Sun, 3 Jan 2016 22:36:28 +0100 Subject: [PATCH 121/145] Figure out tensor shape automatically in K.resize_images --- keras/backend/tensorflow_backend.py | 11 +++++++---- keras/backend/theano_backend.py | 5 +++-- keras/layers/convolutional.py | 7 +------ 3 files changed, 11 insertions(+), 12 deletions(-) diff --git a/keras/backend/tensorflow_backend.py b/keras/backend/tensorflow_backend.py index a24592e82..58108f37d 100644 --- a/keras/backend/tensorflow_backend.py +++ b/keras/backend/tensorflow_backend.py @@ -236,19 +236,22 @@ def permute_dimensions(x, pattern): return tf.transpose(x, perm=pattern) -def resize_images(X, height, width, height_factor, width_factor, dim_ordering): +def resize_images(X, height_factor, width_factor, dim_ordering): '''Resize the images contained in a 4D tensor of shape - [batch, channels, height, width] (for 'th' dim_ordering) - [batch, height, width, channels] (for 'tf' dim_ordering) - by a factor of (height_factor, width_factor) + by a factor of (height_factor, width_factor). Both factors should be + positive integers. 
''' - new_height = height * height_factor - new_width = width * width_factor if dim_ordering == 'th': + new_height = shape(X)[2].value * height_factor + new_width = shape(X)[3].value * width_factor X = permute_dimensions(X, [0, 2, 3, 1]) X = tf.image.resize_nearest_neighbor(X, (new_height, new_width)) return permute_dimensions(X, [0, 3, 1, 2]) elif dim_ordering == 'tf': + new_height = shape(X)[1].value * height_factor + new_width = shape(X)[2].value * width_factor return tf.image.resize_nearest_neighbor(X, (new_height, new_width)) else: raise Exception('Invalid dim_ordering: ' + dim_ordering) diff --git a/keras/backend/theano_backend.py b/keras/backend/theano_backend.py index 8bfc90857..dfa10fd41 100644 --- a/keras/backend/theano_backend.py +++ b/keras/backend/theano_backend.py @@ -252,11 +252,12 @@ def repeat_elements(x, rep, axis): return T.repeat(x, rep, axis=axis) -def resize_images(X, height, width, height_factor, width_factor, dim_ordering): +def resize_images(X, height_factor, width_factor, dim_ordering): '''Resize the images contained in a 4D tensor of shape - [batch, channels, height, width] (for 'th' dim_ordering) - [batch, height, width, channels] (for 'tf' dim_ordering) - by a factor of (height_factor, width_factor) + by a factor of (height_factor, width_factor). Both factors should be + positive integers. ''' if dim_ordering == 'th': output = repeat_elements(X, height_factor, axis=2) diff --git a/keras/layers/convolutional.py b/keras/layers/convolutional.py index 1722ba632..b08b0464b 100644 --- a/keras/layers/convolutional.py +++ b/keras/layers/convolutional.py @@ -668,12 +668,7 @@ class UpSampling2D(Layer): def get_output(self, train=False): X = self.get_input(train) - input_shape = self.input_shape - if self.dim_ordering == 'th': - height, width = input_shape[2], input_shape[3] - elif self.dim_ordering == 'tf': - height, width = input_shape[1], input_shape[2] - return K.resize_images(X, height, width, self.size[0], self.size[1], + return K.resize_images(X, self.size[0], self.size[1], self.dim_ordering) def get_config(self): From 458641f33aeb4861f011605e8f1b4441f8825651 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Tue, 5 Jan 2016 10:35:10 -0800 Subject: [PATCH 122/145] Add K.l2_normalization --- keras/backend/tensorflow_backend.py | 6 ++++++ keras/backend/theano_backend.py | 6 +++++- tests/keras/backend/test_backends.py | 4 +++- 3 files changed, 14 insertions(+), 2 deletions(-) diff --git a/keras/backend/tensorflow_backend.py b/keras/backend/tensorflow_backend.py index fcd574dc1..332910cf0 100644 --- a/keras/backend/tensorflow_backend.py +++ b/keras/backend/tensorflow_backend.py @@ -510,6 +510,12 @@ def dropout(x, level, seed=None): return tf.nn.dropout(x * 1., retain_prob, seed=seed) +def l2_normalize(x, axis): + if axis < 0: + axis = axis % len(x.get_shape()) + return tf.nn.l2_normalize(x, dim=axis) + + # CONVOLUTIONS diff --git a/keras/backend/theano_backend.py b/keras/backend/theano_backend.py index c234b3176..4d7f8b97a 100644 --- a/keras/backend/theano_backend.py +++ b/keras/backend/theano_backend.py @@ -517,9 +517,13 @@ def dropout(x, level, seed=None): return x -# CONVOLUTIONS +def l2_normalize(x, axis): + norm = T.sqrt(T.sum(T.square(x), axis=axis, keepdims=True)) + return x / norm +# CONVOLUTIONS + def conv2d(x, kernel, strides=(1, 1), border_mode='valid', dim_ordering='th', image_shape=None, filter_shape=None): ''' diff --git a/tests/keras/backend/test_backends.py b/tests/keras/backend/test_backends.py index c867ad6c1..63c1bf752 100644 --- 
a/tests/keras/backend/test_backends.py +++ b/tests/keras/backend/test_backends.py @@ -281,9 +281,11 @@ class TestBackend(object): check_two_tensor_operation('binary_crossentropy', (4, 2), (4, 2), from_logits=True) check_two_tensor_operation('categorical_crossentropy', (4, 2), (4, 2), from_logits=True) check_two_tensor_operation('binary_crossentropy', (4, 2), (4, 2), from_logits=False) - check_two_tensor_operation('categorical_crossentropy', (4, 2), (4, 2), from_logits=False) + check_single_tensor_operation('l2_normalize', (4, 3), axis=-1) + check_single_tensor_operation('l2_normalize', (4, 3), axis=1) + # def test_conv2d(self): # '''conv2d works "properly" with Theano and TF but outputs different # values in each case. Cause unclear (input / kernel shape format?) From 6cb1172668269be864b57b3cfedb19c9587ecf77 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Tue, 5 Jan 2016 10:35:29 -0800 Subject: [PATCH 123/145] Add cosine proximity objective --- docs/templates/objectives.md | 2 ++ keras/objectives.py | 12 +++++++++++- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/docs/templates/objectives.md b/docs/templates/objectives.md index 1413392d8..6a5699bdf 100644 --- a/docs/templates/objectives.md +++ b/docs/templates/objectives.md @@ -27,3 +27,5 @@ For a few examples of such functions, check out the [objectives source](https:// - __hinge__ - __binary_crossentropy__: Also known as logloss. - __categorical_crossentropy__: Also known as multiclass logloss. __Note__: using this objective requires that your labels are binary arrays of shape `(nb_samples, nb_classes)`. +- __poisson__: mean of `(predictions - targets * log(predictions))` +- __cosine_proximity__: the opposite (negative) of the mean cosine proximity between predictions and targets. 
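For intuition: the `cosine_proximity` objective added below l2-normalizes each sample of `y_true` and `y_pred` along the feature axis, then returns the negative mean of their elementwise product. A NumPy sketch of the same computation (illustration only, not part of the patch):

```python
import numpy as np

def l2_normalize(x, axis):
    # same formula as the Theano backend's l2_normalize above
    norm = np.sqrt(np.sum(np.square(x), axis=axis, keepdims=True))
    return x / norm

y_true = np.array([[0., 1.], [1., 0.]])
y_pred = np.array([[0., 2.], [1., 1.]])

yt = l2_normalize(y_true, axis=1)
yp = l2_normalize(y_pred, axis=1)
loss = -np.mean(yt * yp, axis=1)
# loss[0] == -0.5: identical directions; the feature-axis mean divides
# the cosine similarity by the dimension (2 here), hence -0.5 rather than -1
# loss[1] ~= -0.354: rows at 45 degrees
```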
diff --git a/keras/objectives.py b/keras/objectives.py index 510ae5def..474221c95 100644 --- a/keras/objectives.py +++ b/keras/objectives.py @@ -44,15 +44,25 @@ def binary_crossentropy(y_true, y_pred): return K.mean(K.binary_crossentropy(y_pred, y_true), axis=-1) -def poisson_loss(y_true, y_pred): +def poisson(y_true, y_pred): return K.mean(y_pred - y_true * K.log(y_pred + K.epsilon()), axis=-1) + +def cosine_proximity(y_true, y_pred): + assert K.ndim(y_true) == 2 + assert K.ndim(y_pred) == 2 + y_true = K.l2_normalize(y_true, axis=1) + y_pred = K.l2_normalize(y_pred, axis=1) + return -K.mean(y_true * y_pred, axis=1) + + # aliases mse = MSE = mean_squared_error rmse = RMSE = root_mean_squared_error mae = MAE = mean_absolute_error mape = MAPE = mean_absolute_percentage_error msle = MSLE = mean_squared_logarithmic_error +cosine = cosine_proximity from .utils.generic_utils import get_from_module def get(identifier): From 13379da81bff3d72d7ade8b662640545f4c1b559 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Tue, 5 Jan 2016 15:44:15 -0800 Subject: [PATCH 124/145] Fix kernel shape type in theano conv2d --- keras/backend/theano_backend.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/keras/backend/theano_backend.py b/keras/backend/theano_backend.py index 278f65082..f43e276c1 100644 --- a/keras/backend/theano_backend.py +++ b/keras/backend/theano_backend.py @@ -575,8 +575,9 @@ def conv2d(x, kernel, strides=(1, 1), border_mode='valid', dim_ordering='th', conv_out = dnn.dnn_conv(img=x, kerns=kernel, border_mode='full') - shift_x = (kernel.shape[2] - 1) // 2 - shift_y = (kernel.shape[3] - 1) // 2 + np_kernel = kernel.eval() + shift_x = (np_kernel.shape[2] - 1) // 2 + shift_y = (np_kernel.shape[3] - 1) // 2 conv_out = conv_out[:, :, shift_x:x.shape[2] + shift_x, shift_y:x.shape[3] + shift_y] From d02ea0346289418958b241e9a3df7eb6ca560cdd Mon Sep 17 00:00:00 2001 From: jarfo Date: Wed, 6 Jan 2016 13:27:28 +0100 Subject: [PATCH 125/145] Update models.py Model evaluation (test) using the _test K.function should be also stateful for stateful recurrent networks --- keras/models.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/keras/models.py b/keras/models.py index f2653ad4e..2f48f1d20 100644 --- a/keras/models.py +++ b/keras/models.py @@ -453,8 +453,8 @@ class Sequential(Model, containers.Sequential): self._train = K.function(train_ins, [train_loss], updates=updates) self._train_with_acc = K.function(train_ins, [train_loss, train_accuracy], updates=updates) self._predict = K.function(predict_ins, [self.y_test], updates=self.state_updates) - self._test = K.function(test_ins, [test_loss]) - self._test_with_acc = K.function(test_ins, [test_loss, test_accuracy]) + self._test = K.function(test_ins, [test_loss], updates=self.state_updates) + self._test_with_acc = K.function(test_ins, [test_loss, test_accuracy], updates=self.state_updates) def fit(self, X, y, batch_size=128, nb_epoch=100, verbose=1, callbacks=[], validation_split=0., validation_data=None, shuffle=True, @@ -1049,7 +1049,7 @@ class Graph(Model, containers.Graph): self.loss = loss self._train = K.function(train_ins, [train_loss], updates=updates) - self._test = K.function(test_ins, [test_loss]) + self._test = K.function(test_ins, [test_loss], updates=self.state_updates) self._predict = K.function(inputs=ins, outputs=ys_test, updates=self.state_updates) From f8dd6da08df4b79f344a302abe54c7b9df26a1c7 Mon Sep 17 00:00:00 2001 From: tboquet Date: Wed, 6 Jan 2016 21:59:50 -0500 Subject: [PATCH 
126/145] Eval kernel base Theano conv2d same border mode --- keras/backend/theano_backend.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/keras/backend/theano_backend.py b/keras/backend/theano_backend.py index f43e276c1..f3de18beb 100644 --- a/keras/backend/theano_backend.py +++ b/keras/backend/theano_backend.py @@ -601,8 +601,9 @@ def conv2d(x, kernel, strides=(1, 1), border_mode='valid', dim_ordering='th', image_shape=image_shape, filter_shape=filter_shape) if border_mode == 'same': - shift_x = (kernel.shape[2] - 1) // 2 - shift_y = (kernel.shape[3] - 1) // 2 + np_kernel = kernel.eval() + shift_x = (np_kernel.shape[2] - 1) // 2 + shift_y = (np_kernel.shape[3] - 1) // 2 conv_out = conv_out[:, :, shift_x:x.shape[2] + shift_x, shift_y:x.shape[3] + shift_y] From fe18ad8dde6b4d1597cddcadfcf5571e1ec83cd4 Mon Sep 17 00:00:00 2001 From: Fariz Rahman Date: Thu, 7 Jan 2016 17:06:43 +0530 Subject: [PATCH 127/145] Lambda layer:get_output should use get_input. --- keras/layers/core.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/keras/layers/core.py b/keras/layers/core.py index 534f12194..e81a30f28 100644 --- a/keras/layers/core.py +++ b/keras/layers/core.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- from __future__ import absolute_import -from __future__ import division +from __future__ import divisionla import numpy as np @@ -1382,12 +1382,10 @@ class Lambda(Layer): return tuple(shape) def get_output(self, train=False): + X = self.get_input(train) func = marshal.loads(self.function) func = types.FunctionType(func, globals()) - if hasattr(self, 'previous'): - return func(self.previous.get_output(train)) - else: - return func(self.input) + return func(X) class MaskedLambda(MaskedLayer, Lambda): From f800e448a231dfa5d4c4ccffb5882cd08869f9f5 Mon Sep 17 00:00:00 2001 From: Ozan Caglayan Date: Thu, 7 Jan 2016 20:19:24 +0200 Subject: [PATCH 128/145] models: Cast the mask to floatX to avoid theano upcasting Issue #1416 --- keras/models.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/keras/models.py b/keras/models.py index 2f48f1d20..f844466f8 100644 --- a/keras/models.py +++ b/keras/models.py @@ -81,6 +81,8 @@ def weighted_objective(fn): # score_array has ndim >= 2 score_array = fn(y_true, y_pred) if mask is not None: + # Cast the mask to floatX to avoid float64 upcasting in theano + mask = K.cast(mask, K.floatx()) # mask should have the same shape as score_array score_array *= mask # the loss per batch should be proportional From bf2f64bfd5ce4f0595fe6e88f98cd4159e7a57f3 Mon Sep 17 00:00:00 2001 From: Fariz Rahman Date: Fri, 8 Jan 2016 00:27:43 +0530 Subject: [PATCH 129/145] Update core.py --- keras/layers/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/keras/layers/core.py b/keras/layers/core.py index e81a30f28..d3074a41e 100644 --- a/keras/layers/core.py +++ b/keras/layers/core.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- from __future__ import absolute_import -from __future__ import divisionla +from __future__ import division import numpy as np From e21a6a9ebff8659594ecd70e8123252715d6b101 Mon Sep 17 00:00:00 2001 From: Fariz Rahman Date: Fri, 8 Jan 2016 01:13:00 +0530 Subject: [PATCH 130/145] Fix output_shape too. 
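Together with PATCH 127 (`get_output` via `get_input`), this change lets a `Lambda` with a shape function work even when the layer has no `previous`, since the shape is now derived from the layer's own `input_shape`. A minimal sketch of that case, assuming the 0.3-era `Sequential`/`Lambda` API (the illustrative function and shapes here are not taken from the patch):

```python
from keras.models import Sequential
from keras.layers.core import Lambda

model = Sequential()
# Lambda as the first layer of the model: there is no previous layer,
# so output_shape must be computed from the layer's own input_shape
model.add(Lambda(lambda x: x * 2,
                 output_shape=lambda input_shape: input_shape,
                 input_shape=(4,)))
model.compile(optimizer='sgd', loss='mse')
```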
--- keras/layers/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/keras/layers/core.py b/keras/layers/core.py index d3074a41e..7ab8e9953 100644 --- a/keras/layers/core.py +++ b/keras/layers/core.py @@ -1376,7 +1376,7 @@ class Lambda(Layer): else: output_shape_func = marshal.loads(self._output_shape) output_shape_func = types.FunctionType(output_shape_func, globals()) - shape = output_shape_func(self.previous.output_shape) + shape = output_shape_func(self.input_shape) if type(shape) not in {list, tuple}: raise Exception('output_shape function must return a tuple') return tuple(shape) From e947a56c52923f55e09b25bbdd6ed3bc4d46adce Mon Sep 17 00:00:00 2001 From: jake Date: Thu, 7 Jan 2016 17:39:04 -0800 Subject: [PATCH 131/145] models: when dumping config to json, parse unhandled types correctly - properly convert numpy types - handle python 'type' objects --- keras/models.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/keras/models.py b/keras/models.py index f844466f8..42c4da77d 100644 --- a/keras/models.py +++ b/keras/models.py @@ -370,8 +370,21 @@ class Model(object): `keras.models.from_json(json_string, custom_objects={})`. ''' import json + + def get_json_type(obj): + + # if obj is any numpy type + if type(obj).__module__ == np.__name__: + return obj.item(); + + # if obj is a python 'type' + if type(obj).__name__ == type.__name__: + return obj.__name__ + + raise TypeError('Not JSON Serializable') + config = self.get_config() - return json.dumps(config, **kwargs) + return json.dumps(config, default=get_json_type, **kwargs) def summary(self): '''Print out a summary of the model architecture, From 09d91fccb9c56af2a18ffbd4dd5fee022b94e04b Mon Sep 17 00:00:00 2001 From: fchollet Date: Thu, 7 Jan 2016 20:22:33 -0800 Subject: [PATCH 132/145] Add antirectifier example --- examples/antirectifier.py | 110 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 110 insertions(+) create mode 100644 examples/antirectifier.py diff --git a/examples/antirectifier.py b/examples/antirectifier.py new file mode 100644 index 000000000..73dbd9e3c --- /dev/null +++ b/examples/antirectifier.py @@ -0,0 +1,110 @@ +'''The example demonstrates how to write custom layers for Keras. + +We build a custom activation layer called 'Antirectifier', +which modifies the shape of the tensor that passes through it. +We need to specify two methods: `output_shape` and `get_output`. + +Note that same result can also be achieved via a Lambda layer. + +Because our custom layer is written with primitives from the Keras +backend (`K`), our code can run both on TensorFlow and Theano. +''' + +from __future__ import print_function +import numpy as np +from keras.models import Sequential +from keras.layers.core import Dense, Dropout, Layer, Activation +from keras.datasets import mnist +from keras import backend as K +from keras.utils import np_utils + + +class Antirectifier(Layer): + '''This is the combination of a sample-wise + L2 normalization with the concatenation of the + positive part of the output with the negative part + of the output. The result is a tensor of samples that are + twice as large as the input samples. + + It can be used in place of a ReLU. + + # Input shape + 2D tensor of shape (samples, n) + + # Output shape + 2D tensor of shape (samples, 2*n) + + # Theoretical justification + When applying ReLU, assuming that the distribution + of the previous output is approximately centered around 0., + you are discarding half of your input. This is inefficient. 
+ + Antirectifier allows to return all-positive outputs like ReLU, + without discarding any data. + + Further, the samplewise normalization of the output + allows to interpret output features as a probability distribution + (since they are between 0 and 1 and sum to 1 for each sample). + + Tests on MNIST show that Antirectifier allows to train networks + with twice less parameters yet with the same + classification performance as an equivalent ReLU-based network. + ''' + @property + def output_shape(self): + shape = list(self.input_shape) + assert len(shape) == 2 # only valid for 2D tensors + shape[-1] *= 2 + return tuple(shape) + + def get_output(self, train): + x = self.get_input(train) + x -= K.mean(x, axis=1, keepdims=True) + x = K.l2_normalize(x, axis=1) + pos = K.relu(x) + neg = K.relu(-x) + return K.concatenate([pos, neg], axis=1) + +# global parameters +batch_size = 128 +nb_classes = 10 +nb_epoch = 40 + +# the data, shuffled and split between tran and test sets +(X_train, y_train), (X_test, y_test) = mnist.load_data() + +X_train = X_train.reshape(60000, 784) +X_test = X_test.reshape(10000, 784) +X_train = X_train.astype('float32') +X_test = X_test.astype('float32') +X_train /= 255 +X_test /= 255 +print(X_train.shape[0], 'train samples') +print(X_test.shape[0], 'test samples') + +# convert class vectors to binary class matrices +Y_train = np_utils.to_categorical(y_train, nb_classes) +Y_test = np_utils.to_categorical(y_test, nb_classes) + +# build the model +model = Sequential() +model.add(Dense(256, input_shape=(784,))) +model.add(Antirectifier()) +model.add(Dropout(0.1)) +model.add(Dense(256)) +model.add(Antirectifier()) +model.add(Dropout(0.1)) +model.add(Dense(10)) +model.add(Activation('softmax')) + +# compile the model +model.compile(loss='categorical_crossentropy', optimizer='rmsprop') + +# train the model +model.fit(X_train, Y_train, + batch_size=batch_size, nb_epoch=nb_epoch, + show_accuracy=True, verbose=1, + validation_data=(X_test, Y_test)) + +# next, compare with an equivalent network +# with2x bigger Dense layers and ReLU From d0b98a2cb5014ce23a16ab98cf3ebec2a28d5612 Mon Sep 17 00:00:00 2001 From: fchollet Date: Thu, 7 Jan 2016 21:18:34 -0800 Subject: [PATCH 133/145] Antirectifier example style fixes --- examples/antirectifier.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/examples/antirectifier.py b/examples/antirectifier.py index 73dbd9e3c..3c65a2ea3 100644 --- a/examples/antirectifier.py +++ b/examples/antirectifier.py @@ -4,7 +4,7 @@ We build a custom activation layer called 'Antirectifier', which modifies the shape of the tensor that passes through it. We need to specify two methods: `output_shape` and `get_output`. -Note that same result can also be achieved via a Lambda layer. +Note that the same result can also be achieved via a Lambda layer. Because our custom layer is written with primitives from the Keras backend (`K`), our code can run both on TensorFlow and Theano. @@ -22,8 +22,8 @@ from keras.utils import np_utils class Antirectifier(Layer): '''This is the combination of a sample-wise L2 normalization with the concatenation of the - positive part of the output with the negative part - of the output. The result is a tensor of samples that are + positive part of the input with the negative part + of the input. The result is a tensor of samples that are twice as large as the input samples. It can be used in place of a ReLU. 
@@ -47,8 +47,8 @@ class Antirectifier(Layer): (since they are between 0 and 1 and sum to 1 for each sample). Tests on MNIST show that Antirectifier allows to train networks - with twice less parameters yet with the same - classification performance as an equivalent ReLU-based network. + with twice less parameters yet with comparable + classification accuracy as an equivalent ReLU-based network. ''' @property def output_shape(self): From ced84d53bc01e1623da671187588a63dcc9a5a11 Mon Sep 17 00:00:00 2001 From: fchollet Date: Thu, 7 Jan 2016 22:24:40 -0800 Subject: [PATCH 134/145] Fix example docstring --- examples/antirectifier.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/examples/antirectifier.py b/examples/antirectifier.py index 3c65a2ea3..c75da5e19 100644 --- a/examples/antirectifier.py +++ b/examples/antirectifier.py @@ -42,10 +42,6 @@ class Antirectifier(Layer): Antirectifier allows to return all-positive outputs like ReLU, without discarding any data. - Further, the samplewise normalization of the output - allows to interpret output features as a probability distribution - (since they are between 0 and 1 and sum to 1 for each sample). - Tests on MNIST show that Antirectifier allows to train networks with twice less parameters yet with comparable classification accuracy as an equivalent ReLU-based network. From 037e592f2ba7c18b71bc9b39f84de11af0252863 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Fri, 8 Jan 2016 10:02:28 -0800 Subject: [PATCH 135/145] Naming, batch_flatten --- keras/backend/tensorflow_backend.py | 10 +++++- keras/backend/theano_backend.py | 8 +++++ keras/initializations.py | 46 ++++++++++++------------ keras/layers/core.py | 29 +++++++++++---- keras/utils/layer_utils.py | 6 +++- tests/keras/layers/test_core.py | 15 ++++++++ tests/keras/layers/test_normalization.py | 2 ++ 7 files changed, 85 insertions(+), 31 deletions(-) diff --git a/keras/backend/tensorflow_backend.py b/keras/backend/tensorflow_backend.py index 5bb472c4c..154ffcf13 100644 --- a/keras/backend/tensorflow_backend.py +++ b/keras/backend/tensorflow_backend.py @@ -287,6 +287,10 @@ def tile(x, n): def flatten(x): + return tf.reshape(x, [-1]) + + +def batch_flatten(x): '''Turn a n-D tensor into a 2D tensor where the first dimension is conserved. ''' @@ -345,12 +349,16 @@ def set_value(x, value): class Function(object): def __init__(self, inputs, outputs, updates=[]): + assert type(inputs) in {list, tuple} + assert type(outputs) in {list, tuple} + assert type(updates) in {list, tuple} self.inputs = list(inputs) self.outputs = list(outputs) with tf.control_dependencies(self.outputs): self.updates = [tf.assign(p, new_p) for (p, new_p) in updates] def __call__(self, inputs): + assert type(inputs) in {list, tuple} names = [v.name for v in self.inputs] feed_dict = dict(zip(names, inputs)) session = _get_session() @@ -442,7 +450,7 @@ def rnn(step_function, inputs, initial_states, new_states = successive_states[-1] outputs = tf.transpose(outputs, (1, 0, 2)) - return last_output, outputs, states + return last_output, outputs, new_states def switch(condition, then_expression, else_expression): diff --git a/keras/backend/theano_backend.py b/keras/backend/theano_backend.py index f43e276c1..fa049b8f9 100644 --- a/keras/backend/theano_backend.py +++ b/keras/backend/theano_backend.py @@ -287,6 +287,10 @@ def tile(x, n): def flatten(x): + return T.flatten(x) + + +def batch_flatten(x): '''Turn a n-D tensor into a 2D tensor where the first dimension is conserved. 
''' @@ -378,10 +382,14 @@ def set_value(x, value): class Function(object): def __init__(self, inputs, outputs, updates=[], **kwargs): + assert type(inputs) in {list, tuple} + assert type(outputs) in {list, tuple} + assert type(updates) in {list, tuple} self.function = theano.function(inputs, outputs, updates=updates, allow_input_downcast=True, **kwargs) def __call__(self, inputs): + assert type(inputs) in {list, tuple} return self.function(*inputs) diff --git a/keras/initializations.py b/keras/initializations.py index d0afff97c..a1451e6d6 100644 --- a/keras/initializations.py +++ b/keras/initializations.py @@ -9,52 +9,54 @@ def get_fans(shape): return fan_in, fan_out -def uniform(shape, scale=0.05): - return K.variable(np.random.uniform(low=-scale, high=scale, size=shape)) +def uniform(shape, scale=0.05, name=None): + return K.variable(np.random.uniform(low=-scale, high=scale, size=shape), + name=name) -def normal(shape, scale=0.05): - return K.variable(np.random.normal(loc=0.0, scale=scale, size=shape)) +def normal(shape, scale=0.05, name=None): + return K.variable(np.random.normal(loc=0.0, scale=scale, size=shape), + name=name) -def lecun_uniform(shape): +def lecun_uniform(shape, name=None): ''' Reference: LeCun 98, Efficient Backprop http://yann.lecun.com/exdb/publis/pdf/lecun-98b.pdf ''' fan_in, fan_out = get_fans(shape) scale = np.sqrt(3. / fan_in) - return uniform(shape, scale) + return uniform(shape, scale, name=name) -def glorot_normal(shape): +def glorot_normal(shape, name=None): ''' Reference: Glorot & Bengio, AISTATS 2010 ''' fan_in, fan_out = get_fans(shape) s = np.sqrt(2. / (fan_in + fan_out)) - return normal(shape, s) + return normal(shape, s, name=name) -def glorot_uniform(shape): +def glorot_uniform(shape, name=None): fan_in, fan_out = get_fans(shape) s = np.sqrt(6. / (fan_in + fan_out)) - return uniform(shape, s) + return uniform(shape, s, name=name) -def he_normal(shape): +def he_normal(shape, name=None): ''' Reference: He et al., http://arxiv.org/abs/1502.01852 ''' fan_in, fan_out = get_fans(shape) s = np.sqrt(2. / fan_in) - return normal(shape, s) + return normal(shape, s, name=name) -def he_uniform(shape): +def he_uniform(shape, name=None): fan_in, fan_out = get_fans(shape) s = np.sqrt(6. / fan_in) - return uniform(shape, s) + return uniform(shape, s, name=name) -def orthogonal(shape, scale=1.1): +def orthogonal(shape, scale=1.1, name=None): ''' From Lasagne. 
Reference: Saxe et al., http://arxiv.org/abs/1312.6120 ''' flat_shape = (shape[0], np.prod(shape[1:])) @@ -63,23 +65,23 @@ def orthogonal(shape, scale=1.1): # pick the one with the correct shape q = u if u.shape == flat_shape else v q = q.reshape(shape) - return K.variable(scale * q[:shape[0], :shape[1]]) + return K.variable(scale * q[:shape[0], :shape[1]], name=name) -def identity(shape, scale=1): +def identity(shape, scale=1, name=None): if len(shape) != 2 or shape[0] != shape[1]: raise Exception('Identity matrix initialization can only be used ' 'for 2D square matrices.') else: - return K.variable(scale * np.identity(shape[0])) + return K.variable(scale * np.identity(shape[0]), name=name) -def zero(shape): - return K.zeros(shape) +def zero(shape, name=None): + return K.zeros(shape, name=name) -def one(shape): - return K.ones(shape) +def one(shape, name=None): + return K.ones(shape, name=name) from .utils.generic_utils import get_from_module diff --git a/keras/layers/core.py b/keras/layers/core.py index 534f12194..c8a50d3e7 100644 --- a/keras/layers/core.py +++ b/keras/layers/core.py @@ -36,20 +36,34 @@ class Layer(object): allowed_kwargs = {'input_shape', 'trainable', 'batch_input_shape', - 'cache_enabled'} + 'cache_enabled', + 'name'} for kwarg in kwargs: assert kwarg in allowed_kwargs, 'Keyword argument not understood: ' + kwarg + if 'input_shape' in kwargs: self.set_input_shape((None,) + tuple(kwargs['input_shape'])) if 'batch_input_shape' in kwargs: self.set_input_shape(tuple(kwargs['batch_input_shape'])) + self.trainable = True if 'trainable' in kwargs: - self._trainable = kwargs['trainable'] + self.trainable = kwargs['trainable'] + self.name = self.__class__.__name__.lower() + if 'name' in kwargs: + self.name = kwargs['name'] if not hasattr(self, 'params'): self.params = [] - self._cache_enabled = True + self.cache_enabled = True if 'cache_enabled' in kwargs: - self._cache_enabled = kwargs['cache_enabled'] + self.cache_enabled = kwargs['cache_enabled'] + + @property + def name(self): + return self._name + + @name.setter + def name(self, name): + self._name = name @property def cache_enabled(self): @@ -233,7 +247,8 @@ class Layer(object): config['input_shape'] = self._input_shape[1:] if hasattr(self, '_trainable'): config['trainable'] = self._trainable - config['cache_enabled'] = self.cache_enabled + config['cache_enabled'] = self.cache_enabled + config['custom_name'] = self.name return config def get_params(self): @@ -688,7 +703,7 @@ class Reshape(Layer): def _fix_unknown_dimension(self, input_shape, output_shape): '''Find and replace a single missing dimension in an output shape given and input shape. - + A near direct port of the internal numpy function _fix_unknown_dimension in numpy/core/src/multiarray/shape.c @@ -819,7 +834,7 @@ class Flatten(Layer): def get_output(self, train=False): X = self.get_input(train) - return K.flatten(X) + return K.batch_flatten(X) class RepeatVector(Layer): diff --git a/keras/utils/layer_utils.py b/keras/utils/layer_utils.py index 069b3fa5f..a92dd75fa 100644 --- a/keras/utils/layer_utils.py +++ b/keras/utils/layer_utils.py @@ -71,10 +71,11 @@ def container_from_config(original_layer_dict, custom_objects={}): kwargs[kwarg] = layer_dict[kwarg] return AutoEncoder(**kwargs) - else: + else: # this is a non-topological layer (e.g. Dense, etc.) 
layer_dict.pop('name') for k, v in layer_dict.items(): + # a dictionary argument may be a regularizer or constraint if isinstance(v, dict): vname = v.pop('name') if vname in [x for x, y in inspect.getmembers(constraints, predicate=inspect.isclass)]: @@ -85,6 +86,9 @@ def container_from_config(original_layer_dict, custom_objects={}): # not a regularizer of constraint, don't touch it v['name'] = vname + # the "name" keyword argument of layers is saved as "custom_name" + if 'custom_name' in layer_dict: + layer_dict['name'] = layer_dict.pop('custom_name') base_layer = get_layer(name, layer_dict) return base_layer diff --git a/tests/keras/layers/test_core.py b/tests/keras/layers/test_core.py index 965d5b7be..621626a01 100644 --- a/tests/keras/layers/test_core.py +++ b/tests/keras/layers/test_core.py @@ -130,6 +130,21 @@ def test_maxout_dense(): _runner(layer) +def test_naming(): + layer = core.Dense(2, input_dim=2) + assert layer.name == 'dense' + + model = Sequential() + model.add(core.Dense(2, input_dim=2, name='my_dense')) + model.add(core.Dense(2, name='my_dense')) + + assert model.layers[0].name == 'my_dense' + assert model.layers[1].name == 'my_dense' + + model.compile(optimizer='rmsprop', loss='mse') + model.train_on_batch(np.random.random((2, 2)), np.random.random((2, 2))) + + @pytest.mark.skipif(K._BACKEND == 'tensorflow', reason='currently not working with TensorFlow') def test_sequences(): diff --git a/tests/keras/layers/test_normalization.py b/tests/keras/layers/test_normalization.py index f38e70068..a62eaa258 100644 --- a/tests/keras/layers/test_normalization.py +++ b/tests/keras/layers/test_normalization.py @@ -85,6 +85,8 @@ def test_batchnorm_config(): epsilon=0.1, momentum=0.9) conf = norm.get_config() del conf['cache_enabled'] + del conf['trainable'] + del conf['custom_name'] conf_target = {"input_shape": (10, 10), "name": normalization.BatchNormalization.__name__, "epsilon": 0.1, "mode": 1, "momentum": 0.9} From 6bb4cbbf5efaf5ad68be03183b3dc2dda58f48ba Mon Sep 17 00:00:00 2001 From: tboquet Date: Fri, 8 Jan 2016 13:19:25 -0500 Subject: [PATCH 136/145] added compatibility for custom loss functions --- keras/models.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/keras/models.py b/keras/models.py index f844466f8..b84f2d5d0 100644 --- a/keras/models.py +++ b/keras/models.py @@ -156,6 +156,13 @@ def model_from_config(config, custom_objects={}): if 'optimizer' in config: # if it has an optimizer, the model is assumed to be compiled loss = config.get('loss') + + # if a custom loss function is passed replace it in loss + for l in loss: + for c in custom_objects: + if loss[l] == c: + loss[l] = custom_objects[c] + class_mode = config.get('class_mode') optimizer_params = dict([(k, v) for k, v in config.get('optimizer').items()]) From 42cd4d6b629548f2adce63a5d2bd819daa9b19cb Mon Sep 17 00:00:00 2001 From: tboquet Date: Fri, 8 Jan 2016 14:16:27 -0500 Subject: [PATCH 137/145] Added support for both Sequential and Graph --- keras/models.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/keras/models.py b/keras/models.py index b84f2d5d0..08a8b5e32 100644 --- a/keras/models.py +++ b/keras/models.py @@ -158,10 +158,13 @@ def model_from_config(config, custom_objects={}): loss = config.get('loss') # if a custom loss function is passed replace it in loss - for l in loss: - for c in custom_objects: - if loss[l] == c: - loss[l] = custom_objects[c] + if model_name == 'Graph': + for l in loss: + for c in custom_objects: + if loss[l] == c: + loss[l] = 
custom_objects[c] + elif model_name == 'Sequential' and loss in custom_objects: + loss = custom_objects[loss] class_mode = config.get('class_mode') From 314ee54e60802caffa6700c000fc25b8d5f7e85a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jo=C3=A3o=20Felipe=20Santos?= Date: Sun, 10 Jan 2016 13:12:00 -0500 Subject: [PATCH 138/145] FIxed Tensorboard callback for Python 3 Adding `dict_items` [does not work](https://stackoverflow.com/questions/13361510/typeerror-unsupported-operand-types-for-dict-items-and-dict-items) in Python 3. Workaround is to create a copy of the dict and `update` it with the other dict. --- keras/callbacks.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/keras/callbacks.py b/keras/callbacks.py index 59ae993be..93c47e706 100644 --- a/keras/callbacks.py +++ b/keras/callbacks.py @@ -513,7 +513,10 @@ class TensorBoard(Callback): summary_str = result[0] self.writer.add_summary(summary_str, epoch) - for name, value in self.totals.items() + logs.items(): + all_values = self.totals.copy() + all_values.update(logs) + + for name, value in all_values.items(): if name in ['batch', 'size']: continue summary = tf.Summary() From a5c07d796aa63733cfc79ccfa8c37cd604aef3a0 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Mon, 11 Jan 2016 11:22:29 -0800 Subject: [PATCH 139/145] Update theano backend --- keras/backend/theano_backend.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/keras/backend/theano_backend.py b/keras/backend/theano_backend.py index bb0ea9847..74092278d 100644 --- a/keras/backend/theano_backend.py +++ b/keras/backend/theano_backend.py @@ -382,9 +382,6 @@ def set_value(x, value): class Function(object): def __init__(self, inputs, outputs, updates=[], **kwargs): - assert type(inputs) in {list, tuple} - assert type(outputs) in {list, tuple} - assert type(updates) in {list, tuple} self.function = theano.function(inputs, outputs, updates=updates, allow_input_downcast=True, **kwargs) From ada6dd29435066a19782ad25dc79827a000a90d3 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Mon, 11 Jan 2016 11:23:38 -0800 Subject: [PATCH 140/145] Add neural style transfer example --- examples/neural_style_transfer.py | 259 ++++++++++++++++++++++++++++++ 1 file changed, 259 insertions(+) create mode 100644 examples/neural_style_transfer.py diff --git a/examples/neural_style_transfer.py b/examples/neural_style_transfer.py new file mode 100644 index 000000000..6642cf98d --- /dev/null +++ b/examples/neural_style_transfer.py @@ -0,0 +1,259 @@ +'''Neural style transfer with Keras. + +Before running this script, download the weights for the VGG16 model at: +https://drive.google.com/file/d/0Bz7KyqmuGsilT0J5dmRCM0ROVHc/view?usp=sharing +(source: https://gist.github.com/baraldilorenzo/07d7802847aaad0a35d3) +and make sure the variable `weights_path` in this script matches the location of the file. + +Run the script with: +``` +python neural_style.py path_to_your_base_image.jpg path_to_your_reference.jpg prefix_for_results +``` +e.g.: +``` +python neural_style.py img/tuebingen.jpg img/starry_night.jpg results/my_result +``` + +It is preferrable to run this script on GPU, for speed. +If running on CPU, prefer the TensorFlow backend (much faster). + +# Details + +Style transfer consists in generating an image +with the same "content" as a base image, but with the +"style" of a different picture (typically artistic). 
+ +This is achieved through the optimization of a loss function +that has 3 components: "style loss", "content loss", +and "total variation loss": + + - The content loss is a L2 distance between the pixels of the base +image and the pixels of the combination image, keeping the generated image +close enough to the original one. + +- The total variation loss imposes local spatial continuity between +the pixels of the combination image, giving it visual coherence. + +- The style loss is where the deep learning keeps in --that one is defined +using a deep convolutional neural network. Precisely, it consists in a sum of +L2 distances betwen the Gram matrices of the representations of +the base image and the style reference image, extracted from +different layers of a convnet (trained on ImageNet). The general idea +is to capture color/texture information at different spatial +scales (fairly large scales --defined by the depth of the layer considered). + +# Reference: + - [A Neural Algorithm of Artistic Style](http://arxiv.org/abs/1508.06576) +''' + +from __future__ import print_function +import cv2 +import numpy as np +from scipy.optimize import fmin_l_bfgs_b +import time +import argparse +import h5py + +from keras.models import Sequential +from keras.layers.convolutional import Convolution2D, ZeroPadding2D, MaxPooling2D +from keras.layers.core import Dense, Dropout, Flatten +from keras import backend as K + +parser = argparse.ArgumentParser(description='Neural style transfer with Keras.') +parser.add_argument('base_image_path', metavar='base', type=str, + help='Path to the image to transform.') +parser.add_argument('style_reference_image_path', metavar='ref', type=str, + help='Path to the style reference image.') +parser.add_argument('result_prefix', metavar='res_prefix', type=str, + help='Prefix for the saved results.') + +# base_image_path = 'tuebingen.jpg' +# style_reference_image_path = 'starry_night.jpg' +# result_prefix = 'my_result_th' +args = parser.parse_args() +base_image_path = args.base_image_path +style_reference_image_path = args.style_reference_image_path +result_prefix = args.result_prefix +weights_path = 'vgg16_weights.h5' + +# these are the weights of the different loss components +total_variation_weight = 1. +style_weight = 1. +content_weight = 0.025 + +# dimensions of the generated picture. +img_width = 400 +img_height = 400 +assert img_height == img_width, 'Due to the use of the Gram matrix, width and height must match.' 
+ +# util function to open, resize and format pictures into appropriate tensors +def preprocess_image(image_path): + im = cv2.resize(cv2.imread(image_path), (img_width, img_height)) + im = im.transpose((2, 0, 1)) + im = np.expand_dims(im, axis=0) + return im + +# util function to convert a tensor into a valid image +def deprocess_image(x): + x = x.transpose((1, 2, 0)) + x = np.clip(x, 0, 255).astype('uint8') + return x + +# get tensor representations of our images +base_image = K.variable(preprocess_image(base_image_path)) +style_reference_image = K.variable(preprocess_image(style_reference_image_path)) + +# this will contain our generated image +combination_image = K.placeholder((1, 3, img_width, img_height)) + +# combine the 3 images into a single Keras tensor +input_tensor = K.concatenate([base_image, + style_reference_image, + combination_image], axis=0) + +# build the VGG16 network with our 3 images as input +first_layer = ZeroPadding2D((1, 1), input_shape=(3, img_width, img_height)) +first_layer.input = input_tensor + +model = Sequential() +model.add(first_layer) +model.add(Convolution2D(64, 3, 3, activation='relu', name='conv1_1')) +model.add(ZeroPadding2D((1, 1))) +model.add(Convolution2D(64, 3, 3, activation='relu')) +model.add(MaxPooling2D((2, 2), strides=(2, 2))) + +model.add(ZeroPadding2D((1, 1))) +model.add(Convolution2D(128, 3, 3, activation='relu', name='conv2_1')) +model.add(ZeroPadding2D((1, 1))) +model.add(Convolution2D(128, 3, 3, activation='relu')) +model.add(MaxPooling2D((2, 2), strides=(2, 2))) + +model.add(ZeroPadding2D((1, 1))) +model.add(Convolution2D(256, 3, 3, activation='relu', name='conv3_1')) +model.add(ZeroPadding2D((1, 1))) +model.add(Convolution2D(256, 3, 3, activation='relu')) +model.add(ZeroPadding2D((1, 1))) +model.add(Convolution2D(256, 3, 3, activation='relu')) +model.add(MaxPooling2D((2, 2), strides=(2, 2))) + +model.add(ZeroPadding2D((1, 1))) +model.add(Convolution2D(512, 3, 3, activation='relu', name='conv4_1')) +model.add(ZeroPadding2D((1, 1))) +model.add(Convolution2D(512, 3, 3, activation='relu', name='conv4_2')) +model.add(ZeroPadding2D((1, 1))) +model.add(Convolution2D(512, 3, 3, activation='relu')) +model.add(MaxPooling2D((2, 2), strides=(2, 2))) + +model.add(ZeroPadding2D((1, 1))) +model.add(Convolution2D(512, 3, 3, activation='relu', name='conv5_1')) +model.add(ZeroPadding2D((1, 1))) +model.add(Convolution2D(512, 3, 3, activation='relu')) +model.add(ZeroPadding2D((1, 1))) +model.add(Convolution2D(512, 3, 3, activation='relu')) +model.add(MaxPooling2D((2, 2), strides=(2, 2))) + +# load the weights of the VGG16 networks +# (trained on ImageNet, won the ILSVRC competition in 2014) +# note: when there is a complete match between your model definition +# and your weight savefile, you can simply call model.load_weights(filename) +f = h5py.File(weights_path) +for k in range(f.attrs['nb_layers']): + if k >= len(model.layers): + # we don't look at the last (fully-connected) layers in the savefile + break + g = f['layer_{}'.format(k)] + weights = [g['param_{}'.format(p)] for p in range(g.attrs['nb_params'])] + model.layers[k].set_weights(weights) +f.close() +print('Model loaded.') + +# get the symbolic outputs of each "key" layer (we gave them unique names). 
+outputs_dict = dict([(layer.name, layer.get_output()) for layer in model.layers]) + +# compute the neural style loss +# first we need to define 4 util functions + +# the gram matrix of an image tensor (feature-wise outer product) +def gram_matrix(x): + assert K.ndim(x) == 3 + features = K.batch_flatten(x) + gram = K.dot(features, K.transpose(features)) + return gram + +# the "style loss" is designed to maintain +# the style of the reference image in the generated image. +# It is based on the gram matrices (which capture style) of +# feature maps from the style reference image +# and from the generated image +def style_loss(style, combination): + assert K.ndim(style) == 3 + assert K.ndim(combination) == 3 + S = gram_matrix(style) + C = gram_matrix(combination) + channels = 3 + size = img_width * img_height + return K.sum(K.square(S - C)) / (4. * (channels ** 2) * (size ** 2)) + +# an auxiliary loss function +# designed to maintain the "content" of the +# base image in the generated image +def content_loss(base, combination): + return K.sum(K.square(combination - base)) + +# the 3rd loss function, total variation loss, +# designed to keep the generated image locally coherent +def total_variation_loss(x): + assert K.ndim(x) == 4 + a = K.square(x[:, :, :img_width-1, :img_height-1] - x[:, :, 1:, :img_height-1]) + b = K.square(x[:, :, :img_width-1, :img_height-1] - x[:, :, :img_width-1, 1:]) + return K.sum(K.pow(a + b, 1.25)) + +# combine these loss functions into a single scalar +loss = K.variable(0.) +layer_features = outputs_dict['conv4_2'] +base_image_features = layer_features[0, :, :, :] +combination_features = layer_features[2, :, :, :] +loss += content_weight * content_loss(base_image_features, + combination_features) + +feature_layers = ['conv1_1', 'conv2_1', 'conv3_1', 'conv4_1', 'conv5_1'] +for layer_name in feature_layers: + layer_features = outputs_dict[layer_name] + style_reference_features = layer_features[1, :, :, :] + combination_features = layer_features[2, :, :, :] + sl = style_loss(style_reference_features, combination_features) + loss += (style_weight / len(feature_layers)) * sl +loss += total_variation_weight * total_variation_loss(combination_image) + +# get the gradients of the generated image wrt the loss +grads = K.gradients(loss, combination_image) + +# set up helper functions to extract the loss and gradients +# from the computational graph as Numpy arrays +f_grads = K.function([combination_image], grads) +def eval_grads(x): + x = x.reshape((1, 3, img_width, img_height)) + return np.array(f_grads([x])).flatten().astype('float64') + +f_loss = K.function([combination_image], [loss]) +def eval_loss(x): + x = x.reshape((1, 3, img_width, img_height)) + return f_loss([x])[0].astype('float64') + +# run scipy-based optimization (L-BFGS) over the pixels of the generated image +# so as to minimize the neural style loss +x = np.random.uniform(0, 255, (1, 3, img_width, img_height)) +for i in range(10): + print('Start of iteration', i) + eval_loss_calls = 0 + start_time = time.time() + x, min_val, info = fmin_l_bfgs_b(eval_loss, x.flatten(), + fprime=eval_grads, maxfun=20) + print('Current loss value:', min_val) + # save current generated image + im = deprocess_image(x.reshape((3, img_width, img_height))) + fname = result_prefix + '_at_iteration_%d.png' % i + cv2.imwrite(fname, im) + end_time = time.time() + print('Image saved as', fname) + print('Iteration %d completed in %ds' % (i, end_time - start_time)) From 1fdcc370b6d9a71483bc968d5a0f3d259e07a78e Mon Sep 17 00:00:00 2001 
From: Francois Chollet Date: Mon, 11 Jan 2016 11:30:10 -0800 Subject: [PATCH 141/145] Remove unnecessary commented code. --- examples/neural_style_transfer.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/examples/neural_style_transfer.py b/examples/neural_style_transfer.py index 6642cf98d..ea998ce51 100644 --- a/examples/neural_style_transfer.py +++ b/examples/neural_style_transfer.py @@ -17,6 +17,8 @@ python neural_style.py img/tuebingen.jpg img/starry_night.jpg results/my_result It is preferrable to run this script on GPU, for speed. If running on CPU, prefer the TensorFlow backend (much faster). +Example result: https://twitter.com/fchollet/status/686631033085677568 + # Details Style transfer consists in generating an image @@ -67,9 +69,6 @@ parser.add_argument('style_reference_image_path', metavar='ref', type=str, parser.add_argument('result_prefix', metavar='res_prefix', type=str, help='Prefix for the saved results.') -# base_image_path = 'tuebingen.jpg' -# style_reference_image_path = 'starry_night.jpg' -# result_prefix = 'my_result_th' args = parser.parse_args() base_image_path = args.base_image_path style_reference_image_path = args.style_reference_image_path From cb13a33a3191d703d439444b9aaa5a5089dae0d9 Mon Sep 17 00:00:00 2001 From: Francois Chollet Date: Mon, 11 Jan 2016 11:43:10 -0800 Subject: [PATCH 142/145] Fix neural style comments --- examples/neural_style_transfer.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/examples/neural_style_transfer.py b/examples/neural_style_transfer.py index ea998ce51..d60e78a09 100644 --- a/examples/neural_style_transfer.py +++ b/examples/neural_style_transfer.py @@ -29,10 +29,6 @@ This is achieved through the optimization of a loss function that has 3 components: "style loss", "content loss", and "total variation loss": - - The content loss is a L2 distance between the pixels of the base -image and the pixels of the combination image, keeping the generated image -close enough to the original one. - - The total variation loss imposes local spatial continuity between the pixels of the combination image, giving it visual coherence. @@ -44,7 +40,11 @@ different layers of a convnet (trained on ImageNet). The general idea is to capture color/texture information at different spatial scales (fairly large scales --defined by the depth of the layer considered). -# Reference: + - The content loss is a L2 distance between the features of the base +image (extracted from a deep layer) and the features of the combination image, +keeping the generated image close enough to the original one. + +# References - [A Neural Algorithm of Artistic Style](http://arxiv.org/abs/1508.06576) ''' From ee6bad63b0fac8d2b76cc00c32e916aeb38513b3 Mon Sep 17 00:00:00 2001 From: Keunwoo Choi Date: Mon, 11 Jan 2016 23:33:25 +0000 Subject: [PATCH 143/145] Remove cv2 dependency and use scipy.misc to read, resize, and save images. --- examples/neural_style_transfer.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/neural_style_transfer.py b/examples/neural_style_transfer.py index d60e78a09..c6d7056db 100644 --- a/examples/neural_style_transfer.py +++ b/examples/neural_style_transfer.py @@ -49,7 +49,7 @@ keeping the generated image close enough to the original one. 
From ee6bad63b0fac8d2b76cc00c32e916aeb38513b3 Mon Sep 17 00:00:00 2001
From: Keunwoo Choi
Date: Mon, 11 Jan 2016 23:33:25 +0000
Subject: [PATCH 143/145] Remove cv2 dependency and use scipy.misc to read,
 resize, and save images.

---
 examples/neural_style_transfer.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/examples/neural_style_transfer.py b/examples/neural_style_transfer.py
index d60e78a09..c6d7056db 100644
--- a/examples/neural_style_transfer.py
+++ b/examples/neural_style_transfer.py
@@ -49,7 +49,7 @@ keeping the generated image close enough to the original one.
 '''
 from __future__ import print_function
-import cv2
+from scipy.misc import imread, imresize, imsave
 import numpy as np
 from scipy.optimize import fmin_l_bfgs_b
 import time
@@ -87,7 +87,7 @@ assert img_height == img_width, 'Due to the use of the Gram matrix, width and he
 
 # util function to open, resize and format pictures into appropriate tensors
 def preprocess_image(image_path):
-    im = cv2.resize(cv2.imread(image_path), (img_width, img_height))
+    im = imresize(imread(image_path), (img_width, img_height))
     im = im.transpose((2, 0, 1))
     im = np.expand_dims(im, axis=0)
     return im
@@ -252,7 +252,7 @@ for i in range(10):
     # save current generated image
     im = deprocess_image(x.reshape((3, img_width, img_height)))
     fname = result_prefix + '_at_iteration_%d.png' % i
-    cv2.imwrite(fname, im)
+    imsave(fname, im)
     end_time = time.time()
     print('Image saved as', fname)
     print('Iteration %d completed in %ds' % (i, end_time - start_time))

From 3d3b8c52e97ab8cbb66612b74e5564f70748b7ed Mon Sep 17 00:00:00 2001
From: Francois Chollet
Date: Wed, 13 Jan 2016 10:46:19 -0800
Subject: [PATCH 144/145] Cleanup of neural style transfer example

---
 examples/neural_style_transfer.py | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/examples/neural_style_transfer.py b/examples/neural_style_transfer.py
index c6d7056db..22ea1fb06 100644
--- a/examples/neural_style_transfer.py
+++ b/examples/neural_style_transfer.py
@@ -58,7 +58,6 @@ import h5py
 
 from keras.models import Sequential
 from keras.layers.convolutional import Convolution2D, ZeroPadding2D, MaxPooling2D
-from keras.layers.core import Dense, Dropout, Flatten
 from keras import backend as K
 
 parser = argparse.ArgumentParser(description='Neural style transfer with Keras.')
@@ -87,10 +86,10 @@ assert img_height == img_width, 'Due to the use of the Gram matrix, width and he
 
 # util function to open, resize and format pictures into appropriate tensors
 def preprocess_image(image_path):
-    im = imresize(imread(image_path), (img_width, img_height))
-    im = im.transpose((2, 0, 1))
-    im = np.expand_dims(im, axis=0)
-    return im
+    img = imresize(imread(image_path), (img_width, img_height))
+    img = img.transpose((2, 0, 1)).astype('float64')
+    img = np.expand_dims(img, axis=0)
+    return img
 
 # util function to convert a tensor into a valid image
 def deprocess_image(x):
@@ -244,15 +243,14 @@ def eval_loss(x):
 x = np.random.uniform(0, 255, (1, 3, img_width, img_height))
 for i in range(10):
     print('Start of iteration', i)
-    eval_loss_calls = 0
     start_time = time.time()
     x, min_val, info = fmin_l_bfgs_b(eval_loss, x.flatten(),
                                      fprime=eval_grads, maxfun=20)
     print('Current loss value:', min_val)
     # save current generated image
-    im = deprocess_image(x.reshape((3, img_width, img_height)))
+    img = deprocess_image(x.reshape((3, img_width, img_height)))
     fname = result_prefix + '_at_iteration_%d.png' % i
-    imsave(fname, im)
+    imsave(fname, img)
     end_time = time.time()
     print('Image saved as', fname)
     print('Iteration %d completed in %ds' % (i, end_time - start_time))
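
The float64 cast added to preprocess_image in the patch above matters because scipy's fmin_l_bfgs_b works in double precision, so keeping the image tensor in float64 avoids dtype surprises. A standalone sketch of the preprocess/deprocess round trip under that convention (a random array stands in for a real image file):

```
import numpy as np

img_width = img_height = 400

# stand-in for imresize(imread(path), ...): an (H, W, 3) uint8 RGB array
img = np.random.randint(0, 256, (img_height, img_width, 3)).astype('uint8')

# preprocess: channels first, float64, plus a leading batch dimension
x = img.transpose((2, 0, 1)).astype('float64')
x = np.expand_dims(x, axis=0)
assert x.shape == (1, 3, img_height, img_width)

# deprocess: drop batch dim, channels last, clip back to displayable uint8
out = np.clip(x[0].transpose((1, 2, 0)), 0, 255).astype('uint8')
assert out.shape == (img_height, img_width, 3)
assert (out == img).all()  # lossless round trip for in-range values
```
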
From 58a94a9b058cded717583be4bf99b9365bdb7f2d Mon Sep 17 00:00:00 2001
From: Francois Chollet
Date: Wed, 13 Jan 2016 10:46:45 -0800
Subject: [PATCH 145/145] Add deep dream example

---
 examples/deep_dream.py | 198 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 198 insertions(+)
 create mode 100644 examples/deep_dream.py

diff --git a/examples/deep_dream.py b/examples/deep_dream.py
new file mode 100644
index 000000000..57e1cf080
--- /dev/null
+++ b/examples/deep_dream.py
@@ -0,0 +1,198 @@
+'''Deep Dreaming in Keras.
+
+Run the script with:
+```
+python deep_dream.py path_to_your_base_image.jpg prefix_for_results
+```
+e.g.:
+```
+python deep_dream.py img/mypic.jpg results/dream
+```
+
+It is preferable to run this script on GPU, for speed.
+If running on CPU, prefer the TensorFlow backend (much faster).
+
+Example results: http://i.imgur.com/FX6ROg9.jpg
+'''
+from __future__ import print_function
+from scipy.misc import imread, imresize, imsave
+import numpy as np
+from scipy.optimize import fmin_l_bfgs_b
+import time
+import argparse
+import h5py
+
+from keras.models import Sequential
+from keras.layers.convolutional import Convolution2D, ZeroPadding2D, MaxPooling2D
+from keras import backend as K
+
+parser = argparse.ArgumentParser(description='Deep Dreams with Keras.')
+parser.add_argument('base_image_path', metavar='base', type=str,
+                    help='Path to the image to transform.')
+parser.add_argument('result_prefix', metavar='res_prefix', type=str,
+                    help='Prefix for the saved results.')
+
+args = parser.parse_args()
+base_image_path = args.base_image_path
+result_prefix = args.result_prefix
+
+# dimensions of the generated picture.
+img_width = 600
+img_height = 600
+
+# path to the model weights file.
+weights_path = 'vgg16_weights.h5'
+
+# some settings we found interesting
+saved_settings = {
+    'bad_trip': {'features': {'conv4_1': 0.05,
+                              'conv4_2': 0.01,
+                              'conv4_3': 0.01},
+                 'continuity': 0.1,
+                 'dream_l2': 0.8,
+                 'jitter': 5},
+    'dreamy': {'features': {'conv5_1': 0.05,
+                            'conv5_2': 0.02},
+               'continuity': 0.1,
+               'dream_l2': 0.02,
+               'jitter': 0},
+}
+# the settings we will use in this experiment
+settings = saved_settings['dreamy']
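
Each settings bundle above controls which layer activations are amplified ('features', mapping layer name to coefficient), plus the weights of the continuity penalty, the image L2 penalty, and the amount of input jitter. As an illustration, a hypothetical extra bundle (invented, untuned coefficients, not part of the patch) could look like this:

```
# hypothetical additional bundle, same structure as saved_settings above
custom_settings = {
    'features': {'conv3_2': 0.02,   # mid-level texture detectors
                 'conv4_2': 0.03},  # higher-level pattern detectors
    'continuity': 0.05,             # weaker smoothing -> sharper detail
    'dream_l2': 0.1,                # moderate brake on pixel magnitudes
    'jitter': 3,                    # small random shift of the input
}
# in the script one would then select it via:
# settings = custom_settings
```
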
+# util function to open, resize and format pictures into appropriate tensors
+def preprocess_image(image_path):
+    img = imresize(imread(image_path), (img_width, img_height))
+    img = img.transpose((2, 0, 1)).astype('float64')
+    img = np.expand_dims(img, axis=0)
+    return img
+
+# util function to convert a tensor into a valid image
+def deprocess_image(x):
+    x = x.transpose((1, 2, 0))
+    x = np.clip(x, 0, 255).astype('uint8')
+    return x
+
+# this will contain our generated image
+dream = K.placeholder((1, 3, img_width, img_height))
+
+# build the VGG16 network with our dream as input
+first_layer = ZeroPadding2D((1, 1), input_shape=(3, img_width, img_height))
+first_layer.input = dream
+
+model = Sequential()
+model.add(first_layer)
+model.add(Convolution2D(64, 3, 3, activation='relu', name='conv1_1'))
+model.add(ZeroPadding2D((1, 1)))
+model.add(Convolution2D(64, 3, 3, activation='relu', name='conv1_2'))
+model.add(MaxPooling2D((2, 2), strides=(2, 2)))
+
+model.add(ZeroPadding2D((1, 1)))
+model.add(Convolution2D(128, 3, 3, activation='relu', name='conv2_1'))
+model.add(ZeroPadding2D((1, 1)))
+model.add(Convolution2D(128, 3, 3, activation='relu', name='conv2_2'))
+model.add(MaxPooling2D((2, 2), strides=(2, 2)))
+
+model.add(ZeroPadding2D((1, 1)))
+model.add(Convolution2D(256, 3, 3, activation='relu', name='conv3_1'))
+model.add(ZeroPadding2D((1, 1)))
+model.add(Convolution2D(256, 3, 3, activation='relu', name='conv3_2'))
+model.add(ZeroPadding2D((1, 1)))
+model.add(Convolution2D(256, 3, 3, activation='relu', name='conv3_3'))
+model.add(MaxPooling2D((2, 2), strides=(2, 2)))
+
+model.add(ZeroPadding2D((1, 1)))
+model.add(Convolution2D(512, 3, 3, activation='relu', name='conv4_1'))
+model.add(ZeroPadding2D((1, 1)))
+model.add(Convolution2D(512, 3, 3, activation='relu', name='conv4_2'))
+model.add(ZeroPadding2D((1, 1)))
+model.add(Convolution2D(512, 3, 3, activation='relu', name='conv4_3'))
+model.add(MaxPooling2D((2, 2), strides=(2, 2)))
+
+model.add(ZeroPadding2D((1, 1)))
+model.add(Convolution2D(512, 3, 3, activation='relu', name='conv5_1'))
+model.add(ZeroPadding2D((1, 1)))
+model.add(Convolution2D(512, 3, 3, activation='relu', name='conv5_2'))
+model.add(ZeroPadding2D((1, 1)))
+model.add(Convolution2D(512, 3, 3, activation='relu', name='conv5_3'))
+model.add(MaxPooling2D((2, 2), strides=(2, 2)))
+
+# load the weights of the VGG16 network
+# (trained on ImageNet, won the ILSVRC competition in 2014)
+# note: when there is a complete match between your model definition
+# and your weight savefile, you can simply call model.load_weights(filename)
+f = h5py.File(weights_path)
+for k in range(f.attrs['nb_layers']):
+    if k >= len(model.layers):
+        # we don't look at the last (fully-connected) layers in the savefile
+        break
+    g = f['layer_{}'.format(k)]
+    weights = [g['param_{}'.format(p)] for p in range(g.attrs['nb_params'])]
+    model.layers[k].set_weights(weights)
+f.close()
+print('Model loaded.')
+
+# get the symbolic outputs of each "key" layer (we gave them unique names).
+layer_dict = dict([(layer.name, layer) for layer in model.layers])
+
+# continuity loss util function
+def continuity_loss(x):
+    assert K.ndim(x) == 4
+    a = K.square(x[:, :, :img_width-1, :img_height-1] - x[:, :, 1:, :img_height-1])
+    b = K.square(x[:, :, :img_width-1, :img_height-1] - x[:, :, :img_width-1, 1:])
+    return K.sum(K.pow(a + b, 1.25))
+
+# define the loss
+loss = K.variable(0.)
+for layer_name in settings['features']:
+    # add the L2 norm of the features of a layer to the loss
+    assert layer_name in layer_dict.keys(), 'Layer ' + layer_name + ' not found in model.'
+    coeff = settings['features'][layer_name]
+    x = layer_dict[layer_name].get_output()
+    shape = layer_dict[layer_name].output_shape
+    # we avoid border artifacts by only involving non-border pixels in the loss
+    loss -= coeff * K.sum(K.square(x[:, :, 2: shape[2]-2, 2: shape[3]-2])) / np.prod(shape[1:])
+
+# add continuity loss (gives image local coherence, can result in an artful blur)
+loss += settings['continuity'] * continuity_loss(dream) / (3 * img_width * img_height)
+# add image L2 norm to loss (prevents pixels from taking very high values, makes image darker)
+loss += settings['dream_l2'] * K.sum(K.square(dream)) / (3 * img_width * img_height)
+
+# feel free to further modify the loss as you see fit, to achieve new effects...
+
+# compute the gradients of the dream wrt the loss
+grads = K.gradients(loss, dream)
+
+# set up helper functions to extract the loss and gradients
+# from the computational graph as Numpy arrays
+f_grads = K.function([dream], grads)
+def eval_grads(x):
+    x = x.reshape((1, 3, img_width, img_height))
+    return np.array(f_grads([x])).flatten().astype('float64')
+
+f_loss = K.function([dream], [loss])
+def eval_loss(x):
+    x = x.reshape((1, 3, img_width, img_height))
+    return f_loss([x])[0].astype('float64')
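
Note the sign convention in the loss defined above: the feature term is subtracted, so minimizing the total loss maximizes the chosen layer activations, while the continuity and L2 terms act as regularizers. A standalone NumPy sketch of the continuity penalty (same slicing pattern as continuity_loss above, toy sizes, not part of the patch):

```
import numpy as np

# squared differences between each pixel and its right/bottom
# neighbors, raised to the 1.25 power and summed
def continuity_loss_np(x):          # x: (1, channels, rows, cols)
    a = np.square(x[:, :, :-1, :-1] - x[:, :, 1:, :-1])
    b = np.square(x[:, :, :-1, :-1] - x[:, :, :-1, 1:])
    return np.sum(np.power(a + b, 1.25))

flat_img = np.ones((1, 3, 64, 64))        # constant image
noisy_img = np.random.rand(1, 3, 64, 64)  # high-frequency noise
print(continuity_loss_np(flat_img))       # 0.0 -- nothing to penalize
print(continuity_loss_np(noisy_img))      # large -- noise is costly
```
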
+# add a random jitter to the initial image. This will be reverted at decoding time
+random_jitter = (settings['jitter'] * 2) * (np.random.random((3, img_width, img_height)) - 0.5)
+x = preprocess_image(base_image_path)
+x += random_jitter
+
+# run scipy-based optimization (L-BFGS) over the pixels of the generated image
+# so as to minimize the loss
+for i in range(5):
+    start_time = time.time()
+    x, min_val, info = fmin_l_bfgs_b(eval_loss, x.flatten(),
+                                     fprime=eval_grads, maxfun=7)
+    print('Current loss value:', min_val)
+    # decode the dream and save it
+    x = x.reshape((3, img_width, img_height))
+    x -= random_jitter
+    img = deprocess_image(x)
+    fname = result_prefix + '_at_iteration_%d.png' % i
+    imsave(fname, img)
+    end_time = time.time()
+    print('Image saved as', fname)
+    print('Iteration %d completed in %ds' % (i, end_time - start_time))
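
One detail of the loop above worth seeing in isolation: the jitter added to the input before optimization is the exact array subtracted before decoding, so the saved image is not biased by it. A standalone round-trip sketch (random stand-ins for the real image, not part of the patch):

```
import numpy as np

jitter_amount = 5  # e.g. the 'bad_trip' setting; 'dreamy' uses jitter 0
img = np.random.rand(3, 600, 600) * 255  # stand-in for a preprocessed image

# same shape and scale as random_jitter in the script
random_jitter = (jitter_amount * 2) * (np.random.random((3, 600, 600)) - 0.5)

x = img + random_jitter     # what the optimizer sees
x = x - random_jitter       # subtracted again before decoding
assert np.allclose(x, img)  # the saved image is unbiased by the jitter
```
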