diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 17affa401..3070f52ca 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -22,13 +22,13 @@ The more information you provide, the easier it is for us to validate that there
 
 ## Requesting a Feature
 
-You can also use Github issues to request features you would like to see in Keras, or changes in the Keras API. 
+You can also use Github issues to request features you would like to see in Keras, or changes in the Keras API.
 
 1. Provide a clear and detailed explanation of the feature you want and why it's important to add. Keep in mind that we want features that will be useful to the majority of our users and not just a small subset. If you're just targeting a minority of users, consider writing an add-on library for Keras. It is crucial for Keras to avoid bloating the API and codebase.
 
 2. Provide code snippets demonstrating the API you have in mind and illustrating the use cases of your feature. Of course, you don't need to write any real code at this point!
 
-3. After disussing the feature you may choose to attempt a Pull Request. If you're at all able, start writing some code. We always have more work to do than time to do it. If you can write some code then that will speed the process along.
+3. After discussing the feature you may choose to attempt a Pull Request. If you're at all able, start writing some code. We always have more work to do than time to do it. If you can write some code then that will speed the process along.
 
 ## Pull Requests
 
diff --git a/README.md b/README.md
index 36c8392ff..1a1db1527 100644
--- a/README.md
+++ b/README.md
@@ -37,7 +37,7 @@ Keras is compatible with: __Python 2.7-3.5__.
 
 ## Getting started: 30 seconds to Keras
 
-The core datastructure of Keras is a __model__, a way to organize layers. There are two types of models: [`Sequential`](http://keras.io/models/#sequential) and [`Graph`](http://keras.io/models/#graph).
+The core data structure of Keras is a __model__, a way to organize layers. There are two types of models: [`Sequential`](http://keras.io/models/#sequential) and [`Graph`](http://keras.io/models/#graph).
 
 Here's the `Sequential` model (a linear pile of layers):
 
diff --git a/keras/optimizers.py b/keras/optimizers.py
index ce4cc1efd..503e2e915 100644
--- a/keras/optimizers.py
+++ b/keras/optimizers.py
@@ -275,12 +275,66 @@ class Adam(Optimizer):
                 "beta_2": float(K.get_value(self.beta_2)),
                 "epsilon": self.epsilon}
 
+class Adamax(Optimizer):
+    '''Adamax optimizer from Adam paper's Section 7. It is a variant
+    of Adam based on the infinity norm.
+
+    Default parameters follow those provided in the paper.
+
+    # Arguments
+        lr: float >= 0. Learning rate.
+        beta_1/beta_2: floats, 0 < beta < 1. Generally close to 1.
+        epsilon: float >= 0. Fuzz factor.
+
+    # References
+        - [Adam - A Method for Stochastic Optimization](http://arxiv.org/abs/1412.6980v8)
+    '''
+    def __init__(self, lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=1e-8,
+                 *args, **kwargs):
+        super(Adamax, self).__init__(**kwargs)
+        self.__dict__.update(locals())
+        self.iterations = K.variable(0)
+        self.lr = K.variable(lr)
+        self.beta_1 = K.variable(beta_1)
+        self.beta_2 = K.variable(beta_2)
+
+    def get_updates(self, params, constraints, loss):
+        grads = self.get_gradients(loss, params)
+        self.updates = [(self.iterations, self.iterations+1.)]
+
+        t = self.iterations + 1
+        lr_t = self.lr / (1 - K.pow(self.beta_1, t))
+
+        for p, g, c in zip(params, grads, constraints):
+            # zero init of 1st moment
+            m = K.variable(np.zeros(K.get_value(p).shape))
+            # zero init of exponentially weighted infinity norm
+            u = K.variable(np.zeros(K.get_value(p).shape))
+
+            m_t = (self.beta_1 * m) + (1 - self.beta_1) * g
+            u_t = K.maximum(self.beta_2 * u, K.abs(g))
+            p_t = p - lr_t * m_t / (u_t + self.epsilon)
+
+            self.updates.append((m, m_t))
+            self.updates.append((u, u_t))
+            self.updates.append((p, c(p_t)))  # apply constraints
+        return self.updates
+
+    def get_config(self):
+        return {"name": self.__class__.__name__,
+                "lr": float(K.get_value(self.lr)),
+                "beta_1": float(K.get_value(self.beta_1)),
+                "beta_2": float(K.get_value(self.beta_2)),
+                "epsilon": self.epsilon}
+
+
 # aliases
 sgd = SGD
 rmsprop = RMSprop
 adagrad = Adagrad
 adadelta = Adadelta
 adam = Adam
+adamax = Adamax
 
 
 def get(identifier, kwargs=None):
diff --git a/tests/keras/test_optimizers.py b/tests/keras/test_optimizers.py
index 2829084fa..81fcd0a14 100644
--- a/tests/keras/test_optimizers.py
+++ b/tests/keras/test_optimizers.py
@@ -2,7 +2,7 @@ from __future__ import print_function
 
 import pytest
 from keras.utils.test_utils import get_test_data
-from keras.optimizers import SGD, RMSprop, Adagrad, Adadelta, Adam
+from keras.optimizers import SGD, RMSprop, Adagrad, Adadelta, Adam, Adamax
 from keras.models import Sequential
 from keras.layers.core import Dense, Activation
 from keras.utils.np_utils import to_categorical
@@ -56,5 +56,9 @@ def test_adam():
     assert(_test_optimizer(Adam()))
 
 
+def test_adamax():
+    assert(_test_optimizer(Adamax()))
+
+
 if __name__ == '__main__':
     pytest.main([__file__])
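For reviewers who want to try the new optimizer end to end, here is a minimal usage sketch against the Keras 0.x `Sequential` API that this PR's tests already exercise. The layer sizes, generated data, and epoch count below are illustrative assumptions, not part of the diff:

```python
import numpy as np
from keras.models import Sequential
from keras.layers.core import Dense, Activation
from keras.optimizers import Adamax
from keras.utils.np_utils import to_categorical

# Toy classification data; shapes here are arbitrary choices for the sketch.
X = np.random.random((128, 20))
y = to_categorical(np.random.randint(0, 10, size=(128,)), 10)

model = Sequential()
model.add(Dense(64, input_dim=20))
model.add(Activation('relu'))
model.add(Dense(10))
model.add(Activation('softmax'))

# Defaults follow the paper: lr=0.002, beta_1=0.9, beta_2=0.999.
model.compile(loss='categorical_crossentropy', optimizer=Adamax())

model.fit(X, y, nb_epoch=2, batch_size=32)
```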
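Because the update math is easy to mis-transcribe from Section 7 of the paper, a plain-NumPy restatement of a single `get_updates` step also makes a quick sanity check. The parameter and gradient values are made up for illustration:

```python
import numpy as np

# One Adamax step, mirroring get_updates above with made-up inputs.
lr, beta_1, beta_2, epsilon = 0.002, 0.9, 0.999, 1e-8
p = np.array([0.5, -0.3])   # parameters
g = np.array([0.1, 0.2])    # gradient of the loss w.r.t. p
m = np.zeros_like(p)        # 1st moment, zero-initialized
u = np.zeros_like(p)        # exponentially weighted infinity norm
t = 1                       # first iteration

lr_t = lr / (1 - beta_1 ** t)           # bias correction (for m only)
m = beta_1 * m + (1 - beta_1) * g       # biased 1st moment estimate
u = np.maximum(beta_2 * u, np.abs(g))   # infinity-norm accumulator
p = p - lr_t * m / (u + epsilon)        # parameter update

# With zero init at t=1, m == 0.1 * g and u == |g|, so the step reduces
# to lr * sign(g): each parameter moves by about 0.002.
print(p)  # approximately [0.498, -0.302]
```

Note that unlike Adam's second moment, the infinity-norm accumulator `u` needs no bias correction, which is why `lr_t` divides by the `beta_1` term only; this matches the `lr_t` line in the diff.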