adamax optimizer

This commit is contained in:
Kashif Rasul 2016-01-01 19:12:22 +01:00
parent bb45991899
commit 5c72e14034
4 changed files with 62 additions and 4 deletions

@ -22,13 +22,13 @@ The more information you provide, the easier it is for us to validate that there
## Requesting a Feature
You can also use Github issues to request features you would like to see in Keras, or changes in the Keras API.
You can also use Github issues to request features you would like to see in Keras, or changes in the Keras API.
1. Provide a clear and detailed explanation of the feature you want and why it's important to add. Keep in mind that we want features that will be useful to the majority of our users and not just a small subset. If you're just targeting a minority of users, consider writing an add-on library for Keras. It is crucial for Keras to avoid bloating the API and codebase.
2. Provide code snippets demonstrating the API you have in mind and illustrating the use cases of your feature. Of course, you don't need to write any real code at this point!
3. After disussing the feature you may choose to attempt a Pull Request. If you're at all able, start writing some code. We always have more work to do than time to do it. If you can write some code then that will speed the process along.
3. After discussing the feature you may choose to attempt a Pull Request. If you're at all able, start writing some code. We always have more work to do than time to do it. If you can write some code then that will speed the process along.
## Pull Requests

@ -37,7 +37,7 @@ Keras is compatible with: __Python 2.7-3.5__.
## Getting started: 30 seconds to Keras
The core datastructure of Keras is a __model__, a way to organize layers. There are two types of models: [`Sequential`](http://keras.io/models/#sequential) and [`Graph`](http://keras.io/models/#graph).
The core data structure of Keras is a __model__, a way to organize layers. There are two types of models: [`Sequential`](http://keras.io/models/#sequential) and [`Graph`](http://keras.io/models/#graph).
Here's the `Sequential` model (a linear pile of layers):

@ -275,12 +275,66 @@ class Adam(Optimizer):
"beta_2": float(K.get_value(self.beta_2)),
"epsilon": self.epsilon}
class Adamax(Optimizer):
    '''Adamax optimizer from Section 7 of the Adam paper.

    It is a variant of Adam based on the infinity norm: the update is
    scaled by an exponentially weighted infinity norm of the gradients.
    Default parameters follow those provided in the paper.

    # Arguments
        lr: float >= 0. Learning rate.
        beta_1/beta_2: floats, 0 < beta < 1. Generally close to 1.
        epsilon: float >= 0. Fuzz factor, added to the denominator of
            the parameter update.

    # References
        - [Adam - A Method for Stochastic Optimization](http://arxiv.org/abs/1412.6980v8)
    '''
    def __init__(self, lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=1e-8,
                 *args, **kwargs):
        super(Adamax, self).__init__(**kwargs)
        # Exposes the constructor arguments as attributes; `epsilon` is
        # read back from here. lr/beta_1/beta_2 are replaced just below
        # by backend variables.
        self.__dict__.update(locals())
        self.iterations = K.variable(0)
        self.lr = K.variable(lr)
        self.beta_1 = K.variable(beta_1)
        self.beta_2 = K.variable(beta_2)

    def get_updates(self, params, constraints, loss):
        '''Build the list of update pairs for one optimization step.

        # Arguments
            params: parameters to optimize.
            constraints: one callable per parameter, applied to the
                updated value of that parameter.
            loss: expression passed to `self.get_gradients` together
                with `params`.

        # Returns
            List of `(variable, new_value)` tuples covering the iteration
            counter, both accumulators of every parameter and the
            parameters themselves. The same list is kept in
            `self.updates`.
        '''
        grads = self.get_gradients(loss, params)
        self.updates = [(self.iterations, self.iterations+1.)]

        t = self.iterations + 1
        # learning rate divided by the (1 - beta_1^t) correction term
        lr_t = self.lr / (1 - K.pow(self.beta_1, t))

        for p, g, c in zip(params, grads, constraints):
            # Read the parameter value once; both accumulators share
            # its shape.
            shape = K.get_value(p).shape
            # zero init of 1st moment
            m = K.variable(np.zeros(shape))
            # zero init of exponentially weighted infinity norm
            u = K.variable(np.zeros(shape))

            m_t = (self.beta_1 * m) + (1 - self.beta_1) * g
            u_t = K.maximum(self.beta_2 * u, K.abs(g))
            p_t = p - lr_t * m_t / (u_t + self.epsilon)

            self.updates.append((m, m_t))
            self.updates.append((u, u_t))
            self.updates.append((p, c(p_t)))  # apply constraints
        return self.updates

    def get_config(self):
        '''Return the optimizer's name and current hyperparameters as a
        dict of plain Python values.
        '''
        return {"name": self.__class__.__name__,
                "lr": float(K.get_value(self.lr)),
                "beta_1": float(K.get_value(self.beta_1)),
                "beta_2": float(K.get_value(self.beta_2)),
                "epsilon": self.epsilon}
# aliases: lowercase module-level names bound to the optimizer classes
sgd = SGD
rmsprop = RMSprop
adagrad = Adagrad
adadelta = Adadelta
adam = Adam
adamax = Adamax
def get(identifier, kwargs=None):

@ -2,7 +2,7 @@ from __future__ import print_function
import pytest
from keras.utils.test_utils import get_test_data
from keras.optimizers import SGD, RMSprop, Adagrad, Adadelta, Adam
from keras.optimizers import SGD, RMSprop, Adagrad, Adadelta, Adam, Adamax
from keras.models import Sequential
from keras.layers.core import Dense, Activation
from keras.utils.np_utils import to_categorical
@ -56,5 +56,9 @@ def test_adam():
assert(_test_optimizer(Adam()))
def test_adamax():
    '''Run the shared optimizer check with a default-configured Adamax.'''
    assert _test_optimizer(Adamax())
# Running this file directly hands it to pytest.
if __name__ == '__main__':
    pytest.main([__file__])