diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index 17affa401..3070f52ca 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -22,13 +22,13 @@ The more information you provide, the easier it is for us to validate that there
 
 ## Requesting a Feature
 
-You can also use Github issues to request features you would like to see in Keras, or changes in the Keras API. 
+You can also use Github issues to request features you would like to see in Keras, or changes in the Keras API.
 
 1. Provide a clear and detailed explanation of the feature you want and why it's important to add. Keep in mind that we want features that will be useful to the majority of our users and not just a small subset. If you're just targeting a minority of users, consider writing an add-on library for Keras. It is crucial for Keras to avoid bloating the API and codebase.
 
 2. Provide code snippets demonstrating the API you have in mind and illustrating the use cases of your feature. Of course, you don't need to write any real code at this point!
 
-3. After disussing the feature you may choose to attempt a Pull Request. If you're at all able, start writing some code. We always have more work to do than time to do it. If you can write some code then that will speed the process along.
+3. After discussing the feature you may choose to attempt a Pull Request. If you're at all able, start writing some code. We always have more work to do than time to do it. If you can write some code then that will speed the process along.
 
 ## Pull Requests
 
diff --git a/README.md b/README.md
index 36c8392ff..1a1db1527 100644
--- a/README.md
+++ b/README.md
@@ -37,7 +37,7 @@ Keras is compatible with: __Python 2.7-3.5__.
 
 ## Getting started: 30 seconds to Keras
 
-The core datastructure of Keras is a __model__, a way to organize layers. There are two types of models: [`Sequential`](http://keras.io/models/#sequential) and [`Graph`](http://keras.io/models/#graph).
+The core data structure of Keras is a __model__, a way to organize layers. There are two types of models: [`Sequential`](http://keras.io/models/#sequential) and [`Graph`](http://keras.io/models/#graph).
 
 Here's the `Sequential` model (a linear pile of layers):
 
diff --git a/keras/optimizers.py b/keras/optimizers.py
index ce4cc1efd..503e2e915 100644
--- a/keras/optimizers.py
+++ b/keras/optimizers.py
@@ -275,12 +275,66 @@ class Adam(Optimizer):
                 "beta_2": float(K.get_value(self.beta_2)),
                 "epsilon": self.epsilon}
 
+class Adamax(Optimizer):
+    '''Adamax optimizer from Adam paper's Section 7. It is a variant
+    of Adam based on the infinity norm.
+
+    Default parameters follow those provided in the paper.
+
+    # Arguments
+        lr: float >= 0. Learning rate.
+        beta_1/beta_2: floats, 0 < beta < 1. Generally close to 1.
+        epsilon: float >= 0. Fuzz factor.
+
+    # References
+        - [Adam - A Method for Stochastic Optimization](http://arxiv.org/abs/1412.6980v8)
+    '''
+    def __init__(self, lr=0.002, beta_1=0.9, beta_2=0.999, epsilon=1e-8,
+                 *args, **kwargs):
+        super(Adamax, self).__init__(**kwargs)
+        self.__dict__.update(locals())
+        self.iterations = K.variable(0)
+        self.lr = K.variable(lr)
+        self.beta_1 = K.variable(beta_1)
+        self.beta_2 = K.variable(beta_2)
+
+    def get_updates(self, params, constraints, loss):
+        grads = self.get_gradients(loss, params)
+        self.updates = [(self.iterations, self.iterations+1.)]
+
+        t = self.iterations + 1
+        lr_t = self.lr / (1 - K.pow(self.beta_1, t))
+
+        for p, g, c in zip(params, grads, constraints):
+            # zero init of 1st moment
+            m = K.variable(np.zeros(K.get_value(p).shape))
+            # zero init of exponentially weighted infinity norm
+            u = K.variable(np.zeros(K.get_value(p).shape))
+
+            m_t = (self.beta_1 * m) + (1 - self.beta_1) * g
+            u_t = K.maximum(self.beta_2 * u, K.abs(g))
+            p_t = p - lr_t * m_t / (u_t + self.epsilon)
+
+            self.updates.append((m, m_t))
+            self.updates.append((u, u_t))
+            self.updates.append((p, c(p_t)))  # apply constraints
+        return self.updates
+
+    def get_config(self):
+        return {"name": self.__class__.__name__,
+                "lr": float(K.get_value(self.lr)),
+                "beta_1": float(K.get_value(self.beta_1)),
+                "beta_2": float(K.get_value(self.beta_2)),
+                "epsilon": self.epsilon}
+
+
 # aliases
 sgd = SGD
 rmsprop = RMSprop
 adagrad = Adagrad
 adadelta = Adadelta
 adam = Adam
+adamax = Adamax
 
 
 def get(identifier, kwargs=None):
diff --git a/tests/keras/test_optimizers.py b/tests/keras/test_optimizers.py
index 2829084fa..81fcd0a14 100644
--- a/tests/keras/test_optimizers.py
+++ b/tests/keras/test_optimizers.py
@@ -2,7 +2,7 @@ from __future__ import print_function
 
 import pytest
 from keras.utils.test_utils import get_test_data
-from keras.optimizers import SGD, RMSprop, Adagrad, Adadelta, Adam
+from keras.optimizers import SGD, RMSprop, Adagrad, Adadelta, Adam, Adamax
 from keras.models import Sequential
 from keras.layers.core import Dense, Activation
 from keras.utils.np_utils import to_categorical
@@ -56,5 +56,9 @@ def test_adam():
     assert(_test_optimizer(Adam()))
 
 
+def test_adamax():
+    assert(_test_optimizer(Adamax()))
+
+
 if __name__ == '__main__':
     pytest.main([__file__])
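For reviewers who want to try the new optimizer end to end, here is a minimal usage sketch against the Keras 0.x `Sequential` API that this PR's tests already exercise. The layer sizes, generated data, and epoch count below are illustrative assumptions, not part of the diff:

```python
import numpy as np
from keras.models import Sequential
from keras.layers.core import Dense, Activation
from keras.optimizers import Adamax
from keras.utils.np_utils import to_categorical

# Toy classification data; shapes here are arbitrary choices for the sketch.
X = np.random.random((128, 20))
y = to_categorical(np.random.randint(0, 10, size=(128,)), 10)

model = Sequential()
model.add(Dense(64, input_dim=20))
model.add(Activation('relu'))
model.add(Dense(10))
model.add(Activation('softmax'))

# Defaults follow the paper: lr=0.002, beta_1=0.9, beta_2=0.999.
model.compile(loss='categorical_crossentropy', optimizer=Adamax())

model.fit(X, y, nb_epoch=2, batch_size=32)
```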
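Because the update math is easy to mis-transcribe from Section 7 of the paper, a plain-NumPy restatement of a single `get_updates` step also makes a quick sanity check. The parameter and gradient values are made up for illustration:

```python
import numpy as np

# One Adamax step, mirroring get_updates above with made-up inputs.
lr, beta_1, beta_2, epsilon = 0.002, 0.9, 0.999, 1e-8
p = np.array([0.5, -0.3])   # parameters
g = np.array([0.1, 0.2])    # gradient of the loss w.r.t. p
m = np.zeros_like(p)        # 1st moment, zero-initialized
u = np.zeros_like(p)        # exponentially weighted infinity norm
t = 1                       # first iteration

lr_t = lr / (1 - beta_1 ** t)           # bias correction (for m only)
m = beta_1 * m + (1 - beta_1) * g       # biased 1st moment estimate
u = np.maximum(beta_2 * u, np.abs(g))   # infinity-norm accumulator
p = p - lr_t * m / (u + epsilon)        # parameter update

# With zero init at t=1, m == 0.1 * g and u == |g|, so the step reduces
# to lr * sign(g): each parameter moves by about 0.002.
print(p)  # approximately [0.498, -0.302]
```

Note that unlike Adam's second moment, the infinity-norm accumulator `u` needs no bias correction, which is why `lr_t` divides by the `beta_1` term only; this matches the `lr_t` line in the diff.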