sandeep-krishnamurthy closed pull request #13467: [MXNET-1235] Add a test for AdaMax optimizer
URL: https://github.com/apache/incubator-mxnet/pull/13467
This is a PR merged from a forked repository. As GitHub hides the original diff on merge, it is displayed below for the sake of provenance:

diff --git a/tests/python/unittest/test_optimizer.py b/tests/python/unittest/test_optimizer.py
index 334b7d4c0fd..b03dcdcfba4 100644
--- a/tests/python/unittest/test_optimizer.py
+++ b/tests/python/unittest/test_optimizer.py
@@ -16,6 +16,7 @@
 # under the License.
 
 import numpy as np
+import itertools
 import mxnet as mx
 import mxnet.lr_scheduler as lr_scheduler
 from mxnet import gluon
@@ -501,7 +502,6 @@ def test_ftml():
 
 # ADAM
-
 class PyAdam(mx.optimizer.Optimizer):
     """python reference implemenation of adam"""
     def __init__(self, learning_rate=0.001, beta1=0.9, beta2=0.999, epsilon=1e-8,
@@ -613,6 +613,80 @@ def test_adam():
                           dtype, w_stype='default', g_stype='row_sparse',
                           rtol=1e-4, atol=2e-5)
+
+# AdaMax
+class PyAdamax(mx.optimizer.Optimizer):
+    """The python reference of AdaMax optimizer.
+
+    This class implements the AdaMax optimizer, one variant of Adam based on the infinity norm,
+    available at http://arxiv.org/abs/1412.6980 Section 7.
+
+    The optimizer updates the weight by::
+
+        grad = clip(grad * rescale_grad + wd * weight, clip_gradient)
+        m = beta1 * m_t + (1 - beta1) * grad
+        u = maximum(beta2 * u, abs(grad))
+        weight -= lr / (1 - beta1**t) * m / u
+
+    This optimizer accepts the following parameters in addition to those accepted
+    by :class:`.Optimizer`.
+
+    Parameters
+    ----------
+    beta1 : float, optional
+        Exponential decay rate for the first moment estimates.
+    beta2 : float, optional
+        Exponential decay rate for the second moment estimates.
+    """
+    def __init__(self, learning_rate=0.002, beta1=0.9, beta2=0.999, **kwargs):
+        super(PyAdamax, self).__init__(learning_rate=learning_rate, **kwargs)
+        self.beta1 = beta1
+        self.beta2 = beta2
+
+    def create_state(self, index, weight):
+        return (mx.nd.zeros(weight.shape, weight.context, dtype=weight.dtype),  # mean
+                mx.nd.zeros(weight.shape, weight.context, dtype=weight.dtype))  # variance
+
+    def update(self, index, weight, grad, state):
+        self._update_count(index)
+        lr = self._get_lr(index)
+        wd = self._get_wd(index)
+
+        t = self._index_update_count[index]
+        lr /= (1. - self.beta1**t)
+
+        # preprocess grad
+        grad = grad * self.rescale_grad + wd * weight
+        if self.clip_gradient is not None:
+            grad = mx.nd.clip(grad, -self.clip_gradient, self.clip_gradient)
+
+        # update m_t and u_t
+        m_t, u_t = state
+        m_t[:] = self.beta1 * m_t + (1. - self.beta1) * grad
+        u_t[:] = mx.nd.maximum(self.beta2 * u_t, mx.nd.abs(grad))
+
+        # update weight
+        weight[:] -= lr * m_t / u_t
+
+
+@with_seed()
+def test_adamax():
+    opt1 = PyAdamax
+    opt2 = mx.optimizer.Adamax
+    shape = (3, 4, 5)
+    cg_options = [{}, {'clip_gradient': 0.4}, {'clip_gradient': 0.5}]
+    rg_options = [{}, {'rescale_grad': 0.14}, {'rescale_grad': 0.8}]
+    wd_options = [{}, {'wd': 0.03}, {'wd': 0.05}, {'wd': 0.07}]
+    mp_options = [{}, {'multi_precision': False}, {'multi_precision': True}]
+    for dtype in [np.float16, np.float32, np.float64]:
+        for params in itertools.product(cg_options, rg_options, wd_options, mp_options):
+            kwarg = {k: v for param in params for k, v in param.items()}
+            if (dtype == np.float16 and
+                    ('multi_precision' not in kwarg or
+                     not kwarg['multi_precision'])):
+                continue
+            compare_optimizer(opt1(**kwarg), opt2(**kwarg), shape, dtype)
+
+
 # Signum
 class PySignum(mx.optimizer.Optimizer):
     """The python reference of Signum optimizer.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

With regards,
Apache Git Services
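For readers relating the new test to the update rule quoted in the PyAdamax docstring above, the AdaMax step can be sketched as a small standalone NumPy function. This is only an illustrative sketch, not part of the patch; the function name adamax_step and its keyword defaults are hypothetical, chosen to mirror PyAdamax:

import numpy as np

def adamax_step(weight, grad, m, u, t, lr=0.002, beta1=0.9, beta2=0.999,
                wd=0.0, rescale_grad=1.0, clip_gradient=None):
    # Preprocess the gradient as in the docstring's update rule.
    grad = grad * rescale_grad + wd * weight
    if clip_gradient is not None:
        grad = np.clip(grad, -clip_gradient, clip_gradient)
    # First-moment estimate and infinity-norm accumulator.
    m = beta1 * m + (1.0 - beta1) * grad
    u = np.maximum(beta2 * u, np.abs(grad))
    # Bias-corrected step on the weight.
    weight = weight - (lr / (1.0 - beta1 ** t)) * m / u
    return weight, m, u

Running a few iterations of this sketch on a toy array should mirror what compare_optimizer asserts in test_adamax, namely that the Python reference and mx.optimizer.Adamax move the weight in the same way across the clip_gradient, rescale_grad, wd, and multi_precision option combinations.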