This is an automated email from the ASF dual-hosted git repository.
skm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/master by this push:
new 06245b1 Adadelta optimizer test (#13443)
06245b1 is described below
commit 06245b16bb2e2e8caabf03841e8341ac5f7f98c8
Author: Anirudh <[email protected]>
AuthorDate: Tue Dec 4 11:37:50 2018 -0800
Adadelta optimizer test (#13443)
* adadelta test
* comments
---
python/mxnet/optimizer/optimizer.py | 2 +
tests/python/unittest/test_optimizer.py | 73 ++++++++++++++++++++++++++++++++-
2 files changed, 74 insertions(+), 1 deletion(-)
diff --git a/python/mxnet/optimizer/optimizer.py b/python/mxnet/optimizer/optimizer.py
index 442a11d..d7b6821 100644
--- a/python/mxnet/optimizer/optimizer.py
+++ b/python/mxnet/optimizer/optimizer.py
@@ -637,6 +637,8 @@ class FTML(Optimizer):
        z = beta1 * z + (1 - beta1) * rescaled_grad - (d_t - beta1 * d_(t-1)) * weight
        weight = - z / d_t
+    For details of the update algorithm, see :class:`~mxnet.ndarray.ftml_update`.
+
    This optimizer accepts the following parameters in addition to those accepted
    by :class:`.Optimizer`.
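For readers skimming the docstring hunk above, the two FTML lines it touches can be sketched in plain Python as below. This is an illustrative sketch only, not the fused :class:`~mxnet.ndarray.ftml_update` kernel; it assumes d_t and its previous value (here d_prev) were already computed by the earlier steps of the FTML recurrence, and the helper name is made up for this example::

    def ftml_last_steps(z, rescaled_grad, weight, d_t, d_prev, beta1):
        # Illustrative only; mirrors the two docstring lines shown in the hunk.
        # z = beta1 * z + (1 - beta1) * rescaled_grad - (d_t - beta1 * d_(t-1)) * weight
        z = beta1 * z + (1 - beta1) * rescaled_grad - (d_t - beta1 * d_prev) * weight
        # weight = - z / d_t
        weight = -z / d_t
        return z, weight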
diff --git a/tests/python/unittest/test_optimizer.py b/tests/python/unittest/test_optimizer.py
index b03dcdc..acf24ee 100644
--- a/tests/python/unittest/test_optimizer.py
+++ b/tests/python/unittest/test_optimizer.py
@@ -15,6 +15,7 @@
# specific language governing permissions and limitations
# under the License.
+import itertools
import numpy as np
import itertools
import mxnet as mx
@@ -1050,8 +1051,8 @@ class PyAdaGrad(mx.optimizer.Optimizer):
        div = grad / mx.nd.sqrt(history + self.float_stable_eps)
        weight[:] += (div + weight * wd) * -lr
+@with_seed()
def test_adagrad():
-    mx.random.seed(0)
    opt1 = PyAdaGrad
    opt2 = mx.optimizer.AdaGrad
    shape = (3, 4, 5)
@@ -1076,6 +1077,76 @@ def test_adagrad():
                            compare_optimizer(opt1(**kwarg), opt2(**kwarg), shape, dtype,
                                              g_stype='row_sparse')
+# AdaDelta
+class PyAdaDelta(mx.optimizer.Optimizer):
+    """The python reference of AdaDelta optimizer.
+
+    This class implements AdaDelta, an optimizer described in *ADADELTA: An adaptive
+    learning rate method*, available at https://arxiv.org/abs/1212.5701.
+
+    This optimizer updates each weight by::
+
+        grad = clip(grad * rescale_grad + wd * weight, clip_gradient)
+        acc_grad = rho * acc_grad + (1. - rho) * grad ** 2
+        cur_delta = sqrt(acc_delta + epsilon) / sqrt(acc_grad + epsilon) * grad
+        acc_delta = rho * acc_delta + (1. - rho) * cur_delta ** 2
+        weight -= (cur_delta + wd * weight)
+
+    This optimizer accepts the following parameters in addition to those accepted
+    by :class:`.Optimizer`.
+
+    Parameters
+    ----------
+    rho: float
+        Decay rate for both squared gradients and delta.
+    epsilon : float
+        Small value to avoid division by 0.
+    """
+    def __init__(self, rho=0.90, epsilon=1e-5, **kwargs):
+        super(PyAdaDelta, self).__init__(**kwargs)
+        self.rho = rho
+        self.epsilon = epsilon
+
+    def create_state(self, index, weight):
+        return (mx.nd.zeros(weight.shape, weight.context),
+                mx.nd.zeros(weight.shape, weight.context))
+
+    def update(self, index, weight, grad, state):
+        self._update_count(index)
+        wd = self._get_wd(index)
+
+        grad *= self.rescale_grad
+        if self.clip_gradient is not None:
+            grad = mx.nd.clip(grad, -self.clip_gradient, self.clip_gradient)
+
+        acc_grad, acc_delta = state
+
+        acc_grad[:] = self.rho * acc_grad + (1. - self.rho) * grad ** 2
+        current_delta = (mx.nd.sqrt(acc_delta + self.epsilon) /
+                         mx.nd.sqrt(acc_grad + self.epsilon)) * grad
+        acc_delta[:] = self.rho * acc_delta + (1. - self.rho) * current_delta ** 2
+
+        # update weight
+        weight[:] -= current_delta + wd * weight
+
+@with_seed()
+def test_adadelta():
+    opt1 = PyAdaDelta
+    opt2 = mx.optimizer.AdaDelta
+    shape = (3, 4, 5)
+    rho_options = [{'rho': 0.9}]
+    eps_options = [{}, {'epsilon': 1e-8}]
+    cg_options = [{}, {'clip_gradient': 0.4}, {'clip_gradient': 0.5}]
+    rg_options = [{}, {'rescale_grad': 0.14}, {'rescale_grad': 0.8}]
+    wd_options = [{}, {'wd': 0.0}]
+    for dtype in [np.float16, np.float32]:
+        for params in itertools.product(rho_options, eps_options, cg_options,
+                                        rg_options, wd_options):
+            kwarg = {k: v for param in params for k, v in param.items()}
+            if dtype is np.float16:
+                kwarg.update({'multi_precision': True})
+            compare_optimizer(opt1(**kwarg), opt2(**kwarg), shape, dtype)
+
def test_factor_scheduler():
    base_lr = 1
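As a quick cross-check of the update rule documented in PyAdaDelta above, here is a small standalone NumPy sketch of one AdaDelta step. It is illustrative only (the function name, defaults, and example values are not part of this commit) and it omits the weight-decay and gradient-clipping handling that PyAdaDelta.update performs through the Optimizer helpers::

    import numpy as np

    def adadelta_step(weight, grad, acc_grad, acc_delta, rho=0.9, epsilon=1e-5):
        # Illustrative sketch; mirrors the accumulator updates in PyAdaDelta.update.
        # Decay the running average of squared gradients.
        acc_grad[:] = rho * acc_grad + (1.0 - rho) * grad ** 2
        # Rescale the gradient by the ratio of the two accumulators.
        cur_delta = np.sqrt(acc_delta + epsilon) / np.sqrt(acc_grad + epsilon) * grad
        # Decay the running average of squared updates.
        acc_delta[:] = rho * acc_delta + (1.0 - rho) * cur_delta ** 2
        # Apply the update in place.
        weight[:] -= cur_delta

    # Example: one step on a (3, 4, 5) tensor, matching the shape used by the test.
    w = np.ones((3, 4, 5), dtype=np.float32)
    g = np.full_like(w, 0.1)
    adadelta_step(w, g, np.zeros_like(w), np.zeros_like(w))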