This is an automated email from the ASF dual-hosted git repository. skm pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/master by this push:
     new 06245b1  Adadelta optimizer test (#13443)
06245b1 is described below

commit 06245b16bb2e2e8caabf03841e8341ac5f7f98c8
Author: Anirudh <anirudhk...@gmail.com>
AuthorDate: Tue Dec 4 11:37:50 2018 -0800

    Adadelta optimizer test (#13443)

    * adadelta test

    * comments
---
 python/mxnet/optimizer/optimizer.py     |  2 +
 tests/python/unittest/test_optimizer.py | 73 ++++++++++++++++++++++++++++++++-
 2 files changed, 74 insertions(+), 1 deletion(-)

diff --git a/python/mxnet/optimizer/optimizer.py b/python/mxnet/optimizer/optimizer.py
index 442a11d..d7b6821 100644
--- a/python/mxnet/optimizer/optimizer.py
+++ b/python/mxnet/optimizer/optimizer.py
@@ -637,6 +637,8 @@ class FTML(Optimizer):
         z = beta1 * z + (1 - beta1) * rescaled_grad - (d_t - beta1 * d_(t-1)) * weight
         weight = - z / d_t
 
+    For details of the update algorithm, see :class:`~mxnet.ndarray.ftml_update`.
+
     This optimizer accepts the following parameters in addition to those accepted
     by :class:`.Optimizer`.
 
diff --git a/tests/python/unittest/test_optimizer.py b/tests/python/unittest/test_optimizer.py
index b03dcdc..acf24ee 100644
--- a/tests/python/unittest/test_optimizer.py
+++ b/tests/python/unittest/test_optimizer.py
@@ -15,6 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
+import itertools
 import numpy as np
 import itertools
 import mxnet as mx
@@ -1050,8 +1051,8 @@ class PyAdaGrad(mx.optimizer.Optimizer):
         div = grad / mx.nd.sqrt(history + self.float_stable_eps)
         weight[:] += (div + weight * wd) * -lr
 
+@with_seed()
 def test_adagrad():
-    mx.random.seed(0)
     opt1 = PyAdaGrad
     opt2 = mx.optimizer.AdaGrad
     shape = (3, 4, 5)
@@ -1076,6 +1077,76 @@ def test_adagrad():
                             compare_optimizer(opt1(**kwarg), opt2(**kwarg), shape, dtype,
                                               g_stype='row_sparse')
 
+# AdaDelta
+class PyAdaDelta(mx.optimizer.Optimizer):
+    """The python reference of AdaDelta optimizer.
+
+    This class implements AdaDelta, an optimizer described in *ADADELTA: An adaptive
+    learning rate method*, available at https://arxiv.org/abs/1212.5701.
+
+    This optimizer updates each weight by::
+
+        grad = clip(grad * rescale_grad + wd * weight, clip_gradient)
+        acc_grad = rho * acc_grad + (1. - rho) * grad ** 2
+        cur_delta = sqrt(acc_delta + epsilon) / sqrt(acc_grad + epsilon) * grad
+        acc_delta = rho * acc_delta + (1. - rho) * cur_delta ** 2
+        weight -= (cur_delta + wd * weight)
+
+    This optimizer accepts the following parameters in addition to those accepted
+    by :class:`.Optimizer`.
+
+    Parameters
+    ----------
+    rho: float
+        Decay rate for both squared gradients and delta.
+    epsilon : float
+        Small value to avoid division by 0.
+    """
+    def __init__(self, rho=0.90, epsilon=1e-5, **kwargs):
+        super(PyAdaDelta, self).__init__(**kwargs)
+        self.rho = rho
+        self.epsilon = epsilon
+
+    def create_state(self, index, weight):
+        return (mx.nd.zeros(weight.shape, weight.context),
+                mx.nd.zeros(weight.shape, weight.context))
+
+    def update(self, index, weight, grad, state):
+        self._update_count(index)
+        wd = self._get_wd(index)
+
+        grad *= self.rescale_grad
+        if self.clip_gradient is not None:
+            grad = mx.nd.clip(grad, -self.clip_gradient, self.clip_gradient)
+
+        acc_grad, acc_delta = state
+
+        acc_grad[:] = self.rho * acc_grad + (1. - self.rho) * grad ** 2
+        current_delta = (mx.nd.sqrt(acc_delta + self.epsilon) /
+                         mx.nd.sqrt(acc_grad + self.epsilon)) * grad
+        acc_delta[:] = self.rho * acc_delta + (1. - self.rho) * current_delta ** 2
+
+        # update weight
+        weight[:] -= current_delta + wd * weight
+
+@with_seed()
+def test_adadelta():
+    opt1 = PyAdaDelta
+    opt2 = mx.optimizer.AdaDelta
+    shape = (3, 4, 5)
+    rho_options = [{'rho': 0.9}]
+    eps_options = [{}, {'epsilon': 1e-8}]
+    cg_options = [{}, {'clip_gradient': 0.4}, {'clip_gradient': 0.5}]
+    rg_options = [{}, {'rescale_grad': 0.14}, {'rescale_grad': 0.8}]
+    wd_options = [{}, {'wd': 0.0}]
+    for dtype in [np.float16, np.float32]:
+        for params in itertools.product(rho_options, eps_options, cg_options,
+                                        rg_options, wd_options):
+            kwarg = {k: v for param in params for k, v in param.items()}
+            if dtype is np.float16:
+                kwarg.update({'multi_precision': True})
+            compare_optimizer(opt1(**kwarg), opt2(**kwarg), shape, dtype)
+
 def test_factor_scheduler():
     base_lr = 1