SINGA-192 Implement optimization algorithms for v1

Optimize the Tensor computation for Adagrad, RMSProp and Nesterov using
in-place operations to avoid creating intermediate Tensors.
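For context on what "in-place" means here: each element-wise op in tensor.h
gains an overload that writes into a caller-supplied output Tensor, so an
update rule can reuse one scratch buffer instead of allocating a fresh Tensor
per sub-expression. A minimal usage sketch follows (illustrative code only,
not part of the commit; it uses only names that appear in the diff below --
Add, Sqrt, Axpy, ResetLike -- and assumes a SINGA build where they are
visible via include/singa/core/tensor.h):

// Sketch contrasting the two overload styles added in this commit.
#include "singa/core/tensor.h"
using singa::Tensor;

void Example(const Tensor& grad, Tensor* value, float lr, float delta) {
  // Out-of-place overloads: each call allocates and returns a new Tensor.
  Tensor out = singa::Sqrt(grad + delta);  // creates two temporaries

  // In-place overloads: one scratch Tensor serves the whole chain.
  Tensor tmp;
  tmp.ResetLike(grad);            // allocate scratch with grad's shape
  singa::Add(grad, delta, &tmp);  // tmp = grad + delta
  singa::Sqrt(tmp, &tmp);         // tmp = sqrt(tmp), no new allocation
  singa::Axpy(-lr, tmp, value);   // value -= lr * tmp
}

The optimizer changes in this commit are mechanical applications of this
pattern.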
Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/b06db0b0
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/b06db0b0
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/b06db0b0

Branch: refs/heads/dev
Commit: b06db0b0a7ba514597c7585e5aebdad1be2b9508
Parents: d064e54
Author: Wei Wang <[email protected]>
Authored: Mon Jun 13 14:10:22 2016 +0800
Committer: Wei Wang <[email protected]>
Committed: Mon Jun 13 14:10:22 2016 +0800

----------------------------------------------------------------------
 include/singa/core/tensor.h     | 11 +++++++++++
 src/model/optimizer/adagrad.cc  | 10 +++++++---
 src/model/optimizer/nesterov.cc |  4 ++++
 src/model/optimizer/rmsprop.cc  |  9 ++++++---
 src/model/optimizer/sgd.cc      |  9 +++++----
 5 files changed, 33 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b06db0b0/include/singa/core/tensor.h
----------------------------------------------------------------------
diff --git a/include/singa/core/tensor.h b/include/singa/core/tensor.h
index cd750c5..fa3c456 100644
--- a/include/singa/core/tensor.h
+++ b/include/singa/core/tensor.h
@@ -227,6 +227,17 @@ Tensor Sqrt(const Tensor &in);
 Tensor Square(const Tensor &in);
 Tensor Tanh(const Tensor &in);

+void Abs(const Tensor &in, Tensor *out);
+void Exp(const Tensor &in, Tensor *out);
+void Log(const Tensor &in, Tensor *out);
+void ReLU(const Tensor &in, Tensor *out);
+void Sigmoid(const Tensor &in, Tensor *out);
+void Sign(const Tensor &in, Tensor *out);
+void Sqrt(const Tensor &in, Tensor *out);
+void Square(const Tensor &in, Tensor *out);
+void Tanh(const Tensor &in, Tensor *out);
+
+
 /// Element-wise opeartion, out[i]=in[i]^x
 template <typename SType>
 Tensor Pow(const Tensor &in, const SType x);

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b06db0b0/src/model/optimizer/adagrad.cc
----------------------------------------------------------------------
diff --git a/src/model/optimizer/adagrad.cc b/src/model/optimizer/adagrad.cc
index 6910320..fec9c96 100644
--- a/src/model/optimizer/adagrad.cc
+++ b/src/model/optimizer/adagrad.cc
@@ -23,14 +23,18 @@ namespace singa {

 void Adagrad::Setup(const OptimizerConf& conf) { delta_ = conf.delta(); }

+// history += grad*grad;
+// value = value - lr*grad/sqrt(history+delta)
 void Adagrad::Apply(int step, float lr, const string& name,
                     const Tensor& grad, Tensor* value) {
   if (history_gradient_.find(name) == history_gradient_.end())
     history_gradient_[name].ResetLike(*value);
   Tensor& history = history_gradient_[name];
-  Tensor tmp = grad.Clone();
-  history += Square(tmp);
-  tmp /= Sqrt(history + delta_);
+  Tensor tmp = Square(grad);
+  history += tmp;
+  Add(history, delta_, &tmp);
+  Sqrt(tmp, &tmp);
+  Div(grad, tmp, &tmp);
   Axpy(-lr, tmp, value);
 }
 } // namespace singa
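The rewritten Adagrad::Apply above allocates a single scratch Tensor and
threads it through Square/Add/Sqrt/Div, where the old code created a
temporary for each sub-expression. As a sanity check on the arithmetic, here
is a plain-scalar sketch of the same rule (hypothetical standalone code, not
part of the commit; the names mirror adagrad.cc but nothing here is SINGA
code):

// Scalar reference for the AdaGrad rule implemented above.
#include <cmath>
#include <cstdio>
#include <vector>

void AdagradStep(float lr, float delta, const std::vector<float>& grad,
                 std::vector<float>* history, std::vector<float>* value) {
  for (size_t i = 0; i < grad.size(); ++i) {
    (*history)[i] += grad[i] * grad[i];  // history += grad*grad
    (*value)[i] -= lr * grad[i] / std::sqrt((*history)[i] + delta);
  }
}

int main() {
  std::vector<float> grad = {0.5f, -1.0f}, history = {0.f, 0.f},
                     value = {1.f, 1.f};
  AdagradStep(0.1f, 1e-8f, grad, &history, &value);
  std::printf("%f %f\n", value[0], value[1]);  // ~0.9, ~1.1
  return 0;
}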
http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b06db0b0/src/model/optimizer/nesterov.cc
----------------------------------------------------------------------
diff --git a/src/model/optimizer/nesterov.cc b/src/model/optimizer/nesterov.cc
index 2f16c06..eaaf445 100644
--- a/src/model/optimizer/nesterov.cc
+++ b/src/model/optimizer/nesterov.cc
@@ -26,6 +26,10 @@ void Nesterov::Setup(const OptimizerConf& conf) {
   SetMomentumGenerator([m](int step) { return m; });
 }

+// tmp = history;
+// history = lr * grad + history * mom
+// tmp = (1+mom) * history - tmp * mom;
+// value = value - tmp;
 void Nesterov::Apply(int step, float lr, const string& name,
                      const Tensor& grad, Tensor* value) {
   if (momentum_generator_) {

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b06db0b0/src/model/optimizer/rmsprop.cc
----------------------------------------------------------------------
diff --git a/src/model/optimizer/rmsprop.cc b/src/model/optimizer/rmsprop.cc
index 9f13e11..6a74b8e 100644
--- a/src/model/optimizer/rmsprop.cc
+++ b/src/model/optimizer/rmsprop.cc
@@ -26,6 +26,8 @@ void RMSProp::Setup(const OptimizerConf& conf) {
   rho_ = conf.rho();
 }

+// history = history * rho + grad * grad * (1 - rho)
+// value = value - lr * grad / sqrt(history + delta)
 void RMSProp::Apply(int step, float lr, const string& name, const Tensor& grad,
                     Tensor* value) {
   if (history_gradient_.find(name) == history_gradient_.end()) {
@@ -33,9 +35,10 @@ void RMSProp::Apply(int step, float lr, const string& name, const Tensor& grad,
   }
   Tensor& history = history_gradient_[name];
   history *= rho_;
-  Tensor tmp = grad.Clone();
-  Axpy(1 - rho_, Square(tmp), &history);
-  tmp /= Sqrt(history + delta_);
+  Tensor tmp = Square(grad);
+  Axpy(1 - rho_, tmp, &history);
+  Sqrt(history + delta_, &tmp);
+  Div(grad, tmp, &tmp);
   Axpy(-lr, tmp, value);
 }
 } // namespace singa

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b06db0b0/src/model/optimizer/sgd.cc
----------------------------------------------------------------------
diff --git a/src/model/optimizer/sgd.cc b/src/model/optimizer/sgd.cc
index ae8b7ac..a5c66a1 100644
--- a/src/model/optimizer/sgd.cc
+++ b/src/model/optimizer/sgd.cc
@@ -28,10 +28,10 @@ void SGD::Setup(const OptimizerConf& conf) {
   }
 }

+// history = history * momentum + grad * lr
+// value = value - history
 void SGD::Apply(int step, float lr, const string& name, const Tensor& grad,
                 Tensor* value) {
-  Tensor tmp = grad.Clone();
-  tmp *= lr;
   if (momentum_generator_) {
     float mom = momentum_generator_(step);
     if (mom != 0) {
@@ -39,12 +39,13 @@ void SGD::Apply(int step, float lr, const string& name, const Tensor& grad,
         history_gradient_[name].ResetLike(*value);
       Tensor& history = history_gradient_[name];
       history *= mom;
-      history += tmp;
+      Axpy(lr, grad, &history);
       (*value) -= history;
       return;
     }
+  } else {
+    Axpy(-lr, grad, value);
   }
-  (*value) -= tmp;
 }
 } // namespace singa
 #endif // SRC_MODEL_OPTIMIZER_SGD_H_
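The four comment lines added to nesterov.cc are the standard rewrite of
Nesterov momentum in terms of the previous and current history vectors;
rmsprop.cc and sgd.cc follow their own header comments directly. One caveat
worth noting in sgd.cc: when a momentum generator is present but returns 0,
neither branch of the new Apply fires, so no update is applied for that step.
For checking the Nesterov algebra, here is a scalar sketch (hypothetical
standalone code, not from the repository):

// Scalar reference for the Nesterov update commented in nesterov.cc.
#include <cstdio>

void NesterovStep(float lr, float mom, float grad, float* history,
                  float* value) {
  float tmp = *history;                       // tmp = history
  *history = lr * grad + *history * mom;      // history = lr*grad + history*mom
  tmp = (1 + mom) * (*history) - tmp * mom;   // lookahead correction
  *value -= tmp;                              // value = value - tmp
}

int main() {
  float history = 0.f, value = 1.f;
  // Two steps with a constant gradient of 1.0:
  NesterovStep(0.1f, 0.9f, 1.f, &history, &value);  // value: 1.0 -> 0.81
  NesterovStep(0.1f, 0.9f, 1.f, &history, &value);
  std::printf("history=%f value=%f\n", history, value);
  return 0;
}

With mom = 0 the update reduces to value -= lr * grad, i.e. plain SGD.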
