Repository: incubator-singa
Updated Branches:
  refs/heads/dev 1b4f74b2d -> 4d596dde8 (forced update)


SINGA-192 Implement optimization algorithms for v1

Optimize the Tensor computation for Adagrad, RMSProp, Nesterov and SGD using
in-place operations to avoid creating intermediate Tensors.
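
A rough illustration of the in-place pattern, with plain std::vector standing
in for singa::Tensor (SqrtCopy/SqrtInto are made-up names for this sketch, not
SINGA APIs): the allocating style returns a freshly created result per call,
while the out-parameter style writes into a caller-owned buffer that can be
reused across steps.

#include <cmath>
#include <vector>

using Vec = std::vector<float>;

// Allocating style: each call materializes a new buffer,
// analogous to `Tensor Sqrt(const Tensor &in)`.
Vec SqrtCopy(const Vec &in) {
  Vec out(in.size());
  for (size_t i = 0; i < in.size(); ++i) out[i] = std::sqrt(in[i]);
  return out;
}

// In-place style: the result is written into an existing buffer,
// analogous to the new `void Sqrt(const Tensor &in, Tensor *out)`.
void SqrtInto(const Vec &in, Vec *out) {
  out->resize(in.size());
  for (size_t i = 0; i < in.size(); ++i) (*out)[i] = std::sqrt(in[i]);
}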


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/4d596dde
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/4d596dde
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/4d596dde

Branch: refs/heads/dev
Commit: 4d596dde8d0c017a7937cdba462c565ac55ab3b8
Parents: 0cd9663
Author: Wei Wang <[email protected]>
Authored: Mon Jun 13 14:10:22 2016 +0800
Committer: Wei Wang <[email protected]>
Committed: Mon Jun 13 20:28:52 2016 +0800

----------------------------------------------------------------------
 include/singa/core/tensor.h     | 11 +++++++++++
 src/model/optimizer/adagrad.cc  | 10 +++++++---
 src/model/optimizer/nesterov.cc |  4 ++++
 src/model/optimizer/rmsprop.cc  |  9 ++++++---
 src/model/optimizer/sgd.cc      |  9 +++++----
 5 files changed, 33 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/4d596dde/include/singa/core/tensor.h
----------------------------------------------------------------------
diff --git a/include/singa/core/tensor.h b/include/singa/core/tensor.h
index 8cfa705..522c20b 100644
--- a/include/singa/core/tensor.h
+++ b/include/singa/core/tensor.h
@@ -227,6 +227,17 @@ Tensor Sqrt(const Tensor &in);
 Tensor Square(const Tensor &in);
 Tensor Tanh(const Tensor &in);
 
+void Abs(const Tensor &in, Tensor *out);
+void Exp(const Tensor &in, Tensor *out);
+void Log(const Tensor &in, Tensor *out);
+void ReLU(const Tensor &in, Tensor *out);
+void Sigmoid(const Tensor &in, Tensor *out);
+void Sign(const Tensor &in, Tensor *out);
+void Sqrt(const Tensor &in, Tensor *out);
+void Square(const Tensor &in, Tensor *out);
+void Tanh(const Tensor &in, Tensor *out);
+
+
 /// Element-wise operation, out[i]=in[i]^x
 template <typename SType>
 Tensor Pow(const Tensor &in, const SType x);

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/4d596dde/src/model/optimizer/adagrad.cc
----------------------------------------------------------------------
diff --git a/src/model/optimizer/adagrad.cc b/src/model/optimizer/adagrad.cc
index 6910320..fec9c96 100644
--- a/src/model/optimizer/adagrad.cc
+++ b/src/model/optimizer/adagrad.cc
@@ -23,14 +23,18 @@ namespace singa {
 
 void Adagrad::Setup(const OptimizerConf& conf) { delta_ = conf.delta(); }
 
+// history += grad*grad;
+// value = value - lr*grad/sqrt(history+delta)
 void Adagrad::Apply(int step, float lr, const string& name, const Tensor& grad,
                     Tensor* value) {
   if (history_gradient_.find(name) == history_gradient_.end())
     history_gradient_[name].ResetLike(*value);
   Tensor& history = history_gradient_[name];
-  Tensor tmp = grad.Clone();
-  history += Square(tmp);
-  tmp /= Sqrt(history + delta_);
+  Tensor tmp = Square(grad);
+  history += tmp;
+  Add(history, delta_, &tmp);
+  Sqrt(tmp, &tmp);
+  Div(grad, tmp, &tmp);
   Axpy(-lr, tmp, value);
 }
 }  // namespace singa
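
For reference, the same AdaGrad update written with plain scalars mirrors the
in-place sequence above (a sketch only; AdagradStep is a hypothetical helper,
not SINGA code):

#include <cmath>

// AdaGrad step for one parameter w with gradient g:
//   history += g * g;
//   w       -= lr * g / sqrt(history + delta);
void AdagradStep(float g, float lr, float delta, float *history, float *w) {
  *history += g * g;                           // accumulate squared gradient
  *w -= lr * g / std::sqrt(*history + delta);  // scaled gradient step
}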

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/4d596dde/src/model/optimizer/nesterov.cc
----------------------------------------------------------------------
diff --git a/src/model/optimizer/nesterov.cc b/src/model/optimizer/nesterov.cc
index 2f16c06..eaaf445 100644
--- a/src/model/optimizer/nesterov.cc
+++ b/src/model/optimizer/nesterov.cc
@@ -26,6 +26,10 @@ void Nesterov::Setup(const OptimizerConf& conf) {
   SetMomentumGenerator([m](int step) { return m; });
 }
 
+// tmp = history;
+// history = lr * grad + history * mom
+// tmp = (1+mom) * history - tmp * mom;
+// value = value - tmp;
 void Nesterov::Apply(int step, float lr, const string& name, const Tensor& grad,
                      Tensor* value) {
   if (momentum_generator_) {
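
The comment block above corresponds to this scalar sketch of the Nesterov
update (illustrative only; NesterovStep is a hypothetical helper, not SINGA
code):

// Nesterov-momentum step for one parameter w with gradient g:
//   tmp     = history;
//   history = mom * history + lr * g;
//   w      -= (1 + mom) * history - mom * tmp;
void NesterovStep(float g, float lr, float mom, float *history, float *w) {
  float tmp = *history;                    // previous velocity
  *history = mom * *history + lr * g;      // updated velocity
  *w -= (1 + mom) * *history - mom * tmp;  // look-ahead update
}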

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/4d596dde/src/model/optimizer/rmsprop.cc
----------------------------------------------------------------------
diff --git a/src/model/optimizer/rmsprop.cc b/src/model/optimizer/rmsprop.cc
index 9f13e11..6a74b8e 100644
--- a/src/model/optimizer/rmsprop.cc
+++ b/src/model/optimizer/rmsprop.cc
@@ -26,6 +26,8 @@ void RMSProp::Setup(const OptimizerConf& conf) {
   rho_ = conf.rho();
 }
 
+// history = history * rho + grad * grad * (1 - rho)
+// value = value - lr * grad / sqrt(history + delta)
 void RMSProp::Apply(int step, float lr, const string& name, const Tensor& grad,
                     Tensor* value) {
   if (history_gradient_.find(name) == history_gradient_.end()) {
@@ -33,9 +35,10 @@ void RMSProp::Apply(int step, float lr, const string& name, const Tensor& grad,
   }
   Tensor& history = history_gradient_[name];
   history *= rho_;
-  Tensor tmp = grad.Clone();
-  Axpy(1 - rho_, Square(tmp), &history);
-  tmp /= Sqrt(history + delta_);
+  Tensor tmp = Square(grad);
+  Axpy(1 - rho_, tmp, &history);
+  Sqrt(history + delta_, &tmp);
+  Div(grad, tmp, &tmp);
   Axpy(-lr, tmp, value);
 }
 }  // namespace singa
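
In scalar form the RMSProp update above reads as follows (a sketch for
illustration; RMSPropStep is a hypothetical helper, not SINGA code):

#include <cmath>

// RMSProp step for one parameter w with gradient g:
//   history = rho * history + (1 - rho) * g * g;
//   w      -= lr * g / sqrt(history + delta);
void RMSPropStep(float g, float lr, float rho, float delta,
                 float *history, float *w) {
  *history = rho * *history + (1 - rho) * g * g;  // running average of g^2
  *w -= lr * g / std::sqrt(*history + delta);     // normalized gradient step
}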

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/4d596dde/src/model/optimizer/sgd.cc
----------------------------------------------------------------------
diff --git a/src/model/optimizer/sgd.cc b/src/model/optimizer/sgd.cc
index ae8b7ac..a5c66a1 100644
--- a/src/model/optimizer/sgd.cc
+++ b/src/model/optimizer/sgd.cc
@@ -28,10 +28,10 @@ void SGD::Setup(const OptimizerConf& conf) {
   }
 }
 
+// history = history * momentum + grad * lr
+// value = value - history
 void SGD::Apply(int step, float lr, const string& name, const Tensor& grad,
                 Tensor* value) {
-  Tensor tmp = grad.Clone();
-  tmp *= lr;
   if (momentum_generator_) {
     float mom = momentum_generator_(step);
     if (mom != 0) {
@@ -39,12 +39,13 @@ void SGD::Apply(int step, float lr, const string& name, const Tensor& grad,
         history_gradient_[name].ResetLike(*value);
       Tensor& history = history_gradient_[name];
       history *= mom;
-      history += tmp;
+      Axpy(lr, grad, &history);
       (*value) -= history;
       return;
     }
+  } else {
+    Axpy(-lr, grad, value);
   }
-  (*value) -= tmp;
 }
 }  // namespace singa
 #endif  // SRC_MODEL_OPTIMIZER_SGD_H_
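
And the (momentum) SGD update in scalar form (illustrative sketch; SGDStep is
a hypothetical helper, not SINGA code):

// SGD step for one parameter w with gradient g.
//   with momentum:    history = mom * history + lr * g;  w -= history;
//   without momentum: w -= lr * g;
void SGDStep(float g, float lr, float mom, float *history, float *w) {
  if (mom != 0.0f) {
    *history = mom * *history + lr * g;  // momentum-accumulated update
    *w -= *history;
  } else {
    *w -= lr * g;                        // plain gradient step
  }
}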
