Repository: incubator-singa
Updated Branches:
  refs/heads/master f2f4d1f9c -> 770d6cdb6


SINGA-387 Modified the design of the autograd backward engine and corrected 
some mistakes in it

One of the alternative solutions, the simplest one:
- output every intermediate gradient once it is calculated and sum these 
gradients together in the function gradients(), as sketched below.
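
A hedged sketch of this accumulation pattern (plain Python, not SINGA code; 
fake_backward() is a hypothetical stand-in for the engine's backward() 
generator): a parameter used along two paths yields two partial gradients, 
which must be summed per parameter.

    # A parameter that feeds two paths produces two partial gradients.
    def fake_backward():
        w = "w"            # stand-in for a parameter tensor
        yield (w, 2.0)     # partial gradient from the first path
        yield (w, 3.0)     # partial gradient from the second path

    def sum_gradients(pairs):
        grads = {}
        for p, dp in pairs:
            grads[p] = grads.get(p, 0.0) + dp   # accumulate per parameter
        return grads

    print(sum_gradients(fake_backward()))  # {'w': 5.0}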


Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/6c28abde
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/6c28abde
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/6c28abde

Branch: refs/heads/master
Commit: 6c28abdeb6929334ba10d327fe1fd80e2d0b604c
Parents: f2f4d1f
Author: xuewanqi <[email protected]>
Authored: Thu Aug 9 15:08:06 2018 +0000
Committer: xuewanqi <[email protected]>
Committed: Mon Aug 13 05:59:06 2018 +0000

----------------------------------------------------------------------
 python/singa/autograd.py | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6c28abde/python/singa/autograd.py
----------------------------------------------------------------------
diff --git a/python/singa/autograd.py b/python/singa/autograd.py
index a084764..007af27 100755
--- a/python/singa/autograd.py
+++ b/python/singa/autograd.py
@@ -64,7 +64,10 @@ def infer_dependency(op):
 def gradients(y, dy=None):
     grads = {}  # mapping: x->dx if x.stores_grad
     for p, dp in backward(y, dy):
-        gradients[p] = dp
+        if p not in grads:
+            grads[p] = dp
+        else:
+            grads[p] += dp
     return grads
 
 
@@ -96,7 +99,13 @@ def backward(y, dy=None):
     not_ready = {}  # mapping: op->[dy]
 
     if y.stores_grad:
-        gradients[y] = dy
+        # gradients[y] = dy
+        if isinstance(dy, float):
+            g = np.array(dy)
+        else:
+            g = dy
+        tg = Tensor(device=g.device(), data=g)
+        yield (y, tg)
 
     while len(ready) > 0:
         op, dys = ready.pop()
@@ -135,7 +144,12 @@ def backward(y, dy=None):
                     dxs[y_idx] += dx
             if y_stores_grad:
                 # store the gradient for final return, e.g. if x is parameter
-                g = not_ready[src_op][y_idx]
+
+                # g = not_ready[src_op][y_idx]
+
+                g = dx  # cannot confirm that the gradient of a parameter is calculated completely. May disobey some optimization algorithms, as the engine transmits
+                        # a (partial) gradient once it is calculated, which may cause wrong records of some optimizer parameters.
+
                 tg = Tensor(device=g.device(), data=g)
                 yield (y, tg)
             dependency[src_op] -= 1
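
A hedged usage sketch (the train_step/update_fn names are assumptions, not 
confirmed SINGA API; autograd.gradients() is the function patched above): 
because backward() may yield several partial gradients for the same parameter, 
an optimizer should consume the summed result of gradients() rather than the 
individual yields.

    from singa import autograd

    def train_step(loss, update_fn):
        # gradients() drains the whole backward() generator, so every entry
        # in grads is the fully accumulated gradient for that parameter.
        grads = autograd.gradients(loss)
        for param, grad in grads.items():
            update_fn(param, grad)   # e.g. an SGD update supplied by the caller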
