Repository: incubator-singa

Updated Branches:
  refs/heads/master f2f4d1f9c -> 770d6cdb6
SINGA-387 Modified the design of the autograd backward engine and corrected some mistakes in it

One of the alternative solutions, the simplest one:
- output every intermediate gradient once it is calculated and sum these gradients together in the function gradients().

Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/6c28abde
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/6c28abde
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/6c28abde

Branch: refs/heads/master
Commit: 6c28abdeb6929334ba10d327fe1fd80e2d0b604c
Parents: f2f4d1f
Author: xuewanqi <[email protected]>
Authored: Thu Aug 9 15:08:06 2018 +0000
Committer: xuewanqi <[email protected]>
Committed: Mon Aug 13 05:59:06 2018 +0000

----------------------------------------------------------------------
 python/singa/autograd.py | 20 +++++++++++++++++---
 1 file changed, 17 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/6c28abde/python/singa/autograd.py
----------------------------------------------------------------------
diff --git a/python/singa/autograd.py b/python/singa/autograd.py
index a084764..007af27 100755
--- a/python/singa/autograd.py
+++ b/python/singa/autograd.py
@@ -64,7 +64,10 @@ def infer_dependency(op):
 def gradients(y, dy=None):
     grads = {}  # mapping: x->dx if x.stores_grad
     for p, dp in backward(y, dy):
-        gradients[p] = dp
+        if p not in grads:
+            grads[p] = dp
+        else:
+            grads[p] += dp
     return grads
 
 
@@ -96,7 +99,13 @@ def backward(y, dy=None):
     not_ready = {}  # mapping: op->[dy]
 
     if y.stores_grad:
-        gradients[y] = dy
+        # gradients[y] = dy
+        if isinstance(dy, float):
+            g = np.array(dy)
+        else:
+            g = dy
+        tg = Tensor(device=g.device(), data=g)
+        yield (y, tg)
 
     while len(ready) > 0:
         op, dys = ready.pop()
@@ -135,7 +144,12 @@ def backward(y, dy=None):
                     dxs[y_idx] += dx
             if y_stores_grad:
                 # store the gradient for final return, e.g. if x is parameter
-                g = not_ready[src_op][y_idx]
+
+                # g = not_ready[src_op][y_idx]
+
+                g = dx  # cannot confirm that the gradient of a parameter has been computed completely. This may disobey some optimization algorithms, as the engine
+                # transmits a (partial) gradient as soon as it is calculated, which may cause incorrect records of some optimizer parameters.
+
                 tg = Tensor(device=g.device(), data=g)
                 yield (y, tg)
             dependency[src_op] -= 1
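For context, a minimal, self-contained sketch of the accumulation scheme described above: backward() yields a (parameter, gradient) pair as soon as each partial gradient is computed, and gradients() sums the pairs per parameter. The names toy_backward and toy_gradients and the plain float gradients are hypothetical stand-ins for the generator-based backward() and the singa Tensor objects in the diff; this is not the SINGA API itself.

def toy_backward():
    # Hypothetical generator standing in for autograd.backward():
    # the same parameter may be yielded several times, once per
    # partial gradient, in the order the engine computes them.
    yield ('w', 0.5)
    yield ('b', 1.0)
    yield ('w', 0.25)  # a second partial gradient for 'w'

def toy_gradients():
    # Mirrors gradients(): sum every partial gradient per parameter.
    grads = {}
    for p, dp in toy_backward():
        if p not in grads:
            grads[p] = dp
        else:
            grads[p] += dp
    return grads

print(toy_gradients())  # {'w': 0.75, 'b': 1.0}

The per-parameter sum is why the comment in the last hunk warns about optimizer bookkeeping: any single yielded value may carry only part of a parameter's final gradient, so it should not be consumed before backward() has finished.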
