SINGA-387 Modified the design of the autograd backward engine and corrected some mistakes in it
Optimize the design of the autograd engine:
 - differentiate the creator of inputs from the creator of parameters,
   even though both are instances of the Dummy class.
 - this avoids unnecessary memory use, mainly from storing unneeded
   gradients.

Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/b55b046c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/b55b046c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/b55b046c

Branch: refs/heads/master
Commit: b55b046ccac33876a28861cba6badbddfae75788
Parents: 2fea345
Author: xuewanqi <[email protected]>
Authored: Fri Aug 10 05:57:18 2018 +0000
Committer: xuewanqi <[email protected]>
Committed: Mon Aug 13 06:07:18 2018 +0000

----------------------------------------------------------------------
 python/singa/autograd.py | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/b55b046c/python/singa/autograd.py
----------------------------------------------------------------------
diff --git a/python/singa/autograd.py b/python/singa/autograd.py
index 94214fc..56b5498 100755
--- a/python/singa/autograd.py
+++ b/python/singa/autograd.py
@@ -54,11 +54,18 @@ def infer_dependency(op):
         for src_op, _, _, _ in cur_op.src:
             if src_op not in dependency_count:
                 # dependency[src_op] = [Counter() for _ in src_op.y_id2idx]
-                dependency_count[src_op] = 0
-                queue.append(src_op)
+                if isinstance(src_op, Dummy):
+                    # only when a Dummy operator needs to store grads does its dependency need to be counted.
+                    if src_op.stores_grad:
+                        dependency_count[src_op] = 0
+                        queue.append(src_op)
+                else:
+                    dependency_count[src_op] = 0
+                    queue.append(src_op)
             # y_idx = src_op.y_id2idx[x_id]
             # dependency[src_op][y_idx][cur_op] += 1
-            dependency_count[src_op] += 1
+            if src_op in dependency_count:
+                dependency_count[src_op] += 1
     return dependency_count
 
 
@@ -127,6 +134,11 @@ def backward(y, dy=None):
             # the gradient of all its outputs are available, i.e. all children
             # operations have been backwarded.
             # y is None if y.stores_grad is false; otherwise it is a Tensor
+
+            if isinstance(src_op, Dummy):
+                if not src_op.stores_grad:
+                    continue
+
             y_idx = src_op.y_id2idx[x_id]
             if src_op not in not_ready:
                 # src_op may have multiple outputs
@@ -253,6 +265,7 @@ class Dummy(Operation):
         self.name = name
         self.src = []
         self.y_id2idx = {id(tensor): 0}
+        self.stores_grad = tensor.stores_grad
         self.requires_grad = False
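
For context beyond the diff, here is a minimal, self-contained sketch of the
dependency-counting idea: Dummy creators that do not store gradients (i.e.
creators of plain inputs) are never entered into the count, so the backward
pass keeps no gradient buffer for them. The Op/DummyOp classes and the example
graph below are illustrative stand-ins, not the real singa API; only the names
src, stores_grad, and dependency_count mirror autograd.py.

    from collections import deque

    class Op:
        """Illustrative stand-in for singa's Operation."""
        def __init__(self, name, src=()):
            self.name = name
            # mirrors autograd.py's src entries: (src_op, x_id, y, y_stores_grad)
            self.src = [(s, None, None, None) for s in src]

    class DummyOp(Op):
        """Illustrative stand-in for singa's Dummy (creator of a leaf tensor)."""
        def __init__(self, name, stores_grad):
            super().__init__(name)
            self.stores_grad = stores_grad

    def infer_dependency(op):
        """Count, per creator, how many consumers must be backwarded first.

        DummyOp creators with stores_grad == False (plain inputs) are
        skipped entirely, so no gradient is ever accumulated for them.
        """
        dependency_count = {}
        queue = deque([op])
        while queue:
            cur_op = queue.popleft()
            for src_op, _, _, _ in cur_op.src:
                if isinstance(src_op, DummyOp) and not src_op.stores_grad:
                    continue  # creator of an input: not counted, no grad kept
                if src_op not in dependency_count:
                    dependency_count[src_op] = 0
                    queue.append(src_op)
                dependency_count[src_op] += 1
        return dependency_count

    # x is a network input (no grad), w is a parameter (grad required)
    x = DummyOp('x', stores_grad=False)
    w = DummyOp('w', stores_grad=True)
    out = Op('matmul', src=[x, w])
    print({o.name: n for o, n in infer_dependency(out).items()})  # {'w': 1}

The same flag is consulted again in backward(): gradients flowing toward a
non-parameter Dummy creator are dropped with a continue instead of being
stored in not_ready.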

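The third hunk is what makes this possible: the flag is copied from the tensor
onto its creator at construction time. Below is a hypothetical, simplified
mirror of that change; the Tensor stand-in and its creator attribute are
illustrative only, and the real Dummy also manages output naming and more.

    class Tensor:
        """Illustrative leaf tensor; real singa tensors carry much more state."""
        def __init__(self, stores_grad=False):
            self.stores_grad = stores_grad
            self.creator = Dummy(self)  # every leaf tensor gets a Dummy creator

    class Dummy:
        def __init__(self, tensor, name=None):
            self.name = name
            self.src = []
            self.y_id2idx = {id(tensor): 0}
            # new in this commit: remember whether the tensor wants gradients,
            # so the engine can query the op without a back-pointer to the tensor
            self.stores_grad = tensor.stores_grad
            self.requires_grad = False

    param = Tensor(stores_grad=True)   # e.g. a weight
    datum = Tensor(stores_grad=False)  # e.g. an input batch
    assert param.creator.stores_grad and not datum.creator.stores_grad

Recording the flag on the creator keeps infer_dependency and backward purely
graph-local: both decide whether to allocate or propagate a gradient by
inspecting operations alone.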