Repository: incubator-singa
Updated Branches:
  refs/heads/master e16cea129 -> b30d7ea55
SINGA-381 - Update the autograd API to yield the gradients

yield gradients from backward() in autograd.py; this saves memory by
releasing gradients early

Project: http://git-wip-us.apache.org/repos/asf/incubator-singa/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-singa/commit/81908a82
Tree: http://git-wip-us.apache.org/repos/asf/incubator-singa/tree/81908a82
Diff: http://git-wip-us.apache.org/repos/asf/incubator-singa/diff/81908a82

Branch: refs/heads/master
Commit: 81908a82f4c9ea01b1359ed3d8fb4118a5bfd147
Parents: e16cea1
Author: Wang Wei <[email protected]>
Authored: Thu Jul 5 22:09:27 2018 +0800
Committer: wang wei <[email protected]>
Committed: Wed Jul 11 15:19:27 2018 +0800

----------------------------------------------------------------------
 examples/autograd/mlp.py       |  8 +++-----
 examples/autograd/mnist_cnn.py |  6 ++----
 python/singa/autograd.py       | 20 +++++++++++++-------
 3 files changed, 18 insertions(+), 16 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/81908a82/examples/autograd/mlp.py
----------------------------------------------------------------------
diff --git a/examples/autograd/mlp.py b/examples/autograd/mlp.py
index 0447927..e90ff1d 100755
--- a/examples/autograd/mlp.py
+++ b/examples/autograd/mlp.py
@@ -62,7 +62,7 @@ if __name__ == '__main__':
     label = to_categorical(label, 2).astype(np.float32)
     print('train_data_shape:', data.shape)
     print('train_label_shape:', label.shape)
-    
+
     inputs = Tensor(data=data)
     target = Tensor(data=label)
 
@@ -86,10 +86,8 @@ if __name__ == '__main__':
         x = autograd.add_bias(x, b1)
         x = autograd.soft_max(x)
         loss = autograd.cross_entropy(x, target)
-        in_grads = autograd.backward(loss)
-
-        for param in in_grads:
-            sgd.apply(0, in_grads[param], param, '')
+        for p, gp in autograd.backward(loss):
+            sgd.apply(0, gp, p, '')
 
         if (i % 100 == 0):
             print('training loss = ', tensor.to_numpy(loss)[0])

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/81908a82/examples/autograd/mnist_cnn.py
----------------------------------------------------------------------
diff --git a/examples/autograd/mnist_cnn.py b/examples/autograd/mnist_cnn.py
index 5b4e608..db21485 100755
--- a/examples/autograd/mnist_cnn.py
+++ b/examples/autograd/mnist_cnn.py
@@ -135,7 +135,5 @@ if __name__ == '__main__':
                 print('accuracy is:', accuracy_rate, 'loss is:',
                       tensor.to_numpy(loss)[0])
 
-            in_grads = autograd.backward(loss)
-
-            for param in in_grads:
-                sgd.apply(0, in_grads[param], param, '')
+            for p, gp in autograd.backward(loss):
+                sgd.apply(0, gp, p, '')

http://git-wip-us.apache.org/repos/asf/incubator-singa/blob/81908a82/python/singa/autograd.py
----------------------------------------------------------------------
diff --git a/python/singa/autograd.py b/python/singa/autograd.py
index 9fd8b4d..2ba3098 100755
--- a/python/singa/autograd.py
+++ b/python/singa/autograd.py
@@ -538,6 +538,13 @@ def infer_dependency(op):
     return dependency_count
 
 
+def gradients(y, dy=None):
+    grads = {}  # mapping: x->dx if x.stores_grad
+    for p, dp in backward(y, dy):
+        grads[p] = dp
+    return grads
+
+
 def backward(y, dy=None):
     '''
     Run the backward propagation starting at y.
@@ -566,7 +573,7 @@ def backward(y, dy=None):
     # ready is a queue of (operation, dy list)
     ready = deque([(y.creator, (dy,))])
     not_ready = {}  # mapping: op->[dy]
-    gradients = {}  # mapping: x->dx if x.stores_grad
+
     if y.stores_grad:
         gradients[y] = dy
 
@@ -608,7 +615,8 @@ def backward(y, dy=None):
            if y_stores_grad:
                # store the gradient for final return, e.g. if x is parameter
                g = not_ready[src_op][y_idx]
-                gradients[y] = Tensor(device=g.device(), data=g)
+                tg = Tensor(device=g.device(), data=g)
+                yield (y, tg)
            dependency[src_op] -= 1
            if src_op.requires_grad is True:
                if dependency[src_op] == 0:
@@ -616,10 +624,8 @@ def backward(y, dy=None):
                    ready.append((src_op, not_ready[src_op]))
                del not_ready[src_op]
 
-    return gradients
-
 
-class NewLayer(object):
+class Layer(object):
 
    def __init__(self):
        pass
@@ -631,7 +637,7 @@ class NewLayer(object):
            var.to_device(x_device)
 
 
-class Linear(NewLayer):
+class Linear(Layer):
 
    def __init__(self, in_features, out_features, bias=True):
        #self.in_features = in_features
@@ -661,7 +667,7 @@ class Linear(NewLayer):
        return y
 
 
-class Conv2D(NewLayer):
+class Conv2D(Layer):
 
    def __init__(self, in_channels, out_channels, kernel_size, stride=1,
                 padding=0, dilation=1, groups=1, bias=True, **kwargs):
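
For illustration only, and not part of the commit: a minimal sketch of how
the updated API is consumed, assuming the same setup as
examples/autograd/mlp.py above (a loss Tensor built from autograd ops and an
SGD optimizer `sgd` whose apply() takes (epoch, grad, param, name) as in the
examples). The helper names train_step and train_step_dict are hypothetical.

from singa import autograd


def train_step(loss, sgd, epoch=0):
    # backward() is now a generator: each (param, grad) pair is applied and
    # can be released before the next pair is produced, which is the memory
    # saving described in the commit message.
    for p, gp in autograd.backward(loss):
        sgd.apply(epoch, gp, p, '')


def train_step_dict(loss, sgd, epoch=0):
    # gradients(), added in this commit, keeps the old dict-style interface:
    # it materializes every (param, grad) pair before returning, so all
    # gradients stay in memory at the same time.
    for p, gp in autograd.gradients(loss).items():
        sgd.apply(epoch, gp, p, '')

Both forms work with the sgd.apply() call shown in the examples above; the
generator form has lower peak memory because each gradient can be freed as
soon as it is applied.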
