KexinFeng commented on a change in pull request #20559:
URL: https://github.com/apache/incubator-mxnet/pull/20559#discussion_r701944084
##########
File path: python/mxnet/gluon/block.py
##########
@@ -1635,6 +1637,50 @@ def reset_ctx(self, ctx):
         for p in params.values():
             p.reset_ctx(ctx)

+    def mark_vars(self, var_arrays):
+        """Mark intermediate nodes for autograd computation.
+
+        Parameters
+        ----------
+        var_arrays : NDArray or List[NDArray]
+            The arrays to be marked, used in deferred computation.
+        """
+        if not self._active:
+            var_arrays = _as_list(var_arrays)
+            self._nleaf_vars.extend(var_arrays)
+        else:
+            prev_val = dc.set_deferred_compute(False)
+            var_arrays = _as_list(var_arrays)
+            # Prepare the ctypes array type for the variable handles
+            import ctypes
+            var_handles_type = ctypes.c_void_p * len(var_arrays)
+            # Convert the NDArray handles to ctypes
+            var_handles = var_handles_type(*[arr.handle for arr in var_arrays])
+            check_call(_LIB.MXNDArrayMarkDCVariables(
+                var_handles, len(var_arrays), len(self._nleaf_vars)))
+            self._nleaf_vars.extend(var_arrays)
+            dc.set_deferred_compute(prev_val)
+
+    def get_mark_vars(self, mark_ids):
+        """Retrieve the marked ndarrays in the order in which they were marked.
+
+        Parameters
+        ----------
+        mark_ids : int or List[int]
+            The positions in which the ndarrays were marked.
Review comment:
       This is now implemented with an `OrderedDict`, so the intermediate
variables can be retrieved by name.
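       For reference, a minimal sketch of what name-based retrieval could
look like with an `OrderedDict` (the `names` parameter and the
`_nleaf_vars` layout below are assumptions for illustration, not the exact
code in this PR):

```python
from collections import OrderedDict

class MarkedVarsSketch:
    """Illustration only: name-keyed storage for marked intermediate arrays."""

    def __init__(self):
        # Assumed layout: marked arrays keyed by a user-chosen name;
        # OrderedDict also preserves the order in which they were marked.
        self._nleaf_vars = OrderedDict()

    def mark_vars(self, var_arrays, names):
        # Store each marked array under its name.
        for name, arr in zip(names, var_arrays):
            self._nleaf_vars[name] = arr

    def get_mark_vars(self, names):
        # Retrieve marked arrays by name rather than by positional mark id.
        return [self._nleaf_vars[name] for name in names]
```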
##########
File path: tests/python/unittest/test_autograd.py
##########
@@ -519,3 +519,110 @@ def test_gradient():
     dx.backward()
     assert abs(x.grad.asscalar() - 2.71828175) < 1e-7

+def test_retain_grad_drop_grad():
+    x = nd.array([1,2,3,4])
+    x.attach_grad()
+    y = nd.array([5,6,7,8])
+    y.attach_grad()
+
+    with mx.autograd.record():
+        u = x * y
+        z = u * x
+
+    u.attach_grad()
+    z.attach_grad()
+    out_grad = nd.array([10, 10, 10, 10])
+    z.backward(out_grad, retain_graph=True)
+
+    assert (u.grad == out_grad * x).asnumpy().all()
+    assert (z.grad == out_grad).asnumpy().all()
+    assert (x.grad == out_grad * 2 * x * y).asnumpy().all()
+    assert (y.grad == out_grad * x * x).asnumpy().all()
+
+    u.drop_grad()
+    z.drop_grad()
+    y.drop_grad()
+    out_grad = nd.array([0.1, 0.1, 0.1, 0.1])
+    z.backward(out_grad)
+
+    assert u.grad is None and z.grad is None and y.grad is None
+    assert (x.grad == out_grad * 2 * x * y).asnumpy().all()
+
+def test_retain_grad_drop_grad_gluon():
+    class CompBlock(mx.gluon.HybridBlock):
+        def __init__(self):
+            super().__init__()
+
+        def forward(self, a, b):
+            out1 = a * b
+            out2 = out1 * a
+            self.mark_vars(out1)
+            return out2
+
+    x = mx.np.array([1,2,3,4])
+    y = mx.np.array([5,6,7,8])
+    x.attach_grad()
+    y.attach_grad()
+    block2 = CompBlock()
+    block2.initialize()
+    # block2.hybridize()
Review comment:
Done.
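       The diff is truncated above; for illustration, the hybridized flow
would presumably continue along the lines of the first test. A minimal
sketch, assuming `get_mark_vars` accepts the positional `mark_ids` from the
docstring earlier in this diff (the exact retrieval API may differ in the
final, name-based version):

```python
block2.hybridize()  # enabled per the resolution above
with mx.autograd.record():
    out2 = block2(x, y)

# Assumed: fetch the first marked array (out1) by its mark id.
out1 = block2.get_mark_vars(0)
out1.attach_grad()
out_grad = mx.np.array([10, 10, 10, 10])
out2.backward(out_grad)

# out2 = out1 * a, so d(out2)/d(out1) = a = x.
assert (out1.grad.asnumpy() == (out_grad * x).asnumpy()).all()
```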
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]