This is an automated email from the ASF dual-hosted git repository.
haibin pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/master by this push:
new 59f4395 Fallback to dense version for grad(reshape), grad(expand_dims) (#13599)
59f4395 is described below
commit 59f43956e2b045458f2c10b5197a727b58849f57
Author: Yizhi Liu <[email protected]>
AuthorDate: Thu Dec 20 11:48:22 2018 -0800
Fallback to dense version for grad(reshape), grad(expand_dims) (#13599)
* fallback to dense version for grad(reshape), grad(expand_dims)
* add _backward_reshape gpu version
* reshape test case comments
* fix gpu test
* remove mkldnn support for _backward_reshape
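For context, the scenario this change addresses can be sketched roughly as follows. This is a minimal reproduction modeled on the new test case in the diff below; the context choice (mx.cpu()), the head gradient, and the final print are illustrative, not part of the patch:

    import mxnet as mx
    from mxnet.test_utils import rand_ndarray

    # A dense weight whose gradient buffer is requested as row_sparse:
    # the backward of reshape then receives a sparse output gradient and
    # must fall back to the dense identity kernel (the bug fixed here).
    x_nd = rand_ndarray((4, 1), 'csr')              # sparse input
    w_nd = rand_ndarray((12, 4))                    # dense weight
    grad_w = rand_ndarray((12, 4), 'row_sparse')    # sparse grad buffer for w

    x = mx.sym.Variable('x', stype='csr')
    w = mx.sym.Variable('w')
    w_x = mx.sym.reshape(w, (1, 48), name='w_x')
    out = mx.sym.sparse.dot(x, w_x, name='out_x')

    exe = out.bind(ctx=mx.cpu(), args={'x': x_nd, 'w': w_nd},
                   args_grad={'w': grad_w})
    exe.forward(is_train=True)
    exe.backward(mx.nd.dot(x_nd, w_nd.reshape((1, 48))))  # head gradient
    print(grad_w.asnumpy().shape)                          # (12, 4)

Before this commit, reshape and expand_dims routed their gradient through _backward_copy, which could not handle the sparse gradient buffer in this setup; the new _backward_reshape op falls back to the dense compute path.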
---
src/operator/tensor/elemwise_unary_op_basic.cc | 14 +++++++++
src/operator/tensor/elemwise_unary_op_basic.cu | 4 +++
src/operator/tensor/matrix_op.cc | 4 +--
tests/python/unittest/test_sparse_operator.py | 42 ++++++++++++++++++++++++++
4 files changed, 62 insertions(+), 2 deletions(-)
diff --git a/src/operator/tensor/elemwise_unary_op_basic.cc b/src/operator/tensor/elemwise_unary_op_basic.cc
index 9730d00..7f69395 100644
--- a/src/operator/tensor/elemwise_unary_op_basic.cc
+++ b/src/operator/tensor/elemwise_unary_op_basic.cc
@@ -236,6 +236,20 @@ NNVM_REGISTER_OP(_backward_copy)
return std::vector<bool>{true};
});
+NNVM_REGISTER_OP(_backward_reshape)
+.set_num_inputs(1)
+.set_num_outputs(1)
+.set_attr<nnvm::TIsBackward>("TIsBackward", true)
+.set_attr<nnvm::FInplaceOption>("FInplaceOption",
+ [](const NodeAttrs& attrs){
+ return std::vector<std::pair<int, int> >{{0, 0}};
+ })
+.set_attr<FCompute>("FCompute<cpu>", UnaryOp::IdentityCompute<cpu>)
+.set_attr<nnvm::FInplaceIdentity>("FInplaceIdentity",
+ [](const NodeAttrs& attrs){
+ return std::vector<bool>{true};
+ });
+
MXNET_OPERATOR_REGISTER_UNARY(BlockGrad)
MXNET_ADD_SPARSE_OP_ALIAS(stop_gradient)
.add_alias("stop_gradient")
diff --git a/src/operator/tensor/elemwise_unary_op_basic.cu b/src/operator/tensor/elemwise_unary_op_basic.cu
index c28934e..14f2be0 100644
--- a/src/operator/tensor/elemwise_unary_op_basic.cu
+++ b/src/operator/tensor/elemwise_unary_op_basic.cu
@@ -68,6 +68,10 @@ NNVM_REGISTER_OP(_copy)
.set_attr<FComputeEx>("FComputeEx<gpu>", UnaryOp::IdentityComputeEx<gpu>);
NNVM_REGISTER_OP(_backward_copy)
+.set_attr<FCompute>("FCompute<gpu>", UnaryOp::IdentityCompute<gpu>)
+.set_attr<FComputeEx>("FComputeEx<gpu>", UnaryOp::IdentityComputeEx<gpu>);
+
+NNVM_REGISTER_OP(_backward_reshape)
.set_attr<FCompute>("FCompute<gpu>", UnaryOp::IdentityCompute<gpu>);
NNVM_REGISTER_OP(BlockGrad)
diff --git a/src/operator/tensor/matrix_op.cc b/src/operator/tensor/matrix_op.cc
index 2ffeabc..db8efa4 100644
--- a/src/operator/tensor/matrix_op.cc
+++ b/src/operator/tensor/matrix_op.cc
@@ -223,7 +223,7 @@ If the argument `reverse` is set to 1, then the special values are inferred from
.set_attr_parser(ParamParser<ReshapeParam>)
.set_attr<nnvm::FInferShape>("FInferShape", ReshapeShape)
.set_attr<nnvm::FInferType>("FInferType", ElemwiseType<1, 1>)
-.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{"_backward_copy"})
+.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{"_backward_reshape"})
.set_attr<FCompute>("FCompute<cpu>", UnaryOp::IdentityCompute<cpu>)
#if MXNET_USE_MKLDNN == 1
.set_attr<bool>("TIsMKLDNN", true)
@@ -415,7 +415,7 @@ will return a new array with shape ``(2,1,3,4)``.
[](const NodeAttrs& attrs){
return std::vector<bool>{true};
})
-.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{"_backward_copy"})
+.set_attr<nnvm::FGradient>("FGradient", ElemwiseGradUseNone{"_backward_reshape"})
.set_attr<FCompute>("FCompute<cpu>", UnaryOp::IdentityCompute<cpu>)
.add_argument("data", "NDArray-or-Symbol", "Source input")
.add_arguments(ExpandDimParam::__FIELDS__());
diff --git a/tests/python/unittest/test_sparse_operator.py b/tests/python/unittest/test_sparse_operator.py
index 5780824..05175bb 100644
--- a/tests/python/unittest/test_sparse_operator.py
+++ b/tests/python/unittest/test_sparse_operator.py
@@ -2306,6 +2306,48 @@ def test_sparse_quadratic_function():
check_sparse_quadratic_function(a, b, 0.0, 'csr')
check_sparse_quadratic_function(a, b, 1.0, 'default')
+def test_reshape_backward_fallback():
+ """
+ out
+ | \
+ w_x x
+ /
+ w
+ in which x is a sparse tensor.
+ Due to sparse gradient optimization in sym.dot, grad(w_x) is sparse.
+ Though sym.reshape itself does not have a sparse version,
+ if we somehow make grad(w) sparse as well, e.g.,
+ - by setting args_grad in symbol.bind
+ - or, we can have out_y = sym.dot(sparse_y, w), then grad(w) will be inferred as sparse
+ then reshape backward (from w_x to w) needs to understand how to handle sparse inputs.
+ """
+ ctx = default_context()
+ w_shape = (12, 4)
+ w_x_shape = (1, 48)
+ x_nd = rand_ndarray((4, 1), 'csr')
+
+ w_nd = rand_ndarray(w_shape)
+
+ w_x_nd = w_nd.reshape(w_x_shape)
+ out_x_nd = mx.nd.dot(x_nd, w_x_nd)
+
+ w_x_backward_grad = mx.nd.dot(x_nd, out_x_nd, transpose_a=True).asnumpy()
+ expected_grad_nd = w_x_backward_grad.reshape(w_shape)
+
+ x = mx.sym.Variable('x', stype='csr')
+ w = mx.sym.Variable('w')
+
+ w_x = mx.sym.reshape(w, w_x_shape, name="w_x")
+ out = mx.sym.sparse.dot(x, w_x, name='out_x')
+
+ grad_w_nd = rand_ndarray(w_shape, 'row_sparse')
+ executor = out.bind(ctx=ctx, args={"x": x_nd, "w": w_nd},
+ args_grad={"w": grad_w_nd})
+ executor.forward(is_train=True)
+ executor.backward(out_x_nd)
+
+ assert_almost_equal(grad_w_nd.asnumpy(), expected_grad_nd)
+
if __name__ == '__main__':
import nose
nose.runmodule()