This is an automated email from the ASF dual-hosted git repository. anirudh2290 pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/master by this push: new 95de767 fix for bug #10868: _backward_softsign activation is incorrect (#11827) 95de767 is described below commit 95de76758a18fc070fac11bcd1d2d24a5263a1df Author: samskalicky <samskali...@gmail.com> AuthorDate: Fri Jul 20 14:16:23 2018 -0700 fix for bug #10868: _backward_softsign activation is incorrect (#11827) * fix for bug #10868: _backward_softsign activation is incorrect problem was that softsign was computed using outputs instead of inputs added inputs to list, and changed what gets passed into softsign calculation added softsign test to test_activation function code reviewed by anirudh * fix for bug #10868: _backward_softsign activation is incorrect problem was that softsign was computed using outputs instead of inputs added inputs to list, and changed what gets passed into softsign calculation added softsign test to test_activation function code reviewed by anirudh **amended to change tab to spaces * rerunning the CI build * fixed size checks for when USE_MKLDNN=ON --- src/operator/nn/activation-inl.h | 7 ++++--- src/operator/nn/activation.cc | 29 ++++++++++++++++++++++------- src/operator/nn/activation.cu | 2 +- tests/python/unittest/test_operator.py | 4 ++++ 4 files changed, 31 insertions(+), 11 deletions(-) diff --git a/src/operator/nn/activation-inl.h b/src/operator/nn/activation-inl.h index e6f8915..2705177 100644 --- a/src/operator/nn/activation-inl.h +++ b/src/operator/nn/activation-inl.h @@ -174,7 +174,7 @@ void ActivationGradComputeImpl(const nnvm::NodeAttrs& attrs, const OpContext &ct break; case activation::kSoftSign: ActivationBackward<xpu, mshadow_op::softsign, mshadow_op::softsign_grad>( - ctx, inputs[0], inputs[1], req[0], outputs[0]); + ctx, inputs[0], inputs[2], req[0], outputs[0]); break; default: LOG(FATAL) << "unknown activation type"; @@ -198,12 +198,13 @@ void ActivationGradCompute(const nnvm::NodeAttrs& attrs, const std::vector<TBlob>& inputs, const std::vector<OpReqType>& req, const std::vector<TBlob>& outputs) { -#if (MXNET_USE_CUDNN == 1 || MXNET_USE_MKLDNN == 1) const ActivationParam& param = nnvm::get<ActivationParam>(attrs.parsed); +#if (MXNET_USE_CUDNN == 1 || MXNET_USE_MKLDNN == 1) bool relu = param.act_type == activation::kReLU; CHECK_EQ(inputs.size(), relu ? 2U : 3U); #else - CHECK_EQ(inputs.size(), 2U); + bool softsign = param.act_type == activation::kSoftSign; + CHECK_EQ(inputs.size(), softsign ? 3U : 2U); #endif CHECK_EQ(outputs.size(), 1U); CHECK_EQ(req.size(), 1U); diff --git a/src/operator/nn/activation.cc b/src/operator/nn/activation.cc index d723bbe..3404b5b 100644 --- a/src/operator/nn/activation.cc +++ b/src/operator/nn/activation.cc @@ -44,11 +44,19 @@ struct ActivationGrad { const std::vector<nnvm::NodeEntry>& ograds) const { std::vector<nnvm::NodeEntry> heads(ograds.begin(), ograds.end()); heads.emplace_back(nnvm::NodeEntry{n, activation::kOut, 0}); -#if (MXNET_USE_CUDNN == 1 || MXNET_USE_MKLDNN == 1) + const NodeAttrs& attrs = n->attrs; + int act_type = dmlc::get<ActivationParam>(attrs.parsed).act_type; + if (act_type == activation::kSoftSign) { + // for softsign need the inputs to compute the activation. + heads.push_back(n->inputs[activation::kData]); + } + +#if (MXNET_USE_CUDNN == 1 || MXNET_USE_MKLDNN == 1) // for ReLU, no need to pass input data. This enables inplace optimization during the // forward pass. - if (dmlc::get<ActivationParam>(attrs.parsed).act_type != activation::kReLU) { + if (act_type != activation::kReLU && + act_type != activation::kSoftSign) { heads.push_back(n->inputs[activation::kData]); } #endif @@ -118,8 +126,8 @@ inline static bool BackwardActStorageType(const nnvm::NodeAttrs& attrs, std::vector<int> *in_attrs, std::vector<int> *out_attrs) { bool ret = false; -#if (MXNET_USE_CUDNN == 1 || MXNET_USE_MKLDNN == 1) const ActivationParam& param = nnvm::get<ActivationParam>(attrs.parsed); +#if (MXNET_USE_CUDNN == 1 || MXNET_USE_MKLDNN == 1) if (param.act_type != activation::kReLU) { CHECK_EQ(in_attrs->size(), 3U); ret = ElemwiseStorageType<3, 1, false, false, false>(attrs, dev_mask, @@ -133,10 +141,17 @@ inline static bool BackwardActStorageType(const nnvm::NodeAttrs& attrs, in_attrs, out_attrs); } #else - CHECK_EQ(in_attrs->size(), 2U); - ret = ElemwiseStorageType<2, 1, false, false, false>(attrs, dev_mask, - dispatch_mode, - in_attrs, out_attrs); + if (param.act_type == activation::kSoftSign) { + CHECK_EQ(in_attrs->size(), 3U); + ret = ElemwiseStorageType<3, 1, false, false, false>(attrs, dev_mask, + dispatch_mode, + in_attrs, out_attrs); + } else { + CHECK_EQ(in_attrs->size(), 2U); + ret = ElemwiseStorageType<2, 1, false, false, false>(attrs, dev_mask, + dispatch_mode, + in_attrs, out_attrs); + } #endif CHECK_EQ(out_attrs->size(), 1U); #if MXNET_USE_MKLDNN == 1 diff --git a/src/operator/nn/activation.cu b/src/operator/nn/activation.cu index 68b4053..8892cc3 100644 --- a/src/operator/nn/activation.cu +++ b/src/operator/nn/activation.cu @@ -87,7 +87,7 @@ void ActivationGradCompute<gpu>(const nnvm::NodeAttrs& attrs, ctx, inputs[0], inputs[1], req[0], outputs[0]); } else if (param.act_type == activation::kSoftSign) { ActivationBackward<gpu, mshadow_op::softsign, mshadow_op::softsign_grad>( - ctx, inputs[0], inputs[1], req[0], outputs[0]); + ctx, inputs[0], inputs[2], req[0], outputs[0]); } else { MSHADOW_REAL_TYPE_SWITCH(inputs[0].type_flag_, DType, { // XXX: for y = relu(x), y is passed as "in_data" to Backward() diff --git a/tests/python/unittest/test_operator.py b/tests/python/unittest/test_operator.py index b663ca2..96696d4 100644 --- a/tests/python/unittest/test_operator.py +++ b/tests/python/unittest/test_operator.py @@ -6852,6 +6852,10 @@ def test_activation(): lambda x: np.log(1. + np.exp(x)), lambda x: 1. - 1 / (1 + np.exp(x)), -3.0, 3.0], + 'softsign': [lambda x: mx.sym.Activation(x, act_type='softsign'), + lambda x: x / (1. + np.abs(x)), + lambda x: 1. / np.square(1. + np.abs(x)), + -3.0, 3.0], } # Loop over operators for name, op in unary_ops.items():