This is an automated email from the ASF dual-hosted git repository.

taolv pushed a commit to branch v1.5.x
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/v1.5.x by this push:
     new fb0374d  fix dropout mask output (#15697) (#15804)
fb0374d is described below

commit fb0374dd702f0a9b66a207e8764bb8d18f595bf7
Author:     Shufan <33112206+juliusshu...@users.noreply.github.com>
AuthorDate: Tue Aug 13 13:46:01 2019 +0800

    fix dropout mask output (#15697) (#15804)
---
 src/operator/nn/dropout-inl.h | 17 +++++++++++------
 src/operator/nn/dropout.cc   |  5 +++--
 2 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/src/operator/nn/dropout-inl.h b/src/operator/nn/dropout-inl.h
index 2a643a2..29f4b3c 100644
--- a/src/operator/nn/dropout-inl.h
+++ b/src/operator/nn/dropout-inl.h
@@ -130,11 +130,18 @@ class DropoutOp {
       DType *dataptr = data.dptr_;
       auto maskptr = reinterpret_cast<int *>(mask.dptr_);
       int count = mask.shape_[0] * mask.shape_[1];
+      if (sizeof(DType) > sizeof(int)) {
+        // allocate a new buffer to avoid memory overlap between `mask.dptr_` and `maskptr`
+        Tensor<xpu, 1, int> temp = ctx.requested[1].get_space_typed<xpu, 1, int>(Shape1(count), s);
+        maskptr = temp.dptr_;
+      }
       BernoulliGenerate(*pgen, count, this->pkeep_, maskptr);
       const float pk_1 = 1.0f / this->pkeep_;
 #pragma omp parallel for num_threads(engine::OpenMP::Get()->GetRecommendedOMPThreadCount())
       for (int i = 0; i < count; ++i) {
-        outptr[i] = dataptr[i] * maskptr[i] * pk_1;
+        const DType maskVal = static_cast<DType>(maskptr[i]) * pk_1;
+        outptr[i] = dataptr[i] * maskVal;
+        mask.dptr_[i] = maskVal;
       }
     }
@@ -149,12 +156,11 @@ class DropoutOp {
       Tensor<xpu, 2, DType> gdata = in_grad[dropout::kData].FlatTo2D<xpu, DType>(s);
       DType *ingradptr = gdata.dptr_;
       const DType *outgradptr = grad.dptr_;
-      auto maskptr = reinterpret_cast<int *>(mask.dptr_);
-      int count = mask.shape_[0] * mask.shape_[1];
-      const float pk_1 = 1.0f / this->pkeep_;
+      const DType *maskptr = mask.dptr_;
+      const int count = mask.shape_[0] * mask.shape_[1];
 #pragma omp parallel for num_threads(engine::OpenMP::Get()->GetRecommendedOMPThreadCount())
       for (int i = 0; i < count; ++i) {
-        ingradptr[i] = outgradptr[i] * maskptr[i] * pk_1;
+        ingradptr[i] = outgradptr[i] * maskptr[i];
       }
     }
@@ -527,5 +533,4 @@ void DropoutGradCompute(const OpStatePtr& state,
 }  // namespace op
 }  // namespace mxnet
 
-#undef MXNET_USE_MKL_DROPOUT
 #endif  // MXNET_OPERATOR_NN_DROPOUT_INL_H_
diff --git a/src/operator/nn/dropout.cc b/src/operator/nn/dropout.cc
index 63da561..d2fe3a5 100644
--- a/src/operator/nn/dropout.cc
+++ b/src/operator/nn/dropout.cc
@@ -28,8 +28,6 @@
 #include "../operator_common.h"
 #include "mxnet/op_attr_types.h"
 
-
-
 namespace mxnet {
 namespace op {
@@ -163,6 +161,9 @@ Example::
 #endif
     }
     request.emplace_back(ResourceRequest::kParallelRandom);
+#if MXNET_USE_MKL_DROPOUT
+    request.emplace_back(ResourceRequest::kTempSpace);
+#endif
     return request;
   })
 .add_argument("data", "NDArray-or-Symbol", "Input array to which dropout will be applied.")
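
For readers skimming the diff, here is a minimal standalone C++ sketch of the
mask semantics this patch adopts. It is not MXNet code, and names such as
DropoutForwardSketch/DropoutBackwardSketch and their signatures are
hypothetical: the point is only that the forward pass now stores the
already-scaled value (0 or 1/pkeep) in the mask output, so the backward pass
reduces to a plain elementwise multiply with no extra pk_1 rescaling.

#include <cstdio>
#include <random>
#include <vector>

// Forward: draw a Bernoulli keep/drop decision per element and store the
// *scaled* mask value (0 or 1/pkeep), mirroring the patched kernel's
// `mask.dptr_[i] = maskVal` store.
template <typename DType>
void DropoutForwardSketch(const std::vector<DType>& in, float pkeep,
                          std::vector<DType>* out, std::vector<DType>* mask,
                          std::mt19937* rng) {
  std::bernoulli_distribution keep(pkeep);
  const DType pk_1 = static_cast<DType>(1.0f / pkeep);
  for (std::size_t i = 0; i < in.size(); ++i) {
    const DType maskVal = keep(*rng) ? pk_1 : DType(0);  // scaled mask value
    (*mask)[i] = maskVal;
    (*out)[i] = in[i] * maskVal;
  }
}

// Backward: the 1/pkeep factor is already baked into the stored mask, so the
// gradient is a plain elementwise product, exactly as in the patched loop.
template <typename DType>
void DropoutBackwardSketch(const std::vector<DType>& outgrad,
                           const std::vector<DType>& mask,
                           std::vector<DType>* ingrad) {
  for (std::size_t i = 0; i < outgrad.size(); ++i)
    (*ingrad)[i] = outgrad[i] * mask[i];
}

int main() {
  std::mt19937 rng(42);
  std::vector<float> in{1.f, 2.f, 3.f, 4.f}, out(4), mask(4), grad(4);
  DropoutForwardSketch(in, 0.5f, &out, &mask, &rng);
  DropoutBackwardSketch(std::vector<float>{1.f, 1.f, 1.f, 1.f}, mask, &grad);
  for (std::size_t i = 0; i < in.size(); ++i)
    std::printf("out=%g mask=%g grad=%g\n", out[i], mask[i], grad[i]);
}

The kTempSpace resource request added in dropout.cc backs the new
`sizeof(DType) > sizeof(int)` branch: when DType is wider than int (e.g.
double), BernoulliGenerate writes its int results into scratch space instead
of aliasing `mask.dptr_`, so the later DType stores into the mask cannot
overwrite int entries that have not yet been read.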