This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/master by this push:
     new 92fde19  [MXNET-542] Fix mkldnn performance regression + improve test logging (#11262)
92fde19 is described below

commit 92fde19924fcbb18582f7662d9ae1e8c2b0bcf24
Author: Alexander Zai <aza...@gmail.com>
AuthorDate: Mon Jun 18 10:18:42 2018 -0700

    [MXNET-542] Fix mkldnn performance regression + improve test logging (#11262)

    * do not create tmp memory during act
    * fix order of alloc memory
    * fix conditional
    * fix order
    * do not pass nullptr to commit
    * fix comment
    * do not create tmp mem unless shapes diff
    * fix params
    * always return in CreateMKLDNNMem
    * add boilerplate for CreateMKLDNNMem test
    * refactor copyfrom
    * use copyfrom helper in tests
    * add logs
    * missing semi
    * improve print msg
    * target out_mem
    * test copy from
    * reuse verify copy
    * add inplace test / use sum for test
    * use assert in sum verify
    * lint
    * remove unused var
    * fix test message
    * out_mem can be null
    * Revert "refactor copyfrom"

      This reverts commit 4ab131ee41832eefb32971f82a58440736ca3417.

    * add back missing var
    * writeinplace explicitly returns same memory
    * refactor
    * only writeinplace if add and pdesc are eq
    * fix comparison
    * add second CreateMKLDNNMemory
    * CreateMKLDNNMem accepts input
    * refactor WriteTo criteria into separate method
    * fix lint
    * copyfrom test back
    * update mkldnn sum test to have diff inputs for write in place
    * test in place sum with diff arrs
    * revert CreateMKLDNNMem extra param change
    * pass input arr param for act_forward
    * remove extra header
    * fix indent
    * add check for writeto
    * canwriteto uses ref instead of ptr
    * update comments for CreateMKLDNNData
    * compare input and output desc with op pdesc
    * check CreateMKLDNNData does not return null
---
 src/operator/nn/mkldnn/mkldnn_act.cc     |  6 ++--
 src/operator/nn/mkldnn/mkldnn_base-inl.h |  9 +++--
 src/operator/nn/mkldnn/mkldnn_base.cc    | 43 +++++++++++++++---------
 src/operator/nn/mkldnn/mkldnn_sum.cc     | 31 +++---------------
 tests/cpp/operator/mkldnn.cc             | 56 ++++++++++++++++++++++++++------
 5 files changed, 88 insertions(+), 57 deletions(-)

diff --git a/src/operator/nn/mkldnn/mkldnn_act.cc b/src/operator/nn/mkldnn/mkldnn_act.cc
index a278456..fae72bd 100644
--- a/src/operator/nn/mkldnn/mkldnn_act.cc
+++ b/src/operator/nn/mkldnn/mkldnn_act.cc
@@ -161,15 +161,15 @@ void MKLDNNActivationForward(const nnvm::NodeAttrs& attrs, const OpContext &ctx,
   const ActivationParam& param = nnvm::get<ActivationParam>(attrs.parsed);
 
   NDArray in_buffer = in_data;
+  MKLDNNStream *stream = MKLDNNStream::Get();
+
   if (in_data.IsView() && in_data.IsMKLDNNData())
     in_buffer = in_data.Reorder2Default();
 
   auto input_mem = in_buffer.GetMKLDNNData();
   MKLDNNActForward &fwd = GetActForward(param, ctx, in_buffer, *input_mem);
-  auto out_mem = CreateMKLDNNMem(out_data, fwd.fwd_pd.dst_primitive_desc(),
-                                 req);
+  auto out_mem = CreateMKLDNNMem(out_data, fwd.fwd_pd.dst_primitive_desc(), req, &in_buffer);
   fwd.SetNewMem(*input_mem, *out_mem.second);
-  MKLDNNStream *stream = MKLDNNStream::Get();
   stream->RegisterPrim(fwd.GetFwd());
   CommitOutput(out_data, out_mem);
   stream->Submit();
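Note on the mkldnn_act.cc hunk above: the stream is now acquired once at the top, and the (possibly reordered) input buffer is passed to CreateMKLDNNMem so the kWriteInplace path can be validated against the actual input. A minimal sketch of the resulting call pattern inside an MKLDNN operator follows; it assumes the surrounding MXNet scaffolding (fwd, in_data, out_data, req) and is illustrative rather than compilable on its own:

    // Sketch: typical MKLDNN operator forward path after this change.
    MKLDNNStream *stream = MKLDNNStream::Get();      // acquire the stream once
    NDArray in_buffer = in_data;
    if (in_data.IsView() && in_data.IsMKLDNNData())
      in_buffer = in_data.Reorder2Default();         // views fall back to default layout
    auto input_mem = in_buffer.GetMKLDNNData();
    // Passing &in_buffer lets CreateMKLDNNMem check whether writing in place is safe.
    mkldnn_output_t out_mem =
        CreateMKLDNNMem(out_data, fwd.fwd_pd.dst_primitive_desc(), req, &in_buffer);
    fwd.SetNewMem(*input_mem, *out_mem.second);
    stream->RegisterPrim(fwd.GetFwd());
    CommitOutput(out_data, out_mem);                 // no-op, copy back, or add back
    stream->Submit();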
diff --git a/src/operator/nn/mkldnn/mkldnn_base-inl.h b/src/operator/nn/mkldnn/mkldnn_base-inl.h
index bd2faf5..6a7c58f 100644
--- a/src/operator/nn/mkldnn/mkldnn_base-inl.h
+++ b/src/operator/nn/mkldnn/mkldnn_base-inl.h
@@ -324,13 +324,16 @@ typedef std::pair<OutDataOp, mkldnn::memory *> mkldnn_output_t;
  * The difference is that the first function can create MKLDNN memory with
  * special layouts in an NDArray, while the second one can only create MKLDNN
  * memory with default layouts.
+ * Also an optional in_arr parameter can be passed in the first function with
+ * the kWriteInplace req to validate whether mkldnn can write in place;
+ * otherwise new memory will be written to and copied back onto out_arr.
  * If these two functions are used, we have to call CommitOutput to write
  * the output back to the output NDArray.
  */
-mkldnn_output_t CreateMKLDNNMem(const NDArray &arr,
+mkldnn_output_t CreateMKLDNNMem(const NDArray &out_arr,
                                 const mkldnn::memory::primitive_desc &desc,
-                                OpReqType req);
-mkldnn_output_t CreateMKLDNNWeightGrad(const NDArray &arr,
+                                OpReqType req, const NDArray* in_arr = nullptr);
+mkldnn_output_t CreateMKLDNNWeightGrad(const NDArray &out_arr,
                                        const mkldnn::memory::primitive_desc &desc,
                                        OpReqType req);
 /* This function has to be used with one of the functions above. */
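The comment above describes a small contract around mkldnn_output_t, which pairs an OutDataOp tag with the memory the primitive should write into. A hedged summary of the possible outcomes, read off the mkldnn_base.cc change below (not normative documentation):

    // req == kAddTo                           -> {AddBack,  tmp}  // CommitOutput accumulates tmp into out_arr
    // req == kWriteInplace && CanWriteTo(...) -> {Noop,     out}  // the primitive writes out_arr directly
    // req == kWriteInplace, otherwise         -> {CopyBack, tmp}  // CommitOutput copies tmp onto out_arr
    // req == kWriteTo, desc representable     -> {Noop,     out}
    // req == kWriteTo, otherwise              -> {CopyBack, tmp}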
diff --git a/src/operator/nn/mkldnn/mkldnn_base.cc b/src/operator/nn/mkldnn/mkldnn_base.cc
index 1bd1581..b182aa0 100644
--- a/src/operator/nn/mkldnn/mkldnn_base.cc
+++ b/src/operator/nn/mkldnn/mkldnn_base.cc
@@ -77,29 +77,42 @@ mkldnn::memory *TmpMemMgr::Alloc(const mkldnn::memory::primitive_desc &pd) {
   }
 }
 
-mkldnn_output_t CreateMKLDNNMem(const NDArray &arr,
+bool CanWriteTo(const NDArray &out_arr,
+                const NDArray &in_arr,
+                const mkldnn::memory::primitive_desc &desc) {
+  auto in_mem = in_arr.GetMKLDNNData();
+  bool add_same = in_mem->get_data_handle() == out_arr.GetMKLDNNData()->get_data_handle();
+  bool pdesc_same = out_arr.GetMKLDNNData()->get_primitive_desc() == desc &&
+      in_mem->get_primitive_desc() == desc;
+  return add_same && pdesc_same;
+}
+
+mkldnn_output_t CreateMKLDNNMem(const NDArray &out_arr,
                                 const mkldnn::memory::primitive_desc &desc,
-                                OpReqType req) {
+                                OpReqType req,
+                                const NDArray* in_arr) {
   if (kAddTo == req) {
     auto tmp = TmpMemMgr::Get()->Alloc(desc);
     return mkldnn_output_t(OutDataOp::AddBack, tmp);
-  } else if (kWriteInplace == req) {
-    // MKLDNN ops may not support the case that the input and the output uses
-    // the same memory. Let's use an extra copy to make sure it always works.
+  } else if (req == kWriteInplace && in_arr != nullptr && CanWriteTo(out_arr, *in_arr, desc)) {
+    mkldnn::memory *mem = const_cast<NDArray &>(out_arr).CreateMKLDNNData(desc);
+    // mem is nullptr if out_arr is a view and desc is an MKLDNN format.
+    // need to Reorder2Default before calling CreateMKLDNNMem
+    CHECK(mem != nullptr);
+    return mkldnn_output_t(OutDataOp::Noop, mem);
+  } else if (req == kWriteInplace) {
+    auto tmp = TmpMemMgr::Get()->Alloc(desc);
+    return mkldnn_output_t(OutDataOp::CopyBack, tmp);
+  }
+  mkldnn::memory *mem = const_cast<NDArray &>(out_arr).CreateMKLDNNData(desc);
+  if (nullptr == mem) {
     auto tmp = TmpMemMgr::Get()->Alloc(desc);
     return mkldnn_output_t(OutDataOp::CopyBack, tmp);
-  } else {
-    mkldnn::memory *mem = const_cast<NDArray &>(arr).CreateMKLDNNData(desc);
-    if (mem == nullptr) {
-      auto tmp = TmpMemMgr::Get()->Alloc(desc);
-      return mkldnn_output_t(OutDataOp::CopyBack, tmp);
-    } else {
-      return mkldnn_output_t(OutDataOp::Noop, mem);
-    }
   }
+  return mkldnn_output_t(OutDataOp::Noop, mem);
 }
 
-mkldnn_output_t CreateMKLDNNWeightGrad(const NDArray &arr,
+mkldnn_output_t CreateMKLDNNWeightGrad(const NDArray &out_arr,
                                        const mkldnn::memory::primitive_desc &desc,
                                        OpReqType req) {
   if (kAddTo == req) {
@@ -113,7 +126,7 @@ mkldnn_output_t CreateMKLDNNWeightGrad(const NDArray &arr,
     auto def_format = GetDefaultFormat(_desc.desc());
     mkldnn::memory *mem = nullptr;
     if (def_format == _desc.desc().data.format) {
-      mem = const_cast<NDArray &>(arr).CreateMKLDNNData(desc);
+      mem = const_cast<NDArray &>(out_arr).CreateMKLDNNData(desc);
     }
     if (mem == nullptr) {
       auto tmp = TmpMemMgr::Get()->Alloc(desc);
diff --git a/src/operator/nn/mkldnn/mkldnn_sum.cc b/src/operator/nn/mkldnn/mkldnn_sum.cc
index fdbfb15..c51e108 100644
--- a/src/operator/nn/mkldnn/mkldnn_sum.cc
+++ b/src/operator/nn/mkldnn/mkldnn_sum.cc
@@ -58,7 +58,6 @@ void MKLDNNSumForward(const nnvm::NodeAttrs& attrs, const OpContext &ctx,
   std::vector<mkldnn::memory::primitive_desc> in_pds(inputs.size());
   std::vector<float> scales(inputs.size(), 1);
   in_prims.reserve(inputs.size());
-  bool pd_same = true;
   std::vector<NDArray> in_bufs(inputs.size());
   for (size_t i = 0; i < inputs.size(); i++) {
     const mkldnn::memory *in_mem;
@@ -73,31 +72,11 @@ void MKLDNNSumForward(const nnvm::NodeAttrs& attrs, const OpContext &ctx,
   }
 
   mkldnn::sum::primitive_desc pdesc(scales, in_pds);
-  pd_same = pd_same && (pdesc.dst_primitive_desc() == in_pds[0]);
-  auto out_mem = const_cast<NDArray&>(out_data).CreateMKLDNNData(pdesc.dst_primitive_desc());
-  bool addr_same = false;
-  const void *first_data_handle;
-  if (in_bufs[0].is_none())
-    first_data_handle = inputs[0].GetMKLDNNData()->get_data_handle();
-  else
-    first_data_handle = in_bufs[0].GetMKLDNNData()->get_data_handle();
-  if (out_mem)
-    addr_same = out_mem->get_data_handle() == first_data_handle;
-  if (((req == kWriteTo) || (req == kWriteInplace && pd_same && addr_same))
-      && out_mem) {
-    // do sum computation directly on output NDArray
-    MKLDNNStream *stream = MKLDNNStream::Get();
-    stream->RegisterPrim(mkldnn::sum(pdesc, in_prims, *out_mem));
-    stream->Submit();
-  } else {
-    // req == kWriteInplace but cannot be handled by mkldnn and
-    // req == kAddTo will run into this branch
-    auto mem = CreateMKLDNNMem(out_data, pdesc.dst_primitive_desc(), req);
-    MKLDNNStream *stream = MKLDNNStream::Get();
-    stream->RegisterPrim(mkldnn::sum(pdesc, in_prims, *mem.second));
-    CommitOutput(out_data, mem);
-    stream->Submit();
-  }
+  auto mem = CreateMKLDNNMem(out_data, pdesc.dst_primitive_desc(), req, &inputs[0]);
+  MKLDNNStream *stream = MKLDNNStream::Get();
+  stream->RegisterPrim(mkldnn::sum(pdesc, in_prims, *mem.second));
+  CommitOutput(out_data, mem);
+  stream->Submit();
 }
 
 }  // namespace op
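CommitOutput (declared in mkldnn_base-inl.h above) is the other half of this contract: it consumes the OutDataOp tag produced by CreateMKLDNNMem. The sketch below is a plausible reconstruction from the tags used in this patch, not the tree's actual implementation; the helper name CommitOutputSketch is hypothetical:

    // Hypothetical sketch of the write-back step keyed on OutDataOp.
    void CommitOutputSketch(const NDArray &arr, const mkldnn_output_t &res) {
      if (res.first == OutDataOp::CopyBack) {
        // Result landed in a temporary buffer; copy it onto the output array.
        const_cast<NDArray &>(arr).CopyFrom(*res.second);
      } else if (res.first == OutDataOp::AddBack) {
        // kAddTo: accumulate the temporary result into the existing output memory.
        auto out_mem = const_cast<NDArray &>(arr).GetMKLDNNData();
        op::MKLDNNSum(*out_mem, *res.second, *out_mem);
      }
      // OutDataOp::Noop: the primitive already wrote the output in place.
    }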
diff --git a/tests/cpp/operator/mkldnn.cc b/tests/cpp/operator/mkldnn.cc
index a7a1187..82fee67 100644
--- a/tests/cpp/operator/mkldnn.cc
+++ b/tests/cpp/operator/mkldnn.cc
@@ -426,30 +426,45 @@ OpAttrs GetSumOp() {
  *    reordered to 5 dimensions.
  *
  */
-std::vector<NDArrayAttrs> GetTestInputArrays(InitFunc init_fn) {
+std::vector<NDArrayAttrs> GetTestInputArrays(InitFunc init_fn, bool rand = false) {
   TestArrayShapes tas = GetTestArrayShapes();
   std::vector<nnvm::TShape> shapes = tas.shapes;
   std::vector<mkldnn::memory::primitive_desc> pds = tas.pds;
 
   std::vector<NDArrayAttrs> in_arrs;
+  std::string desc;
   for (auto shape : shapes) {
     // Type 1.
     NDArray arr(shape, Context());
     in_arrs.emplace_back(arr, "Normal NDArray");
-    init_fn(&in_arrs.back().arr, false);
+    init_fn(&in_arrs.back().arr, rand);
     for (auto pd : pds) {
       if (shape.Size() != pd.get_size() / sizeof(mshadow::default_real_t))
         continue;
 
       // Type 2, 3.
       arr = NDArray(shape, Context());
-      in_arrs.emplace_back(arr, "MKLDNN NDArray");
+      desc = "MKLDNN NDArray";
+      if (shape.ndim() != pd.desc().data.ndims) {
+        std::stringstream ss;
+        ss << "MKLDNN NDArray with different memory layout " <<
+           shape.ndim() << "/" << pd.desc().data.ndims;
+        desc = ss.str();
+      }
+      in_arrs.emplace_back(arr, desc);
       InitMKLDNNArray(&in_arrs.back().arr, pd, init_fn);
 
       // Type 4, 5, 6.
       arr = NDArray(shape, Context());
+      desc = "Reshaped MKLDNN NDArray";
+      if (shape.ndim() != pd.desc().data.ndims) {
+        std::stringstream ss;
+        ss << "Reshaped MKLDNN NDArray with different memory layout "
+           << shape.ndim() << "/" << pd.desc().data.ndims;
+        desc = ss.str();
+      }
       InitMKLDNNArray(&arr, pd, init_fn);
-      in_arrs.emplace_back(arr.Slice(1, arr.shape()[0] - 1), "Reshaped MKLDNN NDArray");
+      in_arrs.emplace_back(arr.Slice(1, arr.shape()[0] - 1), desc);
     }
   }
   return in_arrs;
@@ -496,6 +511,7 @@ std::vector<NDArrayAttrs> GetTestOutputArrays(const TShape &shape,
                                               const std::vector<mkldnn::memory::primitive_desc> &pds,
                                               const InitFunc init_fn) {
   std::vector<NDArrayAttrs> in_arrs;
+  std::string desc;
   // Type 1.
   NDArray arr(shape, Context());
   in_arrs.emplace_back(arr, "Normal NDArray");
@@ -539,7 +555,14 @@ std::vector<NDArrayAttrs> GetTestOutputArrays(const TShape &shape,
 
     // Type 2, 3.
     arr = NDArray(shape, Context());
-    in_arrs.emplace_back(arr, "MKLDNN NDArray");
+    desc = "MKLDNN NDArray";
+    if (shape.ndim() != pd.desc().data.ndims) {
+      std::stringstream ss;
+      ss << "MKLDNN NDArray with different memory layout "
+         << shape.ndim() << "/" << pd.desc().data.ndims;
+      desc = ss.str();
+    }
+    in_arrs.emplace_back(arr, desc);
     InitMKLDNNArray(&in_arrs.back().arr, pd, init_fn, true);
 
     // Type 8, 9.
@@ -549,7 +572,14 @@ std::vector<NDArrayAttrs> GetTestOutputArrays(const TShape &shape,
     NDArray arr = NDArray(s, Context());
     arr = arr.AsArray(shape, arr.dtype());
     InitMKLDNNArray(&arr, pd, init_fn, true);
-    in_arrs.emplace_back(arr, "Reused MKLDNN NDArray");
+    desc = "Reused MKLDNN NDArray";
+    if (shape.ndim() != pd.desc().data.ndims) {
+      std::stringstream ss;
+      ss << "Reused MKLDNN NDArray with different memory layout "
+         << shape.ndim() << "/" << pd.desc().data.ndims;
+      desc = ss.str();
+    }
+    in_arrs.emplace_back(arr, desc);
   }
   return in_arrs;
 }
@@ -588,7 +618,7 @@ void VerifySumResult(const std::vector<NDArray *> &in_arrs, const NDArray &arr) {
   mshadow::default_real_t *d2 = in2.data().dptr<mshadow::default_real_t>();
   mshadow::default_real_t *o = out.data().dptr<mshadow::default_real_t>();
   for (size_t i = 0; i < in1.shape().Size(); i++)
-    EXPECT_EQ(d1[i] + d2[i], o[i]);
+    ASSERT_EQ(d1[i] + d2[i], o[i]);
 }
 
 void PrintVerifyMsg(const NDArrayAttrs &arr1, const NDArrayAttrs &arr2) {
@@ -748,10 +778,13 @@ void VerifySumMemory(mkldnn::memory in_mem1, mkldnn::memory in_mem2, mkldnn::mem
 
 TEST(MKLDNN_BASE, MKLDNNSum) {
   std::vector<NDArrayAttrs> in_arrs = GetTestInputArrays(InitDefaultArray);
+  std::vector<NDArrayAttrs> in_arrs2 = GetTestInputArrays(InitDefaultArray, true);
   TestArrayShapes tas = GetTestArrayShapes();
   std::vector<mkldnn::memory::primitive_desc> pds = tas.pds;
 
-  for (auto in_arr : in_arrs) {
+  for (int i = 0; i < in_arrs.size(); i++) {
+    auto in_arr = in_arrs[i];
+    auto in_arr2 = in_arrs2[i];
     std::vector<NDArrayAttrs> out_arrs = GetTestOutputArrays(in_arr.arr.shape(), pds, InitDefaultArray);
     if (!SupportMKLDNN(in_arr.arr) || !in_arr.arr.IsMKLDNNData() || in_arr.arr.IsView())
@@ -761,6 +794,8 @@ TEST(MKLDNN_BASE, MKLDNNSum) {
       auto in_mem1 = in_arr.arr.GetMKLDNNData();
       auto in_mem2 = in_arr.arr.GetMKLDNNData();
       auto out_mem = out_arr.arr.GetMKLDNNData(in_mem1->get_primitive_desc());
+
+      // TODO(alexzai): remove this noop by reordering in MKLDNNSum
       if (out_mem == nullptr)
         continue;
       PrintVerifyMsg(in_arr, in_arr);
@@ -771,14 +806,15 @@ TEST(MKLDNN_BASE, MKLDNNSum) {
 
     // in place
     auto input_mem = in_arr.arr.GetMKLDNNData();
+    auto input_mem2 = in_arr2.arr.GetMKLDNNData();
     NDArrayAttrs orig_arr(in_arr.arr.Copy(in_arr.arr.ctx()), "In Place Copy");
     PrintVerifyMsg(orig_arr, in_arr);
     InitMKLDNNArray(&orig_arr.arr, input_mem->get_primitive_desc(), InitDefaultArray);
     orig_arr.arr.CopyFrom(*input_mem);
     auto old_mem = orig_arr.arr.GetMKLDNNData();
-    op::MKLDNNSum(*input_mem, *input_mem, *input_mem);
+    op::MKLDNNSum(*input_mem, *input_mem2, *input_mem);
     MKLDNNStream::Get()->Submit();
-    VerifySumMemory(*old_mem, *old_mem, *input_mem);
+    VerifySumMemory(*old_mem, *input_mem2, *input_mem);
   }
 }

--
To stop receiving notification emails like this one, please contact
j...@apache.org.