This is an automated email from the ASF dual-hosted git repository.

jxie pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/master by this push:
     new 92fde19  [MXNET-542] Fix mkldnn performance regression + improve test logging (#11262)
92fde19 is described below

commit 92fde19924fcbb18582f7662d9ae1e8c2b0bcf24
Author: Alexander Zai <aza...@gmail.com>
AuthorDate: Mon Jun 18 10:18:42 2018 -0700

    [MXNET-542] Fix mkldnn performance regression + improve test logging (#11262)

    * do not create tmp memory during act
    * fix order of alloc memory
    * fix conditional
    * fix order
    * do not pass nullptr to commit
    * fix comment
    * do not create tmp mem unless shapes diff
    * fix params
    * always return in CreateMKLDNNMem
    * add boilerplate for CreateMKLDNNMem test
    * refactor copyfrom
    * use copyfrom helper in tests
    * add logs
    * missing semi
    * improve print msg
    * target out_mem
    * test copy from
    * reuse verify copy
    * add inplace test / use sum for test
    * use assert in sum verify
    * lint
    * remove unused var
    * fix test message
    * out_mem can be null
    * Revert "refactor copyfrom"

      This reverts commit 4ab131ee41832eefb32971f82a58440736ca3417.

    * add back missing var
    * writeinplace explicitly returns same memory
    * refactor
    * only writeinplace if add and pdesc are eq
    * fix comparison
    * add second CreateMKLDNNMemory
    * CreateMKLDNNMem accepts input
    * refactor WriteTo criteria into separate method
    * fix lint
    * copyfrom test back
    * update mkldnn sum test to have diff inputs for write in place
    * test in place sum with diff arrs
    * revert CreateMKLDNNMem extra param change
    * pass input arr param for act_forward
    * remove extra header
    * fix indent
    * add check for writeto
    * canwriteto uses ref instead of ptr
    * update comments for CreateMKLDNNData
    * compare input and output desc with op pdesc
    * check CreateMKLDNNData does not return null
---
 src/operator/nn/mkldnn/mkldnn_act.cc     |  6 ++--
 src/operator/nn/mkldnn/mkldnn_base-inl.h |  9 +++--
 src/operator/nn/mkldnn/mkldnn_base.cc    | 43 +++++++++++++++---------
 src/operator/nn/mkldnn/mkldnn_sum.cc     | 31 +++---------------
 tests/cpp/operator/mkldnn.cc             | 56 ++++++++++++++++++++++++++------
 5 files changed, 88 insertions(+), 57 deletions(-)

diff --git a/src/operator/nn/mkldnn/mkldnn_act.cc b/src/operator/nn/mkldnn/mkldnn_act.cc
index a278456..fae72bd 100644
--- a/src/operator/nn/mkldnn/mkldnn_act.cc
+++ b/src/operator/nn/mkldnn/mkldnn_act.cc
@@ -161,15 +161,15 @@ void MKLDNNActivationForward(const nnvm::NodeAttrs& attrs, const OpContext &ctx,
   const ActivationParam& param = nnvm::get<ActivationParam>(attrs.parsed);
 
   NDArray in_buffer = in_data;
+  MKLDNNStream *stream = MKLDNNStream::Get();
+
   if (in_data.IsView() && in_data.IsMKLDNNData())
     in_buffer = in_data.Reorder2Default();
 
   auto input_mem = in_buffer.GetMKLDNNData();
   MKLDNNActForward &fwd = GetActForward(param, ctx, in_buffer, *input_mem);
-  auto out_mem = CreateMKLDNNMem(out_data, fwd.fwd_pd.dst_primitive_desc(),
-                                 req);
+  auto out_mem = CreateMKLDNNMem(out_data, fwd.fwd_pd.dst_primitive_desc(), req, &in_buffer);
   fwd.SetNewMem(*input_mem, *out_mem.second);
-  MKLDNNStream *stream = MKLDNNStream::Get();
   stream->RegisterPrim(fwd.GetFwd());
   CommitOutput(out_data, out_mem);
   stream->Submit();
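Note on the mkldnn_act.cc hunk above: the stream is now acquired once at the top, and the (possibly reordered) input buffer is passed to CreateMKLDNNMem so the kWriteInplace path can be validated against the actual input. A minimal sketch of the resulting call pattern inside an MKLDNN operator follows; it assumes the surrounding MXNet scaffolding (fwd, in_data, out_data, req) and is illustrative rather than compilable on its own:

    // Sketch: typical MKLDNN operator forward path after this change.
    MKLDNNStream *stream = MKLDNNStream::Get();      // acquire the stream once
    NDArray in_buffer = in_data;
    if (in_data.IsView() && in_data.IsMKLDNNData())
      in_buffer = in_data.Reorder2Default();         // views fall back to default layout
    auto input_mem = in_buffer.GetMKLDNNData();
    // Passing &in_buffer lets CreateMKLDNNMem check whether writing in place is safe.
    mkldnn_output_t out_mem =
        CreateMKLDNNMem(out_data, fwd.fwd_pd.dst_primitive_desc(), req, &in_buffer);
    fwd.SetNewMem(*input_mem, *out_mem.second);
    stream->RegisterPrim(fwd.GetFwd());
    CommitOutput(out_data, out_mem);                 // no-op, copy back, or add back
    stream->Submit();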
diff --git a/src/operator/nn/mkldnn/mkldnn_base-inl.h b/src/operator/nn/mkldnn/mkldnn_base-inl.h
index bd2faf5..6a7c58f 100644
--- a/src/operator/nn/mkldnn/mkldnn_base-inl.h
+++ b/src/operator/nn/mkldnn/mkldnn_base-inl.h
@@ -324,13 +324,16 @@ typedef std::pair<OutDataOp, mkldnn::memory *> mkldnn_output_t;
  * The difference is that the first function can create MKLDNN memory with
  * special layouts in an NDArray, while the second one can only create MKLDNN
  * memory with default layouts.
+ * Also an optional in_arr parameter can be passed in the first function with
+ * the kWriteInplace req to validate whether mkldnn can write in place;
+ * otherwise new memory will be written to and copied back onto out_arr.
  * If these two functions are used, we have to call CommitOutput to write
  * the output back to the output NDArray.
  */
-mkldnn_output_t CreateMKLDNNMem(const NDArray &arr,
+mkldnn_output_t CreateMKLDNNMem(const NDArray &out_arr,
                                 const mkldnn::memory::primitive_desc &desc,
-                                OpReqType req);
-mkldnn_output_t CreateMKLDNNWeightGrad(const NDArray &arr,
+                                OpReqType req, const NDArray* in_arr = nullptr);
+mkldnn_output_t CreateMKLDNNWeightGrad(const NDArray &out_arr,
                                        const mkldnn::memory::primitive_desc &desc,
                                        OpReqType req);
 /* This function has to be used with one of the functions above. */
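The comment above describes a small contract around mkldnn_output_t, which pairs an OutDataOp tag with the memory the primitive should write into. A hedged summary of the possible outcomes, read off the mkldnn_base.cc change below (not normative documentation):

    // req == kAddTo                           -> {AddBack,  tmp}  // CommitOutput accumulates tmp into out_arr
    // req == kWriteInplace && CanWriteTo(...) -> {Noop,     out}  // the primitive writes out_arr directly
    // req == kWriteInplace, otherwise         -> {CopyBack, tmp}  // CommitOutput copies tmp onto out_arr
    // req == kWriteTo, desc representable     -> {Noop,     out}
    // req == kWriteTo, otherwise              -> {CopyBack, tmp}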
diff --git a/src/operator/nn/mkldnn/mkldnn_base.cc b/src/operator/nn/mkldnn/mkldnn_base.cc
index 1bd1581..b182aa0 100644
--- a/src/operator/nn/mkldnn/mkldnn_base.cc
+++ b/src/operator/nn/mkldnn/mkldnn_base.cc
@@ -77,29 +77,42 @@ mkldnn::memory *TmpMemMgr::Alloc(const mkldnn::memory::primitive_desc &pd) {
   }
 }
 
-mkldnn_output_t CreateMKLDNNMem(const NDArray &arr,
+bool CanWriteTo(const NDArray &out_arr,
+                const NDArray &in_arr,
+                const mkldnn::memory::primitive_desc &desc) {
+  auto in_mem = in_arr.GetMKLDNNData();
+  bool add_same = in_mem->get_data_handle() == out_arr.GetMKLDNNData()->get_data_handle();
+  bool pdesc_same = out_arr.GetMKLDNNData()->get_primitive_desc() == desc &&
+      in_mem->get_primitive_desc() == desc;
+  return add_same && pdesc_same;
+}
+
+mkldnn_output_t CreateMKLDNNMem(const NDArray &out_arr,
                                 const mkldnn::memory::primitive_desc &desc,
-                                OpReqType req) {
+                                OpReqType req,
+                                const NDArray* in_arr) {
   if (kAddTo == req) {
     auto tmp = TmpMemMgr::Get()->Alloc(desc);
     return mkldnn_output_t(OutDataOp::AddBack, tmp);
-  } else if (kWriteInplace == req) {
-    // MKLDNN ops may not support the case that the input and the output uses
-    // the same memory. Let's use an extra copy to make sure it always works.
+  } else if (req == kWriteInplace && in_arr != nullptr && CanWriteTo(out_arr, *in_arr, desc)) {
+    mkldnn::memory *mem = const_cast<NDArray &>(out_arr).CreateMKLDNNData(desc);
+    // mem is nullptr if out_arr is a view and desc is an MKLDNN format.
+    // need to Reorder2Default before calling CreateMKLDNNMem
+    CHECK(mem != nullptr);
+    return mkldnn_output_t(OutDataOp::Noop, mem);
+  } else if (req == kWriteInplace) {
+    auto tmp = TmpMemMgr::Get()->Alloc(desc);
+    return mkldnn_output_t(OutDataOp::CopyBack, tmp);
+  }
+  mkldnn::memory *mem = const_cast<NDArray &>(out_arr).CreateMKLDNNData(desc);
+  if (nullptr == mem) {
     auto tmp = TmpMemMgr::Get()->Alloc(desc);
     return mkldnn_output_t(OutDataOp::CopyBack, tmp);
-  } else {
-    mkldnn::memory *mem = const_cast<NDArray &>(arr).CreateMKLDNNData(desc);
-    if (mem == nullptr) {
-      auto tmp = TmpMemMgr::Get()->Alloc(desc);
-      return mkldnn_output_t(OutDataOp::CopyBack, tmp);
-    } else {
-      return mkldnn_output_t(OutDataOp::Noop, mem);
-    }
   }
+  return mkldnn_output_t(OutDataOp::Noop, mem);
 }
 
-mkldnn_output_t CreateMKLDNNWeightGrad(const NDArray &arr,
+mkldnn_output_t CreateMKLDNNWeightGrad(const NDArray &out_arr,
                                        const mkldnn::memory::primitive_desc &desc,
                                        OpReqType req) {
   if (kAddTo == req) {
@@ -113,7 +126,7 @@ mkldnn_output_t CreateMKLDNNWeightGrad(const NDArray &arr,
     auto def_format = GetDefaultFormat(_desc.desc());
     mkldnn::memory *mem = nullptr;
     if (def_format == _desc.desc().data.format) {
-      mem = const_cast<NDArray &>(arr).CreateMKLDNNData(desc);
+      mem = const_cast<NDArray &>(out_arr).CreateMKLDNNData(desc);
     }
     if (mem == nullptr) {
       auto tmp = TmpMemMgr::Get()->Alloc(desc);
diff --git a/src/operator/nn/mkldnn/mkldnn_sum.cc b/src/operator/nn/mkldnn/mkldnn_sum.cc
index fdbfb15..c51e108 100644
--- a/src/operator/nn/mkldnn/mkldnn_sum.cc
+++ b/src/operator/nn/mkldnn/mkldnn_sum.cc
@@ -58,7 +58,6 @@ void MKLDNNSumForward(const nnvm::NodeAttrs& attrs, const OpContext &ctx,
   std::vector<mkldnn::memory::primitive_desc> in_pds(inputs.size());
   std::vector<float> scales(inputs.size(), 1);
   in_prims.reserve(inputs.size());
-  bool pd_same = true;
   std::vector<NDArray> in_bufs(inputs.size());
   for (size_t i = 0; i < inputs.size(); i++) {
     const mkldnn::memory *in_mem;
@@ -73,31 +72,11 @@ void MKLDNNSumForward(const nnvm::NodeAttrs& attrs, const OpContext &ctx,
   }
 
   mkldnn::sum::primitive_desc pdesc(scales, in_pds);
-  pd_same = pd_same && (pdesc.dst_primitive_desc() == in_pds[0]);
-  auto out_mem = const_cast<NDArray&>(out_data).CreateMKLDNNData(pdesc.dst_primitive_desc());
-  bool addr_same = false;
-  const void *first_data_handle;
-  if (in_bufs[0].is_none())
-    first_data_handle = inputs[0].GetMKLDNNData()->get_data_handle();
-  else
-    first_data_handle = in_bufs[0].GetMKLDNNData()->get_data_handle();
-  if (out_mem)
-    addr_same = out_mem->get_data_handle() == first_data_handle;
-  if (((req == kWriteTo) || (req == kWriteInplace && pd_same && addr_same))
-      && out_mem) {
-    // do sum computation directly on output NDArray
-    MKLDNNStream *stream = MKLDNNStream::Get();
-    stream->RegisterPrim(mkldnn::sum(pdesc, in_prims, *out_mem));
-    stream->Submit();
-  } else {
-    // req == kWriteInplace but cannot be handled by mkldnn and
-    // req == kAddTo will run into this branch
-    auto mem = CreateMKLDNNMem(out_data, pdesc.dst_primitive_desc(), req);
-    MKLDNNStream *stream = MKLDNNStream::Get();
-    stream->RegisterPrim(mkldnn::sum(pdesc, in_prims, *mem.second));
-    CommitOutput(out_data, mem);
-    stream->Submit();
-  }
+  auto mem = CreateMKLDNNMem(out_data, pdesc.dst_primitive_desc(), req, &inputs[0]);
+  MKLDNNStream *stream = MKLDNNStream::Get();
+  stream->RegisterPrim(mkldnn::sum(pdesc, in_prims, *mem.second));
+  CommitOutput(out_data, mem);
+  stream->Submit();
 }
 
 }  // namespace op
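CommitOutput (declared in mkldnn_base-inl.h above) is the other half of this contract: it consumes the OutDataOp tag produced by CreateMKLDNNMem. The sketch below is a plausible reconstruction from the tags used in this patch, not the tree's actual implementation; the helper name CommitOutputSketch is hypothetical:

    // Hypothetical sketch of the write-back step keyed on OutDataOp.
    void CommitOutputSketch(const NDArray &arr, const mkldnn_output_t &res) {
      if (res.first == OutDataOp::CopyBack) {
        // Result landed in a temporary buffer; copy it onto the output array.
        const_cast<NDArray &>(arr).CopyFrom(*res.second);
      } else if (res.first == OutDataOp::AddBack) {
        // kAddTo: accumulate the temporary result into the existing output memory.
        auto out_mem = const_cast<NDArray &>(arr).GetMKLDNNData();
        op::MKLDNNSum(*out_mem, *res.second, *out_mem);
      }
      // OutDataOp::Noop: the primitive already wrote the output in place.
    }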
diff --git a/tests/cpp/operator/mkldnn.cc b/tests/cpp/operator/mkldnn.cc
index a7a1187..82fee67 100644
--- a/tests/cpp/operator/mkldnn.cc
+++ b/tests/cpp/operator/mkldnn.cc
@@ -426,30 +426,45 @@ OpAttrs GetSumOp() {
  *    reordered to 5 dimensions.
  *
  */
-std::vector<NDArrayAttrs> GetTestInputArrays(InitFunc init_fn) {
+std::vector<NDArrayAttrs> GetTestInputArrays(InitFunc init_fn, bool rand = false) {
   TestArrayShapes tas = GetTestArrayShapes();
   std::vector<nnvm::TShape> shapes = tas.shapes;
   std::vector<mkldnn::memory::primitive_desc> pds = tas.pds;
 
   std::vector<NDArrayAttrs> in_arrs;
+  std::string desc;
   for (auto shape : shapes) {
     // Type 1.
     NDArray arr(shape, Context());
     in_arrs.emplace_back(arr, "Normal NDArray");
-    init_fn(&in_arrs.back().arr, false);
+    init_fn(&in_arrs.back().arr, rand);
     for (auto pd : pds) {
       if (shape.Size() != pd.get_size() / sizeof(mshadow::default_real_t))
         continue;
 
       // Type 2, 3.
       arr = NDArray(shape, Context());
-      in_arrs.emplace_back(arr, "MKLDNN NDArray");
+      desc = "MKLDNN NDArray";
+      if (shape.ndim() != pd.desc().data.ndims) {
+        std::stringstream ss;
+        ss << "MKLDNN NDArray with different memory layout " <<
+           shape.ndim() << "/" << pd.desc().data.ndims;
+        desc = ss.str();
+      }
+      in_arrs.emplace_back(arr, desc);
       InitMKLDNNArray(&in_arrs.back().arr, pd, init_fn);
 
       // Type 4, 5, 6.
       arr = NDArray(shape, Context());
+      desc = "Reshaped MKLDNN NDArray";
+      if (shape.ndim() != pd.desc().data.ndims) {
+        std::stringstream ss;
+        ss << "Reshaped MKLDNN NDArray with different memory layout "
+           << shape.ndim() << "/" << pd.desc().data.ndims;
+        desc = ss.str();
+      }
       InitMKLDNNArray(&arr, pd, init_fn);
-      in_arrs.emplace_back(arr.Slice(1, arr.shape()[0] - 1), "Reshaped MKLDNN NDArray");
+      in_arrs.emplace_back(arr.Slice(1, arr.shape()[0] - 1), desc);
     }
   }
   return in_arrs;
@@ -496,6 +511,7 @@ std::vector<NDArrayAttrs> GetTestOutputArrays(const TShape &shape,
                                               const std::vector<mkldnn::memory::primitive_desc> &pds,
                                               const InitFunc init_fn) {
   std::vector<NDArrayAttrs> in_arrs;
+  std::string desc;
   // Type 1.
   NDArray arr(shape, Context());
   in_arrs.emplace_back(arr, "Normal NDArray");
@@ -539,7 +555,14 @@ std::vector<NDArrayAttrs> GetTestOutputArrays(const TShape &shape,
 
     // Type 2, 3.
     arr = NDArray(shape, Context());
-    in_arrs.emplace_back(arr, "MKLDNN NDArray");
+    desc = "MKLDNN NDArray";
+    if (shape.ndim() != pd.desc().data.ndims) {
+      std::stringstream ss;
+      ss << "MKLDNN NDArray with different memory layout "
+         << shape.ndim() << "/" << pd.desc().data.ndims;
+      desc = ss.str();
+    }
+    in_arrs.emplace_back(arr, desc);
     InitMKLDNNArray(&in_arrs.back().arr, pd, init_fn, true);
 
     // Type 8, 9.
@@ -549,7 +572,14 @@ std::vector<NDArrayAttrs> GetTestOutputArrays(const TShape &shape,
     NDArray arr = NDArray(s, Context());
     arr = arr.AsArray(shape, arr.dtype());
     InitMKLDNNArray(&arr, pd, init_fn, true);
-    in_arrs.emplace_back(arr, "Reused MKLDNN NDArray");
+    desc = "Reused MKLDNN NDArray";
+    if (shape.ndim() != pd.desc().data.ndims) {
+      std::stringstream ss;
+      ss << "Reused MKLDNN NDArray with different memory layout "
+         << shape.ndim() << "/" << pd.desc().data.ndims;
+      desc = ss.str();
+    }
+    in_arrs.emplace_back(arr, desc);
   }
   return in_arrs;
 }
@@ -588,7 +618,7 @@ void VerifySumResult(const std::vector<NDArray *> &in_arrs, const NDArray &arr) {
   mshadow::default_real_t *d2 = in2.data().dptr<mshadow::default_real_t>();
   mshadow::default_real_t *o = out.data().dptr<mshadow::default_real_t>();
   for (size_t i = 0; i < in1.shape().Size(); i++)
-    EXPECT_EQ(d1[i] + d2[i], o[i]);
+    ASSERT_EQ(d1[i] + d2[i], o[i]);
 }
 
 void PrintVerifyMsg(const NDArrayAttrs &arr1, const NDArrayAttrs &arr2) {
@@ -748,10 +778,13 @@ void VerifySumMemory(mkldnn::memory in_mem1, mkldnn::memory in_mem2, mkldnn::mem
 
 TEST(MKLDNN_BASE, MKLDNNSum) {
   std::vector<NDArrayAttrs> in_arrs = GetTestInputArrays(InitDefaultArray);
+  std::vector<NDArrayAttrs> in_arrs2 = GetTestInputArrays(InitDefaultArray, true);
   TestArrayShapes tas = GetTestArrayShapes();
   std::vector<mkldnn::memory::primitive_desc> pds = tas.pds;
 
-  for (auto in_arr : in_arrs) {
+  for (int i = 0; i < in_arrs.size(); i++) {
+    auto in_arr = in_arrs[i];
+    auto in_arr2 = in_arrs2[i];
     std::vector<NDArrayAttrs> out_arrs = GetTestOutputArrays(in_arr.arr.shape(), pds, InitDefaultArray);
     if (!SupportMKLDNN(in_arr.arr) || !in_arr.arr.IsMKLDNNData() || in_arr.arr.IsView())
@@ -761,6 +794,8 @@ TEST(MKLDNN_BASE, MKLDNNSum) {
       auto in_mem1 = in_arr.arr.GetMKLDNNData();
       auto in_mem2 = in_arr.arr.GetMKLDNNData();
       auto out_mem = out_arr.arr.GetMKLDNNData(in_mem1->get_primitive_desc());
+
+      // TODO(alexzai): remove this noop by reordering in MKLDNNSum
       if (out_mem == nullptr)
         continue;
       PrintVerifyMsg(in_arr, in_arr);
@@ -771,14 +806,15 @@ TEST(MKLDNN_BASE, MKLDNNSum) {
 
     // in place
     auto input_mem = in_arr.arr.GetMKLDNNData();
+    auto input_mem2 = in_arr2.arr.GetMKLDNNData();
     NDArrayAttrs orig_arr(in_arr.arr.Copy(in_arr.arr.ctx()), "In Place Copy");
     PrintVerifyMsg(orig_arr, in_arr);
     InitMKLDNNArray(&orig_arr.arr, input_mem->get_primitive_desc(), InitDefaultArray);
     orig_arr.arr.CopyFrom(*input_mem);
     auto old_mem = orig_arr.arr.GetMKLDNNData();
-    op::MKLDNNSum(*input_mem, *input_mem, *input_mem);
+    op::MKLDNNSum(*input_mem, *input_mem2, *input_mem);
     MKLDNNStream::Get()->Submit();
-    VerifySumMemory(*old_mem, *old_mem, *input_mem);
+    VerifySumMemory(*old_mem, *input_mem2, *input_mem);
   }
 }

--
To stop receiving notification emails like this one, please contact
j...@apache.org.