[GitHub] piiswrong closed pull request #10731: Fix a bug in getting MKLDNN memory

GitBox Thu, 03 May 2018 10:27:12 -0700

piiswrong closed pull request #10731: Fix a bug in getting MKLDNN memory
URL: https://github.com/apache/incubator-mxnet/pull/10731


This pull request was merged from a forked repository.
Because GitHub does not display the original diff of a pull request
from a fork once it has been merged, the diff is reproduced below for
the sake of provenance:

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 363237f909e..cf154484d57 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -159,8 +159,12 @@ endif()
 
 if(USE_MKL_IF_AVAILABLE)
   if(USE_MKLDNN)
+    # We need to use generic archtecture. Otherwise, MKLDNN compiled in one
+    # CPU architecture (e.g., C5) can't run on another architecture (e.g., g3).
+    set(ARCH_OPT_FLAGS "-mtune=generic")
     add_subdirectory(3rdparty/mkldnn)
     include_directories(3rdparty/mkldnn/include)
+    add_definitions(-DMXNET_USE_MKLDNN=1)
     list(APPEND mxnet_LINKER_LIBS mkldnn)
   endif()
   find_package(MKL)
@@ -169,10 +173,6 @@ if(USE_MKL_IF_AVAILABLE)
     include_directories(${MKL_INCLUDE_DIR})
     include_directories(${CMAKE_CURRENT_SOURCE_DIR}/src/operator/mkl)
 
-    if(USE_MKLDNN)
-      add_definitions(-DMXNET_USE_MKLDNN=1)
-    endif()
-
     add_definitions(-DUSE_MKL=1)
     add_definitions(-DCUB_MKL=1)
     list(APPEND mxnet_LINKER_LIBS ${MKL_LIBRARIES})
diff --git a/Jenkinsfile b/Jenkinsfile
index 5601c52df1c..7a08acc38a5 100644
--- a/Jenkinsfile
+++ b/Jenkinsfile
@@ -26,7 +26,7 @@ mx_lib = 'lib/libmxnet.so, lib/libmxnet.a, 
3rdparty/dmlc-core/libdmlc.a, 3rdpart
 mx_dist_lib = 'lib/libmxnet.so, lib/libmxnet.a, 3rdparty/dmlc-core/libdmlc.a, 
3rdparty/nnvm/lib/libnnvm.a, 3rdparty/ps-lite/build/libps.a, 
deps/lib/libprotobuf-lite.a, deps/lib/libzmq.a'
 // mxnet cmake libraries, in cmake builds we do not produce a libnvvm static 
library by default.
 mx_cmake_lib = 'build/libmxnet.so, build/libmxnet.a, 
build/3rdparty/dmlc-core/libdmlc.a, build/tests/mxnet_unit_tests, 
build/3rdparty/openmp/runtime/src/libomp.so'
-mx_cmake_mkldnn_lib = 'build/libmxnet.so, build/libmxnet.a, 
build/3rdparty/dmlc-core/libdmlc.a, build/tests/mxnet_unit_tests, 
build/3rdparty/openmp/runtime/src/libomp.so, 
build/3rdparty/mkldnn/src/libmkldnn.so, 
build/3rdparty/mkldnn/src/libmkldnn.so.0'
+mx_cmake_mkldnn_lib = 'build/libmxnet.so, build/libmxnet.a, 
build/3rdparty/dmlc-core/libdmlc.a, build/tests/mxnet_unit_tests, 
build/3rdparty/openmp/runtime/src/libomp.so, 
build/3rdparty/mkldnn/src/libmkldnn.so.0'
 mx_mkldnn_lib = 'lib/libmxnet.so, lib/libmxnet.a, lib/libiomp5.so, 
lib/libmkldnn.so.0, lib/libmklml_intel.so, 3rdparty/dmlc-core/libdmlc.a, 
3rdparty/nnvm/lib/libnnvm.a'
 // command to start a docker container
 docker_run = 'tests/ci_build/ci_build.sh'
@@ -574,6 +574,17 @@ try {
         }
       }
     },
+    'Cpp: MKLDNN+GPU': {
+      node('mxnetlinux-gpu') {
+        ws('workspace/ut-cpp-mkldnn-gpu') {
+          timeout(time: max_time, unit: 'MINUTES') {
+            init_git()
+            unpack_lib('cmake_mkldnn_gpu', mx_cmake_mkldnn_lib)
+            sh "ci/build.py --nvidiadocker --platform ubuntu_gpu 
/work/runtime_functions.sh unittest_ubuntu_gpu_cpp"
+          }
+        }
+      }
+    },
     'R: CPU': {
       node('mxnetlinux-cpu') {
         ws('workspace/ut-r-cpu') {
diff --git a/ci/docker/runtime_functions.sh b/ci/docker/runtime_functions.sh
index 027e287f751..7abe767c869 100755
--- a/ci/docker/runtime_functions.sh
+++ b/ci/docker/runtime_functions.sh
@@ -323,6 +323,9 @@ build_ubuntu_gpu_cmake_mkldnn() {
         /work/mxnet
 
     ninja -v
+    # libmkldnn.so.0 is a link file. We need an actual binary file named 
libmkldnn.so.0.
+    cp 3rdparty/mkldnn/src/libmkldnn.so.0 
3rdparty/mkldnn/src/libmkldnn.so.0.tmp
+    mv 3rdparty/mkldnn/src/libmkldnn.so.0.tmp 
3rdparty/mkldnn/src/libmkldnn.so.0
 }
 
 build_ubuntu_gpu_cmake() {
diff --git a/src/ndarray/ndarray.cc b/src/ndarray/ndarray.cc
index b428c2cbefb..82de0949ccc 100644
--- a/src/ndarray/ndarray.cc
+++ b/src/ndarray/ndarray.cc
@@ -485,8 +485,8 @@ const mkldnn::memory *NDArray::GetMKLDNNData(
 }
 
 const mkldnn::memory *NDArray::GetMKLDNNDataReorder(
-    const mkldnn::memory::primitive_desc &desc) const {
-  if (desc.get_size() != shape().Size() * GetTypeSize(dtype_)) {
+    const mkldnn::memory::primitive_desc &new_pd) const {
+  if (new_pd.get_size() != shape().Size() * GetTypeSize(dtype_)) {
     LOG(FATAL) << "The size of NDArray doesn't match the requested MKLDNN 
memory desc";
     return nullptr;
   }
@@ -495,24 +495,41 @@ const mkldnn::memory *NDArray::GetMKLDNNDataReorder(
   const mkldnn::memory *mem = GetMKLDNNData();
   // If the memory descriptor matches, it's easy.
   MKLDNNStream *stream = MKLDNNStream::Get();
-  if (mem->get_primitive_desc() == desc) {
-    return GetMKLDNNExact(mem, desc);
+  if (mem->get_primitive_desc() == new_pd) {
+    return GetMKLDNNExact(mem, new_pd);
   }
 
-  mkldnn::memory::primitive_desc _desc = desc;
+  mkldnn::memory::primitive_desc _pd = new_pd;
+  mkldnn::memory::desc desc1 = mem->get_primitive_desc().desc();
+  mkldnn::memory::desc desc2 = _pd.desc();
   // Now we need to determine if we should reorder the memory.
   // If both use the default formats, we think we don't need to reorder.
-  mkldnn::memory::desc desc1 = mem->get_primitive_desc().desc();
-  mkldnn::memory::desc desc2 = _desc.desc();
   if (desc1.data.format == GetDefaultFormat(desc1) &&
       desc2.data.format == GetDefaultFormat(desc2)) {
-    mkldnn_mem_ptr ret(new mkldnn::memory(desc, mem->get_data_handle()));
+    mkldnn_mem_ptr ret(new mkldnn::memory(new_pd, mem->get_data_handle()));
     stream->RegisterMem(ret);
     return ret.get();
-  } else {
-    mkldnn::memory *ret = TmpMemMgr::Get()->Alloc(desc);
+  } else if (same_shape(desc1, desc2)) {
+    // If they have the same shape, we can reorder data directly.
+    mkldnn::memory *ret = TmpMemMgr::Get()->Alloc(new_pd);
     stream->RegisterPrim(mkldnn::reorder(*mem, *ret));
     return ret;
+  } else {
+    // If they have different shapes, we need to reshape the array first.
+    // Since this method will only be used inside an operator, we can call
+    // MKLDNNDataReshape to reshape an array.
+    TShape required_shape(desc2.data.ndims);
+    for (int i = 0; i < desc2.data.ndims; i++)
+      required_shape[i] = desc2.data.dims[i];
+    NDArray reshaped = MKLDNNDataReshape(required_shape);
+    const mkldnn::memory *ret = reshaped.GetMKLDNNData();
+    if (ret->get_primitive_desc() == new_pd) {
+      return GetMKLDNNExact(ret, new_pd);
+    } else {
+      mkldnn::memory *ret2 = TmpMemMgr::Get()->Alloc(new_pd);
+      stream->RegisterPrim(mkldnn::reorder(*ret, *ret2));
+      return ret2;
+    }
   }
 }
 
@@ -566,10 +583,15 @@ void NDArray::MKLDNNDataReorderAsync(const 
mkldnn::memory::primitive_desc &desc)
 
 const mkldnn::memory *NDArray::GetMKLDNNData() const {
   CHECK(storage_type() == kDefaultStorage);
-  // If this array uses MKLDNN layout, we have to make sure it's not a view.
-  // Otherwise, we'll have to change the layout inside the array.
-  if (IsMKLDNNData())
+  if (IsMKLDNNData()) {
+    // If this array uses MKLDNN layout, we have to make sure it's not a view.
+    // Otherwise, we'll have to change the layout inside the array.
     CHECK(!IsView());
+    MKLDNNStream::Get()->RegisterMem(ptr_->mkl_mem_->GetMem());
+    // If this array uses MKLDNN format, we should return now. Otherwise,
+    // SetMKLMem may mess up mkl_mem_.
+    return ptr_->mkl_mem_->GetRaw();
+  }
   ptr_->SetMKLMem(IsView() ? ptr_->storage_shape : shape_, dtype_);
   MKLDNNStream::Get()->RegisterMem(ptr_->mkl_mem_->GetMem());
   if (IsView()) {
diff --git a/src/operator/nn/mkldnn/mkldnn_base-inl.h 
b/src/operator/nn/mkldnn/mkldnn_base-inl.h
index 489351ebe2c..52f1c0b9772 100644
--- a/src/operator/nn/mkldnn/mkldnn_base-inl.h
+++ b/src/operator/nn/mkldnn/mkldnn_base-inl.h
@@ -272,12 +272,11 @@ class MKLDNNStream {
   std::vector<std::shared_ptr<const mkldnn::memory> > mem_holder;
 
  public:
-  static MKLDNNStream *Get() {
-    static thread_local MKLDNNStream stream;
-    return &stream;
-  }
+  static MKLDNNStream *Get();
 
-  void RegisterPrim(const mkldnn::primitive &prim) { net.push_back(prim); }
+  void RegisterPrim(const mkldnn::primitive &prim) {
+    net.push_back(prim);
+  }
 
   void RegisterMem(std::shared_ptr<const mkldnn::memory> mem) {
     mem_holder.push_back(mem);
@@ -287,10 +286,21 @@ class MKLDNNStream {
     return !net.empty();
   }
 
-  void Submit() {
-    if (!net.empty())
+  /*
+   * After submitting mkldnn operations for execution, we need to
+   * clean up memory held by the stream. However, sometimes users
+   * might want to separate mkldnn execution and memory cleanup.
+   */
+  void Submit(bool cleanup = true) {
+    if (!net.empty()) {
       mkldnn::stream(mkldnn::stream::kind::eager).submit(net).wait();
-    net.clear();
+      net.clear();
+    }
+    if (cleanup)
+      Cleanup();
+  }
+
+  void Cleanup() {
     mem_holder.clear();
     TmpMemMgr::Get()->Reset();
   }
@@ -348,6 +358,16 @@ inline bool same_shape(const TShape &shape, const 
mkldnn_dims_t dims, int ndims)
   return true;
 }
 
+inline bool same_shape(const mkldnn::memory::desc &desc1,
+                       const mkldnn::memory::desc &desc2) {
+  if (desc1.data.ndims != desc2.data.ndims)
+    return false;
+  for (int i = 0; i < desc1.data.ndims; i++)
+    if (desc1.data.dims[i] != desc2.data.dims[i])
+      return false;
+  return true;
+}
+
 inline bool same_shape(const TShape &shape, int dtype,
                        const mkldnn::memory::desc &desc) {
   return same_shape(shape, desc.data.dims, desc.data.ndims)
diff --git a/src/operator/nn/mkldnn/mkldnn_base.cc 
b/src/operator/nn/mkldnn/mkldnn_base.cc
index df37db5e780..c0e1ee6aaa6 100644
--- a/src/operator/nn/mkldnn/mkldnn_base.cc
+++ b/src/operator/nn/mkldnn/mkldnn_base.cc
@@ -25,6 +25,11 @@
 
 namespace mxnet {
 
+MKLDNNStream *MKLDNNStream::Get() {
+  static thread_local MKLDNNStream stream;
+  return &stream;
+}
+
 void *AlignMem(void *mem, size_t size, size_t alignment, size_t *space) {
   if (size > *space)
     return nullptr;
@@ -57,8 +62,11 @@ mkldnn::memory *TmpMemMgr::Alloc(const 
mkldnn::memory::primitive_desc &pd) {
     this->curr_mem = static_cast<char *>(mem) + pd.get_size();
     return ret.get();
   } else {
-    LOG(WARNING) << "Allocate " << pd.get_size()
-        << " bytes with malloc directly";
+    // If curr_mem has been initialized and we still reach here. It means
+    // the current allocated memory isn't enough.
+    if (this->curr_mem)
+      LOG(WARNING) << "Allocate " << pd.get_size()
+          << " bytes with malloc directly";
     mkldnn_mem_ptr ret(new mkldnn::memory(pd));
     MKLDNNStream::Get()->RegisterMem(ret);
     return ret.get();
diff --git a/tests/cpp/include/test_core_op.h b/tests/cpp/include/test_core_op.h
index 7dc05fda2cc..c39373b1b79 100644
--- a/tests/cpp/include/test_core_op.h
+++ b/tests/cpp/include/test_core_op.h
@@ -410,7 +410,7 @@ class CoreOpExecutor : public 
test::op::OperatorDataInitializer<DType>
           if (bwd_node_ptr) {
             CHECK_EQ(bwd_node_ptr->inputs.size(), num_inputs);
             input_types.resize(bwd_node_ptr->inputs.size(), -1);
-            for (size_t i = 0; i < num_inputs; ++i) {
+            for (int i = 0; i < num_inputs; ++i) {
               const int map_key = bwd_node_ptr->inputs[i].index;
               CHECK(index2array.find(map_key) != index2array.end());
               const int dtype = index2array[map_key]->dtype();
@@ -421,7 +421,7 @@ class CoreOpExecutor : public 
test::op::OperatorDataInitializer<DType>
               output_types.emplace_back(dtype);
             }
           } else {
-            for (size_t x = 0; x < num_inputs; ++x) {
+            for (int x = 0; x < num_inputs; ++x) {
               input_types.emplace_back(default_dtype());
             }
             for (const auto &fwd_inp : backward_for_op->inputs()) {
@@ -431,10 +431,10 @@ class CoreOpExecutor : public 
test::op::OperatorDataInitializer<DType>
           }
         } else {
           CHECK(false);  // above always true?
-          for (size_t x = 0; x < num_inputs; ++x) {
+          for (int x = 0; x < num_inputs; ++x) {
             input_types.emplace_back(default_dtype());
           }
-          for (size_t x = 0; x < inferred_num_outputs; ++x) {
+          for (int x = 0; x < inferred_num_outputs; ++x) {
             output_types.emplace_back(default_dtype());
           }
         }
@@ -455,7 +455,7 @@ class CoreOpExecutor : public 
test::op::OperatorDataInitializer<DType>
             if (bwd_node_ptr) {
               input_shapes.clear();
               CHECK_EQ(bwd_node_ptr->inputs.size(), num_inputs);
-              for (size_t i = 0; i < num_inputs; ++i) {
+              for (int i = 0; i < num_inputs; ++i) {
                 const int map_key = bwd_node_ptr->inputs[i].index;
                 CHECK(index2array.find(map_key) != index2array.end());
                 const nnvm::TShape &shp = index2array[map_key]->shape();
diff --git a/tests/cpp/operator/mkldnn.cc b/tests/cpp/operator/mkldnn.cc
index 9d4b9823037..58ad894e36b 100644
--- a/tests/cpp/operator/mkldnn.cc
+++ b/tests/cpp/operator/mkldnn.cc
@@ -28,6 +28,8 @@
 #include "gtest/gtest.h"
 #include "../../src/operator/nn/mkldnn/mkldnn_base-inl.h"
 
+using namespace mxnet;
+
 #if __GNUC__ >= 5
 bool test_mem_align(void *mem, size_t size, size_t alignment, size_t space) {
   void *ret1, *ret2;
@@ -84,4 +86,250 @@ TEST(MKLDNN_UTIL_FUNC, MemFormat) {
   CHECK_EQ(mkldnn_nchw, 5);
   CHECK_EQ(mkldnn_oihw, 12);
 }
+
+// Init arrays with the default layout.
+static void InitArray(NDArray *arr) {
+  const TBlob &blob = arr->data();
+  mshadow::default_real_t *data = blob.dptr<mshadow::default_real_t>();
+  size_t size = blob.Size();
+  for (size_t i = 0; i < size; i++)
+    data[i] = i;
+}
+
+// Init arrays with the specified layout.
+static void InitMKLDNNArray(NDArray *arr, const mkldnn::memory::primitive_desc 
&pd) {
+  const TBlob &blob = arr->data();
+  mshadow::default_real_t *data = blob.dptr<mshadow::default_real_t>();
+  size_t size = blob.Size();
+  for (size_t i = 0; i < size; i++)
+    data[i] = i;
+  arr->MKLDNNDataReorderAsync(pd);
+  arr->WaitToRead();
+}
+
+static void VerifyDefMem(const mkldnn::memory &mem) {
+  mkldnn::memory::primitive_desc pd = mem.get_primitive_desc();
+  mshadow::default_real_t *data
+      = static_cast<mshadow::default_real_t *>(mem.get_data_handle());
+  size_t size = pd.get_size() / sizeof(mshadow::default_real_t);
+  size_t num_same = 0;
+  for (size_t i = 0; i < size; i++)
+    num_same += data[i] == static_cast<mshadow::default_real_t>(i);
+  EXPECT_EQ(num_same, size);
+}
+
+static void VerifyMem(const mkldnn::memory &mem) {
+  mkldnn::memory::primitive_desc pd = mem.get_primitive_desc();
+
+  if (pd.desc().data.format == GetDefaultFormat(pd.desc())) {
+    VerifyDefMem(mem);
+  } else {
+    mkldnn::memory::dims dims(pd.desc().data.ndims);
+    for (size_t i = 0; i < dims.size(); i++)
+      dims[i] = pd.desc().data.dims[i];
+    mkldnn::memory::desc desc{dims,
+                              
static_cast<mkldnn::memory::data_type>(pd.desc().data.data_type),
+                              
static_cast<mkldnn::memory::format>(GetDefaultFormat(pd.desc()))};
+    mkldnn::memory::primitive_desc new_pd(desc, 
CpuEngine::Get()->get_engine());
+    mkldnn::memory new_mem(new_pd);
+
+    std::vector<mkldnn::primitive> net;
+    net.push_back(mkldnn::reorder(mem, new_mem));
+    mkldnn::stream(mkldnn::stream::kind::eager).submit(net).wait();
+    VerifyDefMem(new_mem);
+  }
+}
+
+static mkldnn::memory::primitive_desc GetMemPD(const TShape s, int dtype,
+                                               mkldnn::memory::format format) {
+  mkldnn::memory::dims dims(s.ndim());
+  for (size_t i = 0; i < dims.size(); i++)
+    dims[i] = s[i];
+  mkldnn::memory::desc desc{dims, get_mkldnn_type(dtype), format};
+  return mkldnn::memory::primitive_desc(desc, CpuEngine::Get()->get_engine());
+}
+
+// This function gets special MKLDNN formats without knowing the specific
+// hardware configuration. Certainly, it potentially misses some format if
+// it's specific for certain array shapes. It covers at least one special 
format
+// for each of the formats: nchw, oihw, goihw.
+// To test the logic of the code in NDArray, these formats should be enough.
+static std::vector<mkldnn::memory::format> GetMKLDNNFormat(size_t num_dims, 
int dtype) {
+  if (num_dims == 4) {
+    mkldnn::memory::dims data_dims{1, 3, 224, 224};
+    mkldnn::memory::desc data_md{data_dims, get_mkldnn_type(dtype),
+                                 mkldnn::memory::format::any};
+    mkldnn::memory::dims weight_dims{96, 3, 11, 11};
+    mkldnn::memory::desc weight_md{weight_dims, get_mkldnn_type(dtype),
+                                   mkldnn::memory::format::any};
+    mkldnn::memory::dims output_dims{1, 96, 54, 54};
+    mkldnn::memory::desc out_md{output_dims, get_mkldnn_type(dtype),
+                                mkldnn::memory::format::any};
+    mkldnn::memory::dims strides{4, 4};
+    mkldnn::memory::dims padding{0, 0};
+
+    mkldnn::convolution_forward::desc desc(mkldnn::prop_kind::forward_training,
+                                           
mkldnn::algorithm::convolution_direct,
+                                           data_md, weight_md, out_md, strides,
+                                           padding, padding, 
mkldnn::padding_kind::zero);
+    mkldnn::convolution_forward::primitive_desc pd(desc, 
CpuEngine::Get()->get_engine());
+    std::vector<mkldnn::memory::format> ret(2);
+    ret[0] = 
static_cast<mkldnn::memory::format>(pd.dst_primitive_desc().desc().data.format);
+    ret[1] = 
static_cast<mkldnn::memory::format>(pd.weights_primitive_desc().desc().data.format);
+    printf("format: %d, %d\n", ret[0], ret[1]);
+    return ret;
+  } else if (num_dims == 5) {
+    mkldnn::memory::dims data_dims{1, 32, 112, 112};
+    mkldnn::memory::desc data_md{data_dims, get_mkldnn_type(dtype),
+                                 mkldnn::memory::format::any};
+    mkldnn::memory::dims weight_dims{32, 1, 1, 3, 3};
+    mkldnn::memory::desc weight_md{weight_dims, get_mkldnn_type(dtype),
+                                   mkldnn::memory::format::any};
+    mkldnn::memory::dims output_dims{1, 32, 112, 112};
+    mkldnn::memory::desc out_md{output_dims, get_mkldnn_type(dtype),
+                                mkldnn::memory::format::any};
+    mkldnn::memory::dims strides{1, 1};
+    mkldnn::memory::dims padding{1, 1};
+
+    mkldnn::convolution_forward::desc desc(mkldnn::prop_kind::forward_training,
+                                           
mkldnn::algorithm::convolution_direct,
+                                           data_md, weight_md, out_md, strides,
+                                           padding, padding, 
mkldnn::padding_kind::zero);
+    mkldnn::convolution_forward::primitive_desc pd(desc, 
CpuEngine::Get()->get_engine());
+    std::vector<mkldnn::memory::format> ret(1);
+    ret[0] = 
static_cast<mkldnn::memory::format>(pd.weights_primitive_desc().desc().data.format);
+    printf("format: %d\n", ret[0]);
+    return ret;
+  } else {
+    return std::vector<mkldnn::memory::format>();
+  }
+}
+
+struct TestArrayShapes {
+  std::vector<TShape> shapes;
+  std::vector<mkldnn::memory::primitive_desc> pds;
+};
+
+static TestArrayShapes GetTestArrayShapes() {
+  int dtype = mshadow::DataType<mshadow::default_real_t>::kFlag;
+  std::vector<TShape> shapes;
+  std::vector<mkldnn::memory::primitive_desc> pds;
+  {
+    // 1D
+    TShape s(1);
+    s[0] = 279936;
+    shapes.push_back(s);
+    pds.push_back(GetMemPD(s, dtype, mkldnn::memory::format::x));
+    s[0] = 34848;
+    shapes.push_back(s);
+    pds.push_back(GetMemPD(s, dtype, mkldnn::memory::format::x));
+  }
+  {
+    // 2D
+    TShape s(2);
+    s[0] = 96;
+    s[1] = 2916;
+    shapes.push_back(s);
+    pds.push_back(GetMemPD(s, dtype, mkldnn::memory::format::nc));
+    s[0] = 96;
+    s[1] = 363;
+    shapes.push_back(s);
+    pds.push_back(GetMemPD(s, dtype, mkldnn::memory::format::nc));
+  }
+  {
+    // 4D
+    TShape s1(4);
+    s1[0] = 1; s1[1] = 96; s1[2] = 54; s1[3] = 54;
+    shapes.push_back(s1);
+    pds.push_back(GetMemPD(s1, dtype, mkldnn::memory::format::nchw));
+
+    TShape s2(4);
+    s2[0] = 96; s2[1] = 3; s2[2] = 11; s2[3] = 11;
+    shapes.push_back(s2);
+    pds.push_back(GetMemPD(s2, dtype, mkldnn::memory::format::oihw));
+
+    std::vector<mkldnn::memory::format> formats = GetMKLDNNFormat(4, dtype);
+    pds.push_back(GetMemPD(s1, dtype, formats[0]));
+    pds.push_back(GetMemPD(s2, dtype, formats[1]));
+  }
+  {
+    // 5D
+    TShape s(5);
+    s[0] = 96; s[1] = 1; s[2] = 3; s[3] = 11; s[4] = 11;
+    shapes.push_back(s);
+    pds.push_back(GetMemPD(s, dtype, mkldnn::memory::format::goihw));
+
+    std::vector<mkldnn::memory::format> formats = GetMKLDNNFormat(5, dtype);
+    pds.push_back(GetMemPD(s, dtype, formats[0]));
+  }
+
+  TestArrayShapes ret;
+  ret.shapes = shapes;
+  ret.pds = pds;
+  return ret;
+}
+
+TEST(MKLDNN_NDArray, GetDataReorder) {
+  TestArrayShapes tas = GetTestArrayShapes();
+  std::vector<TShape> shapes = tas.shapes;
+  std::vector<mkldnn::memory::primitive_desc> pds = tas.pds;
+
+
+  // Reorder from the default to any other layout.
+  for (auto s : shapes) {
+    NDArray arr(s, Context());
+    InitArray(&arr);
+    for (auto pd : pds) {
+      if (s.Size() == pd.get_size() / sizeof(mshadow::default_real_t)) {
+        const mkldnn::memory *mem = arr.GetMKLDNNDataReorder(pd);
+        printf("reorder from (");
+        for (size_t i = 0; i < s.ndim(); i++)
+          printf("%ld, ", s[i]);
+        printf(") to (");
+        for (int i = 0; i < pd.desc().data.ndims; i++)
+          printf("%d, ", pd.desc().data.dims[i]);
+        printf("), format: %d\n", pd.desc().data.format);
+        MKLDNNStream::Get()->Submit(false);
+        VerifyMem(*mem);
+        MKLDNNStream::Get()->Cleanup();
+      }
+    }
+  }
+
+  // Reorder from a special layout to another layout.
+  for (auto s : shapes) {
+    for (auto from_pd : pds) {
+      if (from_pd.get_size() / sizeof(mshadow::default_real_t) == s.Size()) {
+        NDArray arr(s, Context());
+        // There is possibility that the dimensions of an NDArray doesn't match
+        // with the MKLDNN memory inside.
+        printf("Init array (");
+        for (size_t i = 0; i < s.ndim(); i++)
+          printf("%ld, ", s[i]);
+        printf(") with MKLDNN memory (");
+        for (int i = 0; i < from_pd.desc().data.ndims; i++)
+          printf("%d, ", from_pd.desc().data.dims[i]);
+        printf("), format: %d\n", from_pd.desc().data.format);
+        InitMKLDNNArray(&arr, from_pd);
+        for (auto to_pd : pds) {
+          if (to_pd.get_size() / sizeof(mshadow::default_real_t) == s.Size()) {
+            const mkldnn::memory *mem = arr.GetMKLDNNDataReorder(to_pd);
+            printf("reorder from (");
+            for (size_t i = 0; i < s.ndim(); i++)
+              printf("%ld, ", s[i]);
+            printf("), format: %d to (",
+                   
arr.GetMKLDNNData()->get_primitive_desc().desc().data.format);
+            for (int i = 0; i < to_pd.desc().data.ndims; i++)
+              printf("%d, ", to_pd.desc().data.dims[i]);
+            printf("), format: %d\n", to_pd.desc().data.format);
+            MKLDNNStream::Get()->Submit(false);
+            VerifyMem(*mem);
+            MKLDNNStream::Get()->Cleanup();
+          }
+        }
+      }
+    }
+  }
+}
+
 #endif
diff --git a/tests/python/gpu/test_gluon_model_zoo_gpu.py 
b/tests/python/gpu/test_gluon_model_zoo_gpu.py
index 378a822d193..273ad3d69ca 100644
--- a/tests/python/gpu/test_gluon_model_zoo_gpu.py
+++ b/tests/python/gpu/test_gluon_model_zoo_gpu.py
@@ -81,15 +81,16 @@ def test_inference():
             gpu_param = gpu_params.get(k)
             gpu_param.set_data(cpu_param.data().as_in_context(mx.gpu()))
 
-        # Run inference.
-        with autograd.record(train_mode=False):
-            cpu_out = cpu_model(mx.nd.array(data, ctx=mx.cpu()))
-            gpu_out = gpu_model(gpu_data)
-        out = cpu_out.asnumpy()
-        max_val = np.max(np.abs(out))
-        gpu_max_val = np.max(np.abs(gpu_out.asnumpy()))
-        eprint(model_name + ": CPU " + str(max_val) + ", GPU " + 
str(gpu_max_val))
-        assert_almost_equal(out / max_val, gpu_out.asnumpy() / max_val, 
rtol=1e-3, atol=1e-3)
+        for i in range(5):
+            # Run inference.
+            with autograd.record(train_mode=False):
+                cpu_out = cpu_model(mx.nd.array(data, ctx=mx.cpu()))
+                gpu_out = gpu_model(gpu_data)
+            out = cpu_out.asnumpy()
+            max_val = np.max(np.abs(out))
+            gpu_max_val = np.max(np.abs(gpu_out.asnumpy()))
+            eprint(model_name + ": CPU " + str(max_val) + ", GPU " + 
str(gpu_max_val))
+            assert_almost_equal(out / max_val, gpu_out.asnumpy() / max_val, 
rtol=1e-3, atol=1e-3)
 
 def get_nn_model(name):
     if "densenet" in name:


 

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] piiswrong closed pull request #10731: Fix a bug in getting MKLDNN memory

Reply via email to