[GitHub] zheng-da commented on a change in pull request #10317: [MXNET-264] Improve performance of MKLDNN in small batch sizes.

2018-04-06 Thread GitBox
zheng-da commented on a change in pull request #10317: [MXNET-264] Improve 
performance of MKLDNN in small batch sizes.
URL: https://github.com/apache/incubator-mxnet/pull/10317#discussion_r179801695
 
 

 ##
 File path: include/mxnet/ndarray.h
 ##
 @@ -74,6 +74,7 @@ enum NDArrayFormatErr {
   kRSPIdxErr, // indices error for row sparse
 };
 
+class MKLDNNMemory;
 
 Review comment:
   Between including mkldnn_base-inl.h and using a forward declaration, I'll choose the latter. NDArray is such a basic class that its header is included in almost every .cc file and in many .h files, including mkldnn_base-inl.h itself, so including mkldnn_base-inl.h back from ndarray.h is not a good option. The other possibility is to define MKLDNNMemory inside NDArray, if that's preferred, but that seems a little odd to me.
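
   To illustrate the trade-off, a minimal sketch of the forward-declaration approach (not part of the patch; the member name is hypothetical). ndarray.h only names the class, and only the .cc files that actually dereference the pointer need to include mkldnn_base-inl.h:

#include <memory>

// Forward declaration: ndarray.h does not pull in any MKLDNN headers.
class MKLDNNMemory;

class NDArray {
 public:
  // Holding a shared_ptr to an incomplete type is legal here; the full
  // definition of MKLDNNMemory is only required where the pointer is
  // dereferenced, i.e. in .cc files that already include mkldnn_base-inl.h.
  std::shared_ptr<MKLDNNMemory> mkl_mem_;  // hypothetical member name
};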




[GitHub] zheng-da commented on a change in pull request #10317: [MXNET-264] Improve performance of MKLDNN in small batch sizes.

2018-04-05 Thread GitBox
zheng-da commented on a change in pull request #10317: [MXNET-264] Improve 
performance of MKLDNN in small batch sizes.
URL: https://github.com/apache/incubator-mxnet/pull/10317#discussion_r179557776
 
 

 ##
 File path: src/operator/nn/mkldnn/mkldnn_base-inl.h
 ##
 @@ -334,11 +334,104 @@ const mkldnn::memory *GetWeights(const NDArray &arr,
                                  const mkldnn::memory::primitive_desc &target_pd,
                                  int num_groups);
 
-mkldnn_memory_format_t GetDefaultFormat(mkldnn::memory::desc desc);
+mkldnn_memory_format_t GetDefaultFormat(const mkldnn::memory::desc &desc);
 mkldnn_memory_format_t GetDefaultFormat(int num_dims);
 mkldnn::memory::primitive_desc GetPrimitiveDesc(mkldnn::memory::primitive_desc pd,
                                                 mkldnn_memory_format_t format);
 
+static inline bool same_shape(const TShape &shape, const mkldnn_dims_t dims, int ndims) {
+  if (shape.ndim() != (size_t)ndims)
+    return false;
+  for (int i = 0; i < ndims; i++)
+    if (shape[i] != dims[i])
+      return false;
+  return true;
+}
+
+static inline bool same_shape(const TShape &shape, int dtype,
+                              const mkldnn::memory::desc &desc) {
+  return same_shape(shape, desc.data.dims, desc.data.ndims)
+      && get_mkldnn_type(dtype) == desc.data.data_type;
+}
+
+/*
+ * There is a large overhead of getting mkldnn::memory::primitive_desc from
+ * mkldnn::memory. This class is created to cache the metadata of mkldnn memory
+ * to provide a much more lightweight method to access them.
+ */
+class MKLDNNMemory {
+  std::shared_ptr<mkldnn::memory> mem;
+  mkldnn::memory::desc desc;
+  size_t size;  // The number of bytes.
+
+ public:
+  MKLDNNMemory(mkldnn::memory::primitive_desc pd, void *addr): desc(pd.desc()) {
+    mem.reset(new mkldnn::memory(pd, addr));
+    size = pd.get_size();
+  }
+
+  explicit MKLDNNMemory(std::shared_ptr<mkldnn::memory> mem): desc(
+      mem->get_primitive_desc().desc()) {
+    this->mem = mem;
+    auto pd = mem->get_primitive_desc();
+    size = pd.get_size();
+  }
+
+  void SetDataHandle(void *handle) {
+    mem->set_data_handle(handle);
+  }
+
+  void *GetDataHandle() const {
+    return mem->get_data_handle();
+  }
+
+  std::shared_ptr<mkldnn::memory> GetMem() const {
+    return mem;
+  }
+
+  mkldnn::memory *GetRaw() const {
+    return mem.get();
+  }
+
+  size_t GetSize() const {
+    return size;
+  }
+
+  mkldnn::memory::primitive_desc GetPrimitiveDesc() const {
+    return mem->get_primitive_desc();
+  }
+
+  mkldnn::memory::primitive_desc GetPrimitiveDesc(mkldnn_memory_format_t format) const {
+    return mxnet::GetPrimitiveDesc(mem->get_primitive_desc(), format);
+  }
+
+  mkldnn_memory_format_t GetDefaultFormat() const {
+    return mxnet::GetDefaultFormat(desc);
+  }
+
+  mkldnn_memory_format_t GetFormat() const {
+    return desc.data.format;
+  }
+
+  bool IsMKLDNN() const {
+    return GetFormat() != GetDefaultFormat();
+  }
+
+  bool SameFormat(mkldnn::memory::primitive_desc pd) const {
+    return mem->get_primitive_desc() == pd;
+  }
+
+  bool SameFormat(const TShape &shape, int dtype) const {
+    return same_shape(shape, dtype, desc);
+  }
+
+  void ReorderTo(mkldnn::memory *other) const {
+    std::vector<mkldnn::primitive> net;
+    net.push_back(mkldnn::reorder(*mem, *other));
+    mkldnn::stream(mkldnn::stream::kind::eager).submit(net).wait();
 
 Review comment:
   We want immediate execution here. MKLDNNStream is designed to collect all MKLDNN operators and submit them in one call, which is not what we want for this reorder.
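
   For context, a minimal sketch of the difference (illustrative only; RegisterPrim/Submit stand in for whatever batching interface MKLDNNStream provides):

#include <vector>
#include <mkldnn.hpp>

// Eager execution, as in ReorderTo(): build a one-primitive net and run it
// immediately, blocking until the reorder has finished.
void ReorderNow(const mkldnn::memory &src, mkldnn::memory *dst) {
  std::vector<mkldnn::primitive> net;
  net.push_back(mkldnn::reorder(src, *dst));
  mkldnn::stream(mkldnn::stream::kind::eager).submit(net).wait();
}

// Deferred execution (the MKLDNNStream pattern): primitives are only collected
// here and run later in one batched submit, so the data would not yet be
// reordered when this function returns.
// MKLDNNStream::Get()->RegisterPrim(mkldnn::reorder(src, *dst));
// ...
// MKLDNNStream::Get()->Submit();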




[GitHub] zheng-da commented on a change in pull request #10317: [MXNET-264] Improve performance of MKLDNN in small batch sizes.

2018-04-05 Thread GitBox
zheng-da commented on a change in pull request #10317: [MXNET-264] Improve 
performance of MKLDNN in small batch sizes.
URL: https://github.com/apache/incubator-mxnet/pull/10317#discussion_r179556501
 
 

 ##
 File path: src/operator/nn/mkldnn/mkldnn_base-inl.h
 ##
 @@ -334,11 +334,104 @@ const mkldnn::memory *GetWeights(const NDArray &arr,
                                  const mkldnn::memory::primitive_desc &target_pd,
                                  int num_groups);
 
-mkldnn_memory_format_t GetDefaultFormat(mkldnn::memory::desc desc);
+mkldnn_memory_format_t GetDefaultFormat(const mkldnn::memory::desc &desc);
 mkldnn_memory_format_t GetDefaultFormat(int num_dims);
 mkldnn::memory::primitive_desc GetPrimitiveDesc(mkldnn::memory::primitive_desc pd,
                                                 mkldnn_memory_format_t format);
 
+static inline bool same_shape(const TShape &shape, const mkldnn_dims_t dims, int ndims) {
+  if (shape.ndim() != (size_t)ndims)
+    return false;
+  for (int i = 0; i < ndims; i++)
+    if (shape[i] != dims[i])
+      return false;
+  return true;
+}
+
+static inline bool same_shape(const TShape &shape, int dtype,
+                              const mkldnn::memory::desc &desc) {
+  return same_shape(shape, desc.data.dims, desc.data.ndims)
+      && get_mkldnn_type(dtype) == desc.data.data_type;
+}
+
+/*
+ * There is a large overhead of getting mkldnn::memory::primitive_desc from
+ * mkldnn::memory. This class is created to cache the metadata of mkldnn memory
+ * to provide a much more lightweight method to access them.
+ */
+class MKLDNNMemory {
+  std::shared_ptr<mkldnn::memory> mem;
+  mkldnn::memory::desc desc;
+  size_t size;  // The number of bytes.
+
+ public:
+  MKLDNNMemory(mkldnn::memory::primitive_desc pd, void *addr): desc(pd.desc()) {
+    mem.reset(new mkldnn::memory(pd, addr));
+    size = pd.get_size();
+  }
+
+  explicit MKLDNNMemory(std::shared_ptr<mkldnn::memory> mem): desc(
 
 Review comment:
   We need to use shared_ptr here because MKLDNNMemory needs to own the memory it wraps.
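
   A minimal sketch of the ownership concern (illustrative; the class name is a stand-in): with shared_ptr the wrapped mkldnn::memory stays alive as long as the wrapper does, which a raw pointer would not guarantee.

#include <memory>
#include <mkldnn.hpp>

class MKLDNNMemorySketch {                  // hypothetical stand-in class
  std::shared_ptr<mkldnn::memory> mem_;     // shares ownership with the caller
 public:
  explicit MKLDNNMemorySketch(std::shared_ptr<mkldnn::memory> mem)
      : mem_(std::move(mem)) {}
  // Safe even if the caller releases its own shared_ptr: the use count keeps
  // the underlying mkldnn::memory alive until *this is destroyed.
  mkldnn::memory *GetRaw() const { return mem_.get(); }
};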




[GitHub] zheng-da commented on a change in pull request #10317: [MXNET-264] Improve performance of MKLDNN in small batch sizes.

2018-04-04 Thread GitBox
zheng-da commented on a change in pull request #10317: [MXNET-264] Improve 
performance of MKLDNN in small batch sizes.
URL: https://github.com/apache/incubator-mxnet/pull/10317#discussion_r179305690
 
 

 ##
 File path: src/operator/nn/mkldnn/mkldnn_batch_norm-inl.h
 ##
 @@ -234,20 +234,15 @@ void MKLDNNBatchNormForward(const OpContext &ctx, const BatchNormParam &param,
     DType* weight_ptr = gamma.data().dptr<DType>();
     DType* bias_ptr = beta.data().dptr<DType>();
     if (!param.fix_gamma) {
-#pragma omp parallel for
-      for (int i = 0; i < channels_; i++) {
-        weight_buf[i] = weight_ptr[i];
-        weight_buf[channels_ + i] = bias_ptr[i];  // bias
-      }
+      memcpy(weight_buf, weight_ptr, sizeof(weight_buf[0]) * channels_);
+      memcpy(&weight_buf[channels_], bias_ptr, sizeof(weight_buf[0]) * channels_);
 
 Review comment:
   Are you referring to all of the OMP directives? The number of channels is on the order of 100, so the parallelization overhead is usually larger than the actual computation.
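
   A small illustrative benchmark (not part of the patch; compile with and without -fopenmp) that makes the point concrete for a channel count around 100:

#include <chrono>
#include <cstdio>
#include <cstring>
#include <vector>

int main() {
  const int channels = 128;   // assumption: channel count on the order of 100
  const int iters = 10000;
  std::vector<float> src(2 * channels, 1.0f), dst(2 * channels, 0.0f);

  // Serial copy, as in the patch: two memcpy calls per iteration.
  auto t0 = std::chrono::steady_clock::now();
  for (int it = 0; it < iters; ++it) {
    std::memcpy(dst.data(), src.data(), sizeof(float) * channels);
    std::memcpy(&dst[channels], &src[channels], sizeof(float) * channels);
  }
  auto t1 = std::chrono::steady_clock::now();

  // Parallel copy, as in the old code: the OMP fork/join cost per iteration
  // typically exceeds the time needed to copy ~100 floats.
  auto t2 = std::chrono::steady_clock::now();
  for (int it = 0; it < iters; ++it) {
#pragma omp parallel for
    for (int i = 0; i < channels; ++i) {
      dst[i] = src[i];
      dst[channels + i] = src[channels + i];
    }
  }
  auto t3 = std::chrono::steady_clock::now();

  using us = std::chrono::microseconds;
  std::printf("memcpy: %lld us, omp loop: %lld us\n",
              (long long)std::chrono::duration_cast<us>(t1 - t0).count(),
              (long long)std::chrono::duration_cast<us>(t3 - t2).count());
  return 0;
}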

