[GitHub] zheng-da commented on a change in pull request #10317: [MXNET-264] Improve performance of MKLDNN in small batch sizes.
zheng-da commented on a change in pull request #10317: [MXNET-264] Improve performance of MKLDNN in small batch sizes. URL: https://github.com/apache/incubator-mxnet/pull/10317#discussion_r179801695 ## File path: include/mxnet/ndarray.h ## @@ -74,6 +74,7 @@ enum NDArrayFormatErr { kRSPIdxErr, // indices error for row sparse }; +class MKLDNNMemory; Review comment: Between including mkldnn_base-inl.h and a forward declaration, I'll choose the latter. NDArray is such a basic class that its header file is included in almost every .cc file and many .h files, including mkldnn_base-inl.h. The other option is to define MKLDNNMemory in NDArray, if that is the preferred way; it's a little weird to me. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] zheng-da commented on a change in pull request #10317: [MXNET-264] Improve performance of MKLDNN in small batch sizes.
zheng-da commented on a change in pull request #10317: [MXNET-264] Improve performance of MKLDNN in small batch sizes. URL: https://github.com/apache/incubator-mxnet/pull/10317#discussion_r179557776 ## File path: src/operator/nn/mkldnn/mkldnn_base-inl.h ## @@ -334,11 +334,104 @@ const mkldnn::memory *GetWeights(const NDArray &arr, const mkldnn::memory::primitive_desc &target_pd, int num_groups); -mkldnn_memory_format_t GetDefaultFormat(mkldnn::memory::desc desc); +mkldnn_memory_format_t GetDefaultFormat(const mkldnn::memory::desc &desc); mkldnn_memory_format_t GetDefaultFormat(int num_dims); mkldnn::memory::primitive_desc GetPrimitiveDesc(mkldnn::memory::primitive_desc pd, mkldnn_memory_format_t format); +static inline bool same_shape(const TShape &shape, const mkldnn_dims_t dims, int ndims) { + if (shape.ndim() != (size_t)ndims) +return false; + for (int i = 0; i < ndims; i++) +if (shape[i] != dims[i]) + return false; + return true; +} + +static inline bool same_shape(const TShape &shape, int dtype, + const mkldnn::memory::desc &desc) { + return same_shape(shape, desc.data.dims, desc.data.ndims) + && get_mkldnn_type(dtype) == desc.data.data_type; +} + +/* + * There is a large overhead of getting mkldnn::memory::primitive_desc from + * mkldnn::memory. This class is created to cache the metadata of mkldnn memory + * to provide a much more lightweight method to access them. + */ +class MKLDNNMemory { + std::shared_ptr<mkldnn::memory> mem; + mkldnn::memory::desc desc; + size_t size; // The number of bytes. 
+ + public: + MKLDNNMemory(mkldnn::memory::primitive_desc pd, void *addr): desc(pd.desc()) { +mem.reset(new mkldnn::memory(pd, addr)); +size = pd.get_size(); + } + + explicit MKLDNNMemory(std::shared_ptr<mkldnn::memory> mem): desc( + mem->get_primitive_desc().desc()) { +this->mem = mem; +auto pd = mem->get_primitive_desc(); +size = pd.get_size(); + } + + void SetDataHandle(void *handle) { +mem->set_data_handle(handle); + } + + void *GetDataHandle() const { +return mem->get_data_handle(); + } + + std::shared_ptr<mkldnn::memory> GetMem() const { +return mem; + } + + mkldnn::memory *GetRaw() const { +return mem.get(); + } + + size_t GetSize() const { +return size; + } + + mkldnn::memory::primitive_desc GetPrimitiveDesc() const { +return mem->get_primitive_desc(); + } + + mkldnn::memory::primitive_desc GetPrimitiveDesc(mkldnn_memory_format_t format) const { +return mxnet::GetPrimitiveDesc(mem->get_primitive_desc(), format); + } + + mkldnn_memory_format_t GetDefaultFormat() const { +return mxnet::GetDefaultFormat(desc); + } + + mkldnn_memory_format_t GetFormat() const { +return desc.data.format; + } + + bool IsMKLDNN() const { +return GetFormat() != GetDefaultFormat(); + } + + bool SameFormat(mkldnn::memory::primitive_desc pd) const { +return mem->get_primitive_desc() == pd; + } + + bool SameFormat(const TShape &shape, int dtype) const { +return same_shape(shape, dtype, desc); + } + + void ReorderTo(mkldnn::memory *other) const { +std::vector<mkldnn::primitive> net; +net.push_back(mkldnn::reorder(*mem, *other)); +mkldnn::stream(mkldnn::stream::kind::eager).submit(net).wait(); Review comment: We want immediate action here. MKLDNNStream is designed to collect all MKLDNN operators and submit them in one call. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] zheng-da commented on a change in pull request #10317: [MXNET-264] Improve performance of MKLDNN in small batch sizes.
zheng-da commented on a change in pull request #10317: [MXNET-264] Improve performance of MKLDNN in small batch sizes. URL: https://github.com/apache/incubator-mxnet/pull/10317#discussion_r179556501 ## File path: src/operator/nn/mkldnn/mkldnn_base-inl.h ## @@ -334,11 +334,104 @@ const mkldnn::memory *GetWeights(const NDArray , const mkldnn::memory::primitive_desc _pd, int num_groups); -mkldnn_memory_format_t GetDefaultFormat(mkldnn::memory::desc desc); +mkldnn_memory_format_t GetDefaultFormat(const mkldnn::memory::desc ); mkldnn_memory_format_t GetDefaultFormat(int num_dims); mkldnn::memory::primitive_desc GetPrimitiveDesc(mkldnn::memory::primitive_desc pd, mkldnn_memory_format_t format); +static inline bool same_shape(const TShape , const mkldnn_dims_t dims, int ndims) { + if (shape.ndim() != (size_t)ndims) +return false; + for (int i = 0; i < ndims; i++) +if (shape[i] != dims[i]) + return false; + return true; +} + +static inline bool same_shape(const TShape , int dtype, + const mkldnn::memory::desc ) { + return same_shape(shape, desc.data.dims, desc.data.ndims) + && get_mkldnn_type(dtype) == desc.data.data_type; +} + +/* + * There is a large overhead of getting mkldnn::memory::primitive_desc from + * mkldnn::memory. This class is created to cache the metadata of mkldnn memory + * to provide a much more lightweight method to access them. + */ +class MKLDNNMemory { + std::shared_ptr mem; + mkldnn::memory::desc desc; + size_t size; // The number of bytes. + + public: + MKLDNNMemory(mkldnn::memory::primitive_desc pd, void *addr): desc(pd.desc()) { +mem.reset(new mkldnn::memory(pd, addr)); +size = pd.get_size(); + } + + explicit MKLDNNMemory(std::shared_ptr mem): desc( Review comment: we need to use shared_ptr here. MKLDNNMemory needs to own the memory. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. 
For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] zheng-da commented on a change in pull request #10317: [MXNET-264] Improve performance of MKLDNN in small batch sizes.
zheng-da commented on a change in pull request #10317: [MXNET-264] Improve performance of MKLDNN in small batch sizes. URL: https://github.com/apache/incubator-mxnet/pull/10317#discussion_r179305690 ## File path: src/operator/nn/mkldnn/mkldnn_batch_norm-inl.h ## @@ -234,20 +234,15 @@ void MKLDNNBatchNormForward(const OpContext &ctx, const BatchNormParam &param, DType* weight_ptr = gamma.data().dptr<DType>(); DType* bias_ptr = beta.data().dptr<DType>(); if (!param.fix_gamma) { -#pragma omp parallel for - for (int i = 0; i < channels_; i++) { -weight_buf[i] = weight_ptr[i]; -weight_buf[channels_ + i] = bias_ptr[i]; // bias - } + memcpy(weight_buf, weight_ptr, sizeof(weight_buf[0]) * channels_); + memcpy(&weight_buf[channels_], bias_ptr, sizeof(weight_buf[0]) * channels_); Review comment: Are you referring to all of the OMP directives? The number of channels is on the order of 100. Parallelization overhead is usually larger than the actual computation. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services