ZhennanQin commented on a change in pull request #14641: [MKLDNN]Improve
quantizeV2 and dequantize latency
URL: https://github.com/apache/incubator-mxnet/pull/14641#discussion_r276085462
##########
File path: src/operator/quantization/mkldnn/mkldnn_dequantize-inl.h
##########
@@ -26,82 +26,104 @@
 #ifndef MXNET_OPERATOR_QUANTIZATION_MKLDNN_MKLDNN_DEQUANTIZE_INL_H_
 #define MXNET_OPERATOR_QUANTIZATION_MKLDNN_MKLDNN_DEQUANTIZE_INL_H_
 #if MXNET_USE_MKLDNN == 1
-#include <string>
 #include <algorithm>
+#include <string>
 #include <vector>
 #include "../../nn/mkldnn/mkldnn_base-inl.h"
 namespace mxnet {
 namespace op {
-template<typename SrcType, typename DstType>
-static void MKLDNNDequantizeComputeKer(const std::vector<NDArray> &inputs,
-                                       const std::vector<NDArray> &outputs,
-                                       const std::vector<OpReqType> &req) {
-  using namespace mshadow;
-  using namespace mxnet_op;
-  using red::limits::MaxValue;
-  using red::limits::MinValue;
-  float real_range = 0.0;
-  float quantized_range = 0.0;
-  if (inputs[0].dtype() == mshadow::kUint8) {
-    quantized_range = MaxAbs(MaxValue<SrcType>(), MinValue<SrcType>());
-    real_range = MaxAbs(*inputs[1].data().dptr<DstType>(), *inputs[2].data().dptr<DstType>());
-  } else if (inputs[0].dtype() == mshadow::kInt8) {
-    quantized_range = MinAbs(MaxValue<SrcType>(), MinValue<SrcType>());
-    real_range = MaxAbs(*inputs[1].data().dptr<DstType>(), *inputs[2].data().dptr<DstType>());
-  } else {
-    LOG(FATAL) << "mkldnn dequantize op only supports int8 and uint8 as output type";
-  }
-  float scale = real_range / quantized_range;
-  primitive_attr attr;
-  const int mask = 0;
-  std::vector<float> scales = {scale};
-  attr.set_output_scales(mask, scales);
-  attr.set_int_output_round_mode(round_nearest);
-  mkldnn::engine cpu_engine = mxnet::CpuEngine::Get()->get_engine();
-  NDArray in_buffer = inputs[0];
-  if (inputs[0].IsView() && inputs[0].IsMKLDNNData())
-    in_buffer = inputs[0].Reorder2Default();
+class SgMKLDNNDequantizeOperator {
+ public:
+  explicit SgMKLDNNDequantizeOperator(const nnvm::NodeAttrs &attrs)
+      : param_(nnvm::get<DequantizeParam>(attrs.parsed)) {}
+  void Forward(const OpContext &ctx, const std::vector<NDArray> &inputs,
+               const std::vector<OpReqType> &req, const std::vector<NDArray> &outputs);
+
+ private:
+  bool initalized_{false};
+  DequantizeParam param_;
+  float cached_data_min_{0.f};
+  float cached_data_max_{0.f};
+  std::shared_ptr<mkldnn::memory> i_mem_;
+  std::shared_ptr<mkldnn::memory> o_mem_;
+  std::shared_ptr<mkldnn::reorder> fwd_pd_;
+};
+
+void SgMKLDNNDequantizeOperator::Forward(const OpContext &ctx, const std::vector<NDArray> &inputs,
+                                         const std::vector<OpReqType> &req,
+                                         const std::vector<NDArray> &outputs) {
+  NDArray in_buffer = inputs[0];
+  if (inputs[0].IsView() && inputs[0].IsMKLDNNData()) in_buffer = inputs[0].Reorder2Default();
   auto i_mem = in_buffer.GetMKLDNNData();
-  auto i_mpd = i_mem->get_primitive_desc();
-  auto i_desc = i_mpd.desc();
-  size_t i_ndim = in_buffer.shape().ndim();
-  mkldnn::memory::dims i_dims = mkldnn::memory::dims(i_ndim);
-  for (size_t i = 0; i < i_ndim; i++) {
-    i_dims[i] = static_cast<int>(in_buffer.shape()[i]);
-  }
-  mkldnn::memory::format i_fmt = static_cast<mkldnn::memory::format>(i_desc.data.format);
-  if (i_fmt == mkldnn::memory::format::nhwc) {
-    // For 4d tensor, nchw is the default format
-    i_fmt = mkldnn::memory::format::nchw;
+  float data_min = *inputs[1].data().dptr<float>();
+  float data_max = *inputs[2].data().dptr<float>();
+
+  if (initalized_ && (cached_data_min_ != data_min || cached_data_max_ != data_max))
Review comment:
As above.
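
For readers following the thread: the new SgMKLDNNDequantizeOperator in the diff above keeps the computed scale and the MKL-DNN reorder primitive as member state and rebuilds them only when the observed data_min/data_max change between forward calls, which is where the latency improvement comes from. Below is a minimal, self-contained sketch of that cache-and-invalidate pattern; the names (CachedDequantize, scale_) are hypothetical, and it deliberately omits the mkldnn::memory/reorder objects the real operator caches.

```c++
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>
#include <limits>
#include <vector>

// Hypothetical stand-in for the cached operator: recompute the scale only
// when the observed min/max differ from the values cached on a prior call.
class CachedDequantize {
 public:
  std::vector<float> Forward(const std::vector<int8_t>& data, float data_min, float data_max) {
    if (!initialized_ || cached_min_ != data_min || cached_max_ != data_max) {
      // Same scale formula as the int8 branch of the original kernel:
      // real_range / quantized_range, with quantized_range = 127 for int8.
      const float quantized_range = std::numeric_limits<int8_t>::max();
      const float real_range = std::max(std::abs(data_min), std::abs(data_max));
      scale_ = real_range / quantized_range;
      cached_min_ = data_min;
      cached_max_ = data_max;
      initialized_ = true;
      // In the real operator this is also where the mkldnn::reorder primitive
      // would be (re)created with the new output scale.
    }
    std::vector<float> out(data.size());
    for (size_t i = 0; i < data.size(); ++i) out[i] = data[i] * scale_;
    return out;
  }

 private:
  bool initialized_{false};
  float cached_min_{0.f};
  float cached_max_{0.f};
  float scale_{1.f};
};

int main() {
  CachedDequantize op;
  std::vector<int8_t> q = {-127, 0, 64, 127};
  // First call builds the scale; the second reuses it because min/max match.
  auto a = op.Forward(q, -1.f, 1.f);
  auto b = op.Forward(q, -1.f, 1.f);
  std::cout << a[3] << " " << b[3] << "\n";  // ~1.0 both times
  return 0;
}
```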