anirudh2290 commented on a change in pull request #14641: [MKLDNN]Improve quantizeV2 and dequantize latency
URL: https://github.com/apache/incubator-mxnet/pull/14641#discussion_r276027744
##########
File path: src/operator/quantization/mkldnn/mkldnn_quantize_v2-inl.h
##########
@@ -34,99 +34,37 @@
namespace mxnet {
namespace op {
-template <typename SrcType, typename DstType>
-static void MKLDNNQuantizeComputeKer(const std::vector<NDArray>& inputs,
- const std::vector<NDArray>& outputs,
- const QuantizeV2Param& param,
- const std::vector<OpReqType>& req) {
- using namespace mshadow;
- using namespace mxnet_op;
- using red::limits::MaxValue;
- using red::limits::MinValue;
- SrcType real_range = 0.f;
- DstType quantized_range = 0;
- NDArray in_buffer = inputs[0];
- SrcType data_min = red::limits::MaxValue<SrcType>();
- SrcType data_max = red::limits::MinValue<SrcType>();
- if (param.min_calib_range.has_value() && param.max_calib_range.has_value()) {
- data_min = param.min_calib_range.value();
- data_max = param.max_calib_range.value();
- } else {
- // no calib info
- in_buffer = inputs[0].Reorder2Default();
- auto in_ptr = in_buffer.data().dptr<SrcType>();
- auto nthreads = engine::OpenMP::Get()->GetRecommendedOMPThreadCount();
- std::vector<SrcType> data_maxs(nthreads, data_max);
- std::vector<SrcType> data_mins(nthreads, data_min);
-#pragma omp parallel for num_threads(nthreads)
- for (index_t i = 0; i < static_cast<index_t>(in_buffer.shape().Size()); i++) {
- int tid = omp_get_thread_num();
- if (in_ptr[i] > data_maxs[tid]) data_maxs[tid] = in_ptr[i];
- if (in_ptr[i] < data_mins[tid]) data_mins[tid] = in_ptr[i];
- }
- for (index_t i = 0; i < nthreads; i++) {
- if (data_maxs[i] > data_max) data_max = data_maxs[i];
- if (data_mins[i] < data_min) data_min = data_mins[i];
- }
- }
+class SgMKLDNNQuantizeOperator {
+ public:
+ explicit SgMKLDNNQuantizeOperator(const nnvm::NodeAttrs &attrs)
+ : param_(nnvm::get<QuantizeV2Param>(attrs.parsed)) {}
- auto out_type = GetOutputType(param);
- if (out_type == mshadow::kUint8) {
- real_range = std::max<SrcType>(0.f, data_max);
- quantized_range = MaxValue<DstType>();
- *outputs[1].data().dptr<float>() = 0.f;
- *outputs[2].data().dptr<float>() = real_range;
- } else if (out_type == mshadow::kInt8) {
- real_range = MaxAbs(data_min, data_max);
- quantized_range = MinAbs(MaxValue<DstType>(), MinValue<DstType>());
- *outputs[1].data().dptr<float>() = -real_range;
- *outputs[2].data().dptr<float>() = real_range;
- } else {
- LOG(FATAL) << "mkldnn quantize op only supports int8 and uint8 as output type";
- }
- float scale = static_cast<float>(quantized_range) / real_range;
+ void Forward(const OpContext &ctx, const std::vector<NDArray> &inputs,
+ const std::vector<OpReqType> &req, const std::vector<NDArray> &outputs);
- primitive_attr attr;
- const int mask = 0;
- std::vector<float> scales = {scale};
- attr.set_output_scales(mask, scales);
- attr.set_int_output_round_mode(round_nearest);
- mkldnn::engine cpu_engine = mxnet::CpuEngine::Get()->get_engine();
+ private:
+ bool initalized_{false};
Review comment:
nit: initialized
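
For reference, the rename this nit asks for only touches the member declaration quoted above; a minimal, hypothetical C++ sketch (reusing just the class name and flag from the diff, not the full operator) would read:

    class SgMKLDNNQuantizeOperator {
     private:
      bool initialized_{false};  // was "initalized_"; spelling corrected per the review nit
    };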