[GitHub] [incubator-mxnet] anirudh2290 commented on a change in pull request #14641: [MKLDNN]Improve quantizeV2 and dequantize latency

GitBox Tue, 16 Apr 2019 19:38:07 -0700

anirudh2290 commented on a change in pull request #14641: [MKLDNN]Improve 
quantizeV2 and dequantize latency
URL: https://github.com/apache/incubator-mxnet/pull/14641#discussion_r276060426


 ##########
 File path: src/operator/quantization/mkldnn/mkldnn_quantize_v2-inl.h
 ##########
 @@ -137,21 +75,101 @@ static void MKLDNNQuantizeV2Compute(const 
nnvm::NodeAttrs& attrs, const OpContex
       }
     }
     if (req[0] != kWriteInplace) {
-      const_cast<NDArray&>(outputs[0]).CopyFrom(*inputs[0].GetMKLDNNData());
+      const_cast<NDArray &>(outputs[0]).CopyFrom(*inputs[0].GetMKLDNNData());
       MKLDNNStream::Get()->Submit();
     }
   } else {
-    auto out_type = GetOutputType(param);
+    if (in_buffer.IsView() && in_buffer.IsMKLDNNData()) in_buffer = 
inputs[0].Reorder2Default();
+    auto i_mem = in_buffer.GetMKLDNNData();
+
+    if (param_.min_calib_range.has_value() && 
param_.max_calib_range.has_value()) {
+      data_min = param_.min_calib_range.value();
+      data_max = param_.max_calib_range.value();
+    } else {
+      // no calib info
+      in_buffer = inputs[0].Reorder2Default();
+      auto in_ptr = in_buffer.data().dptr<float>();
+      auto nthreads = engine::OpenMP::Get()->GetRecommendedOMPThreadCount();
+      std::vector<float> data_maxs(nthreads, data_max);
+      std::vector<float> data_mins(nthreads, data_min);
+#pragma omp parallel for num_threads(nthreads)
+      for (index_t i = 0; i < static_cast<index_t>(in_buffer.shape().Size()); 
i++) {
+        int tid = omp_get_thread_num();
+        if (in_ptr[i] > data_maxs[tid]) data_maxs[tid] = in_ptr[i];
+        if (in_ptr[i] < data_mins[tid]) data_mins[tid] = in_ptr[i];
+      }
+      for (index_t i = 0; i < nthreads; i++) {
+        if (data_maxs[i] > data_max) data_max = data_maxs[i];
+        if (data_mins[i] < data_min) data_min = data_mins[i];
+      }
+    }
+
+    // Write output min/max
+    auto out_type = GetOutputType(param_);
     if (out_type == mshadow::kUint8) {
-      MKLDNNQuantizeComputeKer<float, uint8_t>(inputs, outputs, param, req);
+      quantized_range = kUint8Range;
+      *outputs[1].data().dptr<float>() = data_min;
+      *outputs[2].data().dptr<float>() = data_max;
     } else if (out_type == mshadow::kInt8) {
-      MKLDNNQuantizeComputeKer<float, int8_t>(inputs, outputs, param, req);
+      float real_range = MaxAbs(data_min, data_max);
+      quantized_range = kInt8Range;
+      *outputs[1].data().dptr<float>() = -real_range;
+      *outputs[2].data().dptr<float>() = real_range;
     } else {
       LOG(FATAL) << "mkldnn quantize op only supports int8 and uint8 as output 
type";
     }
+
+    if (initalized_ && (cached_data_min_ != data_min || cached_data_max_ != 
data_max))
+      initalized_ = false;
 
 Review comment:
   By "static graph with calibration", do you mean the case where 
param.min_calib_range and param.max_calib_range have been set? I think we 
already have that information, so we can decide to do the epsilon check only 
when they are not set. I agree, though, that it may not be easy to choose the 
epsilon value correctly. It makes sense to revisit this later if needed.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

[GitHub] [incubator-mxnet] anirudh2290 commented on a change in pull request #14641: [MKLDNN]Improve quantizeV2 and dequantize latency

Reply via email to