This is an automated email from the ASF dual-hosted git repository.
bgawrych pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/master by this push:
new 7b1daf9bc3 Requantize scale fix (#21100)
7b1daf9bc3 is described below
commit 7b1daf9bc30ae92c0581b26449e00d2ae5882230
Author: DominikaJedynak <[email protected]>
AuthorDate: Tue Jul 19 16:44:52 2022 +0200
Requantize scale fix (#21100)
* Requantize scale fix and refactor
* Comment fix
* Review suggestions
* Formatting fix
---
.../quantization/dnnl/dnnl_quantized_elemwise_add.cc | 14 +++++++-------
src/operator/quantization/dnnl/dnnl_requantize-inl.h | 12 +++++++-----
src/operator/subgraph/dnnl/dnnl_fc.cc | 6 +++---
3 files changed, 17 insertions(+), 15 deletions(-)
diff --git a/src/operator/quantization/dnnl/dnnl_quantized_elemwise_add.cc b/src/operator/quantization/dnnl/dnnl_quantized_elemwise_add.cc
index 979d2cbd53..ba2f1c5869 100644
--- a/src/operator/quantization/dnnl/dnnl_quantized_elemwise_add.cc
+++ b/src/operator/quantization/dnnl/dnnl_quantized_elemwise_add.cc
@@ -167,9 +167,9 @@ static void DNNLQuantizedElemwiseAddForward(const nnvm::NodeAttrs& attrs,
float output_max = 0;
float output_scale = 0;
if (params.max_calib_range.has_value() && params.min_calib_range.has_value()) {
- output_min = params.min_calib_range.value();
- output_max = params.max_calib_range.value();
- output_scale = output_data_range / MaxAbs(output_min, output_max);
+ output_min = params.min_calib_range.value();
+ output_max = params.max_calib_range.value();
+ output_scale = output_data_range / MaxAbs(output_min, output_max);
} else {
output_max = A_absmax + B_absmax;
output_min = -output_max;
@@ -189,8 +189,8 @@ static void DNNLQuantizedElemwiseAddForward(const nnvm::NodeAttrs& attrs,
// rescale uint8 to int8 by reorder to temporary memory
auto s8_desc = is_A_int8 ? A_mem->get_desc() : B_mem->get_desc();
rescaled_mem = TmpMemMgr::Get()->Alloc(s8_desc);
- const float u8_reorder_scale = 0.5;
- std::vector<float> reorder_scale = {u8_reorder_scale};
+ const float u8_to_s8_scale = 0.5;
+ std::vector<float> reorder_scale = {u8_to_s8_scale};
auto engine = CpuEngine::Get()->get_engine();
dnnl::primitive_attr reorder_attr;
reorder_attr.set_output_scales(0, reorder_scale);
@@ -202,10 +202,10 @@ static void DNNLQuantizedElemwiseAddForward(const nnvm::NodeAttrs& attrs,
// Modify scale to restore original uint8 values:
if (is_A_int8) {
B_mem = rescaled_mem;
- scales[1] *= 1.0 / u8_reorder_scale;
+ scales[1] *= 1.0 / u8_to_s8_scale;
} else {
A_mem = rescaled_mem;
- scales[0] *= 1.0 / u8_reorder_scale;
+ scales[0] *= 1.0 / u8_to_s8_scale;
}
}
}
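The elementwise-add hunks above rename u8_reorder_scale to u8_to_s8_scale: the factor 0.5 maps a uint8 operand into signed-int8 range before the sum, and the operand's sum coefficient is multiplied back by 1/0.5 afterwards. A minimal standalone sketch of that arithmetic (not the operator code; the values and scale names are hypothetical, and the real oneDNN reorder also rounds and saturates):

// Sketch: rescale a uint8 operand into int8, then compensate in the sum.
#include <cstdint>
#include <iostream>

int main() {
  const float u8_to_s8_scale = 0.5f;     // maps [0, 255] into the int8 range
  const uint8_t a_u8         = 200;      // uint8 operand (hypothetical)
  const int8_t b_s8          = -50;      // int8 operand (hypothetical)
  float scale_a = 1.0f, scale_b = 1.0f;  // per-operand sum coefficients

  // Reorder with output scale 0.5: the uint8 value now fits in int8.
  const int8_t a_s8 = static_cast<int8_t>(a_u8 * u8_to_s8_scale);  // 100

  // Restore the original magnitude of the rescaled operand.
  scale_a *= 1.0f / u8_to_s8_scale;  // 2.0

  const float sum = scale_a * a_s8 + scale_b * b_s8;
  std::cout << sum << '\n';  // 150, same as a_u8 + b_s8
  return 0;
}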
diff --git a/src/operator/quantization/dnnl/dnnl_requantize-inl.h b/src/operator/quantization/dnnl/dnnl_requantize-inl.h
index e2009d6508..d15c0fc385 100644
--- a/src/operator/quantization/dnnl/dnnl_requantize-inl.h
+++ b/src/operator/quantization/dnnl/dnnl_requantize-inl.h
@@ -131,11 +131,13 @@ static void DNNLRequantizeForward(const nnvm::NodeAttrs& attrs,
if (data_mins[i] < data_min)
data_min = data_mins[i];
}
- float src_range = MinAbs(MinValue<SrcDType>(), MaxValue<SrcDType>());
- // MaxAbs is not used here as it converts data to float what could cause overflow errors.
- SrcDType data_range = std::max(std::abs(data_min), std::abs(data_max));
- float data_scale = MaxAbs(*inputs[1].data().dptr<float>(), *inputs[2].data().dptr<float>());
- real_range = data_range * data_scale / src_range;
+ float src_range = MinAbs(MinValue<SrcDType>(), MaxValue<SrcDType>());
+ // MaxAbs is not used here as it converts data to float what could cause errors.
+ // int64 is used because in case of std::abs(int32_MIN), overflow was occurring.
+ int64_t data_range = std::max(std::abs(static_cast<int64_t>(data_min)),
+ std::abs(static_cast<int64_t>(data_max)));
+ float data_scale = MaxAbs(*inputs[1].data().dptr<float>(), *inputs[2].data().dptr<float>());
+ real_range = data_range * data_scale / src_range;
}
auto out_type = GetQuantizeOutputType(param);
if (out_type == mshadow::kUint8) {
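The requantize hunk above exists because std::abs on the most negative int32 value is undefined behavior: |INT32_MIN| = 2147483648 is not representable in int32_t. A self-contained sketch of the failure mode and the cast the commit applies (separate from the operator code):

// Sketch: why the operands are widened to int64_t before std::abs.
#include <algorithm>
#include <cstdint>
#include <cstdlib>
#include <iostream>
#include <limits>

int main() {
  const int32_t data_min = std::numeric_limits<int32_t>::min();  // -2147483648
  const int32_t data_max = 42;

  // std::abs(data_min) on 32-bit int is UB: +2147483648 does not fit.
  // Casting to int64_t first makes the absolute value well defined.
  const int64_t data_range = std::max(std::abs(static_cast<int64_t>(data_min)),
                                      std::abs(static_cast<int64_t>(data_max)));
  std::cout << data_range << '\n';  // 2147483648
  return 0;
}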
diff --git a/src/operator/subgraph/dnnl/dnnl_fc.cc b/src/operator/subgraph/dnnl/dnnl_fc.cc
index ee16d3ec34..54da273712 100644
--- a/src/operator/subgraph/dnnl/dnnl_fc.cc
+++ b/src/operator/subgraph/dnnl/dnnl_fc.cc
@@ -67,6 +67,7 @@ class SgDNNLFCOp {
private:
enum { kDataMin = 0, kDataMax, kWeightMin, kWeightMax, kBiasMin, kBiasMax, kSumMin, kSumMax };
const size_t MIN_MAX_COUNT = 8;
+ const float u8_to_s8_scale = 0.5;
NDArray PrepareOutputWithSum(const NDArray& sum_input, const NDArray& output);
bool CheckInitializationConditions(const std::vector<NDArray>& inputs,
@@ -275,8 +276,7 @@ NDArray SgDNNLFCOp::PrepareOutputWithSum(const NDArray& sum_input, const NDArray
dnnl_mem_ptr tmp_mem(new dnnl::memory(
sum_mem_desc, CpuEngine::Get()->get_engine(), out_dnnl_mem->get_data_handle()));
DNNLStream::Get()->RegisterMem(tmp_mem);
- const float u8_reorder_scale = 0.5;
- std::vector<float> reorder_scale = {u8_reorder_scale};
+ std::vector<float> reorder_scale = {u8_to_s8_scale};
dnnl::primitive_attr reorder_attr;
reorder_attr.set_output_scales(0, reorder_scale);
const auto reorder_pd = dnnl::reorder::primitive_desc(CpuEngine::Get()->get_engine(),
@@ -498,7 +498,7 @@ bool SgDNNLFCOp::PrepareQuantization(const OpContext& ctx,
if (in_data[idx.sum].dtype() == mshadow::kUint8 && output.dtype() == mshadow::kInt8) {
// In this case, reorder with scale 0.5 is used on in_data[idx.sum] to
// scale it to s8 range, so sum_scale has to be rescaled as well
- full_param_.sum_scale *= 2.0;
+ full_param_.sum_scale *= 1.0 / u8_to_s8_scale;
}
}
return support_channelwise_scale;
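The last hunk replaces the magic constant 2.0 with 1.0 / u8_to_s8_scale, so the sum-scale compensation stays tied to the reorder scale now declared as a class member. A tiny sketch of the invariant, with a hypothetical starting sum_scale value:

// Sketch: compensating sum_scale for the 0.5 reorder on the uint8 sum input.
#include <cassert>

int main() {
  const float u8_to_s8_scale = 0.5f;
  float sum_scale = 3.0f;              // hypothetical pre-existing scale
  sum_scale *= 1.0f / u8_to_s8_scale;  // same effect as the old "*= 2.0"
  assert(sum_scale == 6.0f);           // exact: 0.5 and 2.0 are powers of two
  return 0;
}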