This is an automated email from the ASF dual-hosted git repository.
bgawrych pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/master by this push:
new e43c1e065e [BUGFIX] Fix and cleanup _contrib_quantized_elemwise_add (#20999)
e43c1e065e is described below
commit e43c1e065e950d53093ea84ae43897f7c2129c8a
Author: Andrzej Kotłowski <[email protected]>
AuthorDate: Tue Apr 26 11:09:41 2022 +0200
[BUGFIX] Fix and cleanup _contrib_quantized_elemwise_add (#20999)
* Fix and cleanup _contrib_quantized_elemwise_add
The key for caching the oneDNN kernel was built from different elements than
those used to build the kernel; the key is now derived from exactly the
arguments that construct the primitive (see the sketch below).
+ some additional cleanup
* Align names according to convention
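Before the diff itself, a minimal sketch of the caching rule the fix restores. This is hypothetical, self-contained C++, not the MXNet/oneDNN API: SumKernel, MakeKey and GetCachedSumKernel merely stand in for dnnl::sum, OpSignature and GetQuantizedElemwiseSumForward. The invariant it illustrates is the one the patch enforces below: the key of the thread-local kernel cache is built from the output descriptor, the scales and the input descriptors, i.e. exactly the arguments passed to the primitive constructor, instead of from the runtime min/max tensor contents.

    // Hypothetical stand-ins, not MXNet/oneDNN code: the cache key is derived from
    // exactly the values that construct the cached kernel, so a cache hit can never
    // return a kernel built for a different configuration.
    #include <sstream>
    #include <string>
    #include <unordered_map>
    #include <vector>

    // Stand-in for a oneDNN sum primitive: fully determined by an output layout,
    // per-input scales and input layouts.
    struct SumKernel {
      std::string output_desc;
      std::vector<float> scales;
      std::vector<std::string> input_descs;
    };

    // The key uses the same three ingredients that build SumKernel, and nothing else.
    static std::string MakeKey(const std::string& output_desc,
                               const std::vector<float>& scales,
                               const std::vector<std::string>& input_descs) {
      std::ostringstream key;
      key << output_desc << '|';
      for (float s : scales) key << s << ',';
      key << '|';
      for (const std::string& d : input_descs) key << d << ',';
      return key.str();
    }

    static SumKernel& GetCachedSumKernel(const std::string& output_desc,
                                         const std::vector<float>& scales,
                                         const std::vector<std::string>& input_descs) {
      static thread_local std::unordered_map<std::string, SumKernel> cache;
      const std::string key = MakeKey(output_desc, scales, input_descs);
      auto it = cache.find(key);
      if (it == cache.end()) {
        // Built from exactly the values encoded in the key above.
        it = cache.emplace(key, SumKernel{output_desc, scales, input_descs}).first;
      }
      return it->second;
    }

The real OpSignature accumulates the same ingredients through the AddSign overloads added in operator_common.h below rather than a string key, but the invariant is identical.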
---
src/operator/operator_common.h | 21 ++-
.../dnnl/dnnl_quantized_elemwise_add.cc | 177 +++++++++------------
.../quantization/quantized_elemwise_add-inl.h | 4 +-
tests/python/quantization/test_quantization.py | 2 +-
4 files changed, 96 insertions(+), 108 deletions(-)
diff --git a/src/operator/operator_common.h b/src/operator/operator_common.h
index 9a219aa78b..a0f158f2f9 100644
--- a/src/operator/operator_common.h
+++ b/src/operator/operator_common.h
@@ -546,8 +546,7 @@ class OpSignature {
*/
#if MXNET_USE_ONEDNN == 1
- void AddSign(const dnnl::memory& mem) {
- auto desc = mem.get_desc();
+ void AddSign(const dnnl::memory::desc& desc) {
hash = hash * 2 + desc.data.format_kind;
eles.push_back(desc.data.format_kind);
hash = hash * 2 + desc.data.data_type;
@@ -604,6 +603,18 @@ class OpSignature {
break;
}
}
+
+ void AddSign(const dnnl::memory& mem) {
+ auto desc = mem.get_desc();
+ AddSign(desc);
+ }
+
+ void AddSign(const std::vector<dnnl::memory::desc>& arrs) {
+ for (auto& arr : arrs) {
+ AddSign(arr);
+ }
+ }
+
#endif
void AddSign(const std::vector<NDArray>& arrs) {
@@ -644,6 +655,12 @@ class OpSignature {
eles.push_back(val);
}
+ void AddSign(const std::vector<float>& vec) {
+ for (auto& val : vec) {
+ AddSign(val);
+ }
+ }
+
void AddSign(float val) {
hash = dmlc::HashCombine(hash, val);
eles.push_back(val);
diff --git a/src/operator/quantization/dnnl/dnnl_quantized_elemwise_add.cc b/src/operator/quantization/dnnl/dnnl_quantized_elemwise_add.cc
index c9d1157eb6..f7674f6dd6 100644
--- a/src/operator/quantization/dnnl/dnnl_quantized_elemwise_add.cc
+++ b/src/operator/quantization/dnnl/dnnl_quantized_elemwise_add.cc
@@ -38,16 +38,15 @@ static inline float GetScale(const NDArray& data, float min, float max) {
return data_range / MaxAbs(min, max);
}
-class DNNLQuantizedElemwiseAddFwd {
+class DNNLQuantizedElemwiseSumFwd {
public:
dnnl::sum::primitive_desc fwd_pd;
- DNNLQuantizedElemwiseAddFwd(const dnnl::memory::desc& output_desc,
+ DNNLQuantizedElemwiseSumFwd(const dnnl::memory::desc& output_md,
const std::vector<float>& scales,
- const std::vector<dnnl::memory::desc>& data_md)
- : fwd_pd(output_desc, scales, data_md, CpuEngine::Get()->get_engine()) {
+ const std::vector<dnnl::memory::desc>& inputs_md)
+ : fwd_pd(output_md, scales, inputs_md, CpuEngine::Get()->get_engine()) {
fwd_ = std::make_shared<dnnl::sum>(fwd_pd);
- data_.resize(data_md.size());
}
const dnnl::sum& GetFwd() const {
@@ -56,34 +55,25 @@ class DNNLQuantizedElemwiseAddFwd {
private:
std::shared_ptr<dnnl::sum> fwd_;
- std::vector<std::shared_ptr<dnnl::memory>> data_;
std::shared_ptr<dnnl::memory> out_;
};
-static DNNLQuantizedElemwiseAddFwd& GetQuantizedElemwiseAddForward(
- const dnnl::memory::desc& output_desc,
+static DNNLQuantizedElemwiseSumFwd& GetQuantizedElemwiseSumForward(
+ const dnnl::memory::desc& output_md,
const std::vector<float>& scales,
- const std::vector<NDArray>& in_data,
- const std::vector<NDArray>& out_data,
- const std::vector<dnnl::memory::desc>& data_md) {
+ const std::vector<dnnl::memory::desc>& inputs_md) {
#if DMLC_CXX11_THREAD_LOCAL
- static thread_local std::unordered_map<OpSignature, DNNLQuantizedElemwiseAddFwd, OpHash> fwds;
+ static thread_local std::unordered_map<OpSignature, DNNLQuantizedElemwiseSumFwd, OpHash> fwds;
#else
- static MX_THREAD_LOCAL std::unordered_map<OpSignature, DNNLQuantizedElemwiseAddFwd, OpHash> fwds;
+ static MX_THREAD_LOCAL std::unordered_map<OpSignature, DNNLQuantizedElemwiseSumFwd, OpHash> fwds;
#endif
OpSignature key;
- key.AddSign(in_data);
- key.AddSign(in_data[quantized_elemwise_add_enum::kAMin].data().dptr<float>()[0]);
- key.AddSign(in_data[quantized_elemwise_add_enum::kAMax].data().dptr<float>()[0]);
- key.AddSign(in_data[quantized_elemwise_add_enum::kBMin].data().dptr<float>()[0]);
- key.AddSign(in_data[quantized_elemwise_add_enum::kBMax].data().dptr<float>()[0]);
- key.AddSign(out_data);
- key.AddSign(out_data[quantized_elemwise_add_enum::kMin].data().dptr<float>()[0]);
- key.AddSign(out_data[quantized_elemwise_add_enum::kMax].data().dptr<float>()[0]);
-
+ key.AddSign(output_md);
+ key.AddSign(scales);
+ key.AddSign(inputs_md);
auto it = fwds.find(key);
if (it == fwds.end()) {
- DNNLQuantizedElemwiseAddFwd fwd(output_desc, scales, data_md);
+ DNNLQuantizedElemwiseSumFwd fwd(output_md, scales, inputs_md);
it = AddToCache(&fwds, key, fwd);
}
return it->second;
@@ -91,136 +81,117 @@ static DNNLQuantizedElemwiseAddFwd& GetQuantizedElemwiseAddForward(
static void DNNLQuantizedElemwiseAddForward(const nnvm::NodeAttrs& attrs,
const OpContext& ctx,
- const std::vector<NDArray>& in_data,
+ const std::vector<NDArray>& inputs,
const std::vector<OpReqType>& req,
- const std::vector<NDArray>& out_data) {
+ const std::vector<NDArray>& outputs) {
const QuantizeElemwiseAddParam& params = nnvm::get<QuantizeElemwiseAddParam>(attrs.parsed);
// A, B, A_min, A_max, B_min, B_max
- CHECK_EQ(in_data.size(), 6U) << "should be A, B, A_min, A_max, B_min, B_max";
+ CHECK_EQ(inputs.size(), 6U) << "should be A, B, A_min, A_max, B_min, B_max";
// C, C_min, C_max
- CHECK_EQ(out_data.size(), 3U) << "should be C, C_min, C_max";
+ CHECK_EQ(outputs.size(), 3U) << "should be C, C_min, C_max";
// Collect data min,max,absmax
- const float dataA_min = in_data[quantized_elemwise_add_enum::kAMin].data().dptr<float>()[0];
- const float dataB_min = in_data[quantized_elemwise_add_enum::kBMin].data().dptr<float>()[0];
- const float dataA_max = in_data[quantized_elemwise_add_enum::kAMax].data().dptr<float>()[0];
- const float dataB_max = in_data[quantized_elemwise_add_enum::kBMax].data().dptr<float>()[0];
- const float dataA_absmax = MaxAbs(dataA_min, dataA_max);
- const float dataB_absmax = MaxAbs(dataB_min, dataB_max);
-
- auto dataA_mem = in_data[quantized_elemwise_add_enum::kDataA].GetDNNLData();
- auto dataB_mem = in_data[quantized_elemwise_add_enum::kDataB].GetDNNLData();
- const bool is_dataA_int8 =
- (in_data[quantized_elemwise_add_enum::kDataA].dtype() == mshadow::kInt8);
- const float dataA_range = is_dataA_int8 ? kInt8Range : kUint8Range;
-
- const float A_scale =
- GetScale(in_data[quantized_elemwise_add_enum::kDataA], dataA_min, dataA_max);
- const float B_scale =
- GetScale(in_data[quantized_elemwise_add_enum::kDataB], dataB_min, dataB_max);
- // rescaled_mem is for reorder dnnl memory
- dnnl::memory* rescaled_mem;
-
- // output default set as int32
- double output_data_range = kInt32Range;
- auto output_data_type = dnnl::memory::data_type::s32;
- // dataA && dataB are uint8
- if (out_data[quantized_elemwise_add_enum::kOut].dtype() == mshadow::kInt8) {
+ const float A_min = inputs[q_elemwise_add::kAMin].data().dptr<float>()[0];
+ const float B_min = inputs[q_elemwise_add::kBMin].data().dptr<float>()[0];
+ const float A_max = inputs[q_elemwise_add::kAMax].data().dptr<float>()[0];
+ const float B_max = inputs[q_elemwise_add::kBMax].data().dptr<float>()[0];
+ const float A_absmax = MaxAbs(A_min, A_max);
+ const float B_absmax = MaxAbs(B_min, B_max);
+ const bool is_A_int8 = (inputs[q_elemwise_add::kDataA].dtype() == mshadow::kInt8);
+ const float A_range = is_A_int8 ? kInt8Range : kUint8Range;
+ const float A_scale = GetScale(inputs[q_elemwise_add::kDataA], A_min, A_max);
+ const float B_scale = GetScale(inputs[q_elemwise_add::kDataB], B_min, B_max);
+ auto A_mem = inputs[q_elemwise_add::kDataA].GetDNNLData();
+ auto B_mem = inputs[q_elemwise_add::kDataB].GetDNNLData();
+ dnnl::memory* rescaled_mem; // rescaled_mem is for reorder dnnl memory
+ double output_data_range = kInt32Range; // output default set as int32
+ if (outputs[q_elemwise_add::kOut].dtype() == mshadow::kInt8) {
output_data_range = kInt8Range;
- output_data_type = dnnl::memory::data_type::s8;
- } else if (out_data[quantized_elemwise_add_enum::kOut].dtype() == mshadow::kUint8) {
+ } else if (outputs[q_elemwise_add::kOut].dtype() == mshadow::kUint8) {
output_data_range = kUint8Range;
- output_data_type = dnnl::memory::data_type::u8;
- } else {
- output_data_range = kInt32Range;
- output_data_type = dnnl::memory::data_type::s32;
}
float output_min = 0;
float output_max = 0;
- float out_data_scale = 0;
+ float output_scale = 0;
if (params.max_calib_range.has_value() && params.min_calib_range.has_value()) {
output_min = params.min_calib_range.value();
output_max = params.max_calib_range.value();
- out_data_scale = output_data_range / MaxAbs(output_min, output_max);
+ output_scale = output_data_range / MaxAbs(output_min, output_max);
} else {
- output_max = dataA_absmax + dataB_absmax;
+ output_max = A_absmax + B_absmax;
output_min = -output_max;
}
- // 2: scale 0 for dataA, scale 1 for data B
+ // 2: scale 0 for input A, scale 1 for input B
const int scales_num = 2;
std::vector<float> scales(scales_num, 1);
auto engine = CpuEngine::Get()->get_engine();
- if (in_data[quantized_elemwise_add_enum::kDataA].dtype() !=
- in_data[quantized_elemwise_add_enum::kDataB].dtype()) {
- auto s8_desc = (is_dataA_int8 == true) ? dataA_mem->get_desc() : dataB_mem->get_desc();
+ if (inputs[q_elemwise_add::kDataA].dtype() != inputs[q_elemwise_add::kDataB].dtype()) {
+ auto s8_desc = (is_A_int8 == true) ? A_mem->get_desc() : B_mem->get_desc();
rescaled_mem = TmpMemMgr::Get()->Alloc(s8_desc);
float u8_reorder_scale = 0;
if (params.max_calib_range.has_value() && params.min_calib_range.has_value()) {
- if (is_dataA_int8 == true) {
- u8_reorder_scale = out_data_scale / B_scale;
- scales[0] = out_data_scale / A_scale;
+ if (is_A_int8 == true) {
+ u8_reorder_scale = output_scale / B_scale;
+ scales[0] = output_scale / A_scale;
} else {
- u8_reorder_scale = out_data_scale / A_scale;
- scales[1] = out_data_scale / B_scale;
+ u8_reorder_scale = output_scale / A_scale;
+ scales[1] = output_scale / B_scale;
}
} else {
- // x*dataA_absmax/dataA_range = y*(dataA_absmax+dataB_absmax)/output_range
- if (is_dataA_int8 == true) {
- u8_reorder_scale =
- dataB_absmax * output_data_range / ((dataA_absmax + dataB_absmax) * kUint8Range);
- scales[0] =
- dataA_absmax * output_data_range / ((dataA_absmax + dataB_absmax) * dataA_range);
+ // x*A_absmax/A_range = y*(A_absmax+B_absmax)/output_range
+ if (is_A_int8 == true) {
+ u8_reorder_scale = B_absmax * output_data_range / (output_max * kUint8Range);
+ scales[0] = A_absmax * output_data_range / (output_max * A_range);
} else {
- u8_reorder_scale =
- dataA_absmax * output_data_range / ((dataA_absmax + dataB_absmax) * dataA_range);
- scales[1] = dataB_absmax * output_data_range / ((dataA_absmax + dataB_absmax) * kInt8Range);
+ u8_reorder_scale = A_absmax * output_data_range / (output_max * A_range);
+ scales[1] = B_absmax * output_data_range / (output_max * kInt8Range);
}
}
std::vector<float> reorder_scale = {u8_reorder_scale};
dnnl::primitive_attr reorder_attr;
reorder_attr.set_output_scales(0, reorder_scale);
- auto u8_mem = (is_dataA_int8 == true) ? dataB_mem : dataA_mem;
+ auto u8_mem = (is_A_int8 == true) ? B_mem : A_mem;
const auto reorder_pd =
dnnl::reorder::primitive_desc(engine, u8_mem->get_desc(), engine, s8_desc, reorder_attr);
dnnl_args_map_t args({{DNNL_ARG_FROM, *u8_mem}, {DNNL_ARG_TO, *rescaled_mem}});
DNNLStream::Get()->RegisterPrimArgs(dnnl::reorder(reorder_pd), args);
- if (is_dataA_int8 == true) {
- dataB_mem = rescaled_mem;
+ if (is_A_int8 == true) {
+ B_mem = rescaled_mem;
} else {
- dataA_mem = rescaled_mem;
+ A_mem = rescaled_mem;
}
} else {
// same data type and has same data range
if (params.max_calib_range.has_value() && params.min_calib_range.has_value()) {
- scales[0] = out_data_scale / A_scale;
- scales[1] = out_data_scale / B_scale;
+ scales[0] = output_scale / A_scale;
+ scales[1] = output_scale / B_scale;
} else {
- scales[0] = dataA_absmax * output_data_range / ((dataA_absmax + dataB_absmax) * dataA_range);
- scales[1] = dataB_absmax * output_data_range / ((dataA_absmax + dataB_absmax) * dataA_range);
+ scales[0] = A_absmax * output_data_range / (output_max * A_range);
+ scales[1] = B_absmax * output_data_range / (output_max * A_range);
}
}
std::vector<dnnl::memory::desc> in_desc;
- in_desc.push_back(dataA_mem->get_desc());
- in_desc.push_back(dataB_mem->get_desc());
- const auto in_shape = in_data[quantized_elemwise_add_enum::kDataA].shape();
- dnnl::memory::dims i_dims(in_shape.begin(), in_shape.end());
- auto output_desc = dnnl::memory::desc(i_dims, output_data_type, dnnl::memory::format_tag::any);
- DNNLQuantizedElemwiseAddFwd& fwd =
- GetQuantizedElemwiseAddForward(output_desc, scales, in_data, out_data, in_desc);
- auto mem = CreateDNNLMem(
- out_data[quantized_elemwise_add_enum::kOut], fwd.fwd_pd.dst_desc(), req[0], &in_data[0]);
- dnnl_args_map_t args({{DNNL_ARG_MULTIPLE_SRC, *dataA_mem},
- {DNNL_ARG_MULTIPLE_SRC + 1, *dataB_mem},
- {DNNL_ARG_DST, *mem.second}});
- DNNLStream* stream = DNNLStream::Get();
+ in_desc.push_back(A_mem->get_desc());
+ in_desc.push_back(B_mem->get_desc());
+
+ auto output_md = outputs[q_elemwise_add::kOut].GetDNNLData()->get_desc();
+ DNNLStream* stream = DNNLStream::Get();
+ DNNLQuantizedElemwiseSumFwd& fwd = GetQuantizedElemwiseSumForward(output_md, scales, in_desc);
+ auto out_mem = CreateDNNLMem(outputs[q_elemwise_add::kOut],
+ fwd.fwd_pd.dst_desc(),
+ req[q_elemwise_add::kOut],
+ &inputs[q_elemwise_add::kDataA]);
+ dnnl_args_map_t args({{DNNL_ARG_MULTIPLE_SRC, *A_mem},
+ {DNNL_ARG_MULTIPLE_SRC + 1, *B_mem},
+ {DNNL_ARG_DST, *out_mem.second}});
stream->RegisterPrimArgs(fwd.GetFwd(), args);
- CommitOutput(out_data[quantized_elemwise_add_enum::kOut], mem);
+ CommitOutput(outputs[q_elemwise_add::kOut], out_mem);
stream->Submit();
- out_data[quantized_elemwise_add_enum::kMin].data().dptr<float>()[0] = output_min;
- out_data[quantized_elemwise_add_enum::kMax].data().dptr<float>()[0] = output_max;
+ outputs[q_elemwise_add::kMin].data().dptr<float>()[0] = output_min;
+ outputs[q_elemwise_add::kMax].data().dptr<float>()[0] = output_max;
}
inline static bool ElemwiseAddStorageType(const nnvm::NodeAttrs& attrs,
diff --git a/src/operator/quantization/quantized_elemwise_add-inl.h b/src/operator/quantization/quantized_elemwise_add-inl.h
index 3a916546ed..7935a1471e 100644
--- a/src/operator/quantization/quantized_elemwise_add-inl.h
+++ b/src/operator/quantization/quantized_elemwise_add-inl.h
@@ -49,10 +49,10 @@ struct QuantizeElemwiseAddParam : public dmlc::Parameter<QuantizeElemwiseAddPara
}
};
-namespace quantized_elemwise_add_enum {
+namespace q_elemwise_add {
enum QuantizedElemwiseAddOutputs { kOut, kMin, kMax };
enum QuantizedElemwiseAddInputs { kDataA, kDataB, kAMin, kAMax, kBMin, kBMax };
-} // namespace quantized_elemwise_add_enum
+} // namespace q_elemwise_add
} // namespace op
} // namespace mxnet
diff --git a/tests/python/quantization/test_quantization.py b/tests/python/quantization/test_quantization.py
index 6b74a49a9d..65d973171f 100644
--- a/tests/python/quantization/test_quantization.py
+++ b/tests/python/quantization/test_quantization.py
@@ -1267,7 +1267,7 @@ def test_quantize_gluon_with_forward():
for mode in ['naive', 'entropy']:
for quantize_granularity in ['tensor-wise', 'channel-wise']:
- qdtype = qdtype if mode is 'naive' else 'auto'
+ qdtype = qdtype if mode == 'naive' else 'auto'
quantized_resnet18_v1 = mx.contrib.quant.quantize_net(resnet18_v1, quantized_dtype=qdtype,
exclude_layers=None,
exclude_layers_match=excluded_names_match,