This is an automated email from the ASF dual-hosted git repository.
patriczhao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-mxnet.git
The following commit(s) were added to refs/heads/master by this push:
new 84c1635 [MKLDNN] add quantized sum (#14614)
84c1635 is described below
commit 84c1635adc1d885a916b5993eb0aa15baab9bbd9
Author: rongzha1 <[email protected]>
AuthorDate: Wed May 1 05:56:04 2019 +0800
[MKLDNN] add quantized sum (#14614)
* add quantized sum
* fix gpu compiler error and cpu testcase fail
* add default forward function for quantized_sum
* skip quantized_sum for gpu ctx
* fix comments
* fix indetation and comments
* retrigger CI
* alloc memeory through TmpMemMgr
* fix comments Apr.12
* change sum to elemwise_add
* change Sum to ElemwiseAdd
* fix indents
* retrigger CI
* trigger CI
* fix indentation and typo
* trigger CI
* fix typo
* fix typo
* remove USE_MKLDNN macro for requantize params
* rename param same as its op
* trigger CI
* trigger CI
* trigger CI
---
.../mkldnn/mkldnn_quantized_elemwise_add.cc | 206 +++++++++++++++++++++
src/operator/quantization/quantization_utils.h | 1 +
.../quantization/quantized_elemwise_add-inl.h | 58 ++++++
.../quantization/quantized_elemwise_add.cc | 141 ++++++++++++++
..._property.h => mkldnn_post_quantize_property.h} | 63 ++++---
.../subgraph/mkldnn/mkldnn_subgraph_property.cc | 4 +-
src/operator/tensor/elemwise_binary_op_basic.cc | 3 +
tests/python/quantization/test_quantization.py | 70 ++++++-
8 files changed, 518 insertions(+), 28 deletions(-)
diff --git a/src/operator/quantization/mkldnn/mkldnn_quantized_elemwise_add.cc
b/src/operator/quantization/mkldnn/mkldnn_quantized_elemwise_add.cc
new file mode 100644
index 0000000..05da992
--- /dev/null
+++ b/src/operator/quantization/mkldnn/mkldnn_quantized_elemwise_add.cc
@@ -0,0 +1,206 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * Copyright (c) 2019 by Contributors
+ * \file mkldnn_quantized_elemwise_add.cc
+ * \brief
+ */
+
+#if MXNET_USE_MKLDNN == 1
+#include "../quantized_elemwise_add-inl.h"
+#include "../../nn/mkldnn/mkldnn_ops-inl.h"
+#include "../../nn/mkldnn/mkldnn_base-inl.h"
+#include "../quantization_utils.h"
+
+namespace mxnet {
+namespace op {
+
+DMLC_REGISTER_PARAMETER(QuantizeElemwiseAddParam);
+
+static inline float GetScale(const NDArray& data, float min, float max) {
+ auto data_range = (data.dtype() == mshadow::kInt8) ? kInt8Range :
kUint8Range;
+ return data_range / MaxAbs(min, max);
+}
+
+static void MKLDNNQuantizedElemwiseAddForward(const nnvm::NodeAttrs& attrs,
const OpContext& ctx,
+ const std::vector<NDArray>&
in_data,
+ const std::vector<OpReqType>&
req,
+ const std::vector<NDArray>&
out_data) {
+ const QuantizeElemwiseAddParam& params =
nnvm::get<QuantizeElemwiseAddParam>(attrs.parsed);
+ // A, B, A_min, A_max, B_min, B_max
+ CHECK_EQ(in_data.size(), 6U) << "should be A, B, A_min, A_max, B_min, B_max";
+ // C, C_min, C_max
+ CHECK_EQ(out_data.size(), 3U) << "should be C, C_min, C_max";
+ // Collect data min,max,absmax
+ const float dataA_min =
in_data[quantized_elemwise_add_enum::kAMin].data().dptr<float>()[0];
+ const float dataB_min =
in_data[quantized_elemwise_add_enum::kBMin].data().dptr<float>()[0];
+ const float dataA_max =
in_data[quantized_elemwise_add_enum::kAMax].data().dptr<float>()[0];
+ const float dataB_max =
in_data[quantized_elemwise_add_enum::kBMax].data().dptr<float>()[0];
+ const float dataA_absmax = MaxAbs(dataA_min, dataA_max);
+ const float dataB_absmax = MaxAbs(dataB_min, dataB_max);
+
+ auto dataA_mem =
in_data[quantized_elemwise_add_enum::kDataA].GetMKLDNNData();
+ auto dataB_mem =
in_data[quantized_elemwise_add_enum::kDataB].GetMKLDNNData();
+ const bool is_dataA_int8 =
(in_data[quantized_elemwise_add_enum::kDataA].dtype()
+ == mshadow::kInt8);
+ const size_t dataA_range = is_dataA_int8 ? kInt8Range : kUint8Range;
+
+ const float A_scale = GetScale(in_data[quantized_elemwise_add_enum::kDataA],
+ dataA_min,
+ dataA_max);
+ const float B_scale = GetScale(in_data[quantized_elemwise_add_enum::kDataB],
+ dataB_min,
+ dataB_max);
+ // rescaled_mem is for reorder mkldnn memory
+ mkldnn::memory *rescaled_mem;
+
+ // output default set as int32
+ size_t output_data_range = kInt32Range;
+ auto output_data_type = mkldnn::memory::s32;
+ // override range/type when requantize fusion fixed the output dtype
+ if (out_data[quantized_elemwise_add_enum::kOut].dtype() == mshadow::kInt8) {
+ output_data_range = kInt8Range;
+ output_data_type = mkldnn::memory::s8;
+ } else if (out_data[quantized_elemwise_add_enum::kOut].dtype() ==
mshadow::kUint8) {
+ output_data_range = kUint8Range;
+ output_data_type = mkldnn::memory::u8;
+ } else {
+ output_data_range = kInt32Range;
+ output_data_type = mkldnn::memory::s32;
+ }
+
+ float output_min = 0;
+ float output_max = 0;
+ float out_data_scale = 0;
+ if (params.max_calib_range.has_value() &&
params.min_calib_range.has_value()) {
+ output_min = params.min_calib_range.value();
+ output_max = params.max_calib_range.value();
+ out_data_scale = output_data_range / MaxAbs(output_min, output_max);
+ } else {
+ output_max = dataA_absmax + dataB_absmax;
+ output_min = -output_max;
+ }
+ // 2: scale 0 for dataA, scale 1 for data B
+ const int scales_num = 2;
+ std::vector<float> scales(scales_num, 1);
+ if (in_data[quantized_elemwise_add_enum::kDataA].dtype()
+ != in_data[quantized_elemwise_add_enum::kDataB].dtype()) {
+ auto s8_pd = (is_dataA_int8 == true)
+ ? dataA_mem->get_primitive_desc()
+ : dataB_mem->get_primitive_desc();
+ rescaled_mem = TmpMemMgr::Get()->Alloc(s8_pd);
+ float u8_reorder_scale = 0;
+ if (params.max_calib_range.has_value() &&
params.min_calib_range.has_value()) {
+ if (is_dataA_int8 == true) {
+ u8_reorder_scale = out_data_scale / B_scale;
+ scales[0] = out_data_scale / A_scale;
+ } else {
+ u8_reorder_scale = out_data_scale / A_scale;
+ scales[1] = out_data_scale / B_scale;
+ }
+ } else {
+ // x*dataA_absmax/dataA_range =
y*(dataA_absmax+dataB_absmax)/output_range
+ if (is_dataA_int8 == true) {
+ u8_reorder_scale = dataB_absmax * output_data_range
+ / ((dataA_absmax + dataB_absmax) * kUint8Range);
+ scales[0] = dataA_absmax * output_data_range
+ / ((dataA_absmax + dataB_absmax) * dataA_range);
+ } else {
+ u8_reorder_scale = dataA_absmax * output_data_range
+ / ((dataA_absmax + dataB_absmax) * dataA_range);
+ scales[1] = dataB_absmax * output_data_range
+ / ((dataA_absmax + dataB_absmax) * kInt8Range);
+ }
+ }
+ std::vector<float> reorder_scale = {u8_reorder_scale};
+ primitive_attr reorder_attr;
+ reorder_attr.set_int_output_round_mode(round_mode::round_nearest);
+ reorder_attr.set_output_scales(0, reorder_scale);
+ auto u8_mem = (is_dataA_int8 == true) ? dataB_mem : dataA_mem;
+ const auto reorder_pd =
mkldnn::reorder::primitive_desc(u8_mem->get_primitive_desc(),
+ s8_pd,
+ reorder_attr);
+ MKLDNNStream::Get()->RegisterPrim(mkldnn::reorder(reorder_pd, *u8_mem,
*rescaled_mem));
+
+ if (is_dataA_int8 == true) {
+ dataB_mem = rescaled_mem;
+ } else {
+ dataA_mem = rescaled_mem;
+ }
+ } else {
+ // same data type and has same data range
+ if (params.max_calib_range.has_value() &&
params.min_calib_range.has_value()) {
+ scales[0] = out_data_scale / A_scale;
+ scales[1] = out_data_scale / B_scale;
+ } else {
+ scales[0] = dataA_absmax * output_data_range / ((dataA_absmax +
dataB_absmax) * dataA_range);
+ scales[1] = dataB_absmax * output_data_range / ((dataA_absmax +
dataB_absmax) * dataA_range);
+ }
+ }
+
+ std::vector<mkldnn::primitive::at> in_prims;
+ std::vector<mkldnn::memory::primitive_desc> in_pds;
+ in_prims.push_back(*dataA_mem);
+ in_prims.push_back(*dataB_mem);
+ in_pds.push_back(dataA_mem->get_primitive_desc());
+ in_pds.push_back(dataB_mem->get_primitive_desc());
+ size_t i_ndim = in_data[quantized_elemwise_add_enum::kDataA].shape().ndim();
+ mkldnn::memory::dims i_dims = mkldnn::memory::dims(i_ndim);
+ for (size_t i = 0; i < i_ndim; i++) {
+ i_dims[i] =
static_cast<int>(in_data[quantized_elemwise_add_enum::kDataA].shape()[i]);
+ }
+ mkldnn::memory::format i_fmt = static_cast<mkldnn::memory::format>(
+
in_pds[quantized_elemwise_add_enum::kDataA].desc().data.format);
+ auto output_desc = mkldnn::memory::desc(i_dims, output_data_type, i_fmt);
+ mkldnn::sum::primitive_desc pdesc(output_desc, scales, in_pds);
+ auto mem = CreateMKLDNNMem(out_data[quantized_elemwise_add_enum::kOut],
+ pdesc.dst_primitive_desc(),
+ req[0],
+ &in_data[0]);
+ MKLDNNStream *stream = MKLDNNStream::Get();
+ stream->RegisterPrim(mkldnn::sum(pdesc, in_prims, *mem.second));
+ CommitOutput(out_data[quantized_elemwise_add_enum::kOut], mem);
+ stream->Submit();
+
+ out_data[quantized_elemwise_add_enum::kMin].data().dptr<float>()[0] =
output_min;
+ out_data[quantized_elemwise_add_enum::kMax].data().dptr<float>()[0] =
output_max;
+}
+
+inline static bool ElemwiseAddStorageType(const nnvm::NodeAttrs& attrs, const
int dev_mask,
+ DispatchMode* dispatch_mode,
std::vector<int>* in_attrs,
+ std::vector<int>* out_attrs) {
+ // Check num of inputs: A, B, A_min, A_max, B_min, B_max
+ CHECK_EQ(in_attrs->size(), 6U);
+ // Check num of outputs: C, C_min, C_max
+ CHECK_EQ(out_attrs->size(), 3U);
+
+ return MKLDNNStorageType(attrs, dev_mask, true, dispatch_mode, in_attrs,
out_attrs);
+}
+
+NNVM_REGISTER_OP(_contrib_quantized_elemwise_add)
+.set_attr<FInferStorageType>("FInferStorageType", ElemwiseAddStorageType)
+.set_attr<FComputeEx>("FComputeEx<cpu>", MKLDNNQuantizedElemwiseAddForward)
+.set_attr<bool>("TIsMKLDNN", true)
+.set_attr_parser(ParamParser<QuantizeElemwiseAddParam>)
+.add_arguments(QuantizeElemwiseAddParam::__FIELDS__());
+} // namespace op
+} // namespace mxnet
+
+#endif // MXNET_USE_MKLDNN == 1
diff --git a/src/operator/quantization/quantization_utils.h
b/src/operator/quantization/quantization_utils.h
index 86018e6..e7f7ccd 100644
--- a/src/operator/quantization/quantization_utils.h
+++ b/src/operator/quantization/quantization_utils.h
@@ -34,6 +34,7 @@ namespace op {
static const size_t kUint8Range = 255;
static const size_t kInt8Range = 127;
+static const size_t kInt32Range = 0x7fffffff;
template<typename T>
MSHADOW_XINLINE int Sign(T val) {
diff --git a/src/operator/quantization/quantized_elemwise_add-inl.h
b/src/operator/quantization/quantized_elemwise_add-inl.h
new file mode 100644
index 0000000..673b281
--- /dev/null
+++ b/src/operator/quantization/quantized_elemwise_add-inl.h
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+/*!
+ * \file quantized_elemwise_add-inl.h
+ * \brief
+ * \author Rong Zhang
+ */
+
+#ifndef MXNET_OPERATOR_QUANTIZATION_QUANTIZED_ELEMWISE_ADD_INL_H_
+#define MXNET_OPERATOR_QUANTIZATION_QUANTIZED_ELEMWISE_ADD_INL_H_
+
+#include "../tensor/elemwise_unary_op.h"
+
+namespace mxnet {
+namespace op {
+/* This structure is used for requantization only when fusion occurs */
+struct QuantizeElemwiseAddParam : public
dmlc::Parameter<QuantizeElemwiseAddParam> {
+ dmlc::optional<float> min_calib_range;
+ dmlc::optional<float> max_calib_range;
+ DMLC_DECLARE_PARAMETER(QuantizeElemwiseAddParam) {
+ DMLC_DECLARE_FIELD(min_calib_range)
+ .set_default(dmlc::optional<float>())
+ .describe("The minimum scalar value in the form of float32 obtained "
+ "through calibration. If present, it will be used to requantize
the "
+ "int8 output data.");
+ DMLC_DECLARE_FIELD(max_calib_range)
+ .set_default(dmlc::optional<float>())
+ .describe("The maximum scalar value in the form of float32 obtained "
+ "through calibration. If present, it will be used to requantize
the "
+ "int8 output data.");
+ }
+};
+
+namespace quantized_elemwise_add_enum {
+enum QuantizedElemwiseAddOutputs { kOut, kMin, kMax };
+enum QuantizedElemwiseAddInputs { kDataA, kDataB, kAMin, kAMax, kBMin, kBMax};
+}
+
+} // namespace op
+} // namespace mxnet
+
+#endif // MXNET_OPERATOR_QUANTIZATION_QUANTIZED_ELEMWISE_ADD_INL_H_
diff --git a/src/operator/quantization/quantized_elemwise_add.cc
b/src/operator/quantization/quantized_elemwise_add.cc
new file mode 100644
index 0000000..f821e65
--- /dev/null
+++ b/src/operator/quantization/quantized_elemwise_add.cc
@@ -0,0 +1,141 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * Copyright (c) 2019 by Contributors
+ * \file quantized_elemwise_add.cc
+ * \brief
+*/
+#include "../tensor/elemwise_unary_op.h"
+#include "./quantized_elemwise_add-inl.h"
+
+namespace mxnet {
+namespace op {
+
+static bool ElemwiseAddShape(const nnvm::NodeAttrs& attrs,
+ mxnet::ShapeVector* in_shape,
+ mxnet::ShapeVector* out_shape) {
+ // A, B, A_min, A_max, B_min, B_max
+ CHECK_EQ(in_shape->size(), 6U);
+ // C, C_min, C_max
+ CHECK_EQ(out_shape->size(), 3U);
+ CHECK_EQ((*in_shape)[0], (*in_shape)[1]);
+
+
+ SHAPE_ASSIGN_CHECK(*in_shape, 2, TShape{1});
+ SHAPE_ASSIGN_CHECK(*in_shape, 3, TShape{1});
+ SHAPE_ASSIGN_CHECK(*in_shape, 4, TShape{1});
+ SHAPE_ASSIGN_CHECK(*in_shape, 5, TShape{1});
+
+ SHAPE_ASSIGN_CHECK(*out_shape, 0, (*in_shape)[0]);
+ SHAPE_ASSIGN_CHECK(*out_shape, 1, TShape{1});
+ SHAPE_ASSIGN_CHECK(*out_shape, 2, TShape{1});
+ return true;
+}
+
+static bool ElemwiseAddType(const nnvm::NodeAttrs& attrs,
+ std::vector<int>* in_type,
+ std::vector<int>* out_type) {
+ // A, B, A_min, A_max, B_min, B_max
+ CHECK_EQ(in_type->size(), 6U);
+ // C, C_min, C_max
+ CHECK_EQ(out_type->size(), 3U);
+
+ // A, B
+ const int elem_add_num = 2;
+ for (int i = 0; i < elem_add_num; ++i) {
+ if (in_type->at(i) == mshadow::kInt8) {
+ TYPE_ASSIGN_CHECK(*in_type, i, mshadow::kInt8);
+ } else {
+ TYPE_ASSIGN_CHECK(*in_type, i, mshadow::kUint8);
+ }
+ }
+ // C
+ int dtype = mshadow::kInt32;
+ const QuantizeElemwiseAddParam& params =
nnvm::get<QuantizeElemwiseAddParam>(attrs.parsed);
+ if (params.max_calib_range.has_value() &&
params.min_calib_range.has_value()) {
+ dtype = (in_type->at(0) == in_type->at(1)) ? in_type->at(0) :
mshadow::kInt8;
+ }
+ TYPE_ASSIGN_CHECK(*out_type, 0, dtype);
+ // C_min
+ TYPE_ASSIGN_CHECK(*out_type, 1, mshadow::kFloat32);
+ // C_max
+ TYPE_ASSIGN_CHECK(*out_type, 2, mshadow::kFloat32);
+
+ return true;
+}
+
+void QuantizedElemwiseAddForward(const nnvm::NodeAttrs& attrs,
+ const OpContext &ctx,
+ const std::vector<TBlob> &in_data,
+ const std::vector<OpReqType> &req,
+ const std::vector<TBlob> &out_data) {
+ LOG(FATAL) << "Not supported for MXNet built without MKLDNN. "
+ "Please install MKLDNN enabled MXNet.";
+}
+
+NNVM_REGISTER_OP(_contrib_quantized_elemwise_add)
+.describe(R"code(elemwise_add operator for input dataA and input dataB data
type of int8,
+and accumulates in type int32 for the output. For each argument, two more
arguments of type
+float32 must be provided representing the thresholds of quantizing argument
from data
+type float32 to int8. The final outputs contain result in int32, and min
+and max thresholds representing the thresholds for quantizing the float32
output into int32.
+
+.. Note::
+ This operator only supports forward propagation. DO NOT use it in training.
+
+)code")
+.set_num_inputs([](const NodeAttrs& attrs) {
+// A, B, A_min, A_max, B_min, B_max
+ return 6;
+})
+// C, C_min, C_max
+.set_num_outputs(3)
+.set_attr<nnvm::FListInputNames>("FListInputNames", [](const NodeAttrs& attrs)
{
+ return std::vector<std::string>{"lhs", "rhs", "lhs_min", "lhs_max",
"rhs_min", "rhs_max"}; \
+})
+.set_attr<nnvm::FListOutputNames>("FListOutputNames", [](const NodeAttrs&
attrs) {
+ return std::vector<std::string>{"output", "min_output", "max_output"};
+})
+.set_attr<nnvm::FInferType>("FInferType", ElemwiseAddType)
+.set_attr<mxnet::FInferShape>("FInferShape", ElemwiseAddShape)
+.set_attr<FCompute>("FCompute<cpu>", QuantizedElemwiseAddForward)
+.set_attr<FNeedRequantize>("FNeedRequantize", [](const NodeAttrs& attrs) {
return true; })
+.add_argument("lhs", "NDArray-or-Symbol", "first input")
+.add_argument("rhs", "NDArray-or-Symbol", "second input")
+.add_argument("lhs_min", "NDArray-or-Symbol", "3rd input")
+.add_argument("lhs_max", "NDArray-or-Symbol", "4th input")
+.add_argument("rhs_min", "NDArray-or-Symbol", "5th input")
+.add_argument("rhs_max", "NDArray-or-Symbol", "6th input");
+
+
+NNVM_REGISTER_OP(elemwise_add)
+.set_attr<FQuantizedOp>("FQuantizedOp", [](const NodeAttrs& attrs) {
+ nnvm::NodePtr node = nnvm::Node::Create();
+ node->attrs.op = Op::Get("_contrib_quantized_elemwise_add");
+ node->attrs.name = "quantized_" + attrs.name;
+ node->attrs.dict = attrs.dict;
+ if (node->op()->attr_parser != nullptr) {
+ node->op()->attr_parser(&(node->attrs));
+ }
+ return node;
+});
+
+} // namespace op
+} // namespace mxnet
diff --git a/src/operator/subgraph/mkldnn/mkldnn_conv_post_quantize_property.h
b/src/operator/subgraph/mkldnn/mkldnn_post_quantize_property.h
similarity index 68%
rename from src/operator/subgraph/mkldnn/mkldnn_conv_post_quantize_property.h
rename to src/operator/subgraph/mkldnn/mkldnn_post_quantize_property.h
index f9033f4..b61a303 100644
--- a/src/operator/subgraph/mkldnn/mkldnn_conv_post_quantize_property.h
+++ b/src/operator/subgraph/mkldnn/mkldnn_post_quantize_property.h
@@ -16,12 +16,13 @@
* specific language governing permissions and limitations
* under the License.
*/
-#ifndef MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_CONV_POST_QUANTIZE_PROPERTY_H_
-#define MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_CONV_POST_QUANTIZE_PROPERTY_H_
+#ifndef MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_POST_QUANTIZE_PROPERTY_H_
+#define MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_POST_QUANTIZE_PROPERTY_H_
#if MXNET_USE_MKLDNN == 1
#include <string>
#include <vector>
+#include <set>
#include "../common.h"
#include "../subgraph_property.h"
#include "../../nn/mkldnn/mkldnn_convolution-inl.h"
@@ -31,7 +32,7 @@
namespace mxnet {
namespace op {
-class SgMKLDNNConvPostQuantizeSelector : public SubgraphSelector {
+class SgMKLDNNPostQuantizeSelector : public SubgraphSelector {
public:
/*! \brief pattern match status */
enum SelectStatus {
@@ -43,14 +44,25 @@ class SgMKLDNNConvPostQuantizeSelector : public
SubgraphSelector {
private:
SelectStatus status;
std::vector<const nnvm::Node *> matched_list;
+ std::set<std::string> support_requantize_fusion_op_name;
public:
- SgMKLDNNConvPostQuantizeSelector() {}
+ SgMKLDNNPostQuantizeSelector() {
+ support_requantize_fusion_op_name.insert("_sg_mkldnn_conv");
+
support_requantize_fusion_op_name.insert("_contrib_quantized_elemwise_add");
+ }
bool Select(const nnvm::Node &n) override {
- if (n.op() && n.op()->name == "_sg_mkldnn_conv") {
- auto const ¶m = nnvm::get<MKLDNNConvFusionParam>(n.attrs.parsed);
- if (param.full_conv_param.mkldnn_param.quantized) {
+ if (n.op() && support_requantize_fusion_op_name.count(n.op()->name)) {
+ if (n.op()->name == "_sg_mkldnn_conv") {
+ auto const ¶m = nnvm::get<MKLDNNConvFusionParam>(n.attrs.parsed);
+ if (param.full_conv_param.mkldnn_param.quantized) {
+ status = kStart;
+ matched_list.clear();
+ matched_list.push_back(&n);
+ return true;
+ }
+ } else if (n.op()->name == "_contrib_quantized_elemwise_add") {
status = kStart;
matched_list.clear();
matched_list.push_back(&n);
@@ -97,47 +109,48 @@ class SgMKLDNNConvPostQuantizeSelector : public
SubgraphSelector {
}
};
-class SgMKLDNNConvPostQuantizeProperty : public SubgraphProperty {
+class SgMKLDNNPostQuantizeProperty : public SubgraphProperty {
public:
- SgMKLDNNConvPostQuantizeProperty() {}
-
+ SgMKLDNNPostQuantizeProperty() {
+ support_requantize_fusion_op_name.insert("_sg_mkldnn_conv");
+
support_requantize_fusion_op_name.insert("_contrib_quantized_elemwise_add");
+ }
static SubgraphPropertyPtr Create() {
- static const std::string &name = "MKLDNN Convolution post-quantization
optimization pass";
- auto property = std::make_shared<SgMKLDNNConvPostQuantizeProperty>();
+ static const std::string &name = "MKLDNN post-quantization optimization
pass";
+ auto property = std::make_shared<SgMKLDNNPostQuantizeProperty>();
property->SetAttr<std::string>("property_name", name);
property->SetAttr<bool>("inference_only", true);
return property;
}
-
nnvm::NodePtr CreateSubgraphNode(const nnvm::Symbol &sym,
const int subgraph_id = 0) const override {
- nnvm::NodePtr conv_node = nullptr;
+ nnvm::NodePtr fuse_node = nullptr;
nnvm::NodePtr requantize_node = nullptr;
DFSVisit(sym.outputs, [&](const nnvm::NodePtr &node) {
if (node->is_variable()) return;
auto &op_name = node->op()->name;
- if (op_name == "_sg_mkldnn_conv") {
- conv_node = node;
+ if (support_requantize_fusion_op_name.count(op_name)) {
+ fuse_node = node;
} else if (op_name == "_contrib_requantize") {
requantize_node = node;
}
});
- CHECK_NOTNULL(conv_node);
+ CHECK_NOTNULL(fuse_node);
CHECK_NOTNULL(requantize_node);
auto const &requantize_param =
nnvm::get<RequantizeParam>(requantize_node->attrs.parsed);
CHECK(requantize_param.min_calib_range.has_value());
CHECK(requantize_param.max_calib_range.has_value());
- conv_node->attrs.dict["min_calib_range"] =
+ fuse_node->attrs.dict["min_calib_range"] =
std::to_string(requantize_param.min_calib_range.value());
- conv_node->attrs.dict["max_calib_range"] =
+ fuse_node->attrs.dict["max_calib_range"] =
std::to_string(requantize_param.max_calib_range.value());
- conv_node->op()->attr_parser(&(conv_node->attrs));
- return conv_node;
+ fuse_node->op()->attr_parser(&(fuse_node->attrs));
+ return fuse_node;
}
SubgraphSelectorPtr CreateSubgraphSelector() const override {
- auto selector = std::make_shared<SgMKLDNNConvPostQuantizeSelector>();
+ auto selector = std::make_shared<SgMKLDNNPostQuantizeSelector>();
return selector;
}
@@ -149,10 +162,12 @@ class SgMKLDNNConvPostQuantizeProperty : public
SubgraphProperty {
*entry_ptr = nnvm::NodeEntry{n, entry_ptr->index, 0};
}
}
-};
+ private:
+ std::set<std::string> support_requantize_fusion_op_name;
+};
} // namespace op
} // namespace mxnet
#endif // if MXNET_USE_MKLDNN == 1
-#endif // MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_CONV_POST_QUANTIZE_PROPERTY_H_
+#endif // MXNET_OPERATOR_SUBGRAPH_MKLDNN_MKLDNN_POST_QUANTIZE_PROPERTY_H_
diff --git a/src/operator/subgraph/mkldnn/mkldnn_subgraph_property.cc
b/src/operator/subgraph/mkldnn/mkldnn_subgraph_property.cc
index 26aa3b5..4fc2d2c 100644
--- a/src/operator/subgraph/mkldnn/mkldnn_subgraph_property.cc
+++ b/src/operator/subgraph/mkldnn/mkldnn_subgraph_property.cc
@@ -21,7 +21,7 @@
#include "mkldnn_conv_property.h"
#include "mkldnn_fc_property.h"
-#include "mkldnn_conv_post_quantize_property.h"
+#include "mkldnn_post_quantize_property.h"
#include "mkldnn_fc_post_quantize_property.h"
#include "mkldnn_post_quantize_align_scale_property.h"
@@ -30,7 +30,7 @@ namespace op {
MXNET_REGISTER_SUBGRAPH_PROPERTY(MKLDNN, SgMKLDNNConvProperty);
MXNET_REGISTER_SUBGRAPH_PROPERTY(MKLDNN, SgMKLDNNFCProperty);
-MXNET_REGISTER_SUBGRAPH_PROPERTY(MKLDNN_POST_QUANTIZE,
SgMKLDNNConvPostQuantizeProperty);
+MXNET_REGISTER_SUBGRAPH_PROPERTY(MKLDNN_POST_QUANTIZE,
SgMKLDNNPostQuantizeProperty);
MXNET_REGISTER_SUBGRAPH_PROPERTY(MKLDNN_POST_QUANTIZE,
SgMKLDNNFCPostQuantizeProperty);
MXNET_REGISTER_SUBGRAPH_PROPERTY(MKLDNN_POST_QUANTIZE,
SgMKLDNNPostQuantizeAlignScaleProperty);
diff --git a/src/operator/tensor/elemwise_binary_op_basic.cc
b/src/operator/tensor/elemwise_binary_op_basic.cc
index f7599ad..0ff73f4 100644
--- a/src/operator/tensor/elemwise_binary_op_basic.cc
+++ b/src/operator/tensor/elemwise_binary_op_basic.cc
@@ -85,6 +85,9 @@ MXNET_OPERATOR_REGISTER_BINARY(elemwise_add)
return
std::vector<ResourceRequest>{ResourceRequest::kTempSpace};})
MXNET_ADD_SPARSE_OP_ALIAS(elemwise_add)
.add_alias("_add").add_alias("_plus").add_alias("_Plus")
+.set_attr<nnvm::FListOutputNames>("FListOutputNames", [](const NodeAttrs&
attrs) {
+ return std::vector<std::string>{"output"};
+})
.describe(R"code(Adds arguments element-wise.
The storage type of ``elemwise_add`` output depends on storage types of inputs
diff --git a/tests/python/quantization/test_quantization.py
b/tests/python/quantization/test_quantization.py
index a65a9e7..ce93f98 100644
--- a/tests/python/quantization/test_quantization.py
+++ b/tests/python/quantization/test_quantization.py
@@ -275,6 +275,71 @@ def test_quantized_conv():
check_quantized_conv((3, 4, 28, 28), (3, 3), 128, (1, 1), (1, 1),
True, qdtype)
check_quantized_conv((3, 4, 28, 28), (3, 3), 128, (1, 1), (1, 1),
False, qdtype)
+
+@with_seed()
+def test_quantized_elemwise_add():
+ def check_quantized_elemwise_add(data_shape, qtype):
+ if is_test_for_native_cpu():
+ print('skipped testing quantized_elemwise_add for native cpu since
it is not supported yet')
+ return
+ elif qtype != 'uint8' and qtype != 'int8':
+ print('skipped testing quantized_elemwise_add for not supported
data type')
+ return
+ elif is_test_for_gpu():
+ print('skipped testing quantized_elemwise_add for gpu since it is
not supported yet')
+ return
+
+ dataA = mx.sym.Variable(name='dataA', shape=data_shape,
dtype='float32')
+ dataB = mx.sym.Variable(name='dataB', shape=data_shape,
dtype='float32')
+ elemwise_add_fp32 = mx.sym.elemwise_add(dataA, dataB)
+ arg_names = elemwise_add_fp32.list_arguments()
+ elemwise_add_fp32_exe =
elemwise_add_fp32.simple_bind(ctx=mx.current_context(), grad_req='null')
+ if qtype == 'uint8':
+ data_low = 0.0
+ data_high = 255.0
+ else:
+ data_low = -127.0
+ data_high = 127.0
+
+ dataA_val = mx.nd.random.uniform(low=data_low, high=data_high,
shape=data_shape).astype('int32')
+ dataB_val = mx.nd.random.uniform(low=data_low, high=data_high,
shape=data_shape).astype('int32')
+ elemwise_add_fp32_exe.arg_dict[arg_names[0]][:] = dataA_val
+
+ elemwise_add_fp32_exe.arg_dict[arg_names[1]][:] = dataB_val
+
+ output = elemwise_add_fp32_exe.forward()[0]
+
+ qdataA = mx.sym.Variable(name='qdataA', shape=data_shape, dtype=qtype)
+ qdataB = mx.sym.Variable(name='qdataB', shape=data_shape, dtype=qtype)
+ min_dataA = mx.sym.Variable(name='min_dataA')
+ max_dataA = mx.sym.Variable(name='max_dataA')
+ min_dataB = mx.sym.Variable(name='min_dataB')
+ max_dataB = mx.sym.Variable(name='max_dataB')
+ quantized_elemwise_add = mx.sym.contrib.quantized_elemwise_add(qdataA,
qdataB, min_dataA, max_dataA, min_dataB, max_dataB)
+ elemwise_add_int8_exe =
quantized_elemwise_add.simple_bind(ctx=mx.current_context(), grad_req='null')
+ qarg_names = quantized_elemwise_add.list_arguments()
+ elemwise_add_int8_exe.arg_dict[qarg_names[0]][:] =
elemwise_add_fp32_exe.arg_dict[arg_names[0]].astype(qtype)
+ elemwise_add_int8_exe.arg_dict[qarg_names[1]][:] =
elemwise_add_fp32_exe.arg_dict[arg_names[1]].astype(qtype)
+ quantized_range = 127.0
+ elemwise_add_int8_exe.arg_dict[qarg_names[2]][:] = data_low
+ elemwise_add_int8_exe.arg_dict[qarg_names[3]][:] = data_high
+ elemwise_add_int8_exe.arg_dict[qarg_names[4]][:] = data_low
+ elemwise_add_int8_exe.arg_dict[qarg_names[5]][:] = data_high
+ qoutput, min_range, max_range = elemwise_add_int8_exe.forward()
+ min_val = min_range.asnumpy().tolist()[0]
+ max_val = max_range.asnumpy().tolist()[0]
+
+ fp32_rslt = output.asnumpy()
+ int8_rslt = qoutput.asnumpy()*max_val/0x7fffffff
+ assert_almost_equal(fp32_rslt, int8_rslt, atol = 1e-4)
+
+ for qtype in ['int8', 'uint8']:
+ check_quantized_elemwise_add((4, 6), qtype)
+ check_quantized_elemwise_add((13, 74, 52), qtype)
+ check_quantized_elemwise_add((3, 4, 56, 56), qtype)
+ check_quantized_elemwise_add((32, 56, 64, 11), qtype)
+
+
@with_seed()
def test_quantized_pooling():
def check_quantized_pooling(data_shape, kernel, pool_type, pad, stride,
global_pool, qdtype, convention='valid'):
@@ -566,7 +631,8 @@ def get_fp32_residual():
conv0 = mx.sym.Convolution(data=data, num_filter=4, kernel=(1,1),
pad=(0,0),
no_bias=True, name='conv0')
bn = mx.sym.BatchNorm(data=conv0, fix_gamma=False, eps=2e-5, momentum=0.9,
name='bn')
- act0 = mx.sym.Activation(data=bn + data, act_type='relu', name='relu0')
+ sum0 = mx.sym.elemwise_add(bn, data, name='sum0')
+ act0 = mx.sym.Activation(data=sum0, act_type='relu', name='relu0')
pool0 = mx.sym.Pooling(act0, kernel=(4, 4), pool_type='avg', name='pool0')
conv1 = mx.sym.Convolution(data=pool0, num_filter=4, kernel=(1,1),
pad=(0,0),
no_bias=False, name='conv1')
@@ -749,7 +815,7 @@ def test_quantize_model_with_forward():
if mx.current_context() == mx.cpu():
excluded_names += ['fc', 'conv1']
if mx.current_context() == mx.gpu():
- excluded_names += ['relu0', 'relu1']
+ excluded_names += ['sum0', 'relu0', 'relu1']
excluded_names += ['concat']
optional_names = ['pool0']