This is an automated email from the ASF dual-hosted git repository.
anijain2305 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-tvm.git
The following commit(s) were added to refs/heads/master by this push:
new 38118be [ConvertLayout] Support QNN ops. (#5066)
38118be is described below
commit 38118befc0a7e8a3db87d652b30a9369abb60363
Author: Animesh Jain <[email protected]>
AuthorDate: Wed Mar 18 20:03:56 2020 -0700
[ConvertLayout] Support QNN ops. (#5066)
* [ConvertLayout] Support QNN ops.
* Changing layouts to C.
* Fixing dilation.
* Empty commit.
Co-authored-by: Ubuntu <[email protected]>
---
python/tvm/relay/op/nn/_nn.py | 10 +-
python/tvm/relay/qnn/op/__init__.py | 2 +-
python/tvm/relay/qnn/op/layout_conversions.py | 53 ++++++
src/relay/op/nn/bitserial.cc | 2 +-
src/relay/op/nn/convolution.cc | 19 +-
src/relay/op/nn/convolution.h | 15 ++
src/relay/op/nn/nn.cc | 12 +-
src/relay/op/nn/pad.cc | 2 +-
src/relay/op/nn/pooling.cc | 2 +-
src/relay/op/nn/upsampling.cc | 2 +-
src/relay/op/tensor/reduce.cc | 7 +-
src/relay/op/tensor/transform.cc | 57 +-----
src/relay/op/tensor/transform.h | 58 ++++++
src/relay/qnn/op/add.cc | 21 ++-
src/relay/qnn/op/concatenate.cc | 41 +++-
src/relay/qnn/op/convolution.cc | 20 +-
src/relay/qnn/op/requantize.cc | 77 +++++++-
src/relay/transforms/infer_layout_util.h | 17 +-
src/relay/transforms/transform_layout.h | 16 +-
tests/python/relay/test_pass_convert_op_layout.py | 217 ++++++++++++++++++++++
20 files changed, 544 insertions(+), 106 deletions(-)
diff --git a/python/tvm/relay/op/nn/_nn.py b/python/tvm/relay/op/nn/_nn.py
index c2fe6d0..a9bd900 100644
--- a/python/tvm/relay/op/nn/_nn.py
+++ b/python/tvm/relay/op/nn/_nn.py
@@ -138,8 +138,6 @@ def convert_conv2d(attrs, inputs, tinfos, desired_layout):
"""
# pylint: disable=import-outside-toplevel
from tvm import relay
- data_layout = attrs['data_layout']
- kernel_layout = attrs['kernel_layout']
data, weight = inputs
assert desired_layout == 'NCHW', \
"Currently only transformation to NCHW layout is supported."
@@ -147,13 +145,7 @@ def convert_conv2d(attrs, inputs, tinfos, desired_layout):
new_attrs = dict(attrs)
new_attrs['data_layout'] = desired_layout
new_attrs['kernel_layout'] = 'OIHW'
-
- if data_layout == 'NHWC' and kernel_layout == 'HWIO':
- # Convert (NHWC, HWIO) to (NCHW, OIHW)
- return relay.nn.conv2d(data, weight, **new_attrs)
- if data_layout == 'NHWC' and kernel_layout == 'HWOI':
- # Convert (NHWC, HWOI) to (NCHW, OIHW). Depthwise conv2d.
- return relay.nn.conv2d(data, weight, **new_attrs)
+ return relay.nn.conv2d(data, weight, **new_attrs)
return None
diff --git a/python/tvm/relay/qnn/op/__init__.py
b/python/tvm/relay/qnn/op/__init__.py
index 042dcb9..6d66e12 100644
--- a/python/tvm/relay/qnn/op/__init__.py
+++ b/python/tvm/relay/qnn/op/__init__.py
@@ -19,4 +19,4 @@
from __future__ import absolute_import as _abs
from .qnn import *
from .op import register_qnn_legalize
-from . import legalizations
+from . import legalizations, layout_conversions
diff --git a/python/tvm/relay/qnn/op/layout_conversions.py
b/python/tvm/relay/qnn/op/layout_conversions.py
new file mode 100644
index 0000000..f5850b8
--- /dev/null
+++ b/python/tvm/relay/qnn/op/layout_conversions.py
@@ -0,0 +1,53 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=invalid-name, unused-argument
+"""Convert layout related registration"""
+from __future__ import absolute_import
+
+from tvm.relay.op import op as reg
+
+
[email protected]_convert_op_layout("qnn.conv2d")
+def convert_qnn_conv2d(attrs, inputs, tinfos, desired_layout):
+ """Convert Layout pass registration for QNN conv2d op.
+
+ Parameters
+ ----------
+ attrs : tvm.ir.Attrs
+ Attributes of current convolution
+ inputs : list of tvm.relay.Expr
+ The args of the Relay expr to be legalized
+ tinfos : list of types
+ List of input and output types
+ desired_layout : str
+ The desired layout
+
+ Returns
+ -------
+ result : tvm.relay.Expr
+ The transformed expr
+ """
+ # pylint: disable=import-outside-toplevel
+ from tvm import relay
+ assert desired_layout == 'NCHW', \
+ "Currently only transformation to NCHW layout is supported."
+ if desired_layout == 'NCHW':
+ new_attrs = dict(attrs)
+ new_attrs['data_layout'] = desired_layout
+ new_attrs['kernel_layout'] = 'OIHW'
+ return relay.qnn.op.conv2d(*inputs, **new_attrs)
+ return None
diff --git a/src/relay/op/nn/bitserial.cc b/src/relay/op/nn/bitserial.cc
index 621fe77..9457b4b 100644
--- a/src/relay/op/nn/bitserial.cc
+++ b/src/relay/op/nn/bitserial.cc
@@ -39,7 +39,7 @@ template <typename T>
Array<Array<Layout>> BinaryConv2DInferCorrectLayout(const Attrs& attrs,
const Array<Layout>&
new_in_layouts,
const Array<Layout>&
old_in_layouts,
- const
Array<Array<IndexExpr>>& old_in_shapes) {
+ const
Array<tvm::relay::Type>& old_in_types) {
const T* params = attrs.as<T>();
// We always make other operators to fit the layouts of convolution layers
diff --git a/src/relay/op/nn/convolution.cc b/src/relay/op/nn/convolution.cc
index d642e2f..25dca08 100644
--- a/src/relay/op/nn/convolution.cc
+++ b/src/relay/op/nn/convolution.cc
@@ -34,23 +34,6 @@
namespace tvm {
namespace relay {
-
-template<typename T>
-Array<Array<Layout> > ConvInferCorrectLayout(
- const Attrs& attrs,
- const Array<Layout>& new_in_layouts,
- const Array<Layout>& old_in_layouts,
- const Array<Array<IndexExpr>> &old_in_shapes) {
- const T* params = attrs.as<T>();
-
- // We always make other operators to fit the layouts of convolution layers
- // So this inference ignores all inputs
- return Array<Array<Layout> >{{params->data_layout, params->kernel_layout},
- {params->out_layout == "" ?
- params->data_layout : params->out_layout}};
-}
-
-
template <typename T>
Expr MakeConv(Expr data,
Expr weight,
@@ -1048,7 +1031,7 @@ Array<Array<Layout> > Dilation2DInferCorrectLayout(
const Attrs& attrs,
const Array<Layout>& new_in_layouts,
const Array<Layout>& old_in_layouts,
- const Array<Array<IndexExpr>> &old_in_shapes) {
+ const Array<tvm::relay::Type> &old_in_types) {
const T* params = attrs.as<T>();
// We always make other operators to fit the layouts of convolution layers
diff --git a/src/relay/op/nn/convolution.h b/src/relay/op/nn/convolution.h
index d451215..e3fa02d 100644
--- a/src/relay/op/nn/convolution.h
+++ b/src/relay/op/nn/convolution.h
@@ -431,6 +431,21 @@ bool Dilation2DRel(const Array<Type>& types, int
num_inputs, const Attrs& attrs,
return true;
}
+template<typename T>
+Array<Array<Layout> > ConvInferCorrectLayout(
+ const Attrs& attrs,
+ const Array<Layout>& new_in_layouts,
+ const Array<Layout>& old_in_layouts,
+ const Array<tvm::relay::Type> &old_in_types) {
+ const T* params = attrs.as<T>();
+
+ // We always make other operators to fit the layouts of convolution layers
+ // So this inference ignores all inputs
+ return Array<Array<Layout> >{{params->data_layout, params->kernel_layout},
+ {params->out_layout == "" ?
+ params->data_layout : params->out_layout}};
+}
+
} // namespace relay
} // namespace tvm
#endif // TVM_RELAY_OP_NN_CONVOLUTION_H_
diff --git a/src/relay/op/nn/nn.cc b/src/relay/op/nn/nn.cc
index 560d06f..5203ffc 100644
--- a/src/relay/op/nn/nn.cc
+++ b/src/relay/op/nn/nn.cc
@@ -272,10 +272,10 @@ Array<Array<Layout> > PReluInferCorrectLayout(
const Attrs& attrs,
const Array<Layout>& new_in_layouts,
const Array<Layout>& old_in_layouts,
- const Array<Array<IndexExpr>> &old_in_shapes) {
+ const Array<tvm::relay::Type> &old_in_types) {
CHECK_EQ(old_in_layouts.size(), 2U);
- CHECK_EQ(old_in_shapes.size(), 2U);
+ CHECK_EQ(old_in_types.size(), 2U);
Layout data_layout = old_in_layouts[0];
if (new_in_layouts.defined()) {
CHECK_EQ(new_in_layouts.size(), 2U);
@@ -615,9 +615,15 @@ TVM_REGISTER_NODE_TYPE(BatchNormAttrs);
Array<Array<Layout>> BatchNormInferCorrectLayout(const Attrs& attrs,
const Array<Layout>&
new_in_layouts,
const Array<Layout>&
old_in_layouts,
- const
Array<Array<IndexExpr>>& old_in_shapes) {
+ const
Array<tvm::relay::Type>& old_in_types) {
BatchNormAttrs* param =
const_cast<BatchNormAttrs*>(attrs.as<BatchNormAttrs>());
+ Array<Array<IndexExpr>> old_in_shapes;
+ for (auto old_in_t : old_in_types) {
+ CHECK(old_in_t.as<TensorTypeNode>());
+ old_in_shapes.push_back(old_in_t.as<TensorTypeNode>()->shape);
+ }
+
size_t axis =
param->axis < 0 ? param->axis + old_in_shapes[0].size() :
static_cast<size_t>(param->axis);
diff --git a/src/relay/op/nn/pad.cc b/src/relay/op/nn/pad.cc
index 84a4940..3506f42 100644
--- a/src/relay/op/nn/pad.cc
+++ b/src/relay/op/nn/pad.cc
@@ -39,7 +39,7 @@ Array<Array<Layout> > PadInferCorrectLayout(
const Attrs& attrs,
const Array<Layout>& new_in_layouts,
const Array<Layout>& old_in_layouts,
- const Array<Array<IndexExpr>> &old_in_shapes) {
+ const Array<tvm::relay::Type> &old_in_types) {
// NOTE: Discard "const" qualifier here.
PadAttrs *params = const_cast<PadAttrs*>(attrs.as<PadAttrs>());
diff --git a/src/relay/op/nn/pooling.cc b/src/relay/op/nn/pooling.cc
index b18ed90..00ec55a 100644
--- a/src/relay/op/nn/pooling.cc
+++ b/src/relay/op/nn/pooling.cc
@@ -41,7 +41,7 @@ Array<Array<Layout> > PoolInferCorrectLayout(
const Attrs& attrs,
const Array<Layout>& new_in_layouts,
const Array<Layout>& old_in_layouts,
- const Array<Array<IndexExpr>> &old_in_shapes) {
+ const Array<tvm::relay::Type> &old_in_types) {
// NOTE: Discard "const" qualifier here.
T *params = const_cast<T*>(attrs.as<T>());
diff --git a/src/relay/op/nn/upsampling.cc b/src/relay/op/nn/upsampling.cc
index 477cec7..2c1f45d 100644
--- a/src/relay/op/nn/upsampling.cc
+++ b/src/relay/op/nn/upsampling.cc
@@ -39,7 +39,7 @@ Array<Array<Layout> > UpsamplingInferCorrectLayout(
const Attrs& attrs,
const Array<Layout>& new_in_layouts,
const Array<Layout>& old_in_layouts,
- const Array<Array<IndexExpr>> &old_in_shapes) {
+ const Array<tvm::relay::Type> &old_in_types) {
// NOTE: Discard "const" qualifier here.
T *params = const_cast<T*>(attrs.as<T>());
diff --git a/src/relay/op/tensor/reduce.cc b/src/relay/op/tensor/reduce.cc
index 5e0795e..74bc84e 100644
--- a/src/relay/op/tensor/reduce.cc
+++ b/src/relay/op/tensor/reduce.cc
@@ -122,11 +122,16 @@ Array<Integer> GetExcludeAxes(size_t indim,
Array<Array<Layout>> ReduceInferCorrectLayout(const Attrs& attrs,
const Array<Layout>&
new_in_layouts,
const Array<Layout>&
old_in_layouts,
- const Array<Array<IndexExpr>>&
old_in_shapes) {
+ const Array<tvm::relay::Type>&
old_in_types) {
// NOTE: Discard "const" qualifier here.
ReduceAttrs* params = const_cast<ReduceAttrs*>(attrs.as<ReduceAttrs>());
// Get the reduce axes.
+ Array<Array<IndexExpr>> old_in_shapes;
+ for (auto old_in_t : old_in_types) {
+ CHECK(old_in_t.as<TensorTypeNode>());
+ old_in_shapes.push_back(old_in_t.as<TensorTypeNode>()->shape);
+ }
uint32_t indim = old_in_shapes[0].size();
auto r_axes = GetReduceAxes(indim, params->axis, params->exclude);
diff --git a/src/relay/op/tensor/transform.cc b/src/relay/op/tensor/transform.cc
index 17d9788..32df221 100644
--- a/src/relay/op/tensor/transform.cc
+++ b/src/relay/op/tensor/transform.cc
@@ -275,54 +275,6 @@ Array<te::Tensor> ConcatenateCompute(const Attrs& attrs,
return { topi::concatenate(inputs, param->axis) };
}
-Array<Array<Layout>> ConcatenateLayout(
- const Attrs& attrs,
- const Array<Layout>& new_in_layouts,
- const Array<Layout>& old_in_layouts,
- const Array<Array<IndexExpr>> &old_in_shapes) {
- ConcatenateAttrs* param =
const_cast<ConcatenateAttrs*>(attrs.as<ConcatenateAttrs>());
-
- size_t axis = param->axis < 0 ? param->axis + old_in_shapes[0].size() :
- static_cast<size_t>(param->axis);
-
- Layout ret;
- bool is_new_layout_selected = false;
- if (new_in_layouts.defined()) { // this function is called after some
operators are alternated.
- // If all the new input layouts are same, the new in layout gets selected.
For axis, the new
- // axis in the new layout is identified. The param->axis is then modified
on the fly to conform
- // to the new input layout.
- const auto& concate_dim = old_in_layouts[0][axis];
- bool all_input_layouts_same = true;
- for (auto new_layout : new_in_layouts) {
- if (!new_layout.Equals(new_in_layouts[0])) {
- all_input_layouts_same = false;
- }
- }
- if (all_input_layouts_same) {
- auto new_index = new_in_layouts[0].IndexOf(concate_dim);
- ret = new_in_layouts[0];
- param->axis = new_index;
- is_new_layout_selected = true;
- }
- }
-
- if (!is_new_layout_selected) {
- // this function is called on the original correct relay ir
- for (size_t i = 0; i < old_in_layouts.size(); ++i) {
- if (old_in_layouts[i].defined()) {
- ret = old_in_layouts[i];
- break;
- }
- }
-
- if (ret.ndim() <= axis || !ret[axis].IsPrimal()) {
- return Array<Array<Layout> > {{Layout::Undef()}, {Layout::Undef()}};
- }
- }
-
- return Array<Array<Layout> > {Array<Layout>(old_in_layouts.size(), ret),
{ret}};
-}
-
Expr MakeConcatenate(Expr data,
int axis) {
auto attrs = make_object<ConcatenateAttrs>();
@@ -1933,7 +1885,14 @@ Array<Array<Layout> > StridedSliceInferCorrectLayout(
const Attrs& attrs,
const Array<Layout>& new_in_layouts,
const Array<Layout>& old_in_layouts,
- const Array<Array<IndexExpr>>& old_in_shapes) {
+ const Array<tvm::relay::Type>& old_in_types) {
+
+ Array<Array<IndexExpr>> old_in_shapes;
+ for (auto old_in_t : old_in_types) {
+ CHECK(old_in_t.as<TensorTypeNode>());
+ old_in_shapes.push_back(old_in_t.as<TensorTypeNode>()->shape);
+ }
+
CHECK(old_in_layouts.defined());
CHECK_EQ(old_in_layouts.size(), 1);
CHECK(old_in_shapes.defined());
diff --git a/src/relay/op/tensor/transform.h b/src/relay/op/tensor/transform.h
index b69f6e7..a64dcd5 100644
--- a/src/relay/op/tensor/transform.h
+++ b/src/relay/op/tensor/transform.h
@@ -25,6 +25,7 @@
#define TVM_RELAY_OP_TENSOR_TRANSFORM_H_
#include <tvm/ir/error.h>
+#include <tvm/relay/attrs/transform.h>
#include <vector>
#include <algorithm>
#include <limits>
@@ -124,6 +125,63 @@ bool ConcatenateRel(const Array<Type>& types,
return true;
}
+static inline Array<Array<Layout>> ConcatenateLayout(
+ const Attrs& attrs,
+ const Array<Layout>& new_in_layouts,
+ const Array<Layout>& old_in_layouts,
+ const Array<tvm::relay::Type> &old_in_types) {
+ ConcatenateAttrs* param =
const_cast<ConcatenateAttrs*>(attrs.as<ConcatenateAttrs>());
+
+ Array<Array<IndexExpr>> old_in_shapes;
+ CHECK_EQ(old_in_types.size(), 1);
+ for (auto old_in_tuple_t : old_in_types) {
+ CHECK(old_in_tuple_t.as<TupleTypeNode>());
+ for (auto old_in_t : old_in_tuple_t.as<TupleTypeNode>()->fields) {
+ old_in_shapes.push_back(old_in_t.as<TensorTypeNode>()->shape);
+ }
+ }
+
+ size_t axis = param->axis < 0 ? param->axis + old_in_shapes[0].size() :
+ static_cast<size_t>(param->axis);
+
+ Layout ret;
+ bool is_new_layout_selected = false;
+ if (new_in_layouts.defined()) { // this function is called after some
operators are alternated.
+ // If all the new input layouts are same, the new in layout gets selected.
For axis, the new
+ // axis in the new layout is identified. The param->axis is then modified
on the fly to conform
+ // to the new input layout.
+ const auto& concate_dim = old_in_layouts[0][axis];
+ bool all_input_layouts_same = true;
+ for (auto new_layout : new_in_layouts) {
+ if (!new_layout.Equals(new_in_layouts[0])) {
+ all_input_layouts_same = false;
+ }
+ }
+ if (all_input_layouts_same) {
+ auto new_index = new_in_layouts[0].IndexOf(concate_dim);
+ ret = new_in_layouts[0];
+ param->axis = new_index;
+ is_new_layout_selected = true;
+ }
+ }
+
+ if (!is_new_layout_selected) {
+ // this function is called on the original correct relay ir
+ for (size_t i = 0; i < old_in_layouts.size(); ++i) {
+ if (old_in_layouts[i].defined()) {
+ ret = old_in_layouts[i];
+ break;
+ }
+ }
+
+ if (ret.ndim() <= axis || !ret[axis].IsPrimal()) {
+ return Array<Array<Layout> > {{Layout::Undef()}, {Layout::Undef()}};
+ }
+ }
+
+ return Array<Array<Layout> > {Array<Layout>(old_in_layouts.size(), ret),
{ret}};
+}
+
} // namespace relay
} // namespace tvm
#endif // TVM_RELAY_OP_TENSOR_TRANSFORM_H_
diff --git a/src/relay/qnn/op/add.cc b/src/relay/qnn/op/add.cc
index b70954b..479138f 100644
--- a/src/relay/qnn/op/add.cc
+++ b/src/relay/qnn/op/add.cc
@@ -25,6 +25,7 @@
#include <tvm/relay/op_attr_types.h>
#include <tvm/relay/qnn/attrs.h>
#include "../../transforms/pattern_util.h"
+#include "../../transforms/infer_layout_util.h"
#include "../util.h"
#include "op_common.h"
@@ -32,6 +33,23 @@ namespace tvm {
namespace relay {
namespace qnn {
+/*! \brief Infer layout for QNN binary broadcast operators */
+Array<Array<Layout> > QnnBinaryBroadcastLayout(const Attrs& attrs,
+ const Array<Layout>&
new_in_layouts,
+ const Array<Layout>&
old_in_layouts,
+ const Array<tvm::relay::Type>&
old_in_types) {
+ // Use Relay Binary Broadcast Infer correct layout.
+ auto layouts = BinaryBroadcastLayout(attrs, new_in_layouts, old_in_layouts,
old_in_types);
+
+ // Fill the layouts of remaining input tensors - scales and zero points. The
layouts of these
+ // tensors can be treated as C.
+ Layout channel_layout = Layout("C");
+ Array<Layout> input_layouts = {layouts[0][0], layouts[0][1],
channel_layout, channel_layout,
+ channel_layout, channel_layout,
channel_layout, channel_layout};
+ Array<Layout> output_layouts = layouts[1];
+ return {input_layouts, output_layouts};
+}
+
/*
* \brief Canonicalizes the QNN add op.
* \param attrs The QNN concatenate attrs.
@@ -118,7 +136,8 @@ Expr QnnAddCanonicalize(const Attrs& attrs, const
Array<Expr>& new_args,
QNN_REGISTER_BINARY_OP("add")
.describe("Elementwise add with with broadcasting for quantized tensors.")
.set_support_level(11)
-.set_attr<FTVMLegalize>("FTVMQnnCanonicalize", QnnAddCanonicalize);
+.set_attr<FTVMLegalize>("FTVMQnnCanonicalize", QnnAddCanonicalize)
+.set_attr<FInferCorrectLayout>("FInferCorrectLayout",
QnnBinaryBroadcastLayout);
} // namespace qnn
} // namespace relay
diff --git a/src/relay/qnn/op/concatenate.cc b/src/relay/qnn/op/concatenate.cc
index 3f4e315..110ee6f 100644
--- a/src/relay/qnn/op/concatenate.cc
+++ b/src/relay/qnn/op/concatenate.cc
@@ -28,6 +28,7 @@
#include <tvm/relay/qnn/attrs.h>
#include "../../op/tensor/transform.h"
#include "../../transforms/pattern_util.h"
+#include "../../transforms/infer_layout_util.h"
#include "../util.h"
namespace tvm {
@@ -70,6 +71,43 @@ bool QnnConcatenateRel(const Array<Type>& types, int
num_inputs, const Attrs& at
return ConcatenateRel<ConcatenateAttrs>(tensor_types, 2, attrs, reporter);
}
+Array<Array<Layout>> QnnConcatenateLayout(const Attrs& attrs, const
Array<Layout>& new_in_layouts,
+ const Array<Layout>& old_in_layouts,
+ const Array<tvm::relay::Type>&
old_in_types) {
+ // Collect the layouts and types to reuse Relay Concatenate Infer Correct
Layout.
+ CHECK_EQ(old_in_types.size(), 5);
+ auto input_tuple_type = old_in_types[0].as<TupleTypeNode>();
+ CHECK(input_tuple_type);
+ auto num_input_tensors = input_tuple_type->fields.size();
+
+ Array<Layout> relay_new_in_layouts(nullptr);
+ if (new_in_layouts.defined()) {
+ relay_new_in_layouts =
+ Array<Layout>(new_in_layouts.begin(), new_in_layouts.begin() +
num_input_tensors);
+ }
+ Array<Layout> relay_old_in_layouts(nullptr);
+ if (old_in_layouts.defined()) {
+ relay_old_in_layouts =
+ Array<Layout>(old_in_layouts.begin(), old_in_layouts.begin() +
num_input_tensors);
+ }
+
+ // Use Relay Concatenate Infer Correct layout to infer the layouts for data
tensors.
+ auto layouts =
+ ConcatenateLayout(attrs, relay_new_in_layouts, relay_old_in_layouts,
{old_in_types[0]});
+
+ // Fill the layouts of remaining input tensors - scales and zero points. The
layouts of these
+ // tensors can be treated as channel layout. Total number of these tensors
are 2 * num of data
+ // tensors (scale and zero point for each input data tensor) + 2 for the
output data tensor.
+ Layout channel_layout = Layout("C");
+ Array<Layout> input_layouts = layouts[0];
+
+ for (size_t i = 0; i < 2 * num_input_tensors + 2; i++) {
+ input_layouts.push_back(channel_layout);
+ }
+ Array<Layout> output_layouts = layouts[1];
+ return {input_layouts, output_layouts};
+}
+
Expr MakeQnnConcatenate(Expr data, Expr input_scales, Expr input_zero_points,
Expr output_scale,
Expr output_zero_point, int axis) {
auto attrs = make_object<ConcatenateAttrs>();
@@ -161,7 +199,8 @@ RELAY_REGISTER_OP("qnn.concatenate")
.add_argument("output_zero_point", "Tensor", "The quantization zero_point of
the output tensor.")
.set_support_level(11)
.add_type_rel("QnnConcatenate", QnnConcatenateRel)
-.set_attr<FTVMLegalize>("FTVMQnnCanonicalize", ConcatenateQnnCanonicalize);
+.set_attr<FTVMLegalize>("FTVMQnnCanonicalize", ConcatenateQnnCanonicalize)
+.set_attr<FInferCorrectLayout>("FInferCorrectLayout", QnnConcatenateLayout);
TVM_REGISTER_GLOBAL("relay.qnn.op._make.concatenate")
.set_body_typed(MakeQnnConcatenate);
diff --git a/src/relay/qnn/op/convolution.cc b/src/relay/qnn/op/convolution.cc
index 30308fd..791b035 100644
--- a/src/relay/qnn/op/convolution.cc
+++ b/src/relay/qnn/op/convolution.cc
@@ -68,6 +68,23 @@ bool QnnConv2DRel(const Array<Type>& types, int num_inputs,
const Attrs& attrs,
return Conv2DRel<Conv2DAttrs>(tensor_types, 3, attrs, reporter);
}
+Array<Array<Layout>> QnnConvInferCorrectLayout(const Attrs& attrs,
+ const Array<Layout>&
new_in_layouts,
+ const Array<Layout>&
old_in_layouts,
+ const Array<tvm::relay::Type>&
old_in_types) {
+ // Use Relay Conv2D Infer correct layout.
+ auto layouts =
+ ConvInferCorrectLayout<Conv2DAttrs>(attrs, new_in_layouts,
old_in_layouts, old_in_types);
+
+ // Fill the layouts of remaining input tensors - scales and zero points. The
layouts of these
+ // tensors can be treated as channel layout.
+ Layout channel_layout = Layout("C");
+ Array<Layout> input_layouts = {layouts[0][0], layouts[0][1],
channel_layout,
+ channel_layout, channel_layout,
channel_layout};
+ Array<Layout> output_layouts = layouts[1];
+ return {input_layouts, output_layouts};
+}
+
bool is_depthwise(const Conv2DAttrs* param) {
return param->channels.defined() && tvm::tir::Equal(param->channels,
param->groups) &&
param->groups != 1;
@@ -684,7 +701,8 @@ operator to understand how to scale back the int32 output
to (u)int8.
.add_argument("weight_zero_point", "Tensor", "The quantization zero_point of
the weight tensor.")
.set_support_level(11)
.add_type_rel("QnnConv2D", QnnConv2DRel)
-.set_attr<FTVMLegalize>("FTVMQnnCanonicalize", QnnConv2DCanonicalize);
+.set_attr<FTVMLegalize>("FTVMQnnCanonicalize", QnnConv2DCanonicalize)
+.set_attr<FInferCorrectLayout>("FInferCorrectLayout",
QnnConvInferCorrectLayout);
TVM_REGISTER_GLOBAL("relay.qnn.op._make.conv2d").set_body_typed(MakeQnnConv2D);
diff --git a/src/relay/qnn/op/requantize.cc b/src/relay/qnn/op/requantize.cc
index 9b2ed91..f3351a2 100644
--- a/src/relay/qnn/op/requantize.cc
+++ b/src/relay/qnn/op/requantize.cc
@@ -26,6 +26,7 @@
#include <tvm/relay/op_attr_types.h>
#include <tvm/relay/qnn/attrs.h>
#include "../../transforms/pattern_util.h"
+#include "../../transforms/infer_layout_util.h"
#include "../util.h"
namespace tvm {
@@ -34,6 +35,79 @@ namespace qnn {
TVM_REGISTER_NODE_TYPE(RequantizeAttrs);
+Array<Array<Layout>> RequantizeInferCorrectLayout(const Attrs& attrs,
+ const Array<Layout>&
new_in_layouts,
+ const Array<Layout>&
old_in_layouts,
+ const
Array<tvm::relay::Type>& old_in_types) {
+ RequantizeAttrs* param =
const_cast<RequantizeAttrs*>(attrs.as<RequantizeAttrs>());
+
+ Array<Array<IndexExpr>> old_in_shapes;
+ for (auto old_in_t : old_in_types) {
+ CHECK(old_in_t.as<TensorTypeNode>());
+ old_in_shapes.push_back(old_in_t.as<TensorTypeNode>()->shape);
+ }
+
+ Array<Layout> input_layouts, output_layouts;
+ if (new_in_layouts.defined()) {
+ // Adapt to new layout. The axis has to change.
+ // Record original reduce axis. Convert to the modified layout axis.
+ CHECK_EQ(new_in_layouts.size(), 5);
+ CHECK_EQ(old_in_layouts.size(), 5);
+
+ // 1) Get the axis.
+ int axis = param->axis;
+ axis = (axis == -1) ? old_in_shapes[0].size() - 1 : axis;
+
+ // 2) Collect the original axis
+ std::string old_dim = old_in_layouts[0][axis].name();
+
+ // 3) Collect the new axes by walking new_layout.
+ tvm::Integer new_axis;
+ std::string new_layout_string = "";
+ int axis_index = 0;
+ for (auto iter_var : new_in_layouts[0]->axes) {
+ const auto& layout_axis = LayoutAxis::Get(iter_var);
+ const std::string& layout_dim = layout_axis.name();
+ if (old_dim == layout_dim) {
+ new_axis = tvm::Integer(axis_index);
+ }
+ // Collect only the primal axis.
+ if (layout_axis.IsPrimal()) {
+ new_layout_string += layout_dim;
+ axis_index++;
+ }
+ }
+
+ // 4) Set the new axis and layout.
+ Layout new_layout = Layout(new_layout_string);
+
+ // Fill the layouts of remaining input tensors - scales and zero points.
The layouts of these
+ // tensors can be treated as channel layout.
+ Layout channel_layout = Layout("C");
+ input_layouts = {new_layout, channel_layout, channel_layout,
channel_layout, channel_layout};
+ output_layouts = {new_layout};
+ param->axis = new_axis;
+ } else if (old_in_layouts.defined()) {
+ // If the new layout is undefined, set the old layout as the inferred
layout.
+ CHECK_EQ(old_in_layouts.size(), 5);
+
+ Layout old_layout = old_in_layouts[0];
+
+ // Fill the layouts of remaining input tensors - scales and zero points.
The layouts of these
+ // tensors can be treated as channel layout.
+ Layout channel_layout = Layout("C");
+ input_layouts = {old_layout, channel_layout, channel_layout,
channel_layout, channel_layout};
+ output_layouts = {old_layout};
+ } else {
+ // Set the layouts to undef.
+ Layout undef = Layout::Undef();
+ input_layouts = Array<Layout>(5, undef);
+ output_layouts = {undef};
+ }
+
+ return Array<Array<Layout>>{input_layouts, output_layouts};
+}
+
// Lowering of qnn.requantize op
/*
@@ -247,7 +321,8 @@ Q_output = zp_output + (scale_input)/(scale_output) *
(Q_input - zp_input)
.add_argument("output_zero_point", "Tensor", "The quantization zero_point of
the output tensor.")
.set_support_level(11)
.add_type_rel("Requantize", RequantizeRel)
-.set_attr<FTVMLegalize>("FTVMQnnCanonicalize", RequantizeQnnCanonicalize);
+.set_attr<FTVMLegalize>("FTVMQnnCanonicalize", RequantizeQnnCanonicalize)
+.set_attr<FInferCorrectLayout>("FInferCorrectLayout",
RequantizeInferCorrectLayout);
TVM_REGISTER_GLOBAL("relay.qnn.op._make.requantize")
.set_body_typed(MakeRequantize);
diff --git a/src/relay/transforms/infer_layout_util.h
b/src/relay/transforms/infer_layout_util.h
index eeb61e3..ca73003 100644
--- a/src/relay/transforms/infer_layout_util.h
+++ b/src/relay/transforms/infer_layout_util.h
@@ -90,7 +90,7 @@ inline Layout AdjustSubordinateFactors(const Layout&
src_layout, const Layout& o
* This can be undefined, which means we call this
function before alternating
* any operators.
* \param old_in_layouts The layouts of input arguments before alter_op_layout.
- * \param old_in_shapes The shapes of old input arguments.
+ * \param old_in_types The types of old input arguments.
* \return infered_layout An array of two elements that are inferred input
layouts and
* inferred output layouts.
*/
@@ -98,13 +98,13 @@ using FInferCorrectLayout = runtime::TypedPackedFunc<
Array<Array<Layout>>(const Attrs& attrs,
const Array<Layout>& new_in_layouts,
const Array<Layout>& old_in_layouts,
- const Array<Array<IndexExpr>> &old_in_shapes)>;
+ const Array<tvm::relay::Type> &old_in_types)>;
/*! \brief take arbitrary input layout and copy to output */
inline Array<Array<Layout> > ElemwiseArbitraryLayout(const Attrs& attrs,
const Array<Layout>&
new_in_layouts,
const Array<Layout>&
old_in_layouts,
- const
Array<Array<IndexExpr>> &old_in_shapes) {
+ const
Array<tvm::relay::Type> &old_in_types) {
Layout ret;
if (new_in_layouts.defined()) {
@@ -126,8 +126,13 @@ inline Array<Array<Layout> > ElemwiseArbitraryLayout(const
Attrs& attrs,
inline Array<Array<Layout> > BinaryBroadcastLayout(const Attrs& attrs,
const Array<Layout>&
new_in_layouts,
const Array<Layout>&
old_in_layouts,
- const
Array<Array<IndexExpr>> &old_in_shapes) {
+ const
Array<tvm::relay::Type> &old_in_types) {
Array<Layout> layouts;
+ Array<Array<IndexExpr>> old_in_shapes;
+ for (auto old_in_t : old_in_types) {
+ CHECK(old_in_t.as<TensorTypeNode>());
+ old_in_shapes.push_back(old_in_t.as<TensorTypeNode>()->shape);
+ }
if (new_in_layouts.defined()) {
layouts.assign(new_in_layouts.begin(), new_in_layouts.end());
@@ -203,7 +208,7 @@ inline Array<Array<Layout> > BinaryBroadcastLayout(const
Attrs& attrs,
*/
static inline std::tuple<Array<Layout>, Array<Layout>, bool>
InferCorrectLayouts(
const Call& call, const Array<Layout>& new_in_layouts, const
Array<Layout>& old_in_layouts,
- const Array<Array<IndexExpr>>& old_in_shapes) {
+ const Array<tvm::relay::Type>& old_in_types) {
static auto finfer_layout =
Op::GetAttr<FInferCorrectLayout>("FInferCorrectLayout");
if (!call->op.as<OpNode>()) {
return std::make_tuple<>(Array<Layout>(nullptr), Array<Layout>(nullptr),
false);
@@ -213,7 +218,7 @@ static inline std::tuple<Array<Layout>, Array<Layout>,
bool> InferCorrectLayouts
if (finfer_layout.count(op)) {
Array<Array<Layout>> inferred_layouts;
inferred_layouts =
- finfer_layout[op](call->attrs, new_in_layouts, old_in_layouts,
old_in_shapes);
+ finfer_layout[op](call->attrs, new_in_layouts, old_in_layouts,
old_in_types);
CHECK_EQ(inferred_layouts.size(), 2)
<< "FInferCorrectLayout should return an array with size of 2";
for (auto x : inferred_layouts) {
diff --git a/src/relay/transforms/transform_layout.h
b/src/relay/transforms/transform_layout.h
index 7204a7e..cee3aaf 100644
--- a/src/relay/transforms/transform_layout.h
+++ b/src/relay/transforms/transform_layout.h
@@ -225,7 +225,6 @@ template <class TransformMemorizerT>
Expr LayoutRewriter(const Call& ref_call, const Array<Expr>& new_args, const
ObjectRef& ctx) {
std::vector<LayoutAlternatedExpr<TransformMemorizerT>> inputs;
std::vector<Expr> normal_new_args;
- Array<Array<IndexExpr>> input_shapes;
// NOTE: discard the "const" qualifier
// TransformMemorizer memorizer = Downcast<TransformMemorizer>(ctx);
@@ -273,21 +272,16 @@ Expr LayoutRewriter(const Call& ref_call, const
Array<Expr>& new_args, const Obj
new_in.push_back(inp->new_layout);
}
+ // Collect input types to pass on to Infer Correct Layout.
+ tvm::Array<tvm::relay::Type> types;
for (auto arg : ref_call->args) {
- if (arg->IsInstance<TupleNode>()) { // flatten tuple
- Tuple tuple_arg = Downcast<Tuple>(arg);
- for (auto x : tuple_arg->fields) {
- input_shapes.push_back(x->type_as<TensorTypeNode>()->shape);
- }
- } else {
- input_shapes.push_back(arg->type_as<TensorTypeNode>()->shape);
- }
+ types.push_back(arg->checked_type());
}
// old_in, old_out = op.infer(old_in)
bool success = false;
std::tie(old_in, old_out, success) =
- InferCorrectLayouts(ref_call, Array<Layout>(nullptr), old_in,
input_shapes);
+ InferCorrectLayouts(ref_call, Array<Layout>(nullptr), old_in, types);
if (!success) {
return Expr(nullptr);
}
@@ -307,7 +301,7 @@ Expr LayoutRewriter(const Call& ref_call, const
Array<Expr>& new_args, const Obj
if (new_call->op->IsInstance<OpNode>()) {
success = false;
std::tie(new_in2, new_out, success) =
- InferCorrectLayouts(new_call, new_in, old_in, input_shapes);
+ InferCorrectLayouts(new_call, new_in, old_in, types);
if (!success) {
return Expr(nullptr);
}
diff --git a/tests/python/relay/test_pass_convert_op_layout.py
b/tests/python/relay/test_pass_convert_op_layout.py
index f9e7ca9..9e8f662 100644
--- a/tests/python/relay/test_pass_convert_op_layout.py
+++ b/tests/python/relay/test_pass_convert_op_layout.py
@@ -398,6 +398,220 @@ def test_conv_bn_convert_layout():
assert analysis.alpha_equal(a, b), "Actual = \n" + str(a)
+def test_qnn_conv_requantize_convert_layout():
+ def before():
+ x = relay.var("x", shape=(1, 56, 56, 64), dtype='int8')
+ weight = relay.var('weight', shape=(3, 3, 64, 64), dtype='int8')
+ y = relay.qnn.op.conv2d(x, weight,
+ relay.const(1, 'int32'),
+ relay.const(1, 'int32'),
+ relay.const(1, 'float32'),
+ relay.const(1, 'float32'),
+ channels=64,
+ kernel_size=(3, 3),
+ padding=(1, 1),
+ data_layout='NHWC',
+ kernel_layout='HWIO')
+ y = relay.qnn.op.requantize(y,
+ relay.const(1, 'float32'),
+ relay.const(1, 'int32'),
+ relay.const(1, 'float32'),
+ relay.const(1, 'int32'),
+ out_dtype='int32')
+ y = relay.nn.relu(y)
+ y = relay.Function([x, weight], y)
+ return y
+
+ def expected():
+ x = relay.var("x", shape=(1, 56, 56, 64), dtype='int8')
+ weight = relay.var('weight', shape=(3, 3, 64, 64), dtype='int8')
+ x = relay.layout_transform(x, 'NHWC', 'NCHW')
+ weight = relay.layout_transform(weight, 'HWIO', 'OIHW')
+ y = relay.qnn.op.conv2d(x, weight,
+ relay.const(1, 'int32'),
+ relay.const(1, 'int32'),
+ relay.const(1, 'float32'),
+ relay.const(1, 'float32'),
+ channels=64,
+ kernel_size=(3, 3),
+ padding=(1, 1))
+ y = relay.qnn.op.requantize(y,
+ relay.const(1, 'float32'),
+ relay.const(1, 'int32'),
+ relay.const(1, 'float32'),
+ relay.const(1, 'int32'),
+ axis=1,
+ out_dtype='int32')
+ y = relay.nn.relu(y)
+ y = relay.layout_transform(y, 'NCHW', 'NHWC')
+ y = relay.Function(relay.analysis.free_vars(y), y)
+ return y
+
+ a = before()
+ a = run_opt_pass(a, transform.ConvertLayout('NCHW'))
+ b = run_opt_pass(expected(), transform.InferType())
+
+ assert analysis.alpha_equal(a, b), "Actual = \n" + str(a)
+
+
+def test_qnn_conv_concat_convert_layout():
+ def before():
+ x = relay.var("x", shape=(1, 56, 56, 64), dtype='int8')
+ weight1 = relay.var('weight1', shape=(3, 3, 64, 64), dtype='int8')
+ weight2 = relay.var('weight2', shape=(3, 3, 64, 64), dtype='int8')
+ y = relay.qnn.op.conv2d(x, weight1,
+ relay.const(1, 'int32'),
+ relay.const(1, 'int32'),
+ relay.const(1, 'float32'),
+ relay.const(1, 'float32'),
+ channels=64,
+ kernel_size=(3, 3),
+ padding=(1, 1),
+ data_layout='NHWC',
+ kernel_layout='HWIO')
+ y1 = relay.qnn.op.conv2d(y, weight2,
+ relay.const(1, 'int32'),
+ relay.const(1, 'int32'),
+ relay.const(1, 'float32'),
+ relay.const(1, 'float32'),
+ channels=64,
+ kernel_size=(3, 3),
+ padding=(1, 1),
+ data_layout='NHWC',
+ kernel_layout='HWIO')
+ y = relay.cast(y, 'int8')
+ y1 = relay.cast(y, 'int8')
+ ret = relay.qnn.op.concatenate([y, y1],
+                                   [relay.const(1, 'float32'), relay.const(1, 'float32')],
+                                   [relay.const(1, 'int32'), relay.const(1, 'int32')],
+ relay.const(1, 'float32'),
+ relay.const(1, 'int32'),
+ axis=3)
+ y = relay.Function(analysis.free_vars(ret), ret)
+ return y
+
+ def expected():
+ x = relay.var("x", shape=(1, 56, 56, 64), dtype='int8')
+ weight1 = relay.var('weight1', shape=(3, 3, 64, 64), dtype='int8')
+ weight2 = relay.var('weight2', shape=(3, 3, 64, 64), dtype='int8')
+ weight1 = relay.layout_transform(weight1, 'HWIO', 'OIHW')
+ weight2 = relay.layout_transform(weight2, 'HWIO', 'OIHW')
+ y = relay.layout_transform(x, "NHWC", "NCHW")
+ y = relay.qnn.op.conv2d(y, weight1,
+ relay.const(1, 'int32'),
+ relay.const(1, 'int32'),
+ relay.const(1, 'float32'),
+ relay.const(1, 'float32'),
+ channels=64,
+ kernel_size=(3, 3),
+ padding=(1, 1))
+ y1 = relay.qnn.op.conv2d(y, weight2,
+ relay.const(1, 'int32'),
+ relay.const(1, 'int32'),
+ relay.const(1, 'float32'),
+ relay.const(1, 'float32'),
+ channels=64,
+ kernel_size=(3, 3),
+ padding=(1, 1))
+ y = relay.cast(y, 'int8')
+ y1 = relay.cast(y, 'int8')
+ ret = relay.qnn.op.concatenate([y, y1],
+                                   [relay.const(1, 'float32'), relay.const(1, 'float32')],
+                                   [relay.const(1, 'int32'), relay.const(1, 'int32')],
+ relay.const(1, 'float32'),
+ relay.const(1, 'int32'),
+ axis=1)
+ ret = relay.layout_transform(ret, "NCHW", "NHWC")
+ y = relay.Function(analysis.free_vars(ret), ret)
+ return y
+
+ a = before()
+ a = run_opt_pass(a, transform.ConvertLayout('NCHW'))
+ b = run_opt_pass(expected(), transform.InferType())
+
+ assert analysis.alpha_equal(a, b), "Actual = \n" + str(a)
+
+
+def test_qnn_conv_add_convert_layout():
+ def before():
+ x = relay.var("x", shape=(1, 56, 56, 64), dtype='int8')
+ weight1 = relay.var('weight1', shape=(3, 3, 64, 64), dtype='int8')
+ weight2 = relay.var('weight2', shape=(3, 3, 64, 64), dtype='int8')
+ y = relay.qnn.op.conv2d(x, weight1,
+ relay.const(1, 'int32'),
+ relay.const(1, 'int32'),
+ relay.const(1, 'float32'),
+ relay.const(1, 'float32'),
+ channels=64,
+ kernel_size=(3, 3),
+ padding=(1, 1),
+ data_layout='NHWC',
+ kernel_layout='HWIO')
+ y1 = relay.qnn.op.conv2d(y, weight2,
+ relay.const(1, 'int32'),
+ relay.const(1, 'int32'),
+ relay.const(1, 'float32'),
+ relay.const(1, 'float32'),
+ channels=64,
+ kernel_size=(3, 3),
+ padding=(1, 1),
+ data_layout='NHWC',
+ kernel_layout='HWIO')
+ y = relay.cast(y, 'int8')
+ y1 = relay.cast(y, 'int8')
+ ret = relay.qnn.op.add(y, y1,
+ relay.const(1, 'float32'),
+ relay.const(1, 'int32'),
+ relay.const(1, 'float32'),
+ relay.const(1, 'int32'),
+ relay.const(1, 'float32'),
+ relay.const(1, 'int32'))
+ y = relay.Function(analysis.free_vars(ret), ret)
+ return y
+
+ def expected():
+ x = relay.var("x", shape=(1, 56, 56, 64), dtype='int8')
+ weight1 = relay.var('weight1', shape=(3, 3, 64, 64), dtype='int8')
+ weight2 = relay.var('weight2', shape=(3, 3, 64, 64), dtype='int8')
+ weight1 = relay.layout_transform(weight1, 'HWIO', 'OIHW')
+ weight2 = relay.layout_transform(weight2, 'HWIO', 'OIHW')
+ y = relay.layout_transform(x, "NHWC", "NCHW")
+ y = relay.qnn.op.conv2d(y, weight1,
+ relay.const(1, 'int32'),
+ relay.const(1, 'int32'),
+ relay.const(1, 'float32'),
+ relay.const(1, 'float32'),
+ channels=64,
+ kernel_size=(3, 3),
+ padding=(1, 1))
+ y1 = relay.qnn.op.conv2d(y, weight2,
+ relay.const(1, 'int32'),
+ relay.const(1, 'int32'),
+ relay.const(1, 'float32'),
+ relay.const(1, 'float32'),
+ channels=64,
+ kernel_size=(3, 3),
+ padding=(1, 1))
+ y = relay.cast(y, 'int8')
+ y1 = relay.cast(y, 'int8')
+ ret = relay.qnn.op.add(y, y1,
+ relay.const(1, 'float32'),
+ relay.const(1, 'int32'),
+ relay.const(1, 'float32'),
+ relay.const(1, 'int32'),
+ relay.const(1, 'float32'),
+ relay.const(1, 'int32'))
+ ret = relay.layout_transform(ret, "NCHW", "NHWC")
+ y = relay.Function(analysis.free_vars(ret), ret)
+ return y
+
+ a = before()
+ a = run_opt_pass(a, transform.ConvertLayout('NCHW'))
+ b = run_opt_pass(expected(), transform.InferType())
+
+ assert analysis.alpha_equal(a, b), "Actual = \n" + str(a)
+
+
if __name__ == "__main__":
test_no_convert_layout()
test_conv_convert_layout()
@@ -408,3 +622,6 @@ if __name__ == "__main__":
test_resnet_convert_layout()
test_scalar_convert_layout()
test_conv_bn_convert_layout()
+ test_qnn_conv_requantize_convert_layout()
+ test_qnn_conv_concat_convert_layout()
+ test_qnn_conv_add_convert_layout()