This is an automated email from the ASF dual-hosted git repository.
anijain2305 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-tvm.git
The following commit(s) were added to refs/heads/master by this push:
new 38118be [ConvertLayout] Support QNN ops. (#5066)
38118be is described below
commit 38118befc0a7e8a3db87d652b30a9369abb60363
Author: Animesh Jain <[email protected]>
AuthorDate: Wed Mar 18 20:03:56 2020 -0700
[ConvertLayout] Support QNN ops. (#5066)
* [ConvertLayout] Support QNN ops.
* Changing layouts to C.
* Fixing dilation.
* Empty commit.
Co-authored-by: Ubuntu <[email protected]>
---
python/tvm/relay/op/nn/_nn.py | 10 +-
python/tvm/relay/qnn/op/__init__.py | 2 +-
python/tvm/relay/qnn/op/layout_conversions.py | 53 ++++++
src/relay/op/nn/bitserial.cc | 2 +-
src/relay/op/nn/convolution.cc | 19 +-
src/relay/op/nn/convolution.h | 15 ++
src/relay/op/nn/nn.cc | 12 +-
src/relay/op/nn/pad.cc | 2 +-
src/relay/op/nn/pooling.cc | 2 +-
src/relay/op/nn/upsampling.cc | 2 +-
src/relay/op/tensor/reduce.cc | 7 +-
src/relay/op/tensor/transform.cc | 57 +-----
src/relay/op/tensor/transform.h | 58 ++++++
src/relay/qnn/op/add.cc | 21 ++-
src/relay/qnn/op/concatenate.cc | 41 +++-
src/relay/qnn/op/convolution.cc | 20 +-
src/relay/qnn/op/requantize.cc | 77 +++++++-
src/relay/transforms/infer_layout_util.h | 17 +-
src/relay/transforms/transform_layout.h | 16 +-
tests/python/relay/test_pass_convert_op_layout.py | 217 ++++++++++++++++++++++
20 files changed, 544 insertions(+), 106 deletions(-)
diff --git a/python/tvm/relay/op/nn/_nn.py b/python/tvm/relay/op/nn/_nn.py
index c2fe6d0..a9bd900 100644
--- a/python/tvm/relay/op/nn/_nn.py
+++ b/python/tvm/relay/op/nn/_nn.py
@@ -138,8 +138,6 @@ def convert_conv2d(attrs, inputs, tinfos, desired_layout):
"""
# pylint: disable=import-outside-toplevel
from tvm import relay
- data_layout = attrs['data_layout']
- kernel_layout = attrs['kernel_layout']
data, weight = inputs
assert desired_layout == 'NCHW', \
"Currently only transformation to NCHW layout is supported."
@@ -147,13 +145,7 @@ def convert_conv2d(attrs, inputs, tinfos, desired_layout):
new_attrs = dict(attrs)
new_attrs['data_layout'] = desired_layout
new_attrs['kernel_layout'] = 'OIHW'
-
- if data_layout == 'NHWC' and kernel_layout == 'HWIO':
- # Convert (NHWC, HWIO) to (NCHW, OIHW)
- return relay.nn.conv2d(data, weight, **new_attrs)
- if data_layout == 'NHWC' and kernel_layout == 'HWOI':
- # Convert (NHWC, HWOI) to (NCHW, OIHW). Depthwise conv2d.
- return relay.nn.conv2d(data, weight, **new_attrs)
+ return relay.nn.conv2d(data, weight, **new_attrs)
return None
diff --git a/python/tvm/relay/qnn/op/__init__.py
b/python/tvm/relay/qnn/op/__init__.py
index 042dcb9..6d66e12 100644
--- a/python/tvm/relay/qnn/op/__init__.py
+++ b/python/tvm/relay/qnn/op/__init__.py
@@ -19,4 +19,4 @@
from __future__ import absolute_import as _abs
from .qnn import *
from .op import register_qnn_legalize
-from . import legalizations
+from . import legalizations, layout_conversions
diff --git a/python/tvm/relay/qnn/op/layout_conversions.py
b/python/tvm/relay/qnn/op/layout_conversions.py
new file mode 100644
index 0000000..f5850b8
--- /dev/null
+++ b/python/tvm/relay/qnn/op/layout_conversions.py
@@ -0,0 +1,53 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+# pylint: disable=invalid-name, unused-argument
+"""Convert layout related registration"""
+from __future__ import absolute_import
+
+from tvm.relay.op import op as reg
+
+
[email protected]_convert_op_layout("qnn.conv2d")
+def convert_qnn_conv2d(attrs, inputs, tinfos, desired_layout):
+ """Convert Layout pass registration for QNN conv2d op.
+
+ Parameters
+ ----------
+ attrs : tvm.ir.Attrs
+ Attributes of current convolution
+ inputs : list of tvm.relay.Expr
+ The args of the Relay expr to be legalized
+ tinfos : list of types
+ List of input and output types
+ desired_layout : str
+ The desired layout
+
+ Returns
+ -------
+ result : tvm.relay.Expr
+ The transformed expr
+ """
+ # pylint: disable=import-outside-toplevel
+ from tvm import relay
+ assert desired_layout == 'NCHW', \
+ "Currently only transformation to NCHW layout is supported."
+ if desired_layout == 'NCHW':
+ new_attrs = dict(attrs)
+ new_attrs['data_layout'] = desired_layout
+ new_attrs['kernel_layout'] = 'OIHW'
+ return relay.qnn.op.conv2d(*inputs, **new_attrs)
+ return None
diff --git a/src/relay/op/nn/bitserial.cc b/src/relay/op/nn/bitserial.cc
index 621fe77..9457b4b 100644
--- a/src/relay/op/nn/bitserial.cc
+++ b/src/relay/op/nn/bitserial.cc
@@ -39,7 +39,7 @@ template <typename T>
Array<Array<Layout>> BinaryConv2DInferCorrectLayout(const Attrs& attrs,
const Array<Layout>&
new_in_layouts,
const Array<Layout>&
old_in_layouts,
- const
Array<Array<IndexExpr>>& old_in_shapes) {
+ const
Array<tvm::relay::Type>& old_in_types) {
const T* params = attrs.as<T>();
// We always make other operators to fit the layouts of convolution layers
diff --git a/src/relay/op/nn/convolution.cc b/src/relay/op/nn/convolution.cc
index d642e2f..25dca08 100644
--- a/src/relay/op/nn/convolution.cc
+++ b/src/relay/op/nn/convolution.cc
@@ -34,23 +34,6 @@
namespace tvm {
namespace relay {
-
-template<typename T>
-Array<Array<Layout> > ConvInferCorrectLayout(
- const Attrs& attrs,
- const Array<Layout>& new_in_layouts,
- const Array<Layout>& old_in_layouts,
- const Array<Array<IndexExpr>> &old_in_shapes) {
- const T* params = attrs.as<T>();
-
- // We always make other operators to fit the layouts of convolution layers
- // So this inference ignores all inputs
- return Array<Array<Layout> >{{params->data_layout, params->kernel_layout},
- {params->out_layout == "" ?
- params->data_layout : params->out_layout}};
-}
-
-
template <typename T>
Expr MakeConv(Expr data,
Expr weight,
@@ -1048,7 +1031,7 @@ Array<Array<Layout> > Dilation2DInferCorrectLayout(
const Attrs& attrs,
const Array<Layout>& new_in_layouts,
const Array<Layout>& old_in_layouts,
- const Array<Array<IndexExpr>> &old_in_shapes) {
+ const Array<tvm::relay::Type> &old_in_types) {
const T* params = attrs.as<T>();
// We always make other operators to fit the layouts of convolution layers
diff --git a/src/relay/op/nn/convolution.h b/src/relay/op/nn/convolution.h
index d451215..e3fa02d 100644
--- a/src/relay/op/nn/convolution.h
+++ b/src/relay/op/nn/convolution.h
@@ -431,6 +431,21 @@ bool Dilation2DRel(const Array<Type>& types, int
num_inputs, const Attrs& attrs,
return true;
}
+template<typename T>
+Array<Array<Layout> > ConvInferCorrectLayout(
+ const Attrs& attrs,
+ const Array<Layout>& new_in_layouts,
+ const Array<Layout>& old_in_layouts,
+ const Array<tvm::relay::Type> &old_in_types) {
+ const T* params = attrs.as<T>();
+
+ // We always make other operators to fit the layouts of convolution layers
+ // So this inference ignores all inputs
+ return Array<Array<Layout> >{{params->data_layout, params->kernel_layout},
+ {params->out_layout == "" ?
+ params->data_layout : params->out_layout}};
+}
+
} // namespace relay
} // namespace tvm
#endif // TVM_RELAY_OP_NN_CONVOLUTION_H_
diff --git a/src/relay/op/nn/nn.cc b/src/relay/op/nn/nn.cc
index 560d06f..5203ffc 100644
--- a/src/relay/op/nn/nn.cc
+++ b/src/relay/op/nn/nn.cc
@@ -272,10 +272,10 @@ Array<Array<Layout> > PReluInferCorrectLayout(
const Attrs& attrs,
const Array<Layout>& new_in_layouts,
const Array<Layout>& old_in_layouts,
- const Array<Array<IndexExpr>> &old_in_shapes) {
+ const Array<tvm::relay::Type> &old_in_types) {
CHECK_EQ(old_in_layouts.size(), 2U);
- CHECK_EQ(old_in_shapes.size(), 2U);
+ CHECK_EQ(old_in_types.size(), 2U);
Layout data_layout = old_in_layouts[0];
if (new_in_layouts.defined()) {
CHECK_EQ(new_in_layouts.size(), 2U);
@@ -615,9 +615,15 @@ TVM_REGISTER_NODE_TYPE(BatchNormAttrs);
Array<Array<Layout>> BatchNormInferCorrectLayout(const Attrs& attrs,
const Array<Layout>&
new_in_layouts,
const Array<Layout>&
old_in_layouts,
- const
Array<Array<IndexExpr>>& old_in_shapes) {
+ const
Array<tvm::relay::Type>& old_in_types) {
BatchNormAttrs* param =
const_cast<BatchNormAttrs*>(attrs.as<BatchNormAttrs>());
+ Array<Array<IndexExpr>> old_in_shapes;
+ for (auto old_in_t : old_in_types) {
+ CHECK(old_in_t.as<TensorTypeNode>());
+ old_in_shapes.push_back(old_in_t.as<TensorTypeNode>()->shape);
+ }
+
size_t axis =
param->axis < 0 ? param->axis + old_in_shapes[0].size() :
static_cast<size_t>(param->axis);
diff --git a/src/relay/op/nn/pad.cc b/src/relay/op/nn/pad.cc
index 84a4940..3506f42 100644
--- a/src/relay/op/nn/pad.cc
+++ b/src/relay/op/nn/pad.cc
@@ -39,7 +39,7 @@ Array<Array<Layout> > PadInferCorrectLayout(
const Attrs& attrs,
const Array<Layout>& new_in_layouts,
const Array<Layout>& old_in_layouts,
- const Array<Array<IndexExpr>> &old_in_shapes) {
+ const Array<tvm::relay::Type> &old_in_types) {
// NOTE: Discard "const" qualifier here.
PadAttrs *params = const_cast<PadAttrs*>(attrs.as<PadAttrs>());
diff --git a/src/relay/op/nn/pooling.cc b/src/relay/op/nn/pooling.cc
index b18ed90..00ec55a 100644
--- a/src/relay/op/nn/pooling.cc
+++ b/src/relay/op/nn/pooling.cc
@@ -41,7 +41,7 @@ Array<Array<Layout> > PoolInferCorrectLayout(
const Attrs& attrs,
const Array<Layout>& new_in_layouts,
const Array<Layout>& old_in_layouts,
- const Array<Array<IndexExpr>> &old_in_shapes) {
+ const Array<tvm::relay::Type> &old_in_types) {
// NOTE: Discard "const" qualifier here.
T *params = const_cast<T*>(attrs.as<T>());
diff --git a/src/relay/op/nn/upsampling.cc b/src/relay/op/nn/upsampling.cc
index 477cec7..2c1f45d 100644
--- a/src/relay/op/nn/upsampling.cc
+++ b/src/relay/op/nn/upsampling.cc
@@ -39,7 +39,7 @@ Array<Array<Layout> > UpsamplingInferCorrectLayout(
const Attrs& attrs,
const Array<Layout>& new_in_layouts,
const Array<Layout>& old_in_layouts,
- const Array<Array<IndexExpr>> &old_in_shapes) {
+ const Array<tvm::relay::Type> &old_in_types) {
// NOTE: Discard "const" qualifier here.
T *params = const_cast<T*>(attrs.as<T>());
diff --git a/src/relay/op/tensor/reduce.cc b/src/relay/op/tensor/reduce.cc
index 5e0795e..74bc84e 100644
--- a/src/relay/op/tensor/reduce.cc
+++ b/src/relay/op/tensor/reduce.cc
@@ -122,11 +122,16 @@ Array<Integer> GetExcludeAxes(size_t indim,
Array<Array<Layout>> ReduceInferCorrectLayout(const Attrs& attrs,
const Array<Layout>&
new_in_layouts,
const Array<Layout>&
old_in_layouts,
- const Array<Array<IndexExpr>>&
old_in_shapes) {
+ const Array<tvm::relay::Type>&
old_in_types) {
// NOTE: Discard "const" qualifier here.
ReduceAttrs* params = const_cast<ReduceAttrs*>(attrs.as<ReduceAttrs>());
// Get the reduce axes.
+ Array<Array<IndexExpr>> old_in_shapes;
+ for (auto old_in_t : old_in_types) {
+ CHECK(old_in_t.as<TensorTypeNode>());
+ old_in_shapes.push_back(old_in_t.as<TensorTypeNode>()->shape);
+ }
uint32_t indim = old_in_shapes[0].size();
auto r_axes = GetReduceAxes(indim, params->axis, params->exclude);
diff --git a/src/relay/op/tensor/transform.cc b/src/relay/op/tensor/transform.cc
index 17d9788..32df221 100644
--- a/src/relay/op/tensor/transform.cc
+++ b/src/relay/op/tensor/transform.cc
@@ -275,54 +275,6 @@ Array<te::Tensor> ConcatenateCompute(const Attrs& attrs,
return { topi::concatenate(inputs, param->axis) };
}
-Array<Array<Layout>> ConcatenateLayout(
- const Attrs& attrs,
- const Array<Layout>& new_in_layouts,
- const Array<Layout>& old_in_layouts,
- const Array<Array<IndexExpr>> &old_in_shapes) {
- ConcatenateAttrs* param =
const_cast<ConcatenateAttrs*>(attrs.as<ConcatenateAttrs>());
-
- size_t axis = param->axis < 0 ? param->axis + old_in_shapes[0].size() :
- static_cast<size_t>(param->axis);
-
- Layout ret;
- bool is_new_layout_selected = false;
- if (new_in_layouts.defined()) { // this function is called after some
operators are alternated.
- // If all the new input layouts are same, the new in layout gets selected.
For axis, the new
- // axis in the new layout is identified. The param->axis is then modified
on the fly to conform
- // to the new input layout.
- const auto& concate_dim = old_in_layouts[0][axis];
- bool all_input_layouts_same = true;
- for (auto new_layout : new_in_layouts) {
- if (!new_layout.Equals(new_in_layouts[0])) {
- all_input_layouts_same = false;
- }
- }
- if (all_input_layouts_same) {
- auto new_index = new_in_layouts[0].IndexOf(concate_dim);
- ret = new_in_layouts[0];
- param->axis = new_index;
- is_new_layout_selected = true;
- }
- }
-
- if (!is_new_layout_selected) {
- // this function is called on the original correct relay ir
- for (size_t i = 0; i < old_in_layouts.size(); ++i) {
- if (old_in_layouts[i].defined()) {
- ret = old_in_layouts[i];
- break;
- }
- }
-
- if (ret.ndim() <= axis || !ret[axis].IsPrimal()) {
- return Array<Array<Layout> > {{Layout::Undef()}, {Layout::Undef()}};
- }
- }
-
- return Array<Array<Layout> > {Array<Layout>(old_in_layouts.size(), ret),
{ret}};
-}
-
Expr MakeConcatenate(Expr data,
int axis) {
auto attrs = make_object<ConcatenateAttrs>();
@@ -1933,7 +1885,14 @@ Array<Array<Layout> > StridedSliceInferCorrectLayout(
const Attrs& attrs,
const Array<Layout>& new_in_layouts,
const Array<Layout>& old_in_layouts,
- const Array<Array<IndexExpr>>& old_in_shapes) {
+ const Array<tvm::relay::Type>& old_in_types) {
+
+ Array<Array<IndexExpr>> old_in_shapes;
+ for (auto old_in_t : old_in_types) {
+ CHECK(old_in_t.as<TensorTypeNode>());
+ old_in_shapes.push_back(old_in_t.as<TensorTypeNode>()->shape);
+ }
+
CHECK(old_in_layouts.defined());
CHECK_EQ(old_in_layouts.size(), 1);
CHECK(old_in_shapes.defined());
diff --git a/src/relay/op/tensor/transform.h b/src/relay/op/tensor/transform.h
index b69f6e7..a64dcd5 100644
--- a/src/relay/op/tensor/transform.h
+++ b/src/relay/op/tensor/transform.h
@@ -25,6 +25,7 @@
#define TVM_RELAY_OP_TENSOR_TRANSFORM_H_
#include <tvm/ir/error.h>
+#include <tvm/relay/attrs/transform.h>
#include <vector>
#include <algorithm>
#include <limits>
@@ -124,6 +125,63 @@ bool ConcatenateRel(const Array<Type>& types,
return true;
}
+static inline Array<Array<Layout>> ConcatenateLayout(
+ const Attrs& attrs,
+ const Array<Layout>& new_in_layouts,
+ const Array<Layout>& old_in_layouts,
+ const Array<tvm::relay::Type> &old_in_types) {
+ ConcatenateAttrs* param =
const_cast<ConcatenateAttrs*>(attrs.as<ConcatenateAttrs>());
+
+ Array<Array<IndexExpr>> old_in_shapes;
+ CHECK_EQ(old_in_types.size(), 1);
+ for (auto old_in_tuple_t : old_in_types) {
+ CHECK(old_in_tuple_t.as<TupleTypeNode>());
+ for (auto old_in_t : old_in_tuple_t.as<TupleTypeNode>()->fields) {
+ old_in_shapes.push_back(old_in_t.as<TensorTypeNode>()->shape);
+ }
+ }
+
+ size_t axis = param->axis < 0 ? param->axis + old_in_shapes[0].size() :
+ static_cast<size_t>(param->axis);
+
+ Layout ret;
+ bool is_new_layout_selected = false;
+ if (new_in_layouts.defined()) { // this function is called after some
operators are alternated.
+ // If all the new input layouts are same, the new in layout gets selected.
For axis, the new
+ // axis in the new layout is identified. The param->axis is then modified
on the fly to conform
+ // to the new input layout.
+ const auto& concate_dim = old_in_layouts[0][axis];
+ bool all_input_layouts_same = true;
+ for (auto new_layout : new_in_layouts) {
+ if (!new_layout.Equals(new_in_layouts[0])) {
+ all_input_layouts_same = false;
+ }
+ }
+ if (all_input_layouts_same) {
+ auto new_index = new_in_layouts[0].IndexOf(concate_dim);
+ ret = new_in_layouts[0];
+ param->axis = new_index;
+ is_new_layout_selected = true;
+ }
+ }
+
+ if (!is_new_layout_selected) {
+ // this function is called on the original correct relay ir
+ for (size_t i = 0; i < old_in_layouts.size(); ++i) {
+ if (old_in_layouts[i].defined()) {
+ ret = old_in_layouts[i];
+ break;
+ }
+ }
+
+ if (ret.ndim() <= axis || !ret[axis].IsPrimal()) {
+ return Array<Array<Layout> > {{Layout::Undef()}, {Layout::Undef()}};
+ }
+ }
+
+ return Array<Array<Layout> > {Array<Layout>(old_in_layouts.size(), ret),
{ret}};
+}
+
} // namespace relay
} // namespace tvm
#endif // TVM_RELAY_OP_TENSOR_TRANSFORM_H_
diff --git a/src/relay/qnn/op/add.cc b/src/relay/qnn/op/add.cc
index b70954b..479138f 100644
--- a/src/relay/qnn/op/add.cc
+++ b/src/relay/qnn/op/add.cc
@@ -25,6 +25,7 @@
#include <tvm/relay/op_attr_types.h>
#include <tvm/relay/qnn/attrs.h>
#include "../../transforms/pattern_util.h"
+#include "../../transforms/infer_layout_util.h"
#include "../util.h"
#include "op_common.h"
@@ -32,6 +33,23 @@ namespace tvm {
namespace relay {
namespace qnn {
+/*! \brief Infer layout for QNN binary broadcast operators */
+Array<Array<Layout> > QnnBinaryBroadcastLayout(const Attrs& attrs,
+ const Array<Layout>&
new_in_layouts,
+ const Array<Layout>&
old_in_layouts,
+ const Array<tvm::relay::Type>&
old_in_types) {
+ // Use Relay Binary Broadcast Infer correct layout.
+ auto layouts = BinaryBroadcastLayout(attrs, new_in_layouts, old_in_layouts,
old_in_types);
+
+ // Fill the layouts of remaining input tensors - scales and zero points. The
layouts of these
+ // tensors can be treated as C.
+ Layout channel_layout = Layout("C");
+ Array<Layout> input_layouts = {layouts[0][0], layouts[0][1],
channel_layout, channel_layout,
+ channel_layout, channel_layout,
channel_layout, channel_layout};
+ Array<Layout> output_layouts = layouts[1];
+ return {input_layouts, output_layouts};
+}
+
/*
* \brief Canonicalizes the QNN add op.
* \param attrs The QNN concatenate attrs.
@@ -118,7 +136,8 @@ Expr QnnAddCanonicalize(const Attrs& attrs, const
Array<Expr>& new_args,
QNN_REGISTER_BINARY_OP("add")
.describe("Elementwise add with with broadcasting for quantized tensors.")
.set_support_level(11)
-.set_attr<FTVMLegalize>("FTVMQnnCanonicalize", QnnAddCanonicalize);
+.set_attr<FTVMLegalize>("FTVMQnnCanonicalize", QnnAddCanonicalize)
+.set_attr<FInferCorrectLayout>("FInferCorrectLayout",
QnnBinaryBroadcastLayout);
} // namespace qnn
} // namespace relay
diff --git a/src/relay/qnn/op/concatenate.cc b/src/relay/qnn/op/concatenate.cc
index 3f4e315..110ee6f 100644
--- a/src/relay/qnn/op/concatenate.cc
+++ b/src/relay/qnn/op/concatenate.cc
@@ -28,6 +28,7 @@
#include <tvm/relay/qnn/attrs.h>
#include "../../op/tensor/transform.h"
#include "../../transforms/pattern_util.h"
+#include "../../transforms/infer_layout_util.h"
#include "../util.h"
namespace tvm {
@@ -70,6 +71,43 @@ bool QnnConcatenateRel(const Array<Type>& types, int
num_inputs, const Attrs& at
return ConcatenateRel<ConcatenateAttrs>(tensor_types, 2, attrs, reporter);
}
+Array<Array<Layout>> QnnConcatenateLayout(const Attrs& attrs, const
Array<Layout>& new_in_layouts,
+ const Array<Layout>& old_in_layouts,
+ const Array<tvm::relay::Type>&
old_in_types) {
+ // Collect the layouts and types to reuse Relay Concatenate Infer Correct
Layout.
+ CHECK_EQ(old_in_types.size(), 5);
+ auto input_tuple_type = old_in_types[0].as<TupleTypeNode>();
+ CHECK(input_tuple_type);
+ auto num_input_tensors = input_tuple_type->fields.size();
+
+ Array<Layout> relay_new_in_layouts(nullptr);
+ if (new_in_layouts.defined()) {
+ relay_new_in_layouts =
+ Array<Layout>(new_in_layouts.begin(), new_in_layouts.begin() +
num_input_tensors);
+ }
+ Array<Layout> relay_old_in_layouts(nullptr);
+ if (old_in_layouts.defined()) {
+ relay_old_in_layouts =
+ Array<Layout>(old_in_layouts.begin(), old_in_layouts.begin() +
num_input_tensors);
+ }
+
+ // Use Relay Concatenate Infer Correct layout to infer the layouts for data
tensors.
+ auto layouts =
+ ConcatenateLayout(attrs, relay_new_in_layouts, relay_old_in_layouts,
{old_in_types[0]});
+
+ // Fill the layouts of remaining input tensors - scales and zero points. The
layouts of these
+ // tensors can be treated as channel layout. Total number of these tensors
are 2 * num of data
+ // tensors (scale and zero point for each input data tensor) + 2 for the
output data tensor.
+ Layout channel_layout = Layout("C");
+ Array<Layout> input_layouts = layouts[0];
+
+ for (size_t i = 0; i < 2 * num_input_tensors + 2; i++) {
+ input_layouts.push_back(channel_layout);
+ }
+ Array<Layout> output_layouts = layouts[1];
+ return {input_layouts, output_layouts};
+}
+
Expr MakeQnnConcatenate(Expr data, Expr input_scales, Expr input_zero_points,
Expr output_scale,
Expr output_zero_point, int axis) {
auto attrs = make_object<ConcatenateAttrs>();
@@ -161,7 +199,8 @@ RELAY_REGISTER_OP("qnn.concatenate")
.add_argument("output_zero_point", "Tensor", "The quantization zero_point of
the output tensor.")
.set_support_level(11)
.add_type_rel("QnnConcatenate", QnnConcatenateRel)
-.set_attr<FTVMLegalize>("FTVMQnnCanonicalize", ConcatenateQnnCanonicalize);
+.set_attr<FTVMLegalize>("FTVMQnnCanonicalize", ConcatenateQnnCanonicalize)
+.set_attr<FInferCorrectLayout>("FInferCorrectLayout", QnnConcatenateLayout);
TVM_REGISTER_GLOBAL("relay.qnn.op._make.concatenate")
.set_body_typed(MakeQnnConcatenate);
diff --git a/src/relay/qnn/op/convolution.cc b/src/relay/qnn/op/convolution.cc
index 30308fd..791b035 100644
--- a/src/relay/qnn/op/convolution.cc
+++ b/src/relay/qnn/op/convolution.cc
@@ -68,6 +68,23 @@ bool QnnConv2DRel(const Array<Type>& types, int num_inputs,
const Attrs& attrs,
return Conv2DRel<Conv2DAttrs>(tensor_types, 3, attrs, reporter);
}
+Array<Array<Layout>> QnnConvInferCorrectLayout(const Attrs& attrs,
+ const Array<Layout>&
new_in_layouts,
+ const Array<Layout>&
old_in_layouts,
+ const Array<tvm::relay::Type>&
old_in_types) {
+ // Use Relay Conv2D Infer correct layout.
+ auto layouts =
+ ConvInferCorrectLayout<Conv2DAttrs>(attrs, new_in_layouts,
old_in_layouts, old_in_types);
+
+ // Fill the layouts of remaining input tensors - scales and zero points. The
layouts of these
+ // tensors can be treated as channel layout.
+ Layout channel_layout = Layout("C");
+ Array<Layout> input_layouts = {layouts[0][0], layouts[0][1],
channel_layout,
+ channel_layout, channel_layout,
channel_layout};
+ Array<Layout> output_layouts = layouts[1];
+ return {input_layouts, output_layouts};
+}
+
bool is_depthwise(const Conv2DAttrs* param) {
return param->channels.defined() && tvm::tir::Equal(param->channels,
param->groups) &&
param->groups != 1;
@@ -684,7 +701,8 @@ operator to understand how to scale back the int32 output
to (u)int8.
.add_argument("weight_zero_point", "Tensor", "The quantization zero_point of
the weight tensor.")
.set_support_level(11)
.add_type_rel("QnnConv2D", QnnConv2DRel)
-.set_attr<FTVMLegalize>("FTVMQnnCanonicalize", QnnConv2DCanonicalize);
+.set_attr<FTVMLegalize>("FTVMQnnCanonicalize", QnnConv2DCanonicalize)
+.set_attr<FInferCorrectLayout>("FInferCorrectLayout",
QnnConvInferCorrectLayout);
TVM_REGISTER_GLOBAL("relay.qnn.op._make.conv2d").set_body_typed(MakeQnnConv2D);
diff --git a/src/relay/qnn/op/requantize.cc b/src/relay/qnn/op/requantize.cc
index 9b2ed91..f3351a2 100644
--- a/src/relay/qnn/op/requantize.cc
+++ b/src/relay/qnn/op/requantize.cc
@@ -26,6 +26,7 @@
#include <tvm/relay/op_attr_types.h>
#include <tvm/relay/qnn/attrs.h>
#include "../../transforms/pattern_util.h"
+#include "../../transforms/infer_layout_util.h"
#include "../util.h"
namespace tvm {
@@ -34,6 +35,79 @@ namespace qnn {
TVM_REGISTER_NODE_TYPE(RequantizeAttrs);
+Array<Array<Layout>> RequantizeInferCorrectLayout(const Attrs& attrs,
+ const Array<Layout>&
new_in_layouts,
+ const Array<Layout>&
old_in_layouts,
+ const
Array<tvm::relay::Type>& old_in_types) {
+ RequantizeAttrs* param =
const_cast<RequantizeAttrs*>(attrs.as<RequantizeAttrs>());
+
+ Array<Array<IndexExpr>> old_in_shapes;
+ for (auto old_in_t : old_in_types) {
+ CHECK(old_in_t.as<TensorTypeNode>());
+ old_in_shapes.push_back(old_in_t.as<TensorTypeNode>()->shape);
+ }
+
+ Array<Layout> input_layouts, output_layouts;
+ if (new_in_layouts.defined()) {
+ // Adapt to new layout. The axis has to change.
+ // Record original reduce axis. Convert to the modified layout axis.
+ CHECK_EQ(new_in_layouts.size(), 5);
+ CHECK_EQ(old_in_layouts.size(), 5);
+
+ // 1) Get the axis.
+ int axis = param->axis;
+ axis = (axis == -1) ? old_in_shapes[0].size() - 1 : axis;
+
+ // 2) Collect the original axis
+ std::string old_dim = old_in_layouts[0][axis].name();
+
+ // 3) Collect the new axes by walking new_layout.
+ tvm::Integer new_axis;
+ std::string new_layout_string = "";
+ int axis_index = 0;
+ for (auto iter_var : new_in_layouts[0]->axes) {
+ const auto& layout_axis = LayoutAxis::Get(iter_var);
+ const std::string& layout_dim = layout_axis.name();
+ if (old_dim == layout_dim) {
+ new_axis = tvm::Integer(axis_index);
+ }
+ // Collect only the primal axis.
+ if (layout_axis.IsPrimal()) {
+ new_layout_string += layout_dim;
+ axis_index++;
+ }
+ }
+
+ // 4) Set the new axis and layout.
+ Layout new_layout = Layout(new_layout_string);
+
+ // Fill the layouts of remaining input tensors - scales and zero points.
The layouts of these
+ // tensors can be treated as channel layout.
+ Layout channel_layout = Layout("C");
+ input_layouts = {new_layout, channel_layout, channel_layout,
channel_layout, channel_layout};
+ output_layouts = {new_layout};
+ param->axis = new_axis;
+ } else if (old_in_layouts.defined()) {
+ // If the new layout is undefined, set the old layout as the inferred
layout.
+ CHECK_EQ(old_in_layouts.size(), 5);
+
+ Layout old_layout = old_in_layouts[0];
+
+ // Fill the layouts of remaining input tensors - scales and zero points.
The layouts of these
+ // tensors can be treated as channel layout.
+ Layout channel_layout = Layout("C");
+ input_layouts = {old_layout, channel_layout, channel_layout,
channel_layout, channel_layout};
+ output_layouts = {old_layout};
+ } else {
+ // Set the layouts to undef.
+ Layout undef = Layout::Undef();
+ input_layouts = Array<Layout>(5, undef);
+ output_layouts = {undef};
+ }
+
+ return Array<Array<Layout>>{input_layouts, output_layouts};
+}
+
// Lowering of qnn.requantize op
/*
@@ -247,7 +321,8 @@ Q_output = zp_output + (scale_input)/(scale_output) *
(Q_input - zp_input)
.add_argument("output_zero_point", "Tensor", "The quantization zero_point of
the output tensor.")
.set_support_level(11)
.add_type_rel("Requantize", RequantizeRel)
-.set_attr<FTVMLegalize>("FTVMQnnCanonicalize", RequantizeQnnCanonicalize);
+.set_attr<FTVMLegalize>("FTVMQnnCanonicalize", RequantizeQnnCanonicalize)
+.set_attr<FInferCorrectLayout>("FInferCorrectLayout",
RequantizeInferCorrectLayout);
TVM_REGISTER_GLOBAL("relay.qnn.op._make.requantize")
.set_body_typed(MakeRequantize);
diff --git a/src/relay/transforms/infer_layout_util.h
b/src/relay/transforms/infer_layout_util.h
index eeb61e3..ca73003 100644
--- a/src/relay/transforms/infer_layout_util.h
+++ b/src/relay/transforms/infer_layout_util.h
@@ -90,7 +90,7 @@ inline Layout AdjustSubordinateFactors(const Layout&
src_layout, const Layout& o
* This can be undefined, which means we call this
function before alternating
* any operators.
* \param old_in_layouts The layouts of input arguments before alter_op_layout.
- * \param old_in_shapes The shapes of old input arguments.
+ * \param old_in_types The types of old input arguments.
* \return infered_layout An array of two elements that are inferred input
layouts and
* inferred output layouts.
*/
@@ -98,13 +98,13 @@ using FInferCorrectLayout = runtime::TypedPackedFunc<
Array<Array<Layout>>(const Attrs& attrs,
const Array<Layout>& new_in_layouts,
const Array<Layout>& old_in_layouts,
- const Array<Array<IndexExpr>> &old_in_shapes)>;
+ const Array<tvm::relay::Type> &old_in_types)>;
/*! \brief take arbitrary input layout and copy to output */
inline Array<Array<Layout> > ElemwiseArbitraryLayout(const Attrs& attrs,
const Array<Layout>&
new_in_layouts,
const Array<Layout>&
old_in_layouts,
- const
Array<Array<IndexExpr>> &old_in_shapes) {
+ const
Array<tvm::relay::Type> &old_in_types) {
Layout ret;
if (new_in_layouts.defined()) {
@@ -126,8 +126,13 @@ inline Array<Array<Layout> > ElemwiseArbitraryLayout(const
Attrs& attrs,
inline Array<Array<Layout> > BinaryBroadcastLayout(const Attrs& attrs,
const Array<Layout>&
new_in_layouts,
const Array<Layout>&
old_in_layouts,
- const
Array<Array<IndexExpr>> &old_in_shapes) {
+ const
Array<tvm::relay::Type> &old_in_types) {
Array<Layout> layouts;
+ Array<Array<IndexExpr>> old_in_shapes;
+ for (auto old_in_t : old_in_types) {
+ CHECK(old_in_t.as<TensorTypeNode>());
+ old_in_shapes.push_back(old_in_t.as<TensorTypeNode>()->shape);
+ }
if (new_in_layouts.defined()) {
layouts.assign(new_in_layouts.begin(), new_in_layouts.end());
@@ -203,7 +208,7 @@ inline Array<Array<Layout> > BinaryBroadcastLayout(const
Attrs& attrs,
*/
static inline std::tuple<Array<Layout>, Array<Layout>, bool>
InferCorrectLayouts(
const Call& call, const Array<Layout>& new_in_layouts, const
Array<Layout>& old_in_layouts,
- const Array<Array<IndexExpr>>& old_in_shapes) {
+ const Array<tvm::relay::Type>& old_in_types) {
static auto finfer_layout =
Op::GetAttr<FInferCorrectLayout>("FInferCorrectLayout");
if (!call->op.as<OpNode>()) {
return std::make_tuple<>(Array<Layout>(nullptr), Array<Layout>(nullptr),
false);
@@ -213,7 +218,7 @@ static inline std::tuple<Array<Layout>, Array<Layout>,
bool> InferCorrectLayouts
if (finfer_layout.count(op)) {
Array<Array<Layout>> inferred_layouts;
inferred_layouts =
- finfer_layout[op](call->attrs, new_in_layouts, old_in_layouts,
old_in_shapes);
+ finfer_layout[op](call->attrs, new_in_layouts, old_in_layouts,
old_in_types);
CHECK_EQ(inferred_layouts.size(), 2)
<< "FInferCorrectLayout should return an array with size of 2";
for (auto x : inferred_layouts) {
diff --git a/src/relay/transforms/transform_layout.h
b/src/relay/transforms/transform_layout.h
index 7204a7e..cee3aaf 100644
--- a/src/relay/transforms/transform_layout.h
+++ b/src/relay/transforms/transform_layout.h
@@ -225,7 +225,6 @@ template <class TransformMemorizerT>
Expr LayoutRewriter(const Call& ref_call, const Array<Expr>& new_args, const
ObjectRef& ctx) {
std::vector<LayoutAlternatedExpr<TransformMemorizerT>> inputs;
std::vector<Expr> normal_new_args;
- Array<Array<IndexExpr>> input_shapes;
// NOTE: discard the "const" qualifier
// TransformMemorizer memorizer = Downcast<TransformMemorizer>(ctx);
@@ -273,21 +272,16 @@ Expr LayoutRewriter(const Call& ref_call, const
Array<Expr>& new_args, const Obj
new_in.push_back(inp->new_layout);
}
+ // Collect input types to pass on to Infer Correct Layout.
+ tvm::Array<tvm::relay::Type> types;
for (auto arg : ref_call->args) {
- if (arg->IsInstance<TupleNode>()) { // flatten tuple
- Tuple tuple_arg = Downcast<Tuple>(arg);
- for (auto x : tuple_arg->fields) {
- input_shapes.push_back(x->type_as<TensorTypeNode>()->shape);
- }
- } else {
- input_shapes.push_back(arg->type_as<TensorTypeNode>()->shape);
- }
+ types.push_back(arg->checked_type());
}
// old_in, old_out = op.infer(old_in)
bool success = false;
std::tie(old_in, old_out, success) =
- InferCorrectLayouts(ref_call, Array<Layout>(nullptr), old_in,
input_shapes);
+ InferCorrectLayouts(ref_call, Array<Layout>(nullptr), old_in, types);
if (!success) {
return Expr(nullptr);
}
@@ -307,7 +301,7 @@ Expr LayoutRewriter(const Call& ref_call, const
Array<Expr>& new_args, const Obj
if (new_call->op->IsInstance<OpNode>()) {
success = false;
std::tie(new_in2, new_out, success) =
- InferCorrectLayouts(new_call, new_in, old_in, input_shapes);
+ InferCorrectLayouts(new_call, new_in, old_in, types);
if (!success) {
return Expr(nullptr);
}
diff --git a/tests/python/relay/test_pass_convert_op_layout.py
b/tests/python/relay/test_pass_convert_op_layout.py
index f9e7ca9..9e8f662 100644
--- a/tests/python/relay/test_pass_convert_op_layout.py
+++ b/tests/python/relay/test_pass_convert_op_layout.py
@@ -398,6 +398,220 @@ def test_conv_bn_convert_layout():
assert analysis.alpha_equal(a, b), "Actual = \n" + str(a)
+def test_qnn_conv_requantize_convert_layout():
+ def before():
+ x = relay.var("x", shape=(1, 56, 56, 64), dtype='int8')
+ weight = relay.var('weight', shape=(3, 3, 64, 64), dtype='int8')
+ y = relay.qnn.op.conv2d(x, weight,
+ relay.const(1, 'int32'),
+ relay.const(1, 'int32'),
+ relay.const(1, 'float32'),
+ relay.const(1, 'float32'),
+ channels=64,
+ kernel_size=(3, 3),
+ padding=(1, 1),
+ data_layout='NHWC',
+ kernel_layout='HWIO')
+ y = relay.qnn.op.requantize(y,
+ relay.const(1, 'float32'),
+ relay.const(1, 'int32'),
+ relay.const(1, 'float32'),
+ relay.const(1, 'int32'),
+ out_dtype='int32')
+ y = relay.nn.relu(y)
+ y = relay.Function([x, weight], y)
+ return y
+
+ def expected():
+ x = relay.var("x", shape=(1, 56, 56, 64), dtype='int8')
+ weight = relay.var('weight', shape=(3, 3, 64, 64), dtype='int8')
+ x = relay.layout_transform(x, 'NHWC', 'NCHW')
+ weight = relay.layout_transform(weight, 'HWIO', 'OIHW')
+ y = relay.qnn.op.conv2d(x, weight,
+ relay.const(1, 'int32'),
+ relay.const(1, 'int32'),
+ relay.const(1, 'float32'),
+ relay.const(1, 'float32'),
+ channels=64,
+ kernel_size=(3, 3),
+ padding=(1, 1))
+ y = relay.qnn.op.requantize(y,
+ relay.const(1, 'float32'),
+ relay.const(1, 'int32'),
+ relay.const(1, 'float32'),
+ relay.const(1, 'int32'),
+ axis=1,
+ out_dtype='int32')
+ y = relay.nn.relu(y)
+ y = relay.layout_transform(y, 'NCHW', 'NHWC')
+ y = relay.Function(relay.analysis.free_vars(y), y)
+ return y
+
+ a = before()
+ a = run_opt_pass(a, transform.ConvertLayout('NCHW'))
+ b = run_opt_pass(expected(), transform.InferType())
+
+ assert analysis.alpha_equal(a, b), "Actual = \n" + str(a)
+
+
+def test_qnn_conv_concat_convert_layout():
+ def before():
+ x = relay.var("x", shape=(1, 56, 56, 64), dtype='int8')
+ weight1 = relay.var('weight1', shape=(3, 3, 64, 64), dtype='int8')
+ weight2 = relay.var('weight2', shape=(3, 3, 64, 64), dtype='int8')
+ y = relay.qnn.op.conv2d(x, weight1,
+ relay.const(1, 'int32'),
+ relay.const(1, 'int32'),
+ relay.const(1, 'float32'),
+ relay.const(1, 'float32'),
+ channels=64,
+ kernel_size=(3, 3),
+ padding=(1, 1),
+ data_layout='NHWC',
+ kernel_layout='HWIO')
+ y1 = relay.qnn.op.conv2d(y, weight2,
+ relay.const(1, 'int32'),
+ relay.const(1, 'int32'),
+ relay.const(1, 'float32'),
+ relay.const(1, 'float32'),
+ channels=64,
+ kernel_size=(3, 3),
+ padding=(1, 1),
+ data_layout='NHWC',
+ kernel_layout='HWIO')
+ y = relay.cast(y, 'int8')
+ y1 = relay.cast(y, 'int8')
+ ret = relay.qnn.op.concatenate([y, y1],
+                                   [relay.const(1, 'float32'), relay.const(1, 'float32')],
+                                   [relay.const(1, 'int32'), relay.const(1, 'int32')],
+ relay.const(1, 'float32'),
+ relay.const(1, 'int32'),
+ axis=3)
+ y = relay.Function(analysis.free_vars(ret), ret)
+ return y
+
+ def expected():
+ x = relay.var("x", shape=(1, 56, 56, 64), dtype='int8')
+ weight1 = relay.var('weight1', shape=(3, 3, 64, 64), dtype='int8')
+ weight2 = relay.var('weight2', shape=(3, 3, 64, 64), dtype='int8')
+ weight1 = relay.layout_transform(weight1, 'HWIO', 'OIHW')
+ weight2 = relay.layout_transform(weight2, 'HWIO', 'OIHW')
+ y = relay.layout_transform(x, "NHWC", "NCHW")
+ y = relay.qnn.op.conv2d(y, weight1,
+ relay.const(1, 'int32'),
+ relay.const(1, 'int32'),
+ relay.const(1, 'float32'),
+ relay.const(1, 'float32'),
+ channels=64,
+ kernel_size=(3, 3),
+ padding=(1, 1))
+ y1 = relay.qnn.op.conv2d(y, weight2,
+ relay.const(1, 'int32'),
+ relay.const(1, 'int32'),
+ relay.const(1, 'float32'),
+ relay.const(1, 'float32'),
+ channels=64,
+ kernel_size=(3, 3),
+ padding=(1, 1))
+ y = relay.cast(y, 'int8')
+ y1 = relay.cast(y, 'int8')
+ ret = relay.qnn.op.concatenate([y, y1],
+                                   [relay.const(1, 'float32'), relay.const(1, 'float32')],
+                                   [relay.const(1, 'int32'), relay.const(1, 'int32')],
+ relay.const(1, 'float32'),
+ relay.const(1, 'int32'),
+ axis=1)
+ ret = relay.layout_transform(ret, "NCHW", "NHWC")
+ y = relay.Function(analysis.free_vars(ret), ret)
+ return y
+
+ a = before()
+ a = run_opt_pass(a, transform.ConvertLayout('NCHW'))
+ b = run_opt_pass(expected(), transform.InferType())
+
+ assert analysis.alpha_equal(a, b), "Actual = \n" + str(a)
+
+
+def test_qnn_conv_add_convert_layout():
+ def before():
+ x = relay.var("x", shape=(1, 56, 56, 64), dtype='int8')
+ weight1 = relay.var('weight1', shape=(3, 3, 64, 64), dtype='int8')
+ weight2 = relay.var('weight2', shape=(3, 3, 64, 64), dtype='int8')
+ y = relay.qnn.op.conv2d(x, weight1,
+ relay.const(1, 'int32'),
+ relay.const(1, 'int32'),
+ relay.const(1, 'float32'),
+ relay.const(1, 'float32'),
+ channels=64,
+ kernel_size=(3, 3),
+ padding=(1, 1),
+ data_layout='NHWC',
+ kernel_layout='HWIO')
+ y1 = relay.qnn.op.conv2d(y, weight2,
+ relay.const(1, 'int32'),
+ relay.const(1, 'int32'),
+ relay.const(1, 'float32'),
+ relay.const(1, 'float32'),
+ channels=64,
+ kernel_size=(3, 3),
+ padding=(1, 1),
+ data_layout='NHWC',
+ kernel_layout='HWIO')
+ y = relay.cast(y, 'int8')
+ y1 = relay.cast(y, 'int8')
+ ret = relay.qnn.op.add(y, y1,
+ relay.const(1, 'float32'),
+ relay.const(1, 'int32'),
+ relay.const(1, 'float32'),
+ relay.const(1, 'int32'),
+ relay.const(1, 'float32'),
+ relay.const(1, 'int32'))
+ y = relay.Function(analysis.free_vars(ret), ret)
+ return y
+
+ def expected():
+ x = relay.var("x", shape=(1, 56, 56, 64), dtype='int8')
+ weight1 = relay.var('weight1', shape=(3, 3, 64, 64), dtype='int8')
+ weight2 = relay.var('weight2', shape=(3, 3, 64, 64), dtype='int8')
+ weight1 = relay.layout_transform(weight1, 'HWIO', 'OIHW')
+ weight2 = relay.layout_transform(weight2, 'HWIO', 'OIHW')
+ y = relay.layout_transform(x, "NHWC", "NCHW")
+ y = relay.qnn.op.conv2d(y, weight1,
+ relay.const(1, 'int32'),
+ relay.const(1, 'int32'),
+ relay.const(1, 'float32'),
+ relay.const(1, 'float32'),
+ channels=64,
+ kernel_size=(3, 3),
+ padding=(1, 1))
+ y1 = relay.qnn.op.conv2d(y, weight2,
+ relay.const(1, 'int32'),
+ relay.const(1, 'int32'),
+ relay.const(1, 'float32'),
+ relay.const(1, 'float32'),
+ channels=64,
+ kernel_size=(3, 3),
+ padding=(1, 1))
+ y = relay.cast(y, 'int8')
+ y1 = relay.cast(y, 'int8')
+ ret = relay.qnn.op.add(y, y1,
+ relay.const(1, 'float32'),
+ relay.const(1, 'int32'),
+ relay.const(1, 'float32'),
+ relay.const(1, 'int32'),
+ relay.const(1, 'float32'),
+ relay.const(1, 'int32'))
+ ret = relay.layout_transform(ret, "NCHW", "NHWC")
+ y = relay.Function(analysis.free_vars(ret), ret)
+ return y
+
+ a = before()
+ a = run_opt_pass(a, transform.ConvertLayout('NCHW'))
+ b = run_opt_pass(expected(), transform.InferType())
+
+ assert analysis.alpha_equal(a, b), "Actual = \n" + str(a)
+
+
if __name__ == "__main__":
test_no_convert_layout()
test_conv_convert_layout()
@@ -408,3 +622,6 @@ if __name__ == "__main__":
test_resnet_convert_layout()
test_scalar_convert_layout()
test_conv_bn_convert_layout()
+ test_qnn_conv_requantize_convert_layout()
+ test_qnn_conv_concat_convert_layout()
+ test_qnn_conv_add_convert_layout()