This is an automated email from the ASF dual-hosted git repository.

manupa pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git


The following commit(s) were added to refs/heads/main by this push:
     new 4fc79b5594 [ETHOSN] Supply output tensor to issupported checks (#11944)
4fc79b5594 is described below

commit 4fc79b5594b8b50514c6b4be5fec3041a6dd09b2
Author: Luke Hutton <[email protected]>
AuthorDate: Thu Jul 21 16:38:40 2022 +0100

    [ETHOSN] Supply output tensor to issupported checks (#11944)
    
    Some operations were being offloaded when they were not supported
    by the NPU; for example, mean could get offloaded with different
    quantization parameters for the input and output, which is not
    supported. Consequently, this meant that there would be a failure
    during compilation or an output mismatch at runtime. This is fixed by
    supplying the output information to the IsSupported checks that
    determine whether an operation should be offloaded.
    
    Change-Id: I8896f83dad3d1c837fbb85bf2836fc9325f9dec9
---
 python/tvm/relay/op/contrib/ethosn.py              |  44 ++----
 src/relay/backend/contrib/ethosn/codegen.cc        |  55 ++++---
 src/relay/backend/contrib/ethosn/ethosn_api.cc     | 165 +++++++++++++++++----
 src/relay/backend/contrib/ethosn/ethosn_api.h      |  26 +++-
 .../python/contrib/test_ethosn/test_concatenate.py |   2 +-
 tests/python/contrib/test_ethosn/test_mean.py      |  18 ++-
 tests/python/contrib/test_ethosn/test_networks.py  |   4 +-
 7 files changed, 217 insertions(+), 97 deletions(-)

diff --git a/python/tvm/relay/op/contrib/ethosn.py 
b/python/tvm/relay/op/contrib/ethosn.py
index 17038e749f..b3540a9433 100644
--- a/python/tvm/relay/op/contrib/ethosn.py
+++ b/python/tvm/relay/op/contrib/ethosn.py
@@ -23,7 +23,6 @@ import tvm.ir
 from tvm.relay import transform
 from tvm.relay.build_module import bind_params_by_name
 
-from ... import qnn as _qnn
 from ...dataflow_pattern import is_constant, is_op, wildcard
 from . import _ethosn as support
 from .register import register_pattern_table
@@ -224,9 +223,7 @@ def max_pool2d(expr):
     if not ethosn_available():
         return False
 
-    attrs, args = expr.attrs, expr.args
-    pool = tvm.relay.nn.max_pool2d(*args, **attrs)
-    return support.max_pool2d(pool)
+    return support.max_pool2d(expr)
 
 
 @tvm.ir.register_op_attr("reshape", "target.ethos-n")
@@ -234,13 +231,10 @@ def reshape(expr):
     """Check if a reshape is supported by Ethos-N."""
     if not ethosn_available():
         return False
-
-    attrs, args = expr.attrs, expr.args
-    if not _is_ethosn_composite(args[0]):
+    if not _is_ethosn_composite(expr.args[0]):
         return False
 
-    rs = tvm.relay.op.reshape(*args, attrs["newshape"])
-    return support.reshape(rs)
+    return support.reshape(expr)
 
 
 @tvm.ir.register_op_attr("qnn.add", "target.ethos-n")
@@ -249,9 +243,7 @@ def qnn_add(expr):
     if not ethosn_available():
         return False
 
-    args = expr.args
-    add = _qnn.op.add(*args)
-    return support.addition(add)
+    return support.addition(expr)
 
 
 @tvm.ir.register_op_attr("qnn.concatenate", "target.ethos-n")
@@ -259,13 +251,11 @@ def qnn_concatenate(expr):
     """Check if a concatenate is supported by Ethos-N."""
     if not ethosn_available():
         return False
-
-    attrs, args = expr.attrs, expr.args
-    conc = _qnn.op.concatenate(*args, **attrs)
-    if not support.concatenate(conc):
+    if not support.concatenate(expr):
         return False
 
     # Support library has some unenforced restrictions on qnn params
+    args = expr.args
     min_range = 1e9
     max_range = -1e9
     qnn_params = []
@@ -289,17 +279,7 @@ def split(expr):
     """Check if a split is supported by Ethos-N."""
     if not ethosn_available():
         return False
-
-    attrs, args = expr.attrs, expr.args
-    if isinstance(attrs["indices_or_sections"], tvm.tir.IntImm):
-        sp = tvm.relay.split(
-            *args, indices_or_sections=attrs["indices_or_sections"].value, 
axis=attrs["axis"]
-        )
-    else:
-        sp = tvm.relay.split(
-            *args, indices_or_sections=attrs["indices_or_sections"], 
axis=attrs["axis"]
-        )
-    if not support.split(sp.astuple()):
+    if not support.split(expr):
         return False
 
     return True
@@ -310,10 +290,7 @@ def depth_to_space(expr):
     """Check if a depth_to_space is supported by Ethos-N."""
     if not ethosn_available():
         return False
-
-    attrs, args = expr.attrs, expr.args
-    depth = tvm.relay.nn.depth_to_space(*args, **attrs)
-    if not support.depth_to_space(depth):
+    if not support.depth_to_space(expr):
         return False
 
     return True
@@ -324,10 +301,7 @@ def clip(expr):
     """Check if a clip is supported by Ethos-N."""
     if not ethosn_available():
         return False
-
-    attrs, args = expr.attrs, expr.args
-    c = tvm.relay.clip(*args, **attrs)
-    if not support.relu(c):
+    if not support.relu(expr):
         return False
 
     return True
diff --git a/src/relay/backend/contrib/ethosn/codegen.cc 
b/src/relay/backend/contrib/ethosn/codegen.cc
index fc8a4c48df..67ae1d20e3 100644
--- a/src/relay/backend/contrib/ethosn/codegen.cc
+++ b/src/relay/backend/contrib/ethosn/codegen.cc
@@ -83,7 +83,7 @@ void InferTensorsVisitor::InferCall(const CallNode* cn) {
   if (IsEthosnFunc(call, "ethos-n.qnn_conv2d")) {
     ConvolutionParams params;
     err += EthosnAPI::QnnConv2d(cn->op.as<FunctionNode>()->body, &params);
-    tensor_table_[cn->args[0]] = {params.activation_info};
+    tensor_table_[cn->args[0]] = {params.input_info};
   } else if (IsEthosnFunc(call, "ethos-n.qnn_fc")) {
     FullyConnectedParams params;
     err += EthosnAPI::QnnFullyConnected(cn->op.as<FunctionNode>()->body, 
&params);
@@ -714,11 +714,11 @@ TVM_REGISTER_GLOBAL("relay.ethos-n.support.conv2d")
       if (params.is_depthwise) {
         *rv = !err && 
EthosnCompiler::GetSupported()->IsDepthwiseConvolutionSupported(
                           params.bias_info, params.weights_info, 
params.conv_info,
-                          params.activation_info, nullptr, reason, 
sizeof(reason));
+                          params.input_info, &params.output_info, reason, 
sizeof(reason));
       } else {
         *rv = !err && EthosnCompiler::GetSupported()->IsConvolutionSupported(
                           params.bias_info, params.weights_info, 
params.conv_info,
-                          params.activation_info, nullptr, reason, 
sizeof(reason));
+                          params.input_info, &params.output_info, reason, 
sizeof(reason));
       }
       err += EthosnError(reason);
     });
@@ -733,7 +733,7 @@ TVM_REGISTER_GLOBAL("relay.ethos-n.support.fc")
       reason[0] = '\0';
       *rv = !err && EthosnCompiler::GetSupported()->IsFullyConnectedSupported(
                         params.bias_info, params.weights_info, params.fc_info, 
params.input_info,
-                        nullptr, reason, sizeof(reason));
+                        &params.output_info, reason, sizeof(reason));
       err += EthosnError(reason);
     });
 
@@ -745,8 +745,9 @@ TVM_REGISTER_GLOBAL("relay.ethos-n.support.max_pool2d")
       err += EthosnCompiler::SupportedSetup();
       char reason[kReasonMaxLength];
       reason[0] = '\0';
-      *rv = !err && EthosnCompiler::GetSupported()->IsPoolingSupported(
-                        params.pool_info, params.input_info, nullptr, reason, 
sizeof(reason));
+      *rv = !err &&
+            EthosnCompiler::GetSupported()->IsPoolingSupported(
+                params.pool_info, params.input_info, &params.output_info, 
reason, sizeof(reason));
       err += EthosnError(reason);
     });
 
@@ -758,8 +759,9 @@ TVM_REGISTER_GLOBAL("relay.ethos-n.support.avg_pool2d")
       err += EthosnCompiler::SupportedSetup();
       char reason[kReasonMaxLength];
       reason[0] = '\0';
-      *rv = !err && EthosnCompiler::GetSupported()->IsPoolingSupported(
-                        params.pool_info, params.input_info, nullptr, reason, 
sizeof(reason));
+      *rv = !err &&
+            EthosnCompiler::GetSupported()->IsPoolingSupported(
+                params.pool_info, params.input_info, &params.output_info, 
reason, sizeof(reason));
       err += EthosnError(reason);
     });
 
@@ -772,8 +774,9 @@ TVM_REGISTER_GLOBAL("relay.ethos-n.support.reshape")
       err += EthosnCompiler::SupportedSetup();
       char reason[kReasonMaxLength];
       reason[0] = '\0';
-      *rv = !err && EthosnCompiler::GetSupported()->IsReshapeSupported(
-                        params.new_shape, params.input_info, nullptr, reason, 
sizeof(reason));
+      *rv = !err &&
+            EthosnCompiler::GetSupported()->IsReshapeSupported(
+                params.new_shape, params.input_info, &params.output_info, 
reason, sizeof(reason));
       err += EthosnError(reason);
     });
 
@@ -786,8 +789,8 @@ TVM_REGISTER_GLOBAL("relay.ethos-n.support.addition")
       char reason[kReasonMaxLength];
       reason[0] = '\0';
       *rv = !err && EthosnCompiler::GetSupported()->IsAdditionSupported(
-                        params.lhs_info, params.rhs_info, 
params.output_quantization_info, nullptr,
-                        reason, sizeof(reason));
+                        params.lhs_info, params.rhs_info, 
params.output_quantization_info,
+                        &params.output_info, reason, sizeof(reason));
       err += EthosnError(reason);
     });
 
@@ -799,8 +802,8 @@ TVM_REGISTER_GLOBAL("relay.ethos-n.support.sigmoid")
       err += EthosnCompiler::SupportedSetup();
       char reason[kReasonMaxLength];
       reason[0] = '\0';
-      *rv = !err && 
EthosnCompiler::GetSupported()->IsSigmoidSupported(params.input_info, nullptr,
-                                                                       reason, 
sizeof(reason));
+      *rv = !err && EthosnCompiler::GetSupported()->IsSigmoidSupported(
+                        params.input_info, &params.output_info, reason, 
sizeof(reason));
       err += EthosnError(reason);
     });
 
@@ -812,8 +815,8 @@ TVM_REGISTER_GLOBAL("relay.ethos-n.support.mean")
       err += EthosnCompiler::SupportedSetup();
       char reason[kReasonMaxLength];
       reason[0] = '\0';
-      *rv = !err && 
EthosnCompiler::GetSupported()->IsMeanXySupported(params.input_info, nullptr,
-                                                                      reason, 
sizeof(reason));
+      *rv = !err && EthosnCompiler::GetSupported()->IsMeanXySupported(
+                        params.input_info, &params.output_info, reason, 
sizeof(reason));
       err += EthosnError(reason);
     });
 
@@ -825,8 +828,8 @@ TVM_REGISTER_GLOBAL("relay.ethos-n.support.tanh")
       err += EthosnCompiler::SupportedSetup();
       char reason[kReasonMaxLength];
       reason[0] = '\0';
-      *rv = !err && 
EthosnCompiler::GetSupported()->IsTanhSupported(params.input_info, nullptr,
-                                                                    reason, 
sizeof(reason));
+      *rv = !err && EthosnCompiler::GetSupported()->IsTanhSupported(
+                        params.input_info, &params.output_info, reason, 
sizeof(reason));
       err += EthosnError(reason);
     });
 
@@ -839,7 +842,8 @@ TVM_REGISTER_GLOBAL("relay.ethos-n.support.leaky_relu")
       char reason[kReasonMaxLength];
       reason[0] = '\0';
       *rv = !err && EthosnCompiler::GetSupported()->IsLeakyReluSupported(
-                        params.leaky_relu_info, params.input_info, nullptr, 
reason, sizeof(reason));
+                        params.leaky_relu_info, params.input_info, 
&params.output_info, reason,
+                        sizeof(reason));
       err += EthosnError(reason);
     });
 
@@ -852,7 +856,8 @@ TVM_REGISTER_GLOBAL("relay.ethos-n.support.concatenate")
       char reason[kReasonMaxLength];
       reason[0] = '\0';
       *rv = !err && EthosnCompiler::GetSupported()->IsConcatenationSupported(
-                        params.input_infos, params.concat_info, nullptr, 
reason, sizeof(reason));
+                        params.input_infos, params.concat_info, 
&params.output_info, reason,
+                        sizeof(reason));
       err += EthosnError(reason);
     });
 
@@ -878,8 +883,9 @@ TVM_REGISTER_GLOBAL("relay.ethos-n.support.depth_to_space")
       err += EthosnCompiler::SupportedSetup();
       char reason[kReasonMaxLength];
       reason[0] = '\0';
-      *rv = !err && EthosnCompiler::GetSupported()->IsDepthToSpaceSupported(
-                        params.input_info, params.depth_info, nullptr, reason, 
sizeof(reason));
+      *rv = !err &&
+            EthosnCompiler::GetSupported()->IsDepthToSpaceSupported(
+                params.input_info, params.depth_info, &params.output_info, 
reason, sizeof(reason));
       err += EthosnError(reason);
     });
 
@@ -891,8 +897,9 @@ TVM_REGISTER_GLOBAL("relay.ethos-n.support.relu")
       err += EthosnCompiler::SupportedSetup();
       char reason[kReasonMaxLength];
       reason[0] = '\0';
-      *rv = !err && EthosnCompiler::GetSupported()->IsReluSupported(
-                        params.relu_info, params.input_info, nullptr, reason, 
sizeof(reason));
+      *rv = !err &&
+            EthosnCompiler::GetSupported()->IsReluSupported(
+                params.relu_info, params.input_info, &params.output_info, 
reason, sizeof(reason));
       err += EthosnError(reason);
     });
 
diff --git a/src/relay/backend/contrib/ethosn/ethosn_api.cc 
b/src/relay/backend/contrib/ethosn/ethosn_api.cc
index bf2f248b3f..493b827c28 100644
--- a/src/relay/backend/contrib/ethosn/ethosn_api.cc
+++ b/src/relay/backend/contrib/ethosn/ethosn_api.cc
@@ -90,13 +90,13 @@ EthosnError EthosnAPI::QnnConv2d(const Expr& expr, 
ConvolutionParams* params) {
   err += AsConstant(requantize->args[3], &output_scale);
 
   // Convert quantization params
-  sl::QuantizationInfo data_q_info;
+  sl::QuantizationInfo input_q_info;
   sl::QuantizationInfo weights_q_info;
   sl::QuantizationInfo bias_q_info;
   sl::QuantizationInfo output_q_info;
-  err += Tvm2Npu(input_zero_point, input_scale, qaxis, &data_q_info);
+  err += Tvm2Npu(input_zero_point, input_scale, qaxis, &input_q_info);
   err += Tvm2Npu(kernel_zero_point, kernel_scale, qaxis, &weights_q_info);
-  std::valarray<float> bias = data_q_info.GetScales() * 
weights_q_info.GetScales();
+  std::valarray<float> bias = input_q_info.GetScales() * 
weights_q_info.GetScales();
   err += Tvm2Npu(0, bias, 3, &bias_q_info);
   err += Tvm2Npu(output_zero_point, output_scale, &output_q_info);
 
@@ -125,19 +125,19 @@ EthosnError EthosnAPI::QnnConv2d(const Expr& expr, 
ConvolutionParams* params) {
   // Create convolution info
   params->conv_info = sl::ConvolutionInfo(padding, stride, output_q_info);
 
-  // Create data info
-  const TensorTypeNode* data_dtype;
+  // Create input info
+  const TensorTypeNode* input_ttype;
   if (pad.defined()) {
-    data_dtype = pad->args[0]->checked_type().as<TensorTypeNode>();
+    input_ttype = pad->args[0]->checked_type().as<TensorTypeNode>();
   } else {
-    data_dtype = conv->args[0]->checked_type().as<TensorTypeNode>();
+    input_ttype = conv->args[0]->checked_type().as<TensorTypeNode>();
   }
-  sl::TensorShape activation_tensor_shape;
-  sl::DataType activation_data_type;
-  err += Tvm2Npu(data_dtype->shape, &activation_tensor_shape);
-  err += Tvm2Npu(data_dtype->dtype, &activation_data_type);
-  params->activation_info = sl::TensorInfo(activation_tensor_shape, 
activation_data_type,
-                                           sl::DataFormat::NHWC, data_q_info);
+  sl::TensorShape input_tensor_shape;
+  sl::DataType input_data_type;
+  err += Tvm2Npu(input_ttype->shape, &input_tensor_shape);
+  err += Tvm2Npu(input_ttype->dtype, &input_data_type);
+  params->input_info =
+      sl::TensorInfo(input_tensor_shape, input_data_type, 
sl::DataFormat::NHWC, input_q_info);
 
   // Create weights info
   const auto* weights_dtype = 
conv->args[1]->checked_type().as<TensorTypeNode>();
@@ -158,6 +158,11 @@ EthosnError EthosnAPI::QnnConv2d(const Expr& expr, 
ConvolutionParams* params) {
       sl::DataType::INT32_QUANTIZED, sl::DataFormat::NHWC, bias_q_info);
   params->raw_bias = bias_add->args[1].as<ConstantNode>()->data->data;
 
+  sl::TensorInfo output_tensor_info;
+  err += Tvm2Npu(requantize->checked_type(), &output_tensor_info);
+  output_tensor_info.m_QuantizationInfo = output_q_info;
+  params->output_info = output_tensor_info;
+
   return err;
 }
 
@@ -221,12 +226,19 @@ EthosnError EthosnAPI::QnnFullyConnected(const Expr& 
expr, FullyConnectedParams*
                      sl::DataFormat::NHWC, bias_q_info);
   params->raw_bias = bias_add->args[1].as<ConstantNode>()->data->data;
 
+  sl::TensorInfo output_tensor_info;
+  err += Tvm2Npu(requantize->checked_type(), &output_tensor_info);
+  output_tensor_info.m_Dimensions = {data_tensor_shape[0], 1, 1, 
weights_tensor_shape[0]};
+  output_tensor_info.m_QuantizationInfo = output_q_info;
+  params->output_info = output_tensor_info;
+
   return err;
 }
 
-EthosnError EthosnAPI::Pool2d(const Call& pool, Array<IndexExpr> size, 
Array<IndexExpr> strides,
-                              Array<IndexExpr> padding, sl::PoolingType 
pooling_type,
-                              sl::PoolingInfo* pool_info, sl::TensorInfo* 
input_info,
+EthosnError EthosnAPI::Pool2d(const Call& input, const Call& output, 
Array<IndexExpr> size,
+                              Array<IndexExpr> strides, Array<IndexExpr> 
padding,
+                              sl::PoolingType pooling_type, sl::PoolingInfo* 
pool_info,
+                              sl::TensorInfo* input_info, sl::TensorInfo* 
output_info,
                               std::string layout) {
   uint32_t npu_sizex, npu_sizey;
   sl::Padding npu_padding;
@@ -238,7 +250,7 @@ EthosnError EthosnAPI::Pool2d(const Call& pool, 
Array<IndexExpr> size, Array<Ind
                                pooling_type);
 
   // Create input info
-  const auto* input_dtype = pool->args[0]->checked_type().as<TensorTypeNode>();
+  const auto* input_dtype = 
input->args[0]->checked_type().as<TensorTypeNode>();
   sl::TensorShape input_tensor_shape;
   sl::DataType input_data_type;
   sl::DataFormat input_data_format;
@@ -250,14 +262,21 @@ EthosnError EthosnAPI::Pool2d(const Call& pool, 
Array<IndexExpr> size, Array<Ind
   }
   *input_info = sl::TensorInfo(input_tensor_shape, input_data_type, 
input_data_format,
                                input_info->m_QuantizationInfo);
+
+  sl::TensorInfo output_tensor_info;
+  err += Tvm2Npu(output->checked_type(), &output_tensor_info);
+  // output quantization is the same as the input
+  output_tensor_info.m_QuantizationInfo = input_info->m_QuantizationInfo;
+  *output_info = output_tensor_info;
   return err;
 }
 
 EthosnError EthosnAPI::MaxPool2D(const Expr& expr, MaxPool2DParams* params) {
   Call pool = Downcast<Call>(expr);
   const auto pool_attrs = pool->attrs.as<MaxPool2DAttrs>();
-  return Pool2d(pool, pool_attrs->pool_size, pool_attrs->strides, 
pool_attrs->padding,
-                sl::PoolingType::MAX, &params->pool_info, &params->input_info, 
pool_attrs->layout);
+  return Pool2d(pool, pool, pool_attrs->pool_size, pool_attrs->strides, 
pool_attrs->padding,
+                sl::PoolingType::MAX, &params->pool_info, &params->input_info, 
&params->output_info,
+                pool_attrs->layout);
 }
 
 EthosnError EthosnAPI::AvgPool2D(const Expr& expr, AvgPool2DParams* params) {
@@ -265,8 +284,9 @@ EthosnError EthosnAPI::AvgPool2D(const Expr& expr, 
AvgPool2DParams* params) {
   Call pool = Downcast<Call>(cast_0->args[0]);
   Call cast_1 = Downcast<Call>(pool->args[0]);
   const auto pool_attrs = pool->attrs.as<AvgPool2DAttrs>();
-  return Pool2d(cast_1, pool_attrs->pool_size, pool_attrs->strides, 
pool_attrs->padding,
-                sl::PoolingType::AVG, &params->pool_info, &params->input_info, 
pool_attrs->layout);
+  return Pool2d(cast_1, cast_0, pool_attrs->pool_size, pool_attrs->strides, 
pool_attrs->padding,
+                sl::PoolingType::AVG, &params->pool_info, &params->input_info, 
&params->output_info,
+                pool_attrs->layout);
 }
 
 EthosnError EthosnAPI::Reshape(const Expr& expr, ReshapeParams* params) {
@@ -323,6 +343,11 @@ EthosnError EthosnAPI::Reshape(const Expr& expr, 
ReshapeParams* params) {
       sl::TensorInfo(input_tensor_shape, input_data_type, 
params->input_info.m_DataFormat,
                      params->input_info.m_QuantizationInfo);
 
+  sl::TensorInfo output_tensor_info;
+  err += Tvm2Npu(reshape->checked_type(), &output_tensor_info);
+  output_tensor_info.m_QuantizationInfo = 
params->input_info.m_QuantizationInfo;
+  params->output_info = output_tensor_info;
+
   return err;
 }
 
@@ -344,9 +369,11 @@ EthosnError EthosnAPI::Addition(const Expr& expr, 
AdditionParams* params) {
 
   sl::QuantizationInfo lhs_q_info;
   sl::QuantizationInfo rhs_q_info;
+  sl::QuantizationInfo output_q_info;
   err += Tvm2Npu(lhs_zero_point, lhs_scale, &lhs_q_info);
   err += Tvm2Npu(rhs_zero_point, rhs_scale, &rhs_q_info);
-  err += Tvm2Npu(output_zero_point, output_scale, 
&params->output_quantization_info);
+  err += Tvm2Npu(output_zero_point, output_scale, &output_q_info);
+  params->output_quantization_info = output_q_info;
 
   // Create input info
   const auto* lhs_dtype = call->args[0]->checked_type().as<TensorTypeNode>();
@@ -364,6 +391,12 @@ EthosnError EthosnAPI::Addition(const Expr& expr, 
AdditionParams* params) {
   err += Tvm2Npu(rhs_dtype->dtype, &rhs_data_type);
   params->rhs_info =
       sl::TensorInfo(rhs_tensor_shape, rhs_data_type, sl::DataFormat::NHWC, 
rhs_q_info);
+
+  sl::TensorInfo output_tensor_info;
+  err += Tvm2Npu(call->checked_type(), &output_tensor_info);
+  output_tensor_info.m_QuantizationInfo = output_q_info;
+  params->output_info = output_tensor_info;
+
   return err;
 }
 
@@ -373,7 +406,7 @@ EthosnError EthosnAPI::Sigmoid(const Expr& expr, 
SigmoidParams* params) {
   Call dequantize = Downcast<Call>(sigmoid->args[0]);
 
   // Create input info
-  const auto* input_dtype = quantize->checked_type().as<TensorTypeNode>();
+  const auto* input_dtype = 
dequantize->args[0]->checked_type().as<TensorTypeNode>();
   sl::TensorShape input_tensor_shape = {1, 1, 1, 1};
   sl::DataType input_tensor_dtype;
   EthosnError err = Tvm2Npu(input_dtype->shape, &input_tensor_shape);
@@ -386,13 +419,21 @@ EthosnError EthosnAPI::Sigmoid(const Expr& expr, 
SigmoidParams* params) {
   int output_zp;
   err += AsConstant(quantize->args[2], &output_zp);
   err += AsConstant(quantize->args[1], &output_sc);
+
   auto test_zp = input_dtype->dtype.is_int() ? -128 : 0;
   if (output_zp != test_zp || output_sc != 1.0f / 256.0f) {
     err += EthosnError(ErrStrm() << "output quantization params=(" << 
output_zp << ", " << output_sc
                                  << "), must = (" << test_zp << ", 1/256)");
   }
+
   params->input_info = sl::TensorInfo(input_tensor_shape, input_tensor_dtype, 
sl::DataFormat::NHWC,
                                       sl::QuantizationInfo(input_zp, 
input_sc));
+
+  sl::TensorInfo output_tensor_info;
+  err += Tvm2Npu(quantize->checked_type(), &output_tensor_info);
+  output_tensor_info.m_QuantizationInfo = sl::QuantizationInfo(output_zp, 
output_sc);
+  params->output_info = output_tensor_info;
+
   return err;
 }
 
@@ -402,11 +443,16 @@ EthosnError EthosnAPI::Mean(const Expr& expr, MeanParams* 
params) {
   Call cast_0 = Downcast<Call>(mean->args[0]);
 
   // Create input info
-  const auto* input_dtype = 
cast_0->args[0]->checked_type().as<TensorTypeNode>();
+  const auto* input_ttype = 
cast_0->args[0]->checked_type().as<TensorTypeNode>();
+  const auto* output_ttype = requantize->checked_type().as<TensorTypeNode>();
   sl::TensorShape input_tensor_shape = {1, 1, 1, 1};
   sl::DataType input_tensor_dtype;
-  EthosnError err = Tvm2Npu(input_dtype->shape, &input_tensor_shape);
-  err += Tvm2Npu(input_dtype->dtype, &input_tensor_dtype);
+  EthosnError err = Tvm2Npu(input_ttype->shape, &input_tensor_shape);
+  err += Tvm2Npu(input_ttype->dtype, &input_tensor_dtype);
+  sl::TensorShape output_tensor_shape = {1, 1, 1, 1};
+  sl::DataType output_tensor_dtype;
+  err += Tvm2Npu(output_ttype->shape, &output_tensor_shape);
+  err += Tvm2Npu(output_ttype->dtype, &output_tensor_dtype);
   float input_sc;
   int input_zp;
   err += AsConstant(requantize->args[2], &input_zp);
@@ -414,6 +460,15 @@ EthosnError EthosnAPI::Mean(const Expr& expr, MeanParams* 
params) {
   params->input_info = sl::TensorInfo(input_tensor_shape, input_tensor_dtype, 
sl::DataFormat::NHWC,
                                       sl::QuantizationInfo(input_zp, 
input_sc));
 
+  float output_sc;
+  int output_zp;
+  err += AsConstant(requantize->args[3], &output_sc);
+  err += AsConstant(requantize->args[4], &output_zp);
+  sl::TensorInfo output_tensor_info;
+  err += Tvm2Npu(requantize->checked_type(), &output_tensor_info);
+  output_tensor_info.m_QuantizationInfo = sl::QuantizationInfo(output_zp, 
output_sc);
+  params->output_info = output_tensor_info;
+
   return err;
 }
 
@@ -442,6 +497,12 @@ EthosnError EthosnAPI::Tanh(const Expr& expr, TanhParams* 
params) {
   }
   params->input_info = sl::TensorInfo(input_tensor_shape, input_tensor_dtype, 
sl::DataFormat::NHWC,
                                       sl::QuantizationInfo(input_zp, 
input_sc));
+
+  sl::TensorInfo output_tensor_info;
+  err += Tvm2Npu(quantize->checked_type(), &output_tensor_info);
+  output_tensor_info.m_QuantizationInfo = sl::QuantizationInfo(output_zp, 
output_sc);
+  params->output_info = output_tensor_info;
+
   return err;
 }
 
@@ -474,6 +535,12 @@ EthosnError EthosnAPI::LeakyReLU(const Expr& expr, 
LeakyReLUParams* params) {
   params->leaky_relu_info = sl::LeakyReluInfo(alpha, 
sl::QuantizationInfo(output_zp, output_sc));
   params->input_info = sl::TensorInfo(input_tensor_shape, input_tensor_dtype, 
sl::DataFormat::NHWC,
                                       sl::QuantizationInfo(input_zp, 
input_sc));
+
+  sl::TensorInfo output_tensor_info;
+  err += Tvm2Npu(quantize->checked_type(), &output_tensor_info);
+  output_tensor_info.m_QuantizationInfo = sl::QuantizationInfo(output_zp, 
output_sc);
+  params->output_info = output_tensor_info;
+
   return err;
 }
 
@@ -482,11 +549,11 @@ EthosnError EthosnAPI::Concatenate(const Expr& expr, 
ConcatenateParams* params)
   const auto& attrs = call->attrs.as<ConcatenateAttrs>();
   params->concat_info.m_Axis = attrs->axis;
 
-  float output_s;
+  float output_sc;
   int output_zp;
-  EthosnError err = AsConstant(call->args[3], &output_s);
+  EthosnError err = AsConstant(call->args[3], &output_sc);
   err += AsConstant(call->args[4], &output_zp);
-  params->concat_info.m_OutputQuantizationInfo = 
sl::QuantizationInfo(output_zp, output_s);
+  params->concat_info.m_OutputQuantizationInfo = 
sl::QuantizationInfo(output_zp, output_sc);
 
   auto input_scales = call->args[1].as<TupleNode>()->fields;
   auto input_zero_points = call->args[2].as<TupleNode>()->fields;
@@ -509,6 +576,12 @@ EthosnError EthosnAPI::Concatenate(const Expr& expr, 
ConcatenateParams* params)
                                                     sl::QuantizationInfo(zp, 
scale)));
     index++;
   }
+
+  sl::TensorInfo output_tensor_info;
+  err += Tvm2Npu(call->checked_type(), &output_tensor_info);
+  output_tensor_info.m_QuantizationInfo = sl::QuantizationInfo(output_zp, 
output_sc);
+  params->output_info = output_tensor_info;
+
   return err;
 }
 
@@ -541,6 +614,16 @@ EthosnError EthosnAPI::Split(const Expr& expr, 
SplitParams* params) {
     int axis_size = input_tensor_shape[attrs->axis];
     params->split_info.m_Sizes.push_back(axis_size - last_index);
   }
+
+  Array<Type> output_tensors = 
call->checked_type().as<TupleTypeNode>()->fields;
+  std::vector<sl::TensorInfo> output_infos = {};
+  for (auto output_ttype : output_tensors) {
+    sl::TensorInfo output_tensor_info;
+    err += Tvm2Npu(output_ttype, &output_tensor_info);
+    output_tensor_info.m_QuantizationInfo = 
params->input_info.m_QuantizationInfo;
+    output_infos.push_back(output_tensor_info);
+  }
+  params->output_infos = output_infos;
   return err;
 }
 
@@ -561,6 +644,12 @@ EthosnError EthosnAPI::DepthToSpace(const Expr& expr, 
DepthToSpaceParams* params
   err += Tvm2Npu(attrs->layout, &input_data_format);
   params->input_info = sl::TensorInfo(input_tensor_shape, input_data_type, 
input_data_format,
                                       params->input_info.m_QuantizationInfo);
+
+  sl::TensorInfo output_tensor_info;
+  err += Tvm2Npu(call->checked_type(), &output_tensor_info);
+  output_tensor_info.m_QuantizationInfo = 
params->input_info.m_QuantizationInfo;
+  params->output_info = output_tensor_info;
+
   return err;
 }
 
@@ -578,6 +667,12 @@ EthosnError EthosnAPI::Relu(const Expr& expr, ReluParams* 
params) {
   params->input_info =
       sl::TensorInfo(input_tensor_shape, input_data_type, 
params->input_info.m_DataFormat,
                      params->input_info.m_QuantizationInfo);
+
+  sl::TensorInfo output_tensor_info;
+  err += Tvm2Npu(call->checked_type(), &output_tensor_info);
+  output_tensor_info.m_QuantizationInfo = 
params->input_info.m_QuantizationInfo;
+  params->output_info = output_tensor_info;
+
   return err;
 }
 
@@ -715,6 +810,18 @@ EthosnError EthosnAPI::Tvm2Npu(const 
Array<Array<Integer>>& padding, sl::Padding
   return EthosnError();
 }
 
+EthosnError EthosnAPI::Tvm2Npu(const tvm::Type& type, sl::TensorInfo* 
npu_tinfo) {
+  const TensorTypeNode* ttype = type.as<TensorTypeNode>();
+  ICHECK(ttype) << "Expected TensorTypeNode but was " << ttype->GetTypeKey();
+
+  sl::TensorShape shape = {1, 1, 1, 1};
+  sl::DataType data_type;
+  EthosnError err = Tvm2Npu(ttype->shape, &shape);
+  err += Tvm2Npu(ttype->dtype, &data_type);
+  *npu_tinfo = sl::TensorInfo(shape, data_type, sl::DataFormat::NHWC, {});
+  return err;
+}
+
 // Convert an array of IntImmNodes into ValueT
 // IndexT type of Array indexing variable
 // ValueT type of resulting value
diff --git a/src/relay/backend/contrib/ethosn/ethosn_api.h 
b/src/relay/backend/contrib/ethosn/ethosn_api.h
index 6ab256231f..3adb2981cc 100644
--- a/src/relay/backend/contrib/ethosn/ethosn_api.h
+++ b/src/relay/backend/contrib/ethosn/ethosn_api.h
@@ -50,9 +50,10 @@ namespace sl = ::ethosn::support_library;
 
 struct ConvolutionParams {
   sl::ConvolutionInfo conv_info;
-  sl::TensorInfo activation_info;
+  sl::TensorInfo input_info;
   sl::TensorInfo weights_info;
   sl::TensorInfo bias_info;
+  sl::TensorInfo output_info;
   void* raw_weights = nullptr;
   void* raw_bias = nullptr;
   bool is_depthwise = false;
@@ -63,6 +64,7 @@ struct FullyConnectedParams {
   sl::TensorInfo input_info;
   sl::TensorInfo weights_info;
   sl::TensorInfo bias_info;
+  sl::TensorInfo output_info;
   void* raw_weights = nullptr;
   void* raw_bias = nullptr;
 };
@@ -70,60 +72,72 @@ struct FullyConnectedParams {
 struct MaxPool2DParams {
   sl::PoolingInfo pool_info = sl::PoolingInfo(0, 0, 0, 0, sl::Padding(), 
sl::PoolingType::MAX);
   sl::TensorInfo input_info;
+  sl::TensorInfo output_info;
 };
 
 struct AvgPool2DParams {
   sl::PoolingInfo pool_info = sl::PoolingInfo(0, 0, 0, 0, sl::Padding(), 
sl::PoolingType::AVG);
   sl::TensorInfo input_info;
+  sl::TensorInfo output_info;
 };
 
 struct ReshapeParams {
   sl::TensorShape new_shape{};
   sl::TensorInfo input_info;
+  sl::TensorInfo output_info;
 };
 
 struct AdditionParams {
   sl::QuantizationInfo output_quantization_info;
   sl::TensorInfo lhs_info;
   sl::TensorInfo rhs_info;
+  sl::TensorInfo output_info;
 };
 
 struct SigmoidParams {
   sl::TensorInfo input_info;
+  sl::TensorInfo output_info;
 };
 
 struct MeanParams {
   sl::TensorInfo input_info;
+  sl::TensorInfo output_info;
 };
 
 struct TanhParams {
   sl::TensorInfo input_info;
+  sl::TensorInfo output_info;
 };
 
 struct LeakyReLUParams {
   sl::LeakyReluInfo leaky_relu_info;
   sl::TensorInfo input_info;
+  sl::TensorInfo output_info;
 };
 
 struct ConcatenateParams {
   sl::QuantizationInfo qInfo;
   sl::ConcatenationInfo concat_info = sl::ConcatenationInfo(1, qInfo);
   std::vector<sl::TensorInfo> input_infos;
+  sl::TensorInfo output_info;
 };
 
 struct SplitParams {
   sl::SplitInfo split_info = sl::SplitInfo(0, {});
   sl::TensorInfo input_info;
+  std::vector<sl::TensorInfo> output_infos;
 };
 
 struct DepthToSpaceParams {
   sl::DepthToSpaceInfo depth_info = sl::DepthToSpaceInfo(0);
   sl::TensorInfo input_info;
+  sl::TensorInfo output_info;
 };
 
 struct ReluParams {
   sl::ReluInfo relu_info;
   sl::TensorInfo input_info;
+  sl::TensorInfo output_info;
 };
 
 /*!
@@ -242,10 +256,14 @@ class EthosnAPI {
   static EthosnError Tvm2Npu(const Array<Array<Integer>>& padding, 
sl::Padding* npu_padding);
   /*! \brief Convert a TVM Integer array to a SL tensor shape */
   static EthosnError Tvm2Npu(const Array<Integer>& shape, sl::TensorShape* 
npu_shape);
+  /*! \brief Convert a TVM Type to SL tensor info. */
+  static EthosnError Tvm2Npu(const tvm::Type& type, sl::TensorInfo* npu_tinfo);
+
   /*! \brief Convert a TVM pooling call to SL pooling information */
-  static EthosnError Pool2d(const Call& pool, Array<IndexExpr> size, 
Array<IndexExpr> strides,
-                            Array<IndexExpr> padding, sl::PoolingType 
pooling_type,
-                            sl::PoolingInfo* pool_info, sl::TensorInfo* 
input_info,
+  static EthosnError Pool2d(const Call& input, const Call& output, 
Array<IndexExpr> size,
+                            Array<IndexExpr> strides, Array<IndexExpr> padding,
+                            sl::PoolingType pooling_type, sl::PoolingInfo* 
pool_info,
+                            sl::TensorInfo* input_info, sl::TensorInfo* 
output_info,
                             std::string layout);
 
   // Convert an array of IntImmNodes into ValueT
diff --git a/tests/python/contrib/test_ethosn/test_concatenate.py 
b/tests/python/contrib/test_ethosn/test_concatenate.py
index 8f5585f338..b2eba6d650 100644
--- a/tests/python/contrib/test_ethosn/test_concatenate.py
+++ b/tests/python/contrib/test_ethosn/test_concatenate.py
@@ -99,7 +99,7 @@ def test_concatenate_failure():
             "batch size=2, batch size must = 1; batch size=2, batch size must 
= 1;",
         ),
         (
-            [(1, 4, 4, 4), (1, 4, 4, 4)],
+            [(1, 4, 4, 4)],
             "uint8",
             0,
             "Concatenation cannot be performed along batch axis (axis 0);",
diff --git a/tests/python/contrib/test_ethosn/test_mean.py 
b/tests/python/contrib/test_ethosn/test_mean.py
index a93ec384b2..548743fe95 100644
--- a/tests/python/contrib/test_ethosn/test_mean.py
+++ b/tests/python/contrib/test_ethosn/test_mean.py
@@ -44,10 +44,12 @@ def _get_model(shape, axis, keepdims, input_zp, input_sc, 
output_zp, output_sc,
 @pytest.mark.parametrize("dtype", ["uint8", "int8"])
 @pytest.mark.parametrize("shape", [(1, 7, 7, 2048), (1, 8, 8)])
 def test_mean(dtype, shape):
+    """Compare Mean output with TVM."""
+    np.random.seed(0)
+
     zp_min = np.iinfo(dtype).min
     zp_max = np.iinfo(dtype).max
 
-    np.random.seed(0)
     inputs = {
         "a": tvm.nd.array(np.random.randint(zp_min, high=zp_max + 1, 
size=shape, dtype=dtype)),
     }
@@ -60,3 +62,17 @@ def test_mean(dtype, shape):
         outputs.append(tei.build_and_run(mod, inputs, 1, {}, npu=npu))
 
     tei.verify(outputs, dtype, 1)
+
+
+@requires_ethosn
+@pytest.mark.parametrize("dtype", ["int8", "uint8"])
+def test_mean_non_equal_quantization(dtype):
+    """Test mean is not offloaded when quantization is not equal."""
+    np.random.seed(0)
+
+    shape = (1, 7, 7, 2048)
+    zp_min = np.iinfo(dtype).min
+
+    model = _get_model(shape, [1, 2], True, zp_min + 120, 0.0068132, zp_min + 
128, 0.0078125, dtype)
+    mod = tei.make_module(model, [])
+    tei.build(mod, {}, npu=True, expected_host_ops=3, npu_partitions=0)
diff --git a/tests/python/contrib/test_ethosn/test_networks.py 
b/tests/python/contrib/test_ethosn/test_networks.py
index 33f4dfd28e..143ec0b88d 100644
--- a/tests/python/contrib/test_ethosn/test_networks.py
+++ b/tests/python/contrib/test_ethosn/test_networks.py
@@ -143,7 +143,6 @@ def test_mobilenet_v1():
     )
 
 
[email protected](reason="very slow test")
 @requires_ethosn
 def test_resnet_50_int8():
     # If this test is failing due to a hash mismatch, please notify @mbaret and
@@ -153,7 +152,7 @@ def test_resnet_50_int8():
     # on hardware that isn't available in CI.
     if tei.get_ethosn_api_version() > 2011:
         if tei.get_ethosn_variant() == "Ethos-N78_1TOPS_2PLE_RATIO":
-            _compile_hash = {"de6723dc69f5f3015c4ab5cb8f288221", 
"dc2ed339583a59f0c3d38dc5ff069ec9"}
+            _compile_hash = {"c0a01c547ed1b2e3308094508fa1bfea", 
"434f0c65c41e24d5482142c88b3438fe"}
             _test_image_network(
                 
model_url="https://raw.githubusercontent.com/dmlc/web-data/main/tensorflow/"
                 "models/Quantized/resnet_50_quantized.tflite",
@@ -163,7 +162,6 @@ def test_resnet_50_int8():
                 output_count=1,
                 host_ops=11,
                 npu_partitions=2,
-                run=True,
             )
 
 

Reply via email to