comaniac commented on a change in pull request #6222:
URL: https://github.com/apache/incubator-tvm/pull/6222#discussion_r467257422



##########
File path: src/relay/backend/contrib/ethosn/codegen.cc
##########
@@ -0,0 +1,214 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file src/relay/backend/contrib/ethosn/codegen.cc
+ * \brief The Relay -> Ethos-N command stream compiler.
+ */
+#include <tvm/relay/expr_functor.h>
+#include <tvm/runtime/module.h>
+
+#include "codegen_ethosn.h"
+#include "ethosn_api.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace ethosn {
+
+sl::TensorInfo GetTensorInfo(std::map<Expr, std::vector<sl::TensorInfo>> 
tensor_table,
+                             const Call& call) {
+  if (tensor_table.find(call) != tensor_table.end()) return 
tensor_table[call][0];
+
+  return sl::TensorInfo();
+}
+
+void InferTensorsVisitor::InferCall(const CallNode* cn) {

Review comment:
       Can we inline this function into `InferTensorsVisitor::VisitExpr_(const CallNode* cn)`? I didn't see any other reference to this function.

##########
File path: src/relay/backend/contrib/ethosn/codegen_ethosn.h
##########
@@ -0,0 +1,331 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file src/relay/backend/contrib/ethosn/codegen_ethosn.h
+ * \brief The Relay -> Ethos-N command stream compiler.
+ */
+
+#ifndef TVM_RELAY_BACKEND_CONTRIB_ETHOSN_CODEGEN_ETHOSN_H_
+#define TVM_RELAY_BACKEND_CONTRIB_ETHOSN_CODEGEN_ETHOSN_H_
+
+#include <dmlc/memory_io.h>
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/relay/type.h>
+#include <tvm/runtime/module.h>
+#include <tvm/runtime/registry.h>
+
+#include <algorithm>
+#include <fstream>
+#include <map>
+#include <memory>
+#include <sstream>
+#include <string>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "../../../../runtime/contrib/ethosn/ethosn_runtime.h"
+#include "../codegen_c/codegen_c.h"
+#include "ethosn_api.h"
+#include "ethosn_support_library/Support.hpp"
+#include "ethosn_support_library/SupportQueries.hpp"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace ethosn {
+
+namespace sl = ::ethosn::support_library;
+
+/*!
+ * \brief A struct to hold an uncompiled support library network alongside
+ * the desired order of input and output operation ids.
+ */
+struct NetworkWithIDs {
+  struct hash_pair {
+    template <class T_0, class T_1>
+    size_t operator()(const std::pair<T_0, T_1>& p) const {
+      return std::hash<T_0>{}(p.first) ^ std::hash<T_1>{}(p.second);
+    }
+  };
+  std::shared_ptr<sl::Network> network;
+  std::unordered_map<uint32_t, unsigned int> input_ids;
+  std::unordered_map<std::pair<uint32_t, uint32_t>, unsigned int, hash_pair> 
output_ids;
+};
+
+/*!
+ * \brief A base class for error handling using ErrorReporter.
+ */
+class ErrorReportingPass {
+ public:
+  ErrorReportingPass(const IRModule& mod, const GlobalVar& var) : mod_(mod), 
var_(var) {}
+
+  /*!
+   * \brief Report fatal errors for an expression.
+   * \param expr The expression to report errors at.
+   * \param err The errors to report.
+   */
+  void ReportFatalError(const ObjectRef& expr, const EthosnError& err) {
+    for (const auto& msg : err.msgs) {
+      error_reporter_.ReportAt(this->var_, expr, ErrorBuilder() << msg);
+    }
+    error_reporter_.RenderErrors(this->mod_);
+  }
+
+ protected:
+  /*! \brief An ErrorReporter object to render the errors.*/
+  ErrorReporter error_reporter_;
+  /*! \brief The module to report errors for. */
+  IRModule mod_;
+  /*! \brief The GlobalVar to report errors for. */
+  GlobalVar var_;
+};
+
+/*!
+ * \brief A custom pass to infer the support library tensor information
+ * for a Relay expression.
+ *
+ * Support Library requires that tensors are explicitly declared with
+ * information on their size, data type, format (eg. NHWC) and quantisation
+ * parameters. In Relay, size and data type are already determined when the
+ * type_infer pass is run. However, format and quantisation parameters are
+ * properties of the operators that consume the tensors.
+ *
+ * This pass works by having each node initialise the information of its
+ * parents, essentially propagating the inferred information all the way up
+ * to the inputs of the expression.
+ *
+ * Because the children initialise the information of the parents, it is
+ * necessary to traverse the graph in such a way so as to ensure all the
+ * children of a node are visited before the parent is. As Relay does not
+ * keep a reference to child nodes, this pass goes in preorder but will
+ * skip visiting a parent if all the children haven't yet been visited (see
+ * VisitInferred for the logic that implements this).
+ *
+ * Inference only works for supported callnodes, for tuplenodes, tuplegetitem
+ * nodes and free var nodes. Other nodes should not be off-loaded to Ethos-N.
+ */
+class InferTensorsVisitor : private ErrorReportingPass, private ExprVisitor {
+ public:
+  InferTensorsVisitor(const IRModule& mod, const GlobalVar& var) : 
ErrorReportingPass(mod, var) {}
+
+  /*!
+   * \brief Infer the support library tensor information for all the nodes
+   * in an expression.
+   * \param expr The expression for which to infer tensor information.
+   * \return A map of expressions to tensor information.
+   * \note This algorithm does not traverse into functions, so call it on
+   * the body of the function you're interested in.
+   */
+  std::map<Expr, std::vector<sl::TensorInfo>> Infer(const Expr& expr) {
+    tensor_table_.clear();
+    CHECK(expr->checked_type().defined());
+    size_t output_size = 1;
+    if (expr->checked_type()->IsInstance<TupleTypeNode>()) {
+      auto type = expr->checked_type().as<TupleTypeNode>();
+      output_size = type->fields.size();
+    }
+    for (size_t i = 0; i < output_size; i++) {
+      tensor_table_[expr].push_back(sl::TensorInfo({1, 1, 1, 1}, 
sl::DataType::UINT8_QUANTIZED,
+                                                   sl::DataFormat::NHWC, 
sl::QuantizationInfo()));
+    }
+    VisitInferred(expr);
+    return tensor_table_;
+  }
+
+ private:
+  // Infer a callnode if it's a supported operator/composite function
+  void InferCall(const CallNode* cn);
+  void VisitInferred(const Expr& expr);
+
+  void VisitExpr_(const CallNode* cn) final;
+  void VisitExpr_(const TupleNode* tn) final;
+  void VisitExpr_(const TupleGetItemNode* tg) final;
+  // Don't traverse into functions, the Ethos-N codegen isn't meant to support 
them.
+  void VisitExpr_(const FunctionNode* fn) final {}
+
+  /*! \brief A look-up table from Expr to tensor infos. */
+  std::map<Expr, std::vector<sl::TensorInfo>> tensor_table_;
+};
+
+std::map<Expr, std::vector<sl::TensorInfo>> InferTensors(const IRModule& mod, 
const GlobalVar& var,
+                                                         const Expr& expr) {
+  return InferTensorsVisitor(mod, var).Infer(expr);
+}
+
+/*!
+ * \brief A pass to generate a support library network from a Relay function.
+ *
+ * This pass constructs an equivalent support library network from a Relay
+ * function in two visits. One to infer the tensor information of all the nodes
+ * and another in postorder to add the nodes as support library operands.
+ * (Supported) Callnodes, tuplenodes, tuplegetitemnodes and (free)
+ * varnodes are handled by this pass.
+ *
+ * As part of the pass, nodes in the function body are associated with both
+ * type information in the 'tensor_table', and support library operands in the
+ * 'operand_table'. Both of these are maps of vectors as a Relay node can have
+ * tuple type and accordingly be associated with multiple tensors. For nodes
+ * which are not tuple type, vectors of size 1 are used.
+ */
+class ConstructNetworkVisitor : public MixedModeVisitor, private 
ErrorReportingPass {
+ public:
+  explicit ConstructNetworkVisitor(const IRModule& mod, const GlobalVar& var)
+      : ErrorReportingPass(mod, var) {}
+
+  /*!
+   * \brief Construct a support library network from a given Relay function. 
The
+   * function should contain only nodes supported by Ethos-N.
+   * \param func The Relay function for which to construct a support library 
network.
+   * \return A support library network that performs the same operation as the 
Relay
+   * function.
+   */
+  NetworkWithIDs Construct(const Function& func) {
+    // Initialise everything
+    NetworkWithIDs network_with_ids;
+    network_ = sl::CreateNetwork();
+    network_with_ids.network = network_;
+    operand_table_.clear();
+
+    // Infer tensor information
+    tensor_table_ = InferTensors(this->mod_, this->var_, func->body);
+    // Add the inputs in the order they appear in the parameters
+    unsigned int idx = 0;
+    for (const auto& param : func->params) {
+      for (const auto& tensor_info : tensor_table_[param]) {
+        auto tensor_and_id = AddInput(network_, tensor_info);
+        operand_table_[param].push_back(tensor_and_id.tensor);
+        id_table_[param].push_back(std::make_pair(tensor_and_id.operationId, 
0));
+        network_with_ids.input_ids[tensor_and_id.operationId] = idx++;
+      }
+    }
+    // Add the function body
+    VisitExpr(func->body);
+    // Add the outputs
+    idx = 0;
+    for (const auto& layer : operand_table_[func->body]) {
+      AddOutput(network_, *layer);
+      network_with_ids.output_ids[id_table_[func->body][idx]] = idx;
+      idx++;
+    }
+    return network_with_ids;
+  }
+
+ private:
+  // Translate from a callnode to the appropriate 'Make' method
+  sl::TensorsAndId HandleCall(const CallNode*);
+
+  void VisitExpr_(const CallNode* cn) final;
+  void VisitExpr_(const TupleNode* op) final;
+  void VisitExpr_(const TupleGetItemNode* tg) final;
+  void VisitLeaf(const Expr& expr) final;
+
+  // Make a support library operand from a Call
+  EthosnError MakeConcatenateLayer(const Call& call, 
sl::TensorAndId<sl::Operand>* out);
+  EthosnError MakeSplitLayer(const Call& call, sl::TensorsAndId* outs);
+
+  /*! \brief A look-up table from Expr to layers. */
+  std::map<Expr, std::vector<std::shared_ptr<sl::Operand>>> operand_table_;
+  /*! \brief A look-up table from Expr to SL operation IDs. */
+  std::map<Expr, std::vector<std::pair<uint32_t, uint32_t>>> id_table_;
+  /*! \brief A look-up table from Expr to tensor infos. */
+  std::map<Expr, std::vector<sl::TensorInfo>> tensor_table_;
+  /*! \brief The support library network to compile. */
+  std::shared_ptr<sl::Network> network_;
+};
+
+NetworkWithIDs ConstructNetwork(const IRModule& mod, const GlobalVar& var, 
const Function& func) {
+  return ConstructNetworkVisitor(mod, var).Construct(func);
+}
+
+class EthosnCompiler {
+ public:
+  static runtime::ethosn::OrderedCompiledNetwork CompileEthosnFunc(const 
IRModule& mod,
+                                                                   std::string 
name,
+                                                                   const 
Function& func) {
+    // Construct the network
+    GlobalVar var = mod->GetGlobalVar(name);
+    auto network_with_ids = ConstructNetwork(mod, var, func);
+    // Now set the required build flags
+    sl::CompilationOptions options = EthosnAPI::CreateOptions();
+    // Finally compile the network
+    auto compiled_network = EthosnAPI::Compile(network_with_ids.network, 
options);
+    auto input_output_order = GetInputOutputOrder(network_with_ids, 
compiled_network);
+    runtime::ethosn::OrderedCompiledNetwork ordered_network;
+    ordered_network.name = name;
+    ordered_network.cmm = std::move(compiled_network);
+    ordered_network.inputs = input_output_order.first;
+    ordered_network.outputs = input_output_order.second;
+    return ordered_network;
+  }
+
+  static runtime::Module CreateRuntimeModule(const ObjectRef& ref) {
+    std::vector<runtime::ethosn::OrderedCompiledNetwork> cmms;
+    if (ref->IsInstance<FunctionNode>()) {
+      IRModule mod;
+      Function bfunc = Downcast<Function>(ref);
+      auto name_node = bfunc->GetAttr<String>(tvm::attr::kGlobalSymbol);
+      CHECK(name_node.defined()) << "Failed to retrieved external symbol.";
+      mod->Add(GlobalVar(name_node.value()), bfunc);
+      for (const auto& it : mod->functions) {
+        Function func = Downcast<Function>(it.second);
+        name_node = func->GetAttr<String>(tvm::attr::kGlobalSymbol);
+        cmms.emplace_back(CompileEthosnFunc(mod, name_node.value(), func));
+      }
+    } else {
+      LOG(FATAL) << "The input ref is expected to be a Relay function or 
module"
+                 << "\n";

Review comment:
       * Merge this into one line.
   * From the logic of this function, the input ref cannot be a Relay module.

##########
File path: src/runtime/contrib/ethosn/ethosn_runtime.h
##########
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file ethosn_runtime.h
+ * \brief Execution handling of Ethos-N command streams.
+ */
+#ifndef TVM_RUNTIME_CONTRIB_ETHOSN_ETHOSN_RUNTIME_H_
+#define TVM_RUNTIME_CONTRIB_ETHOSN_ETHOSN_RUNTIME_H_
+
+#include <tvm/runtime/packed_func.h>
+
+#include <ethosn_support_library/Support.hpp>
+#include <map>            // NOLINT

Review comment:
       Just curious, why do you need NOLINT for those?

##########
File path: src/relay/backend/contrib/ethosn/ethosn_api.h
##########
@@ -0,0 +1,142 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef TVM_RELAY_BACKEND_CONTRIB_ETHOSN_ETHOSN_API_H_
+#define TVM_RELAY_BACKEND_CONTRIB_ETHOSN_ETHOSN_API_H_
+
+#include <tvm/relay/expr.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/tir/analysis.h>
+#include <tvm/tir/op.h>
+
+#include <algorithm>
+#include <limits>
+#include <map>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "ethosn_support_library/Support.hpp"
+#include "ethosn_support_library/SupportQueries.hpp"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace ethosn {
+
+namespace sl = ::ethosn::support_library;
+
+struct ConcatenateParams {
+  sl::QuantizationInfo qInfo;
+  sl::ConcatenationInfo concat_info = sl::ConcatenationInfo(1, qInfo);
+  std::vector<sl::TensorInfo> input_infos;
+};
+
+struct SplitParams {
+  sl::SplitInfo split_info = sl::SplitInfo(0, {});
+  sl::TensorInfo input_info;
+};
+
+class ErrStrm {
+ public:
+  template <typename T>
+  ErrStrm& operator<<(const T& val) {  // NOLINT(*)
+    stream_ << val;
+    return *this;
+  }
+
+ private:
+  std::stringstream stream_;
+  friend class EthosnError;
+};
+
+class EthosnError {
+ public:
+  EthosnError() {}
+  explicit EthosnError(const Array<String>& msgs) : msgs(msgs) {}
+  explicit EthosnError(const String& msg) { msgs.push_back(msg); }
+  explicit EthosnError(const ErrStrm& err) : EthosnError(err.stream_.str()) {}
+
+  explicit operator bool() const { return !msgs.empty(); }
+
+  EthosnError& operator+=(const EthosnError& other) {
+    msgs.insert(msgs.end(), other.msgs.begin(), other.msgs.end());
+    return *this;
+  }
+
+  Array<String> msgs;
+};
+
+class EthosnAPI {
+ public:
+  static std::unique_ptr<sl::CompiledNetwork> 
Compile(std::shared_ptr<sl::Network> network,
+                                                      const 
sl::CompilationOptions& options);
+
+  static sl::CompilationOptions CreateOptions();
+
+  static bool IsEthosFunc(const Call& call, const std::string& op_name);
+  static bool IsEthosOp(const Call& call, const std::string& op_name);
+
+  static EthosnError Concatenate(const Expr& expr, ConcatenateParams* params);
+  static EthosnError Split(const Expr& expr, SplitParams* params);
+
+ private:
+  static EthosnError Tvm2Npu(const Array<IndexExpr>& shape, sl::TensorShape* 
npu_shape);
+  static EthosnError Tvm2Npu(const tvm::DataType& dtype, sl::DataType* 
data_type);

Review comment:
       The naming is confusing. Is this like "CreateNPUTensor"?

##########
File path: src/relay/backend/contrib/ethosn/capabilities.h
##########
@@ -0,0 +1,67 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef TVM_RELAY_BACKEND_CONTRIB_ETHOSN_CAPABILITIES_H_
+#define TVM_RELAY_BACKEND_CONTRIB_ETHOSN_CAPABILITIES_H_
+
+#include <vector>
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace ethosn {
+
+static std::vector<char> variants[3] = {

Review comment:
       Can we have some comments on this? Something like the one you put in the 
description "0 for Ethos-N77, 1 for Ethos-N57, 2 for Ethos-N37. See Ethos-N 
documentation."

##########
File path: src/relay/backend/contrib/ethosn/ethosn_api.h
##########
@@ -0,0 +1,142 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef TVM_RELAY_BACKEND_CONTRIB_ETHOSN_ETHOSN_API_H_

Review comment:
       * The class and functions defined in this file need docstrings.
   * Could you clarify the reason for having `ethosn_api` and `ethosn_codegen` instead of putting them together?

##########
File path: src/relay/backend/contrib/ethosn/ethosn_api.cc
##########
@@ -0,0 +1,268 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "ethosn_api.h"
+
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/tir/analysis.h>
+
+#include <fstream>
+#include <map>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "capabilities.h"
+#include "ethosn_support_library/Support.hpp"
+#include "ethosn_support_library/SupportQueries.hpp"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace ethosn {
+
+std::unique_ptr<sl::CompiledNetwork> 
EthosnAPI::Compile(std::shared_ptr<sl::Network> network,
+                                                        const 
sl::CompilationOptions& options) {
+  std::vector<std::unique_ptr<sl::CompiledNetwork>> compiled_network =
+      sl::Compile(*network, options);
+  CHECK_GE(compiled_network.size(), 1) << "Ethos-N compiler failed to compile 
network";
+
+  return std::move(compiled_network[0]);
+}
+
+struct EthosnCompilerConfigNode : public 
tvm::AttrsNode<EthosnCompilerConfigNode> {
+  int variant;
+  bool strategy0;
+  bool strategy1;
+  bool strategy3;
+  bool strategy4;
+  bool strategy6;
+  bool strategy7;
+  bool dump_ram;
+  bool initial_sram_dump;
+  bool block_config_16x16;
+  bool block_config_32x8;
+  bool block_config_8x32;
+  bool block_config_8x8;
+  bool enable_intermediate_compression;
+  bool disable_winograd;
+  bool dump_debug_files;
+  String debug_dir;
+  bool enable_cascading;
+
+  TVM_DECLARE_ATTRS(EthosnCompilerConfigNode, 
"ext.attrs.EthosnCompilerConfigNode") {
+    TVM_ATTR_FIELD(variant)
+        .describe("0 for Ethos-N77, 1 for Ethos-N57, 2 for Ethos-N37. See 
Ethos-N documentation.")
+        .set_default(0);
+    TVM_ATTR_FIELD(strategy0).set_default(true);
+    TVM_ATTR_FIELD(strategy1).set_default(true);
+    TVM_ATTR_FIELD(strategy3).set_default(true);
+    TVM_ATTR_FIELD(strategy4).set_default(true);
+    TVM_ATTR_FIELD(strategy6).set_default(true);
+    TVM_ATTR_FIELD(strategy7).set_default(true);
+    TVM_ATTR_FIELD(dump_ram).set_default(false);
+    TVM_ATTR_FIELD(initial_sram_dump).set_default(false);
+    TVM_ATTR_FIELD(block_config_16x16).set_default(true);
+    TVM_ATTR_FIELD(block_config_32x8).set_default(true);
+    TVM_ATTR_FIELD(block_config_8x32).set_default(true);
+    TVM_ATTR_FIELD(block_config_8x8).set_default(true);
+    TVM_ATTR_FIELD(enable_intermediate_compression).set_default(true);
+    TVM_ATTR_FIELD(disable_winograd).set_default(false);
+    TVM_ATTR_FIELD(dump_debug_files).set_default(false);
+    TVM_ATTR_FIELD(debug_dir).set_default(".");
+    TVM_ATTR_FIELD(enable_cascading).set_default(false);
+  }
+};
+
+class EthosnCompilerConfig : public Attrs {
+ public:
+  TVM_DEFINE_NOTNULLABLE_OBJECT_REF_METHODS(EthosnCompilerConfig, Attrs, 
EthosnCompilerConfigNode);
+};
+
+TVM_REGISTER_NODE_TYPE(EthosnCompilerConfigNode);
+TVM_REGISTER_PASS_CONFIG_OPTION("relay.ext.ethos-n.options", 
EthosnCompilerConfig);
+
+sl::CompilationOptions EthosnAPI::CreateOptions() {
+  auto ctx = transform::PassContext::Current();
+  auto cfg = ctx->GetConfig<EthosnCompilerConfig>("relay.ext.ethos-n.options");
+  if (!cfg.defined()) {
+    cfg = AttrsWithDefaultValues<EthosnCompilerConfig>();
+  }
+
+  sl::CompilationOptions options(variants[cfg.value()->variant]);
+  options.m_Strategy0 = cfg.value()->strategy0;
+  options.m_Strategy1 = cfg.value()->strategy1;
+  options.m_Strategy3 = cfg.value()->strategy3;
+  options.m_Strategy4 = cfg.value()->strategy4;
+  options.m_Strategy6 = cfg.value()->strategy6;
+  options.m_Strategy7 = cfg.value()->strategy7;
+  options.m_DebugInfo.m_DumpRam = cfg.value()->dump_ram;
+  options.m_DebugInfo.m_InitialSramDump = cfg.value()->initial_sram_dump;
+  options.m_BlockConfig16x16 = cfg.value()->block_config_16x16;
+  options.m_BlockConfig32x8 = cfg.value()->block_config_32x8;
+  options.m_BlockConfig8x32 = cfg.value()->block_config_8x32;
+  options.m_BlockConfig8x8 = cfg.value()->block_config_8x8;
+  options.m_EnableIntermediateCompression = 
cfg.value()->enable_intermediate_compression;
+  options.m_DisableWinograd = cfg.value()->disable_winograd;
+  options.m_DebugInfo.m_DumpDebugFiles = cfg.value()->dump_debug_files;
+  options.m_DebugInfo.m_DebugDir = cfg.value()->debug_dir;
+  options.m_EnableCascading = cfg.value()->enable_cascading;
+  return options;
+}
+
+bool EthosnAPI::IsEthosFunc(const Call& call, const std::string& op_name) {

Review comment:
       * Should these functions be like IsEthos"N"Func?
   * I didn't see any reference to this function. I guess composite functions are not handled in this PR? If so, would you provide an upstreaming plan in the PR description so that everyone has the whole picture?

##########
File path: src/runtime/contrib/ethosn/ethosn_device.cc
##########
@@ -0,0 +1,222 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file ethosn_device.cc
+ * \brief Ethos-N NPU device integration.
+ */
+
+#include <dlpack/dlpack.h>
+#include <poll.h>
+#include <tvm/tir/expr.h>
+#include <unistd.h>
+
+#include <algorithm>
+#include <memory>
+
+#include "ethosn_driver_library/Buffer.hpp"
+#include "ethosn_support_library/Support.hpp"
+
+#if defined ETHOSN_HW
+
+#include "ethosn_driver_library/Inference.hpp"
+#include "ethosn_driver_library/Network.hpp"
+
+namespace tvm {
+namespace runtime {
+namespace ethosn {
+
+namespace sl = ::ethosn::support_library;
+namespace dl = ::ethosn::driver_library;
+
+int64_t GetTensorSize(const DLTensor& tensor) {
+  int64_t size = 1;
+  for (int i = 0; i < tensor.ndim; i++) {
+    size *= tensor.shape[i];
+  }
+  return size;
+}
+
+bool WaitForInference(dl::Inference* inference, int timeout) {
+  // Wait for inference to complete
+  int fd = inference->GetFileDescriptor();
+  struct pollfd fds;
+  memset(&fds, 0, sizeof(fds));
+  fds.fd = fd;
+  fds.events = POLLIN;  // Wait for any available input.
+
+  const int ms_per_seconds = 1000;
+  int poll_result = poll(&fds, 1, timeout * ms_per_seconds);
+  if (poll_result > 0) {
+    dl::InferenceResult result;
+    if (read(fd, &result, sizeof(result)) != sizeof(result)) {
+      return false;
+    }
+    if (result != dl::InferenceResult::Completed) {
+      return false;
+    }
+  } else if (poll_result == 0) {
+    return false;
+  } else {
+    return false;
+  }
+  return true;
+}
+
+template <typename T>
+void CopyOutput(dl::Buffer* source_buffers[], std::vector<DLTensor*>* outputs) 
{
+  for (DLTensor* tensor : *outputs) {
+    dl::Buffer* source_buffer = source_buffers[0];
+    uint8_t* source_buffer_data = source_buffer->GetMappedBuffer();
+    size_t size = source_buffer->GetSize();
+    T* dest_pointer = static_cast<T*>(tensor->data);
+    std::copy_backward(source_buffer_data, source_buffer_data + size, 
dest_pointer + size);
+    source_buffers++;
+  }
+}
+
+void CreateBuffers(std::vector<std::shared_ptr<dl::Buffer> >* fm,
+                   const std::vector<DLTensor*>& tensors) {
+  int index = 0;
+  for (auto buffer : tensors) {
+    auto* data = static_cast<uint8_t*>(buffer->data);
+    // The NPU only needs the size of the tensor * uint8_t.
+    auto data_size = static_cast<uint32_t>(GetTensorSize(*buffer));
+    (*fm)[index++] = std::make_shared<dl::Buffer>(data, data_size, 
dl::DataFormat::NHWC);
+  }
+}
+
+bool Inference(tvm::runtime::TVMArgs args, sl::CompiledNetwork* network,
+               std::vector<uint32_t> input_order, std::vector<uint32_t> 
output_order) {
+  // Unpack parameters
+  uint8_t argc = 0;
+  std::vector<DLTensor*> inputs(input_order.size());
+  for (uint8_t i = 0; i < network->GetInputBufferInfos().size(); i++) {
+    inputs[input_order[i]] = args[argc++];
+  }
+  auto out_infos = network->GetOutputBufferInfos();
+  std::vector<DLTensor*> outputs(output_order.size());
+  for (uint8_t i = 0; i < network->GetOutputBufferInfos().size(); i++) {
+    outputs[output_order[i]] = args[argc++];
+  }
+
+  // Set up input buffers
+  std::vector<std::shared_ptr<dl::Buffer> > ifm(inputs.size());
+  CreateBuffers(&ifm, inputs);
+
+  // Set up output buffers
+  std::vector<std::shared_ptr<dl::Buffer> > ofm(outputs.size());
+  CreateBuffers(&ofm, outputs);
+
+  // Raw pointers for the inference
+  dl::Buffer* ifm_raw[inputs.size()];
+  for (size_t i = 0; i < inputs.size(); i++) {
+    ifm_raw[i] = ifm[i].get();
+  }
+  dl::Buffer* ofm_raw[outputs.size()];
+  for (size_t i = 0; i < outputs.size(); i++) {
+    ofm_raw[i] = ofm[i].get();
+  }
+
+  auto npu = std::make_unique<dl::Network>(*network);
+
+  // Execute the inference.
+  std::unique_ptr<dl::Inference> result(
+      npu->ScheduleInference(ifm_raw, sizeof(ifm_raw) / sizeof(ifm_raw[0]), 
ofm_raw,
+                             sizeof(ofm_raw) / sizeof(ofm_raw[0])));
+  bool inferenceCompleted = WaitForInference(result.get(), 60);
+  if (inferenceCompleted) {
+    switch ((outputs)[0]->dtype.bits) {
+      case 8: {
+        dl::Buffer** ofms = &ofm_raw[0];
+        for (DLTensor* tensor : outputs) {
+          uint8_t* source_buffer_data = (*ofms++)->GetMappedBuffer();
+          uint8_t* dest_pointer = static_cast<uint8_t*>(tensor->data);
+          if (source_buffer_data != dest_pointer) {
+            CopyOutput<uint8_t>(ofm_raw, &outputs);
+            break;
+          }
+        }
+        break;
+      }
+      case 16:
+        CopyOutput<uint16_t>(ofm_raw, &outputs);
+        break;
+      case 32:
+        CopyOutput<uint32_t>(ofm_raw, &outputs);
+        break;
+      default:
+        break;
+    }
+  }
+
+  return inferenceCompleted;
+}
+
+}  // namespace ethosn
+}  // namespace runtime
+}  // namespace tvm
+
+#else

Review comment:
       It would be better to have a comment here indicating that these are used for mocking the hardware.

##########
File path: tests/python/contrib/test_ethosn/infrastructure.py
##########
@@ -0,0 +1,225 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""Expose Ethos test functions to the Python front end"""
+
+from __future__ import absolute_import, print_function
+import tvm
+from tvm import relay
+from tvm.contrib import util, graph_runtime, download
+from tvm.relay.testing import run_opt_pass
+from enum import Enum
+from hashlib import md5
+from itertools import zip_longest, combinations
+import numpy as np
+from PIL import Image
+import os
+
+from . import _infrastructure
+from tvm.relay.op.contrib import get_pattern_table
+
+
+class Available(Enum):
+    UNAVAILABLE = 0
+    SW_ONLY = 1
+    SW_AND_HW = 2
+
+
+def ethosn_available():
+    """Return whether Ethos-N software and hardware support is available"""
+    if not tvm.get_global_func("relay.ethos-n.query", True):
+        print("skip because Ethos-N module is not available")
+        return Available.UNAVAILABLE
+    else:
+        hw = tvm.get_global_func("relay.ethos-n.query")()
+        return Available.SW_AND_HW if hw else Available.SW_ONLY
+
+
+def get_real_image(im_height, im_width):

Review comment:
       * Didn't see this function being used.
   * Do we really need a real image for testing?

##########
File path: tests/python/contrib/test_ethosn/infrastructure.py
##########
@@ -0,0 +1,225 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""Expose Ethos test functions to the Python front end"""
+
+from __future__ import absolute_import, print_function
+import tvm
+from tvm import relay
+from tvm.contrib import util, graph_runtime, download
+from tvm.relay.testing import run_opt_pass
+from enum import Enum
+from hashlib import md5
+from itertools import zip_longest, combinations
+import numpy as np
+from PIL import Image
+import os
+
+from . import _infrastructure
+from tvm.relay.op.contrib import get_pattern_table
+
+
class Available(Enum):
    """Level of Ethos-N support detected in this TVM build."""
    UNAVAILABLE = 0
    SW_ONLY = 1
    SW_AND_HW = 2
+
+
def ethosn_available():
    """Return whether Ethos-N software and hardware support is available"""
    query = tvm.get_global_func("relay.ethos-n.query", True)
    if query is None:
        print("skip because Ethos-N module is not available")
        return Available.UNAVAILABLE
    return Available.SW_AND_HW if query() else Available.SW_ONLY
+
+
def get_real_image(im_height, im_width):
    """Download a sample image and return it as a uint8 NHWC array.

    Parameters
    ----------
    im_height, im_width : int
        Size to resize the downloaded image to.

    Returns
    -------
    numpy.ndarray
        Array of shape (1, im_height, im_width, 3), dtype uint8.
    """
    repo_base = 'https://github.com/dmlc/web-data/raw/master/tensorflow/models/InceptionV1/'
    img_name = 'elephant-299.jpg'
    # Join with plain concatenation: os.path.join uses the platform path
    # separator (backslash on Windows), which would corrupt the URL.
    image_url = repo_base + img_name
    img_path = download.download_testdata(image_url, img_name, module='data')
    image = Image.open(img_path).resize((im_height, im_width))
    x = np.array(image).astype('uint8')
    data = np.reshape(x, (1, im_height, im_width, 3))
    return data
+
+
def assert_lib_hash(lib, golden):
    """Assert the saved Ethos-N module in `lib` md5-hashes to `golden`.

    Saves the second imported module to a temporary file and compares
    its digest against the expected golden hash string.
    """
    temp = util.tempdir()
    path = temp.relpath("lib.cmm")
    lib.imported_modules[1].save(path)
    # Use a context manager so the file handle is closed deterministically
    # (the original left the handle open for the GC to collect).
    with open(path, 'rb') as saved:
        lib_hash = md5(saved.read()).hexdigest()
    assert lib_hash == golden, "Expected hash: {} Got hash: {}".format(golden, lib_hash)
+
+
def make_module(func, params):
    """Wrap `func` in an IRModule, binding any supplied params by name.

    Note: relay.build_module.bind_params_by_name returns a new function;
    the original code discarded that return value, so the params were
    never actually bound into the module.
    """
    func = relay.Function(relay.analysis.free_vars(func), func)
    if len(params):
        func = relay.build_module.bind_params_by_name(func, params)
    return tvm.IRModule.from_expr(func)
+
+
def make_ethosn_composite(ethosn_expr, name):
    """Wrap `ethosn_expr` in a composite function tagged with `name`
    and return a call of that composite on the expression's free vars."""
    free = relay.analysis.free_vars(ethosn_expr)
    # NOTE(review): the composite body is parameterised on a single fresh
    # var "a" — presumably the expression has one free var; confirm.
    composite = relay.Function([relay.Var("a")], ethosn_expr)
    composite = composite.with_attr("Composite", name)
    return relay.Call(composite, free)
+
+
def make_ethosn_partition(ethosn_expr):
    """Build an IRModule whose main calls `ethosn_expr` via an ethos-n
    partitioned (Compiler-attributed) global function."""
    # Create an Ethos-N global function
    mod = tvm.IRModule({})
    free = relay.analysis.free_vars(ethosn_expr)
    partition = relay.Function(free, ethosn_expr)
    for attr, value in [
        ("Primitive", tvm.tir.IntImm("int32", 1)),
        ("Inline", tvm.tir.IntImm("int32", 1)),
        ("Compiler", "ethos-n"),
        ("global_symbol", "ethos-n_0"),
    ]:
        partition = partition.with_attr(attr, value)
    gvar = relay.GlobalVar("ethos-n_0")
    mod[gvar] = partition

    # Call the Ethos-N partition from main with a fresh set of free vars.
    main_vars = relay.analysis.free_vars(ethosn_expr)
    mod["main"] = relay.Function(main_vars, gvar(*main_vars))
    return mod
+
+
def get_cpu_op_count(mod):
    """Count the operator calls left in main (i.e. not offloaded)."""
    class OpCounter(tvm.relay.ExprVisitor):
        """Visitor that tallies calls whose target is a primitive Op."""

        def __init__(self):
            super().__init__()
            self.count = 0

        def visit_call(self, call):
            # Only primitive operators count; calls to (partitioned)
            # functions do not.
            if isinstance(call.op, tvm.ir.Op):
                self.count += 1
            super().visit_call(call)

    counter = OpCounter()
    counter.visit(mod["main"])
    return counter.count
+
+
def build(mod, params, npu=True, cpu_ops=0, npu_partitions=1):
    """Build `mod`, optionally partitioning for the Ethos-N NPU first.

    When `npu` is True the module is annotated, merged and partitioned,
    then the number of remaining host ops and of ethos-n partitions is
    checked against `cpu_ops` / `npu_partitions` before compiling.
    """
    relay.backend.compile_engine.get().clear()
    config = {"relay.ext.ethos-n.options": {"variant": 0}}
    with tvm.transform.PassContext(opt_level=3, config=config):
        with tvm.target.create("llvm -mcpu=core-avx2"):
            if npu:
                main = relay.build_module.bind_params_by_name(mod["main"], params)
                mod = tvm.IRModule()
                mod["main"] = main
                for transform in (relay.transform.AnnotateTarget("ethos-n"),
                                  relay.transform.MergeCompilerRegions(),
                                  relay.transform.PartitionGraph()):
                    mod = transform(mod)
                cpu_op_count = get_cpu_op_count(mod)
                assert cpu_op_count == cpu_ops, \
                    "Got {} CPU operators, expected {}".format(cpu_op_count, cpu_ops)
                partition_count = sum(
                    1 for gv in mod.get_global_vars() if "ethos-n" in gv.name_hint)
                assert npu_partitions == partition_count, \
                    "Got {} ethos-n partitions, expected {}".format(partition_count, npu_partitions)

            return relay.build(mod, params=params)
+
+
def run(graph, lib, params, inputs, outputs, npu=True):
    """Create a graph runtime, feed inputs and params, and return the
    first `outputs` output tensors."""
    module = graph_runtime.create(graph, lib, tvm.cpu())
    module.set_input(**inputs)
    module.set_input(**params)
    module.run()
    results = []
    for i in range(outputs):
        results.append(module.get_output(i))
    if not npu:
        # Record the CPU result as the expected value for the mock NPU.
        inference_result(0, results)
    return results
+
+
def build_and_run(mod, inputs, outputs, params, ctx=tvm.cpu(), npu=True, cpu_ops=0, npu_partitions=1):
    """Convenience wrapper: build `mod`, then run it and return outputs."""
    compiled = build(mod, params, npu, cpu_ops, npu_partitions)
    return run(*compiled, inputs, outputs, npu)
+
+
def verify(answers, atol, rtol=1e-07, verify_saturation=True):
    """Compare the array of answers. Each entry is a list of outputs"""
    if len(answers) < 2:
        print("No results to compare: expected at least two, found ",
              len(answers))
    for answer in zip_longest(*answers):
        for outs in combinations(answer, 2):
            if verify_saturation:
                # Hoist the conversion: asnumpy() was called repeatedly.
                first = outs[0].asnumpy()
                limit = 0.25 * first.size
                assert np.count_nonzero(first == 255) < limit, \
                    "Output is saturated: {}".format(outs[0])
                assert np.count_nonzero(first == 0) < limit, \
                    "Output is saturated: {}".format(outs[0])
            tvm.testing.assert_allclose(
                outs[0].asnumpy(), outs[1].asnumpy(), rtol=rtol, atol=atol
            )
+
+
def inference_result(checksum, outputs):
    """Set the expected results of an Ethos inference, if the testing
    infrastructure is available. This assumes that the entire graph
    was offloaded to the neural processor."""
    recorder = tvm.get_global_func(
        "relay.ethos-n.test.infra.inference_result", True)
    if recorder is None:
        return False
    return _infrastructure.inference_result(checksum, *outputs)
+
+
+def generate_trials(space, r_factor=3):

Review comment:
     Although this function is not being used, I'd give the same suggestion 
as I did for the ACL integration: we should avoid using random workloads in 
the unit tests.

##########
File path: tests/python/contrib/test_ethosn/infrastructure.py
##########
@@ -0,0 +1,225 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""Expose Ethos test functions to the Python front end"""
+
+from __future__ import absolute_import, print_function
+import tvm
+from tvm import relay
+from tvm.contrib import util, graph_runtime, download
+from tvm.relay.testing import run_opt_pass
+from enum import Enum
+from hashlib import md5
+from itertools import zip_longest, combinations
+import numpy as np
+from PIL import Image
+import os
+
+from . import _infrastructure
+from tvm.relay.op.contrib import get_pattern_table
+
+
+class Available(Enum):
+    UNAVAILABLE = 0
+    SW_ONLY = 1
+    SW_AND_HW = 2
+
+
+def ethosn_available():
+    """Return whether Ethos-N software and hardware support is available"""
+    if not tvm.get_global_func("relay.ethos-n.query", True):
+        print("skip because Ethos-N module is not available")
+        return Available.UNAVAILABLE
+    else:
+        hw = tvm.get_global_func("relay.ethos-n.query")()
+        return Available.SW_AND_HW if hw else Available.SW_ONLY
+
+
+def get_real_image(im_height, im_width):
+    repo_base = 
'https://github.com/dmlc/web-data/raw/master/tensorflow/models/InceptionV1/'
+    img_name = 'elephant-299.jpg'
+    image_url = os.path.join(repo_base, img_name)
+    img_path = download.download_testdata(image_url, img_name, module='data')
+    image = Image.open(img_path).resize((im_height, im_width))
+    x = np.array(image).astype('uint8')
+    data = np.reshape(x, (1, im_height, im_width, 3))
+    return data
+
+
+def assert_lib_hash(lib, golden):
+    temp = util.tempdir()
+    path = temp.relpath("lib.cmm")
+    lib.imported_modules[1].save(path)
+    lib_hash = md5(open(path, 'rb').read()).hexdigest()
+    assert lib_hash == golden, "Expected hash: {} Got hash: {}".format(golden, 
lib_hash)
+
+
+def make_module(func, params):
+    func = relay.Function(relay.analysis.free_vars(func), func)
+    if len(params):
+        relay.build_module.bind_params_by_name(func, params)
+    return tvm.IRModule.from_expr(func)
+
+
+def make_ethosn_composite(ethosn_expr, name):
+    vars = relay.analysis.free_vars(ethosn_expr)
+    func = relay.Function([relay.Var("a")], ethosn_expr)
+    func = func.with_attr("Composite", name)
+    call = relay.Call(func, vars)
+    return call
+
+
+def make_ethosn_partition(ethosn_expr):
+    # Create an Ethos-N global function
+    mod = tvm.IRModule({})
+    vars = relay.analysis.free_vars(ethosn_expr)
+    func = relay.Function(vars, ethosn_expr)
+    func = func.with_attr("Primitive", tvm.tir.IntImm("int32", 1))
+    func = func.with_attr("Inline", tvm.tir.IntImm("int32", 1))
+    func = func.with_attr("Compiler", "ethos-n")
+    func = func.with_attr("global_symbol", "ethos-n_0")
+    g1 = relay.GlobalVar("ethos-n_0")
+    mod[g1] = func
+
+    # These are the vars to call the Ethos-N partition with
+    more_vars = relay.analysis.free_vars(ethosn_expr)
+    # Call the Ethos-N partition in main
+    call_fn1 = g1(*more_vars)
+    mod["main"] = relay.Function(more_vars, call_fn1)
+    return mod
+
+
+def get_cpu_op_count(mod):
+    class Counter(tvm.relay.ExprVisitor):
+        def __init__(self):
+            super().__init__()
+            self.count = 0
+
+        def visit_call(self, call):
+            if isinstance(call.op, tvm.ir.Op):
+                self.count += 1
+
+            super().visit_call(call)
+
+    c = Counter()
+    c.visit(mod["main"])
+    return c.count
+
+
+def build(mod, params, npu=True, cpu_ops=0, npu_partitions=1):
+    relay.backend.compile_engine.get().clear()
+    with tvm.transform.PassContext(opt_level=3, config={
+            "relay.ext.ethos-n.options": {"variant": 0}
+    }):
+        with tvm.target.create("llvm -mcpu=core-avx2"):
+            if npu:
+                f = relay.build_module.bind_params_by_name(mod["main"], params)
+                mod = tvm.IRModule()
+                mod["main"] = f
+                mod = relay.transform.AnnotateTarget("ethos-n")(mod)
+                mod = relay.transform.MergeCompilerRegions()(mod)
+                mod = relay.transform.PartitionGraph()(mod)
+                cpu_op_count = get_cpu_op_count(mod)
+                assert cpu_op_count == cpu_ops, \

Review comment:
       Would `expected_host_ops` be clearer than `cpu_ops`?

##########
File path: tests/scripts/task_config_build_cpu.sh
##########
@@ -43,3 +43,5 @@ echo set\(USE_VTA_FSIM ON\) >> config.cmake
 echo set\(USE_TFLITE ON\) >> config.cmake
 echo set\(USE_TENSORFLOW_PATH \"/tensorflow\"\) >> config.cmake
 echo set\(USE_FLATBUFFERS_PATH \"/flatbuffers\"\) >> config.cmake
+echo set\(USE_ETHOSN /opt/arm/ethosn-driver-dev/ethos-n77\) >> config.cmake
+echo set\(USE_ETHOSN_HW ON\) >> config.cmake

Review comment:
       Will we have Ethos-N on the CI machine?

##########
File path: src/relay/backend/contrib/ethosn/codegen.cc
##########
@@ -0,0 +1,214 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file src/relay/backend/contrib/ethosn/codegen.cc
+ * \brief The Relay -> Ethos-N command stream compiler.
+ */
+#include <tvm/relay/expr_functor.h>
+#include <tvm/runtime/module.h>
+
+#include "codegen_ethosn.h"
+#include "ethosn_api.h"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace ethosn {
+
+sl::TensorInfo GetTensorInfo(std::map<Expr, std::vector<sl::TensorInfo>> 
tensor_table,
+                             const Call& call) {
+  if (tensor_table.find(call) != tensor_table.end()) return 
tensor_table[call][0];
+
+  return sl::TensorInfo();
+}
+
/*!
 * \brief Infer the input tensor information for a supported call and record
 * it in tensor_table_, keyed by the call's first argument. Reports a fatal
 * error if the operator is unsupported or parameter extraction fails.
 */
void InferTensorsVisitor::InferCall(const CallNode* cn) {
  EthosnError err;
  Call call = GetRef<Call>(cn);
  // Determine call -> NPU mapping
  if (EthosnAPI::IsEthosOp(call, "qnn.concatenate")) {
    ConcatenateParams params;
    err = EthosnAPI::Concatenate(call, &params);
    tensor_table_[cn->args[0]] = params.input_infos;
  } else if (EthosnAPI::IsEthosOp(call, "split")) {
    SplitParams params;
    // Split starts from the info already inferred for this call itself.
    params.input_info = GetTensorInfo(tensor_table_, call);
    err = EthosnAPI::Split(call, &params);
    tensor_table_[cn->args[0]] = {params.input_info};
  } else {
    err = EthosnError("unknown operator");
  }
  if (err) {
    ReportFatalError(call, err);
  }
}
+
+// This will only visit an expression if the expression's tensor info
+// has already been entirely inferred.
+// An example where this is important is a tuple node where each
+// get item node will only infer one field of the tuple's expression info.
+// We don't want to traverse the tuple until all of its fields have been 
inferred.
+void InferTensorsVisitor::VisitInferred(const Expr& expr) {
+  if (tensor_table_.find(expr) != tensor_table_.end()) {
+    for (const auto& tensor_info : tensor_table_[expr]) {
+      if (tensor_info == sl::TensorInfo()) return;
+    }
+    VisitExpr(expr);
+  }
+}
+
/*! \brief Infer this call's inputs, then visit its arguments pre-order. */
void InferTensorsVisitor::VisitExpr_(const CallNode* cn) {
  InferCall(cn);
  // Pre-order visitor
  for (const auto& arg : cn->args) {
    VisitInferred(arg);
  }
}
+
/*!
 * \brief Distribute a tuple's inferred infos onto its fields, then visit
 * each field pre-order. The tuple itself must already have an entry in
 * tensor_table_ (enforced by the CHECK).
 */
void InferTensorsVisitor::VisitExpr_(const TupleNode* tn) {
  auto tuple = GetRef<Tuple>(tn);
  CHECK(tensor_table_.find(tuple) != tensor_table_.end());
  // Field i receives the i-th info inferred for the tuple as a whole.
  for (size_t i = 0; i < tn->fields.size(); i++) {
    tensor_table_[tn->fields[i]] = {tensor_table_[tuple][i]};
  }
  // Pre-order visitor
  for (const auto& field : tn->fields) {
    VisitExpr(field);
  }
}
+
/*!
 * \brief Propagate a TupleGetItem's inferred info back onto the tuple
 * expression it indexes, then visit that tuple once fully inferred.
 */
void InferTensorsVisitor::VisitExpr_(const TupleGetItemNode* tgn) {
  // Don't assume it must be targeting a TupleNode
  // Vars and calls can still have TupleType
  auto tg = GetRef<TupleGetItem>(tgn);
  CHECK(tensor_table_.find(tg) != tensor_table_.end());
  auto tuple = tg->tuple;
  auto type = tuple->checked_type().as<TupleTypeNode>();
  int index = tg->index;
  // Resize the tensor infos to the tuple size if not already done
  if (tensor_table_.find(tuple) == tensor_table_.end()) {
    tensor_table_[tuple].resize(type->fields.size());
  }
  // Only the field this get-item selects is filled in here; the remaining
  // fields stay default-constructed until their own get-items are visited.
  tensor_table_[tuple][index] = tensor_table_[tg][0];
  // Pre-order visitor
  VisitInferred(tuple);
}
+
+sl::TensorsAndId MakeOps(const sl::TensorAndId<sl::Operand>& op) {
+  sl::TensorsAndId ops;
+  ops.tensors = {op.tensor};
+  ops.operationId = op.operationId;
+  return ops;
+}
+
+sl::TensorsAndId ConstructNetworkVisitor::HandleCall(const CallNode* cn) {

Review comment:
       Ditto. Can we inline this function?

##########
File path: src/relay/backend/contrib/ethosn/ethosn_api.h
##########
@@ -0,0 +1,142 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#ifndef TVM_RELAY_BACKEND_CONTRIB_ETHOSN_ETHOSN_API_H_
+#define TVM_RELAY_BACKEND_CONTRIB_ETHOSN_ETHOSN_API_H_
+
+#include <tvm/relay/expr.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/tir/analysis.h>
+#include <tvm/tir/op.h>
+
+#include <algorithm>
+#include <limits>
+#include <map>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "ethosn_support_library/Support.hpp"
+#include "ethosn_support_library/SupportQueries.hpp"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace ethosn {
+
+namespace sl = ::ethosn::support_library;
+
/*! \brief Extracted parameters describing a qnn.concatenate call. */
struct ConcatenateParams {
  sl::QuantizationInfo qInfo;
  // Default-initialized to concatenate along axis 1 with qInfo.
  sl::ConcatenationInfo concat_info = sl::ConcatenationInfo(1, qInfo);
  std::vector<sl::TensorInfo> input_infos;
};
+
/*! \brief Extracted parameters describing a split call. */
struct SplitParams {
  // Default-initialized to split along axis 0 with no split sizes.
  sl::SplitInfo split_info = sl::SplitInfo(0, {});
  sl::TensorInfo input_info;
};
+
/*!
 * \brief Stream-style builder for error messages, e.g.
 * ErrStrm() << "axis=" << axis. Only EthosnError (a friend) can read
 * the accumulated string.
 */
class ErrStrm {
 public:
  template <typename T>
  ErrStrm& operator<<(const T& val) {  // NOLINT(*)
    stream_ << val;
    return *this;
  }

 private:
  std::stringstream stream_;
  friend class EthosnError;
};
+
/*!
 * \brief An accumulating container of error messages. Converts to true
 * when at least one error has been recorded.
 */
class EthosnError {
 public:
  /*! \brief Construct with no errors recorded. */
  EthosnError() {}
  /*! \brief Construct from a list of error messages. */
  explicit EthosnError(const Array<String>& msgs) : msgs(msgs) {}
  /*! \brief Construct from a single error message. */
  explicit EthosnError(const String& msg) { msgs.push_back(msg); }
  /*! \brief Construct from a message built with ErrStrm. */
  explicit EthosnError(const ErrStrm& err) : EthosnError(err.stream_.str()) {}

  /*! \brief True iff any error has been recorded. */
  explicit operator bool() const { return !msgs.empty(); }

  /*! \brief Append another error's messages to this one. */
  EthosnError& operator+=(const EthosnError& other) {
    msgs.insert(msgs.end(), other.msgs.begin(), other.msgs.end());
    return *this;
  }

  /*! \brief The accumulated error messages. */
  Array<String> msgs;
};
+
+class EthosnAPI {
+ public:
+  static std::unique_ptr<sl::CompiledNetwork> 
Compile(std::shared_ptr<sl::Network> network,
+                                                      const 
sl::CompilationOptions& options);
+
+  static sl::CompilationOptions CreateOptions();
+
+  static bool IsEthosFunc(const Call& call, const std::string& op_name);
+  static bool IsEthosOp(const Call& call, const std::string& op_name);
+
+  static EthosnError Concatenate(const Expr& expr, ConcatenateParams* params);
+  static EthosnError Split(const Expr& expr, SplitParams* params);
+
+ private:
+  static EthosnError Tvm2Npu(const Array<IndexExpr>& shape, sl::TensorShape* 
npu_shape);
+  static EthosnError Tvm2Npu(const tvm::DataType& dtype, sl::DataType* 
data_type);
+
+  // Convert an array of IntImmNodes into ValueT
+  // IndexT type of Array indexing variable
+  // ValueT type of resulting value
+  template <typename IndexT, typename ValueT>
+  static EthosnError AsArray(const Array<IndexT>& arr, std::array<ValueT, 4>* 
v) {
+    if (arr.size() > 4)
+      return EthosnError(ErrStrm() << "dimensions=" << arr.size() << ", 
dimensions must be <= 4");
+    for (size_t i = 0; i < std::min(arr.size(), 4ul); i++) {
+      const PrimExpr& a = arr[i];
+      const auto* intImm = a.as<IntImmNode>();
+      if (intImm->value > std::numeric_limits<ValueT>::max()) {
+        return EthosnError(ErrStrm() << "axis size=" << intImm->value << ", 
axis size must be <= "
+                                     << std::numeric_limits<ValueT>::max());
+      }
+      (*v)[i] = static_cast<ValueT>(intImm->value);
+    }
+    return EthosnError();
+  }
+
+  // Get a T from a constant represented by
+  // a NDArray.

Review comment:
       Merge to 1 line.

##########
File path: src/relay/backend/contrib/ethosn/ethosn_api.cc
##########
@@ -0,0 +1,268 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+#include "ethosn_api.h"
+
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/tir/analysis.h>
+
+#include <fstream>
+#include <map>
+#include <memory>
+#include <string>
+#include <utility>
+#include <vector>
+
+#include "capabilities.h"
+#include "ethosn_support_library/Support.hpp"
+#include "ethosn_support_library/SupportQueries.hpp"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace ethosn {
+
+std::unique_ptr<sl::CompiledNetwork> 
EthosnAPI::Compile(std::shared_ptr<sl::Network> network,
+                                                        const 
sl::CompilationOptions& options) {
+  std::vector<std::unique_ptr<sl::CompiledNetwork>> compiled_network =
+      sl::Compile(*network, options);
+  CHECK_GE(compiled_network.size(), 1) << "Ethos-N compiler failed to compile 
network";
+
+  return std::move(compiled_network[0]);
+}
+
/*!
 * \brief Compiler attributes for the Ethos-N backend, set through the
 * "relay.ext.ethos-n.options" pass-context config.
 */
struct EthosnCompilerConfigNode : public tvm::AttrsNode<EthosnCompilerConfigNode> {
  // Hardware variant to compile for; see the describe() text below.
  int variant;
  // Compilation strategies the support library is allowed to use.
  bool strategy0;
  bool strategy1;
  bool strategy3;
  bool strategy4;
  bool strategy6;
  bool strategy7;
  // Debug dump switches.
  bool dump_ram;
  bool initial_sram_dump;
  // Block configurations the support library may choose from.
  bool block_config_16x16;
  bool block_config_32x8;
  bool block_config_8x32;
  bool block_config_8x8;
  bool enable_intermediate_compression;
  bool disable_winograd;
  bool dump_debug_files;
  // Directory debug files are written to.
  String debug_dir;
  bool enable_cascading;

  TVM_DECLARE_ATTRS(EthosnCompilerConfigNode, "ext.attrs.EthosnCompilerConfigNode") {
    TVM_ATTR_FIELD(variant)
        .describe("0 for Ethos-N77, 1 for Ethos-N57, 2 for Ethos-N37. See Ethos-N documentation.")
        .set_default(0);
    TVM_ATTR_FIELD(strategy0).set_default(true);
    TVM_ATTR_FIELD(strategy1).set_default(true);
    TVM_ATTR_FIELD(strategy3).set_default(true);
    TVM_ATTR_FIELD(strategy4).set_default(true);
    TVM_ATTR_FIELD(strategy6).set_default(true);
    TVM_ATTR_FIELD(strategy7).set_default(true);
    TVM_ATTR_FIELD(dump_ram).set_default(false);
    TVM_ATTR_FIELD(initial_sram_dump).set_default(false);
    TVM_ATTR_FIELD(block_config_16x16).set_default(true);
    TVM_ATTR_FIELD(block_config_32x8).set_default(true);
    TVM_ATTR_FIELD(block_config_8x32).set_default(true);
    TVM_ATTR_FIELD(block_config_8x8).set_default(true);
    TVM_ATTR_FIELD(enable_intermediate_compression).set_default(true);
    TVM_ATTR_FIELD(disable_winograd).set_default(false);
    TVM_ATTR_FIELD(dump_debug_files).set_default(false);
    TVM_ATTR_FIELD(debug_dir).set_default(".");
    TVM_ATTR_FIELD(enable_cascading).set_default(false);
  }
};
+
/*! \brief Managed reference to EthosnCompilerConfigNode. */
class EthosnCompilerConfig : public Attrs {
 public:
  TVM_DEFINE_NOTNULLABLE_OBJECT_REF_METHODS(EthosnCompilerConfig, Attrs, EthosnCompilerConfigNode);
};
+
+TVM_REGISTER_NODE_TYPE(EthosnCompilerConfigNode);
+TVM_REGISTER_PASS_CONFIG_OPTION("relay.ext.ethos-n.options", 
EthosnCompilerConfig);
+
+sl::CompilationOptions EthosnAPI::CreateOptions() {
+  auto ctx = transform::PassContext::Current();
+  auto cfg = ctx->GetConfig<EthosnCompilerConfig>("relay.ext.ethos-n.options");
+  if (!cfg.defined()) {
+    cfg = AttrsWithDefaultValues<EthosnCompilerConfig>();
+  }
+
+  sl::CompilationOptions options(variants[cfg.value()->variant]);
+  options.m_Strategy0 = cfg.value()->strategy0;
+  options.m_Strategy1 = cfg.value()->strategy1;
+  options.m_Strategy3 = cfg.value()->strategy3;
+  options.m_Strategy4 = cfg.value()->strategy4;
+  options.m_Strategy6 = cfg.value()->strategy6;
+  options.m_Strategy7 = cfg.value()->strategy7;
+  options.m_DebugInfo.m_DumpRam = cfg.value()->dump_ram;
+  options.m_DebugInfo.m_InitialSramDump = cfg.value()->initial_sram_dump;
+  options.m_BlockConfig16x16 = cfg.value()->block_config_16x16;
+  options.m_BlockConfig32x8 = cfg.value()->block_config_32x8;
+  options.m_BlockConfig8x32 = cfg.value()->block_config_8x32;
+  options.m_BlockConfig8x8 = cfg.value()->block_config_8x8;
+  options.m_EnableIntermediateCompression = 
cfg.value()->enable_intermediate_compression;
+  options.m_DisableWinograd = cfg.value()->disable_winograd;
+  options.m_DebugInfo.m_DumpDebugFiles = cfg.value()->dump_debug_files;
+  options.m_DebugInfo.m_DebugDir = cfg.value()->debug_dir;
+  options.m_EnableCascading = cfg.value()->enable_cascading;
+  return options;
+}
+
+bool EthosnAPI::IsEthosFunc(const Call& call, const std::string& op_name) {
+  if (call->op->IsInstance<FunctionNode>()) {
+    Function func = Downcast<Function>(call->op);
+    CHECK(func.defined());
+    auto name_node = func->GetAttr<String>(attr::kComposite);
+    return name_node.value() == op_name;
+  }
+  return false;
+}
+
+bool EthosnAPI::IsEthosOp(const Call& call, const std::string& op_name) {

Review comment:
       It seems to me that you don't need this function if it just checks the 
op name. If something is missing and will be added in follow-up PRs, 
please add TODOs.

##########
File path: tests/python/contrib/test_ethosn/infrastructure.py
##########
@@ -0,0 +1,225 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""Expose Ethos test functions to the Python front end"""
+
+from __future__ import absolute_import, print_function
+import tvm
+from tvm import relay
+from tvm.contrib import util, graph_runtime, download
+from tvm.relay.testing import run_opt_pass
+from enum import Enum
+from hashlib import md5
+from itertools import zip_longest, combinations
+import numpy as np
+from PIL import Image
+import os
+
+from . import _infrastructure
+from tvm.relay.op.contrib import get_pattern_table
+
+
+class Available(Enum):
+    UNAVAILABLE = 0
+    SW_ONLY = 1
+    SW_AND_HW = 2
+
+
+def ethosn_available():
+    """Return whether Ethos-N software and hardware support is available"""
+    if not tvm.get_global_func("relay.ethos-n.query", True):
+        print("skip because Ethos-N module is not available")
+        return Available.UNAVAILABLE
+    else:
+        hw = tvm.get_global_func("relay.ethos-n.query")()
+        return Available.SW_AND_HW if hw else Available.SW_ONLY
+
+
+def get_real_image(im_height, im_width):
+    repo_base = 
'https://github.com/dmlc/web-data/raw/master/tensorflow/models/InceptionV1/'
+    img_name = 'elephant-299.jpg'
+    image_url = os.path.join(repo_base, img_name)
+    img_path = download.download_testdata(image_url, img_name, module='data')
+    image = Image.open(img_path).resize((im_height, im_width))
+    x = np.array(image).astype('uint8')
+    data = np.reshape(x, (1, im_height, im_width, 3))
+    return data
+
+
+def assert_lib_hash(lib, golden):

Review comment:
       * Didn't see this function being used.
   * What's the purpose of this function?

##########
File path: src/relay/backend/contrib/ethosn/codegen_ethosn.h
##########
@@ -0,0 +1,331 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file src/relay/backend/contrib/ethosn/codegen_ethosn.h
+ * \brief The Relay -> Ethos-N command stream compiler.
+ */
+
+#ifndef TVM_RELAY_BACKEND_CONTRIB_ETHOSN_CODEGEN_ETHOSN_H_
+#define TVM_RELAY_BACKEND_CONTRIB_ETHOSN_CODEGEN_ETHOSN_H_
+
+#include <dmlc/memory_io.h>
+#include <tvm/relay/attrs/nn.h>
+#include <tvm/relay/expr_functor.h>
+#include <tvm/relay/transform.h>
+#include <tvm/relay/type.h>
+#include <tvm/runtime/module.h>
+#include <tvm/runtime/registry.h>
+
+#include <algorithm>
+#include <fstream>
+#include <map>
+#include <memory>
+#include <sstream>
+#include <string>
+#include <unordered_map>
+#include <utility>
+#include <vector>
+
+#include "../../../../runtime/contrib/ethosn/ethosn_runtime.h"
+#include "../codegen_c/codegen_c.h"
+#include "ethosn_api.h"
+#include "ethosn_support_library/Support.hpp"
+#include "ethosn_support_library/SupportQueries.hpp"
+
+namespace tvm {
+namespace relay {
+namespace contrib {
+namespace ethosn {
+
+namespace sl = ::ethosn::support_library;
+
+/*!
+ * \brief A struct to hold an uncompiled support library network alongside
+ * the desired order of input and output operation ids.
+ */
+struct NetworkWithIDs {
+  struct hash_pair {
+    template <class T_0, class T_1>
+    size_t operator()(const std::pair<T_0, T_1>& p) const {
+      return std::hash<T_0>{}(p.first) ^ std::hash<T_1>{}(p.second);
+    }
+  };
+  std::shared_ptr<sl::Network> network;
+  std::unordered_map<uint32_t, unsigned int> input_ids;
+  std::unordered_map<std::pair<uint32_t, uint32_t>, unsigned int, hash_pair> output_ids;
+};
+
+/*!
+ * \brief A base class for error handling using ErrorReporter.
+ */
+class ErrorReportingPass {
+ public:
+  ErrorReportingPass(const IRModule& mod, const GlobalVar& var) : mod_(mod), var_(var) {}
+
+  /*!
+   * \brief Report fatal errors for an expression.
+   * \param expr The expression to report errors at.
+   * \param err The errors to report.
+   */
+  void ReportFatalError(const ObjectRef& expr, const EthosnError& err) {
+    for (const auto& msg : err.msgs) {
+      error_reporter_.ReportAt(this->var_, expr, ErrorBuilder() << msg);
+    }
+    error_reporter_.RenderErrors(this->mod_);
+  }
+
+ protected:
+  /*! \brief An ErrorReporter object to render the errors.*/
+  ErrorReporter error_reporter_;
+  /*! \brief The module to report errors for. */
+  IRModule mod_;
+  /*! \brief The GlobalVar to report errors for. */
+  GlobalVar var_;
+};
+
+/*!
+ * \brief A custom pass to infer the support library tensor information
+ * for a Relay expression.
+ *
+ * Support Library requires that tensors are explicitly declared with
+ * information on their size, data type, format (eg. NHWC) and quantisation
+ * parameters. In Relay, size and data type are already determined when the
+ * type_infer pass is run. However, format and quantisation parameters are
+ * properties of the operators that consume the tensors.
+ *
+ * This pass works by having each node initialise the information of its
+ * parents, essentially propagating the inferred information all the way up
+ * to the inputs of the expression.
+ *
+ * Because the children initialise the information of the parents, it is
+ * necessary to traverse the graph in such a way so as to ensure all the
+ * children of a node are visited before the parent is. As Relay does not
+ * keep a reference to child nodes, this pass goes in preorder but will
+ * skip visiting a parent if all the children haven't yet been visited (see
+ * VisitInferred for the logic that implements this).
+ *
+ * Inference only works for supported callnodes, for tuplenodes, tuplegetitem
+ * nodes and free var nodes. Other nodes should not be off-loaded to Ethos-N.
+ */
+class InferTensorsVisitor : private ErrorReportingPass, private ExprVisitor {
+ public:
+  InferTensorsVisitor(const IRModule& mod, const GlobalVar& var) : ErrorReportingPass(mod, var) {}
+
+  /*!
+   * \brief Infer the support library tensor information for all the nodes
+   * in an expression.
+   * \param expr The expression for which to infer tensor information.
+   * \return A map of expressions to tensor information.
+   * \note This algorithm does not traverse into functions, so call it on
+   * the body of the function you're interested in.
+   */
+  std::map<Expr, std::vector<sl::TensorInfo>> Infer(const Expr& expr) {
+    tensor_table_.clear();
+    CHECK(expr->checked_type().defined());
+    size_t output_size = 1;
+    if (expr->checked_type()->IsInstance<TupleTypeNode>()) {
+      auto type = expr->checked_type().as<TupleTypeNode>();
+      output_size = type->fields.size();
+    }

Review comment:
       ```suggestion
       if (auto tuple = expr->checked_type().as<TupleTypeNode>()) {
         output_size = tuple->fields.size();
       }
       ```

##########
File path: tests/python/contrib/test_ethosn/infrastructure.py
##########
@@ -0,0 +1,225 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""Expose Ethos test functions to the Python front end"""
+
+from __future__ import absolute_import, print_function
+import tvm
+from tvm import relay
+from tvm.contrib import util, graph_runtime, download
+from tvm.relay.testing import run_opt_pass
+from enum import Enum
+from hashlib import md5
+from itertools import zip_longest, combinations
+import numpy as np
+from PIL import Image
+import os
+
+from . import _infrastructure
+from tvm.relay.op.contrib import get_pattern_table
+
+
+class Available(Enum):
+    UNAVAILABLE = 0
+    SW_ONLY = 1
+    SW_AND_HW = 2
+
+
+def ethosn_available():
+    """Return whether Ethos-N software and hardware support is available"""
+    if not tvm.get_global_func("relay.ethos-n.query", True):
+        print("skip because Ethos-N module is not available")
+        return Available.UNAVAILABLE
+    else:
+        hw = tvm.get_global_func("relay.ethos-n.query")()
+        return Available.SW_AND_HW if hw else Available.SW_ONLY
+
+
+def get_real_image(im_height, im_width):
+    repo_base = 'https://github.com/dmlc/web-data/raw/master/tensorflow/models/InceptionV1/'
+    img_name = 'elephant-299.jpg'
+    image_url = os.path.join(repo_base, img_name)
+    img_path = download.download_testdata(image_url, img_name, module='data')
+    image = Image.open(img_path).resize((im_height, im_width))
+    x = np.array(image).astype('uint8')
+    data = np.reshape(x, (1, im_height, im_width, 3))
+    return data
+
+
+def assert_lib_hash(lib, golden):
+    temp = util.tempdir()
+    path = temp.relpath("lib.cmm")
+    lib.imported_modules[1].save(path)
+    lib_hash = md5(open(path, 'rb').read()).hexdigest()
+    assert lib_hash == golden, "Expected hash: {} Got hash: {}".format(golden, lib_hash)
+
+
+def make_module(func, params):
+    func = relay.Function(relay.analysis.free_vars(func), func)
+    if len(params):

Review comment:
       ```suggestion
       if params:
       ```

##########
File path: src/runtime/contrib/ethosn/ethosn_device.cc
##########
@@ -0,0 +1,222 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file ethosn_device.cc
+ * \brief Ethos-N NPU device integration.
+ */
+
+#include <dlpack/dlpack.h>
+#include <poll.h>
+#include <tvm/tir/expr.h>
+#include <unistd.h>
+
+#include <algorithm>
+#include <memory>
+
+#include "ethosn_driver_library/Buffer.hpp"
+#include "ethosn_support_library/Support.hpp"
+
+#if defined ETHOSN_HW
+
+#include "ethosn_driver_library/Inference.hpp"
+#include "ethosn_driver_library/Network.hpp"
+
+namespace tvm {
+namespace runtime {
+namespace ethosn {
+
+namespace sl = ::ethosn::support_library;
+namespace dl = ::ethosn::driver_library;
+
+int64_t GetTensorSize(const DLTensor& tensor) {

Review comment:
       We really need to put this function into DLTensor directly...

##########
File path: tests/python/contrib/test_ethosn/infrastructure.py
##########
@@ -0,0 +1,225 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""Expose Ethos test functions to the Python front end"""
+
+from __future__ import absolute_import, print_function
+import tvm
+from tvm import relay
+from tvm.contrib import util, graph_runtime, download
+from tvm.relay.testing import run_opt_pass
+from enum import Enum
+from hashlib import md5
+from itertools import zip_longest, combinations
+import numpy as np
+from PIL import Image
+import os
+
+from . import _infrastructure
+from tvm.relay.op.contrib import get_pattern_table
+
+
+class Available(Enum):
+    UNAVAILABLE = 0
+    SW_ONLY = 1
+    SW_AND_HW = 2
+
+
+def ethosn_available():
+    """Return whether Ethos-N software and hardware support is available"""
+    if not tvm.get_global_func("relay.ethos-n.query", True):
+        print("skip because Ethos-N module is not available")
+        return Available.UNAVAILABLE
+    else:
+        hw = tvm.get_global_func("relay.ethos-n.query")()
+        return Available.SW_AND_HW if hw else Available.SW_ONLY
+
+
+def get_real_image(im_height, im_width):
+    repo_base = 'https://github.com/dmlc/web-data/raw/master/tensorflow/models/InceptionV1/'
+    img_name = 'elephant-299.jpg'
+    image_url = os.path.join(repo_base, img_name)
+    img_path = download.download_testdata(image_url, img_name, module='data')
+    image = Image.open(img_path).resize((im_height, im_width))
+    x = np.array(image).astype('uint8')
+    data = np.reshape(x, (1, im_height, im_width, 3))
+    return data
+
+
+def assert_lib_hash(lib, golden):
+    temp = util.tempdir()
+    path = temp.relpath("lib.cmm")
+    lib.imported_modules[1].save(path)
+    lib_hash = md5(open(path, 'rb').read()).hexdigest()
+    assert lib_hash == golden, "Expected hash: {} Got hash: {}".format(golden, lib_hash)
+
+
+def make_module(func, params):
+    func = relay.Function(relay.analysis.free_vars(func), func)
+    if len(params):
+        relay.build_module.bind_params_by_name(func, params)
+    return tvm.IRModule.from_expr(func)
+
+
+def make_ethosn_composite(ethosn_expr, name):
+    vars = relay.analysis.free_vars(ethosn_expr)
+    func = relay.Function([relay.Var("a")], ethosn_expr)
+    func = func.with_attr("Composite", name)
+    call = relay.Call(func, vars)
+    return call
+
+
+def make_ethosn_partition(ethosn_expr):
+    # Create an Ethos-N global function
+    mod = tvm.IRModule({})
+    vars = relay.analysis.free_vars(ethosn_expr)
+    func = relay.Function(vars, ethosn_expr)
+    func = func.with_attr("Primitive", tvm.tir.IntImm("int32", 1))
+    func = func.with_attr("Inline", tvm.tir.IntImm("int32", 1))
+    func = func.with_attr("Compiler", "ethos-n")
+    func = func.with_attr("global_symbol", "ethos-n_0")
+    g1 = relay.GlobalVar("ethos-n_0")
+    mod[g1] = func
+
+    # These are the vars to call the Ethos-N partition with
+    more_vars = relay.analysis.free_vars(ethosn_expr)
+    # Call the Ethos-N partition in main
+    call_fn1 = g1(*more_vars)
+    mod["main"] = relay.Function(more_vars, call_fn1)
+    return mod
+
+
+def get_cpu_op_count(mod):
+    class Counter(tvm.relay.ExprVisitor):
+        def __init__(self):
+            super().__init__()
+            self.count = 0
+
+        def visit_call(self, call):
+            if isinstance(call.op, tvm.ir.Op):
+                self.count += 1
+
+            super().visit_call(call)
+
+    c = Counter()
+    c.visit(mod["main"])
+    return c.count
+
+
+def build(mod, params, npu=True, cpu_ops=0, npu_partitions=1):
+    relay.backend.compile_engine.get().clear()
+    with tvm.transform.PassContext(opt_level=3, config={
+            "relay.ext.ethos-n.options": {"variant": 0}
+    }):
+        with tvm.target.create("llvm -mcpu=core-avx2"):

Review comment:
       Maybe it's more general to remove `-mcpu=core-avx2` in testing.

##########
File path: tests/python/contrib/test_ethosn/infrastructure.py
##########
@@ -0,0 +1,225 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+"""Expose Ethos test functions to the Python front end"""
+
+from __future__ import absolute_import, print_function
+import tvm
+from tvm import relay
+from tvm.contrib import util, graph_runtime, download
+from tvm.relay.testing import run_opt_pass
+from enum import Enum
+from hashlib import md5
+from itertools import zip_longest, combinations
+import numpy as np
+from PIL import Image
+import os
+
+from . import _infrastructure
+from tvm.relay.op.contrib import get_pattern_table
+
+
+class Available(Enum):
+    UNAVAILABLE = 0
+    SW_ONLY = 1
+    SW_AND_HW = 2
+
+
+def ethosn_available():
+    """Return whether Ethos-N software and hardware support is available"""
+    if not tvm.get_global_func("relay.ethos-n.query", True):
+        print("skip because Ethos-N module is not available")
+        return Available.UNAVAILABLE
+    else:
+        hw = tvm.get_global_func("relay.ethos-n.query")()
+        return Available.SW_AND_HW if hw else Available.SW_ONLY
+
+
+def get_real_image(im_height, im_width):
+    repo_base = 'https://github.com/dmlc/web-data/raw/master/tensorflow/models/InceptionV1/'
+    img_name = 'elephant-299.jpg'
+    image_url = os.path.join(repo_base, img_name)
+    img_path = download.download_testdata(image_url, img_name, module='data')
+    image = Image.open(img_path).resize((im_height, im_width))
+    x = np.array(image).astype('uint8')
+    data = np.reshape(x, (1, im_height, im_width, 3))
+    return data
+
+
+def assert_lib_hash(lib, golden):
+    temp = util.tempdir()
+    path = temp.relpath("lib.cmm")
+    lib.imported_modules[1].save(path)
+    lib_hash = md5(open(path, 'rb').read()).hexdigest()
+    assert lib_hash == golden, "Expected hash: {} Got hash: {}".format(golden, lib_hash)
+
+
+def make_module(func, params):
+    func = relay.Function(relay.analysis.free_vars(func), func)
+    if len(params):
+        relay.build_module.bind_params_by_name(func, params)
+    return tvm.IRModule.from_expr(func)
+
+
+def make_ethosn_composite(ethosn_expr, name):
+    vars = relay.analysis.free_vars(ethosn_expr)
+    func = relay.Function([relay.Var("a")], ethosn_expr)
+    func = func.with_attr("Composite", name)
+    call = relay.Call(func, vars)
+    return call
+
+
+def make_ethosn_partition(ethosn_expr):
+    # Create an Ethos-N global function
+    mod = tvm.IRModule({})
+    vars = relay.analysis.free_vars(ethosn_expr)
+    func = relay.Function(vars, ethosn_expr)
+    func = func.with_attr("Primitive", tvm.tir.IntImm("int32", 1))
+    func = func.with_attr("Inline", tvm.tir.IntImm("int32", 1))
+    func = func.with_attr("Compiler", "ethos-n")
+    func = func.with_attr("global_symbol", "ethos-n_0")
+    g1 = relay.GlobalVar("ethos-n_0")
+    mod[g1] = func
+
+    # These are the vars to call the Ethos-N partition with
+    more_vars = relay.analysis.free_vars(ethosn_expr)
+    # Call the Ethos-N partition in main
+    call_fn1 = g1(*more_vars)
+    mod["main"] = relay.Function(more_vars, call_fn1)
+    return mod
+
+
+def get_cpu_op_count(mod):
+    class Counter(tvm.relay.ExprVisitor):
+        def __init__(self):
+            super().__init__()
+            self.count = 0
+
+        def visit_call(self, call):
+            if isinstance(call.op, tvm.ir.Op):
+                self.count += 1
+

Review comment:
       remove this line.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to