zhiics commented on a change in pull request #6222: URL: https://github.com/apache/incubator-tvm/pull/6222#discussion_r468690336
########## File path: cmake/modules/contrib/EthosN.cmake ########## @@ -0,0 +1,58 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Arm Ethos-N rules + +if(NOT USE_ETHOSN STREQUAL "OFF") + find_ethosn(${USE_ETHOSN}) + + if(NOT ETHOSN_FOUND) + message(FATAL_ERROR "Cannot find Ethos-N, USE_ETHOSN=" ${USE_ETHOSN}) + endif() + + if (ETHOSN_FOUND) Review comment: else() ########## File path: cmake/util/FindEthosN.cmake ########## @@ -0,0 +1,95 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +####################################################### +# Find Arm Ethos-N libraries +# +# Usage: +# find_ethosn(${USE_ETHOSN}) +# +# - When USE_ETHOSN=/path/to/ethos-sdk-path, use the path from USE_ETHOSN +# - Else, when environment variable ETHOSN_STACK is set, use that path +# - When USE_ETHOSN=ON, use auto search +# +# Provide variables: +# +# - ETHOSN_FOUND +# - ETHOSN_PACKAGE_VERSION +# - ETHOSN_DEFINITIONS +# - ETHOSN_INCLUDE_DIRS +# - ETHOSN_COMPILER_LIBRARY +# - ETHOSN_RUNTIME_LIBRARY + +macro(find_ethosn use_ethosn) + set(__use_ethosn ${use_ethosn}) + if(IS_DIRECTORY ${__use_ethosn}) + set(__ethosn_stack ${__use_ethosn}) + message(STATUS "Arm Ethos-N driver stack PATH=" ${__use_ethosn}) + elseif(IS_DIRECTORY $ENV{ETHOSN_STACK}) + set(__ethosn_stack $ENV{ETHOSN_STACK}) + message(STATUS "Arm Ethos-N driver stack from env=" ${__use_ethosn}) + else() + set(__ethosn_stack "") + endif() + + if(__ethosn_stack) + set(ETHOSN_INCLUDE_DIRS "") + # Compile-time support + find_path(_SL_DIR NAMES Support.hpp + PATHS ${__ethosn_stack}/include/ethosn_support_library) + string(REGEX REPLACE "/ethosn_support_library" "" _SL_DIR2 ${_SL_DIR}) + list(APPEND ETHOSN_INCLUDE_DIRS "${_SL_DIR2}") + + find_library(ETHOSN_COMPILER_LIBRARY NAMES EthosNSupport + PATHS ${__ethosn_stack}/lib) + find_library(ETHOSN_COMPILER_LIBRARY NAMES EthosNSupport) + + set(ETHOSN_PACKAGE_VERSION "0.1.1") + + if(USE_ETHOSN_HW STREQUAL "ON") + # Runtime hardware support + find_path(_DL_DIR NAMES Network.hpp + PATHS ${__ethosn_stack}/include/ethosn_driver_library) + string(REGEX REPLACE "/ethosn_driver_library" "" _DL_DIR2 ${_DL_DIR}) + list(APPEND ETHOSN_INCLUDE_DIRS "${_DL_DIR2}") + + find_library(ETHOSN_RUNTIME_LIBRARY NAMES EthosNDriver + PATHS ${__ethosn_stack}/lib) + find_library(ETHOSN_RUNTIME_LIBRARY NAMES EthosNDriver) + set(ETHOSN_DEFINITIONS -DETHOSN_HW) + endif () + + if(ETHOSN_COMPILER_LIBRARY) + set(ETHOSN_FOUND TRUE) + endif() + endif(__ethosn_stack) + + if(NOT ETHOSN_FOUND) + 
if(__use_ethosn STREQUAL "ON") + message(WARNING "No cmake find_package available for Arm Ethos-N") + endif() + endif() + + # additional libraries + if(ETHOSN_FOUND) Review comment: else() ########## File path: src/runtime/contrib/ethosn/ethosn_device.cc ########## @@ -0,0 +1,228 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file ethosn_device.cc + * \brief Ethos-N NPU device integration. 
+ */ + +#include <dlpack/dlpack.h> +#include <poll.h> +#include <tvm/tir/expr.h> +#include <unistd.h> + +#include <algorithm> +#include <memory> + +#include "ethosn_driver_library/Buffer.hpp" +#include "ethosn_support_library/Support.hpp" + +#if defined ETHOSN_HW + +#include "ethosn_driver_library/Inference.hpp" +#include "ethosn_driver_library/Network.hpp" + +namespace tvm { +namespace runtime { +namespace ethosn { + +namespace sl = ::ethosn::support_library; +namespace dl = ::ethosn::driver_library; + +int64_t GetTensorSize(const DLTensor& tensor) { + int64_t size = 1; + for (int i = 0; i < tensor.ndim; i++) { + size *= tensor.shape[i]; + } + return size; +} + +bool WaitForInference(dl::Inference* inference, int timeout) { + // Wait for inference to complete + int fd = inference->GetFileDescriptor(); + struct pollfd fds; + memset(&fds, 0, sizeof(fds)); + fds.fd = fd; + fds.events = POLLIN; // Wait for any available input. + + const int ms_per_seconds = 1000; + int poll_result = poll(&fds, 1, timeout * ms_per_seconds); + if (poll_result > 0) { + dl::InferenceResult result; + if (read(fd, &result, sizeof(result)) != sizeof(result)) { + return false; + } + if (result != dl::InferenceResult::Completed) { + return false; + } + } else if (poll_result == 0) { + return false; + } else { + return false; + } + return true; +} + +template <typename T> +void CopyOutput(dl::Buffer* source_buffers[], std::vector<DLTensor*>* outputs) { + for (DLTensor* tensor : *outputs) { + dl::Buffer* source_buffer = source_buffers[0]; + uint8_t* source_buffer_data = source_buffer->GetMappedBuffer(); + size_t size = source_buffer->GetSize(); + T* dest_pointer = static_cast<T*>(tensor->data); + std::copy_backward(source_buffer_data, source_buffer_data + size, dest_pointer + size); + source_buffers++; + } +} + +void CreateBuffers(std::vector<std::shared_ptr<dl::Buffer> >* fm, + const std::vector<DLTensor*>& tensors) { + int index = 0; + for (auto buffer : tensors) { + auto* data = 
static_cast<uint8_t*>(buffer->data); + // The NPU only needs the size of the tensor * uint8_t. + auto data_size = static_cast<uint32_t>(GetTensorSize(*buffer)); + (*fm)[index++] = std::make_shared<dl::Buffer>(data, data_size, dl::DataFormat::NHWC); + } +} + +bool Inference(tvm::runtime::TVMArgs args, sl::CompiledNetwork* network, + std::vector<uint32_t> input_order, std::vector<uint32_t> output_order) { + // Unpack parameters + uint8_t argc = 0; + std::vector<DLTensor*> inputs(input_order.size()); + for (uint8_t i = 0; i < network->GetInputBufferInfos().size(); i++) { + inputs[input_order[i]] = args[argc++]; + } + auto out_infos = network->GetOutputBufferInfos(); + std::vector<DLTensor*> outputs(output_order.size()); + for (uint8_t i = 0; i < network->GetOutputBufferInfos().size(); i++) { + outputs[output_order[i]] = args[argc++]; + } + + // Set up input buffers + std::vector<std::shared_ptr<dl::Buffer> > ifm(inputs.size()); + CreateBuffers(&ifm, inputs); + + // Set up output buffers + std::vector<std::shared_ptr<dl::Buffer> > ofm(outputs.size()); + CreateBuffers(&ofm, outputs); + + // Raw pointers for the inference + dl::Buffer* ifm_raw[inputs.size()]; + for (size_t i = 0; i < inputs.size(); i++) { + ifm_raw[i] = ifm[i].get(); + } + dl::Buffer* ofm_raw[outputs.size()]; + for (size_t i = 0; i < outputs.size(); i++) { + ofm_raw[i] = ofm[i].get(); + } + + auto npu = std::make_unique<dl::Network>(*network); + + // Execute the inference. 
+ std::unique_ptr<dl::Inference> result( + npu->ScheduleInference(ifm_raw, sizeof(ifm_raw) / sizeof(ifm_raw[0]), ofm_raw, + sizeof(ofm_raw) / sizeof(ofm_raw[0]))); + bool inferenceCompleted = WaitForInference(result.get(), 60); + if (inferenceCompleted) { + switch ((outputs)[0]->dtype.bits) { + case 8: { + dl::Buffer** ofms = &ofm_raw[0]; + for (DLTensor* tensor : outputs) { + uint8_t* source_buffer_data = (*ofms++)->GetMappedBuffer(); + uint8_t* dest_pointer = static_cast<uint8_t*>(tensor->data); + if (source_buffer_data != dest_pointer) { + CopyOutput<uint8_t>(ofm_raw, &outputs); + break; + } + } + break; + } + case 16: + CopyOutput<uint16_t>(ofm_raw, &outputs); + break; + case 32: + CopyOutput<uint32_t>(ofm_raw, &outputs); + break; + default: + break; + } + } + + return inferenceCompleted; +} + +} // namespace ethosn +} // namespace runtime +} // namespace tvm + +#else +/* If USE_ETHOSN_HW=OFF, we mock the inference call with a known-good output. + * That output can be set by using relay.ethos-n.test.infra.inference_result + * which will set the values the mocked inference will return the next time + * it's called. 
+ */ + +#include <tvm/runtime/ndarray.h> +#include <tvm/runtime/registry.h> + +namespace tvm { +namespace runtime { +namespace ethosn { + +namespace sl = ::ethosn::support_library; + +std::vector<tvm::runtime::NDArray> test_outputs; + +TVM_REGISTER_GLOBAL("relay.ethos-n.test.infra.inference_result") + .set_body([](tvm::TVMArgs args, tvm::TVMRetValue* rv) { + test_outputs.clear(); + for (int argc = 1; argc < args.size(); argc++) { + const DLTensor* tensor = args[argc]; + auto shape = std::vector<int64_t>(tensor->shape, tensor->shape + tensor->ndim); + test_outputs.emplace_back(tvm::runtime::NDArray::Empty(shape, tensor->dtype, tensor->ctx)); + test_outputs[test_outputs.size() - 1].CopyFrom(tensor); + } + }); + +// Allow the ethos-n support code to be tested without a device +bool Inference(tvm::runtime::TVMArgs args, sl::CompiledNetwork* network, + std::vector<uint32_t> input_order, std::vector<uint32_t> output_order) { Review comment: const& for bouth input_order and output_order ########## File path: tests/python/contrib/test_ethosn/infrastructure.py ########## @@ -0,0 +1,225 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +"""Expose Ethos test functions to the Python front end""" + +from __future__ import absolute_import, print_function +import tvm +from tvm import relay +from tvm.contrib import util, graph_runtime, download +from tvm.relay.testing import run_opt_pass +from enum import Enum +from hashlib import md5 +from itertools import zip_longest, combinations +import numpy as np +from PIL import Image +import os + +from . import _infrastructure +from tvm.relay.op.contrib import get_pattern_table + + +class Available(Enum): + UNAVAILABLE = 0 + SW_ONLY = 1 + SW_AND_HW = 2 + + +def ethosn_available(): + """Return whether Ethos-N software and hardware support is available""" + if not tvm.get_global_func("relay.ethos-n.query", True): + print("skip because Ethos-N module is not available") + return Available.UNAVAILABLE + else: + hw = tvm.get_global_func("relay.ethos-n.query")() + return Available.SW_AND_HW if hw else Available.SW_ONLY + + +def get_real_image(im_height, im_width): + repo_base = 'https://github.com/dmlc/web-data/raw/master/tensorflow/models/InceptionV1/' + img_name = 'elephant-299.jpg' + image_url = os.path.join(repo_base, img_name) + img_path = download.download_testdata(image_url, img_name, module='data') + image = Image.open(img_path).resize((im_height, im_width)) + x = np.array(image).astype('uint8') + data = np.reshape(x, (1, im_height, im_width, 3)) + return data + + +def assert_lib_hash(lib, golden): + temp = util.tempdir() + path = temp.relpath("lib.cmm") + lib.imported_modules[1].save(path) + lib_hash = md5(open(path, 'rb').read()).hexdigest() + assert lib_hash == golden, "Expected hash: {} Got hash: {}".format(golden, lib_hash) + + +def make_module(func, params): + func = relay.Function(relay.analysis.free_vars(func), func) + if len(params): + relay.build_module.bind_params_by_name(func, params) + return tvm.IRModule.from_expr(func) + + +def make_ethosn_composite(ethosn_expr, name): + vars = relay.analysis.free_vars(ethosn_expr) + func = 
relay.Function([relay.Var("a")], ethosn_expr) + func = func.with_attr("Composite", name) + call = relay.Call(func, vars) + return call + + +def make_ethosn_partition(ethosn_expr): + # Create an Ethos-N global function + mod = tvm.IRModule({}) + vars = relay.analysis.free_vars(ethosn_expr) + func = relay.Function(vars, ethosn_expr) + func = func.with_attr("Primitive", tvm.tir.IntImm("int32", 1)) + func = func.with_attr("Inline", tvm.tir.IntImm("int32", 1)) + func = func.with_attr("Compiler", "ethos-n") + func = func.with_attr("global_symbol", "ethos-n_0") + g1 = relay.GlobalVar("ethos-n_0") + mod[g1] = func + + # These are the vars to call the Ethos-N partition with + more_vars = relay.analysis.free_vars(ethosn_expr) + # Call the Ethos-N partition in main + call_fn1 = g1(*more_vars) + mod["main"] = relay.Function(more_vars, call_fn1) + return mod + + +def get_cpu_op_count(mod): + class Counter(tvm.relay.ExprVisitor): + def __init__(self): + super().__init__() + self.count = 0 + + def visit_call(self, call): + if isinstance(call.op, tvm.ir.Op): + self.count += 1 + + super().visit_call(call) + + c = Counter() + c.visit(mod["main"]) + return c.count + + +def build(mod, params, npu=True, cpu_ops=0, npu_partitions=1): + relay.backend.compile_engine.get().clear() + with tvm.transform.PassContext(opt_level=3, config={ + "relay.ext.ethos-n.options": {"variant": 0} + }): + with tvm.target.create("llvm -mcpu=core-avx2"): Review comment: it looks this is still not a good workaround because CI may have some CPUs that are older than Haswell. This would then break. Do you really need -mcpu=core-avx2? ########## File path: src/relay/backend/contrib/ethosn/ethosn_api.cc ########## @@ -0,0 +1,188 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +#include "ethosn_api.h" + +#include <tvm/relay/attrs/nn.h> +#include <tvm/relay/expr.h> +#include <tvm/relay/expr_functor.h> +#include <tvm/relay/transform.h> +#include <tvm/tir/analysis.h> + +#include <fstream> +#include <map> +#include <memory> +#include <string> +#include <utility> +#include <vector> + +#include "ethosn_support_library/Support.hpp" +#include "ethosn_support_library/SupportQueries.hpp" + +namespace tvm { +namespace relay { +namespace contrib { +namespace ethosn { + +EthosnError EthosnAPI::Concatenate(const Expr& expr, ConcatenateParams* params) { + Call call = Downcast<Call>(expr); + const auto& attrs = call->attrs.as<ConcatenateAttrs>(); + params->concat_info.m_Axis = attrs->axis; + + float output_s; + int output_zp; + EthosnError err = AsConstant<float>(call->args[3], &output_s); + err += AsConstant<int>(call->args[4], &output_zp); + params->concat_info.m_OutputQuantizationInfo = sl::QuantizationInfo(output_zp, output_s); + + auto input_scales = call->args[1].as<TupleNode>()->fields; + auto input_zero_points = call->args[2].as<TupleNode>()->fields; + auto input_tensors = call->args[0]->checked_type().as<TupleTypeNode>()->fields; + + int index = 0; + for (auto input_scale : input_scales) { + auto input_dtype = input_tensors[index].as<TensorTypeNode>(); + auto input_zero_point = input_zero_points[index]; + float scale; + int zp; + err += 
AsConstant<float>(input_scale, &scale); + err += AsConstant<int>(input_zero_point, &zp); + sl::TensorShape input_tensor_shape = {1, 1, 1, 1}; + sl::DataType input_data_type; + err += Tvm2Npu(input_dtype->shape, &input_tensor_shape); + err += Tvm2Npu(input_dtype->dtype, &input_data_type); + params->input_infos.emplace_back(sl::TensorInfo(input_tensor_shape, input_data_type, + sl::DataFormat::NHWC, + sl::QuantizationInfo(zp, scale))); + index++; + } + return err; +} + +EthosnError EthosnAPI::Split(const Expr& expr, SplitParams* params) { + Call call = Downcast<Call>(expr); + const auto* input_tensor_type = call->args[0]->checked_type().as<TensorTypeNode>(); + const auto& attrs = call->attrs.as<SplitAttrs>(); + + sl::TensorShape input_tensor_shape = {1, 1, 1, 1}; + sl::DataType input_data_type; + EthosnError err = Tvm2Npu(input_tensor_type->shape, &input_tensor_shape); + err += Tvm2Npu(input_tensor_type->dtype, &input_data_type); + params->input_info = + sl::TensorInfo(input_tensor_shape, input_data_type, params->input_info.m_DataFormat, + params->input_info.m_QuantizationInfo); + params->split_info.m_Axis = attrs->axis; + if (attrs->indices_or_sections->IsInstance<IntImmNode>()) { + auto sections = Downcast<IntImm>(attrs->indices_or_sections)->value; + int size = input_tensor_shape[attrs->axis] / sections; + for (int i = 0; i < sections; i++) { + params->split_info.m_Sizes.push_back(size); + } + } else { + auto indices = Downcast<tvm::Array<Integer>>(attrs->indices_or_sections); + int last_index = 0; + for (const auto& i : indices) { + params->split_info.m_Sizes.push_back(i->value - last_index); + last_index = i->value; + } + int axis_size = input_tensor_shape[attrs->axis]; + params->split_info.m_Sizes.push_back(axis_size - last_index); + } + return err; +} + +EthosnError EthosnAPI::Tvm2Npu(const Array<IndexExpr>& shape, sl::TensorShape* npu_shape) { + EthosnError err = AsArray<IndexExpr, uint32_t>(shape, npu_shape); + if (npu_shape->front() != 1) { + err += 
EthosnError(ErrStrm() << "batch size=" << npu_shape->front() << ", batch size must = 1"); + } + return err; +} + +EthosnError EthosnAPI::Tvm2Npu(const tvm::DataType& dtype, sl::DataType* data_type) { + if (dtype.is_scalar() == 1) { + if (dtype.is_uint() && dtype.bits() == 8) { + *data_type = sl::DataType::UINT8_QUANTIZED; + return EthosnError(); + } else if (dtype.is_int() && dtype.bits() == 32) { + *data_type = sl::DataType::INT32_QUANTIZED; + return EthosnError(); + } + } + return EthosnError(ErrStrm() << "dtype=\'" << dtype << "\', dtype must be either uint8 or int32"); +} + +// Convert an array of IntImmNodes into ValueT +// IndexT type of Array indexing variable +// ValueT type of resulting value +template <typename IndexT, typename ValueT> +EthosnError EthosnAPI::AsArray(const Array<IndexT>& arr, std::array<ValueT, 4>* v) { + if (arr.size() > 4) + return EthosnError(ErrStrm() << "dimensions=" << arr.size() << ", dimensions must be <= 4"); + for (size_t i = 0; i < std::min(arr.size(), 4ul); i++) { + const PrimExpr& a = arr[i]; + const auto* intImm = a.as<IntImmNode>(); + if (intImm->value > std::numeric_limits<ValueT>::max()) { + return EthosnError(ErrStrm() << "axis size=" << intImm->value << ", axis size must be <= " + << std::numeric_limits<ValueT>::max()); + } + (*v)[i] = static_cast<ValueT>(intImm->value); + } + return EthosnError(); +} + +// Get a T from a constant represented by a NDArray. +template <typename T> +EthosnError EthosnAPI::AsConstant(const Expr& expr, T* out) { + if (!expr->IsInstance<ConstantNode>()) { + return EthosnError("expected constant data"); + } + runtime::NDArray data = Downcast<Constant>(expr)->data; + *out = *static_cast<T*>(data.operator->()->data); Review comment: data->data should be fine ########## File path: python/tvm/relay/op/contrib/ethosn.py ########## @@ -0,0 +1,90 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# pylint: disable=invalid-name, unused-argument +"""Arm(R) Ethos(TM) -N NPU supported operators.""" +import tvm.ir +from enum import Enum +from ... import qnn as _qnn +from . import _ethosn as support + + +class Available(Enum): + UNAVAILABLE = 0 + SW_ONLY = 1 + SW_AND_HW = 2 + + def __bool__(self): + return self != Available.UNAVAILABLE + + +def ethosn_available(): + """Return whether Ethos-N software and hardware support is available""" + if not tvm.get_global_func("relay.ethos-n.query", True): + print("skip because Ethos-N module is not available") + return Available.UNAVAILABLE + else: + hw = tvm.get_global_func("relay.ethos-n.query")() + return Available.SW_AND_HW if hw else Available.SW_ONLY + + [email protected]_op_attr("qnn.concatenate", "target.ethos-n") +def qnn_concatenate(attrs, args): + """Check if a concatenate is supported by Ethos-N.""" + if not ethosn_available(): + return False + + conc = _qnn.op.concatenate(*args, **attrs) + if not support.concatenate(conc): + return False + + # Support library has some unenforced restrictions on qnn params + min_range = 1e9 + max_range = -1e9 + qnn_params = [] + for i in range(len(args[1].fields)): + scale = args[1].fields[i].data.asnumpy() + zero_point = args[2].fields[i].data.asnumpy() + min_range = min(-1 * zero_point * scale, 
min_range) + max_range = max((255 - zero_point) * scale, max_range) + qnn_params.append((scale, zero_point)) + + scale = (max_range - min_range) / 255 + zero_point = int(-min_range/scale) + if (scale, zero_point) in qnn_params: + return True + + return False + + [email protected]_op_attr("split", "target.ethos-n") +def split(attrs, args): Review comment: Can you elaborate a bit why we currently chose these two ops other than more common ones like conv2d, etc? ########## File path: src/relay/backend/contrib/ethosn/capabilities.h ########## @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file src/relay/backend/contrib/ethosn/capabilities.h + * \brief The Ethos-N processor series has four variants, the Ethos-N37, Ethos-N57, Ethos-N77 + * and the Ethos-N78. This release of the integration supports the first three variants. + * Configuration information for each variant is stored as a blob in this file. These blobs + * are passed into the Ethos-N support library, which in turn uses them to optimize the + * generated command-stream appropriately for the specified variant. 
+ */ + +#ifndef TVM_RELAY_BACKEND_CONTRIB_ETHOSN_CAPABILITIES_H_ +#define TVM_RELAY_BACKEND_CONTRIB_ETHOSN_CAPABILITIES_H_ + +#include <vector> + +namespace tvm { +namespace relay { +namespace contrib { +namespace ethosn { + +/* Ethos-N variants (N77, N57 and N37) Review comment: just curious, what would need to change later for the support of N78? Is this backward compatible? ########## File path: src/runtime/contrib/ethosn/ethosn_runtime.h ########## @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file ethosn_runtime.h + * \brief Execution handling of Ethos-N command streams. 
+ */ +#ifndef TVM_RUNTIME_CONTRIB_ETHOSN_ETHOSN_RUNTIME_H_ +#define TVM_RUNTIME_CONTRIB_ETHOSN_ETHOSN_RUNTIME_H_ + +#include <tvm/runtime/packed_func.h> + +#include <map> +#include <memory> +#include <string> +#include <unordered_map> +#include <vector> + +#include "ethosn_support_library/Support.hpp" + +namespace tvm { +namespace runtime { +namespace ethosn { + +namespace sl = ::ethosn::support_library; + +struct OrderedCompiledNetwork { + std::unique_ptr<sl::CompiledNetwork> cmm; + std::string name; + std::vector<uint32_t> inputs; + std::vector<uint32_t> outputs; +}; + +class EthosnModule : public ModuleNode { + public: + /*! + * \brief The Ethos-N runtime module. + * \param cmms A vector of compiled networks with input/output orders. + */ + explicit EthosnModule(std::vector<OrderedCompiledNetwork>* cmms); + + /*! + * \brief Get a PackedFunc from the Ethos-N module. + * \param name The name of the function. + * \param sptr_to_self The ObjectPtr that points to this module node. + * \return The function pointer when it is found, otherwise, PackedFunc(nullptr). + */ + PackedFunc GetFunction(const std::string& name, const ObjectPtr<Object>& sptr_to_self) final; + /*! + * \brief Save a compiled network to a binary stream, which can then be + * serialized to disk. + * \param stream The stream to save the binary. + * \note See EthosnModule::LoadFromBinary for the serialization format. + */ + void SaveToBinary(dmlc::Stream* stream) final; + /*! + * \brief Load a compiled network from stream. + * \param strm The binary stream to load. + * \return The created Ethos-N module. + * \note The serialization format is: + * + * size_t : number of functions + * [ + * std::string : name of function (symbol) + * std::string : serialized command stream + * size_t : number of inputs + * std::vector : order of inputs + * size_t : number of outputs + * std::vector : order of outputs + * ] * number of functions + */ + static Module LoadFromBinary(void* strm); + /*! 
+ * \brief Save a module to a specified path. + * \param path Where to save the serialized module. Review comment: \param format ########## File path: tests/python/contrib/test_ethosn/infrastructure.py ########## @@ -0,0 +1,169 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +"""Expose Ethos test functions to the Python front end""" + +from __future__ import absolute_import, print_function +import tvm +from tvm import relay +from tvm.contrib import util, graph_runtime, download +from tvm.relay.testing import run_opt_pass +from enum import Enum +from hashlib import md5 +from itertools import zip_longest, combinations +import numpy as np +from PIL import Image +import os + +from . 
import _infrastructure +from tvm.relay.op.contrib import get_pattern_table + + +def make_module(func, params): + func = relay.Function(relay.analysis.free_vars(func), func) + if params: + relay.build_module.bind_params_by_name(func, params) + return tvm.IRModule.from_expr(func) + + +def make_ethosn_composite(ethosn_expr, name): + vars = relay.analysis.free_vars(ethosn_expr) + func = relay.Function([relay.Var("a")], ethosn_expr) + func = func.with_attr("Composite", name) + call = relay.Call(func, vars) + return call + + +def make_ethosn_partition(ethosn_expr): + # Create an Ethos-N global function + mod = tvm.IRModule({}) + vars = relay.analysis.free_vars(ethosn_expr) + func = relay.Function(vars, ethosn_expr) + func = func.with_attr("Primitive", tvm.tir.IntImm("int32", 1)) + func = func.with_attr("Inline", tvm.tir.IntImm("int32", 1)) + func = func.with_attr("Compiler", "ethos-n") + func = func.with_attr("global_symbol", "ethos-n_0") + g1 = relay.GlobalVar("ethos-n_0") + mod[g1] = func + + # These are the vars to call the Ethos-N partition with + more_vars = relay.analysis.free_vars(ethosn_expr) + # Call the Ethos-N partition in main + call_fn1 = g1(*more_vars) + mod["main"] = relay.Function(more_vars, call_fn1) + return mod + + +def get_host_op_count(mod): + class Counter(tvm.relay.ExprVisitor): + def __init__(self): + super().__init__() + self.count = 0 + + def visit_call(self, call): + if isinstance(call.op, tvm.ir.Op): + self.count += 1 + super().visit_call(call) + + c = Counter() + c.visit(mod["main"]) + return c.count + + +def build(mod, params, npu=True, expected_host_ops=0, npu_partitions=1): + relay.backend.compile_engine.get().clear() + with tvm.transform.PassContext(opt_level=3, config={ + "relay.ext.ethos-n.options": {"variant": 0} + }): + with tvm.target.create("llvm -mcpu=core-avx2"): + if npu: + f = relay.build_module.bind_params_by_name(mod["main"], params) + mod = tvm.IRModule() + mod["main"] = f + mod = 
relay.transform.AnnotateTarget("ethos-n")(mod) + mod = relay.transform.MergeCompilerRegions()(mod) + mod = relay.transform.PartitionGraph()(mod) + host_op_count = get_host_op_count(mod) + assert host_op_count == expected_host_ops, \ + "Got {} host operators, expected {}".format(host_op_count, expected_host_ops) + partition_count = 0 + for global_var in mod.get_global_vars(): + if "ethos-n" in global_var.name_hint: + partition_count += 1 + + assert npu_partitions == partition_count, \ + "Got {} ethos-n partitions, expected {}".format(partition_count, npu_partitions) + + return relay.build(mod, params=params) Review comment: Somewhere we need to test export_library and module.load ########## File path: src/runtime/contrib/ethosn/ethosn_runtime.cc ########## @@ -0,0 +1,146 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file ethosn_runtime.cc + * \brief Execution handling of Ethos-N command streams.
+ */ + +#include "ethosn_runtime.h" + +#include <dmlc/memory_io.h> +#include <tvm/runtime/c_runtime_api.h> +#include <tvm/runtime/memory.h> +#include <tvm/runtime/module.h> +#include <tvm/runtime/object.h> +#include <tvm/runtime/packed_func.h> +#include <tvm/runtime/registry.h> + +#include <string> +#include <utility> +#include <vector> + +#include "../../file_util.h" +#include "ethosn_device.h" +#include "ethosn_driver_library/Inference.hpp" +#include "ethosn_driver_library/Network.hpp" +#include "ethosn_support_library/Support.hpp" + +namespace tvm { +namespace runtime { +namespace ethosn { + +namespace sl = ::ethosn::support_library; +namespace dl = ::ethosn::driver_library; + +EthosnModule::EthosnModule(std::vector<OrderedCompiledNetwork>* cmms) { Review comment: why not just `const std::vector<OrderedCompiledNetwork>& cmms`? ########## File path: src/runtime/contrib/ethosn/ethosn_device.cc ########## @@ -0,0 +1,222 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file ethosn_device.cc + * \brief Ethos-N NPU device integration. 
+ */ + +#include <dlpack/dlpack.h> +#include <poll.h> +#include <tvm/tir/expr.h> +#include <unistd.h> + +#include <algorithm> +#include <memory> + +#include "ethosn_driver_library/Buffer.hpp" +#include "ethosn_support_library/Support.hpp" + +#if defined ETHOSN_HW + +#include "ethosn_driver_library/Inference.hpp" +#include "ethosn_driver_library/Network.hpp" + +namespace tvm { +namespace runtime { +namespace ethosn { + +namespace sl = ::ethosn::support_library; +namespace dl = ::ethosn::driver_library; + +int64_t GetTensorSize(const DLTensor& tensor) { Review comment: We have `GetDataSize()` in ndarray.h already ########## File path: src/runtime/contrib/ethosn/ethosn_device.cc ########## @@ -0,0 +1,228 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file ethosn_device.cc + * \brief Ethos-N NPU device integration. 
+ */ + +#include <dlpack/dlpack.h> +#include <poll.h> +#include <tvm/tir/expr.h> +#include <unistd.h> + +#include <algorithm> +#include <memory> + +#include "ethosn_driver_library/Buffer.hpp" +#include "ethosn_support_library/Support.hpp" + +#if defined ETHOSN_HW + +#include "ethosn_driver_library/Inference.hpp" +#include "ethosn_driver_library/Network.hpp" + +namespace tvm { +namespace runtime { +namespace ethosn { + +namespace sl = ::ethosn::support_library; +namespace dl = ::ethosn::driver_library; + +int64_t GetTensorSize(const DLTensor& tensor) { + int64_t size = 1; + for (int i = 0; i < tensor.ndim; i++) { + size *= tensor.shape[i]; + } + return size; +} + +bool WaitForInference(dl::Inference* inference, int timeout) { + // Wait for inference to complete + int fd = inference->GetFileDescriptor(); + struct pollfd fds; + memset(&fds, 0, sizeof(fds)); + fds.fd = fd; + fds.events = POLLIN; // Wait for any available input. + + const int ms_per_seconds = 1000; + int poll_result = poll(&fds, 1, timeout * ms_per_seconds); + if (poll_result > 0) { + dl::InferenceResult result; + if (read(fd, &result, sizeof(result)) != sizeof(result)) { + return false; + } + if (result != dl::InferenceResult::Completed) { + return false; + } + } else if (poll_result == 0) { + return false; + } else { + return false; + } + return true; +} + +template <typename T> +void CopyOutput(dl::Buffer* source_buffers[], std::vector<DLTensor*>* outputs) { + for (DLTensor* tensor : *outputs) { + dl::Buffer* source_buffer = source_buffers[0]; + uint8_t* source_buffer_data = source_buffer->GetMappedBuffer(); + size_t size = source_buffer->GetSize(); + T* dest_pointer = static_cast<T*>(tensor->data); + std::copy_backward(source_buffer_data, source_buffer_data + size, dest_pointer + size); + source_buffers++; + } +} + +void CreateBuffers(std::vector<std::shared_ptr<dl::Buffer> >* fm, + const std::vector<DLTensor*>& tensors) { + int index = 0; + for (auto buffer : tensors) { + auto* data = 
static_cast<uint8_t*>(buffer->data); + // The NPU only needs the size of the tensor * uint8_t. + auto data_size = static_cast<uint32_t>(GetTensorSize(*buffer)); + (*fm)[index++] = std::make_shared<dl::Buffer>(data, data_size, dl::DataFormat::NHWC); + } +} + +bool Inference(tvm::runtime::TVMArgs args, sl::CompiledNetwork* network, + std::vector<uint32_t> input_order, std::vector<uint32_t> output_order) { Review comment: const& for both input_order and output_order ########## File path: src/runtime/contrib/ethosn/ethosn_runtime.h ########## @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +/*! + * \file ethosn_runtime.h + * \brief Execution handling of Ethos-N command streams.
+ */ +#ifndef TVM_RUNTIME_CONTRIB_ETHOSN_ETHOSN_RUNTIME_H_ +#define TVM_RUNTIME_CONTRIB_ETHOSN_ETHOSN_RUNTIME_H_ + +#include <tvm/runtime/packed_func.h> + +#include <map> +#include <memory> +#include <string> +#include <unordered_map> +#include <vector> + +#include "ethosn_support_library/Support.hpp" + +namespace tvm { +namespace runtime { +namespace ethosn { + +namespace sl = ::ethosn::support_library; + +struct OrderedCompiledNetwork { + std::unique_ptr<sl::CompiledNetwork> cmm; + std::string name; + std::vector<uint32_t> inputs; + std::vector<uint32_t> outputs; +}; + +class EthosnModule : public ModuleNode { + public: + /*! + * \brief The Ethos-N runtime module. + * \param cmms A vector of compiled networks with input/output orders. + */ + explicit EthosnModule(std::vector<OrderedCompiledNetwork>* cmms); + + /*! + * \brief Get a PackedFunc from the Ethos-N module. + * \param name The name of the function. + * \param sptr_to_self The ObjectPtr that points to this module node. + * \return The function pointer when it is found, otherwise, PackedFunc(nullptr). + */ + PackedFunc GetFunction(const std::string& name, const ObjectPtr<Object>& sptr_to_self) final; + /*! + * \brief Save a compiled network to a binary stream, which can then be + * serialized to disk. + * \param stream The stream to save the binary. + * \note See EthosnModule::LoadFromBinary for the serialization format. + */ + void SaveToBinary(dmlc::Stream* stream) final; + /*! + * \brief Load a compiled network from stream. + * \param strm The binary stream to load. + * \return The created Ethos-N module. + * \note The serialization format is: + * + * size_t : number of functions + * [ + * std::string : name of function (symbol) + * std::string : serialized command stream + * size_t : number of inputs + * std::vector : order of inputs + * size_t : number of outputs + * std::vector : order of outputs + * ] * number of functions + */ + static Module LoadFromBinary(void* strm); + /*! 
+ * \brief Save a module to a specified path. + * \param path Where to save the serialized module. + */ + void SaveToFile(const std::string& path, const std::string& format) override; + /*! + * \brief Create a module from a file. + * \param path The path of the file containing the serialized module. + * \return The created Ethos-N module. + */ + static Module LoadFromFile(const std::string& path); Review comment: I feel we don't need SaveToFile and LoadFromFile? ---------------------------------------------------------------- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: [email protected]
