This is an automated email from the ASF dual-hosted git repository.
syfeng pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git
The following commit(s) were added to refs/heads/main by this push:
new 84141c4e7e [mrvl][runtime]: Support Marvell Hardware Runtime (#17498)
84141c4e7e is described below
commit 84141c4e7e19543ec3416a84934f77526409caec
Author: Krishna Bindumadhavan <[email protected]>
AuthorDate: Sat Nov 2 13:46:44 2024 +0530
[mrvl][runtime]: Support Marvell Hardware Runtime (#17498)
Change-Id: Id9052552fbb3f19a53462183967d30679f3aa286
Signed-off-by: Krishna Bindumadhavan <[email protected]>
---
cmake/modules/contrib/Mrvl.cmake | 1 +
docs/how_to/deploy/mrvl.rst | 33 +-
python/tvm/relay/op/contrib/mrvl.py | 17 +
src/relay/backend/contrib/mrvl/codegen.cc | 26 +-
src/runtime/contrib/mrvl/mrvl_hw_runtime.cc | 485 ++++++++++++++++++++++++++++
5 files changed, 555 insertions(+), 7 deletions(-)
diff --git a/cmake/modules/contrib/Mrvl.cmake b/cmake/modules/contrib/Mrvl.cmake
index 8bf48e02ca..52907011e6 100644
--- a/cmake/modules/contrib/Mrvl.cmake
+++ b/cmake/modules/contrib/Mrvl.cmake
@@ -20,6 +20,7 @@ if(USE_MRVL)
message(STATUS "Build with Mrvl support")
file(GLOB RUNTIME_MRVL_SRCS
src/runtime/contrib/mrvl/mrvl_runtime.cc
+ src/runtime/contrib/mrvl/mrvl_hw_runtime.cc
src/runtime/contrib/mrvl/mrvl_sw_runtime_lib.cc
)
list(APPEND RUNTIME_SRCS ${RUNTIME_MRVL_SRCS})
diff --git a/docs/how_to/deploy/mrvl.rst b/docs/how_to/deploy/mrvl.rst
index 7b41e2ee3a..a0876fbe5a 100644
--- a/docs/how_to/deploy/mrvl.rst
+++ b/docs/how_to/deploy/mrvl.rst
@@ -100,11 +100,11 @@ integrated MLIP cn10ka processor, using only 4 tiles in
the block.
python3 -m tvm.driver.tvmc compile --target="mrvl, llvm" \
--target-llvm-mtriple=aarch64-linux-gnu --target-llvm-mcpu=neoverse-n2
\
--target-mrvl-num_tiles=4 \
+ --target-mrvl-mattr="hw -quantize=fp16 -wb_pin_ocm=1" \
--cross-compiler aarch64-linux-gnu-gcc \
--output model.tar \
mnist-12.onnx
-The runtime support for hardware acceleration is a WIP, it will be added in
future PR.
3.3. TVMC Compiler: mrvl specific Command Line Options
------------------------------------------------------
@@ -125,7 +125,7 @@ The runtime support for hardware acceleration is a WIP, it
will be added in futu
Maximum number of tiles that may be used, possible values = {1,2,4,8},
defaults to 8
* mattr:
- Attributes for mrvl; possible values = {quantize, wb_pin_ocm}
+ Attributes for mrvl; possible values = {quantize, wb_pin_ocm, run_mode}
mattr specifies the data type, code generation options and optimizations.
@@ -141,8 +141,13 @@ The runtime support for hardware acceleration is a WIP, it
will be added in futu
Optimize runtime by preloading a model's weights and bias into
the on chip memory. Possible values = {0, 1}. Default is 0 (no preload)
-4. Compile ONNX model for Simulator + LLVM / x86_64 target
-----------------------------------------------------------
+ **3. run_mode**
+
+ Specify whether to compile for the simulator or for the target hardware
(Octeon).
+ Possible values = {sim, hw}. Default is sim (software simulator).
+
+4. Compile ONNX model using the TVMC flow
+-----------------------------------------
In the TVMC mrvl flow, the model is partitioned into Marvell and LLVM regions.
Building each partitioned Marvell subgraph generates serialized nodes.json and
@@ -150,6 +155,9 @@ const.json. Partitioned nodes.json is the representation of
the model graph whic
suitable for the Marvell compiler (mrvl-tmlc). The compiler compiles the model
graph to
generate the model binary with MLIP instructions.
+4.1 Compile and Run ONNX model for Simulator + LLVM / x86_64 target
+--------------------------------------------------------------------
+
**Model Compilation for Simulator + LLVM / x86_64 target**
.. code:: python
@@ -165,6 +173,23 @@ Generated model binary is simulated using Marvell's MLIP
Simulator(mrvl-mlsim).
python3 -m tvm.driver.tvmc run --inputs infer.npz --outputs predict.npz
model.tar --number=0
+4.2 Compile and Run ONNX model for Octeon target
+----------------------------------------------------------
+
+**Model Compilation for Octeon target**
+
+Please refer to section 3.2 for the example command line.
+
+**Run TVM models on the Octeon Target**
+
+The cross-compiled binary can be run on the target hardware using the ``tvmc run``
command.
+Alternatively, the RPC flow enables remote execution on the target device from
your
+local machine:
https://tvm.apache.org/docs/how_to/tutorials/cross_compilation_and_rpc.html
+
+.. code:: python
+
+ python3 -m tvm.driver.tvmc run --inputs infer.npz --outputs predict.npz
model.tar
+
5. Compiling a model using Python APIs
--------------------------------------
diff --git a/python/tvm/relay/op/contrib/mrvl.py
b/python/tvm/relay/op/contrib/mrvl.py
index b13cf3d953..6100fcb991 100644
--- a/python/tvm/relay/op/contrib/mrvl.py
+++ b/python/tvm/relay/op/contrib/mrvl.py
@@ -272,6 +272,8 @@ def add_attributes(mod, annotate_target_str, **kwargs):
mod : module with attributes
"""
working_dir = mrvl_contrib.get_working_dir()
+ sim_attr_found = False
+ hw_attr_found = False
if "mattr" in kwargs:
base_opts_str = kwargs.get("mattr")
@@ -286,6 +288,14 @@ def add_attributes(mod, annotate_target_str, **kwargs):
if "wb_pin_ocm" not in base_opts_str:
base_opts_str = f"{base_opts_str} -wb_pin_ocm=0"
+ if "sim" in base_opts_str:
+ sim_attr_found = True
+ base_opts_str = base_opts_str.replace("sim", "")
+
+ if "hw" in base_opts_str:
+ hw_attr_found = True
+ base_opts_str = base_opts_str.replace("hw", "")
+
else:
base_opts_str = "-arch=mlip -quantize=fp16 -wb_pin_ocm=0"
@@ -294,6 +304,12 @@ def add_attributes(mod, annotate_target_str, **kwargs):
elif "num_tiles" not in base_opts_str:
base_opts_str = f"{base_opts_str} -num_tiles=8"
+ mode_string = "sim"
+ if sim_attr_found:
+ mode_string = "sim"
+ elif hw_attr_found:
+ mode_string = "hw"
+
for var in mod.get_global_vars():
func_name = var.name_hint
func = mod[func_name]
@@ -301,6 +317,7 @@ def add_attributes(mod, annotate_target_str, **kwargs):
if annotate_target_str in func_name:
func = func.with_attr("working_dir", working_dir)
func = func.with_attr("compiler_opts_string", base_opts_str)
+ func = func.with_attr("mode", mode_string)
mod.update_func(var, func)
return mod
diff --git a/src/relay/backend/contrib/mrvl/codegen.cc
b/src/relay/backend/contrib/mrvl/codegen.cc
index 96121e4b4b..7c410f565f 100644
--- a/src/relay/backend/contrib/mrvl/codegen.cc
+++ b/src/relay/backend/contrib/mrvl/codegen.cc
@@ -1467,6 +1467,7 @@ runtime::Module MrvlCompiler(const ObjectRef& ref) {
Function func = Downcast<Function>(ref);
std::string func_name = backend::GetExtSymbol(func);
+ const std::string mrvl_run_mode = func->GetAttr<String>("mode").value();
runtime::Module runtime_lib;
// Extract attributes from the frontend to be passed to the runtime
@@ -1485,13 +1486,32 @@ runtime::Module MrvlCompiler(const ObjectRef& ref) {
std::string modified_json = (*modifyConsts)(nodes_json_string,
consts_json_string);
auto json_vec = split(modified_json, '|');
+ // Extract attributes from the nodes_json by key-value lookup using Python
API
+ // These are passed to hardware runtime module for initialization
+ const tvm::runtime::PackedFunc* json_lookup;
+ json_lookup = runtime::Registry::Get("tvm.mrvl.find_value_in_KV_pair");
+ const std::string string_inp = (*json_lookup)(nodes_json_string,
"num_subgraph_inputs");
+ const int num_inputs = std::stoi(string_inp);
+ const std::string string_out = (*json_lookup)(nodes_json_string,
"num_subgraph_outputs");
+ const int num_outputs = std::stoi(string_out);
+ const std::string string_bsize = (*json_lookup)(nodes_json_string,
"batch_size");
+ const int batch_size = std::stoi(string_bsize);
+
// Invoke Marvell Backend compiler to generate binary for sub graph
const auto* compile = runtime::Registry::Get("tvm.mrvl.CompileModel");
std::string bin = (*compile)(func_name, json_vec[0], json_vec[1],
compiler_opt);
- const auto* pf = runtime::Registry::Get("runtime.mrvl_runtime_create");
- ICHECK(pf != nullptr) << "Cannot find software simulator runtime module to
create";
- runtime_lib = (*pf)(func_name, json_vec[0], bin);
+ if (mrvl_run_mode == "sim") {
+ const auto* pf = runtime::Registry::Get("runtime.mrvl_runtime_create");
+ ICHECK(pf != nullptr) << "Cannot find software simulator runtime module to
create";
+ runtime_lib = (*pf)(func_name, json_vec[0], bin);
+ } else if (mrvl_run_mode == "hw") {
+ const auto* pf = runtime::Registry::Get("runtime.mrvl_hw_runtime_create");
+ ICHECK(pf != nullptr) << "Cannot find hardware runtime module to create";
+ runtime_lib = (*pf)(func_name, json_vec[0], bin, num_inputs, num_outputs,
batch_size);
+ } else {
+ ICHECK(0) << "Unrecognized Marvell Run Mode! " << mrvl_run_mode;
+ }
return runtime_lib;
}
diff --git a/src/runtime/contrib/mrvl/mrvl_hw_runtime.cc
b/src/runtime/contrib/mrvl/mrvl_hw_runtime.cc
new file mode 100644
index 0000000000..84b178b313
--- /dev/null
+++ b/src/runtime/contrib/mrvl/mrvl_hw_runtime.cc
@@ -0,0 +1,485 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+/*!
+ * \file src/runtime/contrib/mrvl/mrvl_hw_runtime.cc
+ * \brief runtime implementation for Marvell Target.
+ */
+
+#include <dlfcn.h>
+#include <tvm/runtime/logging.h>
+#include <tvm/runtime/module.h>
+#include <tvm/runtime/ndarray.h>
+#include <tvm/runtime/registry.h>
+
+#include <cstddef>
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <iostream>
+#include <string>
+#include <vector>
+
+#include "../../../../src/support/base64.h"
+#include "mrvl_base64.h"
+
+#define MRVL_LIBMLDPC_DEFAULT_PATH "/usr/lib/libmldpc.so"
+
+namespace tvm {
+namespace runtime {
+namespace contrib {
+
+enum buffer_type { input_quantize = 0, input_dequantize, output_quantize,
output_dequantize };  // Passed as the `dt` argument of the io_alloc / io_free callbacks to pick a buffer.
+enum model_type { TVM = 0, MLIP };  // NOTE(review): not referenced by the code visible in this file.
+
+struct run_args {  // Per-module state handed to the load / quantize / inference callbacks.
+ int model_id;  // Result of the model-load callback (TVMC path) or the id given to "register_cb" (DPDK path).
+ void* i_q_buf;  // Quantized input buffer passed to the inference callback.
+ void* o_q_buf;  // Quantized output buffer passed to the inference callback, then dequantized.
+ int num_batches;  // Batch count passed to the inference callback; set from batch_size at construction.
+ void* device;  // Device handle forwarded to the DPDK callbacks.
+ uint16_t layer_idx;  // Written by mrvl_dpdk_glow_layer_load and passed to mrvl_dpdk_inference.
+};
+
+void* device_handle;  // Stored by the "register_cb" packed function; copied into run_args by "__init_<symbol>".
+int model_id;  // Stored by the "register_cb" packed function; copied into run_args by "__init_<symbol>".
+
+/* Marvell DPDK Interface library callbacks for TVMC.
+ * Function-pointer types for the entry points that get_tvmc_callbacks() resolves with dlsym(). */
+extern "C" typedef int (*mrvl_tvmc_ml_init_ptr)(int argc, char* argv[]);  // symbol "mrvl_ml_init"
+extern "C" typedef int (*mrvl_tvmc_ml_finish_ptr)(void);  // symbol "mrvl_ml_finish"
+extern "C" typedef int (*mrvl_tvmc_ml_model_load_ptr)(char* model_buffer, int
model_size);  // symbol "mrvl_ml_model_load"; the caller treats a negative result as failure
+extern "C" typedef int (*mrvl_tvmc_ml_model_unload_ptr)(int model_id);  // symbol "mrvl_ml_model_unload"
+extern "C" typedef void* (*mrvl_tvmc_ml_io_alloc_ptr)(int model_id, enum
buffer_type dt,
+ uint64_t* size);  // symbol "mrvl_ml_io_alloc"; this file always passes size == nullptr
+extern "C" typedef void (*mrvl_tvmc_ml_io_free_ptr)(int model_id, enum
buffer_type dt, void* addr);  // symbol "mrvl_ml_io_free"
+extern "C" typedef int (*mrvl_tvmc_ml_model_quantize_ptr)(int model_id, void*
dbuffer,
+ void* qbuffer);  // symbol "mrvl_ml_model_quantize"
+extern "C" typedef int (*mrvl_tvmc_ml_model_dequantize_ptr)(int model_id,
void* qbuffer,
+ void* dbuffer);  // symbol "mrvl_ml_model_dequantize"
+extern "C" typedef int (*mrvl_tvmc_ml_model_run_ptr)(int model_id, void*
input_buffer,
+ void* output_buffer, int
num_batches);  // symbol "mrvl_ml_model_run"
+
+struct ml_tvmc_cb {  // Table of library entry points filled in by get_tvmc_callbacks().
+ void* handle;  // Handle returned by dlopen(); nullptr until the library has been opened.
+ mrvl_tvmc_ml_init_ptr mrvl_tvmc_ml_init;  // "mrvl_ml_init"
+ mrvl_tvmc_ml_finish_ptr mrvl_tvmc_ml_finish;  // "mrvl_ml_finish"
+ mrvl_tvmc_ml_model_load_ptr mrvl_tvmc_ml_model_load;  // "mrvl_ml_model_load"
+ mrvl_tvmc_ml_model_unload_ptr mrvl_tvmc_ml_model_unload;  // "mrvl_ml_model_unload"
+ mrvl_tvmc_ml_io_alloc_ptr mrvl_tvmc_ml_io_alloc;  // "mrvl_ml_io_alloc"
+ mrvl_tvmc_ml_io_free_ptr mrvl_tvmc_ml_io_free;  // "mrvl_ml_io_free"
+ mrvl_tvmc_ml_model_quantize_ptr mrvl_tvmc_ml_model_quantize;  // "mrvl_ml_model_quantize"
+ mrvl_tvmc_ml_model_dequantize_ptr mrvl_tvmc_ml_model_dequantize;  // "mrvl_ml_model_dequantize"
+ mrvl_tvmc_ml_model_run_ptr mrvl_tvmc_ml_model_run;  // "mrvl_ml_model_run"
+};
+
+/* DPDK callback functions.
+ * Function-pointer types for the callback table supplied through the "register_cb" packed
+ * function. Callers in this file treat a zero return from the load, unload, io_alloc and
+ * io_free callbacks as success. */
+extern "C" typedef int (*mrvl_dpdk_glow_layer_load_cb)(void* device, uint16_t
model_id,
+ const char* layer_name,
uint8_t* buffer,
+ size_t size, uint16_t*
index);  // `index` is an out-parameter; its value is later passed to the inference callback
+extern "C" typedef int (*mrvl_dpdk_glow_layer_unload_cb)(void* device,
uint16_t model_id,
+ const char*
layer_name);
+
+extern "C" typedef int (*mrvl_dpdk_io_alloc_cb)(void* device, uint16_t
model_id,
+ const char* layer_name,
uint64_t** input_qbuffer,
+ uint64_t** output_qbuffer);  // both buffer arguments are out-parameters
+extern "C" typedef int (*mrvl_dpdk_io_free_cb)(void* device, uint16_t model_id,
+ const char* layer_name);
+
+extern "C" typedef int (*mrvl_dpdk_malloc_cb)(const char* name, size_t size,
uint32_t align,
+ void** addr);  // NOTE(review): never invoked by the code visible in this file
+extern "C" typedef int (*mrvl_dpdk_free_cb)(const char* name);  // NOTE(review): never invoked in this file
+
+extern "C" typedef int (*mrvl_dpdk_quantize_cb)(void* device, uint16_t
model_id,
+ const char* layer_name, const
DLTensor** deq_tensor,
+ void* qbuffer);  // `deq_tensor` is an array of input tensor pointers
+extern "C" typedef int (*mrvl_dpdk_dequantize_cb)(void* device, uint16_t
model_id,
+ const char* layer_name,
void* qbuffer,
+ const DLTensor** deq_tensor);  // `deq_tensor` is an array of output tensor pointers
+extern "C" typedef int (*mrvl_dpdk_inference_cb)(void* device, uint16_t index,
void* input,
+ void* output, uint16_t
nb_batches);
+
+/* Call back functions structure: copied from the pointer passed to the "register_cb" packed function. */
+struct ml_dpdk_cb {
+ mrvl_dpdk_glow_layer_load_cb mrvl_dpdk_glow_layer_load;  // Loads the decoded model binary for a layer.
+ mrvl_dpdk_glow_layer_unload_cb mrvl_dpdk_glow_layer_unload;  // Called from the module destructor.
+ mrvl_dpdk_io_alloc_cb mrvl_dpdk_io_alloc;  // Provides the quantized input / output buffers.
+ mrvl_dpdk_io_free_cb mrvl_dpdk_io_free;  // Called from the module destructor.
+ mrvl_dpdk_malloc_cb mrvl_dpdk_malloc;  // NOTE(review): not called in this file.
+ mrvl_dpdk_free_cb mrvl_dpdk_free;  // NOTE(review): not called in this file.
+ mrvl_dpdk_quantize_cb mrvl_dpdk_quantize;  // Called before inference with the input tensors.
+ mrvl_dpdk_dequantize_cb mrvl_dpdk_dequantize;  // Called after inference with the output tensors.
+ mrvl_dpdk_inference_cb mrvl_dpdk_inference;  // Runs the loaded layer on the quantized buffers.
+};
+
+/*!
+ * \brief Open the Marvell ML interface library and resolve the entry points used by TVMC.
+ * \param so_path Path of the shared library handed to dlopen().
+ * \param obj Callback table to fill; receives the library handle and one function pointer
+ *        per entry point.
+ *
+ * Fails via ICHECK when the library cannot be opened or when any entry point is missing, so a
+ * broken installation is reported here rather than as a call through a null function pointer.
+ */
+void get_tvmc_callbacks(const char* so_path, ml_tvmc_cb* obj) {
+  ICHECK(obj != nullptr) << "Marvell-Runtime-ERROR Callback table must not be null";
+
+  obj->handle = dlopen(so_path, RTLD_LAZY);
+  if (obj->handle == nullptr) {
+    // dlerror() may legitimately return nullptr; never stream a null char pointer.
+    const char* reason = dlerror();
+    ICHECK(false) << "Marvell-Runtime-ERROR Loading shared library failed: "
+                  << (reason != nullptr ? reason : "unknown error");
+  }
+
+  // Resolve a single symbol and insist that the library exports it.
+  auto resolve = [obj](const char* symbol) -> void* {
+    void* addr = dlsym(obj->handle, symbol);
+    ICHECK(addr != nullptr) << "Marvell-Runtime-ERROR Symbol not found in shared library: "
+                            << symbol;
+    return addr;
+  };
+
+  obj->mrvl_tvmc_ml_init = reinterpret_cast<mrvl_tvmc_ml_init_ptr>(resolve("mrvl_ml_init"));
+  obj->mrvl_tvmc_ml_finish = reinterpret_cast<mrvl_tvmc_ml_finish_ptr>(resolve("mrvl_ml_finish"));
+  obj->mrvl_tvmc_ml_model_load =
+      reinterpret_cast<mrvl_tvmc_ml_model_load_ptr>(resolve("mrvl_ml_model_load"));
+  obj->mrvl_tvmc_ml_model_unload =
+      reinterpret_cast<mrvl_tvmc_ml_model_unload_ptr>(resolve("mrvl_ml_model_unload"));
+  obj->mrvl_tvmc_ml_io_alloc =
+      reinterpret_cast<mrvl_tvmc_ml_io_alloc_ptr>(resolve("mrvl_ml_io_alloc"));
+  obj->mrvl_tvmc_ml_io_free =
+      reinterpret_cast<mrvl_tvmc_ml_io_free_ptr>(resolve("mrvl_ml_io_free"));
+  obj->mrvl_tvmc_ml_model_quantize =
+      reinterpret_cast<mrvl_tvmc_ml_model_quantize_ptr>(resolve("mrvl_ml_model_quantize"));
+  obj->mrvl_tvmc_ml_model_dequantize =
+      reinterpret_cast<mrvl_tvmc_ml_model_dequantize_ptr>(resolve("mrvl_ml_model_dequantize"));
+  obj->mrvl_tvmc_ml_model_run =
+      reinterpret_cast<mrvl_tvmc_ml_model_run_ptr>(resolve("mrvl_ml_model_run"));
+}
+
+/*!
+ * \brief Runtime module that loads a Marvell-compiled subgraph binary and runs it on the
+ * target hardware.
+ *
+ * Two execution paths exist:
+ *  - TVMC path (default): the interface library is opened with dlopen() and its entry points
+ *    load the model, quantize the inputs, run inference and dequantize the outputs.
+ *  - DPDK path: selected once the "register_cb" packed function has supplied a table of DPDK
+ *    callbacks together with a device handle and a model id.
+ *
+ * \param symbol_name The name of the subgraph / relay function
+ * \param nodes_json The serialized JSON representation of relay function
+ * \param bin_code The base64-encoded binary generated by the Marvell backend compiler for
+ *        the subgraph
+ * \param input_count Number of subgraph inputs
+ * \param output_count Number of subgraph outputs
+ * \param batch_size Batch count
+ */
+class MarvellHardwareModuleNode : public ModuleNode {
+ public:
+  MarvellHardwareModuleNode(const std::string& symbol_name, const std::string& nodes_json,
+                            const std::string& bin_code, const int input_count,
+                            const int output_count, const int batch_size)
+      : symbol_name_(symbol_name),
+        nodes_json_(nodes_json),
+        bin_code_(bin_code),
+        num_inputs_(input_count),
+        num_outputs_(output_count) {
+    run_arg.num_batches = batch_size;
+  }
+
+  /*!
+   * \brief Release the I/O buffers and unload the model, if this instance loaded one.
+   *
+   * Failures are logged instead of raised: throwing from a destructor would terminate the
+   * process.
+   */
+  ~MarvellHardwareModuleNode() {
+    // "__init_<symbol>" never completed a load for this instance: nothing to release, and the
+    // ids / buffers in run_arg do not refer to anything owned by this module.
+    if (!model_loaded_) return;
+
+    if (use_dpdk_cb) {
+      // Deallocate input quantize and output quantize buffer
+      int ret =
+          dpdk_cb_.mrvl_dpdk_io_free(run_arg.device, run_arg.model_id, symbol_name_.c_str());
+      if (ret != 0) LOG(ERROR) << "IO free failed, model_id =" << run_arg.model_id;
+
+      // Unload model
+      ret = dpdk_cb_.mrvl_dpdk_glow_layer_unload(run_arg.device, run_arg.model_id,
+                                                 symbol_name_.c_str());
+      if (ret != 0) LOG(ERROR) << "Model layer unload failed, model_id =" << run_arg.model_id;
+      num_loaded--;
+    } else if (tvmc_cb_.handle != nullptr) {
+      // Deallocate input quantize and dequant buffer
+      tvmc_cb_.mrvl_tvmc_ml_io_free(run_arg.model_id, input_quantize, run_arg.i_q_buf);
+      tvmc_cb_.mrvl_tvmc_ml_io_free(run_arg.model_id, input_dequantize, i_d_buf);
+      // Deallocate output quantize and dequant buffer
+      tvmc_cb_.mrvl_tvmc_ml_io_free(run_arg.model_id, output_quantize, run_arg.o_q_buf);
+      tvmc_cb_.mrvl_tvmc_ml_io_free(run_arg.model_id, output_dequantize, o_d_buf);
+      // Unload model
+      tvmc_cb_.mrvl_tvmc_ml_model_unload(run_arg.model_id);
+      num_loaded--;
+      // All models unloaded; finish the session
+      if (num_loaded == 0) tvmc_cb_.mrvl_tvmc_ml_finish();
+    }
+  }
+
+  const char* type_key() const final { return "mrvl_hw"; }
+
+  int GetPropertyMask() const final {
+    return ModulePropertyMask::kBinarySerializable | ModulePropertyMask::kRunnable;
+  }
+
+  /*!
+   * \brief Get a packed function.
+   *
+   * Recognized names: "get_symbol", "register_cb" (args: DPDK callback table pointer, device
+   * handle, model id), "get_const_vars", the subgraph symbol itself (runs inference) and
+   * "__init_<symbol>" (loads the model).
+   *
+   * \param name The name/symbol of the function.
+   * \param sptr_to_self The pointer to the module node.
+   * \return The packed function, or a null PackedFunc when the name is not recognized.
+   */
+  PackedFunc GetFunction(const String& name, const ObjectPtr<Object>& sptr_to_self) override {
+    if (name == "get_symbol") {
+      return PackedFunc(
+          [sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { *rv = this->symbol_name_; });
+    } else if (name == "register_cb") {
+      return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
+        ICHECK_GE(args.size(), 3) << "register_cb expects (callbacks, device, model_id)";
+        auto* callbacks = static_cast<ml_dpdk_cb*>(args[0].value().v_handle);
+        ICHECK(callbacks != nullptr) << "register_cb received a null callback table";
+        dpdk_cb_ = *callbacks;
+        device_handle = args[1].value().v_handle;
+        model_id = args[2];
+        use_dpdk_cb = true;
+      });
+    } else if (name == "get_const_vars") {
+      return PackedFunc(
+          [sptr_to_self, this](TVMArgs args, TVMRetValue* rv) { *rv = Array<String>{}; });
+    } else if (this->symbol_name_ == name) {
+      return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
+        RunInference(args);
+        *rv = 0;
+      });
+    } else if ("__init_" + this->symbol_name_ == name) {
+      return PackedFunc([sptr_to_self, this](TVMArgs args, TVMRetValue* rv) {
+        // Pick up whatever "register_cb" stored; on the TVMC path model_id is overwritten by
+        // the result of the model-load call.
+        run_arg.device = device_handle;
+        run_arg.model_id = model_id;
+        load_and_initialize_model();
+        *rv = 0;
+      });
+    }
+    return PackedFunc(nullptr);
+  }
+
+  /*!
+   * \brief Serialize the module.
+   * \param stream Destination stream; fields are written in the order LoadFromBinary reads
+   *        them.
+   */
+  void SaveToBinary(dmlc::Stream* stream) override {
+    stream->Write(symbol_name_);
+    stream->Write(nodes_json_);
+    stream->Write(bin_code_);
+    stream->Write(num_inputs_);
+    stream->Write(num_outputs_);
+    stream->Write(run_arg.num_batches);
+  }
+
+  /*!
+   * \brief Reconstruct a module from the stream layout produced by SaveToBinary.
+   * \param strm Pointer to a dmlc::Stream.
+   * \return The deserialized module.
+   */
+  static Module LoadFromBinary(void* strm) {
+    dmlc::Stream* stream = static_cast<dmlc::Stream*>(strm);
+    std::string symbol_name;
+    std::string nodes_json;
+    std::string bin_code;
+    int num_inputs = 0;
+    int num_outputs = 0;
+    int batch_size = 0;
+
+    // Load the symbol_name and other data to construct the module
+    ICHECK(stream->Read(&symbol_name)) << "Loading symbol name failed";
+    ICHECK(stream->Read(&nodes_json)) << "Loading nodes json failed";
+    ICHECK(stream->Read(&bin_code)) << "Loading binary code failed";
+    ICHECK(stream->Read(&num_inputs)) << "Loading num_inputs failed";
+    ICHECK(stream->Read(&num_outputs)) << "Loading num_outputs failed";
+    ICHECK(stream->Read(&batch_size)) << "Loading batch_size failed";
+    auto n = make_object<MarvellHardwareModuleNode>(symbol_name, nodes_json, bin_code,
+                                                    num_inputs, num_outputs, batch_size);
+    return Module(n);
+  }
+
+  /*!
+   * \brief Get the source generated by codegen.
+   *
+   * \param format the format to return.
+   * \return A string of JSON.
+   */
+  String GetSource(const String& format = "json") override { return nodes_json_; }
+
+ protected:
+  std::string symbol_name_;
+  std::string nodes_json_;
+  std::string bin_code_;
+  int num_inputs_;
+  int num_outputs_;
+  static ml_tvmc_cb tvmc_cb_;
+  static ml_dpdk_cb dpdk_cb_;
+  static bool initialized_model;
+  static int num_loaded;
+  void* i_d_buf = nullptr;
+  void* o_d_buf = nullptr;
+  // Value-initialized so ids and buffer pointers are never read uninitialized.
+  struct run_args run_arg {};
+  static bool use_dpdk_cb;
+  // True once this instance has successfully loaded its model; gates cleanup in the destructor.
+  bool model_loaded_ = false;
+
+  /*! \brief Fetch argument idx as a DLTensor; accepts an NDArray or a DLTensor handle. */
+  static const DLTensor* GetTensorArg(const TVMArgs& args, int idx) {
+    if (args[idx].IsObjectRef<NDArray>()) {
+      NDArray arr = args[idx];
+      return arr.operator->();
+    }
+    return args[idx].operator DLTensor*();
+  }
+
+  /*! \brief Address of the first element; byte_offset is expressed in bytes, not elements. */
+  static void* TensorData(const DLTensor* tensor) {
+    return static_cast<char*>(tensor->data) + tensor->byte_offset;
+  }
+
+  /*! \brief Product of all dimensions of the tensor shape. */
+  static uint64_t NumElements(const DLTensor* tensor) {
+    uint64_t count = 1;
+    for (int i = 0; i < tensor->ndim; ++i) {
+      count *= tensor->shape[i];
+    }
+    return count;
+  }
+
+  /*!
+   * \brief Run one inference through the TVMC library entry points.
+   * \param args The subgraph inputs followed by the outputs.
+   *
+   * With a single input (output) the tensor memory is quantized from (dequantized into)
+   * directly; with several, tensors are packed back to back as float into the staging buffers
+   * i_d_buf / o_d_buf.
+   * NOTE(review): the return codes of the quantize / run / dequantize calls are ignored, as
+   * their success convention is not visible here - confirm against the library API.
+   */
+  void RunInference_TVMC(TVMArgs args) {
+    if (num_inputs_ == 1) {
+      // Perform Quantization
+      tvmc_cb_.mrvl_tvmc_ml_model_quantize(run_arg.model_id, TensorData(GetTensorArg(args, 0)),
+                                           run_arg.i_q_buf);
+    } else if (num_inputs_ > 1) {
+      float* staging = reinterpret_cast<float*>(i_d_buf);
+      for (int in = 0; in < num_inputs_; ++in) {
+        const DLTensor* tensor = GetTensorArg(args, in);
+        const uint64_t count = NumElements(tensor);
+        std::memcpy(staging, TensorData(tensor), sizeof(float) * count);
+        staging += count;
+      }
+      // Perform Quantization
+      tvmc_cb_.mrvl_tvmc_ml_model_quantize(run_arg.model_id, i_d_buf, run_arg.i_q_buf);
+    }
+
+    tvmc_cb_.mrvl_tvmc_ml_model_run(run_arg.model_id, run_arg.i_q_buf, run_arg.o_q_buf,
+                                    run_arg.num_batches);
+
+    if (num_outputs_ == 1) {
+      const DLTensor* out_tensor = GetTensorArg(args, num_inputs_);
+      tvmc_cb_.mrvl_tvmc_ml_model_dequantize(run_arg.model_id, run_arg.o_q_buf,
+                                             TensorData(out_tensor));
+    } else {
+      tvmc_cb_.mrvl_tvmc_ml_model_dequantize(run_arg.model_id, run_arg.o_q_buf, o_d_buf);
+      const float* staging = reinterpret_cast<const float*>(o_d_buf);
+      for (int out = num_inputs_; out < args.size(); ++out) {
+        const DLTensor* out_tensor = GetTensorArg(args, out);
+        const uint64_t count = NumElements(out_tensor);
+        std::memcpy(TensorData(out_tensor), staging, sizeof(float) * count);
+        staging += count;
+      }
+    }
+  }
+
+  /*!
+   * \brief Run one inference through the registered DPDK callbacks.
+   * \param args The subgraph inputs followed by the outputs.
+   *
+   * Tensor pointer arrays are sized from the actual argument count, so there is no fixed
+   * upper bound on the number of inputs or outputs.
+   */
+  void RunInference_DPDK(TVMArgs args) {
+    std::vector<const DLTensor*> inputs;
+    for (int in = 0; in < num_inputs_; ++in) {
+      inputs.push_back(GetTensorArg(args, in));
+    }
+
+    dpdk_cb_.mrvl_dpdk_quantize(run_arg.device, run_arg.model_id, symbol_name_.c_str(),
+                                inputs.data(), run_arg.i_q_buf);
+
+    dpdk_cb_.mrvl_dpdk_inference(run_arg.device, run_arg.layer_idx, run_arg.i_q_buf,
+                                 run_arg.o_q_buf, run_arg.num_batches);
+
+    std::vector<const DLTensor*> outputs;
+    for (int out = num_inputs_; out < args.size(); ++out) {
+      outputs.push_back(GetTensorArg(args, out));
+    }
+
+    dpdk_cb_.mrvl_dpdk_dequantize(run_arg.device, run_arg.model_id, symbol_name_.c_str(),
+                                  run_arg.o_q_buf, outputs.data());
+  }
+
+  /*! \brief Dispatch to the DPDK or TVMC inference path. */
+  void RunInference(TVMArgs args) {
+    if (use_dpdk_cb) {
+      RunInference_DPDK(args);
+    } else {
+      RunInference_TVMC(args);
+    }
+  }
+
+  /*!
+   * \brief Decode the model binary, load it on the device and obtain the I/O buffers.
+   *
+   * On the TVMC path the first call also opens the interface library (path taken from the
+   * MRVL_LIBMLDPC_PATH environment variable, else MRVL_LIBMLDPC_DEFAULT_PATH) and initializes
+   * it. Calling this again after a successful load is a no-op.
+   */
+  void load_and_initialize_model() {
+    // A second load would leak the first model and unbalance num_loaded.
+    if (model_loaded_) return;
+
+    // Load dll and get the APIs from Library
+    if (!use_dpdk_cb && !initialized_model) {
+      const char* env_path = std::getenv("MRVL_LIBMLDPC_PATH");
+      const std::string libpath = (env_path != nullptr) ? env_path : MRVL_LIBMLDPC_DEFAULT_PATH;
+      std::cout << "MRVL_LIBMLDPC_PATH: " << libpath << std::endl;
+      get_tvmc_callbacks(libpath.c_str(), &tvmc_cb_);
+      int argc = 1;
+      char* argv[] = {const_cast<char*>("tvmc")};
+      tvmc_cb_.mrvl_tvmc_ml_init(argc, argv);
+      initialized_model = true;
+    }
+
+    // Create byte array to pass to the init function
+    int num_bytes = tvm::runtime::contrib::mrvl::b64strlen(bin_code_);
+    std::vector<unsigned char> byte_array(num_bytes);
+    tvm::runtime::contrib::mrvl::b64decode(bin_code_, byte_array.data());
+
+    if (use_dpdk_cb) {
+      int ret = dpdk_cb_.mrvl_dpdk_glow_layer_load(
+          run_arg.device, run_arg.model_id, symbol_name_.c_str(),
+          reinterpret_cast<uint8_t*>(byte_array.data()), num_bytes, &run_arg.layer_idx);
+      ICHECK(ret == 0) << "Model layer load failed, model_id =" << run_arg.model_id;
+      num_loaded++;
+      model_loaded_ = true;
+
+      // Allocate input quantize and output quantize buffer
+      ret = dpdk_cb_.mrvl_dpdk_io_alloc(run_arg.device, run_arg.model_id, symbol_name_.c_str(),
+                                        reinterpret_cast<uint64_t**>(&run_arg.i_q_buf),
+                                        reinterpret_cast<uint64_t**>(&run_arg.o_q_buf));
+      ICHECK(ret == 0) << "IO alloc failed, model_id =" << run_arg.model_id;
+    } else {
+      // Load the model
+      run_arg.model_id = tvmc_cb_.mrvl_tvmc_ml_model_load(
+          reinterpret_cast<char*>(byte_array.data()), num_bytes);
+      ICHECK(run_arg.model_id >= 0) << "Failed to load model!";
+      num_loaded++;
+      model_loaded_ = true;
+
+      // Allocate input quantize and dequant buffer
+      run_arg.i_q_buf = tvmc_cb_.mrvl_tvmc_ml_io_alloc(run_arg.model_id, input_quantize, nullptr);
+      i_d_buf = tvmc_cb_.mrvl_tvmc_ml_io_alloc(run_arg.model_id, input_dequantize, nullptr);
+      // Allocate output quantize and dequant buffer
+      run_arg.o_q_buf = tvmc_cb_.mrvl_tvmc_ml_io_alloc(run_arg.model_id, output_quantize, nullptr);
+      o_d_buf = tvmc_cb_.mrvl_tvmc_ml_io_alloc(run_arg.model_id, output_dequantize, nullptr);
+    }
+  }
+};
+
+/*!
+ * \brief Factory for the Marvell hardware runtime module.
+ * \param symbol_name Name of the subgraph / relay function.
+ * \param nodes_json Serialized JSON of the subgraph.
+ * \param bin_code Model binary produced by the backend compiler.
+ * \param num_input Number of subgraph inputs.
+ * \param num_output Number of subgraph outputs.
+ * \param batch_size Batch count.
+ * \return A runtime::Module wrapping a new MarvellHardwareModuleNode.
+ */
+runtime::Module MarvellHardwareModuleRuntimeCreate(const String& symbol_name,
+                                                   const String& nodes_json,
+                                                   const String& bin_code, int num_input,
+                                                   int num_output, int batch_size) {
+  // Construct the node in place and wrap it directly in the module handle.
+  return runtime::Module(make_object<MarvellHardwareModuleNode>(
+      symbol_name, nodes_json, bin_code, num_input, num_output, batch_size));
+}
+
+bool MarvellHardwareModuleNode::initialized_model = false;  // Set once the library callbacks are loaded and the init entry point has been called.
+int MarvellHardwareModuleNode::num_loaded = 0;  // Incremented per successful model load, decremented in the destructor.
+bool MarvellHardwareModuleNode::use_dpdk_cb = false;  // Set to true by the "register_cb" packed function.
+ml_tvmc_cb MarvellHardwareModuleNode::tvmc_cb_ = {};  // Filled by get_tvmc_callbacks(); shared by all instances.
+ml_dpdk_cb MarvellHardwareModuleNode::dpdk_cb_ = {};  // Copied from the table passed to "register_cb"; shared by all instances.
+
+TVM_REGISTER_GLOBAL("runtime.mrvl_hw_runtime_create")
+ .set_body_typed(MarvellHardwareModuleRuntimeCreate);  // Looked up by the Marvell codegen when the run mode is "hw".
+TVM_REGISTER_GLOBAL("runtime.module.loadbinary_mrvl_hw")
+ .set_body_typed(MarvellHardwareModuleNode::LoadFromBinary);  // Name suffix matches type_key() "mrvl_hw".
+} // namespace contrib
+} // namespace runtime
+} // namespace tvm