This is an automated email from the ASF dual-hosted git repository.
srk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git
The following commit(s) were added to refs/heads/main by this push:
new def551dfd5 [CLI TOOLS][RTVM] Improve rtvm tool with new options to
measure native performance (#15818)
def551dfd5 is described below
commit def551dfd50bfff4e9d50108dc4e8027b553b8ec
Author: Siva <[email protected]>
AuthorDate: Fri Sep 29 10:30:20 2023 +0530
[CLI TOOLS][RTVM] Improve rtvm tool with new options to measure native
performance (#15818)
* [RTVM] Improve rtvm tool with new options to measure native performance
Few fixes and enhancements that affect model loading times
New options to measure performance.
* * review comments
* * review comments
---
apps/cpp_rtvm/README.md | 22 +++++
apps/cpp_rtvm/main.cc | 199 ++++++++++++++++++++++++++++++++++++++------
apps/cpp_rtvm/tvm_runner.cc | 129 +++++++++++++++++++++-------
apps/cpp_rtvm/tvm_runner.h | 24 +++++-
4 files changed, 316 insertions(+), 58 deletions(-)
diff --git a/apps/cpp_rtvm/README.md b/apps/cpp_rtvm/README.md
index c60a7b0e12..652d46eb58 100644
--- a/apps/cpp_rtvm/README.md
+++ b/apps/cpp_rtvm/README.md
@@ -122,6 +122,11 @@ Command line usage
--input - Numpy file for the model input (optional and we use random of
not given)
--output - Numpy file name to dump the model output as numpy
--dump-meta - Dump model meta information
+--pre-compiled - The file name of a file where pre-compiled programs should be
stored
+--profile - Profile over all execution
+--dry-run - Profile after given dry runs, default 10
+--run-count - Profile for given runs, default 50
+--zero-copy - Profile with zero copy api
Example
./rtvm --model=keras-resnet50 --device="opencl" --dump-meta
@@ -366,3 +371,20 @@ stored. If the pre-compiled file name was passed to the
`rtvm` then After method
`Load`, method `UsePreCompiledProgram` is called. This method loads
pre-compiled
programs if the file exists. In opposite case the file will be created and
pre-compiled programs will be saved to this file.
+
+# Performance Profiling Options
+The tool has added few options to measure wall clock performance of the given
model on Target natively.
+--profile : Can turn on the profiling
+--dry-run : The number of times to dry run the model before measuring the
performance. Default value is 10
+--run-count : The number of times to run the model and take an average. Default
value is 50.
+--zero-copy: This option enables graph runtime zero copy to be used for input
and output rather than byte copy to DLTensor.
+
+Performance profile options dumps information summary as given below.
+ Module Load :27 ms
+ Graph Runtime Create :11 ms
+ Params Read :15 ms
+ Params Set :41 ms
+ Pre Compiled Progs Load :24 ms
+Total Load Time :118 ms
+Average ExecTime :27 ms
+Unload Time :35.9236 ms
diff --git a/apps/cpp_rtvm/main.cc b/apps/cpp_rtvm/main.cc
index c38a5f62bd..dc3cf1c414 100644
--- a/apps/cpp_rtvm/main.cc
+++ b/apps/cpp_rtvm/main.cc
@@ -29,6 +29,7 @@
#endif
#include <dmlc/logging.h>
+#include <chrono>
#include <cstring>
#include <iostream>
#include <sstream>
@@ -54,7 +55,11 @@ static const string kUsage =
"--input - Numpy file for the model input (optional and we use
random of not given)\n"
"--output - Numpy file name to dump the model output as numpy\n"
"--dump-meta - Dump model meta information\n"
- "--pre-compiled - The file name of a file where pre-compiled programs
should be stored"
+ "--pre-compiled - The file name of a file where pre-compiled programs
should be stored\n"
+ "--profile - Profile over all execution\n"
+ "--dry-run - Profile after given dry runs, default 10\n"
+ "--run-count - Profile for given runs, default 50\n"
+ "--zero-copy - Profile with zero copy api\n"
"\n"
" Example\n"
" ./rtvm --model=keras-resnet50 --device=\"opencl\" --dump-meta\n"
@@ -68,6 +73,7 @@ static const string kUsage =
* \arg input Numpy file for the model input
* \arg output Numpy file name to dump the model output as numpy
* \arg pre_compiled File name where pre-compiled programs should be stored
+ * \arg profile Do we profile overall execution
*/
struct ToolArgs {
string model;
@@ -75,7 +81,11 @@ struct ToolArgs {
string input;
string output;
string pre_compiled;
- bool dump_meta = false;
+ bool dump_meta{false};
+ bool profile{false};
+ int dry_run{10};
+ int run_count{50};
+ bool zero_copy{false};
};
/*!
@@ -89,6 +99,10 @@ void PrintArgs(const ToolArgs& args) {
LOG(INFO) << "Output = " << args.output;
LOG(INFO) << "Pre-compiled = " << args.pre_compiled;
LOG(INFO) << "Dump Metadata = " << ((args.dump_meta) ? ("True") : ("False"));
+ LOG(INFO) << "Profile = " << ((args.profile) ? ("True") : ("False"));
+ LOG(INFO) << "Dry Run = " << args.dry_run;
+ LOG(INFO) << "Run Count = " << args.run_count;
+ LOG(INFO) << "Zero Copy = " << ((args.zero_copy) ? ("True") : ("False"));
}
#if defined(__linux__) || defined(__ANDROID__)
@@ -178,6 +192,26 @@ void ParseCmdArgs(int argc, char* argv[], struct ToolArgs&
args) {
}
args.pre_compiled = GetCmdOption(argc, argv, "--pre-compiled=");
+
+ const string pprofile = GetCmdOption(argc, argv, "--profile", true);
+ if (!pprofile.empty()) {
+ args.profile = true;
+ }
+
+ const string pdry_run = GetCmdOption(argc, argv, "--dry-run=");
+ if (!pdry_run.empty()) {
+ args.dry_run = stoi(pdry_run);
+ }
+
+ const string prun = GetCmdOption(argc, argv, "--run-count=");
+ if (!prun.empty()) {
+ args.run_count = stoi(prun);
+ }
+
+ const string pzcopy = GetCmdOption(argc, argv, "--zero-copy", true);
+ if (!pzcopy.empty()) {
+ args.zero_copy = true;
+ }
}
/*!
@@ -192,59 +226,174 @@ int ExecuteModel(ToolArgs& args) {
#endif
// Initialize TVM Runner
- TVMRunner runner = TVMRunner(args.model, args.device);
+ auto runner = new TVMRunner(args.model, args.device);
// Load the model
- runner.Load();
+ runner->Load();
if (!args.pre_compiled.empty()) {
- runner.UsePreCompiledPrograms(args.pre_compiled);
+ runner->UsePreCompiledPrograms(args.pre_compiled);
}
// Query Model meta Information
- TVMMetaInfo mInfo = runner.GetMetaInfo();
+ TVMMetaInfo mInfo = runner->GetMetaInfo();
// Print Meta Information
- if (args.dump_meta) runner.PrintMetaInfo();
+ if (args.dump_meta) runner->PrintMetaInfo();
+
+ int total_exec_time = 0;
+
+ if (args.profile) {
+ if (args.dry_run) {
+ for (int ii = 0; ii < args.dry_run; ++ii) {
+ runner->Run();
+ }
+ TVMSynchronize(GetTVMDevice(args.device), 0, nullptr);
+ }
+ int total_time = 0;
+ std::map<std::string, NDArray> input_data_even, input_data_odd;
+ std::map<std::string, NDArray> output_data_even, output_data_odd;
+
+ std::map<std::string, char*> input_data;
+ std::map<std::string, char*> output_data;
+
+ // Alloc / populate and keep input data ready
+ for (auto& elem : mInfo.input_info) {
+ if (args.zero_copy) {
+ auto ndarr =
+ NDArray::Empty(elem.second.first,
tvm::runtime::String2DLDataType(elem.second.second),
+ DLDevice{GetTVMDevice(args.device), 0});
+ input_data_even.insert({elem.first, ndarr});
+
+ ndarr =
+ NDArray::Empty(elem.second.first,
tvm::runtime::String2DLDataType(elem.second.second),
+ DLDevice{GetTVMDevice(args.device), 0});
+ input_data_odd.insert({elem.first, ndarr});
+ } else {
+ char* data = (char*)malloc(runner->GetInputMemSize(elem.first));
+ input_data.insert({elem.first, data});
+ }
+ }
+
+ // Alloc and keep output buffers ready
+ for (auto& elem : mInfo.output_info) {
+ if (args.zero_copy) {
+ auto ndarr =
+ NDArray::Empty(elem.second.first,
tvm::runtime::String2DLDataType(elem.second.second),
+ DLDevice{GetTVMDevice(args.device), 0});
+ output_data_even.insert({elem.first, ndarr});
+
+ ndarr =
+ NDArray::Empty(elem.second.first,
tvm::runtime::String2DLDataType(elem.second.second),
+ DLDevice{GetTVMDevice(args.device), 0});
+ output_data_odd.insert({elem.first, ndarr});
+ } else {
+ char* data = (char*)malloc(runner->GetOutputMemSize(elem.first));
+ output_data.insert({elem.first, data});
+ }
+ }
+
+ for (int ii = 0; ii < args.run_count; ++ii) {
+ // Timer start
+ auto tstart = std::chrono::high_resolution_clock::now();
+ // Set random input for all input
+ for (auto& elem : mInfo.input_info) {
+ if (args.zero_copy) {
+ if (ii % 2) {
+ runner->SetInput(elem.first, input_data_even[elem.first]);
+ } else {
+ runner->SetInput(elem.first, input_data_odd[elem.first]);
+ }
+ } else {
+ runner->SetInput(elem.first, input_data[elem.first]);
+ }
+ }
+
+ if (args.zero_copy) {
+ // With zero copy set the result NDArray up front
+ for (auto& elem : mInfo.output_info) {
+ if (ii % 2) {
+ runner->SetOutput(elem.first, output_data_even[elem.first]);
+ } else {
+ runner->SetOutput(elem.first, output_data_odd[elem.first]);
+ }
+ }
+ }
- if (args.input.empty() || args.output.empty()) {
+ // Run the model
+ runner->Run();
+
+ if (!args.zero_copy) {
+ // W/o zero copy we need to invoke explicit data copy
+ for (auto& elem : mInfo.output_info) {
+ runner->GetOutput(elem.first, output_data[elem.first]);
+ }
+ } else {
+ // Just wait for the run to complete.
+ TVMSynchronize(GetTVMDevice(args.device), 0, nullptr);
+ }
+
+ // Timer end
+ auto tend = std::chrono::high_resolution_clock::now();
+ LOG(INFO) << "Exec Time:" << static_cast<double>((tend -
tstart).count()) / 1e6;
+ total_exec_time += static_cast<double>((tend - tstart).count()) / 1e6;
+ }
+
+ // Free input buffers
+ for (auto& elem : mInfo.input_info) {
+ free(input_data[elem.first]);
+ }
+
+ // Free output buffers
+ for (auto& elem : mInfo.output_info) {
+ free(output_data[elem.first]);
+ }
+ } else if (!args.input.empty() && !args.output.empty()) {
+ LOG(INFO) << "Executing with Input:" << args.input << " Output:" <<
args.output;
+ // Set Input from Numpy Input
+ runner->SetInput(args.input);
+ // Run the model
+ runner->Run();
+ // Get Output as Numpy dump
+ runner->GetOutput(args.output);
+ } else {
LOG(INFO) << "Executing dry run ... ";
// Set random input for all inputs
for (auto& elem : mInfo.input_info) {
LOG(INFO) << "Set Random Input for :" << elem.first;
auto shape = elem.second.first;
- size_t ssize = runner.GetInputMemSize(elem.first);
+ size_t ssize = runner->GetInputMemSize(elem.first);
char* data = (char*)malloc(ssize);
LOG(INFO) << "Random Input Size:" << ssize << " bytes";
- runner.SetInput(elem.first, data);
+ runner->SetInput(elem.first, data);
free(data);
}
-
// Run the model
- runner.Run();
-
+ runner->Run();
// Get Output and dump few values
for (auto& elem : mInfo.output_info) {
LOG(INFO) << "Get Output for :" << elem.first;
auto shape = elem.second.first;
- size_t ssize = runner.GetOutputMemSize(elem.first);
+ size_t ssize = runner->GetOutputMemSize(elem.first);
char* data = (char*)malloc(ssize);
- runner.GetOutput(elem.first, data);
+ runner->GetOutput(elem.first, data);
LOG(INFO) << "Output Size:" << ssize << " bytes";
free(data);
}
- } else {
- LOG(INFO) << "Executing with Input:" << args.input << " Output:" <<
args.output;
-
- // Set Input from Numpy Input
- runner.SetInput(args.input);
-
- // Run the model
- runner.Run();
+ }
- // Get Output as Numpy dump
- runner.GetOutput(args.output);
+ if (args.profile) {
+ // Print Stats
+ runner->PrintStats();
}
+ auto tstart = std::chrono::high_resolution_clock::now();
+ delete runner;
+ auto tend = std::chrono::high_resolution_clock::now();
+ if (args.profile) {
+ LOG(INFO) << "Average ExecTime :" << total_exec_time / args.run_count << "
ms";
+ LOG(INFO) << "Unload Time :" << static_cast<double>((tend -
tstart).count()) / 1e6
+ << " ms";
+ }
return 0;
}
diff --git a/apps/cpp_rtvm/tvm_runner.cc b/apps/cpp_rtvm/tvm_runner.cc
index 2fd4f2281e..7d6dbc23ee 100644
--- a/apps/cpp_rtvm/tvm_runner.cc
+++ b/apps/cpp_rtvm/tvm_runner.cc
@@ -26,10 +26,12 @@
#include <cnpy.h>
+#include <chrono>
#include <fstream>
#include <iterator>
#include <streambuf>
#include <string>
+#include <vector>
namespace tvm {
namespace runtime {
@@ -39,25 +41,25 @@ namespace runtime {
* \param device the target device in string format.
* \return dl_device corresponding to the device string.
*/
-int GetTVMDevice(std::string device) {
+DLDeviceType GetTVMDevice(std::string device) {
if (!device.compare("cpu")) {
- return static_cast<int>(kDLCPU);
+ return kDLCPU;
} else if (!device.compare("llvm")) {
- return static_cast<int>(kDLCPU);
+ return kDLCPU;
} else if (!device.compare("cuda")) {
- return static_cast<int>(kDLCUDA);
+ return kDLCUDA;
} else if (!device.compare("opencl")) {
- return static_cast<int>(kDLOpenCL);
+ return kDLOpenCL;
} else if (!device.compare("vulkan")) {
- return static_cast<int>(kDLVulkan);
+ return kDLVulkan;
} else if (!device.compare("metal")) {
- return static_cast<int>(kDLMetal);
+ return kDLMetal;
} else if (!device.compare("vpi")) {
- return static_cast<int>(kDLVPI);
+ return kDLVPI;
} else if (!device.compare("rocm")) {
- return static_cast<int>(kDLROCM);
+ return kDLROCM;
} else if (!device.compare("oneapi")) {
- return static_cast<int>(kDLOneAPI);
+ return kDLOneAPI;
} else {
LOG(FATAL) << "TVMRunner : Unsupported device :" << device;
}
@@ -80,34 +82,59 @@ TVMRunner::TVMRunner(std::string path, std::string device)
int TVMRunner::Load(void) {
LOG(INFO) << "TVMRunner Load:" << r_model_path;
// Load the lib file
+ auto tstart = std::chrono::high_resolution_clock::now();
+
r_mod_handle = Module::LoadFromFile((r_model_path + "/mod.so").c_str(),
"so");
+ auto tend = std::chrono::high_resolution_clock::now();
+ r_module_load_ms = static_cast<double>((tend - tstart).count()) / 1e6;
+ tstart = std::chrono::high_resolution_clock::now();
// Read model json file
std::ifstream json_reader((r_model_path + "/mod.json").c_str());
CHECK(!json_reader.fail()) << "Failed to open json file:" << (r_model_path +
"/mod.json").c_str();
- std::string json_str((std::istreambuf_iterator<char>(json_reader)),
- std::istreambuf_iterator<char>());
+ json_reader.seekg(0, std::ios_base::end);
+ std::size_t json_size = json_reader.tellg();
+ json_reader.seekg(0, std::ios_base::beg);
+ std::string json_data;
+ json_data.reserve(json_size);
+ json_reader.read((char*)json_data.c_str(), json_size);
json_reader.close();
// Get ref to graph exeutor
auto f_handle = tvm::runtime::Registry::Get("tvm.graph_executor.create");
// Greate graph runtime
- r_graph_handle = (*f_handle)(json_str, r_mod_handle, GetTVMDevice(r_device),
0);
+ r_graph_handle =
+ (*f_handle)(json_data, r_mod_handle,
static_cast<int>(GetTVMDevice(r_device)), 0);
+
+ tend = std::chrono::high_resolution_clock::now();
+ r_graph_load_ms = static_cast<double>((tend - tstart).count()) / 1e6;
// Read params binary file
+ tstart = std::chrono::high_resolution_clock::now();
std::ifstream params_reader((r_model_path + "/mod.params").c_str(),
std::ios::binary);
CHECK(!params_reader.fail()) << "Failed to open json file:"
<< (r_model_path + "/mod.params").c_str();
- const std::string params_str((std::istreambuf_iterator<char>(params_reader)),
- std::istreambuf_iterator<char>());
+
+ params_reader.seekg(0, std::ios_base::end);
+ std::size_t param_size = params_reader.tellg();
+ params_reader.seekg(0, std::ios_base::beg);
+ std::vector<char> param_data(param_size / sizeof(char));
+ params_reader.read((char*)¶m_data[0], param_size);
params_reader.close();
+
TVMByteArray params_arr;
- params_arr.data = params_str.c_str();
- params_arr.size = params_str.length();
+ params_arr.data = (char*)¶m_data[0];
+ params_arr.size = param_size;
+
+ tend = std::chrono::high_resolution_clock::now();
+ r_param_read_ms = static_cast<double>((tend - tstart).count()) / 1e6;
// Load parameters
+ tstart = std::chrono::high_resolution_clock::now();
r_graph_handle.GetFunction("load_params")(params_arr);
+ tend = std::chrono::high_resolution_clock::now();
+ r_param_load_ms = static_cast<double>((tend - tstart).count()) / 1e6;
return 0;
}
@@ -117,6 +144,7 @@ int TVMRunner::Load(void) {
* \param file_name File name where pre-compiled programs should be stored.
*/
void TVMRunner::UsePreCompiledPrograms(std::string file_name) {
+ auto tstart = std::chrono::high_resolution_clock::now();
if (r_run_was_called) {
LOG(INFO) << "TVMRunner UsePreCompiledPrograms: should be called before
first run";
return;
@@ -130,10 +158,19 @@ void TVMRunner::UsePreCompiledPrograms(std::string
file_name) {
std::ofstream fs(file_name, std::ofstream::binary);
fs.write(bytes.c_str(), bytes.size());
} else {
- std::string bytes((std::istreambuf_iterator<char>(ifs)),
std::istreambuf_iterator<char>());
- f_set(String(bytes));
+ ifs.seekg(0, std::ios_base::end);
+ std::size_t blob_size = ifs.tellg();
+ ifs.seekg(0, std::ios_base::beg);
+ std::string blob_data;
+ blob_data.reserve(blob_size);
+ blob_data.resize(blob_size);
+ ifs.read((char*)blob_data.c_str(), blob_size);
+ ifs.close();
+ f_set(String(blob_data));
}
}
+ auto tend = std::chrono::high_resolution_clock::now();
+ r_pre_compiled_load_ms = static_cast<double>((tend - tstart).count()) / 1e6;
}
/*!
@@ -156,8 +193,6 @@ inline size_t GetMemSize(NDArray& narr) {
* \return The memory size.
*/
size_t TVMRunner::GetInputMemSize(std::string input_id) {
- LOG(INFO) << "TVMRunner::GetInputMemSize:" << input_id;
-
NDArray in_arr = r_graph_handle.GetFunction("get_input")(input_id);
auto ssize = GetMemSize(in_arr);
@@ -170,8 +205,6 @@ size_t TVMRunner::GetInputMemSize(std::string input_id) {
* \return The memory size.
*/
size_t TVMRunner::GetOutputMemSize(std::string output_id) {
- LOG(INFO) << "TVMRunner::GetOutputMemSize:" << output_id;
-
NDArray out_arr = r_graph_handle.GetFunction("get_output")(output_id);
auto ssize = GetMemSize(out_arr);
@@ -209,13 +242,23 @@ int TVMRunner::SetInput(std::string inputfile) {
* \param 0 on success else error code.
*/
int TVMRunner::SetInput(std::string input_id, char* raw_input) {
- LOG(INFO) << "TVMRunner::SetInput (Raw)";
NDArray in_arr = r_graph_handle.GetFunction("get_input")(input_id);
auto ssize = GetMemSize(in_arr);
in_arr.CopyFromBytes(raw_input, ssize);
return 0;
}
+/*!
+ * \brief Set the model input from given NDArray with zero copy.
+ * \param input_id input node name.
+ * \param ndarr NDArray.
+ * \param 0 on success else error code.
+ */
+int TVMRunner::SetInput(std::string input_id, NDArray& ndarr) {
+ r_graph_handle.GetFunction("set_input_zero_copy")(input_id, ndarr);
+ return 0;
+}
+
/*!
* \brief Get the model outputs and dump them to npz file.
* \param outputfile the npz file to where we dump the output data.
@@ -255,21 +298,29 @@ int TVMRunner::GetOutput(std::string outputfile) {
* \param 0 on success else error code.
*/
int TVMRunner::GetOutput(std::string output_id, char* raw_output) {
- LOG(INFO) << "TVMRunner::GetOutput (Raw)";
NDArray out_arr = r_graph_handle.GetFunction("get_output")(output_id);
auto ssize = GetMemSize(out_arr);
out_arr.CopyToBytes(raw_output, ssize);
return 0;
}
+/*!
+ * \brief Set the model output from given NDArray with zero copy.
+ * \param output_id output node name.
+ * \param ndarr NDArray.
+ * \param 0 on success else error code.
+ */
+int TVMRunner::SetOutput(std::string output_id, NDArray& ndarr) {
+ r_graph_handle.GetFunction("set_output_zero_copy")(output_id, ndarr);
+ return 0;
+}
+
/*!
* \brief Call one cycle of execution for the model.
* \param 0 on success else error code.
*/
int TVMRunner::Run(void) {
- LOG(INFO) << "TVMRunner::Run";
r_run_was_called = true;
-
r_graph_handle.GetFunction("run")();
return 0;
}
@@ -289,10 +340,10 @@ TVMMetaInfo TVMRunner::GetMetaInfo(void) {
auto dtype_info = GetRef<Map<String,
ObjectRef>>(tvm_input_info["dtype"].as<MapNode>());
for (const auto& kv : shape_info) {
auto stuple = GetRef<ShapeTuple>(kv.second.as<ShapeTupleObj>());
- std::vector<int> vshape;
+ std::vector<int64_t> vshape;
vshape.assign(stuple.begin(), stuple.end());
auto dtype = GetRef<String>(dtype_info[kv.first].as<StringObj>());
- std::pair<std::vector<int>, std::string> value = std::make_pair(vshape,
dtype);
+ std::pair<std::vector<int64_t>, std::string> value =
std::make_pair(vshape, dtype);
mInfo.input_info.insert({kv.first, value});
}
@@ -301,10 +352,10 @@ TVMMetaInfo TVMRunner::GetMetaInfo(void) {
dtype_info = GetRef<Map<String,
ObjectRef>>(tvm_input_info["dtype"].as<MapNode>());
for (const auto& kv : shape_info) {
auto stuple = GetRef<ShapeTuple>(kv.second.as<ShapeTupleObj>());
- std::vector<int> vshape;
+ std::vector<int64_t> vshape;
vshape.assign(stuple.begin(), stuple.end());
auto dtype = GetRef<String>(dtype_info[kv.first].as<StringObj>());
- std::pair<std::vector<int>, std::string> value = std::make_pair(vshape,
dtype);
+ std::pair<std::vector<int64_t>, std::string> value =
std::make_pair(vshape, dtype);
mInfo.output_info.insert({kv.first, value});
}
@@ -343,5 +394,21 @@ void TVMRunner::PrintMetaInfo(void) {
}
}
+/*!
+ * \brief Print stats information.
+ */
+void TVMRunner::PrintStats(void) {
+ LOG(INFO) << "Performance Stats:" << r_model_path;
+ LOG(INFO) << " Module Load :" << r_module_load_ms << " ms";
+ LOG(INFO) << " Graph Runtime Create :" << r_graph_load_ms << " ms";
+ LOG(INFO) << " Params Read :" << r_param_read_ms << " ms";
+ LOG(INFO) << " Params Set :" << r_param_load_ms << " ms";
+ LOG(INFO) << " Pre Compiled Progs Load :" << r_pre_compiled_load_ms << "
ms";
+ LOG(INFO) << "Total Load Time :"
+ << r_module_load_ms + r_graph_load_ms + r_param_read_ms +
r_param_load_ms +
+ r_pre_compiled_load_ms
+ << " ms";
+}
+
} // namespace runtime
} // namespace tvm
diff --git a/apps/cpp_rtvm/tvm_runner.h b/apps/cpp_rtvm/tvm_runner.h
index 47717c3ecf..e93b63ae85 100644
--- a/apps/cpp_rtvm/tvm_runner.h
+++ b/apps/cpp_rtvm/tvm_runner.h
@@ -41,8 +41,8 @@ namespace runtime {
typedef struct _TVMMetaInfo {
int n_inputs;
int n_outputs;
- std::map<std::string, std::pair<std::vector<int>, std::string>> input_info;
- std::map<std::string, std::pair<std::vector<int>, std::string>> output_info;
+ std::map<std::string, std::pair<std::vector<int64_t>, std::string>>
input_info;
+ std::map<std::string, std::pair<std::vector<int64_t>, std::string>>
output_info;
} TVMMetaInfo;
/*!
@@ -63,10 +63,14 @@ class TVMRunner {
int SetInput(std::string);
/*! \brief To set the input from binary data */
int SetInput(std::string, char*);
+ /*! \brief To set the input from NDArray */
+ int SetInput(std::string, NDArray& ndarr);
/*! \brief Save the model output into given npz file */
int GetOutput(std::string);
/*! \brief Get the model output in binary format */
int GetOutput(std::string, char*);
+ /*! \brief Swap output NDArray with given one */
+ int SetOutput(std::string, NDArray& ndarr);
/*! \brief To get the input mem size */
size_t GetInputMemSize(std::string);
/*! \brief To get the output mem size */
@@ -76,6 +80,21 @@ class TVMRunner {
/*! \brief Print function to show all meta information */
void PrintMetaInfo(void);
+ /*! \brief Print function to show all stats information */
+ void PrintStats(void);
+
+ // Public profiling information
+ /*! Module load time */
+ int r_module_load_ms{0};
+ /*! Graph runtime creation time */
+ int r_graph_load_ms{0};
+ /*! Params read time */
+ int r_param_read_ms{0};
+ /*! Params load time */
+ int r_param_load_ms{0};
+ /*! Pre compiled programs load time */
+ int r_pre_compiled_load_ms{0};
+
private:
/*! \brief Module handle for the shared object */
Module r_mod_handle;
@@ -91,6 +110,7 @@ class TVMRunner {
bool r_run_was_called;
};
+DLDeviceType GetTVMDevice(std::string device);
} // namespace runtime
} // namespace tvm
#endif // TVM_APPS_CPP_RTVM_RUNNER_H_