tkonolige commented on code in PR #11000:
URL: https://github.com/apache/tvm/pull/11000#discussion_r852464986
##########
python/tvm/contrib/debugger/debug_executor.py:
##########
@@ -281,6 +282,42 @@ def run_individual(self, number, repeat=1,
min_repeat_ms=0):
ret = self._run_individual(number, repeat, min_repeat_ms)
return ret.strip(",").split(",") if ret else []
+ def run_individual_node(self, index, number, repeat=1, min_repeat_ms=0):
+ """Benchmark a single node in the serialized graph.
+
+ Parameters
+ ----------
+ index : int
+ The index of the node, see `self.debug_datum.get_graph_nodes`
+
+ number: int
+ The number of times to run the node to get a benchmark result.
+
+ repeat: int
+ The number of times to benchmark the nodes.
+
+ min_repeat_ms: int
+ The minimum consecutive runtime of the node for a benchmark result.
+
+ Returns
+ -------
+ A list of dimensions `number` x `repeat` each one the runtime of the
node
Review Comment:
`BenchmarkResult` is consistent with other interfaces, so using that would
be best.
##########
src/runtime/graph_executor/debug/graph_executor_debug.cc:
##########
@@ -114,15 +94,69 @@ class GraphExecutorDebug : public GraphExecutor {
std::ostringstream os;
for (size_t index = 0; index < time_sec_per_op.size(); index++) {
- os << time_sec_per_op[index] << ",";
+ double time = time_sec_per_op[index];
+ // To have good behavior when calculating total time, etc.
+ if (isnan(time)) {
+ time = 0;
+ }
+ os << time << ",";
}
return os.str();
}
+ std::vector<std::vector<double>> RunIndividualNode(int node_index, int
number, int repeat,
+ int min_repeat_ms) {
+ // warmup run
+ // GraphExecutor::Run();
+ std::string tkey = module_->type_key();
+
+ // results_in_seconds[a][b] is the bth index run of the ath index repeat
+ std::vector<std::vector<double>> results_in_seconds;
+
+ if (tkey == "rpc") {
+ LOG(FATAL) << "RPC measurements should not use RunIndividualNode!";
+ }
+
+ for (int i = 0; i < repeat; ++i) {
+ std::vector<Timer> op_timers;
+ double duration_ms = 0.0;
+
+ // Keep timing operations, upping number of repeats until we reach
min_repeat_ms
+ do {
+ op_timers.clear();
+ if (duration_ms > 0.0) {
+ number = static_cast<int>(std::max((min_repeat_ms / (duration_ms /
number) + 1),
+ number * 1.618)); // 1.618 is
chosen by random
+ }
+
+ std::chrono::time_point<std::chrono::high_resolution_clock,
std::chrono::nanoseconds>
+ tbegin, tend;
+ tbegin = std::chrono::high_resolution_clock::now();
Review Comment:
Here is the main loop from the time evaluator:
https://github.com/apache/tvm/blob/main/src/runtime/rpc/rpc_module.cc#L370-L403.
Here is the timer interface:
https://github.com/apache/tvm/blob/main/include/tvm/runtime/profiling.h#L43-L142.
Using time_evaluator is probably a better choice than reimplementing the
timing loop.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]