AndrewZhaoLuo commented on code in PR #11465:
URL: https://github.com/apache/tvm/pull/11465#discussion_r905345680


##########
src/runtime/graph_executor/debug/graph_executor_debug.cc:
##########
@@ -55,48 +56,54 @@ class GraphExecutorDebug : public GraphExecutor {
    *        By default, one `repeat` contains `number` runs. If this parameter 
is set,
    *        the parameters `number` will be dynamically adjusted to meet the
    *        minimum duration requirement of one `repeat`.
-   * \return Comma seperated string containing the elapsed time per op for the 
last
-   *         iteration only, because returning a long string over rpc can be 
expensive.
+   * \param cooldown_interval_ms The cooldown interval in milliseconds between 
the number of repeats
+   *        defined by `repeats_to_cooldown`.
+   * \param repeats_to_cooldown The number of repeats before the
+   *        cooldown is activated.
+   * \return Comma separated string containing the elapsed time per op for
+   *         the last iteration only, because returning a long string over rpc 
can be expensive.
    */
-  std::string RunIndividual(int number, int repeat, int min_repeat_ms) {
+  std::string RunIndividual(int number, int repeat, int min_repeat_ms, int 
cooldown_interval_ms,
+                            int repeats_to_cooldown) {
     // warmup run
     GraphExecutor::Run();
     std::string tkey = module_->type_key();
-    std::vector<double> time_sec_per_op(op_execs_.size(), 0);
+    std::vector<std::vector<double>> time_sec_per_op(op_execs_.size());
     if (tkey == "rpc") {
       // RPC modules rely on remote timing which implements the logic from the 
else branch.
       for (size_t index = 0; index < op_execs_.size(); ++index) {
-        time_sec_per_op[index] += RunOpRPC(index, number, repeat, 
min_repeat_ms);
+        time_sec_per_op[index] = RunOpRPC(index, number, repeat, min_repeat_ms,
+                                          cooldown_interval_ms, 
repeats_to_cooldown);
       }
     } else {
+      int op = 0;
       for (size_t index = 0; index < op_execs_.size(); ++index) {
-        std::vector<double> results = RunIndividualNode(index, number, repeat, 
min_repeat_ms);
-        for (size_t cur_repeat = 0; cur_repeat < results.size(); cur_repeat++) 
{
-          time_sec_per_op[index] = results[cur_repeat];
-
-          LOG(INFO) << "Iteration: " << cur_repeat;
-          int op = 0;
-          if (op_execs_[index]) {
-            LOG(INFO) << "Op #" << op++ << " " << GetNodeName(index) << ": "
-                      << time_sec_per_op[index] * 1e6 << " us/iter";
+        time_sec_per_op[index] = RunIndividualNode(index, number, repeat, 
min_repeat_ms,
+                                                   cooldown_interval_ms, 
repeats_to_cooldown);
+        if (op_execs_[index]) {
+          LOG(INFO) << "Op #" << op << " " << GetNodeName(index) << ":";
+          for (size_t cur_repeat = 0; cur_repeat < 
time_sec_per_op[index].size(); cur_repeat++) {
+            const auto& data = time_sec_per_op[index][cur_repeat];
+            LOG(INFO) << "Iteration: " << cur_repeat << ": " << (data * 1e6) 
<< " us/iter";
           }
+          ++op;
         }
       }
     }
 
     std::ostringstream os;
     for (size_t index = 0; index < time_sec_per_op.size(); index++) {
-      double time = time_sec_per_op[index];
-      // To have good behavior when calculating total time, etc.
-      if (std::isnan(time)) {
-        time = 0;
+      for (const auto& repeat_data : time_sec_per_op[index]) {
+        // To have good behavior when calculating total time, etc.
+        os << (std::isnan(repeat_data) ? std::to_string(0) : 
std::to_string(repeat_data)) << ",";

Review Comment:
   We probably want full precision here, so use something like
   
   ```
   // use maximum precision available and use fixed representation
   s << std::fixed;
   s.precision(std::numeric_limits<double>::max_digits10);
   ```
   
   and then write every double into the stream directly, rather than going through `std::to_string`.



##########
src/runtime/graph_executor/debug/graph_executor_debug.cc:
##########
@@ -55,48 +56,54 @@ class GraphExecutorDebug : public GraphExecutor {
    *        By default, one `repeat` contains `number` runs. If this parameter 
is set,
    *        the parameters `number` will be dynamically adjusted to meet the
    *        minimum duration requirement of one `repeat`.
-   * \return Comma seperated string containing the elapsed time per op for the 
last
-   *         iteration only, because returning a long string over rpc can be 
expensive.
+   * \param cooldown_interval_ms The cooldown interval in milliseconds between 
the number of repeats
+   *        defined by `repeats_to_cooldown`.
+   * \param repeats_to_cooldown The number of repeats before the
+   *        cooldown is activated.
+   * \return Comma separated string containing the elapsed time per op for
+   *         the last iteration only, because returning a long string over rpc 
can be expensive.

Review Comment:
   I forget the exact reason why FloatImm can't be serialized, but what you are 
doing is the string serialization hack, which is fine. 
   
   I don't have context on "returning a long string over rpc can be expensive.", 
but if you aren't having any problems, it's probably fine. 



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to