[GitHub] [tvm] Icemist commented on a diff in pull request #11465: Add cooldown_interval_ms and internal measurements for profiling

GitBox Fri, 17 Jun 2022 03:57:37 -0700


Icemist commented on code in PR #11465:
URL: https://github.com/apache/tvm/pull/11465#discussion_r900010081



##########
python/tvm/contrib/debugger/debug_result.py:
##########
@@ -205,12 +210,18 @@ def _dump_graph_json(self, graph):
 
     def get_debug_result(self, sort_by_time=True):
         """Return the debugger result"""
-        header = ["Node Name", "Ops", "Time(us)", "Time(%)", "Shape", 
"Inputs", "Outputs"]
-        lines = ["---------", "---", "--------", "-------", "-----", "------", 
"-------"]
+        header = ["Node Name", "Ops", "Time(us)", "Time(%)", "Shape", 
"Inputs", "Outputs", "Times"]

Review Comment:
   The result now returns the measurements from all repeats, not just the 
first. I also renamed the "Times" column to "Measurements(us)".



##########
src/runtime/graph_executor/debug/graph_executor_debug.cc:
##########
@@ -55,77 +56,96 @@ class GraphExecutorDebug : public GraphExecutor {
    *        By default, one `repeat` contains `number` runs. If this parameter 
is set,
    *        the parameters `number` will be dynamically adjusted to meet the
    *        minimum duration requirement of one `repeat`.
+   * \param cooldown_interval_ms The cool down interval between two 
measurements in milliseconds.
    * \return Comma seperated string containing the elapsed time per op for the 
last
    *         iteration only, because returning a long string over rpc can be 
expensive.
    */
-  std::string RunIndividual(int number, int repeat, int min_repeat_ms) {
+  std::string RunIndividual(int number, int repeat, int min_repeat_ms, int 
cooldown_interval_ms) {
     // warmup run
     GraphExecutor::Run();
     std::string tkey = module_->type_key();
-    std::vector<double> time_sec_per_op(op_execs_.size(), 0);
+    std::vector<std::vector<std::vector<double>>> 
time_sec_per_op(op_execs_.size());
     if (tkey == "rpc") {
       // RPC modules rely on remote timing which implements the logic from the 
else branch.
       for (size_t index = 0; index < op_execs_.size(); ++index) {
-        time_sec_per_op[index] += RunOpRPC(index, number, repeat, 
min_repeat_ms);
+        time_sec_per_op[index] =
+            RunOpRPC(index, number, repeat, min_repeat_ms, 
cooldown_interval_ms);
       }
     } else {
+      int op = 0;
       for (size_t index = 0; index < op_execs_.size(); ++index) {
-        std::vector<double> results = RunIndividualNode(index, number, repeat, 
min_repeat_ms);
-        for (size_t cur_repeat = 0; cur_repeat < results.size(); cur_repeat++) 
{
-          time_sec_per_op[index] = results[cur_repeat];
-
-          LOG(INFO) << "Iteration: " << cur_repeat;
-          int op = 0;
-          if (op_execs_[index]) {
-            LOG(INFO) << "Op #" << op++ << " " << GetNodeName(index) << ": "
-                      << time_sec_per_op[index] * 1e6 << " us/iter";
+        time_sec_per_op[index] =
+            RunIndividualNode(index, number, repeat, min_repeat_ms, 
cooldown_interval_ms);
+        if (op_execs_[index]) {
+          LOG(INFO) << "Op #" << op << " " << GetNodeName(index) << ":";
+          for (size_t cur_repeat = 0; cur_repeat < 
time_sec_per_op[index].size(); cur_repeat++) {
+            const auto& data = time_sec_per_op[index][cur_repeat];
+            std::string delimiter = ", ";
+            std::string log;
+            for (double r : data) {
+              log += std::to_string(r * 1e6) + delimiter;
+            }
+            if (data.size()) log.resize(log.size() - delimiter.size());
+            const double mean = std::accumulate(data.begin(), data.end(), 0.0) 
/ data.size();
+            LOG(INFO) << "Iteration: " << cur_repeat << ": " << (mean * 1e6) 
<< " us/iter [" << log
+                      << "]";
           }
+          ++op;
         }
       }
     }
 
     std::ostringstream os;
     for (size_t index = 0; index < time_sec_per_op.size(); index++) {
-      double time = time_sec_per_op[index];
-      // To have good behavior when calculating total time, etc.
-      if (std::isnan(time)) {
-        time = 0;
+      for (const auto& repeat_data : time_sec_per_op[index]) {
+        for (const auto& number_data : repeat_data) {
+          // To have good behavior when calculating total time, etc.
+          os << (std::isnan(number_data) ? std::to_string(0) : 
std::to_string(number_data)) << ",";
+        }
+        os << ";";
       }
-      os << time << ",";
+      os << ":";
     }
     return os.str();
   }
 
-  std::vector<double> RunIndividualNode(int node_index, int number, int 
repeat, int min_repeat_ms) {
+  std::vector<std::vector<double>> RunIndividualNode(int node_index, int 
number, int repeat,
+                                                     int min_repeat_ms, int 
cooldown_interval_ms) {
     std::string tkey = module_->type_key();
 
-    // results_in_seconds[a][b] is the bth index run of the ath index repeat

Review Comment:
   Done



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

[GitHub] [tvm] Icemist commented on a diff in pull request #11465: Add cooldown_interval_ms and internal measurements for profiling

Reply via email to