tkonolige commented on a change in pull request #8807:
URL: https://github.com/apache/tvm/pull/8807#discussion_r695253282



##########
File path: python/tvm/contrib/graph_executor.py
##########
@@ -320,3 +320,62 @@ def __getitem__(self, key):
             The key to the module.
         """
         return self.module[key]
+
+    def benchmark(self, device, func_name="run", repeat=5, number=5, 
min_repeat_ms=None, **kwargs):
+        """Calculate runtime of a function by repeatedly calling it.
+
+        Use this function to get an accurate measurement of the runtime of a 
function. The function
+        is run multiple times in order to account for variability in 
measurements, processor speed
+        or other external factors.  Mean, median, standard deviation, min and 
max runtime are all
+        reported.  On GPUs, CUDA and ROCm specifically, special on-device 
timers are used so that
+        synchronization and data transfer operations are not counted towards 
the runtime. This allows
+        for fair comparison of runtimes across different functions and models.
+
+        The benchmarking loop looks approximately like so:
+
+        .. code-block:: python
+
+            for r in range(repeat):
+                time_start = now()
+                for n in range(number):
+                    func_name()
+                time_end = now()
+                total_times.append((time_end - time_start)/number)
+
+
+        Parameters
+        ----------
+        func_name : str
+            The function to benchmark
+
+        repeat : int
+            Number of times to run the outer loop of the timing code (see 
above). The output will
+            contain `repeat` number of datapoints.
+
+        number : int
+            Number of times to run the inner loop of the timing code. This 
inner loop is run in
+            between the timer starting and stopping. In order to amortize any 
timing overhead,
+            `number` should be increased when the runtime of the function is 
small (less than 1/10
+            of a millisecond).
+
+        min_repeat_ms : Optional[float]
+            If set, the inner loop will be run until it takes longer than 
`min_repeat_ms`
+            milliseconds. This can be used to ensure that the function is run 
enough to get an
+            accurate measurement.
+
+        kwargs : Dict[str, Object]
+            Named arguments to the function. These are cached before running 
timing code, so that
+            data transfer costs are not counted in the runtime.
+
+        Returns
+        -------
+        timing_results : BenchmarkResult
+            Runtimes of the function. Use `.mean` to access the mean runtime, 
use `.results` to

Review comment:
       done




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to