tkonolige commented on a change in pull request #8807:
URL: https://github.com/apache/tvm/pull/8807#discussion_r695253282
##########
File path: python/tvm/contrib/graph_executor.py
##########
@@ -320,3 +320,62 @@ def __getitem__(self, key):
The key to the module.
"""
return self.module[key]
+
+    def benchmark(self, device, func_name="run", repeat=5, number=5, min_repeat_ms=None, **kwargs):
+        """Calculate runtime of a function by repeatedly calling it.
+
+        Use this function to get an accurate measurement of the runtime of a function. The function
+        is run multiple times in order to account for variability in measurements, processor speed
+        or other external factors. Mean, median, standard deviation, min and max runtime are all
+        reported. On GPUs, CUDA and ROCm specifically, special on-device timers are used so that
+        synchronization and data transfer operations are not counted towards the runtime. This allows
+        for fair comparison of runtimes across different functions and models.
+
+        The benchmarking loop looks approximately like so:
+
+        .. code-block:: python
+
+            for r in range(repeat):
+                time_start = now()
+                for n in range(number):
+                    func_name()
+                time_end = now()
+                total_times.append((time_end - time_start)/number)
+
+
+        Parameters
+        ----------
+        func_name : str
+            The function to benchmark.
+
+        repeat : int
+            Number of times to run the outer loop of the timing code (see above). The output will
+            contain `repeat` number of datapoints.
+
+        number : int
+            Number of times to run the inner loop of the timing code. This inner loop is run in
+            between the timer starting and stopping. In order to amortize any timing overhead,
+            `number` should be increased when the runtime of the function is small (less than 1/10
+            of a millisecond).
+
+        min_repeat_ms : Optional[float]
+            If set, the inner loop will be run until it takes longer than `min_repeat_ms`
+            milliseconds. This can be used to ensure that the function is run enough to get an
+            accurate measurement.
+
+        kwargs : Dict[str, Object]
+            Named arguments to the function. These are cached before running timing code, so that
+            data transfer costs are not counted in the runtime.
+
+        Returns
+        -------
+        timing_results : BenchmarkResult
+            Runtimes of the function. Use `.mean` to access the mean runtime, use `.results` to
Review comment:
done
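
For anyone trying this out locally, a minimal usage sketch might look like the following, assuming the method lands with the signature shown in the diff above. The MLP workload from `relay.testing`, the `llvm` target, and the `data` input name are illustrative choices for the example and are not part of this PR:

    import numpy as np
    import tvm
    from tvm import relay
    from tvm.relay import testing
    from tvm.contrib import graph_executor

    # Build a small example network for the CPU.
    mod, params = testing.mlp.get_workload(batch_size=1)
    dev = tvm.cpu()
    lib = relay.build(mod, target="llvm", params=params)

    # Wrap the compiled library in a GraphModule.
    module = graph_executor.GraphModule(lib["default"](dev))
    data = np.random.uniform(size=(1, 1, 28, 28)).astype("float32")

    # Benchmark the default "run" function. Per the docstring, inputs passed
    # as kwargs are cached before the timing loop, so their transfer cost is
    # not counted in the reported runtime.
    result = module.benchmark(dev, repeat=10, number=5, data=data)
    print(result)          # summary: mean, median, std, min, max
    print(result.mean)     # mean runtime
    print(result.results)  # the `repeat` individual datapoints

For very fast models, the docstring suggests raising `number` or setting `min_repeat_ms` so the inner loop runs long enough to amortize timing overhead.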
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]