tkonolige commented on code in PR #11066:
URL: https://github.com/apache/tvm/pull/11066#discussion_r860315129


##########
tests/python/unittest/test_runtime_profiling.py:
##########
@@ -257,6 +259,50 @@ def test_profile_function(target, dev):
     assert report[metric].value > 0
 
 
+@tvm.testing.parametrize_targets("llvm")
+def test_estimate_peak_fma_flops(target, dev):
+    # This test uses vectorized instructions so we need a target that supports them
+    if target == "llvm":
+        target = "llvm -mattr=+fma,+avx2"
+    flops = tvm.utils.estimate_peak_fma_flops(tvm.target.Target(target), dev)
+    # assume we can achieve 1 GFLOP/s per thread
+    assert (
+        flops > 10**9 * tvm.runtime.num_threads() and flops < 10**14
+    ), f"FLOP/s should be between 10^9 * num_threads and 10^14, but it is {flops}"
+
+
+@tvm.testing.parametrize_targets("llvm")
+def test_estimate_peak_bandwidth(target, dev):
+    # This test uses vectorized instructions so we need a target that supports them
+    if target == "llvm":
+        target = "llvm -mattr=+fma,+avx2"
+    bandwidth = tvm.utils.estimate_peak_bandwidth(tvm.target.Target(target), dev)
+    # assume we can achieve 1 GB/s
+    assert (
+        bandwidth > 10**9 and bandwidth < 10**12
+    ), f"Bandwidth should be between 10^9 and 10^12, but it is {bandwidth}"
+
+
+@pytest.mark.skipif(platform.machine() == "i386", reason="Cannot allocate enough memory on i386")
+@tvm.testing.parametrize_targets("llvm")
+def test_roofline_analysis(target, dev):
+    a = relay.var("a", relay.TensorType((512, 512), "float32"))
+    b = relay.var("b", relay.TensorType((512, 512), "float32"))
+    c = relay.nn.dense(a, b)
+    mod = tvm.IRModule.from_expr(relay.Function([a, b], c))
+    params = {}
+    report = tvm.utils.roofline_analysis(mod, params, target, dev)
+
+    assert "Bound" in report.table()
+    assert "Percent of Theoretical Optimal" in report.table()
+    for call in report.calls:
+        if "Percent of Theoretical Optimal" in call:
+            # Ideally we'd like a little tighter bound here, but it is hard to
+            # know how well this dense will perform without tuning. And we
+            # don't have an operator that uses a specific number of flops.
+            assert call["Percent of Theoretical Optimal"].ratio >= 0
+
+

Review Comment:
   The default (untuned) schedules for dense seem to always be memory bound
instead of compute bound, so I can't really test for that without tuning. I'll
try some other operators and see whether that is true for them too.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to