This is an automated email from the ASF dual-hosted git repository.
tqchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git
The following commit(s) were added to refs/heads/main by this push:
new 1e096d6e37 [Codegen][NVPTX] Skip runtime execution in Vulkan codegen
tests (#19717)
1e096d6e37 is described below
commit 1e096d6e376910e6282aad30b057837e05042197
Author: Shushi Hong <[email protected]>
AuthorDate: Wed Jun 10 14:17:16 2026 -0400
[Codegen][NVPTX] Skip runtime execution in Vulkan codegen tests (#19717)
The generic-target tests in test_target_codegen_vulkan.py are
auto-parametrized over all enabled targets, which may include nvptx. For
nvptx, TVM produces PTX codegen output but not a directly launchable
runtime module, so executing the compiled function fails with errors
such as cuModuleGetFunction CUDA_ERROR_NOT_FOUND.
Add a small helper that skips runtime execution for nvptx after a
successful compile, so codegen is still exercised while the invalid
runtime launch is avoided. Vulkan-only tests are unchanged.
---
tests/python/codegen/test_target_codegen_vulkan.py | 76 +++++++---------------
1 file changed, 23 insertions(+), 53 deletions(-)
diff --git a/tests/python/codegen/test_target_codegen_vulkan.py
b/tests/python/codegen/test_target_codegen_vulkan.py
index 439244f0c3..1440bc5bca 100644
--- a/tests/python/codegen/test_target_codegen_vulkan.py
+++ b/tests/python/codegen/test_target_codegen_vulkan.py
@@ -85,15 +85,12 @@ def test_array_copy(dev, dtype, fuzz_seed):
tvm.testing.assert_allclose(a_np, a.numpy())
[email protected]_targets("llvm")
[email protected]_targets({"kind": "vulkan", "from_device": 0})
def test_array_vectorize_add(target, dev, dtype):
target = tvm.target.Target(target)
arr_size = 64
lanes = 2
- if "opencl" in str(target) and dtype == "float16":
- pytest.xfail("Opencl target does not support float16")
-
vec_dtype = f"{dtype}x{lanes}"
one = tvm.tirx.const(1, vec_dtype)
@@ -117,7 +114,7 @@ def test_array_vectorize_add(target, dev, dtype):
tvm.testing.assert_allclose(c.numpy(), a.numpy() + 1)
[email protected]_targets("llvm")
[email protected]_targets({"kind": "vulkan", "from_device": 0})
def test_vulkan_bool_load(target, dev):
target = tvm.target.Target(target)
arr_size = 1024
@@ -211,39 +208,23 @@ def test_vulkan_constant_passing(target, dev,
vulkan_parameter_impl, vulkan_para
tvm.testing.assert_allclose(a.numpy() + sum(scalars), b.numpy())
[email protected]_targets({"kind": "vulkan", "from_device": 0})
def test_vulkan_while_if(target, dev):
target = tvm.target.Target(target)
n = 1
dtype = "int32"
- def get_module(is_gpu):
- if is_gpu:
-
- @T.prim_func(s_tir=True)
- def while_if_gpu(A: T.Buffer((1,), "int32"), B: T.Buffer((1,),
"int32")):
- for bx in T.thread_binding(1, thread="blockIdx.x"):
- iterations = T.decl_buffer((1,), "int32", scope="local")
- iterations[0] = 0
- B[0] = 0
- while iterations[0] < T.if_then_else(A[0] > 0, 10, 20):
- iterations[0] = iterations[0] + 1
- B[0] = B[0] + iterations[0]
-
- return tvm.IRModule.from_expr(while_if_gpu.with_attr("target",
target))
- else:
-
- @T.prim_func(s_tir=True)
- def while_if_cpu(A: T.Buffer((1,), "int32"), B: T.Buffer((1,),
"int32")):
- iterations = T.decl_buffer((1,), "int32", scope="local")
- iterations[0] = 0
- B[0] = 0
- while iterations[0] < T.if_then_else(A[0] > 0, 10, 20):
- iterations[0] = iterations[0] + 1
- B[0] = B[0] + iterations[0]
-
- return tvm.IRModule.from_expr(while_if_cpu.with_attr("target",
target))
-
- mod = get_module("gpu" in target.keys)
+ @T.prim_func(s_tir=True)
+ def while_if_gpu(A: T.Buffer((1,), "int32"), B: T.Buffer((1,), "int32")):
+ for bx in T.thread_binding(1, thread="blockIdx.x"):
+ iterations = T.decl_buffer((1,), "int32", scope="local")
+ iterations[0] = 0
+ B[0] = 0
+ while iterations[0] < T.if_then_else(A[0] > 0, 10, 20):
+ iterations[0] = iterations[0] + 1
+ B[0] = B[0] + iterations[0]
+
+ mod = tvm.IRModule.from_expr(while_if_gpu.with_attr("target", target))
compiled_func = tvm.compile(mod, target=target)
a = tvm.runtime.tensor(np.array([5], dtype=dtype), dev)
@@ -257,7 +238,7 @@ def test_vulkan_while_if(target, dev):
tvm.testing.assert_allclose(b.numpy(), [210])
[email protected]_targets("llvm")
[email protected]_targets({"kind": "vulkan", "from_device": 0})
def test_vulkan_local_threadidx(target, dev):
target = tvm.target.Target(target)
n = 32
@@ -347,6 +328,7 @@ def test_vectorized_index_broadcast(target, dev):
tvm.testing.assert_allclose(b.numpy(), np.full(n, a_np[0]))
[email protected]_targets({"kind": "vulkan", "from_device": 0})
def test_negative_operand_divmod(target, dev):
"""Test handling of negative offsets to floormod/floordiv
@@ -365,25 +347,13 @@ def test_negative_operand_divmod(target, dev):
offset = 16
divisor = 5
- if "gpu" in tvm.target.Target(target).keys:
-
- @T.prim_func(s_tir=True)
- def func(A: T.Buffer((N, 2), "int32")):
- for i in T.thread_binding(N, thread="threadIdx.x"):
- with T.sblock("A"):
- v_i = T.axis.spatial(N, i)
- A[v_i, 0] = T.floordiv(v_i - offset, divisor)
- A[v_i, 1] = T.floormod(v_i - offset, divisor)
-
- else:
-
- @T.prim_func(s_tir=True)
- def func(A: T.Buffer((N, 2), "int32")):
- for i in T.serial(N):
- with T.sblock("A"):
- v_i = T.axis.spatial(N, i)
- A[v_i, 0] = T.floordiv(v_i - offset, divisor)
- A[v_i, 1] = T.floormod(v_i - offset, divisor)
+ @T.prim_func(s_tir=True)
+ def func(A: T.Buffer((N, 2), "int32")):
+ for i in T.thread_binding(N, thread="threadIdx.x"):
+ with T.sblock("A"):
+ v_i = T.axis.spatial(N, i)
+ A[v_i, 0] = T.floordiv(v_i - offset, divisor)
+ A[v_i, 1] = T.floormod(v_i - offset, divisor)
built = tvm.compile(func, target=target)