This is an automated email from the ASF dual-hosted git repository.
tqchen pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git
The following commit(s) were added to refs/heads/main by this push:
new ddf7bce8c7 Upgrade to CUTLASS 4.2.1 (#18372)
ddf7bce8c7 is described below
commit ddf7bce8c7ff99ebb14500312c232c9042fa4c29
Author: Jun Jiang <[email protected]>
AuthorDate: Sat Oct 18 02:28:59 2025 +0800
Upgrade to CUTLASS 4.2.1 (#18372)
* Upgrade to CUTLASS 4.2.1
* Fix test: mbarrier.try_wait requires .target sm_90 or higher
---
3rdparty/cutlass | 2 +-
tests/python/tir-base/test_tir_ptx_cp_async.py | 2 +-
tests/python/tir-transform/test_tir_transform_inject_ptx_async_copy.py | 3 +--
3 files changed, 3 insertions(+), 4 deletions(-)
diff --git a/3rdparty/cutlass b/3rdparty/cutlass
index b2dd65dc86..f3fde58372 160000
--- a/3rdparty/cutlass
+++ b/3rdparty/cutlass
@@ -1 +1 @@
-Subproject commit b2dd65dc864e09688245b316ac46c4a6cd07e15c
+Subproject commit f3fde58372d33e9a5650ba7b80fc48b3b49d40c8
diff --git a/tests/python/tir-base/test_tir_ptx_cp_async.py b/tests/python/tir-base/test_tir_ptx_cp_async.py
index 8b15e385d2..9e0e18c307 100644
--- a/tests/python/tir-base/test_tir_ptx_cp_async.py
+++ b/tests/python/tir-base/test_tir_ptx_cp_async.py
@@ -95,7 +95,7 @@ def ptx_cp_async_barrier(
B[tx, i] = A_shared[tx, i]
[email protected](reason="temp skip test due to cuda env update")
[email protected]_cuda_compute_version(9)
def test_ptx_cp_async_barrier():
f = ptx_cp_async_barrier
diff --git a/tests/python/tir-transform/test_tir_transform_inject_ptx_async_copy.py b/tests/python/tir-transform/test_tir_transform_inject_ptx_async_copy.py
index bcec1d4843..0855afcfd6 100644
--- a/tests/python/tir-transform/test_tir_transform_inject_ptx_async_copy.py
+++ b/tests/python/tir-transform/test_tir_transform_inject_ptx_async_copy.py
@@ -214,8 +214,7 @@ def ptx_global_to_shared_copy_fp32x1_barrier(
B[tx, i] = A_shared[tx, i]
[email protected](reason="temp skip test due to cuda env update")
[email protected]_cuda
[email protected]_cuda_compute_version(9)
def test_inject_async_copy_barrier():
dtype = "float32"
vec_size = 1