This is an automated email from the ASF dual-hosted git repository.
github-actions[bot] pushed a change to branch nightly
in repository https://gitbox.apache.org/repos/asf/tvm.git
from 182db0f252 [Relax][PyTorch] Add rnn_tanh.input converter (#19837)
add 03267616a6 [DOCS] Refine tvm pypi wheel optional install guidance (#19892)
add 4224d51090 [TIRx] Bundle CUDA tile primitive and op dispatch updates (#19896)
add 5d9f6e1e5c [FFI] Bump tvm-ffi to latest June 28 (#19903)
No new revisions were added by this update.
Summary of changes:
.agents/scripts/monitor_gpu.sh | 13 +-
.agents/skills/tir-bench/SKILL.md | 195 ------
.agents/skills/tir-test/SKILL.md | 25 +-
.gitignore | 5 -
3rdparty/tvm-ffi | 2 +-
AGENTS.md | 4 +-
docs/how_to/tutorials/bring_your_own_codegen.py | 11 +-
docs/install/pypi.rst | 15 +
docs/tirx/install.rst | 13 -
python/tvm/backend/cuda/op.py | 444 +++++++++---
.../tvm/backend/cuda/operator/intrinsics/memory.py | 744 +++++++++++++--------
.../cuda/operator/tile_primitive/copy/_common.py | 16 +
.../cuda/operator/tile_primitive/copy/gmem_smem.py | 31 +-
.../operator/tile_primitive/copy/ld_stmatrix.py | 12 +-
.../cuda/operator/tile_primitive/copy/reg.py | 73 +-
.../tile_primitive/copy_async/tcgen05_cp.py | 31 +-
.../cuda/operator/tile_primitive/copy_async/tma.py | 57 +-
.../operator/tile_primitive/elementwise/_common.py | 32 +-
.../tile_primitive/elementwise/ops/binary.py | 18 +-
.../tile_primitive/elementwise/ops/unary.py | 12 +-
.../operator/tile_primitive/elementwise/reg.py | 11 +-
.../operator/tile_primitive/elementwise/smem.py | 5 +-
.../elementwise/vec_emit/binary_f32x2.py | 7 +-
.../elementwise/vec_emit/cast_vec2.py | 11 +-
.../elementwise/vec_emit/fma_f32x2.py | 9 +-
.../operator/tile_primitive/gemm_async/tcgen05.py | 218 +++---
.../permute_layout/warp_xor_swizzle.py | 84 ++-
python/tvm/backend/cuda/script.py | 12 +-
.../trn/operator/tile_primitive/private_alloc.py | 15 +-
python/tvm/support/nvcc.py | 41 +-
python/tvm/tirx/bench.py | 210 +++++-
python/tvm/tirx/op.py | 2 +
python/tvm/tirx/script/builder/ir.py | 20 +-
src/arith/const_fold.h | 16 +
src/arith/rewrite_simplify.cc | 24 +
src/backend/cuda/op/target_builtin.cc | 7 +-
src/backend/cuda/runtime/cuda_device_api.cc | 21 +-
tests/python/arith/test_arith_rewrite_simplify.py | 7 +
tests/python/arith/test_arith_simplify.py | 10 +
tests/python/tirx-base/test_tir_imm_values.py | 20 +-
tests/python/tirx/codegen/test_cuda_copy.py | 219 ------
tests/python/tirx/codegen/test_ptx_ld_st_ops.py | 202 ++++++
.../tile_primitive/cuda/copy/test_ld_stmatrix.py | 39 ++
.../operator/tile_primitive/cuda/copy/test_reg.py | 216 +++++-
.../cuda/copy_async/test_smem_tmem.py | 29 +
.../tile_primitive/cuda/copy_async/test_tma.py | 66 ++
.../tile_primitive/cuda/elementwise/test_binary.py | 35 +
.../tile_primitive/cuda/elementwise/test_unary.py | 84 +++
.../cuda/gemm_async/test_gemm_async.py | 220 ++++++
.../cuda/permute_layout/test_permute_layout.py | 42 ++
tests/python/tirx/test_op_namespace_cleanup.py | 10 +-
tests/python/tirx/transform/test_stmt_functor.py | 2 +-
tests/scripts/setup-pytest-env.sh | 14 -
53 files changed, 2570 insertions(+), 1111 deletions(-)
delete mode 100644 .agents/skills/tir-bench/SKILL.md
delete mode 100644 tests/python/tirx/codegen/test_cuda_copy.py
create mode 100644 tests/python/tirx/codegen/test_ptx_ld_st_ops.py