This is an automated email from the ASF dual-hosted git repository.
tqchen pushed a change to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git
from 4d9d129c93 [Relax][ONNX] Fix Cast operator float->int NaN/Inf handling (#19626)
add 9db74c7cee [TIRx] Update scoped ops and CUDA launch bounds (#19677)
No new revisions were added by this update.
Summary of changes:
include/tvm/tirx/builtin.h | 48 +-
include/tvm/tirx/exec_context.h | 3 -
include/tvm/tirx/exec_scope.h | 9 +-
include/tvm/tirx/function.h | 7 +
include/tvm/tirx/op.h | 7 +-
include/tvm/tirx/op_attr_types.h | 17 +
include/tvm/tirx/script/builder/frame.h | 46 -
include/tvm/tirx/script/builder/ir.h | 15 -
include/tvm/tirx/stmt.h | 44 +-
include/tvm/tirx/stmt_functor.h | 4 -
include/tvm/tirx/target_builtin/cuda.h | 12 +-
include/tvm/tirx/tirx_op.h | 15 +-
include/tvm/tirx/tirx_stmt.h | 9 +-
python/tvm/runtime/script_printer.py | 8 +-
python/tvm/s_tir/backend/adreno/pipeline.py | 4 +-
python/tvm/s_tir/pipeline.py | 4 +-
python/tvm/script/parser/core/entry.py | 3 +-
python/tvm/tirx/__init__.py | 2 +-
python/tvm/tirx/bench.py | 36 +-
python/tvm/tirx/lang/alloc_pool.py | 35 +-
python/tvm/tirx/lang/pipeline.py | 66 +-
python/tvm/tirx/lang/smem_desc.py | 16 +-
python/tvm/tirx/lang/tile_scheduler.py | 294 ++--
python/tvm/tirx/lang/warp_role.py | 55 +-
python/tvm/tirx/op.py | 63 +-
python/tvm/tirx/operator/intrinsics/_schema.py | 38 +-
python/tvm/tirx/operator/intrinsics/cuda/misc.py | 4 +-
.../tvm/tirx/operator/intrinsics/cuda/registry.py | 42 +-
.../tirx/operator/tile_primitive/cuda/common.py | 48 +-
.../tile_primitive/cuda/copy/_swizzle_iter.py | 24 +-
.../operator/tile_primitive/cuda/copy/fallback.py | 10 +-
.../operator/tile_primitive/cuda/copy/gmem_smem.py | 8 +-
.../tile_primitive/cuda/copy/ld_stmatrix.py | 22 +-
.../tirx/operator/tile_primitive/cuda/copy/reg.py | 22 +-
.../tile_primitive/cuda/copy_async/dsmem.py | 24 +-
.../tile_primitive/cuda/copy_async/ldgsts.py | 14 +-
.../tile_primitive/cuda/copy_async/tcgen05_cp.py | 20 +-
.../tile_primitive/cuda/copy_async/tcgen05_ldst.py | 50 +-
.../operator/tile_primitive/cuda/copy_async/tma.py | 40 +-
.../tile_primitive/cuda/elementwise/_common.py | 12 +-
.../cuda/elementwise/ops/__init__.py | 2 +-
.../tile_primitive/cuda/elementwise/ops/unary.py | 18 +-
.../tile_primitive/cuda/elementwise/reg.py | 30 +-
.../tile_primitive/cuda/elementwise/smem.py | 30 +-
.../cuda/elementwise/vec_emit/__init__.py | 2 +-
.../cuda/elementwise/vec_emit/binary_f32x2.py | 14 +-
.../cuda/elementwise/vec_emit/cast_vec2.py | 8 +-
.../cuda/elementwise/vec_emit/fma_f32x2.py | 12 +-
.../tile_primitive/cuda/exec_scope_utils.py | 36 +-
.../tile_primitive/cuda/gemm/mma_m16n8k_.py | 20 +-
.../tile_primitive/cuda/gemm_async/tcgen05.py | 72 +-
.../cuda/permute_layout/warp_xor_swizzle.py | 46 +-
.../tile_primitive/cuda/reduction/local.py | 116 +-
.../tile_primitive/cuda/reduction/shared.py | 58 +-
.../tile_primitive/cuda/reduction/sm100_packed.py | 175 +--
.../tile_primitive/cuda/reduction/utils.py | 10 +-
python/tvm/tirx/operator/tile_primitive/ops.py | 66 +-
.../operator/tile_primitive/trn/binary/default.py | 34 +-
.../tile_primitive/trn/compose_op/binary_chain.py | 30 +-
.../tile_primitive/trn/compose_op/binary_reduce.py | 66 +-
.../tile_primitive/trn/compose_op/compose_op.py | 2 +-
.../tile_primitive/trn/compose_op/reduce_negate.py | 2 +-
.../tile_primitive/trn/compose_op/unary_reduce.py | 66 +-
.../tile_primitive/trn/compose_op/utils.py | 26 +-
.../operator/tile_primitive/trn/copy/default.py | 122 +-
.../tirx/operator/tile_primitive/trn/dim_utils.py | 8 +-
.../operator/tile_primitive/trn/gemm/default.py | 66 +-
.../tile_primitive/trn/instruction_generator.py | 8 +-
.../operator/tile_primitive/trn/private_alloc.py | 32 +-
.../operator/tile_primitive/trn/reduction/utils.py | 58 +-
.../operator/tile_primitive/trn/select/default.py | 32 +-
.../operator/tile_primitive/trn/unary/default.py | 4 +-
.../operator/tile_primitive/trn/unary/utils.py | 46 +-
.../tile_primitive/trn/unary/with_bias_scale.py | 4 +-
python/tvm/tirx/script/__init__.py | 51 +-
python/tvm/tirx/script/builder/__init__.py | 4 +-
python/tvm/tirx/script/builder/frame.py | 12 -
python/tvm/tirx/script/builder/ir.py | 206 ++-
python/tvm/tirx/script/builder/tirx.py | 323 +++-
python/tvm/tirx/script/parser/__init__.py | 3 +
python/tvm/tirx/script/parser/entry.py | 33 +-
python/tvm/tirx/script/parser/parser.py | 2 +-
python/tvm/tirx/script/tile.py | 119 ++
python/tvm/tirx/stmt.py | 78 +-
python/tvm/tirx/stmt_functor.py | 25 +-
python/tvm/tirx/transform/common.py | 24 +-
.../tvm/tirx/transform/trn/private_buffer_alloc.py | 27 +-
src/target/cuda/codegen_cuda.cc | 45 +-
src/target/cuda/intrin_rule_cuda.cc | 20 +-
src/target/hexagon/llvm/intrin_rule_hexagon.cc | 1 +
src/target/intrin_rule.cc | 2 +
src/target/llvm/codegen_llvm.cc | 2 -
src/target/llvm/codegen_llvm.h | 1 -
src/target/metal/intrin_rule_metal.cc | 16 +-
src/target/source/codegen_c.cc | 2 -
src/target/source/codegen_c.h | 1 -
src/target/source/codegen_trn.cc | 36 +-
src/target/webgpu/intrin_rule_webgpu.cc | 17 +-
src/tirx/analysis/exec_context.cc | 10 -
src/tirx/analysis/filter_canonical.cc | 10 +-
src/tirx/analysis/verify_tirx_well_formed.cc | 62 +-
src/tirx/ir/stmt.cc | 15 -
src/tirx/ir/stmt_functor.cc | 16 -
src/tirx/ir/tir_visitor_with_path.cc | 4 -
src/tirx/ir/tir_visitor_with_path.h | 1 -
src/tirx/ir/tirx_stmt.cc | 15 +-
src/tirx/ir/transform.cc | 2 +-
src/tirx/op/builtin.cc | 4 +
src/tirx/op/runtime.cc | 2 +
src/tirx/op/target_builtin/cuda.cc | 251 ++-
src/tirx/op/target_builtin/trn.cc | 61 +
src/tirx/op/tirx.cc | 91 +-
src/tirx/script/builder/frame.cc | 19 +-
src/tirx/script/builder/ir.cc | 24 +-
src/tirx/script/builder/utils.h | 15 -
src/tirx/script/printer/block.cc | 10 +-
src/tirx/script/printer/buffer.cc | 2 +-
src/tirx/script/printer/expr.cc | 2 +-
src/tirx/script/printer/stmt.cc | 61 +-
src/tirx/script/printer/utils.h | 11 -
src/tirx/transform/lower_tirx.cc | 34 +-
src/tirx/transform/lower_tirx_cleanup.cc | 30 -
src/tirx/transform/lower_warp_memory.cc | 25 +-
src/tirx/transform/split_host_device.cc | 45 +-
src/tirx/transform/tile_primitive_dispatch.cc | 178 +--
tests/python/codegen/test_inject_ptx_ldg32.py | 2 +-
.../test_s_tir_transform_inject_ptx_ldg32.py | 2 +-
tests/python/tirx-base/test_tir_op_types.py | 10 +-
tests/python/tirx-base/test_tir_stmt_functor.py | 6 +-
tests/python/tirx/codegen/test_codegen_ampere.py | 206 ++-
.../python/tirx/codegen/test_codegen_blackwell.py | 448 +++---
tests/python/tirx/codegen/test_codegen_cuda.py | 574 +++----
tests/python/tirx/codegen/test_codegen_dsmem.py | 48 +-
tests/python/tirx/codegen/test_codegen_hopper.py | 751 +++++----
tests/python/tirx/codegen/test_codegen_nki.py | 217 ++-
tests/python/tirx/codegen/test_codegen_nvshmem.py | 164 +-
tests/python/tirx/codegen/test_cuda_copy.py | 220 ++-
tests/python/tirx/codegen/test_cuda_cta_reduce.py | 158 +-
tests/python/tirx/codegen/test_cuda_warp_reduce.py | 110 +-
.../tile_primitive/cuda/copy/test_fallback.py | 138 +-
.../tile_primitive/cuda/copy/test_gmem_smem.py | 172 +-
.../tile_primitive/cuda/copy/test_ld_stmatrix.py | 377 +++--
.../operator/tile_primitive/cuda/copy/test_reg.py | 338 ++--
.../tile_primitive/cuda/copy_async/test_dsmem.py | 100 +-
.../tile_primitive/cuda/copy_async/test_ldgsts.py | 30 +-
.../cuda/copy_async/test_smem_tmem.py | 364 ++---
.../tile_primitive/cuda/copy_async/test_tma.py | 425 +++--
.../tile_primitive/cuda/copy_async/test_tmem.py | 314 ++--
.../cuda/copy_async/test_tmem_16xnb.py | 482 +++---
.../tile_primitive/cuda/elementwise/test_binary.py | 736 ++++-----
.../tile_primitive/cuda/elementwise/test_fma.py | 238 ++-
.../tile_primitive/cuda/elementwise/test_unary.py | 1047 ++++++-------
.../cuda/gemm/test_gemm_mma_m16n8k_.py | 457 +++---
.../cuda/gemm_async/test_gemm_async.py | 1257 +++++++--------
.../cuda/permute_layout/test_permute_layout.py | 154 +-
.../cuda/reduction/test_reduction.py | 826 +++++-----
.../operator/tile_primitive/test_dispatcher.py | 26 +-
.../operator/tile_primitive/trn/test_binary_trn.py | 263 ++--
.../tile_primitive/trn/test_compose_op_trn.py | 743 +++++----
.../operator/tile_primitive/trn/test_copy_trn.py | 925 ++++++-----
.../operator/tile_primitive/trn/test_gemm_trn.py | 495 +++---
.../tile_primitive/trn/test_private_alloc_trn.py | 313 ++--
.../tile_primitive/trn/test_reduction_trn.py | 245 ++-
.../operator/tile_primitive/trn/test_select_trn.py | 131 +-
.../operator/tile_primitive/trn/test_unary_trn.py | 245 ++-
tests/python/tirx/test_buffer_print.py | 104 +-
tests/python/tirx/test_control_flow.py | 99 +-
tests/python/tirx/test_hint.py | 119 +-
tests/python/tirx/test_inline.py | 25 +-
tests/python/tirx/test_jit.py | 114 +-
tests/python/tirx/test_layout.py | 4 +-
tests/python/tirx/test_op.py | 132 +-
tests/python/tirx/test_op_namespace_cleanup.py | 265 ++++
tests/python/tirx/test_parser_printer.py | 1639 +++++++++----------
tests/python/tirx/test_printer_tir_namespaces.py | 288 ++--
tests/python/tirx/test_roundtrip_namespaces.py | 22 +-
tests/python/tirx/test_verifier.py | 415 +++--
tests/python/tirx/transform/test_stmt_functor.py | 31 +-
.../tirx/transform/test_transform_lower_tirx.py | 1649 +++++++++-----------
.../transform/test_transform_naive_allocator.py | 143 +-
180 files changed, 11458 insertions(+), 11919 deletions(-)
create mode 100644 python/tvm/tirx/script/tile.py
create mode 100644 tests/python/tirx/test_op_namespace_cleanup.py