This is an automated email from the ASF dual-hosted git repository.
MasterJH5574 pushed a change to branch v0.25.0
in repository https://gitbox.apache.org/repos/asf/tvm.git
from a9a9fc57ba [REFACTOR][PYTHON] Consolidate backend autoload infra (#19769)
add b3c8849188 [Fix] nn.attention support dynamic batch_size (#19779)
add 1c1afe3e7b [Relax][ONNX] Make ReduceMax/ReduceMin NaN propagation order-independent(numpy semantics) (#19755)
add 668d119894 [Docs][CI] Bump tlcpack-sphinx-addon to restore search result summaries (#19782)
add 1086fc9394 [REFACTOR][IR] Cleanup IR naming utilities (#19781)
add 4066127509 [CUDA] Narrow the cuda extra from cuda-python to cuda-bindings (#19784)
add 3ea565b482 [AGENT] Migrate agent instructions to vendor-neutral layout (#19783)
add b684868bd8 [Tests] Modernize test gating (#19777)
add 694dacb964 [TIRX][CUDA] Framework support for FA4, CLC intrinsics, and nvfp4 tcgen05 GEMM (#19785)
add bce6ebcde4 [Relax][TensorRT] Update TensorRT runtime to 10 (#19789)
add e8e94786ea [Tests] Make TargetCreation.DeduplicateKeys host-agnostic on AArch64 (#19786)
add 00813d6b14 [Tests] Replace remaining requires_* helpers with standard pytest (#19787)
add 8ede60c9be [TIRx][RISC-V] Use scalable RVV loops for fixed vectorize (#19776)
add c4c737a08f [Docs] Modernize test-gating documentation (#19788)
add beb6511383 [Web] Destroy GPUDevice once on buffer creation error (#19790)
add a7864af945 [REFACTOR] Phase out unused queue and rang license entries (#19794)
add 5388ea33a1 [REFACTOR][HEXAGON] Phase out Hexagon app and test wrappers (#19796)
add b6c73cdaeb [CI] Pin GitHub Actions to SHA for ASF INFRA compliance (#19793)
add a335a14e8f [Web] use singular requestFileHandle() instead of requestFileHandles() (#19780)
add b1b95b1e1e [REFACTOR][IR] Simplify CallingConv attribute access (#19799)
add 9112a17d84 [CI] Remove Jenkins PR linter step (#19798)
add d4dcb70755 [Relax][Frontend][TFLite] Add support for FFT/complex operators: REAL, IMAG, COMPLEX_ABS (#19763)
add 297f944764 [REFACTOR][TIRX] Add IntImm common scalar ctor and streamline MakeConst (#19797)
add 0815e00052 [Tests][Refactor] Remove unused testing helpers (#19800)
No new revisions were added by this update.
Summary of changes:
{.claude => .agents}/scripts/monitor_gpu.sh | 13 +-
.../skills/tir-bench/SKILL.md | 0
.../skills/tir-build/SKILL.md | 6 +-
.../skills/tir-test/SKILL.md | 4 +-
.github/actions/build-wheel-for-publish/action.yml | 2 +-
.github/workflows/lint.yml | 2 +-
.github/workflows/publish_wheel.yml | 2 +-
AGENTS.md | 99 ++
CMakeLists.txt | 1 -
apps/cpp_rpc/rpc_env.cc | 14 +-
apps/cpp_rpc/rpc_server.cc | 5 +-
apps/hexagon_api/CMakeLists.txt | 166 ---
apps/hexagon_api/README.md | 58 --
apps/hexagon_launcher/CMakeLists.txt | 81 --
apps/hexagon_launcher/README.md | 145 ---
apps/hexagon_launcher/cmake/HexagonLauncher.cmake | 75 --
apps/hexagon_launcher/cmake/android/CMakeLists.txt | 100 --
apps/hexagon_launcher/cmake/hexagon/CMakeLists.txt | 105 --
apps/hexagon_launcher/launcher_android.cc | 170 ---
apps/hexagon_launcher/launcher_core.cc | 231 ----
apps/hexagon_launcher/launcher_core.h | 133 ---
apps/hexagon_launcher/launcher_hexagon.cc | 237 -----
apps/hexagon_launcher/launcher_main.cc | 159 ---
apps/hexagon_launcher/launcher_rpc.idl | 33 -
apps/hexagon_launcher/launcher_util.cc | 68 --
apps/hexagon_launcher/launcher_util.h | 34 -
ci/jenkins/data.py | 4 -
ci/jenkins/generated/arm_jenkinsfile.groovy | 21 +-
ci/jenkins/generated/cpu_jenkinsfile.groovy | 21 +-
ci/jenkins/generated/docker_jenkinsfile.groovy | 21 +-
ci/jenkins/generated/gpu_jenkinsfile.groovy | 21 +-
ci/jenkins/generated/wasm_jenkinsfile.groovy | 21 +-
ci/jenkins/templates/utils/Prepare.groovy.j2 | 19 -
ci/scripts/jenkins/check_pr.py | 143 ---
docker/install/ubuntu_install_sphinx.sh | 2 +-
docs/contribute/code_guide.rst | 11 +-
docs/contribute/testing.rst | 62 +-
include/tvm/ir/expr.h | 29 +-
include/tvm/ir/global_var_supply.h | 128 ---
include/tvm/ir/name_supply.h | 169 ---
include/tvm/ir/node_functor.h | 2 +-
include/tvm/ir/unique_name_supply.h | 143 +++
include/tvm/relax/binding_rewrite.h | 4 +-
include/tvm/relax/block_builder.h | 8 +-
include/tvm/script/printer/doc.h | 4 +-
include/tvm/tirx/buffer.h | 2 +-
include/tvm/tirx/op.h | 108 +-
include/tvm/topi/detail/broadcast.h | 2 +-
include/tvm/topi/detail/extern.h | 13 +-
include/tvm/topi/detail/strided_slice.h | 4 +-
include/tvm/topi/elemwise.h | 58 +-
include/tvm/topi/nn.h | 28 +-
include/tvm/topi/nn/bnn.h | 2 +-
include/tvm/topi/nn/dilate.h | 2 +-
include/tvm/topi/nn/group_norm.h | 4 +-
include/tvm/topi/nn/instance_norm.h | 4 +-
include/tvm/topi/nn/layer_norm.h | 4 +-
include/tvm/topi/nn/local_response_norm.h | 6 +-
include/tvm/topi/nn/pooling.h | 27 +-
include/tvm/topi/nn/rms_norm.h | 4 +-
include/tvm/topi/nn/softmax.h | 2 +-
include/tvm/topi/reduction.h | 12 +-
include/tvm/topi/transform.h | 30 +-
licenses/LICENSE.blockingconcurrentqueue.txt | 26 -
licenses/LICENSE.concurrentqueue.txt | 22 -
licenses/LICENSE.rang.txt | 24 -
pyproject.toml | 10 +-
python/tvm/backend/cuda/lang/pipeline.py | 11 +-
python/tvm/backend/cuda/lang/tile_scheduler.py | 135 ++-
python/tvm/backend/cuda/op.py | 78 +-
.../tvm/backend/cuda/operator/intrinsics/header.py | 6 +-
.../tvm/backend/cuda/operator/intrinsics/sync.py | 100 +-
.../tile_primitive/copy_async/tcgen05_ldst.py | 35 +-
.../operator/tile_primitive/elementwise/reg.py | 67 ++
python/tvm/backend/cuda/script.py | 4 +
python/tvm/contrib/hexagon/_ci_env_check.py | 10 +-
python/tvm/contrib/hexagon/build.py | 830 ---------------
python/tvm/contrib/hexagon/hexagon_profiler.py | 125 ---
python/tvm/contrib/hexagon/meta_schedule.py | 195 ----
.../contrib/hexagon/profiling/process_lwp_data.py | 388 -------
python/tvm/contrib/hexagon/pytest_plugin.py | 397 -------
python/tvm/contrib/hexagon/session.py | 287 -----
python/tvm/ir/supply.py | 97 +-
python/tvm/relax/frontend/onnx/onnx_frontend.py | 38 +-
.../tvm/relax/frontend/tflite/tflite_frontend.py | 65 +-
python/tvm/relax/transform/legalize_ops/nn.py | 13 +-
python/tvm/runtime/__init__.py | 2 +-
python/tvm/runtime/_ffi_node_api.py | 5 +-
python/tvm/support/nvcc.py | 86 +-
python/tvm/testing/__init__.py | 1 +
python/tvm/testing/env.py | 520 +++++++++
python/tvm/testing/plugin.py | 83 +-
python/tvm/testing/utils.py | 1098 +-------------------
python/tvm/tirx/script/builder/external_kernel.py | 2 +-
src/arith/analyzer.cc | 4 +-
src/arith/canonical_simplify.cc | 62 +-
src/arith/conjunctive_normal_form.cc | 13 +-
src/arith/const_fold.h | 32 +-
src/arith/detect_linear_equation.cc | 10 +-
src/arith/int_constraints.cc | 2 +-
src/arith/int_set.cc | 33 +-
src/arith/ir_mutator_with_analyzer.cc | 10 +-
src/arith/iter_affine_map.cc | 68 +-
src/arith/modular_set.cc | 2 +-
src/arith/pattern_match.h | 2 +-
src/arith/presburger_set.cc | 30 +-
src/arith/product_normal_form.h | 4 +-
src/arith/rewrite_simplify.cc | 45 +-
src/arith/solve_linear_equation.cc | 26 +-
src/arith/solve_linear_inequality.cc | 24 +-
src/backend/cuda/codegen/codegen_cuda.cc | 33 +-
src/backend/cuda/codegen/literal/cuda_half_t.h | 6 +-
src/backend/cuda/op/target_builtin.cc | 6 +
.../hexagon/codegen/llvm/intrin_rule_hexagon.cc | 15 +-
src/backend/hexagon/runtime/profiler/README.md | 85 --
.../hexagon/runtime/rpc/android_bash.sh.template | 31 -
src/backend/metal/codegen/codegen_metal.cc | 10 +-
src/backend/opencl/codegen/codegen_opencl.cc | 14 +-
src/backend/rocm/codegen/llvm/intrin_rule_rocm.cc | 6 +-
src/backend/trn/transform/lower_trainium_layout.cc | 2 +-
src/backend/vulkan/codegen/codegen_spirv.cc | 2 +-
src/backend/vulkan/codegen/spirv_utils.cc | 10 +-
src/backend/webgpu/codegen/codegen_webgpu.cc | 10 +-
src/ir/access_path_repr.cc | 49 -
src/ir/expr.cc | 2 +-
src/ir/global_var_supply.cc | 111 --
src/ir/module.cc | 12 +-
src/ir/name_supply.cc | 108 --
src/ir/unique_name_supply.cc | 114 ++
src/relax/analysis/struct_info_analysis.cc | 66 +-
src/relax/analysis/tir_op_pattern_kind.cc | 6 +-
src/relax/backend/contrib/clml/codegen.cc | 7 +-
src/relax/backend/contrib/cutlass/codegen.cc | 6 +-
src/relax/backend/contrib/tensorrt/codegen.cc | 3 +-
src/relax/backend/contrib/utils.cc | 2 +-
src/relax/backend/vm/codegen_vm_tir.cc | 7 +-
src/relax/ir/binding_rewrite.cc | 4 +-
src/relax/ir/block_builder.cc | 6 +-
src/relax/ir/dataflow_expr_rewriter.cc | 2 +-
src/relax/ir/dataflow_matcher.cc | 8 +-
src/relax/ir/dataflow_matcher.h | 2 +-
src/relax/ir/emit_te.cc | 2 +-
src/relax/ir/expr.cc | 4 +-
src/relax/ir/expr_functor.cc | 2 +-
src/relax/op/distributed/statistical.cc | 2 +-
src/relax/op/image/resize.cc | 8 +-
src/relax/op/memory/view.cc | 6 +-
src/relax/op/nn/convolution.cc | 96 +-
src/relax/op/nn/nn.cc | 6 +-
src/relax/op/nn/pooling.cc | 102 +-
src/relax/op/tensor/binary.cc | 2 +-
src/relax/op/tensor/index.cc | 4 +-
src/relax/op/tensor/inspect.cc | 24 +-
src/relax/op/tensor/manipulate.cc | 23 +-
src/relax/op/tensor/set.cc | 14 +-
src/relax/op/tensor/statistical.cc | 14 +-
src/relax/op/vision/multibox_transform_loc.cc | 2 +-
src/relax/op/vision/nms.cc | 6 +-
src/relax/op/vision/roi_align.cc | 9 +-
src/relax/op/vision/roi_pool.cc | 4 +-
src/relax/transform/adjust_matmul_order.cc | 4 +-
src/relax/transform/allocate_workspace.cc | 10 +-
src/relax/transform/alter_op_impl.cc | 2 +-
src/relax/transform/combine_parallel_matmul.cc | 2 +-
src/relax/transform/dataflow_inplace.cc | 4 +-
src/relax/transform/fold_constant.cc | 2 +-
src/relax/transform/fuse_tir.cc | 4 +-
src/relax/transform/infer_amp_utils.cc | 4 +-
src/relax/transform/lazy_transform_params.cc | 19 +-
src/relax/transform/lower_alloc_tensor.cc | 4 +-
src/relax/transform/normalize.cc | 2 +-
src/relax/transform/rewrite_cuda_graph.cc | 10 +-
.../transform/split_layout_rewrite_preproc.cc | 4 +-
src/relax/transform/static_plan_block_memory.cc | 16 +-
.../extra/contrib/tensorrt/tensorrt_builder.cc | 128 +--
.../extra/contrib/tensorrt/tensorrt_builder.h | 18 +-
.../extra/contrib/tensorrt/tensorrt_calibrator.h | 5 +-
src/runtime/extra/contrib/tensorrt/tensorrt_ops.cc | 161 ++-
src/runtime/extra/contrib/tensorrt/tensorrt_ops.h | 9 +-
.../extra/contrib/tensorrt/tensorrt_runtime.cc | 138 +--
.../extra/contrib/tensorrt/tensorrt_utils.h | 21 +-
src/s_tir/analysis/identify_memcpy.cc | 4 +-
.../analysis/sblock_access_region_detector.cc | 2 +-
src/s_tir/backend/adreno/inject_texture_alloc.cc | 4 +-
src/s_tir/backend/adreno/texture_flatten.cc | 2 +-
src/s_tir/meta_schedule/database/json_database.cc | 2 +-
.../feature_extractor/per_store_feature.cc | 4 +-
src/s_tir/meta_schedule/mutator/mutate_parallel.cc | 4 +-
.../postproc/rewrite_cooperative_fetch.cc | 25 +-
.../postproc/rewrite_parallel_vectorize_unroll.cc | 7 +-
.../postproc/rewrite_unbound_block.cc | 2 +-
.../meta_schedule/postproc/verify_gpu_code.cc | 6 +-
.../meta_schedule/schedule/cuda/thread_bind.cc | 9 +-
src/s_tir/meta_schedule/schedule/cuda/winograd.cc | 59 +-
.../meta_schedule/schedule_rule/add_rfactor.cc | 2 +-
.../schedule_rule/multi_level_tiling.cc | 4 +-
.../multi_level_tiling_tensor_core.cc | 17 +-
.../schedule_rule/parallel_vectorize_unroll.cc | 4 +-
.../meta_schedule/task_scheduler/task_scheduler.cc | 8 +-
src/s_tir/schedule/analysis/layout.cc | 12 +-
src/s_tir/schedule/concrete_schedule.cc | 4 +-
src/s_tir/schedule/concrete_schedule.h | 4 +-
src/s_tir/schedule/primitive/blockize_tensorize.cc | 10 +-
src/s_tir/schedule/primitive/cache_index.cc | 10 +-
src/s_tir/schedule/primitive/cache_read_write.cc | 42 +-
src/s_tir/schedule/primitive/compute_at.cc | 8 +-
src/s_tir/schedule/primitive/compute_inline.cc | 6 +-
src/s_tir/schedule/primitive/decompose_padding.cc | 4 +-
.../schedule/primitive/layout_transformation.cc | 18 +-
.../schedule/primitive/loop_transformation.cc | 13 +-
src/s_tir/schedule/primitive/pad_einsum.cc | 10 +-
src/s_tir/schedule/primitive/read_write_at.cc | 5 +-
src/s_tir/schedule/primitive/reduction.cc | 30 +-
src/s_tir/schedule/state.cc | 6 +-
src/s_tir/schedule/trace.cc | 2 +-
src/s_tir/schedule/traced_schedule.cc | 87 +-
src/s_tir/schedule/transform.cc | 6 +-
src/s_tir/support/nd_int_set.h | 2 +-
src/s_tir/transform/bound_checker.cc | 4 +-
src/s_tir/transform/canonicalize_loop.cc | 4 +-
src/s_tir/transform/compact_buffer_region.cc | 24 +-
src/s_tir/transform/decorate_device_scope.cc | 2 +-
src/s_tir/transform/default_gpu_schedule.cc | 9 +-
src/s_tir/transform/inject_double_buffer.cc | 14 +-
src/s_tir/transform/inject_ptx_ldg32.cc | 20 +-
src/s_tir/transform/inject_software_pipeline.cc | 23 +-
src/s_tir/transform/inject_virtual_thread.cc | 6 +-
src/s_tir/transform/loop_partition.cc | 16 +-
.../transform/lower_cross_thread_reduction.cc | 26 +-
src/s_tir/transform/lower_match_buffer.cc | 4 +-
src/s_tir/transform/lower_opaque_block.cc | 4 +-
src/s_tir/transform/lower_thread_allreduce.cc | 18 +-
src/s_tir/transform/lower_vtcm_alloc.cc | 2 +-
src/s_tir/transform/memhammer_coalesce.cc | 4 +-
.../transform/memhammer_intermediate_stage.cc | 8 +-
src/s_tir/transform/memhammer_lower_auto_copy.cc | 7 +-
.../transform/memhammer_tensorcore_rewrite.cc | 20 +-
.../plan_update_buffer_allocation_location.cc | 2 +-
src/s_tir/transform/thread_storage_sync.cc | 2 +-
src/s_tir/transform/transform_mma_buffer_layout.cc | 12 +-
.../transform/using_assume_to_reduce_branches.cc | 4 +-
src/script/printer/ir/distributed.cc | 2 +-
src/target/intrin_rule.cc | 30 +-
src/target/llvm/codegen_cpu.cc | 4 +-
src/target/llvm/codegen_llvm.cc | 24 +-
src/target/llvm/codegen_llvm.h | 3 +
src/target/llvm/codegen_params.cc | 4 +-
src/target/llvm/codegen_x86_64.cc | 2 +-
src/target/llvm/intrin_rule_llvm.cc | 11 +-
src/target/source/codegen_c.cc | 2 +-
src/target/source/codegen_c.h | 4 +-
src/target/source/codegen_source_base.cc | 2 +-
src/target/source/codegen_source_base.h | 6 +-
src/te/operation/create_primfunc.cc | 18 +-
src/te/tensor.cc | 2 +-
src/tirx/analysis/exec_context.cc | 2 +-
src/tirx/analysis/verify_memory.cc | 4 +-
src/tirx/ir/buffer.cc | 18 +-
src/tirx/ir/exec_scope.cc | 9 +-
src/tirx/ir/expr.cc | 8 +-
src/tirx/ir/index_map.cc | 8 +-
src/tirx/ir/layout/tile_core.cc | 2 +-
src/tirx/ir/layout/tile_slice.cc | 10 +-
src/tirx/ir/layout/utils.cc | 2 +-
src/tirx/ir/script/script_complete.cc | 2 +-
src/tirx/ir/stmt.cc | 6 +-
src/tirx/op/op.cc | 88 +-
src/tirx/script/builder/frame.cc | 9 +-
src/tirx/script/builder/ir.cc | 10 +-
src/tirx/transform/bind_target.cc | 7 +-
src/tirx/transform/dtype_conversion.cc | 11 +-
src/tirx/transform/force_narrow_index_to_i32.cc | 2 +-
src/tirx/transform/ir_utils.h | 15 +-
src/tirx/transform/lower_intrin.cc | 18 +-
src/tirx/transform/lower_tirx_cleanup.cc | 2 +-
src/tirx/transform/lower_tirx_opaque.cc | 2 +-
src/tirx/transform/lower_tvm_builtin.cc | 49 +-
src/tirx/transform/lower_warp_memory.cc | 13 +-
src/tirx/transform/make_packed_api.cc | 44 +-
src/tirx/transform/split_host_device.cc | 19 +-
src/tirx/transform/storage_rewrite.cc | 22 +-
src/tirx/transform/tile_primitive_dispatch.cc | 12 +-
src/tirx/transform/tvm_ffi_binder.cc | 46 +-
src/tirx/transform/unroll_loop.cc | 2 +-
src/tirx/transform/vectorize_loop.cc | 99 +-
src/topi/einsum.cc | 6 +-
tests/cpp/arith_simplify_test.cc | 14 +-
tests/cpp/ir_functor_test.cc | 14 +-
tests/cpp/nested_msg_test.cc | 34 +-
tests/cpp/target_test.cc | 2 +-
tests/cpp/tir_scalable_datatype.cc | 11 +-
tests/lint/check_asf_header.py | 2 +-
.../test_minimal_target_codegen_llvm.py | 4 +-
tests/python/ci/test_ci.py | 43 -
.../python/codegen/test_codegen_error_handling.py | 4 +-
tests/python/codegen/test_gpu_codegen_allreduce.py | 4 +-
tests/python/codegen/test_inject_ptx_ldg32.py | 5 +-
tests/python/codegen/test_target_codegen_blob.py | 2 +-
tests/python/codegen/test_target_codegen_bool.py | 3 +-
.../codegen/test_target_codegen_cross_llvm.py | 4 +-
tests/python/codegen/test_target_codegen_cuda.py | 96 +-
.../codegen/test_target_codegen_cuda_fastmath.py | 5 +-
.../python/codegen/test_target_codegen_cuda_fp4.py | 10 +-
.../python/codegen/test_target_codegen_cuda_fp8.py | 34 +-
tests/python/codegen/test_target_codegen_device.py | 8 +-
tests/python/codegen/test_target_codegen_extern.py | 3 +-
.../codegen/test_target_codegen_gpu_common.py | 4 +-
.../python/codegen/test_target_codegen_hexagon.py | 7 +-
tests/python/codegen/test_target_codegen_llvm.py | 108 +-
tests/python/codegen/test_target_codegen_metal.py | 32 +-
tests/python/codegen/test_target_codegen_opencl.py | 27 +-
tests/python/codegen/test_target_codegen_riscv.py | 51 +-
tests/python/codegen/test_target_codegen_rocm.py | 20 +-
tests/python/codegen/test_target_codegen_vulkan.py | 14 +-
tests/python/codegen/test_target_codegen_x86.py | 3 +-
tests/python/contrib/test_cutlass_gemm.py | 22 +-
tests/python/contrib/test_hexagon/README.md | 130 ---
tests/python/contrib/test_hexagon/README_RPC.md | 371 -------
tests/python/contrib/test_hexagon/__init__.py | 19 -
.../python/contrib/test_hexagon/benchmark_util.py | 277 -----
tests/python/contrib/test_hexagon/conftest.py | 27 -
tests/python/contrib/test_hexagon/conv2d/README.md | 37 -
.../python/contrib/test_hexagon/conv2d/__init__.py | 19 -
.../test_hexagon/conv2d/test_conv2d_blocked.md | 494 ---------
.../test_hexagon/conv2d/test_conv2d_conv2d.md | 986 ------------------
.../python/contrib/test_hexagon/infrastructure.py | 376 -------
tests/python/contrib/test_hexagon/pytest_util.py | 176 ----
.../test_hexagon/test_async_dma_pipeline.py | 888 ----------------
.../test_hexagon/test_benchmark_elemwise_add.py | 424 --------
.../test_hexagon/test_benchmark_maxpool2d.py | 357 -------
.../contrib/test_hexagon/test_dma_builtin.py | 188 ----
.../contrib/test_hexagon/test_memory_alloc.py | 84 --
.../contrib/test_hexagon/test_meta_schedule.py | 369 -------
.../contrib/test_hexagon/test_parallel_hvx.py | 238 -----
.../test_hexagon/test_parallel_hvx_load_vtcm.py | 560 ----------
.../contrib/test_hexagon/test_parallel_scalar.py | 175 ----
.../test_relax_2d_buffer_allocation.py | 93 --
.../contrib/test_hexagon/test_relax_integration.py | 114 --
.../contrib/test_hexagon/test_run_unit_tests.py | 180 ----
tests/python/contrib/test_hexagon/test_sigmoid.py | 118 ---
.../test_hexagon/test_software_pipeline_async.py | 204 ----
tests/python/contrib/test_hexagon/test_take.py | 397 -------
.../contrib/test_hexagon/test_thread_pool.py | 106 --
tests/python/contrib/test_hexagon/test_vtcm.py | 93 --
.../contrib/test_hexagon/test_vtcm_bandwidth.py | 193 ----
tests/python/contrib/test_hipblas.py | 8 +-
tests/python/contrib/test_random.py | 3 +-
.../python/contrib/test_tir_triton_integration.py | 4 +-
tests/python/disco/test_callback.py | 5 +-
tests/python/disco/test_loader.py | 5 +-
tests/python/disco/test_nvshmem.py | 6 +-
...t_name_supply.py => test_unique_name_supply.py} | 27 +-
tests/python/nightly/test_nnapi/test_network.py | 3 +-
tests/python/relax/backend/adreno/test_clml_ops.py | 38 +-
.../relax/backend/adreno/test_texture_ops.py | 83 +-
tests/python/relax/backend/adreno/utils.py | 50 +-
tests/python/relax/test_codegen_cublas.py | 15 +-
tests/python/relax/test_codegen_cudnn.py | 6 +-
tests/python/relax/test_codegen_cutlass.py | 5 +-
tests/python/relax/test_codegen_hipblas.py | 6 +-
tests/python/relax/test_codegen_tensorrt.py | 210 +++-
tests/python/relax/test_contrib_vllm.py | 8 +-
tests/python/relax/test_frontend_dynamo.py | 19 +-
.../relax/test_frontend_from_exported_program.py | 3 +-
tests/python/relax/test_frontend_from_fx.py | 7 +-
...test_frontend_nn_llm_sequence_prefill_masked.py | 29 +-
tests/python/relax/test_frontend_nn_op.py | 11 +-
tests/python/relax/test_frontend_onnx.py | 40 +
tests/python/relax/test_frontend_stablehlo.py | 31 +-
tests/python/relax/test_frontend_tflite.py | 94 ++
tests/python/relax/test_op_vision.py | 31 +-
...ime_builtin_paged_attention_kv_cache_mla_tir.py | 17 +-
...runtime_builtin_paged_attention_kv_cache_tir.py | 33 +-
.../python/relax/test_runtime_builtin_rnn_state.py | 13 +-
tests/python/relax/test_tir_call_source_kernel.py | 5 +-
tests/python/relax/test_transform_codegen_pass.py | 15 +-
.../python/relax/test_transform_legalize_ops_nn.py | 35 +
tests/python/relax/test_vm_build.py | 10 +-
tests/python/relax/test_vm_cuda_graph.py | 7 +-
tests/python/relax/test_vm_multi_device.py | 6 +-
tests/python/relax/texture/test_texture_nd.py | 6 +-
tests/python/runtime/test_runtime_dlpack.py | 7 +-
tests/python/runtime/test_runtime_module_export.py | 5 +-
tests/python/runtime/test_runtime_module_load.py | 8 +-
tests/python/runtime/test_runtime_rpc.py | 37 +-
tests/python/s_tir/dlight/test_primitives.py | 6 +-
.../test_meta_schedule_mma_tensorize.py | 13 +-
.../test_meta_schedule_space_post_opt.py | 6 +-
.../meta_schedule/test_meta_schedule_tune_tir.py | 6 +-
..._tir_schedule_tensorize_ldmatrix_mma_numeric.py | 17 +-
.../test_tir_schedule_tensorize_mfma_numeric.py | 11 +-
.../test_s_tir_transform_inject_ptx_async_copy.py | 13 +-
...est_s_tir_transform_inject_software_pipeline.py | 7 +-
.../transform/test_s_tir_transform_thread_sync.py | 9 +-
tests/python/target/test_arm_target.py | 9 +-
tests/python/target/test_target_target.py | 13 +-
tests/python/testing/test_env.py | 205 ++++
tests/python/tirx-base/test_tir_imm_values.py | 13 +-
tests/python/tirx-base/test_tir_ptx_cp_async.py | 5 +-
.../tirx-base/test_tir_ptx_griddepcontrol.py | 5 +-
tests/python/tirx-base/test_tir_ptx_ldmatrix.py | 5 +-
tests/python/tirx-base/test_tir_ptx_mma.py | 61 +-
tests/python/tirx-base/test_tir_ptx_mma_sp.py | 8 +-
.../tirx-base/test_tir_ptx_scalar_f32_math.py | 5 +-
.../test_tir_transform_lower_intrin.py | 8 +-
.../test_tir_transform_lower_tvm_builtin.py | 3 +-
tests/python/tirx/codegen/test_codegen_ampere.py | 7 +-
.../python/tirx/codegen/test_codegen_blackwell.py | 19 +-
tests/python/tirx/codegen/test_codegen_cuda.py | 11 +
tests/python/tirx/codegen/test_codegen_hopper.py | 55 +-
tests/python/tirx/codegen/test_codegen_nvshmem.py | 3 +
tests/python/tirx/codegen/test_cuda_copy.py | 11 +
tests/python/tirx/codegen/test_cuda_cta_reduce.py | 13 +
tests/python/tirx/codegen/test_cuda_warp_reduce.py | 13 +
tests/python/tirx/conftest.py | 40 +
.../tile_primitive/cuda/copy/test_fallback.py | 5 +
.../tile_primitive/cuda/copy/test_gmem_smem.py | 8 +-
.../tile_primitive/cuda/copy/test_ld_stmatrix.py | 13 +-
.../operator/tile_primitive/cuda/copy/test_reg.py | 5 +
.../tile_primitive/cuda/copy_async/test_dsmem.py | 4 +-
.../tile_primitive/cuda/copy_async/test_ldgsts.py | 3 +
.../cuda/copy_async/test_smem_tmem.py | 13 +-
.../tile_primitive/cuda/copy_async/test_tma.py | 16 +-
.../tile_primitive/cuda/copy_async/test_tmem.py | 7 +
.../cuda/copy_async/test_tmem_16xnb.py | 144 +++
.../tile_primitive/cuda/elementwise/test_binary.py | 13 +
.../tile_primitive/cuda/elementwise/test_fma.py | 15 +
.../tile_primitive/cuda/elementwise/test_unary.py | 232 ++++-
.../cuda/gemm/test_gemm_mma_m16n8k_.py | 16 +-
.../cuda/gemm_async/test_gemm_async.py | 23 +
.../cuda/permute_layout/test_permute_layout.py | 7 +
.../cuda/reduction/test_reduction.py | 23 +
tests/python/tirx/test_bench_utils.py | 17 +-
tests/python/tirx/test_buffer_print.py | 4 +
tests/python/tirx/test_control_flow.py | 8 +
tests/python/tirx/test_layout.py | 35 +
tests/python/tvmscript/test_tvmscript_ops.py | 3 +-
tests/scripts/task_python_integration_gpuonly.sh | 1 +
tests/scripts/task_python_unittest.sh | 1 +
tests/scripts/task_python_unittest_gpuonly.sh | 1 +
web/src/artifact_cache.ts | 14 +-
web/src/webgpu.ts | 17 +-
442 files changed, 5451 insertions(+), 16228 deletions(-)
rename {.claude => .agents}/scripts/monitor_gpu.sh (86%)
rename .claude/commands/tir-bench.md => .agents/skills/tir-bench/SKILL.md (100%)
rename .claude/commands/tir-build.md => .agents/skills/tir-build/SKILL.md (69%)
rename .claude/commands/tir-test.md => .agents/skills/tir-test/SKILL.md (95%)
create mode 100644 AGENTS.md
delete mode 100644 apps/hexagon_api/CMakeLists.txt
delete mode 100644 apps/hexagon_api/README.md
delete mode 100644 apps/hexagon_launcher/CMakeLists.txt
delete mode 100644 apps/hexagon_launcher/README.md
delete mode 100644 apps/hexagon_launcher/cmake/HexagonLauncher.cmake
delete mode 100644 apps/hexagon_launcher/cmake/android/CMakeLists.txt
delete mode 100644 apps/hexagon_launcher/cmake/hexagon/CMakeLists.txt
delete mode 100644 apps/hexagon_launcher/launcher_android.cc
delete mode 100644 apps/hexagon_launcher/launcher_core.cc
delete mode 100644 apps/hexagon_launcher/launcher_core.h
delete mode 100644 apps/hexagon_launcher/launcher_hexagon.cc
delete mode 100644 apps/hexagon_launcher/launcher_main.cc
delete mode 100644 apps/hexagon_launcher/launcher_rpc.idl
delete mode 100644 apps/hexagon_launcher/launcher_util.cc
delete mode 100644 apps/hexagon_launcher/launcher_util.h
delete mode 100755 ci/scripts/jenkins/check_pr.py
delete mode 100644 include/tvm/ir/global_var_supply.h
delete mode 100644 include/tvm/ir/name_supply.h
create mode 100644 include/tvm/ir/unique_name_supply.h
delete mode 100644 licenses/LICENSE.blockingconcurrentqueue.txt
delete mode 100644 licenses/LICENSE.concurrentqueue.txt
delete mode 100644 licenses/LICENSE.rang.txt
delete mode 100644 python/tvm/contrib/hexagon/build.py
delete mode 100644 python/tvm/contrib/hexagon/hexagon_profiler.py
delete mode 100644 python/tvm/contrib/hexagon/meta_schedule.py
delete mode 100644 python/tvm/contrib/hexagon/profiling/process_lwp_data.py
delete mode 100644 python/tvm/contrib/hexagon/pytest_plugin.py
delete mode 100644 python/tvm/contrib/hexagon/session.py
create mode 100644 python/tvm/testing/env.py
delete mode 100644 src/backend/hexagon/runtime/profiler/README.md
delete mode 100644 src/backend/hexagon/runtime/rpc/android_bash.sh.template
delete mode 100644 src/ir/access_path_repr.cc
delete mode 100644 src/ir/global_var_supply.cc
delete mode 100644 src/ir/name_supply.cc
create mode 100644 src/ir/unique_name_supply.cc
delete mode 100644 tests/python/contrib/test_hexagon/README.md
delete mode 100644 tests/python/contrib/test_hexagon/README_RPC.md
delete mode 100644 tests/python/contrib/test_hexagon/__init__.py
delete mode 100644 tests/python/contrib/test_hexagon/benchmark_util.py
delete mode 100644 tests/python/contrib/test_hexagon/conftest.py
delete mode 100644 tests/python/contrib/test_hexagon/conv2d/README.md
delete mode 100644 tests/python/contrib/test_hexagon/conv2d/__init__.py
delete mode 100644 tests/python/contrib/test_hexagon/conv2d/test_conv2d_blocked.md
delete mode 100644 tests/python/contrib/test_hexagon/conv2d/test_conv2d_conv2d.md
delete mode 100644 tests/python/contrib/test_hexagon/infrastructure.py
delete mode 100644 tests/python/contrib/test_hexagon/pytest_util.py
delete mode 100644 tests/python/contrib/test_hexagon/test_async_dma_pipeline.py
delete mode 100644 tests/python/contrib/test_hexagon/test_benchmark_elemwise_add.py
delete mode 100644 tests/python/contrib/test_hexagon/test_benchmark_maxpool2d.py
delete mode 100644 tests/python/contrib/test_hexagon/test_dma_builtin.py
delete mode 100644 tests/python/contrib/test_hexagon/test_memory_alloc.py
delete mode 100644 tests/python/contrib/test_hexagon/test_meta_schedule.py
delete mode 100644 tests/python/contrib/test_hexagon/test_parallel_hvx.py
delete mode 100644 tests/python/contrib/test_hexagon/test_parallel_hvx_load_vtcm.py
delete mode 100644 tests/python/contrib/test_hexagon/test_parallel_scalar.py
delete mode 100644 tests/python/contrib/test_hexagon/test_relax_2d_buffer_allocation.py
delete mode 100644 tests/python/contrib/test_hexagon/test_relax_integration.py
delete mode 100644 tests/python/contrib/test_hexagon/test_run_unit_tests.py
delete mode 100644 tests/python/contrib/test_hexagon/test_sigmoid.py
delete mode 100644 tests/python/contrib/test_hexagon/test_software_pipeline_async.py
delete mode 100644 tests/python/contrib/test_hexagon/test_take.py
delete mode 100644 tests/python/contrib/test_hexagon/test_thread_pool.py
delete mode 100644 tests/python/contrib/test_hexagon/test_vtcm.py
delete mode 100644 tests/python/contrib/test_hexagon/test_vtcm_bandwidth.py
rename tests/python/ir/{test_name_supply.py => test_unique_name_supply.py} (62%)
create mode 100644 tests/python/testing/test_env.py
create mode 100644 tests/python/tirx/conftest.py