This is an automated email from the ASF dual-hosted git repository.
github-bot pushed a change to branch nightly
in repository https://gitbox.apache.org/repos/asf/tvm.git
from 75f8589732 [CI] Update images to `20260301-134651-63f099ad` (#18827)
add 21e5225484 [ADRENO] Revive and consolicate Adreno features (#18867)
add 0fba1606be [REFACTOR][TIR] Introduce AllocBuffer and phase out Allocate+DeclBuffer (#18865)
add 969fad363b [TIR] Add VisitBufferDef/VisitBufferUse to base StmtVisitor/StmtMutator (#18873)
No new revisions were added by this update.
Summary of changes:
cmake/modules/contrib/CLML.cmake | 6 +-
.../tensor_ir/tutorials/tir_transformation.py | 25 +-
include/tvm/script/ir_builder/tir/frame.h | 34 +-
include/tvm/script/ir_builder/tir/ir.h | 33 +-
include/tvm/tir/stmt.h | 118 +++--
include/tvm/tir/stmt_functor.h | 50 ++-
python/tvm/relax/backend/adreno/clml.py | 103 ++++-
python/tvm/relax/backend/adreno/pipeline.py | 5 +-
python/tvm/relax/backend/gpu_generic/cumsum.py | 6 +-
python/tvm/relax/backend/gpu_generic/sampling.py | 26 +-
python/tvm/relax/frontend/nn/llm/kv_cache.py | 220 ++++-----
python/tvm/relax/frontend/nn/llm/tree_attn.py | 104 ++---
python/tvm/s_tir/schedule/schedule.py | 64 +--
python/tvm/script/ir_builder/tir/frame.py | 7 +
python/tvm/script/ir_builder/tir/ir.py | 61 ++-
python/tvm/tir/__init__.py | 2 +-
python/tvm/tir/functor.py | 35 +-
python/tvm/tir/stmt.py | 59 +--
src/relax/backend/contrib/clml/codegen.cc | 3 -
.../backend/contrib/codegen_json/codegen_json.h | 4 +-
src/relax/transform/dataflow_inplace.cc | 7 +
src/runtime/contrib/clml/clml_memory_planner.cc | 6 +-
src/runtime/contrib/clml/clml_runtime.cc | 497 ++++++++++++++++-----
src/runtime/contrib/clml/clml_runtime.h | 92 +++-
src/runtime/contrib/clml/clml_utils.cc | 27 +-
src/runtime/contrib/clml/clml_utils.h | 9 +-
src/s_tir/analysis/calculate_allocated_memory.cc | 80 ++--
src/s_tir/analysis/estimate_flops.cc | 2 +-
src/s_tir/analysis/is_pure_function.cc | 4 +-
src/s_tir/analysis/verify_gpu_code.cc | 31 +-
src/s_tir/backend/adreno/inject_texture_alloc.cc | 28 +-
src/s_tir/schedule/primitive/cache_read_write.cc | 4 +-
src/s_tir/schedule/transform.cc | 9 +-
src/s_tir/transform/bound_checker.cc | 7 +-
src/s_tir/transform/compact_buffer_region.cc | 42 +-
src/s_tir/transform/inject_double_buffer.cc | 29 +-
src/s_tir/transform/inject_ptx_ldg32.cc | 15 +-
src/s_tir/transform/inject_virtual_thread.cc | 50 +--
src/s_tir/transform/lower_opaque_block.cc | 5 +-
src/s_tir/transform/lower_thread_allreduce.cc | 19 +-
src/s_tir/transform/lower_vtcm_alloc.cc | 13 +-
.../transform/merge_shared_memory_allocations.cc | 129 +++---
src/s_tir/transform/profile_instrumentation.cc | 4 +-
src/s_tir/transform/renew_defs.cc | 107 ++---
src/s_tir/transform/tensorcore_infer_fragment.cc | 11 +-
src/script/ir_builder/tir/frame.cc | 20 +-
src/script/ir_builder/tir/ir.cc | 50 ++-
src/script/printer/tir/block.cc | 2 +-
src/script/printer/tir/function.cc | 2 +-
src/script/printer/tir/stmt.cc | 115 ++---
src/script/printer/tir/utils.h | 7 +-
src/target/llvm/codegen_amdgpu.cc | 31 +-
src/target/llvm/codegen_llvm.cc | 29 +-
src/target/llvm/codegen_llvm.h | 2 +-
src/target/llvm/codegen_nvptx.cc | 34 +-
src/target/source/codegen_c.cc | 22 +-
src/target/source/codegen_c.h | 2 +-
src/target/source/codegen_cuda.cc | 43 +-
src/target/source/codegen_cuda.h | 2 +-
src/target/source/codegen_metal.cc | 33 +-
src/target/source/codegen_metal.h | 2 +-
src/target/source/codegen_opencl.cc | 11 +-
src/target/source/codegen_opencl.h | 2 +-
src/target/source/codegen_webgpu.cc | 24 +-
src/target/source/codegen_webgpu.h | 2 +-
src/target/spirv/codegen_spirv.cc | 35 +-
src/target/spirv/codegen_spirv.h | 6 +-
src/tir/analysis/var_use_def_analysis.cc | 49 +-
src/tir/analysis/var_use_def_analysis.h | 12 +-
src/tir/analysis/verify_ssa.cc | 4 +-
src/tir/analysis/verify_well_formed.cc | 6 +-
src/tir/ir/data_type_rewriter.cc | 86 +---
src/tir/ir/data_type_rewriter.h | 8 +-
src/tir/ir/py_functor.cc | 24 +-
src/tir/ir/script/script_complete.cc | 11 +
src/tir/ir/specialize.cc | 31 +-
src/tir/ir/stmt.cc | 70 +--
src/tir/ir/stmt_functor.cc | 252 +++++------
src/tir/ir/tir_visitor_with_path.cc | 32 +-
src/tir/ir/tir_visitor_with_path.h | 10 +-
src/tir/transform/flatten_buffer.cc | 69 +--
src/tir/transform/ir_utils.cc | 47 +-
src/tir/transform/lower_custom_datatypes.cc | 26 +-
src/tir/transform/lower_device_kernel_launch.cc | 12 +-
src/tir/transform/lower_tvm_builtin.cc | 49 +-
src/tir/transform/lower_warp_memory.cc | 31 +-
src/tir/transform/remove_no_op.cc | 6 +-
src/tir/transform/simplify.cc | 13 +
src/tir/transform/storage_rewrite.cc | 202 +++++----
src/tir/transform/unsupported_dtype_legalize.cc | 80 ++--
src/tir/transform/update_pointer_storage_scope.cc | 13 +-
src/tir/transform/update_pointer_storage_scope.h | 2 +-
src/tir/transform/vectorize_loop.cc | 37 +-
tests/cpp/ir_functor_test.cc | 102 +++--
tests/python/codegen/test_inject_ptx_ldg32.py | 2 +-
tests/python/codegen/test_target_codegen_bool.py | 4 +-
tests/python/codegen/test_target_codegen_cuda.py | 6 +-
.../python/codegen/test_target_codegen_cuda_fp8.py | 6 +-
tests/python/codegen/test_target_codegen_device.py | 2 +-
tests/python/codegen/test_target_codegen_llvm.py | 8 +-
tests/python/codegen/test_target_codegen_rocm.py | 6 +-
tests/python/codegen/test_target_codegen_vulkan.py | 10 +-
.../test_hexagon/test_async_dma_pipeline.py | 18 +-
.../relax/backend/{clml => adreno}/mod_utils.py | 301 +++++++++----
.../test_clml_ops.py} | 424 ++++++++++++------
.../adreno/test_texture_network.py} | 9 +-
.../adreno/test_texture_ops.py} | 185 ++++----
.../adreno/test_transform_annotate_custom_scope.py | 0
.../test_transform_fold_vdevice_scope_change.py | 0
tests/python/relax/backend/adreno/utils.py | 249 +++++++++++
tests/python/relax/backend/clml/conftest.py | 42 --
.../backend/clml/test_op_exec_clml_codegen.py | 331 --------------
tests/python/relax/backend/clml/utils.py | 110 -----
.../test_distributed_transform_lower_distir.py | 16 +-
...ributed_transform_lower_global_to_local_view.py | 40 +-
...est_distributed_transform_propagate_sharding.py | 20 +-
tests/python/relax/test_analysis.py | 4 +-
.../python/relax/test_analysis_detect_recursion.py | 2 +-
.../python/relax/test_backend_dispatch_sampling.py | 30 +-
tests/python/relax/test_codegen_cutlass.py | 18 +-
tests/python/relax/test_dataflow_inplace.py | 2 +-
tests/python/relax/test_frontend_dynamo.py | 4 +-
.../test_transform_annotate_tir_op_pattern.py | 8 +-
tests/python/relax/test_transform_fuse_ops.py | 10 +-
tests/python/relax/test_transform_fuse_tir.py | 34 +-
.../relax/test_transform_legalize_ops_grad.py | 30 +-
.../test_transform_legalize_ops_manipulate.py | 4 +-
.../python/relax/test_transform_legalize_ops_nn.py | 402 ++++++++---------
...st_transform_legalize_ops_search_statistical.py | 84 ++--
.../relax/test_transform_rewrite_cuda_graph.py | 8 +-
.../test_transform_split_layout_rewrite_preproc.py | 8 +-
tests/python/relax/texture/adreno_utils.py | 202 ---------
...st_s_tir_analysis_calculate_allocated_memory.py | 8 +-
.../s_tir/analysis/test_sblock_access_region.py | 52 +--
.../analysis/test_sblock_buffer_access_lca.py | 4 +-
.../s_tir/base/test_sblock_dependence_info.py | 2 +-
.../s_tir/base/test_tir_te_extern_primfunc.py | 8 +-
tests/python/s_tir/dlight/test_benchmark.py | 6 +-
tests/python/s_tir/dlight/test_cpu_gemv.py | 46 +-
tests/python/s_tir/dlight/test_gpu_conv.py | 8 +-
tests/python/s_tir/dlight/test_gpu_fallback.py | 8 +-
tests/python/s_tir/dlight/test_gpu_gemv.py | 104 ++---
.../s_tir/dlight/test_gpu_general_reduction.py | 72 +--
.../python/s_tir/dlight/test_gpu_low_batch_gemv.py | 42 +-
tests/python/s_tir/dlight/test_gpu_matmul.py | 82 ++--
.../s_tir/dlight/test_gpu_matmul_tensorize.py | 88 ++--
tests/python/s_tir/dlight/test_gpu_reduction.py | 86 ++--
tests/python/s_tir/dlight/test_gpu_rmsnorm.py | 40 +-
tests/python/s_tir/dlight/test_gpu_transpose.py | 10 +-
tests/python/s_tir/dlight/test_primitives.py | 4 +-
.../meta_schedule/test_meta_schedule_builder.py | 2 +-
.../meta_schedule/test_meta_schedule_database.py | 2 +-
...eta_schedule_mutator_mutate_compute_location.py | 2 +-
.../test_meta_schedule_post_order_apply.py | 6 +-
..._schedule_postproc_rewrite_cooperative_fetch.py | 12 +-
.../test_meta_schedule_postproc_rewrite_layout.py | 38 +-
...e_postproc_rewrite_parallel_vectorize_unroll.py | 12 +-
...ta_schedule_postproc_rewrite_reduction_block.py | 16 +-
...est_meta_schedule_postproc_rewrite_tensorize.py | 12 +-
...meta_schedule_postproc_rewrite_unbound_block.py | 12 +-
.../test_meta_schedule_postproc_verify_gpu_code.py | 28 +-
...est_meta_schedule_postproc_verify_vtcm_limit.py | 4 +-
.../meta_schedule/test_meta_schedule_runner.py | 2 +-
...test_meta_schedule_schedule_rule_add_rfactor.py | 12 +-
...test_meta_schedule_schedule_rule_auto_inline.py | 70 +--
...chedule_schedule_rule_cross_thread_reduction.py | 48 +-
.../test_meta_schedule_schedule_rule_mlt.py | 38 +-
.../test_meta_schedule_schedule_rule_mlt_intrin.py | 10 +-
.../test_meta_schedule_schedule_rule_mlt_tc.py | 112 ++---
...dule_schedule_rule_parallel_vectorize_unroll.py | 40 +-
...hedule_schedule_rule_random_compute_location.py | 4 +-
.../meta_schedule/test_meta_schedule_space_cpu.py | 184 ++++----
.../meta_schedule/test_meta_schedule_space_cuda.py | 86 ++--
.../test_meta_schedule_space_cuda_async.py | 24 +-
.../test_meta_schedule_task_scheduler.py | 2 +-
.../test_meta_schedule_trace_apply.py | 298 ++++++------
.../meta_schedule/test_meta_schedule_tune_tir.py | 2 +-
.../s_tir/schedule/test_tir_schedule_analysis.py | 4 +-
.../test_tir_schedule_annotate_buffer_access.py | 22 +-
.../schedule/test_tir_schedule_block_scope.py | 2 +-
.../s_tir/schedule/test_tir_schedule_blockize.py | 8 +-
.../schedule/test_tir_schedule_cache_index.py | 10 +-
.../schedule/test_tir_schedule_cache_read_write.py | 188 ++++----
.../s_tir/schedule/test_tir_schedule_compute_at.py | 206 ++++-----
.../schedule/test_tir_schedule_compute_inline.py | 118 ++---
.../test_tir_schedule_decompose_padding.py | 10 +-
.../s_tir/schedule/test_tir_schedule_for_kind.py | 16 +-
.../test_tir_schedule_fuse_reduction_epilogue.py | 16 +-
...ir_schedule_fuse_reduction_epilogue_clipping.py | 12 +-
...st_tir_schedule_fuse_reduction_epilogue_relu.py | 10 +-
.../s_tir/schedule/test_tir_schedule_merge.py | 18 +-
.../s_tir/schedule/test_tir_schedule_pad_einsum.py | 32 +-
.../schedule/test_tir_schedule_read_write_at.py | 12 +-
.../s_tir/schedule/test_tir_schedule_reduction.py | 4 +-
.../s_tir/schedule/test_tir_schedule_reindex.py | 18 +-
.../s_tir/schedule/test_tir_schedule_reorder.py | 6 +-
.../s_tir/schedule/test_tir_schedule_rfactor.py | 72 +--
.../schedule/test_tir_schedule_rolling_buffer.py | 30 +-
.../s_tir/schedule/test_tir_schedule_sampling.py | 2 +-
.../test_tir_schedule_set_axis_separator.py | 18 +-
.../s_tir/schedule/test_tir_schedule_set_dtype.py | 8 +-
.../s_tir/schedule/test_tir_schedule_set_scope.py | 8 +-
.../s_tir/schedule/test_tir_schedule_split_fuse.py | 2 +-
.../s_tir/schedule/test_tir_schedule_state.py | 2 +-
.../test_tir_schedule_state_cached_flags.py | 28 +-
.../schedule/test_tir_schedule_storage_align.py | 6 +-
.../s_tir/schedule/test_tir_schedule_tensorize.py | 8 +-
.../s_tir/schedule/test_tir_schedule_trace.py | 4 +-
.../schedule/test_tir_schedule_transform_layout.py | 78 ++--
.../s_tir/schedule/test_tir_schedule_utilities.py | 10 +-
tests/python/s_tir/test_s_tir_renew_defs.py | 4 +-
.../test_s_tir_transform_compact_buffer_region.py | 140 +++---
...est_s_tir_transform_convert_blocks_to_opaque.py | 4 +-
.../test_s_tir_transform_inject_double_buffer.py | 39 +-
.../test_s_tir_transform_inject_permuted_layout.py | 48 +-
.../test_s_tir_transform_inject_ptx_async_copy.py | 26 +-
.../test_s_tir_transform_inject_ptx_ldg32.py | 2 +-
...est_s_tir_transform_inject_software_pipeline.py | 130 +++---
.../test_s_tir_transform_inject_virtual_thread.py | 31 +-
.../test_s_tir_transform_lift_thread_binding.py | 16 +-
.../test_s_tir_transform_loop_partition.py | 25 +-
...s_tir_transform_lower_cross_thread_reduction.py | 146 +++---
.../test_s_tir_transform_lower_opaque_block.py | 25 +-
...test_s_tir_transform_lower_thread_all_reduce.py | 20 +-
...transform_manifest_shared_memory_local_stage.py | 12 +-
...st_s_tir_transform_memhammer_lower_auto_copy.py | 52 +--
...form_merge_dynamic_shared_memory_allocations.py | 2 +-
...sform_plan_update_buffer_allocation_location.py | 44 +-
...transform_remove_weight_layout_rewrite_block.py | 2 +-
.../transform/test_s_tir_transform_thread_sync.py | 4 +-
tests/python/te/test_te_create_primfunc.py | 10 +-
.../test_tir_analysis_undefined_vars.py | 93 ++++
.../test_tir_analysis_verify_well_formed.py | 4 +-
tests/python/tir-base/test_tir_constructor.py | 18 +-
tests/python/tir-base/test_tir_ptx_cp_async.py | 6 +-
tests/python/tir-base/test_tir_ptx_ldmatrix.py | 4 +-
tests/python/tir-base/test_tir_specialize.py | 6 +-
tests/python/tir-base/test_tir_texture_scope.py | 2 +-
.../test_tir_transform_convert_ssa.py | 31 ++
.../test_tir_transform_flatten_buffer.py | 89 ++--
...test_tir_transform_force_narrow_index_to_i32.py | 2 +-
.../test_tir_transform_lower_tvm_builtin.py | 4 +-
.../tir-transform/test_tir_transform_simplify.py | 45 +-
.../test_tir_transform_storage_rewrite.py | 69 +--
tests/python/tvmscript/test_tvmscript_complete.py | 12 +-
.../tvmscript/test_tvmscript_error_report.py | 14 +-
.../tvmscript/test_tvmscript_ir_builder_tir.py | 19 +-
tests/python/tvmscript/test_tvmscript_ops.py | 4 +-
.../python/tvmscript/test_tvmscript_parser_tir.py | 14 +-
.../test_tvmscript_printer_structural_equal.py | 4 +-
.../python/tvmscript/test_tvmscript_printer_tir.py | 24 +-
.../python/tvmscript/test_tvmscript_regression.py | 4 +-
tests/python/tvmscript/test_tvmscript_roundtrip.py | 39 +-
.../tvmscript/test_tvmscript_syntax_sugar.py | 4 +-
tests/python/tvmscript/test_tvmscript_type.py | 6 +-
tests/scripts/ci.py | 2 +-
tests/scripts/task_python_integration.sh | 3 -
tests/scripts/task_python_integration_gpuonly.sh | 1 -
tests/scripts/task_python_unittest_gpuonly.sh | 14 +-
259 files changed, 5528 insertions(+), 5118 deletions(-)
rename tests/python/relax/backend/{clml => adreno}/mod_utils.py (68%)
rename tests/python/relax/backend/{clml/test_clml_codegen.py => adreno/test_clml_ops.py} (54%)
rename tests/python/relax/{texture/test_network.py => backend/adreno/test_texture_network.py} (99%)
rename tests/python/relax/{texture/test_ops.py => backend/adreno/test_texture_ops.py} (86%)
rename tests/python/relax/{ => backend}/adreno/test_transform_annotate_custom_scope.py (100%)
rename tests/python/relax/{ => backend}/adreno/test_transform_fold_vdevice_scope_change.py (100%)
create mode 100644 tests/python/relax/backend/adreno/utils.py
delete mode 100644 tests/python/relax/backend/clml/conftest.py
delete mode 100644 tests/python/relax/backend/clml/test_op_exec_clml_codegen.py
delete mode 100644 tests/python/relax/backend/clml/utils.py
delete mode 100644 tests/python/relax/texture/adreno_utils.py
create mode 100644 tests/python/tir-analysis/test_tir_analysis_undefined_vars.py