This is an automated email from the ASF dual-hosted git repository.
github-bot pushed a change to branch nightly
in repository https://gitbox.apache.org/repos/asf/tvm.git
from 10a12bacb8 [CI][EZ] Upgrade CI Lint Image (#14373)
add b56d7f56ab [TIR][Utility] More flexible tir::Substitute arguments (#14251)
add 3b274aa6c7 [Hexagon] Allow scalar tensors to have null shape during allocation (#14376)
add 3f56a95b87 [TVMScript] Use new variable frame in If/Then/Else (#14250)
add e5ae4347dd [CUDA][Schedule] Better Layout Transform Schedules (#14167)
add b987556375 [TIR] Remove LoadNode and StoreNode (#14381)
add 67597025e7 [TVMScript][Fix] Fix `bool` printing for roundtrip (#14390)
add ad6fbec066 [TIR] Improved error message in InjectSoftwarePipeline (#14391)
No new revisions were added by this update.
Summary of changes:
include/tvm/runtime/container/array.h | 6 +
include/tvm/tir/expr.h | 60 ---
include/tvm/tir/expr_functor.h | 4 -
include/tvm/tir/stmt.h | 66 ---
include/tvm/tir/stmt_functor.h | 148 ++++--
include/tvm/topi/transform.h | 10 +-
python/tvm/ir/json_compact.py | 2 -
python/tvm/meta_schedule/schedule/cuda/__init__.py | 2 +
.../schedule/cuda/layout_transform.py | 583 +++++++++++++++++++++
.../tvm/relay/backend/contrib/ethosu/tir/passes.py | 2 +-
.../backend/contrib/ethosu/tir_to_cs_translator.py | 1 -
python/tvm/relay/op/_transform.py | 2 +-
python/tvm/relay/op/strategy/cuda.py | 11 +
python/tvm/relay/op/strategy/generic.py | 36 +-
python/tvm/script/ir_builder/tir/ir.py | 2 -
python/tvm/script/parser/tir/parser.py | 6 +-
python/tvm/tir/__init__.py | 3 +-
python/tvm/tir/analysis/analysis.py | 3 +-
python/tvm/tir/expr.py | 30 --
python/tvm/tir/stmt.py | 38 +-
python/tvm/topi/transform.py | 17 +-
src/contrib/hybrid/codegen_hybrid.cc | 6 -
src/contrib/hybrid/codegen_hybrid.h | 2 -
src/relay/printer/text_printer.h | 2 -
src/relay/printer/tir_text_printer.cc | 19 -
src/relay/printer/tvmscript_printer.cc | 26 -
src/runtime/hexagon/hexagon_device_api.cc | 2 +-
src/script/printer/legacy_repr.cc | 27 -
src/script/printer/tir/expr.cc | 6 -
src/script/printer/tir/ir.cc | 3 +-
src/script/printer/tir/stmt.cc | 7 -
src/target/llvm/codegen_llvm.cc | 8 -
src/target/llvm/codegen_llvm.h | 2 -
src/target/source/codegen_c.cc | 8 -
src/target/source/codegen_c.h | 2 -
src/target/source/codegen_opencl.cc | 4 -
src/target/source/codegen_opencl.h | 1 -
src/target/stackvm/codegen_stackvm.cc | 8 -
src/target/stackvm/codegen_stackvm.h | 2 -
src/te/autodiff/jacobian.cc | 1 -
src/te/operation/create_primfunc.cc | 2 +-
src/te/operation/cross_thread_reduction.cc | 1 +
src/te/operation/hybrid_op.cc | 4 +-
src/te/operation/op_utils.cc | 16 -
src/te/operation/op_utils.h | 16 -
src/tir/analysis/block_access_region_detector.cc | 10 -
src/tir/analysis/buffer_access_lca_detector.cc | 9 -
src/tir/analysis/device_constraint_utils.cc | 18 -
src/tir/analysis/side_effect.cc | 5 -
src/tir/analysis/var_touch.cc | 8 -
src/tir/analysis/var_use_def_analysis.cc | 8 -
src/tir/analysis/var_use_def_analysis.h | 4 -
src/tir/analysis/verify_gpu_code.cc | 8 -
src/tir/analysis/verify_memory.cc | 8 -
src/tir/ir/expr.cc | 70 +--
src/tir/ir/expr_functor.cc | 8 -
src/tir/ir/index_map.cc | 2 +-
src/tir/ir/stmt.cc | 53 --
src/tir/ir/stmt_functor.cc | 27 -
src/tir/schedule/analysis/reducer.cc | 18 -
src/tir/schedule/primitive/blockize_tensorize.cc | 2 +-
src/tir/schedule/primitive/cache_index.cc | 8 +-
src/tir/schedule/primitive/cache_read_write.cc | 16 +-
src/tir/schedule/primitive/compute_inline.cc | 8 -
.../schedule/primitive/layout_transformation.cc | 18 +-
src/tir/schedule/primitive/reduction.cc | 8 +-
src/tir/transforms/bf16_legalize.cc | 8 -
src/tir/transforms/bound_checker.cc | 8 -
src/tir/transforms/common_subexpr_elim.cc | 5 +-
src/tir/transforms/compact_buffer_region.cc | 8 -
src/tir/transforms/coproc_sync.cc | 6 -
src/tir/transforms/inject_copy_intrin.cc | 2 +-
src/tir/transforms/inject_double_buffer.cc | 8 -
src/tir/transforms/inject_software_pipeline.cc | 16 +-
src/tir/transforms/inject_virtual_thread.cc | 18 +-
src/tir/transforms/install_debug_spans.h | 1 -
src/tir/transforms/ir_utils.cc | 8 -
src/tir/transforms/lower_cross_thread_reduction.cc | 2 +-
src/tir/transforms/lower_custom_datatypes.cc | 8 -
src/tir/transforms/lower_match_buffer.cc | 14 -
src/tir/transforms/lower_thread_allreduce.cc | 8 -
src/tir/transforms/lower_warp_memory.cc | 12 -
.../manifest_shared_memory_local_stage.cc | 2 +-
.../merge_dynamic_shared_memory_allocations.cc | 16 -
src/tir/transforms/narrow_datatype.cc | 10 +-
src/tir/transforms/renew_defs.cc | 8 -
src/tir/transforms/rewrite_unsafe_select.cc | 3 -
src/tir/transforms/simplify.cc | 4 -
src/tir/transforms/split_host_device.cc | 2 +-
src/tir/transforms/storage_access.cc | 8 -
src/tir/transforms/storage_access.h | 2 -
src/tir/transforms/storage_flatten.cc | 16 -
src/tir/transforms/storage_rewrite.cc | 44 +-
src/tir/transforms/thread_storage_sync.cc | 7 -
src/tir/transforms/unroll_loop.cc | 4 -
src/tir/transforms/update_pointer_storage_scope.cc | 8 -
src/tir/transforms/update_pointer_storage_scope.h | 2 -
src/tir/transforms/vectorize_loop.cc | 19 +-
src/tir/usmp/analysis/extract_buffer_info.cc | 7 +-
src/tir/usmp/transform/create_io_allocates.cc | 6 -
src/topi/transform.cc | 2 +-
.../hexagon/hexagon_device_api_tests.cc | 3 +
tests/python/integration/test_reduce.py | 4 +-
.../test_meta_schedule_relay_integration.py | 3 +
...meta_schedule_schedule_cuda_layout_transform.py | 466 ++++++++++++++++
tests/python/unittest/test_tir_nodes.py | 2 +-
.../unittest/test_tir_schedule_compute_inline.py | 2 +-
.../unittest/test_tir_transform_storage_rewrite.py | 8 +-
.../python/unittest/test_tvmscript_printer_tir.py | 2 +-
tests/python/unittest/test_tvmscript_roundtrip.py | 16 +
110 files changed, 1329 insertions(+), 1002 deletions(-)
create mode 100644 python/tvm/meta_schedule/schedule/cuda/layout_transform.py
create mode 100644 tests/python/unittest/test_meta_schedule_schedule_cuda_layout_transform.py