This is an automated email from the ASF dual-hosted git repository.
github-bot pushed a change to branch nightly
in repository https://gitbox.apache.org/repos/asf/tvm.git
from 7e6165e896 Fix BufferError when converting PyTorch models with sparse tensors (#18492)
add 97d78aa9eb [Relax][PyTorch] Add `mul` operator in ExportedProgram frontend (#18496)
add faab2e7f27 [Relax] Fix the squeeze operator to behave consistently with torch (#18478)
add 2032e713ca [TIR][Schedule] Add FuseReductionEpilogue primitive to fuse epilogue … (#18418)
add 0bd6f9cad5 [CI] Use glob for `conda/build-environment.yaml` in cache key (#18498)
add 91c1921210 [Relax][PyTorch] Add binary operation dtype promotion following PyTorch rules in ExportedProgram frontend (#18497)
add 9e905f9bfb [CI] Enhance python linting scripts to support revision-based checks (#18470)
add 13ea9dc104 [TIR] Add step attribute to ForNode (Initial codes) (#18421)
add 4be951d710 [RELAX][PASS] Annotate Custom Scope layout pass for Adreno GPU (#17599)
No new revisions were added by this update.
Summary of changes:
.github/actions/setup/action.yml | 2 +-
CMakeLists.txt | 1 +
docker/lint.sh | 12 +-
include/tvm/relax/attrs/op.h | 4 +-
include/tvm/relax/backend/adreno/transform.h | 67 ++
include/tvm/relax/expr.h | 6 +-
include/tvm/relax/transform.h | 9 +
include/tvm/runtime/tensor.h | 10 +
include/tvm/script/ir_builder/tir/frame.h | 8 +-
include/tvm/script/ir_builder/tir/ir.h | 16 +-
include/tvm/tir/schedule/schedule.h | 7 +
include/tvm/tir/stmt.h | 17 +-
include/tvm/topi/transform.h | 7 +-
python/tvm/dlight/__init__.py | 1 +
python/tvm/dlight/{cpu => adreno}/__init__.py | 4 +-
python/tvm/dlight/{cpu => adreno}/base.py | 11 +-
python/tvm/dlight/adreno/convolution.py | 230 ++++
python/tvm/relax/backend/adreno/__init__.py | 3 +
.../backend/adreno/transform}/__init__.py | 7 +-
.../backend/adreno/transform}/_ffi_api.py | 8 +-
.../relax/backend/adreno/transform/transform.py | 50 +
.../frontend/torch/base_fx_graph_translator.py | 43 +-
.../frontend/torch/exported_program_translator.py | 1 +
python/tvm/relax/op/base.py | 9 +-
python/tvm/relax/transform/__init__.py | 1 +
.../tvm/relax/transform/legalize_ops/__init__.py | 3 +
.../transform/legalize_ops/adreno}/__init__.py | 4 +-
.../transform/legalize_ops/adreno/convolution.py | 31 +-
python/tvm/relax/transform/transform.py | 22 +-
python/tvm/relax/utils.py | 20 +-
python/tvm/script/ir_builder/tir/ir.py | 44 +-
python/tvm/script/parser/tir/parser.py | 27 +-
python/tvm/tir/analysis/analysis.py | 4 +
python/tvm/tir/ir_builder.py | 8 +-
python/tvm/tir/pipeline.py | 1 +
python/tvm/tir/schedule/schedule.py | 27 +
python/tvm/tir/stmt.py | 7 +
python/tvm/tir/transform/transform.py | 11 +
python/tvm/topi/nn/conv2d.py | 129 +++
.../backend/adreno/annotate_custom_storage.cc | 755 ++++++++++++
.../backend/adreno/fold_vdevice_scope_change.cc | 193 ++++
.../transform/lower_global_view_to_local_view.cc | 4 +-
src/relax/op/nn/convolution.cc | 8 +-
src/relax/op/op.cc | 12 +-
src/relax/op/op_common.h | 10 +
src/relax/op/tensor/binary.cc | 21 +-
src/relax/op/tensor/manipulate.cc | 54 +-
src/relax/transform/legalize_ops.cc | 21 +-
src/relax/transform/realize_vdevice.cc | 4 +-
.../specialize_primfunc_based_on_callsite.cc | 174 +++
src/relax/transform/utils.h | 2 +-
src/runtime/contrib/clml/clml_runtime.cc | 20 +-
src/runtime/tensor.cc | 20 +
src/script/ir_builder/tir/frame.cc | 2 +-
src/script/ir_builder/tir/ir.cc | 20 +-
src/script/printer/relax/call.cc | 7 +-
src/script/printer/relax/struct_info.cc | 5 +-
src/script/printer/tir/for_loop.cc | 15 +-
src/target/llvm/codegen_cpu.cc | 16 +-
src/target/llvm/codegen_llvm.cc | 8 +-
src/target/source/codegen_c.cc | 14 +-
src/target/source/codegen_cuda.cc | 1 -
src/target/source/codegen_webgpu.cc | 14 +-
src/target/spirv/codegen_spirv.cc | 23 +-
src/tir/ir/data_type_rewriter.cc | 9 +-
src/tir/ir/stmt.cc | 30 +-
src/tir/ir/stmt_functor.cc | 11 +-
src/tir/schedule/analysis/analysis.cc | 23 +-
src/tir/schedule/concrete_schedule.cc | 9 +
src/tir/schedule/concrete_schedule.h | 2 +
src/tir/schedule/primitive.h | 8 +
src/tir/schedule/primitive/blockize_tensorize.cc | 2 +-
src/tir/schedule/primitive/compute_inline.cc | 492 ++++++++
src/tir/schedule/primitive/decompose_padding.cc | 2 +-
src/tir/schedule/primitive/loop_transformation.cc | 4 +-
src/tir/schedule/primitive/reduction.cc | 13 +-
src/tir/schedule/schedule.cc | 4 +-
src/tir/schedule/traced_schedule.cc | 11 +
src/tir/schedule/traced_schedule.h | 1 +
src/tir/transforms/canonicalize_loop.cc | 102 ++
src/tir/transforms/common_subexpr_elim.cc | 2 +-
src/tir/transforms/convert_for_loops_serial.cc | 2 +-
src/tir/transforms/inject_software_pipeline.cc | 2 +-
src/tir/transforms/ir_utils.cc | 6 +-
src/tir/transforms/lift_thread_binding.cc | 2 +-
src/tir/transforms/loop_partition.cc | 8 +-
src/tir/transforms/lower_cross_thread_reduction.cc | 4 +-
src/tir/transforms/lower_opaque_block.cc | 2 +-
src/tir/transforms/memhammer_coalesce.cc | 3 +-
src/tir/transforms/memhammer_tensorcore_rewrite.cc | 55 +-
src/tir/transforms/storage_rewrite.cc | 2 +-
src/tir/transforms/unify_thread_binding.cc | 6 +-
src/tir/transforms/unroll_loop.cc | 5 +-
src/tir/transforms/vectorize_loop.cc | 6 +-
tests/lint/flake8.sh | 38 +-
tests/lint/pylint.sh | 38 +-
tests/python/codegen/test_target_codegen.py | 44 +-
tests/python/codegen/test_target_codegen_cuda.py | 32 +
.../adreno/test_transform_annotate_custom_scope.py | 1204 ++++++++++++++++++++
.../test_transform_fold_vdevice_scope_change.py | 282 +++++
.../relax/test_frontend_from_exported_program.py | 80 +-
tests/python/relax/test_op_manipulate.py | 18 +-
tests/python/relax/test_transform.py | 1 +
.../python/relax/test_transform_convert_layout.py | 415 ++++++-
...nsform_specialize_primfunc_based_on_callsite.py | 344 ++++++
.../relax/test_tvmscript_parser_op_manipulate.py | 15 +
tests/python/tir-base/test_tir_nodes.py | 1 +
.../test_tir_schedule_fuse_reduction_epilogue.py | 218 ++++
.../test_tir_transform_canonicalize_loop.py | 88 ++
.../python/tvmscript/test_tvmscript_parser_tir.py | 26 +
tests/python/tvmscript/test_tvmscript_roundtrip.py | 20 +
tests/scripts/task_build_adreno_bins.sh | 7 +-
112 files changed, 5751 insertions(+), 243 deletions(-)
create mode 100644 include/tvm/relax/backend/adreno/transform.h
copy python/tvm/dlight/{cpu => adreno}/__init__.py (93%)
copy python/tvm/dlight/{cpu => adreno}/base.py (75%)
create mode 100644 python/tvm/dlight/adreno/convolution.py
copy python/tvm/{contrib/hexagon => relax/backend/adreno/transform}/__init__.py (86%)
copy python/tvm/{arith => relax/backend/adreno/transform}/_ffi_api.py (86%)
create mode 100644 python/tvm/relax/backend/adreno/transform/transform.py
copy python/tvm/{contrib/cutlass => relax/transform/legalize_ops/adreno}/__init__.py (87%)
copy tests/lint/blocklint.sh => python/tvm/relax/transform/legalize_ops/adreno/convolution.py (57%)
mode change 100755 => 100644
create mode 100644 src/relax/backend/adreno/annotate_custom_storage.cc
create mode 100644 src/relax/backend/adreno/fold_vdevice_scope_change.cc
create mode 100644 src/relax/transform/specialize_primfunc_based_on_callsite.cc
create mode 100644 src/tir/transforms/canonicalize_loop.cc
create mode 100644 tests/python/relax/adreno/test_transform_annotate_custom_scope.py
create mode 100644 tests/python/relax/adreno/test_transform_fold_vdevice_scope_change.py
create mode 100644 tests/python/relax/test_transform_specialize_primfunc_based_on_callsite.py
create mode 100644 tests/python/tir-schedule/test_tir_schedule_fuse_reduction_epilogue.py
create mode 100644 tests/python/tir-transform/test_tir_transform_canonicalize_loop.py