This is an automated email from the ASF dual-hosted git repository.
junrushao pushed a change to branch unity
in repository https://gitbox.apache.org/repos/asf/tvm.git
from ee4e657116 [Unity] Support clear global memory allocators (#15172)
add 4641b40bd9 [Minor] Fix Compilation Warnings (#15154)
add 8e4148d8d9 [DOC] Add RPC System Setup Document (#15126)
add 904515b98e [Bug] Add typing_extensions requirement again (#14960)
add 28aead905b [microNPU][ETHOSU] Fix SoftMax legalization parameters (#15069)
add 55df810b16 [Runtime] Support void as dtype in FFI (#15162)
add 48f295fda9 [Target] Add MetaSchedule-compatible attributes to OpenCL (#15166)
add bca7ebf1b5 [TIR] Fix RenewDef for symbolic input shapes (#15163)
add 42f37ff780 [TIR] Expose UndefinedVars to Python (#15165)
add 5e77b7eff7 [microNPU][ETHOSU] Add option to disable copying constants for case without cascader (#15147)
add b9c76f56dc [Runtime] Clean TVM stacktrace in error messages (#15161)
add c8f5595ca5 [MetaSchedule] Introduce MMA Tensor Core Multilevel Tiling (#14673)
add 588d1f2e93 [TensorIR][ROCm] AMD Matrix Core Support (#15106)
add 99d72fd593 [TIR][Schedule] Support padding-by-factor in PadEinsum (#15168)
add 22e592b744 [FRONTEND][TFLITE][BugFix] Fix int16 transpose conv loading (#15173)
add 6cf5b0928a [microNPU][ETHOSU] Fix compiler attributes types (#15159)
add e504d5d884 [RUNTIME] Add weak symbol to builtin fp16 (#15182)
add 9710d81650 [Testing] Utility method to run TVM on remote device (#15179)
add 7828489c6c Merge remote-tracking branch 'apache-upstream/main' into unity
No new revisions were added by this update.
Summary of changes:
cmake/modules/LibInfo.cmake | 1 +
docs/dev/how_to/how_to.rst | 1 +
docs/dev/how_to/setup_rpc_system.rst | 245 ++++
include/tvm/runtime/logging.h | 22 +-
include/tvm/tir/buffer.h | 2 +
include/tvm/tir/schedule/schedule.h | 18 +
include/tvm/tir/transform.h | 12 +
python/gen_requirements.py | 2 +
python/tvm/_ffi/runtime_ctypes.py | 4 +
python/tvm/contrib/rocm.py | 119 +-
python/tvm/error.py | 8 +-
python/tvm/relay/backend/contrib/ethosu/codegen.py | 3 +-
.../backend/contrib/ethosu/softmax_rewriter.py | 75 +-
python/tvm/relay/backend/contrib/ethosu/util.py | 14 +-
python/tvm/relay/frontend/tflite.py | 3 +-
python/tvm/testing/__init__.py | 37 +-
python/tvm/testing/rpc_run.py | 162 +++
python/tvm/testing/tir.py | 111 ++
python/tvm/testing/utils.py | 17 +-
python/tvm/tir/analysis/analysis.py | 26 +-
python/tvm/tir/schedule/schedule.py | 140 ++-
python/tvm/tir/tensor_intrin/cuda.py | 318 +++++
python/tvm/tir/tensor_intrin/rocm.py | 433 ++++++-
python/tvm/tir/transform/transform.py | 22 +
src/arith/iter_affine_map.cc | 2 +-
src/driver/driver_api.cc | 6 +-
.../feature_extractor/per_store_feature.cc | 3 +-
src/meta_schedule/postproc/verify_gpu_code.cc | 3 +-
.../schedule_rule/multi_level_tiling.cc | 9 +-
.../schedule_rule/multi_level_tiling.h | 15 +
.../multi_level_tiling_tensor_core.cc | 330 ++++-
src/meta_schedule/schedule_rule/schedule_rule.cc | 15 +
src/relay/backend/contrib/ethosu/compiler_attrs.cc | 33 +-
src/relay/qnn/op/convolution_transpose.cc | 2 +-
src/runtime/builtin_fp16.cc | 2 +-
src/runtime/logging.cc | 35 +-
src/runtime/thread_storage_scope.h | 21 +
src/support/libinfo.cc | 1 +
src/target/source/codegen_c_host.h | 2 +-
src/target/target_kind.cc | 14 +-
src/tir/analysis/var_use_def_analysis.cc | 9 +
src/tir/schedule/analysis.h | 18 -
src/tir/schedule/analysis/analysis.cc | 4 +-
src/tir/schedule/concrete_schedule.cc | 10 +
src/tir/schedule/concrete_schedule.h | 3 +
src/tir/schedule/primitive.h | 39 +-
src/tir/schedule/primitive/compute_inline.cc | 19 +
src/tir/schedule/primitive/pad_einsum.cc | 574 +++++----
src/tir/schedule/primitive/sampling.cc | 106 ++
src/tir/schedule/schedule.cc | 2 +
src/tir/schedule/traced_schedule.cc | 16 +
src/tir/schedule/traced_schedule.h | 3 +
src/tir/schedule/transform.cc | 13 +
src/tir/transforms/inject_permuted_layout.cc | 266 ++++
src/tir/transforms/inject_software_pipeline.cc | 7 +
src/tir/transforms/memhammer_intermediate_stage.cc | 3 +-
src/tir/transforms/memhammer_lower_auto_copy.cc | 6 +-
src/tir/transforms/memhammer_rewrite_rule.h | 16 +
src/tir/transforms/memhammer_tensorcore_rewrite.cc | 210 ++++
src/tir/transforms/renew_defs.cc | 12 +
src/tir/transforms/transform_mma_buffer_layout.cc | 192 +++
.../contrib/test_ethosu/test_attr_passing.py | 67 +-
tests/python/contrib/test_ethosu/test_legalize.py | 114 +-
tests/python/contrib/test_ethosu/test_scheduler.py | 16 +
tests/python/driver/tvmc/test_target_options.py | 14 +
tests/python/frontend/tflite/test_forward.py | 80 ++
...meta_schedule_mma_m16n8k8_auto_tensorization.py | 1315 ++++++++++++++++++++
tests/python/unittest/test_tir_renew_defs.py | 20 +-
.../python/unittest/test_tir_schedule_analysis.py | 39 +-
.../unittest/test_tir_schedule_pad_einsum.py | 277 +++--
..._mma.py => test_tir_schedule_tensorize_mfma.py} | 261 ++--
...test_tir_transform_memhammer_lower_auto_copy.py | 127 ++
72 files changed, 5315 insertions(+), 831 deletions(-)
create mode 100644 docs/dev/how_to/setup_rpc_system.rst
create mode 100644 python/tvm/testing/rpc_run.py
create mode 100644 src/tir/transforms/inject_permuted_layout.cc
create mode 100644 src/tir/transforms/transform_mma_buffer_layout.cc
create mode 100644 tests/python/unittest/test_meta_schedule_mma_m16n8k8_auto_tensorization.py
copy tests/python/unittest/{test_tir_schedule_tensorize_ldmatrix_mma.py => test_tir_schedule_tensorize_mfma.py} (53%)