This is an automated email from the ASF dual-hosted git repository.
junrushao pushed a change to branch unity-staging
in repository https://gitbox.apache.org/repos/asf/tvm.git
from 01ea438e6e [MERGE] Merge main to unity 2023-06-24
add 730d380ff6 [Fix] Fix merge error
add 7ef7ce7a61 [Unity][Dlight] Add reduction rules (#15156)
add daf9c202ac [Unity][IR][UX] Privacy annotation in Relax (#15140)
add 0dba0e3ed2 [Unity][UX][Tweak] Make it an error to mark a function private and specify a global symbol (#15170)
add ee4e657116 [Unity] Support clear global memory allocators (#15172)
add 4641b40bd9 [Minor] Fix Compilation Warnings (#15154)
add 8e4148d8d9 [DOC] Add RPC System Setup Document (#15126)
add 904515b98e [Bug] Add typing_extensions requirement again (#14960)
add 28aead905b [microNPU][ETHOSU] Fix SoftMax legalization parameters (#15069)
add 55df810b16 [Runtime] Support void as dtype in FFI (#15162)
add 48f295fda9 [Target] Add MetaSchedule-compatible attributes to OpenCL (#15166)
add bca7ebf1b5 [TIR] Fix RenewDef for symbolic input shapes (#15163)
add 42f37ff780 [TIR] Expose UndefinedVars to Python (#15165)
add 5e77b7eff7 [microNPU][ETHOSU] Add option to disable copying constants for case without cascader (#15147)
add b9c76f56dc [Runtime] Clean TVM stacktrace in error messages (#15161)
add c8f5595ca5 [MetaSchedule] Introduce MMA Tensor Core Multilevel Tiling (#14673)
add 588d1f2e93 [TensorIR][ROCm] AMD Matrix Core Support (#15106)
add 99d72fd593 [TIR][Schedule] Support padding-by-factor in PadEinsum (#15168)
add 22e592b744 [FRONTEND][TFLITE][BugFix] Fix int16 transpose conv loading (#15173)
add 6cf5b0928a [microNPU][ETHOSU] Fix compiler attributes types (#15159)
add e504d5d884 [RUNTIME] Add weak symbol to builtin fp16 (#15182)
add 9710d81650 [Testing] Utility method to run TVM on remote device (#15179)
new 7828489c6c Merge remote-tracking branch 'apache-upstream/main' into unity
The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email. The revisions
listed as "add" were already present in the repository and have only
been added to this reference.
Summary of changes:
cmake/modules/LibInfo.cmake | 1 +
docs/dev/how_to/how_to.rst | 1 +
docs/dev/how_to/setup_rpc_system.rst | 245 ++++
include/tvm/runtime/logging.h | 22 +-
include/tvm/runtime/relax_vm/memory_manager.h | 3 +
include/tvm/script/ir_builder/relax/frame.h | 2 +
include/tvm/script/ir_builder/relax/ir.h | 3 +-
include/tvm/tir/buffer.h | 2 +
include/tvm/tir/schedule/schedule.h | 18 +
include/tvm/tir/transform.h | 12 +
python/gen_requirements.py | 2 +
python/tvm/_ffi/runtime_ctypes.py | 4 +
python/tvm/contrib/rocm.py | 119 +-
python/tvm/dlight/base/__init__.py | 2 +
python/tvm/dlight/base/analysis.py | 75 ++
python/tvm/dlight/base/common_schedules.py | 60 +
python/tvm/dlight/base/transform.py | 14 +-
python/tvm/dlight/gpu/__init__.py | 1 +
python/tvm/dlight/gpu/fallback.py | 44 +-
python/tvm/dlight/gpu/reduction.py | 92 ++
python/tvm/error.py | 8 +-
python/tvm/relax/block_builder.py | 12 +
python/tvm/relax/frontend/torch/dynamo.py | 3 +-
python/tvm/relax/training/setup_trainer.py | 5 +-
python/tvm/relay/backend/contrib/ethosu/codegen.py | 3 +-
.../backend/contrib/ethosu/softmax_rewriter.py | 75 +-
python/tvm/relay/backend/contrib/ethosu/util.py | 14 +-
python/tvm/relay/frontend/tflite.py | 3 +-
python/tvm/script/ir_builder/relax/ir.py | 9 +-
python/tvm/script/parser/core/parser.py | 2 +
python/tvm/script/parser/relax/entry.py | 6 +-
python/tvm/script/parser/relax/parser.py | 27 +-
python/tvm/testing/__init__.py | 37 +-
python/tvm/testing/rpc_run.py | 162 +++
python/tvm/testing/tir.py | 111 ++
python/tvm/testing/utils.py | 17 +-
python/tvm/tir/analysis/analysis.py | 26 +-
python/tvm/tir/schedule/schedule.py | 140 ++-
python/tvm/tir/tensor_intrin/cuda.py | 318 +++++
python/tvm/tir/tensor_intrin/rocm.py | 433 ++++++-
python/tvm/tir/transform/transform.py | 22 +
python/tvm/topi/hexagon/qnn/nn.py | 37 -
src/arith/iter_affine_map.cc | 2 +-
src/driver/driver_api.cc | 6 +-
.../feature_extractor/per_store_feature.cc | 3 +-
src/meta_schedule/postproc/verify_gpu_code.cc | 3 +-
.../schedule_rule/multi_level_tiling.cc | 9 +-
.../schedule_rule/multi_level_tiling.h | 15 +
.../multi_level_tiling_tensor_core.cc | 330 ++++-
src/meta_schedule/schedule_rule/schedule_rule.cc | 15 +
src/relax/training/utils.cc | 4 +-
src/relax/transform/gradient.cc | 8 +-
src/relax/transform/lift_transform_params.cc | 5 +-
src/relay/backend/contrib/ethosu/compiler_attrs.cc | 33 +-
src/relay/qnn/op/convolution_transpose.cc | 2 +-
src/runtime/builtin_fp16.cc | 2 +-
src/runtime/logging.cc | 35 +-
src/runtime/relax_vm/memory_manager.cc | 9 +
src/runtime/thread_storage_scope.h | 21 +
src/script/ir_builder/relax/frame.cc | 5 +
src/script/ir_builder/relax/ir.cc | 7 +-
src/script/printer/relax/function.cc | 69 +-
src/support/libinfo.cc | 1 +
src/target/source/codegen_c_host.h | 2 +-
src/target/target_kind.cc | 14 +-
src/tir/analysis/var_use_def_analysis.cc | 9 +
src/tir/schedule/analysis.h | 18 -
src/tir/schedule/analysis/analysis.cc | 4 +-
src/tir/schedule/concrete_schedule.cc | 10 +
src/tir/schedule/concrete_schedule.h | 3 +
src/tir/schedule/primitive.h | 39 +-
src/tir/schedule/primitive/compute_inline.cc | 19 +
src/tir/schedule/primitive/pad_einsum.cc | 574 +++++----
src/tir/schedule/primitive/sampling.cc | 106 ++
src/tir/schedule/schedule.cc | 2 +
src/tir/schedule/traced_schedule.cc | 16 +
src/tir/schedule/traced_schedule.h | 3 +
src/tir/schedule/transform.cc | 13 +
src/tir/transforms/inject_permuted_layout.cc | 266 ++++
src/tir/transforms/inject_software_pipeline.cc | 7 +
src/tir/transforms/memhammer_intermediate_stage.cc | 3 +-
src/tir/transforms/memhammer_lower_auto_copy.cc | 6 +-
src/tir/transforms/memhammer_rewrite_rule.h | 16 +
src/tir/transforms/memhammer_tensorcore_rewrite.cc | 210 ++++
src/tir/transforms/renew_defs.cc | 12 +
src/tir/transforms/transform_mma_buffer_layout.cc | 192 +++
.../contrib/test_ethosu/test_attr_passing.py | 67 +-
tests/python/contrib/test_ethosu/test_legalize.py | 114 +-
tests/python/contrib/test_ethosu/test_scheduler.py | 16 +
...{test_schedule_rule.py => test_gpu_fallback.py} | 0
tests/python/dlight/test_gpu_reduction.py | 282 +++++
tests/python/driver/tvmc/test_target_options.py | 14 +
tests/python/frontend/tflite/test_forward.py | 80 ++
tests/python/relax/test_dataflow_pattern.py | 12 +-
tests/python/relax/test_training_loss.py | 10 +
tests/python/relax/test_training_optimizer.py | 8 +
.../relax/test_training_optimizer_numeric.py | 2 +-
.../relax/test_transform_attach_global_symbol.py | 6 +-
.../test_transform_combine_parallel_matmul.py | 23 +-
.../relax/test_transform_dead_code_elimination.py | 8 +-
tests/python/relax/test_transform_fuse_ops.py | 54 +-
.../relax/test_transform_fuse_ops_by_pattern.py | 25 +-
tests/python/relax/test_transform_fuse_tir.py | 6 +-
tests/python/relax/test_transform_lambda_lift.py | 14 +-
.../test_transform_merge_composite_functions.py | 44 +-
tests/python/relax/test_transform_normalize.py | 20 +-
.../relax/test_transform_rewrite_cuda_graph.py | 12 +-
tests/python/relax/test_tvmscript_parser.py | 57 +-
.../relax/test_tvmscript_parser_op_datatype.py | 2 +-
tests/python/relax/test_tvmscript_printer_relax.py | 159 ++-
tests/python/relax/test_utils.py | 8 +-
...meta_schedule_mma_m16n8k8_auto_tensorization.py | 1315 ++++++++++++++++++++
tests/python/unittest/test_tir_renew_defs.py | 20 +-
.../python/unittest/test_tir_schedule_analysis.py | 39 +-
.../unittest/test_tir_schedule_pad_einsum.py | 277 +++--
..._mma.py => test_tir_schedule_tensorize_mfma.py} | 261 ++--
...test_tir_transform_memhammer_lower_auto_copy.py | 127 ++
117 files changed, 6357 insertions(+), 1045 deletions(-)
create mode 100644 docs/dev/how_to/setup_rpc_system.rst
create mode 100644 python/tvm/dlight/base/analysis.py
create mode 100644 python/tvm/dlight/base/common_schedules.py
create mode 100644 python/tvm/dlight/gpu/reduction.py
create mode 100644 python/tvm/testing/rpc_run.py
create mode 100644 src/tir/transforms/inject_permuted_layout.cc
create mode 100644 src/tir/transforms/transform_mma_buffer_layout.cc
rename tests/python/dlight/{test_schedule_rule.py => test_gpu_fallback.py} (100%)
create mode 100644 tests/python/dlight/test_gpu_reduction.py
create mode 100644 tests/python/unittest/test_meta_schedule_mma_m16n8k8_auto_tensorization.py
copy tests/python/unittest/{test_tir_schedule_tensorize_ldmatrix_mma.py => test_tir_schedule_tensorize_mfma.py} (53%)