This is an automated email from the ASF dual-hosted git repository.
junrushao pushed a change to branch unity-staging
in repository https://gitbox.apache.org/repos/asf/tvm.git
from 8ff15c1626 Merge remote-tracking branch 'apache-upstream/main' into
unity-staging
add b7471f8315 [Bugfix] Remove duplicate code in nccl.cc (#15800)
add e809d64658 [Unity][Dlight] Fix inline consumer in matmul tensorize
rule (#15781)
add 6ea16b98e7 [Unity] Delegate DataflowVar visitor to Var by default
(#15688)
add 732ae53653 [Unity][TVMScript] Produce var = R.ExternFunc("")
statements (#15703)
add dfc77eb129 [Unity] Extend RemoveAllUnused to support relax::Expr
(#15807)
add beef1f7c75 [TE] Support using tir::Var as CreatePrimFunc args (#15817)
add d06a658b10 [Unity][Disco] Use default stream in disco (#15827)
add 2fd23843fd Remove create_relax_prim_func (#15828)
add 230f8b2491 [Disco] Add AllGather (#15764)
add ea329cba02 [Unity][Analysis] Change warning to info for non-affine
transform (#15820)
add 82586fb9ea [Unity] Extend EliminateCommonSubexpr to operate on
relax::Expr (#15815)
add 1230b4009b [Unity] Dtype check in legalization of R.matmul (#15825)
add f78d9ed0ed [UNITY][Pass] Remove redundant reshape (#15806)
add 7494bc4580 [Unity][BYOC] Support offloading multi-query attention by
Flash Attention (#15831)
add aa4587feb5 [Unity] Implement relax.transform.KillAfterLastUse (#15810)
add 225d067fb8 [Unity] Support Padding Reversal in Alter-Op pass (#15679)
add 4a8a7b9c63 [Unity] Implement LowerAllocTensor to remove
R.builtin.alloc_tensor (#15809)
add ae89c1e56d [OpenCL] Don't initialize OpenCL runtime on host (#15745)
add cde83e1088 [TVMC] enable dumping imported modules too (#15779)
add dfd525bda5 Revert "[TensorIR][Visitor] Visit buffer members in
`match_buffer`'s in block visitor functions (#15153)" (#15816)
add d5fab9e4fb [TVMScript] Use environment variable TVM_BLACK_FORMAT for
.show() (#15762)
add c318fa8632 [Docker] Install oneflow from PyPi (#15819)
add cf8521ad5c [Target] LLVM helper functions for any target info (#15761)
add 0d683284b0 [Unittest][Metal] Add minimal metal functionality test to
CI (#15756)
add 73e7909a71 [TVMScript] Preserve traceback across TVMScript parsing
(#15824)
add cf081d9929 [BugFix][CPP] Fix cpp deploy bug (#15773)
add 9d8e6fda50 [ADRENO] Minor changes for Adreno docs and help scripts
(#15830)
add 8b40f5d028 [FRONTEND] Fix unnecessary pylint errors (#15838)
add def551dfd5 [CLI TOOLS][RTVM] Improve rtvm tool with new options to
measure native performance (#15818)
add 28908998e0 [Relay][Keras][Bugfix] fix the converters of GRU and
SimpleRNN about the go_backwards attribute (#15829)
new 2a0540a69a Merge branch 'apache-upstream-main' into
apache-upstream-unity
The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email. The revisions
listed as "add" were already present in the repository and have only
been added to this reference.
Summary of changes:
.github/workflows/main.yml | 8 +
3rdparty/libflash_attn | 2 +-
apps/cpp_rtvm/README.md | 22 ++
apps/cpp_rtvm/main.cc | 199 ++++++++++++--
apps/cpp_rtvm/tvm_runner.cc | 129 ++++++---
apps/cpp_rtvm/tvm_runner.h | 24 +-
apps/howto_deploy/prepare_test_libs.py | 8 +-
cmake/modules/LLVM.cmake | 3 +
docker/install/ubuntu_install_oneflow.sh | 2 +-
docs/how_to/deploy/adreno.rst | 2 +-
include/tvm/relax/analysis.h | 19 +-
python/tvm/contrib/cutlass/attention_operation.py | 28 +-
python/tvm/contrib/cutlass/build.py | 7 +-
python/tvm/contrib/cutlass/gen_tensor_op.py | 23 +-
python/tvm/dlight/gpu/matmul.py | 43 +--
python/tvm/driver/tvmc/compiler.py | 2 +
python/tvm/relax/backend/contrib/cutlass.py | 13 +-
python/tvm/relax/backend/patterns.py | 15 +-
python/tvm/relax/op/ccl/ccl.py | 26 +-
python/tvm/relax/transform/__init__.py | 1 +
python/tvm/relax/transform/legalize_ops/ccl.py | 25 ++
.../relax/transform/legalize_ops/linear_algebra.py | 8 +
.../tvm/relax/transform/legalize_ops/manipulate.py | 10 +-
...ut_transform.py => remove_redundant_reshape.py} | 51 ++--
python/tvm/relax/transform/transform.py | 32 ++-
python/tvm/relax/utils.py | 6 +-
python/tvm/relax/vm_build.py | 3 +
python/tvm/relay/frontend/keras.py | 4 +
python/tvm/runtime/disco/session.py | 17 ++
python/tvm/runtime/script_printer.py | 30 ++-
python/tvm/script/ir_builder/relax/ir.py | 1 +
python/tvm/script/parser/core/parser.py | 13 +-
python/tvm/script/parser/ir/parser.py | 9 +-
python/tvm/script/parser/relax/entry.py | 110 +++++---
python/tvm/target/codegen.py | 93 ++++++-
python/tvm/target/x86.py | 28 +-
python/tvm/te/__init__.py | 1 -
python/tvm/te/operation.py | 68 +----
python/tvm/tir/schedule/schedule.py | 25 +-
python/tvm/tir/schedule/trace.py | 9 +-
python/tvm/topi/x86/batch_matmul.py | 8 +-
python/tvm/topi/x86/dense.py | 9 +-
python/tvm/topi/x86/dense_alter_op.py | 5 +-
.../space_generator/space_generator.cc | 19 +-
src/relax/analysis/analysis.cc | 2 -
src/relax/analysis/layout_transformation.cc | 4 +-
src/relax/analysis/struct_info_analysis.cc | 4 -
src/relax/analysis/udchain.cc | 11 +-
src/relax/analysis/well_formed.cc | 10 -
src/relax/backend/vm/vm_builtin_lower.cc | 37 +--
src/relax/ir/binding_rewrite.cc | 56 ++--
src/relax/ir/dataflow_matcher.cc | 6 +-
src/relax/ir/expr_functor.cc | 39 +--
src/relax/op/ccl/ccl.cc | 36 +++
src/relax/op/ccl/ccl.h | 3 +
src/relax/op/nn/attention.cc | 13 +-
src/relax/transform/alter_op_impl.cc | 81 +++++-
src/relax/transform/canonicalize_bindings.cc | 8 -
src/relax/transform/convert_layout.cc | 2 -
src/relax/transform/dead_code_elimination.cc | 2 +-
src/relax/transform/eliminate_common_subexpr.cc | 43 +--
src/relax/transform/fold_constant.cc | 11 +-
src/relax/transform/gradient.cc | 5 +-
src/relax/transform/kill_after_last_use.cc | 289 +++++++++++++++++++++
src/relax/transform/lift_transform_params.cc | 4 -
src/relax/transform/lower_alloc_tensor.cc | 106 ++++++++
src/relax/transform/to_mixed_precision.cc | 9 -
src/relax/transform/utils.h | 41 ++-
src/relay/qnn/op/requantize.cc | 6 +-
src/relay/qnn/op/requantize_config.h | 6 +-
src/runtime/disco/builtin.cc | 3 +
src/runtime/disco/builtin.h | 6 +
src/runtime/disco/nccl/nccl.cc | 27 +-
src/runtime/opencl/opencl_common.h | 6 +-
src/runtime/opencl/opencl_device_api.cc | 4 +
src/runtime/opencl/opencl_module.cc | 22 +-
src/runtime/opencl/opencl_module.h | 1 +
src/script/printer/ir/ir.cc | 15 +-
src/script/printer/relax/binding.cc | 3 +-
src/script/printer/relax/function.cc | 2 +-
src/script/printer/relax/struct_info.cc | 26 +-
src/target/llvm/codegen_x86_64.cc | 39 +--
src/target/llvm/llvm_instance.cc | 154 ++++++++++-
src/target/llvm/llvm_instance.h | 30 +++
src/target/llvm/llvm_module.cc | 197 ++++----------
src/te/operation/compute_op.cc | 6 +-
src/te/operation/create_primfunc.cc | 64 ++---
src/te/operation/create_primfunc.h | 8 +-
src/tir/ir/stmt_functor.cc | 32 +--
tests/python/disco/test_ccl.py | 23 ++
tests/python/dlight/test_gpu_matmul_tensorize.py | 164 ++++++++++++
tests/python/driver/tvmc/test_compiler.py | 1 +
tests/python/frontend/keras/test_forward.py | 14 +-
tests/python/frontend/oneflow/test_forward.py | 2 +-
tests/python/relax/test_analysis.py | 126 +++++++--
tests/python/relax/test_codegen_cutlass.py | 51 +++-
tests/python/relax/test_kill_after_last_use.py | 55 ++++
..._json_compact.py => test_lower_alloc_tensor.py} | 51 ++--
tests/python/relax/test_op_ccl.py | 55 ++++
.../python/relax/test_remove_redundant_reshape.py | 118 +++++++++
tests/python/relax/test_transform.py | 39 +--
tests/python/relax/test_transform_alter_op_impl.py | 80 +++++-
tests/python/relax/test_transform_fold_constant.py | 15 +-
tests/python/relax/test_transform_legalize_ops.py | 24 +-
.../relax/test_transform_legalize_ops_ccl.py | 23 ++
.../test_transform_static_plan_block_memory.py | 1 +
tests/python/relax/test_tuning_api.py | 1 -
tests/python/relax/test_tvmscript_parser.py | 43 +++
tests/python/relax/test_tvmscript_printer_relax.py | 54 +++-
tests/python/relax/test_vm_builtin_lower.py | 86 ++++++
tests/python/relay/test_op_level2.py | 5 +-
tests/python/relay/test_op_qnn_conv2_transpose.py | 2 +-
tests/python/relay/test_op_qnn_conv2d.py | 4 +-
tests/python/relay/test_pass_alter_op_layout.py | 6 +-
tests/python/relay/test_pass_qnn_legalize.py | 14 +-
tests/python/target/test_llvm_features_info.py | 104 ++++++++
tests/python/target/test_x86_features.py | 155 +++++------
tests/python/unittest/test_allreduce.py | 44 ++++
tests/python/unittest/test_target_codegen_llvm.py | 2 +-
tests/python/unittest/test_te_create_primfunc.py | 29 +++
.../test_tir_transform_unify_thread_binding.py | 43 ---
tests/scripts/setup-adreno-env.sh | 18 +-
tests/scripts/task_config_build_adreno.sh | 2 +
123 files changed, 3002 insertions(+), 1091 deletions(-)
copy python/tvm/relax/transform/{optimize_layout_transform.py =>
remove_redundant_reshape.py} (52%)
create mode 100644 src/relax/transform/kill_after_last_use.cc
create mode 100644 src/relax/transform/lower_alloc_tensor.cc
create mode 100644 tests/python/relax/test_kill_after_last_use.py
copy tests/python/relax/{test_json_compact.py => test_lower_alloc_tensor.py}
(57%)
create mode 100644 tests/python/relax/test_remove_redundant_reshape.py
create mode 100644 tests/python/relax/test_vm_builtin_lower.py
create mode 100644 tests/python/target/test_llvm_features_info.py