This is an automated email from the ASF dual-hosted git repository.
tqchen pushed a change to branch unity
in repository https://gitbox.apache.org/repos/asf/tvm.git
from 72bd41d3e8 [Unity][Dlight] Avoid too large vectorization factor in
caching (#15443)
add 3fbf0ffc87 [CI] Update test to include unique attribute (#15339)
add 0603cce06b [CI] Make Graviton3 default AArch64 job runner node (#15352)
add 4b62aac365 [ACL] Update Compute Library to v23.05 (#15344)
add 991163c86c [CI] Pin cython version to fix cython compilation (#15353)
add 7e830e5870 [TIR][UX] Implement privacy annotations in TIR (#15214)
add 0a3ad644e5 [topi] Add `arm_cpu` specific pooling schedules (#15311)
add ae3de3d64c [CMAKE] Conditionally link "clog" in NNPack install (#15359)
add ab75b58117 [Bug][Relay] fix relay frontend pytorch op addmm bug
(#15294)
add 8e33401937 [Bugfix][Relay][Frontend][Keras] Add an assertion to reject
an invalid value for attribute units in RNN layers (#15334)
add 03fecba6c8 [Bugfix][Frontend][Keras] Add a check to reject the invalid
input shape (#15335)
add 0fadb98731 [CI] Add ml_dtypes dependency for all docker images (#15226)
add 48fe2f36c0 Revert "[topi] Add `arm_cpu` specific pooling schedules"
(#15371)
add 7d88352c3c [Relay][Pytorch] Add aten::view_as (#15370)
add d8c06767e3 [Hexagon] Simplify Mul->Sub->Conv to Conv->Add when
possible (#15367)
add 7ebc802d38 [Relay] Introduce arguments limit to FuseOps pass (#15137)
add 5029477268 [TIR] Allreduce broadcast result to each thread in
multi-warp case (#15373)
add e4af30220f [Target] Add Jetson Orin Nano tag (#15380)
add 3f69ed43a8 [TIR] Finer predicate handling in cross-thread reduction
(#15374)
add 8c8b51f02b [Bugfix] Work around "Internal Compiler Error" in MSVC
(#15385)
add 8babadb100 [Relay] improve SimplifyClipAndConsecutiveCast pass (#15362)
add 684689e924 [Bugfix] [Relay] fix a bug of printing dataflow pattern
(#15350)
add d6407bef5d [Adreno] Small fixes in Adreno schedules (#15391)
add 236eb31f09 [BugFix][TIR] Fix multi-grouped multi-warp allreduce
(#15399)
add 9ff74fb13f [TVMC] Add tvmc flag to print compilation time per pass
(#15349)
add 304aa1e084 [TIR] Allow starred expressions in TIR script (#15404)
add 22ec541a6f [Codegen][Metal] Support metal warp-level primitive (#15401)
add 3e00253b68 [TIR] Fix Primitive Rfactor DType (#15413)
add e2c8d7b33e [VM][OpenCL] Introduce textures allocation to VM memory
manager (#15400)
add d9c2b9c966 [TIR][BugFix]Ensure the Var's scope is correct (#15406)
add 4a558204fd [Hexagon] Add default vtcm capacity for targets (#15414)
add 64ac43a243 [CI] Bump Flax and Jaxlib versions to fix Jaxlib install
error (#15421)
add d1f7ef4986 [XGBoost,MetaSchedule] Support xgb set tree method (#15133)
add 6e33de61af [Frontend][Keras] Add support for swish activation (#15422)
add 5b431f5dd0 [Submodule] Add Flash attention v2 (#15423)
add 2d76c9704f [TIR] Generalize implementation of T.macro to work with
other dialects (#15432)
add 0556653830 [Frontend][ONNX] add onnx Mish operator (#15415)
add 619bb1d939 [FRONTEND][TFLITE] Add support for TFLite's regular NMS
operator (#15117)
add 95a2fff959 [MetaSchedule] Fix mma default rule and disable tuning
abort (#15437)
add 23edbff40a Merge remote-tracking branch 'upstream/main' into
unity-staging
add 53fd712bf7 [MERGE-FIX] Update the code to fix merge issues
No new revisions were added by this update.
Summary of changes:
.gitmodules | 3 +
3rdparty/libflash_attn | 1 +
LICENSE | 3 +-
ci/jenkins/generated/arm_jenkinsfile.groovy | 38 +--
ci/jenkins/templates/arm_jenkinsfile.groovy.j2 | 8 +-
cmake/modules/contrib/NNPack.cmake | 4 +-
.../install/ubuntu2004_install_python_package.sh | 3 +-
.../ubuntu_download_arm_compute_lib_binaries.sh | 2 +-
docker/install/ubuntu_install_jax.sh | 10 +-
docker/install/ubuntu_install_python_package.sh | 2 +-
include/tvm/relay/attrs/vision.h | 25 ++
include/tvm/relay/dataflow_pattern.h | 41 +++
include/tvm/relay/transform.h | 5 +-
include/tvm/runtime/vm/memory_manager.h | 18 +-
include/tvm/script/ir_builder/tir/frame.h | 3 +
include/tvm/script/ir_builder/tir/ir.h | 2 +-
include/tvm/topi/transform.h | 2 +-
licenses/LICENSE.libflash_attn.txt | 29 ++
python/setup.py | 6 +
python/tvm/contrib/hexagon/transform.py | 94 ++++++-
python/tvm/driver/tvmc/compiler.py | 20 +-
python/tvm/meta_schedule/cost_model/cost_model.py | 9 +-
python/tvm/meta_schedule/cost_model/xgb_model.py | 14 +
.../tvm/meta_schedule/testing/space_generation.py | 17 +-
python/tvm/meta_schedule/tune.py | 2 +-
python/tvm/relay/frontend/keras.py | 13 +
python/tvm/relay/frontend/onnx.py | 19 ++
python/tvm/relay/frontend/pytorch.py | 29 +-
python/tvm/relay/frontend/tflite.py | 45 ++-
python/tvm/relay/op/strategy/generic.py | 34 ++-
python/tvm/relay/op/tensor.py | 9 +-
python/tvm/relay/op/vision/_vision.py | 30 ++
python/tvm/relay/op/vision/multibox.py | 22 +-
python/tvm/relay/op/vision/nms.py | 59 ++++
python/tvm/relay/transform/transform.py | 9 +-
python/tvm/script/ir_builder/tir/ir.py | 11 +-
python/tvm/script/parser/_core.py | 2 +-
python/tvm/script/parser/core/entry.py | 6 +-
python/tvm/script/parser/core/evaluator.py | 11 +
python/tvm/script/parser/core/parser.py | 105 +++++++
python/tvm/script/parser/tir/entry.py | 83 +++---
python/tvm/script/parser/tir/parser.py | 77 ++----
python/tvm/target/target.py | 15 +-
python/tvm/testing/utils.py | 8 +-
python/tvm/tir/schedule/testing.py | 21 +-
python/tvm/topi/adreno/reduction.py | 2 +-
python/tvm/topi/adreno/utils.py | 6 +-
python/tvm/topi/cuda/ssd/multibox.py | 63 ++++-
python/tvm/topi/testing/common.py | 1 +
python/tvm/topi/vision/nms.py | 308 +++++++++++++++++++++
python/tvm/topi/vision/ssd/multibox.py | 47 +++-
.../multi_level_tiling_tensor_core.cc | 5 +-
src/meta_schedule/schedule_rule/schedule_rule.cc | 23 +-
src/relax/transform/fuse_ops.cc | 2 +-
src/relay/analysis/graph_partitioner.cc | 136 +++++++++
src/relay/analysis/graph_partitioner.h | 46 ++-
src/relay/backend/build_module.cc | 2 +-
src/relay/backend/vm/compiler.cc | 7 +
src/relay/ir/dataflow_pattern.cc | 304 +++++++++++++++-----
src/relay/op/vision/multibox_op.cc | 3 +-
src/relay/op/vision/nms.cc | 61 ++++
src/relay/transforms/fuse_ops.cc | 23 +-
src/relay/transforms/simplify_expr.cc | 86 ++++--
src/relay/transforms/split_args.cc | 96 +++++--
src/runtime/vm/memory_manager.cc | 18 ++
src/runtime/vm/naive_allocator.h | 26 ++
src/runtime/vm/pooled_allocator.h | 10 +
src/script/ir_builder/tir/frame.cc | 16 ++
src/script/ir_builder/tir/ir.cc | 7 +-
src/script/printer/tir/function.cc | 33 ++-
src/target/source/codegen_metal.cc | 4 +-
src/target/source/intrin_rule_metal.cc | 53 ++++
src/target/tag.cc | 12 +
src/target/target_kind.cc | 5 +
src/tir/schedule/primitive/reduction.cc | 6 +-
src/tir/transforms/compact_buffer_region.cc | 162 +++++------
src/tir/transforms/lower_cross_thread_reduction.cc | 120 ++++++--
src/tir/transforms/lower_thread_allreduce.cc | 159 ++++++-----
src/tir/transforms/unsupported_dtype_legalize.cc | 15 +-
.../convert_pool_allocations_to_offsets.cc | 16 +-
tests/cpp/runtime/vm/memory_manager_tests.cc | 204 ++++++++++++++
.../test_hexagon/test_relay_simplify_conv_pat.py | 224 +++++++++++++++
tests/python/contrib/test_hexagon/test_vtcm.py | 4 +-
tests/python/driver/tvmc/test_command_line.py | 17 ++
tests/python/frontend/keras/test_forward.py | 15 +-
tests/python/frontend/onnx/test_forward.py | 9 +
tests/python/frontend/pytorch/test_forward.py | 27 ++
tests/python/frontend/tflite/test_forward.py | 15 +
.../test_analysis_suggest_layout_transforms.py | 70 ++---
.../relax/test_backend_transform_shape_lower.py | 2 +-
tests/python/relax/test_blockbuilder_emit_te.py | 2 +-
tests/python/relax/test_frontend_nn_op.py | 2 +-
.../relax/test_meta_schedule_relax_integration.py | 6 +-
tests/python/relax/test_transform_alter_op_impl.py | 34 +--
tests/python/relax/test_transform_fuse_ops.py | 40 +--
tests/python/relax/test_transform_fuse_tir.py | 20 +-
.../relax/test_transform_gradient_te_register.py | 16 +-
tests/python/relax/test_transform_legalize_ops.py | 14 +-
.../relax/test_transform_legalize_ops_binary.py | 88 +++---
.../test_transform_legalize_ops_create_datatype.py | 46 +--
.../relax/test_transform_legalize_ops_grad.py | 14 +-
.../relax/test_transform_legalize_ops_image.py | 4 +-
..._transform_legalize_ops_index_linear_algebra.py | 34 +--
.../test_transform_legalize_ops_manipulate.py | 78 +++---
.../python/relax/test_transform_legalize_ops_nn.py | 106 +++----
...st_transform_legalize_ops_search_statistical.py | 46 +--
tests/python/relax/test_tvmscript_parser.py | 6 +-
tests/python/relay/aot/test_crt_aot_usmp.py | 2 +-
.../opencl_texture/test_conv2d_nchw_texture.py | 4 +-
.../relay/opencl_texture/test_reduction_texture.py | 18 ++
tests/python/relay/test_op_level5.py | 31 ++-
tests/python/relay/test_pass_fuse_ops.py | 116 ++++++++
.../relay/test_pass_merge_compiler_regions.py | 2 +-
tests/python/relay/test_pass_plan_devices.py | 4 +-
tests/python/relay/test_pass_simplify_expr.py | 45 +++
tests/python/relay/test_pass_split_args.py | 96 +++----
tests/python/tir/test_debug_info.py | 3 +-
tests/python/topi/python/test_topi_transform.py | 2 +-
...rp_reduction_cuda.py => test_allreduce_cuda.py} | 8 +-
.../python/unittest/test_evaluator_with_preproc.py | 2 +-
.../test_meta_schedule_postproc_rewrite_layout.py | 3 +-
...e_postproc_rewrite_parallel_vectorize_unroll.py | 7 +-
.../python/unittest/test_target_codegen_opencl.py | 211 +++++++++++++-
.../python/unittest/test_tir_lower_match_buffer.py | 4 +-
.../unittest/test_tir_reorder_block_iter_var.py | 4 +-
.../python/unittest/test_tir_schedule_blockize.py | 29 +-
.../unittest/test_tir_schedule_cache_index.py | 8 +-
.../unittest/test_tir_schedule_cache_read_write.py | 79 +++---
.../unittest/test_tir_schedule_compute_at.py | 89 +++---
.../unittest/test_tir_schedule_compute_inline.py | 65 +++--
.../test_tir_schedule_decompose_padding.py | 3 +-
.../python/unittest/test_tir_schedule_for_kind.py | 47 ++--
tests/python/unittest/test_tir_schedule_merge.py | 9 +-
.../unittest/test_tir_schedule_pad_einsum.py | 86 +++++-
.../unittest/test_tir_schedule_read_write_at.py | 11 +-
.../python/unittest/test_tir_schedule_reduction.py | 21 +-
tests/python/unittest/test_tir_schedule_reindex.py | 19 +-
tests/python/unittest/test_tir_schedule_reorder.py | 19 +-
tests/python/unittest/test_tir_schedule_rfactor.py | 88 +++++-
.../unittest/test_tir_schedule_rolling_buffer.py | 7 +-
.../test_tir_schedule_set_axis_separator.py | 11 +-
.../python/unittest/test_tir_schedule_set_dtype.py | 9 +-
.../python/unittest/test_tir_schedule_set_scope.py | 9 +-
.../unittest/test_tir_schedule_split_fuse.py | 37 ++-
.../unittest/test_tir_schedule_storage_align.py | 9 +-
.../python/unittest/test_tir_schedule_tensorize.py | 17 +-
tests/python/unittest/test_tir_schedule_trace.py | 7 +-
.../unittest/test_tir_schedule_transform_layout.py | 34 ++-
.../python/unittest/test_tir_schedule_utilities.py | 9 +-
tests/python/unittest/test_tir_specialize.py | 19 +-
tests/python/unittest/test_tir_texture_scope.py | 2 +-
.../unittest/test_tir_transform_bf16_legalize.py | 69 +++++
.../test_tir_transform_common_subexpr_elim.py | 4 +-
.../test_tir_transform_compact_buffer_region.py | 6 +-
.../test_tir_transform_convert_blocks_to_opaque.py | 4 +-
.../unittest/test_tir_transform_convert_ssa.py | 28 +-
.../python/unittest/test_tir_transform_helpers.py | 28 +-
.../test_tir_transform_hoist_expression.py | 4 +-
.../test_tir_transform_inject_software_pipeline.py | 22 +-
.../test_tir_transform_inject_virtual_thread.py | 14 +-
.../test_tir_transform_lift_thread_binding.py | 4 +-
.../unittest/test_tir_transform_loop_partition.py | 52 ++--
...t_tir_transform_lower_cross_thread_reduction.py | 141 +++++++++-
.../test_tir_transform_lower_opaque_block.py | 14 +-
.../test_tir_transform_lower_thread_all_reduce.py | 171 ++++++++++--
.../test_tir_transform_lower_tvm_builtin.py | 4 +-
.../unittest/test_tir_transform_make_packed_api.py | 14 +-
.../test_tir_transform_make_unpacked_api.py | 8 +-
.../unittest/test_tir_transform_narrow_datatype.py | 18 +-
...sform_plan_update_buffer_allocation_location.py | 4 +-
.../unittest/test_tir_transform_profiling_instr.py | 36 ++-
...transform_remove_weight_layout_rewrite_block.py | 4 +-
.../test_tir_transform_split_host_device.py | 8 +-
.../unittest/test_tir_transform_storage_rewrite.py | 8 +-
.../test_tir_transform_unify_thread_binding.py | 6 +-
.../unittest/test_tir_unsafe_hide_buffer_access.py | 9 +-
tests/python/unittest/test_tvmscript_complete.py | 27 +-
.../unittest/test_tvmscript_meta_programming.py | 8 +-
tests/python/unittest/test_tvmscript_ops.py | 14 +-
tests/python/unittest/test_tvmscript_parser_tir.py | 70 ++++-
.../unittest/test_tvmscript_printer_annotation.py | 4 +-
.../python/unittest/test_tvmscript_printer_tir.py | 57 +++-
.../unittest/test_tvmscript_printer_underlining.py | 6 +-
tests/python/unittest/test_tvmscript_regression.py | 8 +-
tests/python/unittest/test_tvmscript_roundtrip.py | 28 +-
.../python/unittest/test_tvmscript_syntax_sugar.py | 41 +--
tests/scripts/request_hook/request_hook.py | 1 +
187 files changed, 4885 insertions(+), 1359 deletions(-)
create mode 160000 3rdparty/libflash_attn
create mode 100644 licenses/LICENSE.libflash_attn.txt
create mode 100644 tests/cpp/runtime/vm/memory_manager_tests.cc
create mode 100644
tests/python/contrib/test_hexagon/test_relay_simplify_conv_pat.py
rename tests/python/unittest/{test_subwarp_reduction_cuda.py =>
test_allreduce_cuda.py} (94%)