This is an automated email from the ASF dual-hosted git repository.
tqchen pushed a change to branch unity-staging
in repository https://gitbox.apache.org/repos/asf/tvm.git
omit cfce06f073 [Unity][Fix] Fix bug in MergeCompositeFunctions (#14117)
omit cc5292c6cf [Unity] Update tests again to adapt to latest TVMScript
syntax (#14115)
omit d3a0e98b6d [Unity][BYOC]Add relax backend pattern registry (#14106)
omit 3f12d4df59 [Unity] Remove attributes of relax.print, assert and unique
(#14101)
omit 4d72dafa6b [Unity][Layout] Add layout transformation analysis for
PrimFunc (#14066)
omit 3f4835c4c8 [Unity] Relax Recursive function (#14092)
omit fc5981b09e [Unity] Lower `shape_of` to a builtin (#14093)
omit 4ca7107ac6 [Unity] Fix typo in the comment (#14096)
omit d1997fd53e [Unity][Relax] Set Shape Function to Be Host Function
(#14090)
omit a283a71be6 [Unity] Refactor Relax Build JIT UX (#14088)
omit c0a591d222 [Unity][Fix][Pass] FoldConstant with DCE in dataflow block
(#14087)
omit df67561c71 [Unity][Analysis] TIR pattern kind analysis for
multi-buffer write block (#14075)
omit e7354e6463 [Unity][Op] `log_softmax` and `cross_entropy_with_logits`
(#14083)
omit cdd61cdf0f [Unity][BYOC] Add DNNL backend (#14082)
omit 59692e75a9 [Unity][BYOC] Add CUTLASS backend (#14081)
omit a40f1da47f [Unity] Add testcases for `expr_args_converter` (#14080)
omit 69cf869a4a [Unity][Pass] Canonicalize Bindings (#14079)
omit 5eee3af667 [Unity][BYOC][Pass] RunCodegen and TensorRT (#14078)
omit 9be900becf [Unity][Transform] Add LiftTransformParams pass (#14069)
omit c575220cbc [Unity][Frontend] Annotate number of non-static input of FX
function (#14067)
omit 8083332e1b [Unity][BYOC] Add pass to merge composite functions to
offload large subgraphs (#14062)
omit f5149054af [Unity][Pass] Remove Unused Function (#14061)
omit 93cf0874e8 [Unity][Fix][Pass] Fix FuseOps for lack graph edges (#14058)
omit 6d5f6f0e93 [Unity] Relax op: collapse sum (#14059)
omit be1cc698d2 [Unity][BYOC] Add pattern-based partitioning pass (#14054)
omit 6f4ca6b29c [Unity][VM] Add per-op profiling support (#14053)
omit 166bb92fd3 [Unity][TVMScript] Overload `__neg__` for relax expr
(#14045)
omit 63166441e3 [Unity][Pass] FuseOps FuseTIR fixes (#14044)
omit 988b2aaf0f [Unity] Statement rewriter for DataflowBlock (#14043)
omit ef3524a6c9 [Unity] Relax dataflow pattern language (matching) (#14041)
omit 7d2296fb39 [Unity] Update tests to adapt to latest TVMScript syntax
(#14039)
omit ff84737270 [Unity] Disallow inline prim_func in relax IR (#14040)
omit df0e043272 [Unity][Pass] Block-level static memory planning (#14038)
omit f8ad7845ed [Unity] Initial PyTorch Frontend (#14037)
omit b2e46d010a [Unity][Op] Add ShapeExpr Tests for Reshape Op (#14035)
omit 7ccda2571c [Unity][Pass] Operator legalization (#14029)
omit 8d05dce13d [Unity][TVMScript] Move tir/relax import in script out of
__init__.py (#14033)
omit 2d9fcfa595 [Unity][Pass] Wellformed Analysis (#14032)
omit bccae02c52 [Unity][BlockBuilder] CallTE convert PrimValue args
(#14028)
omit 47722e3b6b [Unity][Pass] Normalize Pass (#14031)
omit eeb40ac348 [Unity] Relay -> Relax translator (#14026)
omit 2aed16966f [Unity][Pass][TuningAPI] Introduce TuningAPI and
MetaSchedule pass (#14014)
omit b06d77929a [Unity][Pass] BindParams pass, FoldConstant pass (#14016)
omit af63d19d7d [Unity][VM] Supporting "compiled" exec mode. (#14015)
omit 6475d9884a [Unity][Pass] LambdaLift pass (#14012)
omit 75b905796e [Unity][Pass] Operator Fusion Passes (#14001)
omit fe81ddaf60 [Unity] NestedMsg Support utility (#13995)
omit 24704357dc [Unity] Relax op: manipulation (#13989)
omit 4577c986b8 [Unity] Relax op: search (#13992)
omit b95a20a46e [Unity] Relax op: linear algebra (#13988)
omit 35f17cfab9 [Unity] Relax op: creation (#13984)
omit fcf4f59db8 [Unity] Relax op: neural networks (#13993)
omit d4a7cfccdc [Unity] Relax op: statistical (#13991)
omit 27dde569cc [Unity] Relax op: arithmetic, comparison (#13983)
omit c06d16f0cb [Unity] Relax op: image (#13994)
omit 20ca7c07ba [Unity] Relax op: set (#13990)
omit 886689a5f7 [Unity] Relax op: datatype (#13986)
omit e48d4d2379 [Unity] Relax op: index (#13987)
omit 819c720640 [Unity][TVMScript] Use explicit `R.shape` in TVMScript
(#13979)
omit 9e47ae6808 [Unity] e2e Relax minimum build flow (#13961)
omit 55c2d1f665 [Unity] Relax VM shape lowering pass (#13956)
omit 5095764133 [Unity] Relax VM codegen (#13954)
omit 409bf916ec [Unity] Relax TVMScript Printer (#13944)
omit 450d2a7f39 [Unity] Relax TVMScript Parser. (#13932)
omit ff488e94df [Unity] Relax BlockBuilder and ExprMutator (#13926)
omit fa561c816d [Unity] Basic StructInfo Analysis and Expr construction
(#13916)
omit 76cc9f7dc5 [Unity][CI] Unity specific jenkins setup (do not upstream
to main) (#13910)
omit ecbd0a41f8 [Unity][IR] First-class StructInfo (#13907)
omit 2f96da7e7a [Unity] Relax expressions and types (#13901)
omit 9745433202 [Unity] Relax VM (#13878)
add f7165a1328 [microTVM] Fix tvmc tutorial (#14076)
add 10fb8c52d9 [MetaSchedule] Introduce Async Pipeline in MultiLevelTiling
(#14009)
add 9fab56c4c1 [TVMScript] Use op attribute to control whether to print
dtype in TVMScript (#14111)
add 1ad1994f5f [Fix][TVMScript] Fix index of metadata in printed script
(#14130)
add f21a17b67c [Pytorch] frontend full_impl fix (#14122)
add d9b0a80e1b [DOCKER] Configurable NDK version support (#14000)
add 54a62c1b53 [Fix][TIR] SampleCategorical apply-to-schedule (#14133)
add 74603eeac3 [Arith] ConstIntBound was incorrectly assuming bounds were
over int64… (#13918)
add 0e046daf9e [CMSIS-NN] Reduction in code size of AOT test runner binary
(#13815)
add 77df6e8d7c [CMSIS-NN] Add a runtime error message (#13643)
add bf589f3d11 [CRT]Cleanup unused macros in crt_config.h.template (#14125)
add 663f7ae77b [Fix][Relay] Fix axis transformation in squeeze shape
function (#14135)
add 4d152fe7c2 [Unittest] merge test_cp_async_in_if_then_else into
test_tir_transform_inject_ptx_async_copy (#14138)
add 2feb243bb7 [Frontend][TFLite] Fix conv2d import bug (#14124)
add 6097df5307 [ONNX][TORCH] Replace scatter op by scatter_elements
(#14019)
add 2b2cb96733 [TVMScript][Printer] Remove relax prefix for now (#14140)
add 7d67bb1be4 [microNPU] Sum legalization support (#13997)
add 7c06de52a1 [Fix][MetaSchedule] Fix redundant stages in async pipeline
for mlt (#14143)
add 428400c6e0 [COMMUNITY] Cheng Wen -> Reviewer (#14153)
add 1043136c9f [Runtime] Fix high RAM usage when saving / loading
paramters of big models (#14147)
add e9cf04e0e4 [Relay][Frontend] Span Filling PyTorch (#14050)
add 6c04ac52bd [TRT][BYOC] allow strided_slice ops on selected dimensions
(#14142) (#14144)
add 69acdfb042 [ONNX][TOPI] Add `DFT` operator (#13999)
add 908dc8f8ab [CRT][microTVM] Enable USMP by default for AoTExecutor +
CRT runtime (#14107)
add 25f4d06c55 [Android] Fix using system libraries in Android apps
(#14145)
add 05cbe329d8 [microTVM]Enable TVMC micro with AoT Executor (#14077)
add bd8e7d3bd3 [bugfix] Fix the write buffer scope of `mma_store_impl`
(#14174)
add cb37b82608 [Relay] Enhance EliminateCommonSubexpr to support Tuple
argument (#14169)
add 91dc8efe0f [TIR] Fix typo in doc (#14178)
add a42e98b195 [microTVM] Use QNN schedules to give SOTA performance
(#13752)
add bc92a3ff66 Add v0.11.0 docs link to site (#14181)
add df429c58d8 [TIR] Allow TransformLayout with non-inversible index map
(#14095)
add c0f148a231 [TIR][Analysis] Implement IdentifyMemCpy analysis function
(#13947)
add 736cecab3f [HotFix][MetaSchedule] Turn off database shash check
(#14188)
new 079abf1949 [Unity] Relax VM (#13878)
new 8eb3d7c495 [Unity] Relax expressions and types (#13901)
new 051e1f1474 [Unity][IR] First-class StructInfo (#13907)
new dab5ec4e83 [Unity][CI] Unity specific jenkins setup (do not upstream
to main) (#13910)
new 7af2beae0c [Unity] Basic StructInfo Analysis and Expr construction
(#13916)
new 886be6e8b0 [Unity] Relax BlockBuilder and ExprMutator (#13926)
new f74330451d [Unity] Relax TVMScript Parser. (#13932)
new bc6de4b733 [Unity] Relax TVMScript Printer (#13944)
new 78a87e6172 [Unity] Relax VM codegen (#13954)
new ad27005c35 [Unity] Relax VM shape lowering pass (#13956)
new e33ed56e41 [Unity] e2e Relax minimum build flow (#13961)
new e784761cdb [Unity][TVMScript] Use explicit `R.shape` in TVMScript
(#13979)
new 7489379ac4 [Unity] Relax op: index (#13987)
new c8869b7cbb [Unity] Relax op: datatype (#13986)
new 5d868d895d [Unity] Relax op: set (#13990)
new aef2bf4511 [Unity] Relax op: image (#13994)
new 2b55420146 [Unity] Relax op: arithmetic, comparison (#13983)
new d83d57c8c1 [Unity] Relax op: statistical (#13991)
new 1219825f12 [Unity] Relax op: neural networks (#13993)
new f5798631bf [Unity] Relax op: creation (#13984)
new 8d25cb5081 [Unity] Relax op: linear algebra (#13988)
new 5814dcb926 [Unity] Relax op: search (#13992)
new 9807c1c8eb [Unity] Relax op: manipulation (#13989)
new b06c02e2a0 [Unity] NestedMsg Support utility (#13995)
new 2ac610a090 [Unity][Pass] Operator Fusion Passes (#14001)
new 534fa748dc [Unity][Pass] LambdaLift pass (#14012)
new e4d59f5b4b [Unity][VM] Supporting "compiled" exec mode. (#14015)
new 3cb151f54d [Unity][Pass] BindParams pass, FoldConstant pass (#14016)
new 4d172093e0 [Unity][Pass][TuningAPI] Introduce TuningAPI and
MetaSchedule pass (#14014)
new a2a59b0665 [Unity] Relay -> Relax translator (#14026)
new 195c0ec414 [Unity][Pass] Normalize Pass (#14031)
new daf33d5be7 [Unity][BlockBuilder] CallTE convert PrimValue args
(#14028)
new eb05be6289 [Unity][Pass] Wellformed Analysis (#14032)
new 333d42d89f [Unity][TVMScript] Move tir/relax import in script out of
__init__.py (#14033)
new 5a07c5aeed [Unity][Pass] Operator legalization (#14029)
new d0d02a8a04 [Unity][Op] Add ShapeExpr Tests for Reshape Op (#14035)
new df0c1b00b0 [Unity] Initial PyTorch Frontend (#14037)
new f4b05f38ba [Unity][Pass] Block-level static memory planning (#14038)
new 9e0778e35a [Unity] Disallow inline prim_func in relax IR (#14040)
new 123ede268f [Unity] Update tests to adapt to latest TVMScript syntax
(#14039)
new ca293e8957 [Unity] Relax dataflow pattern language (matching) (#14041)
new 086d32d6cb [Unity] Statement rewriter for DataflowBlock (#14043)
new 6cec2edcdd [Unity][Pass] FuseOps FuseTIR fixes (#14044)
new 138496fabb [Unity][TVMScript] Overload `__neg__` for relax expr
(#14045)
new e7500dbe22 [Unity][VM] Add per-op profiling support (#14053)
new 7963e6d619 [Unity][BYOC] Add pattern-based partitioning pass (#14054)
new 7a7cce60ed [Unity] Relax op: collapse sum (#14059)
new 23109d7281 [Unity][Fix][Pass] Fix FuseOps for lack graph edges (#14058)
new ae400b5a21 [Unity][Pass] Remove Unused Function (#14061)
new bd15cf149b [Unity][BYOC] Add pass to merge composite functions to
offload large subgraphs (#14062)
new 1e53a9ce15 [Unity][Frontend] Annotate number of non-static input of FX
function (#14067)
new 60c59b2e2e [Unity][Transform] Add LiftTransformParams pass (#14069)
new 6ac619dcb3 [Unity][BYOC][Pass] RunCodegen and TensorRT (#14078)
new 717eb5f5a9 [Unity][Pass] Canonicalize Bindings (#14079)
new d953b7659d [Unity] Add testcases for `expr_args_converter` (#14080)
new 2d00e8ebc5 [Unity][BYOC] Add CUTLASS backend (#14081)
new 18fdb29111 [Unity][BYOC] Add DNNL backend (#14082)
new 4ae6d34957 [Unity][Op] `log_softmax` and `cross_entropy_with_logits`
(#14083)
new e5340371a3 [Unity][Analysis] TIR pattern kind analysis for
multi-buffer write block (#14075)
new e76e94dd9f [Unity][Fix][Pass] FoldConstant with DCE in dataflow block
(#14087)
new bc13158d65 [Unity] Refactor Relax Build JIT UX (#14088)
new 4b13b7045e [Unity][Relax] Set Shape Function to Be Host Function
(#14090)
new 9b9fdba6fb [Unity] Fix typo in the comment (#14096)
new 1229317707 [Unity] Lower `shape_of` to a builtin (#14093)
new 3dabb7089a [Unity] Relax Recursive function (#14092)
new 1b6ccf096b [Unity][Layout] Add layout transformation analysis for
PrimFunc (#14066)
new 356365eb2f [Unity] Remove attributes of relax.print, assert and unique
(#14101)
new 00be298ed8 [Unity][BYOC]Add relax backend pattern registry (#14106)
new 67a3721be0 [Unity] Update tests again to adapt to latest TVMScript
syntax (#14115)
new e2ee1af952 [Unity][Fix] Fix bug in MergeCompositeFunctions (#14117)
new a82bef5820 [Unity][BlockBuilder] Add `name_hint` argument for `emit`
and `emit_output` (#14126)
new b7ecfe9a54 [Unity][WEB] Relax vm on web runtime (#14131)
new ec0ede82fd [Unity] Add Global info (#14132)
new 8e5712c95e [Unity][BYOC] Add transposed matmul support to Relax
CUTLASS BYOC (#14128)
new c02289ad63 [Unity][TVMScript] emit_te sugar (#14123)
new eef40997d5 [Unity][BYOC] Assign group to unused bindings and ignroe
PrimFunc (#14139)
new 8f0f62e841 [Unity] Add callback to FuseOpsByPattern to check match
result is accepted (#14109)
new 3baa50bd94 [Unity][Legalize] Fix Scalar Constant Legalization (#14127)
new 8f4eb69594 [Unity][Pass] Enhance constant folding to fold relax ops by
evaluating them. (#14146)
new cc81d3487f [Unity][Debugging] AST printer (#14152)
new d9ad8860ac [Unity][Pass] Support Symbolic Shape Deduction during
BindParam (#14154)
new fc627c7f3b [Unity][Analysis] Checking function return struct info in
well-formed check (#14155)
new 21da09c779 [Unity][BYOC] Use Relax legalize + CPU build for reference
in tests (#14162)
new ba380fe49e [Unity] Add bind_constants option to FuseOpsByPattern
(#14151)
new a102afb7f9 [Unity][Analysis] Analysis for detecting recursion in Relax
(#14149)
new 3c529ba8be [Unity][BYOC] Add batch matmul support to Relax CUTLASS
BYOC (#14166)
new 4763de07d1 [Unity][Op] Full support of Relax op `power` (#14171)
new a526a0db02 [Unity][Analysis] Restore Python bindings for var analyses
(#14180)
new 0881e0b711 [Unity][OP] Add an operator for fused multi head attention
(#14150)
new d0ea42ae9e [Unity][WEBGPU] Codegen improvements and WebRuntime (#14187)
This update added new revisions after undoing existing revisions.
That is to say, some revisions that were in the old version of the
branch are not in the new version. This situation occurs
when a user --force pushes a change and generates a repository
containing something like this:
 * -- * -- B -- O -- O -- O   (cfce06f073)
            \
             N -- N -- N   refs/heads/unity-staging (d0ea42ae9e)
You should already have received notification emails for all of the O
revisions, and so the following emails describe only the N revisions
from the common base, B.
Any revisions marked "omit" are not gone; other references still
refer to them. Any revisions marked "discard" are gone forever.
The 90 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails. The revisions
listed as "add" were already present in the repository and have only
been added to this reference.
Summary of changes:
3rdparty/cutlass | 2 +-
CONTRIBUTORS.md | 1 +
apps/android_camera/README.md | 10 +
.../app/src/main/AndroidManifest.xml | 3 +
apps/android_camera/models/prepare_model.py | 2 +-
apps/android_deploy/README.md | 11 +
.../app/src/main/AndroidManifest.xml | 3 +
apps/android_rpc/README.md | 11 +
apps/android_rpc/app/src/main/AndroidManifest.xml | 3 +
.../template_project/src/mlperftiny/platform.cc | 17 -
.../src/mlperftiny/submitter_implemented.cc | 2 +-
cmake/utils/CRTConfig.cmake | 3 -
docker/Dockerfile.ci_adreno | 5 +-
docker/Dockerfile.ci_cpu | 3 -
docker/Dockerfile.ci_hexagon | 2 -
docker/Dockerfile.demo_android | 2 -
docker/install/ubuntu_install_androidsdk.sh | 43 +-
docs/conf.py | 2 +-
docs/script_convert.py | 28 +-
gallery/how_to/work_with_microtvm/micro_tvmc.sh | 41 +-
include/tvm/ir/global_info.h | 80 +++
include/tvm/ir/module.h | 16 +-
include/tvm/relax/analysis.h | 25 +
include/tvm/relax/expr_functor.h | 4 +-
include/tvm/relax/transform.h | 7 +-
include/tvm/relax/utils.h | 4 +-
include/tvm/relay/attrs/transform.h | 19 +-
include/tvm/relay/transform.h | 7 +
include/tvm/runtime/relax_vm/memory_manager.h | 2 +-
include/tvm/runtime/relax_vm/vm.h | 4 +
include/tvm/script/ir_builder/base.h | 2 +
include/tvm/script/ir_builder/ir/frame.h | 7 +
include/tvm/tir/analysis.h | 29 +
include/tvm/tir/op_attr_types.h | 21 +
include/tvm/tir/schedule/schedule.h | 9 +-
python/tvm/contrib/cutlass/attention_operation.py | 134 +++++
python/tvm/contrib/cutlass/build.py | 184 +++++-
python/tvm/contrib/cutlass/gemm_operation.py | 20 +-
python/tvm/contrib/cutlass/gen_tensor_op.py | 165 ++++--
python/tvm/contrib/cutlass/library.py | 6 +
python/tvm/contrib/tvmjs.py | 172 ++++++
python/tvm/driver/tvmc/model.py | 12 +
python/tvm/driver/tvmc/runner.py | 10 +-
python/tvm/exec/rpc_proxy.py | 38 +-
python/tvm/ir/__init__.py | 1 +
.../rewrite_layout.py => ir/global_info.py} | 26 +-
python/tvm/ir/module.py | 30 +-
python/tvm/relax/__init__.py | 1 +
python/tvm/relax/analysis/analysis.py | 99 +++-
python/tvm/relax/backend/contrib/cutlass.py | 124 +++-
python/tvm/relax/backend/pattern_registry.py | 69 ++-
python/tvm/relax/backend/patterns.py | 27 +
python/tvm/relax/block_builder.py | 19 +-
python/tvm/relax/dpl/pattern.py | 25 +
python/tvm/relax/expr.py | 6 +
python/tvm/relax/frontend/torch/fx_translator.py | 7 +
python/tvm/relax/op/binary.py | 18 +
python/tvm/relax/op/nn/nn.py | 39 ++
python/tvm/relax/testing/__init__.py | 1 +
python/tvm/relax/testing/ast_printer.py | 372 ++++++++++++
python/tvm/relax/transform/legalize_ops/binary.py | 1 +
python/tvm/relax/transform/legalize_ops/common.py | 34 +-
.../tvm/relax/transform/legalize_ops/creation.py | 4 +-
.../tvm/relax/transform/legalize_ops/datatype.py | 2 +-
python/tvm/relax/transform/legalize_ops/nn.py | 12 +-
python/tvm/relax/transform/transform.py | 30 +-
python/tvm/relax/vm_build.py | 14 +-
.../tvm/relay/backend/contrib/ethosu/legalize.py | 82 +++
.../tvm/relay/backend/contrib/ethosu/te/pooling.py | 11 +-
.../backend/contrib/ethosu/tir_to_cs_translator.py | 7 +-
python/tvm/relay/expr.py | 2 +-
python/tvm/relay/frontend/common.py | 4 +
python/tvm/relay/frontend/oneflow.py | 2 +-
python/tvm/relay/frontend/onnx.py | 151 ++++-
python/tvm/relay/frontend/paddlepaddle.py | 10 +-
python/tvm/relay/frontend/pytorch.py | 282 +++++++--
python/tvm/relay/frontend/pytorch_utils.py | 2 +-
python/tvm/relay/frontend/qnn_torch.py | 4 +-
python/tvm/relay/frontend/tflite.py | 7 +-
python/tvm/relay/op/_transform.py | 28 +-
python/tvm/relay/op/contrib/ethosu.py | 90 +++
python/tvm/relay/op/contrib/tensorrt.py | 3 +-
python/tvm/relay/op/nn/_nn.py | 17 -
python/tvm/relay/op/op_attrs.py | 5 -
python/tvm/relay/op/strategy/cuda.py | 41 +-
python/tvm/relay/op/strategy/generic.py | 48 +-
python/tvm/relay/op/strategy/rocm.py | 14 +-
python/tvm/relay/op/transform.py | 52 +-
python/tvm/relay/qnn/op/_qnn.py | 60 +-
python/tvm/relay/qnn/strategy/arm_cpu.py | 73 ++-
python/tvm/relay/transform/mixed_precision.py | 2 +
python/tvm/rpc/proxy.py | 21 +-
python/tvm/runtime/__init__.py | 7 +-
python/tvm/runtime/params.py | 49 +-
python/tvm/script/ir_builder/base.py | 11 +
python/tvm/script/ir_builder/ir/__init__.py | 9 +-
python/tvm/script/ir_builder/ir/ir.py | 39 +-
python/tvm/script/ir_builder/relax/ir.py | 38 +-
python/tvm/script/parser/ir/__init__.py | 4 +-
python/tvm/script/parser/ir/parser.py | 11 +-
python/tvm/testing/aot.py | 67 ++-
python/tvm/tir/schedule/schedule.py | 20 +-
python/tvm/tir/tensor_intrin/cuda.py | 2 +-
python/tvm/topi/__init__.py | 2 +-
python/tvm/topi/arm_cpu/__init__.py | 3 +-
.../dsp/micro_kernel/multi_channel_convolve.py | 58 +-
.../arm_cpu/mprofile/dsp/micro_kernel/tensordot.py | 9 +-
python/tvm/topi/arm_cpu/qnn.py | 358 +++++++++---
python/tvm/topi/arm_cpu/qnn_alter_op.py | 228 ++++++--
python/tvm/topi/arm_cpu/qnn_legalize.py | 382 +++++++++++++
python/tvm/topi/cuda/__init__.py | 2 +-
python/tvm/topi/cuda/scatter.py | 440 +-------------
python/tvm/topi/cuda/{stft.py => signal.py} | 96 ++++
python/tvm/topi/generic/search.py | 16 -
python/tvm/topi/hexagon/qnn/nn.py | 2 +-
python/tvm/topi/nn/qnn.py | 35 +-
python/tvm/topi/scatter.py | 183 +-----
python/tvm/topi/{stft.py => signal.py} | 82 +++
src/{auto_scheduler/utils.cc => ir/global_info.cc} | 20 +-
src/ir/module.cc | 25 +-
src/meta_schedule/database/json_database.cc | 10 +-
.../schedule_rule/multi_level_tiling.cc | 56 ++
.../schedule_rule/multi_level_tiling.h | 4 +
src/relax/analysis/detect_recursion.cc | 389 +++++++++++++
src/relax/analysis/struct_info_analysis.cc | 2 +-
src/relax/analysis/well_formed.cc | 6 +
src/relax/backend/pattern_registry.cc | 20 +-
src/relax/backend/pattern_registry.h | 18 +-
src/relax/ir/block_builder.cc | 11 +-
src/relax/ir/dataflow_matcher.cc | 2 +
src/relax/ir/expr_functor.cc | 4 +-
src/relax/op/nn/attention.cc | 123 ++++
src/relax/op/{tensor/search.h => nn/attention.h} | 20 +-
src/relax/op/tensor/binary.cc | 1 +
src/relax/op/tensor/binary.h | 3 +
src/relax/transform/bind_params.cc | 64 ++-
src/relax/transform/fold_constant.cc | 56 +-
src/relax/transform/fuse_ops.cc | 74 +--
src/relax/transform/run_codegen.cc | 3 +
src/relax/utils.cc | 66 ++-
src/relay/backend/aot_executor_codegen.cc | 10 +-
.../backend/contrib/cmsisnn/compiler_attrs.cc | 2 +
src/relay/backend/contrib/cmsisnn/compiler_attrs.h | 4 +
src/relay/backend/contrib/cmsisnn/target.cc | 1 +
.../backend/contrib/cmsisnn/tir_to_runtime.cc | 59 +-
src/relay/backend/utils.cc | 1 +
src/relay/op/contrib/ethosu/op_attrs.h | 4 +-
src/relay/op/contrib/ethosu/pooling.cc | 23 +-
src/relay/op/tensor/transform.cc | 98 ++--
src/relay/transforms/eliminate_common_subexpr.cc | 28 +-
src/relay/transforms/simplify_expr.cc | 23 +
src/runtime/crt/crt_config.h.template | 9 -
src/runtime/file_utils.cc | 12 +
src/runtime/relax_vm/memory_manager.cc | 2 +-
src/runtime/relax_vm/vm.cc | 87 ++-
src/script/ir_builder/base.cc | 6 +
src/script/ir_builder/ir/frame.cc | 3 +-
src/script/ir_builder/ir/ir.cc | 24 +
src/script/ir_builder/relax/ir.cc | 2 +-
src/script/printer/ir/ir.cc | 15 +
src/script/printer/ir_docsifier.cc | 10 +-
src/script/printer/tir/expr.cc | 31 +-
src/target/intrin_rule.cc | 2 +-
src/target/source/codegen_c.cc | 4 +-
src/target/source/codegen_webgpu.cc | 191 +++++--
src/target/source/codegen_webgpu.h | 7 +-
src/tir/analysis/identify_memcpy.cc | 316 ++++++++++
src/tir/ir/stmt.cc | 4 +-
src/tir/op/builtin.cc | 67 ++-
src/tir/schedule/concrete_schedule.cc | 5 +-
src/tir/schedule/concrete_schedule.h | 3 +-
src/tir/schedule/primitive.h | 7 +-
.../schedule/primitive/layout_transformation.cc | 44 +-
src/tir/schedule/primitive/sampling.cc | 17 +-
src/tir/schedule/schedule.cc | 4 +-
src/tir/schedule/traced_schedule.cc | 9 +-
src/tir/schedule/traced_schedule.h | 3 +-
src/tir/transforms/lower_intrin.cc | 10 +-
tests/lint/check_file_type.py | 1 +
tests/micro/arduino/test_arduino_rpc_server.py | 1 -
tests/micro/arduino/test_arduino_workflow.py | 1 -
tests/micro/common/test_tvmc.py | 138 +++--
tests/micro/zephyr/test_ms_tuning.py | 2 +-
tests/micro/zephyr/test_zephyr_aot_exec.py | 1 -
.../zephyr/test_zephyr_aot_exec_standalone.py | 7 +-
tests/python/ci/test_script_converter.py | 21 +-
tests/python/contrib/test_cmsisnn/test_conv2d.py | 1 +
...{test_remove_reshapes.py => test_last_error.py} | 139 +++--
tests/python/contrib/test_cmsisnn/utils.py | 6 +-
tests/python/contrib/test_ethosu/test_codegen.py | 30 +
tests/python/contrib/test_ethosu/test_legalize.py | 117 ++++
.../contrib/test_ethosu/test_replace_pooling.py | 64 ++-
tests/python/frontend/onnx/test_forward.py | 78 ++-
tests/python/frontend/pytorch/qnn_test.py | 24 +-
tests/python/frontend/pytorch/test_forward.py | 291 +++++++++-
tests/python/frontend/pytorch/test_fx_quant.py | 7 +-
tests/python/frontend/pytorch/test_lstm.py | 6 +-
.../frontend/pytorch/test_object_detection.py | 6 +-
tests/python/frontend/pytorch/test_rnns.py | 16 +-
tests/python/frontend/tflite/test_forward.py | 175 ++----
tests/python/relax/test_analysis.py | 113 +++-
.../python/relax/test_analysis_detect_recursion.py | 405 +++++++++++++
tests/python/relax/test_analysis_well_formed.py | 25 +-
tests/python/relax/test_ast_printer.py | 636 +++++++++++++++++++++
tests/python/relax/test_blockbuilder.py | 16 +
tests/python/relax/test_codegen_cutlass.py | 553 ++++++++++--------
tests/python/relax/test_codegen_dnnl.py | 74 +--
tests/python/relax/test_codegen_tensorrt.py | 68 +--
tests/python/relax/test_frontend_from_fx.py | 49 +-
tests/python/relax/test_op_binary.py | 2 +
tests/python/relax/test_op_misc.py | 1 +
tests/python/relax/test_transform_bind_params.py | 52 ++
tests/python/relax/test_transform_fold_constant.py | 103 ++++
.../relax/test_transform_fuse_ops_by_pattern.py | 192 ++++++-
tests/python/relax/test_transform_legalize_ops.py | 97 ++++
.../relax/test_transform_legalize_ops_binary.py | 80 +++
tests/python/relax/test_tvmscript_ir_builder.py | 36 +-
tests/python/relax/test_tvmscript_parser.py | 48 +-
.../relax/test_tvmscript_parser_op_arith_cmp.py | 1 +
tests/python/relay/aot/corstone300.mk | 26 +-
tests/python/relay/aot/test_crt_aot.py | 3 +
tests/python/relay/aot/test_crt_aot_usmp.py | 35 ++
tests/python/relay/qnn/test_clip_legalization.py | 87 +++
.../python/relay/qnn/test_qnn_channel_stripping.py | 299 ++++++++++
.../strategy/arm_cpu/test_quantized_convolution.py | 71 ++-
tests/python/relay/test_any.py | 14 +
tests/python/relay/test_op_floordiv.py | 117 ++++
tests/python/relay/test_op_level3.py | 4 +-
.../relay/test_pass_eliminate_common_subexpr.py | 33 +-
.../topi/python/test_topi_conv2d_tensordot_opts.py | 6 +-
tests/python/topi/python/test_topi_dft.py | 88 +++
tests/python/topi/python/test_topi_transform.py | 1 -
.../unittest/test_cp_async_in_if_then_else.py | 238 --------
.../test_meta_schedule_schedule_rule_mlt.py | 6 +-
.../unittest/test_meta_schedule_space_cuda.py | 2 +-
.../test_meta_schedule_space_cuda_async.py | 340 +++++++++++
...ule_space_cuda_async_multiple_initialization.py | 88 +++
.../test_meta_schedule_space_cuda_winograd.py | 2 +-
.../unittest/test_micro_model_library_format.py | 8 +-
tests/python/unittest/test_runtime_graph.py | 16 +-
.../unittest/test_tir_analysis_identify_memcpy.py | 324 +++++++++++
tests/python/unittest/test_tir_schedule_trace.py | 27 +-
.../unittest/test_tir_schedule_transform_layout.py | 34 +-
.../test_tir_transform_inject_ptx_async_copy.py | 214 +++++++
...plify.py => test_tvmscript_printer_metadata.py} | 33 +-
tests/scripts/setup-pytest-env.sh | 2 +
web/.gitignore | 1 +
.../README.md => web/apps/browser/rpc_plugin.html | 5 +-
web/apps/browser/rpc_server.html | 78 ++-
web/emcc/wasm_runtime.cc | 109 ++++
web/src/rpc_server.ts | 43 +-
web/src/runtime.ts | 433 +++++++++++++-
web/src/webgpu.ts | 402 ++++++++++---
.../node/{test_module_load.js => test_relax_vm.js} | 26 +-
web/tests/python/prepare_test_libs.py | 30 +-
.../{webgpu_rpc_test.py => relax_rpc_test.py} | 79 +--
web/tests/python/webgpu_rpc_test.py | 4 +-
web/tests/python/websock_rpc_test.py | 4 +-
258 files changed, 11726 insertions(+), 2716 deletions(-)
create mode 100644 include/tvm/ir/global_info.h
create mode 100644 python/tvm/contrib/cutlass/attention_operation.py
create mode 100644 python/tvm/contrib/tvmjs.py
copy python/tvm/{meta_schedule/postproc/rewrite_layout.py =>
ir/global_info.py} (59%)
create mode 100644 python/tvm/relax/testing/ast_printer.py
create mode 100644 python/tvm/topi/arm_cpu/qnn_legalize.py
rename python/tvm/topi/cuda/{stft.py => signal.py} (60%)
rename python/tvm/topi/{stft.py => signal.py} (62%)
copy src/{auto_scheduler/utils.cc => ir/global_info.cc} (75%)
mode change 100755 => 100644
create mode 100644 src/relax/analysis/detect_recursion.cc
create mode 100644 src/relax/op/nn/attention.cc
copy src/relax/op/{tensor/search.h => nn/attention.h} (72%)
create mode 100644 src/tir/analysis/identify_memcpy.cc
copy tests/python/contrib/test_cmsisnn/{test_remove_reshapes.py =>
test_last_error.py} (53%)
create mode 100644 tests/python/relax/test_analysis_detect_recursion.py
create mode 100644 tests/python/relax/test_ast_printer.py
create mode 100644 tests/python/relay/qnn/test_clip_legalization.py
create mode 100644 tests/python/relay/qnn/test_qnn_channel_stripping.py
create mode 100644 tests/python/relay/test_op_floordiv.py
create mode 100644 tests/python/topi/python/test_topi_dft.py
delete mode 100644 tests/python/unittest/test_cp_async_in_if_then_else.py
create mode 100644 tests/python/unittest/test_meta_schedule_space_cuda_async.py
create mode 100644
tests/python/unittest/test_meta_schedule_space_cuda_async_multiple_initialization.py
create mode 100644 tests/python/unittest/test_tir_analysis_identify_memcpy.py
copy tests/python/unittest/{test_arith_simplify.py =>
test_tvmscript_printer_metadata.py} (53%)
copy 3rdparty/picojson/README.md => web/apps/browser/rpc_plugin.html (91%)
copy web/tests/node/{test_module_load.js => test_relax_vm.js} (76%)
copy web/tests/python/{webgpu_rpc_test.py => relax_rpc_test.py} (51%)