This is an automated email from the ASF dual-hosted git repository.
tqchen pushed a change to branch unity
in repository https://gitbox.apache.org/repos/asf/tvm.git
omit 2a9709c90b [Unity][Frontend] FX exp and strided_slice fix (#14338)
omit 57b42a8a62 [Unity][BYOC] Update testcases to follow recent changes
(#14339)
omit 4b75414b79 [Unity] Remove Python interface of RemoveUnusedFunction
(#14336)
omit 873ab3d7a9 [Unity][Pass] Reuse prior infra to implement more complete
DCE (#14334)
omit e8caffa489 [Unity][Op] Fix Strided Slice Shape Inference (#14324)
omit d06267b8e0 [Unity][Transform] DefaultSchedule pass (#14266)
omit aeb6f21446 [Unity][Lint] Fix cpplint casting (#14333)
omit 5e2e942d5a [Unity][Transform] Automatic Mixed Precision (#14242)
omit e73d26cb96 [Unity][Transform] Simple Dead Code Elimination (#14262)
omit fec44f605e [Unity][Transform] Automatic Layout Conversion (#14257)
omit 86ec016650 [Unity][TOPI] fp16 LayerNorm & GroupNorm (#14264)
omit 0ffd24c9f3 [Unity][Contrib] Introduce several features of cutlass
profiler (#14275)
omit 2dc5046afc [Unity][Transform] Enhance RewriteDataflowReshape transform
(#14265)
omit 5513095b5a [Unity][BYOC] Improve expressiveness of the pattern check
function in FuseOpsByPattern (#14310)
omit d56fa44b93 [Unity][BYOC] Support matmul + residual block fusion in
CUTLASS BYOC (#14317)
omit cb6efae413 [Unity] Support pattern-based rewriting (#14312)
omit 7270715cc6 [Unity][Web] WebGPU explicit max buffer size (#14321)
omit 39dc299c68 [Unity][Op] Enable special dimension value 0 in reshape
(#14311)
omit 3c0639eee9 [Unity][Pass] Add a pass to alter the TIR implementation of
an operator (#14215)
omit 50d0128a2e [Unity][DEBUG] Add Instrument (#14302)
omit 3c65ac467d [Unity][Op] Cumsum (#14297)
omit ae27e6f21d [Unity] Fix StructInfo Infer for `vm.alloc_tensor` (#14283)
omit 6cb52821a0 [Unity] Mark tests that need python3.8 compact.
omit f68f44729a [TVMScript][Unity] Improve PyLint Compatibility (#14276)
omit 8103493bfe [Unity][ci] Use CPU-SMALL instances (#14256)
omit 793f6ecc8c [Unity] Introduce call_dps_packed (#14183)
omit 9dbbb83b84 [Unity] Consider target context for Relay to Relax
conversion (#14269)
omit b7dd6ccfb8 [Unity][Frontend] Import `tanh` and fix `layer_norm`
(#14247)
omit c446ac5911 [Unity][BYOC] Add conv2d and residual block patterns for
Relax cutlass BYOC (#14252)
omit 8721d61932 [Unity] Allow user defined func attrs in emit_te (#14255)
omit f2f52dfedd [Unity][Op] Add repeat, tile, conv2d_transpose, avg_pool2d
(#14238)
omit a407613e06 [Unity][Op][Tweak] Improve `StructInfo` inference for
`shape_of` (#14243)
omit 38c1409ee7 [Unity][WEB] Improve ndarray cache (#14236)
omit 2ba8743db2 [Unity][WEB] Update text prompts for syntactical
correctness (#14237)
omit 8161f8df51 [Unity][TVMScript] Fix prim_func lost issue in
relax.emit_te (#14189)
omit 1e3a4d8db1 [Unity][TVMScript] Enable Context-Aware Parsing (#14234)
omit 238f7fbd6d [Unity][Bugfix] Do not include `PrimFunc`s in the
dependency graph when checking for recursion (#14228)
omit 6782cbc062 [Unity][Transform] SimplifyNormInference (#14221)
omit 544d60cc02 [Unity] Improve implementation of FuseOps (#14229)
omit 0a3ea9b93c [Unity] ensure memory.alloc_tensor/storage roundtrippable
(#14226)
omit 9ba395e985 [Unity][WEB] Simplify WebGPU Codegen per spec (#14225)
omit 0afa468a73 [Unity][Transform] Memory plan across the IRModule (#14220)
omit 9f8a5c3c00 [Unity][BYOC] Add dynamic shape support to CUTLASS matmul
(#14216)
omit 75dcf0015e [Unity][Frontend] from_fx keeps parameters in order (#14214)
omit 21f535789d [Unity][WEB] Improve webgpu codegen options to skip
readonly (#14213)
omit c179a4f7d5 [Unity][Frontend] FX translator supports unwrapping unit
return tuple (#14212)
omit 1eca5601c0 [Unity][Frontend] Attach imported model weights, deprecate
ImporterOutput (#14211)
omit ad25d0f8a3 [Unity] Introduce Default GPU Schedule Pass (#14182)
omit 06ee45b1aa [Unity][Frontend] FX translator support torch.baddbmm
(#14202)
omit 988c0223be [Unity][TIR][Pass] ForceNarrowIndexToInt32 (#14203)
omit 8037314df5 [Unity][Fix] FX translating dtype (#14201)
omit 4cf2834bed [Unity][Frontend] FX translator returning weights with
`keep_params_as_input` (#14197)
omit 61778f0a65 [Unity][Frontend] FX translator supporting more ops (#14196)
omit a55ec3e8ec [Unity][Op] Legalize `round`, `floor`, `ceil`, `sign`
(#14198)
omit 09d9546e72 [Unity][Op] Argmax and argmin (#14195)
omit b3ed9efbec [Unity][Op] Group normalization (#14194)
omit 2905eb24fb [Unity][Transform] LiftTransformParams handling multiple
functions (#14192)
omit e44839bacf [Unity][WEBGPU] Codegen improvements and WebRuntime (#14187)
omit 15810deacc [Unity][OP] Add an operator for fused multi head attention
(#14150)
omit a11dcf4867 [Unity][Analysis] Restore Python bindings for var analyses
(#14180)
omit 55c9c132b3 [Unity][Op] Full support of Relax op `power` (#14171)
omit 8838ca6f82 [Unity][BYOC] Add batch matmul support to Relax CUTLASS
BYOC (#14166)
omit 50cbdbbb7d [Unity][Analysis] Analysis for detecting recursion in Relax
(#14149)
omit 8431269bf7 [Unity] Add bind_constants option to FuseOpsByPattern
(#14151)
omit 052aff5597 [Unity][BYOC] Use Relax legalize + CPU build for reference
in tests (#14162)
omit 4b1cd39c59 [Unity][Analysis] Checking function return struct info in
well-formed check (#14155)
omit cb1ea98423 [Unity][Pass] Support Symbolic Shape Deduction during
BindParam (#14154)
omit eb56e4a184 [Unity][Debugging] AST printer (#14152)
omit 614f1f6140 [Unity][Pass] Enhance constant folding to fold relax ops by
evaluating them. (#14146)
omit 6b2c84b7dd [Unity][Legalize] Fix Scalar Constant Legalization (#14127)
omit d1d05ab459 [Unity] Add callback to FuseOpsByPattern to check match
result is accepted (#14109)
omit 90771d753d [Unity][BYOC] Assign group to unused bindings and ignroe
PrimFunc (#14139)
omit 8e3b10f078 [Unity][TVMScript] emit_te sugar (#14123)
omit 9a035e479d [Unity][BYOC] Add transposed matmul support to Relax
CUTLASS BYOC (#14128)
omit 4aa05622b8 [Unity] Add Global info (#14132)
omit 958f67b679 [Unity][WEB] Relax vm on web runtime (#14131)
omit 30f1d1b6aa [Unity][BlockBuilder] Add `name_hint` argument for `emit`
and `emit_output` (#14126)
omit d834ec6ebc [Unity][Fix] Fix bug in MergeCompositeFunctions (#14117)
omit ccdd1097e5 [Unity] Update tests again to adapt to latest TVMScript
syntax (#14115)
omit 0994870117 [Unity][BYOC]Add relax backend pattern registry (#14106)
omit 4086eb2e3e [Unity] Remove attributes of relax.print, assert and unique
(#14101)
omit 825eda8959 [Unity][Layout] Add layout transformation analysis for
PrimFunc (#14066)
omit e1f4af2157 [Unity] Relax Recursive function (#14092)
omit cd85685886 [Unity] Lower `shape_of` to a builtin (#14093)
omit 0c67231078 [Unity] Fix typo in the comment (#14096)
omit 170a04cc9a [Unity][Relax] Set Shape Function to Be Host Function
(#14090)
omit 53b5619999 [Unity] Refactor Relax Build JIT UX (#14088)
omit 282c94717a [Unity][Fix][Pass] FoldConstant with DCE in dataflow block
(#14087)
omit 9a2ea3991c [Unity][Analysis] TIR pattern kind analysis for
multi-buffer write block (#14075)
omit 9202f25fac [Unity][Op] `log_softmax` and `cross_entropy_with_logits`
(#14083)
omit c33c05e01b [Unity][BYOC] Add DNNL backend (#14082)
omit d8d1c3e4cb [Unity][BYOC] Add CUTLASS backend (#14081)
omit 3d610ec53c [Unity] Add testcases for `expr_args_converter` (#14080)
omit 73442eebe8 [Unity][Pass] Canonicalize Bindings (#14079)
omit 70f03636a8 [Unity][BYOC][Pass] RunCodegen and TensorRT (#14078)
omit 29a4a37347 [Unity][Transform] Add LiftTransformParams pass (#14069)
omit 88edf5f084 [Unity][Frontend] Annotate number of non-static input of FX
function (#14067)
omit 2770f92b50 [Unity][BYOC] Add pass to merge composite functions to
offload large subgraphs (#14062)
omit ea5c67cc85 [Unity][Pass] Remove Unused Function (#14061)
omit 26fb793d6a [Unity][Fix][Pass] Fix FuseOps for lack graph edges (#14058)
omit 9753ca95d6 [Unity] Relax op: collapse sum (#14059)
omit 5a4b4ff006 [Unity][BYOC] Add pattern-based partitioning pass (#14054)
omit a8ebb3a8c2 [Unity][VM] Add per-op profiling support (#14053)
omit 6035ca8c4e [Unity][TVMScript] Overload `__neg__` for relax expr
(#14045)
omit 5df6a24f96 [Unity][Pass] FuseOps FuseTIR fixes (#14044)
omit ef7ecb392d [Unity] Statement rewriter for DataflowBlock (#14043)
omit 0bbc420e94 [Unity] Relax dataflow pattern language (matching) (#14041)
omit 77af69bcb7 [Unity] Update tests to adapt to latest TVMScript syntax
(#14039)
omit 928ff81bd4 [Unity] Disallow inline prim_func in relax IR (#14040)
omit ca872f9135 [Unity][Pass] Block-level static memory planning (#14038)
omit a929a149ce [Unity] Initial PyTorch Frontend (#14037)
omit 05096d68fe [Unity][Op] Add ShapeExpr Tests for Reshape Op (#14035)
omit 878625b17a [Unity][Pass] Operator legalization (#14029)
omit 0785ec88dc [Unity][TVMScript] Move tir/relax import in script out of
__init__.py (#14033)
omit b29906b710 [Unity][Pass] Wellformed Analysis (#14032)
omit 6fad06c8ce [Unity][BlockBuilder] CallTE convert PrimValue args
(#14028)
omit 209369ec39 [Unity][Pass] Normalize Pass (#14031)
omit 58130034d0 [Unity] Relay -> Relax translator (#14026)
omit 03beb2b448 [Unity][Pass][TuningAPI] Introduce TuningAPI and
MetaSchedule pass (#14014)
omit a3739da691 [Unity][Pass] BindParams pass, FoldConstant pass (#14016)
omit 28064aafc9 [Unity][VM] Supporting "compiled" exec mode. (#14015)
omit 9192da2d06 [Unity][Pass] LambdaLift pass (#14012)
omit 9b63670fcf [Unity][Pass] Operator Fusion Passes (#14001)
omit e3952ae01e [Unity] NestedMsg Support utility (#13995)
omit 8e89591525 [Unity] Relax op: manipulation (#13989)
omit 5e08d9831a [Unity] Relax op: search (#13992)
omit 81b03c867b [Unity] Relax op: linear algebra (#13988)
omit a4541ac9f3 [Unity] Relax op: creation (#13984)
omit 90bc292b4d [Unity] Relax op: neural networks (#13993)
omit 82c0783230 [Unity] Relax op: statistical (#13991)
omit 4806de6814 [Unity] Relax op: arithmetic, comparison (#13983)
omit 2d676d4351 [Unity] Relax op: image (#13994)
omit 1653505281 [Unity] Relax op: set (#13990)
omit 54810c36ed [Unity] Relax op: datatype (#13986)
omit 31d1eb184f [Unity] Relax op: index (#13987)
omit aa157dbf8d [Unity][TVMScript] Use explicit `R.shape` in TVMScript
(#13979)
omit 652bf23a49 [Unity] e2e Relax minimum build flow (#13961)
omit c08a2630a6 [Unity] Relax VM shape lowering pass (#13956)
omit cdc257e16d [Unity] Relax VM codegen (#13954)
omit 1eab9937b1 [Unity] Relax TVMScript Printer (#13944)
omit 020bea042f [Unity] Relax TVMScript Parser. (#13932)
omit b8631adebc [Unity] Relax BlockBuilder and ExprMutator (#13926)
omit 59d7ff5931 [Unity] Basic StructInfo Analysis and Expr construction
(#13916)
omit c35aedbd2d [Unity][CI] Unity specific jenkins setup (do not upstream
to main) (#13910)
omit e9014d90ad [Unity][IR] First-class StructInfo (#13907)
omit 46ec39c8ad [Unity] Relax expressions and types (#13901)
omit 5292e3b314 [Unity] Relax VM (#13878)
add ff12a20323 [Fix][Relay][TOPI] Bug fix in relay.sum and topi.sum
functions when w… (#14285)
add eecc02ad35 [microTVM] Custom IDE Tutorial (#13857)
add ccc0b9162f [fix][relay][qnn] Bug fix for 8-bit quantized mul (#14286)
add c9ab1979f1 [CI][ETHOSN] Add ssh to the driver stack installation
(#14246)
add d22bdce2bf [Relay][Op] Connect existing arm_cpu schedule to relay
strategy for concat (#14270)
add ce1fa8908f [TE] Record primitives of Schedule for visualization
(#14168)
add e22a2d5b9f [IR] Enhance IRModule SEqual/SHash to support cross
function calls (#14289)
add 075e2ec7bb [Frontend][Paddle]fix eye and dist (#14292)
add 970cd1def8 [TIR][Hexagon] Enhancement of NarrowDataType pass for
binary ops (#14298)
add 6eb4b873e1 [DOCS][ADRENO] Improved Adreno documentation (#13867)
add 5d0509237e [CI] Update ci_arm docker image to have LLVM 15 (#14296)
add f6b75792c5 [TVMC] Fix logging in TVMC (#14175)
add 946581ab56 [TIR][Compute-at] Utilize InverseAffineIterMap for dom
estimation (#14184)
add f4520c4f15 [TVMC] Improve --desired-layouts functionality (#14272)
add 32e500b7f7 [LLVM] Add support to generate llvm.assume (#14294)
add d1dea13762 [Bugfix][TVMScript] Preserve variable names in LetStmt
(#14319)
add 58dce66097 [CI] Update CUDA to 11.7 (#14293)
add 3f2dac0d07 [PaddlePaddle Hackathon 4][Frontend][Paddle]add conv3d for
paddle frontend (#14290)
add fe3fa9d75d [Fix][TIR] Fix tvm::arith::UnionLowerBound (#14304)
add f7c2bbbe87 [LLVM] Fix registerCallbacks API after recent change
(#14323)
add 84e8f86960 [docs] Add details about patch releases (#14301)
add 4ae08d8710 [apps][bundle_deploy]Fix bundle build issue (#14315)
add ca8153d502 [AOT]Raise error when input name is not valid (#14322)
add 2ff41c6156 [TIR][Schedule] Allow buffer name argument to
Schedule.set_scope (#14327)
add a5ed21d12a [CODEGEN][METAL] Fix ramp codegen (#14330)
add 542274dde9 [Schedule] Add an optional argument `disable_checks` for
`Schedule` (#14281)
add 56d0e3b7af [METAL][CODEGEN] testcase for ramp codegen (#14331)
add fc2a9e50af [CODEGEN][METAL] Fix unaligned vector load (#14332)
add c6c89c3a25 [Hexagon] Add concept of DMA groups (#14254)
add 06276846a1 [Docs] Update listed tvmc python dependencies (#14341)
add b8e4110467 [Unity] Relax VM (#13878)
add 2c7f480f4f [Unity] Relax expressions and types (#13901)
add 4e659d1f26 [Unity][IR] First-class StructInfo (#13907)
add fb90fd1a46 [Unity][CI] Unity specific jenkins setup (do not upstream
to main) (#13910)
add 6915444b2d [Unity] Basic StructInfo Analysis and Expr construction
(#13916)
add d1ad4e6543 [Unity] Relax BlockBuilder and ExprMutator (#13926)
add 2001903486 [Unity] Relax TVMScript Parser. (#13932)
add e38a3360f5 [Unity] Relax TVMScript Printer (#13944)
add ea6cc94c8d [Unity] Relax VM codegen (#13954)
add 2c158714cf [Unity] Relax VM shape lowering pass (#13956)
add fadbb3f256 [Unity] e2e Relax minimum build flow (#13961)
add 75eecf7dd9 [Unity][TVMScript] Use explicit `R.shape` in TVMScript
(#13979)
add de164d2524 [Unity] Relax op: index (#13987)
add c8a153314d [Unity] Relax op: datatype (#13986)
add c7a57aecd6 [Unity] Relax op: set (#13990)
add 42409202db [Unity] Relax op: image (#13994)
add 5385d6d635 [Unity] Relax op: arithmetic, comparison (#13983)
add a6a2e84ca9 [Unity] Relax op: statistical (#13991)
add a96f2006a6 [Unity] Relax op: neural networks (#13993)
add 4dd591b800 [Unity] Relax op: creation (#13984)
add 9694c673bb [Unity] Relax op: linear algebra (#13988)
add 7a8765d819 [Unity] Relax op: search (#13992)
add 4ab73eabc3 [Unity] Relax op: manipulation (#13989)
add 62daae4457 [Unity] NestedMsg Support utility (#13995)
add 20adb37493 [Unity][Pass] Operator Fusion Passes (#14001)
add e78d523e74 [Unity][Pass] LambdaLift pass (#14012)
add 0e2bb802bb [Unity][VM] Supporting "compiled" exec mode. (#14015)
add 63e2402358 [Unity][Pass] BindParams pass, FoldConstant pass (#14016)
add 87659ea3ea [Unity][Pass][TuningAPI] Introduce TuningAPI and
MetaSchedule pass (#14014)
add 388941ad6b [Unity] Relay -> Relax translator (#14026)
add 0d91c33103 [Unity][Pass] Normalize Pass (#14031)
add 251a062bf1 [Unity][BlockBuilder] CallTE convert PrimValue args
(#14028)
add b5d2304029 [Unity][Pass] Wellformed Analysis (#14032)
add 5e2f2b9d43 [Unity][TVMScript] Move tir/relax import in script out of
__init__.py (#14033)
add 814eb921c2 [Unity][Pass] Operator legalization (#14029)
add 130e362430 [Unity][Op] Add ShapeExpr Tests for Reshape Op (#14035)
add 85477ac489 [Unity] Initial PyTorch Frontend (#14037)
add 96a9b6e4d8 [Unity][Pass] Block-level static memory planning (#14038)
add 837a557210 [Unity] Disallow inline prim_func in relax IR (#14040)
add d3494933fe [Unity] Update tests to adapt to latest TVMScript syntax
(#14039)
add 02cefd91a5 [Unity] Relax dataflow pattern language (matching) (#14041)
add f428a4ae23 [Unity] Statement rewriter for DataflowBlock (#14043)
add ac5bf3a76a [Unity][Pass] FuseOps FuseTIR fixes (#14044)
add 81169f6576 [Unity][TVMScript] Overload `__neg__` for relax expr
(#14045)
add 2bd1581596 [Unity][VM] Add per-op profiling support (#14053)
add 591b800bfa [Unity][BYOC] Add pattern-based partitioning pass (#14054)
add ff7d4950e0 [Unity] Relax op: collapse sum (#14059)
add 828edeb5ea [Unity][Fix][Pass] Fix FuseOps for lack graph edges (#14058)
add 7be4441569 [Unity][Pass] Remove Unused Function (#14061)
add 0bd303c7c1 [Unity][BYOC] Add pass to merge composite functions to
offload large subgraphs (#14062)
add 466a004d6c [Unity][Frontend] Annotate number of non-static input of FX
function (#14067)
add 51b1ce1ec7 [Unity][Transform] Add LiftTransformParams pass (#14069)
add 6ba5cac678 [Unity][BYOC][Pass] RunCodegen and TensorRT (#14078)
add 4b3794c24a [Unity][Pass] Canonicalize Bindings (#14079)
add d5fa61fd46 [Unity] Add testcases for `expr_args_converter` (#14080)
add fe7e0651ec [Unity][BYOC] Add CUTLASS backend (#14081)
add f3ee944a58 [Unity][BYOC] Add DNNL backend (#14082)
add 3a0f4c5eca [Unity][Op] `log_softmax` and `cross_entropy_with_logits`
(#14083)
add 5aecfe4121 [Unity][Analysis] TIR pattern kind analysis for
multi-buffer write block (#14075)
add 22c7b75834 [Unity][Fix][Pass] FoldConstant with DCE in dataflow block
(#14087)
add 99f6d67dd0 [Unity] Refactor Relax Build JIT UX (#14088)
add f15b80a561 [Unity][Relax] Set Shape Function to Be Host Function
(#14090)
add 0eff29a505 [Unity] Fix typo in the comment (#14096)
add 03799a50cb [Unity] Lower `shape_of` to a builtin (#14093)
add 70c8debc7a [Unity] Relax Recursive function (#14092)
add 9df23f7c67 [Unity][Layout] Add layout transformation analysis for
PrimFunc (#14066)
add a31c856de7 [Unity] Remove attributes of relax.print, assert and unique
(#14101)
add b168949441 [Unity][BYOC]Add relax backend pattern registry (#14106)
add fbf56475d2 [Unity] Update tests again to adapt to latest TVMScript
syntax (#14115)
add 4e5e81a1b8 [Unity][Fix] Fix bug in MergeCompositeFunctions (#14117)
add cf9beab753 [Unity][BlockBuilder] Add `name_hint` argument for `emit`
and `emit_output` (#14126)
add 133b4acaeb [Unity][WEB] Relax vm on web runtime (#14131)
add fa47ee995f [Unity] Add Global info (#14132)
add 5939a6e8c8 [Unity][BYOC] Add transposed matmul support to Relax
CUTLASS BYOC (#14128)
add 1004bdf02a [Unity][TVMScript] emit_te sugar (#14123)
add a16021ace5 [Unity][BYOC] Assign group to unused bindings and ignroe
PrimFunc (#14139)
add 3bdd8013c1 [Unity] Add callback to FuseOpsByPattern to check match
result is accepted (#14109)
add 019ef59f2e [Unity][Legalize] Fix Scalar Constant Legalization (#14127)
add 43c5f29813 [Unity][Pass] Enhance constant folding to fold relax ops by
evaluating them. (#14146)
add 82bfc57772 [Unity][Debugging] AST printer (#14152)
add 7e96a3aeed [Unity][Pass] Support Symbolic Shape Deduction during
BindParam (#14154)
add 1cdc3d336b [Unity][Analysis] Checking function return struct info in
well-formed check (#14155)
add ef0f4481cf [Unity][BYOC] Use Relax legalize + CPU build for reference
in tests (#14162)
add 7cad6ef8d8 [Unity] Add bind_constants option to FuseOpsByPattern
(#14151)
add c56b17f4f6 [Unity][Analysis] Analysis for detecting recursion in Relax
(#14149)
add 1bbe881241 [Unity][BYOC] Add batch matmul support to Relax CUTLASS
BYOC (#14166)
add be532f28f2 [Unity][Op] Full support of Relax op `power` (#14171)
add 1cc9bb014e [Unity][Analysis] Restore Python bindings for var analyses
(#14180)
add 8a46c21e33 [Unity][OP] Add an operator for fused multi head attention
(#14150)
add 281cc206cc [Unity][WEBGPU] Codegen improvements and WebRuntime (#14187)
add cd88b0ab49 [Unity][Transform] LiftTransformParams handling multiple
functions (#14192)
add 1927d7d4aa [Unity][Op] Group normalization (#14194)
add 50c1e7a147 [Unity][Op] Argmax and argmin (#14195)
add 6731783749 [Unity][Op] Legalize `round`, `floor`, `ceil`, `sign`
(#14198)
add 439ec78118 [Unity][Frontend] FX translator supporting more ops (#14196)
add ec6e26827b [Unity][Frontend] FX translator returning weights with
`keep_params_as_input` (#14197)
add fb6b1ea299 [Unity][Fix] FX translating dtype (#14201)
add 3b7db40860 [Unity][TIR][Pass] ForceNarrowIndexToInt32 (#14203)
add 044080ff93 [Unity][Frontend] FX translator support torch.baddbmm
(#14202)
add b7193cf056 [Unity] Introduce Default GPU Schedule Pass (#14182)
add 399d9daf71 [Unity][Frontend] Attach imported model weights, deprecate
ImporterOutput (#14211)
add c4225052e9 [Unity][Frontend] FX translator supports unwrapping unit
return tuple (#14212)
add 1ffc31777e [Unity][WEB] Improve webgpu codegen options to skip
readonly (#14213)
add 841f8a0c03 [Unity][Frontend] from_fx keeps parameters in order (#14214)
add 6cb1fe7a94 [Unity][BYOC] Add dynamic shape support to CUTLASS matmul
(#14216)
add 84c20b3abe [Unity][Transform] Memory plan across the IRModule (#14220)
add 2531c7eaf5 [Unity][WEB] Simplify WebGPU Codegen per spec (#14225)
add afdf218125 [Unity] ensure memory.alloc_tensor/storage roundtrippable
(#14226)
add 6b75a40036 [Unity] Improve implementation of FuseOps (#14229)
add 13c8c673ba [Unity][Transform] SimplifyNormInference (#14221)
add f2804d15f7 [Unity][Bugfix] Do not include `PrimFunc`s in the
dependency graph when checking for recursion (#14228)
add 198caa55d1 [Unity][TVMScript] Enable Context-Aware Parsing (#14234)
add b270be88fe [Unity][TVMScript] Fix prim_func lost issue in
relax.emit_te (#14189)
add 81c38c5e1b [Unity][WEB] Update text prompts for syntactical
correctness (#14237)
add 4c90f052f6 [Unity][WEB] Improve ndarray cache (#14236)
add 9b757c9f39 [Unity][Op][Tweak] Improve `StructInfo` inference for
`shape_of` (#14243)
add f5b6ac8fb4 [Unity][Op] Add repeat, tile, conv2d_transpose, avg_pool2d
(#14238)
add 24b8e7bef5 [Unity] Allow user defined func attrs in emit_te (#14255)
add 6ae1c52610 [Unity][BYOC] Add conv2d and residual block patterns for
Relax cutlass BYOC (#14252)
add 1456f99a26 [Unity][Frontend] Import `tanh` and fix `layer_norm`
(#14247)
add 72c9510ae4 [Unity] Consider target context for Relay to Relax
conversion (#14269)
add 7f89e22406 [Unity] Introduce call_dps_packed (#14183)
add e6f3db185a [Unity][ci] Use CPU-SMALL instances (#14256)
add 581889aa6c [TVMScript][Unity] Improve PyLint Compatibility (#14276)
add 8c34de2d7f [Unity] Mark tests that need python3.8 compact.
add c7f40bd1d9 [Unity] Fix StructInfo Infer for `vm.alloc_tensor` (#14283)
add c1783b83a7 [Unity][Op] Cumsum (#14297)
add 15de2c2df7 [Unity][DEBUG] Add Instrument (#14302)
add 765375f187 [Unity][Pass] Add a pass to alter the TIR implementation of
an operator (#14215)
add b10498b51c [Unity][Op] Enable special dimension value 0 in reshape
(#14311)
add 001f17814c [Unity][Web] WebGPU explicit max buffer size (#14321)
add f5ee09795f [Unity] Support pattern-based rewriting (#14312)
add 10b834f887 [Unity][BYOC] Support matmul + residual block fusion in
CUTLASS BYOC (#14317)
add 981a822bd3 [Unity][BYOC] Improve expressiveness of the pattern check
function in FuseOpsByPattern (#14310)
add 850e549b32 [Unity][Transform] Enhance RewriteDataflowReshape transform
(#14265)
add e2b1d93591 [Unity][Contrib] Introduce several features of cutlass
profiler (#14275)
add 5529830bf5 [Unity][TOPI] fp16 LayerNorm & GroupNorm (#14264)
add fdf86e4c3e [Unity][Transform] Automatic Layout Conversion (#14257)
add 3ca23b3378 [Unity][Transform] Simple Dead Code Elimination (#14262)
add 795568e148 [Unity][Transform] Automatic Mixed Precision (#14242)
add 1d35ef2135 [Unity][Lint] Fix cpplint casting (#14333)
add 0b47f0bfe3 [Unity][Transform] DefaultSchedule pass (#14266)
add 9aae685bf6 [Unity][Op] Fix Strided Slice Shape Inference (#14324)
add a0bd29917c [Unity][Pass] Reuse prior infra to implement more complete
DCE (#14334)
add 06fe80be71 [Unity] Remove Python interface of RemoveUnusedFunction
(#14336)
add 6c6985940c [Unity][BYOC] Update testcases to follow recent changes
(#14339)
add 18c19fb830 [Unity][Frontend] FX exp and strided_slice fix (#14338)
This update added new revisions after undoing existing revisions.
That is to say, some revisions that were in the old version of the
branch are not in the new version. This situation occurs
when a user force-pushes a change (`git push --force`) and generates a repository
containing something like this:
 * -- * -- B -- O -- O -- O   (2a9709c90b)
            \
             N -- N -- N   refs/heads/unity (18c19fb830)
You should already have received notification emails for all of the O
revisions, and so the following emails describe only the N revisions
from the common base, B.
Any revisions marked "omit" are not gone; other references still
refer to them. Any revisions marked "discard" are gone forever.
No new revisions were added by this update.
Summary of changes:
apps/bundle_deploy/Makefile | 2 +-
ci/jenkins/docker-images.ini | 2 +-
docker/Dockerfile.ci_gpu | 2 +-
.../install/ubuntu_install_ethosn_driver_stack.sh | 3 +-
docs/contribute/release_process.rst | 29 +-
docs/how_to/deploy/adreno.rst | 718 +++++++++++++++------
docs/install/from_source.rst | 2 +-
.../how_to/deploy_models/deploy_model_on_adreno.py | 309 +++++----
.../deploy_models/deploy_model_on_adreno_tvmc.py | 198 ++++++
.../how_to/work_with_microtvm/micro_custom_ide.py | 361 +++++++++++
include/tvm/meta_schedule/postproc.h | 3 +-
include/tvm/te/schedule.h | 43 +-
include/tvm/tir/builtin.h | 37 +-
include/tvm/tir/schedule/schedule.h | 13 +-
include/tvm/tir/schedule/state.h | 9 +-
include/tvm/topi/reduction.h | 6 +-
python/tvm/contrib/tedd.py | 27 +-
python/tvm/driver/tvmc/autotuner.py | 1 +
python/tvm/driver/tvmc/main.py | 7 +-
python/tvm/driver/tvmc/runner.py | 1 +
python/tvm/driver/tvmc/transform.py | 42 +-
.../postproc/disallow_async_strided_mem_copy.py | 11 +-
python/tvm/micro/testing/utils.py | 4 +-
python/tvm/relay/frontend/paddlepaddle.py | 63 +-
python/tvm/relay/op/strategy/arm_cpu.py | 16 +-
python/tvm/relay/quantize/_annotate.py | 10 +
python/tvm/relay/quantize/_partition.py | 5 +
python/tvm/script/parser/tir/parser.py | 1 +
python/tvm/tir/schedule/schedule.py | 29 +-
python/tvm/tir/schedule/state.py | 14 +
python/tvm/tir/tensor_intrin/hexagon.py | 35 +-
src/arith/int_set.cc | 1 +
src/arith/iter_affine_map.cc | 3 +-
src/driver/driver_api.cc | 1 -
src/ir/module.cc | 75 +--
.../postproc/disallow_async_strided_mem_copy.cc | 8 +-
src/relay/backend/te_compiler.cc | 13 +-
src/runtime/aot_executor/aot_executor.cc | 2 +-
src/runtime/hexagon/hexagon_device_api.cc | 19 +-
src/runtime/hexagon/hexagon_user_dma.cc | 14 +-
src/runtime/hexagon/hexagon_user_dma.h | 31 +-
src/runtime/hexagon/ring_buffer.h | 76 ++-
src/target/llvm/codegen_llvm.cc | 7 +
src/target/source/codegen_c.cc | 15 +-
src/target/source/codegen_metal.cc | 6 +-
src/target/source/codegen_metal.h | 3 +-
src/target/source/codegen_opencl.cc | 37 ++
src/target/source/codegen_opencl.h | 3 +
src/te/schedule/schedule_dataflow_rewrite.cc | 14 +-
src/te/schedule/schedule_lang.cc | 51 +-
src/tir/op/builtin.cc | 6 +
src/tir/schedule/analysis/analysis.cc | 2 +-
src/tir/schedule/concrete_schedule.cc | 6 +-
src/tir/schedule/primitive/compute_at.cc | 179 +++--
src/tir/schedule/primitive/for_kind.cc | 4 +-
src/tir/schedule/primitive/reduction.cc | 30 +-
src/tir/schedule/schedule.cc | 10 +-
src/tir/schedule/state.cc | 23 +-
src/tir/schedule/traced_schedule.cc | 5 +-
src/tir/transforms/inject_software_pipeline.cc | 35 +-
src/tir/transforms/lower_async_dma.cc | 172 ++---
src/tir/transforms/lower_tvm_builtin.cc | 26 +
src/tir/transforms/narrow_datatype.cc | 38 ++
.../cpp-runtime/hexagon/hexagon_user_dma_tests.cc | 2 +-
tests/cpp-runtime/hexagon/ring_buffer_tests.cc | 203 +++++-
.../metaschedule_e2e/test_resnet50_int8.py | 1 -
.../test_hexagon/test_async_dma_pipeline.py | 6 +-
.../test_hexagon/test_software_pipeline_async.py | 1 -
tests/python/contrib/test_tedd.py | 58 +-
tests/python/driver/tvmc/test_command_line.py | 62 ++
tests/python/driver/tvmc/test_transform.py | 86 ++-
tests/python/frontend/paddlepaddle/test_forward.py | 43 ++
tests/python/relay/aot/test_cpp_aot.py | 38 ++
tests/python/relay/opencl_texture/test_network.py | 40 +-
.../relay/strategy/test_select_implementation.py | 56 ++
tests/python/relay/test_build_module.py | 37 ++
tests/python/relay/test_op_level4.py | 36 +-
tests/python/topi/python/test_topi_reduce.py | 12 +-
tests/python/unittest/test_arith_intset.py | 4 +
tests/python/unittest/test_target_codegen_llvm.py | 24 +
tests/python/unittest/test_target_codegen_metal.py | 54 +-
tests/python/unittest/test_te_schedule_ops.py | 53 ++
.../unittest/test_tir_schedule_compute_at.py | 74 +++
.../python/unittest/test_tir_schedule_set_scope.py | 9 +-
.../unittest/test_tir_transform_narrow_datatype.py | 61 ++
.../python/unittest/test_tvmscript_syntax_sugar.py | 13 +
tests/scripts/request_hook/request_hook.py | 2 +
tests/scripts/setup-adreno-env.sh | 113 ++++
tests/scripts/task_build_adreno_bins.sh | 1 -
tests/scripts/task_config_build_adreno.sh | 1 -
tests/scripts/task_config_build_gpu.sh | 1 +
tests/scripts/task_microtvm_cpp_tests.sh | 4 +-
92 files changed, 3303 insertions(+), 710 deletions(-)
create mode 100644 gallery/how_to/deploy_models/deploy_model_on_adreno_tvmc.py
create mode 100644 gallery/how_to/work_with_microtvm/micro_custom_ide.py
create mode 100644 tests/python/relay/strategy/test_select_implementation.py
create mode 100755 tests/scripts/setup-adreno-env.sh