[tvm] branch unity-staging updated (6cb52821a0 -> 18c19fb830)

tqchen Mon, 20 Mar 2023 08:04:10 -0700

This is an automated email from the ASF dual-hosted git repository.

tqchen pushed a change to branch unity-staging
in repository https://gitbox.apache.org/repos/asf/tvm.git



    omit 6cb52821a0 [Unity] Mark tests that need python3.8 compact.
    omit f68f44729a [TVMScript][Unity] Improve PyLint Compatibility (#14276)
    omit 8103493bfe [Unity][ci] Use CPU-SMALL instances (#14256)
    omit 793f6ecc8c [Unity] Introduce call_dps_packed (#14183)
    omit 9dbbb83b84 [Unity] Consider target context for Relay to Relax 
conversion (#14269)
    omit b7dd6ccfb8 [Unity][Frontend] Import `tanh` and fix `layer_norm` 
(#14247)
    omit c446ac5911 [Unity][BYOC] Add conv2d and residual block patterns for 
Relax cutlass BYOC (#14252)
    omit 8721d61932 [Unity] Allow user defined func attrs in emit_te (#14255)
    omit f2f52dfedd [Unity][Op] Add repeat, tile, conv2d_transpose, avg_pool2d 
(#14238)
    omit a407613e06 [Unity][Op][Tweak] Improve `StructInfo` inference for 
`shape_of` (#14243)
    omit 38c1409ee7 [Unity][WEB] Improve ndarray cache (#14236)
    omit 2ba8743db2 [Unity][WEB] Update text prompts for syntactical 
correctness (#14237)
    omit 8161f8df51 [Unity][TVMScript] Fix prim_func lost issue in 
relax.emit_te (#14189)
    omit 1e3a4d8db1 [Unity][TVMScript] Enable Context-Aware Parsing (#14234)
    omit 238f7fbd6d [Unity][Bugfix] Do not include `PrimFunc`s in the 
dependency graph when checking for recursion (#14228)
    omit 6782cbc062 [Unity][Transform] SimplifyNormInference (#14221)
    omit 544d60cc02 [Unity] Improve implementation of FuseOps (#14229)
    omit 0a3ea9b93c [Unity] ensure memory.alloc_tensor/storage roundtrippable 
(#14226)
    omit 9ba395e985 [Unity][WEB] Simplify WebGPU Codegen per spec (#14225)
    omit 0afa468a73 [Unity][Transform] Memory plan across the IRModule (#14220)
    omit 9f8a5c3c00 [Unity][BYOC] Add dynamic shape support to CUTLASS matmul 
(#14216)
    omit 75dcf0015e [Unity][Frontend] from_fx keeps parameters in order (#14214)
    omit 21f535789d [Unity][WEB] Improve webgpu codegen options to skip 
readonly (#14213)
    omit c179a4f7d5 [Unity][Frontend] FX translator supports unwrapping unit 
return tuple (#14212)
    omit 1eca5601c0 [Unity][Frontend] Attach imported model weights, deprecate 
ImporterOutput (#14211)
    omit ad25d0f8a3 [Unity] Introduce Default GPU Schedule Pass (#14182)
    omit 06ee45b1aa [Unity][Frontend] FX translator support torch.baddbmm 
(#14202)
    omit 988c0223be [Unity][TIR][Pass] ForceNarrowIndexToInt32 (#14203)
    omit 8037314df5 [Unity][Fix] FX translating dtype (#14201)
    omit 4cf2834bed [Unity][Frontend] FX translator returning weights with 
`keep_params_as_input` (#14197)
    omit 61778f0a65 [Unity][Frontend] FX translator supporting more ops (#14196)
    omit a55ec3e8ec [Unity][Op] Legalize `round`, `floor`, `ceil`, `sign` 
(#14198)
    omit 09d9546e72 [Unity][Op] Argmax and argmin (#14195)
    omit b3ed9efbec [Unity][Op] Group normalization (#14194)
    omit 2905eb24fb [Unity][Transform] LiftTransformParams handling multiple 
functions (#14192)
    omit e44839bacf [Unity][WEBGPU] Codegen improvements and WebRuntime (#14187)
    omit 15810deacc [Unity][OP] Add an operator for fused multi head attention 
(#14150)
    omit a11dcf4867 [Unity][Analysis] Restore Python bindings for var analyses 
(#14180)
    omit 55c9c132b3 [Unity][Op] Full support of Relax op `power` (#14171)
    omit 8838ca6f82 [Unity][BYOC] Add batch matmul support to Relax CUTLASS 
BYOC (#14166)
    omit 50cbdbbb7d [Unity][Analysis] Analysis for detecting recursion in Relax 
(#14149)
    omit 8431269bf7 [Unity] Add bind_constants option to FuseOpsByPattern 
(#14151)
    omit 052aff5597 [Unity][BYOC] Use Relax legalize + CPU build for reference 
in tests (#14162)
    omit 4b1cd39c59 [Unity][Analysis] Checking function return struct info in 
well-formed check (#14155)
    omit cb1ea98423 [Unity][Pass] Support Symbolic Shape Deduction during 
BindParam (#14154)
    omit eb56e4a184 [Unity][Debugging] AST printer (#14152)
    omit 614f1f6140 [Unity][Pass] Enhance constant folding to fold relax ops by 
evaluating them. (#14146)
    omit 6b2c84b7dd [Unity][Legalize] Fix Scalar Constant Legalization (#14127)
    omit d1d05ab459 [Unity] Add callback to FuseOpsByPattern to check match 
result is accepted (#14109)
    omit 90771d753d [Unity][BYOC] Assign group to unused bindings and ignroe 
PrimFunc (#14139)
    omit 8e3b10f078 [Unity][TVMScript] emit_te sugar (#14123)
    omit 9a035e479d [Unity][BYOC] Add transposed matmul support to Relax 
CUTLASS BYOC (#14128)
    omit 4aa05622b8 [Unity] Add Global info (#14132)
    omit 958f67b679 [Unity][WEB] Relax vm on web runtime (#14131)
    omit 30f1d1b6aa [Unity][BlockBuilder] Add `name_hint` argument for `emit` 
and `emit_output` (#14126)
    omit d834ec6ebc [Unity][Fix] Fix bug in MergeCompositeFunctions (#14117)
    omit ccdd1097e5 [Unity] Update tests again to adapt to latest TVMScript 
syntax (#14115)
    omit 0994870117 [Unity][BYOC]Add relax backend pattern registry (#14106)
    omit 4086eb2e3e [Unity] Remove attributes of relax.print, assert and unique 
(#14101)
    omit 825eda8959 [Unity][Layout] Add layout transformation analysis for 
PrimFunc (#14066)
    omit e1f4af2157 [Unity] Relax Recursive function (#14092)
    omit cd85685886 [Unity] Lower `shape_of` to a builtin (#14093)
    omit 0c67231078 [Unity] Fix typo in the comment (#14096)
    omit 170a04cc9a [Unity][Relax] Set Shape Function to Be Host Function 
(#14090)
    omit 53b5619999 [Unity] Refactor Relax Build JIT UX (#14088)
    omit 282c94717a [Unity][Fix][Pass] FoldConstant with DCE in dataflow block 
(#14087)
    omit 9a2ea3991c [Unity][Analysis] TIR pattern kind analysis for 
multi-buffer write block (#14075)
    omit 9202f25fac [Unity][Op] `log_softmax` and `cross_entropy_with_logits` 
(#14083)
    omit c33c05e01b [Unity][BYOC] Add DNNL backend (#14082)
    omit d8d1c3e4cb [Unity][BYOC] Add CUTLASS backend (#14081)
    omit 3d610ec53c [Unity] Add testcases for `expr_args_converter` (#14080)
    omit 73442eebe8 [Unity][Pass] Canonicalize Bindings (#14079)
    omit 70f03636a8 [Unity][BYOC][Pass] RunCodegen and TensorRT  (#14078)
    omit 29a4a37347 [Unity][Transform] Add LiftTransformParams pass (#14069)
    omit 88edf5f084 [Unity][Frontend] Annotate number of non-static input of FX 
function (#14067)
    omit 2770f92b50 [Unity][BYOC] Add pass to merge composite functions to 
offload large subgraphs (#14062)
    omit ea5c67cc85 [Unity][Pass] Remove Unused Function (#14061)
    omit 26fb793d6a [Unity][Fix][Pass] Fix FuseOps for lack graph edges (#14058)
    omit 9753ca95d6 [Unity] Relax op: collapse sum (#14059)
    omit 5a4b4ff006 [Unity][BYOC] Add pattern-based partitioning pass (#14054)
    omit a8ebb3a8c2 [Unity][VM] Add per-op profiling support  (#14053)
    omit 6035ca8c4e [Unity][TVMScript] Overload `__neg__` for relax expr 
(#14045)
    omit 5df6a24f96 [Unity][Pass] FuseOps FuseTIR fixes (#14044)
    omit ef7ecb392d [Unity] Statement rewriter for DataflowBlock (#14043)
    omit 0bbc420e94 [Unity] Relax dataflow pattern language (matching) (#14041)
    omit 77af69bcb7 [Unity] Update tests to adapt to latest TVMScript syntax 
(#14039)
    omit 928ff81bd4 [Unity] Disallow inline prim_func in relax IR (#14040)
    omit ca872f9135 [Unity][Pass] Block-level static memory planning (#14038)
    omit a929a149ce [Unity] Initial PyTorch Frontend (#14037)
    omit 05096d68fe [Unity][Op] Add ShapeExpr Tests for Reshape Op (#14035)
    omit 878625b17a [Unity][Pass] Operator legalization (#14029)
    omit 0785ec88dc [Unity][TVMScript] Move tir/relax import in script out of 
__init__.py (#14033)
    omit b29906b710 [Unity][Pass] Wellformed Analysis (#14032)
    omit 6fad06c8ce [Unity][BlockBuilder] CallTE convert PrimValue args  
(#14028)
    omit 209369ec39 [Unity][Pass] Normalize Pass (#14031)
    omit 58130034d0 [Unity] Relay -> Relax translator  (#14026)
    omit 03beb2b448 [Unity][Pass][TuningAPI] Introduce TuningAPI and 
MetaSchedule pass (#14014)
    omit a3739da691 [Unity][Pass] BindParams pass, FoldConstant pass (#14016)
    omit 28064aafc9 [Unity][VM] Supporting "compiled" exec mode. (#14015)
    omit 9192da2d06 [Unity][Pass] LambdaLift pass (#14012)
    omit 9b63670fcf [Unity][Pass] Operator Fusion Passes (#14001)
    omit e3952ae01e [Unity] NestedMsg Support utility (#13995)
    omit 8e89591525 [Unity] Relax op: manipulation (#13989)
    omit 5e08d9831a [Unity] Relax op: search (#13992)
    omit 81b03c867b [Unity] Relax op: linear algebra (#13988)
    omit a4541ac9f3 [Unity] Relax op: creation (#13984)
    omit 90bc292b4d [Unity] Relax op: neural networks (#13993)
    omit 82c0783230 [Unity] Relax op: statistical (#13991)
    omit 4806de6814 [Unity] Relax op: arithmetic, comparison (#13983)
    omit 2d676d4351 [Unity] Relax op: image (#13994)
    omit 1653505281 [Unity] Relax op: set (#13990)
    omit 54810c36ed [Unity] Relax op: datatype (#13986)
    omit 31d1eb184f [Unity] Relax op: index (#13987)
    omit aa157dbf8d [Unity][TVMScript] Use explicit `R.shape` in TVMScript 
(#13979)
    omit 652bf23a49 [Unity] e2e Relax minimum build flow (#13961)
    omit c08a2630a6 [Unity] Relax VM shape lowering pass (#13956)
    omit cdc257e16d [Unity] Relax VM codegen (#13954)
    omit 1eab9937b1 [Unity] Relax TVMScript Printer (#13944)
    omit 020bea042f [Unity] Relax TVMScript Parser. (#13932)
    omit b8631adebc [Unity] Relax BlockBuilder and ExprMutator (#13926)
    omit 59d7ff5931 [Unity] Basic StructInfo Analysis and Expr construction 
(#13916)
    omit c35aedbd2d [Unity][CI] Unity specific jenkins setup (do not upstream 
to main) (#13910)
    omit e9014d90ad [Unity][IR] First-class StructInfo (#13907)
    omit 46ec39c8ad [Unity] Relax expressions and types (#13901)
    omit 5292e3b314 [Unity] Relax VM (#13878)
     add ff12a20323 [Fix][Relay][TOPI] Bug fix in relay.sum and topi.sum 
functions when w… (#14285)
     add eecc02ad35 [microTVM] Custom IDE Tutorial (#13857)
     add ccc0b9162f [fix][relay][qnn] Bug fix for 8-bit quantized mul (#14286)
     add c9ab1979f1 [CI][ETHOSN] Add ssh to the driver stack installation 
(#14246)
     add d22bdce2bf [Relay][Op] Connect existing arm_cpu schedule to relay 
strategy for concat (#14270)
     add ce1fa8908f [TE] Record primitives of Schedule for visualization 
(#14168)
     add e22a2d5b9f [IR] Enhance IRModule SEqual/SHash to support cross 
function calls (#14289)
     add 075e2ec7bb [Frontend][Paddle]fix eye and dist (#14292)
     add 970cd1def8 [TIR][Hexagon] Enhancement of NarrowDataType pass for 
binary ops (#14298)
     add 6eb4b873e1 [DOCS][ADRENO] Improved Adreno documentation (#13867)
     add 5d0509237e [CI] Update ci_arm docker image to have LLVM 15 (#14296)
     add f6b75792c5 [TVMC] Fix logging in TVMC (#14175)
     add 946581ab56 [TIR][Compute-at] Utilize InverseAffineIterMap for dom 
estimation (#14184)
     add f4520c4f15 [TVMC] Improve --desired-layouts functionality (#14272)
     add 32e500b7f7 [LLVM] Add support to generate llvm.assume (#14294)
     add d1dea13762 [Bugfix][TVMScript] Preserve variable names in LetStmt 
(#14319)
     add 58dce66097 [CI] Update CUDA to 11.7 (#14293)
     add 3f2dac0d07 [PaddlePaddle Hackathon 4][Frontend][Paddle]add conv3d for 
paddle frontend (#14290)
     add fe3fa9d75d [Fix][TIR] Fix tvm::arith::UnionLowerBound (#14304)
     add f7c2bbbe87 [LLVM] Fix registerCallbacks API after recent change 
(#14323)
     add 84e8f86960 [docs] Add details about patch releases (#14301)
     add 4ae08d8710 [apps][bundle_deploy]Fix bundle build issue (#14315)
     add ca8153d502 [AOT]Raise error when input name is not valid (#14322)
     add 2ff41c6156 [TIR][Schedule] Allow buffer name argument to 
Schedule.set_scope (#14327)
     add a5ed21d12a [CODEGEN][METAL] Fix ramp codegen (#14330)
     add 542274dde9 [Schedule] Add an optional argument `disable_checks` for 
`Schedule` (#14281)
     add 56d0e3b7af [METAL][CODEGEN] testcase for ramp codegen (#14331)
     add fc2a9e50af [CODEGEN][METAL] Fix unaligned vector load (#14332)
     add c6c89c3a25 [Hexagon] Add concept of DMA groups (#14254)
     add 06276846a1 [Docs] Update listed tvmc python dependencies (#14341)
     new b8e4110467 [Unity] Relax VM (#13878)
     new 2c7f480f4f [Unity] Relax expressions and types (#13901)
     new 4e659d1f26 [Unity][IR] First-class StructInfo (#13907)
     new fb90fd1a46 [Unity][CI] Unity specific jenkins setup (do not upstream 
to main) (#13910)
     new 6915444b2d [Unity] Basic StructInfo Analysis and Expr construction 
(#13916)
     new d1ad4e6543 [Unity] Relax BlockBuilder and ExprMutator (#13926)
     new 2001903486 [Unity] Relax TVMScript Parser. (#13932)
     new e38a3360f5 [Unity] Relax TVMScript Printer (#13944)
     new ea6cc94c8d [Unity] Relax VM codegen (#13954)
     new 2c158714cf [Unity] Relax VM shape lowering pass (#13956)
     new fadbb3f256 [Unity] e2e Relax minimum build flow (#13961)
     new 75eecf7dd9 [Unity][TVMScript] Use explicit `R.shape` in TVMScript 
(#13979)
     new de164d2524 [Unity] Relax op: index (#13987)
     new c8a153314d [Unity] Relax op: datatype (#13986)
     new c7a57aecd6 [Unity] Relax op: set (#13990)
     new 42409202db [Unity] Relax op: image (#13994)
     new 5385d6d635 [Unity] Relax op: arithmetic, comparison (#13983)
     new a6a2e84ca9 [Unity] Relax op: statistical (#13991)
     new a96f2006a6 [Unity] Relax op: neural networks (#13993)
     new 4dd591b800 [Unity] Relax op: creation (#13984)
     new 9694c673bb [Unity] Relax op: linear algebra (#13988)
     new 7a8765d819 [Unity] Relax op: search (#13992)
     new 4ab73eabc3 [Unity] Relax op: manipulation (#13989)
     new 62daae4457 [Unity] NestedMsg Support utility (#13995)
     new 20adb37493 [Unity][Pass] Operator Fusion Passes (#14001)
     new e78d523e74 [Unity][Pass] LambdaLift pass (#14012)
     new 0e2bb802bb [Unity][VM] Supporting "compiled" exec mode. (#14015)
     new 63e2402358 [Unity][Pass] BindParams pass, FoldConstant pass (#14016)
     new 87659ea3ea [Unity][Pass][TuningAPI] Introduce TuningAPI and 
MetaSchedule pass (#14014)
     new 388941ad6b [Unity] Relay -> Relax translator  (#14026)
     new 0d91c33103 [Unity][Pass] Normalize Pass (#14031)
     new 251a062bf1 [Unity][BlockBuilder] CallTE convert PrimValue args  
(#14028)
     new b5d2304029 [Unity][Pass] Wellformed Analysis (#14032)
     new 5e2f2b9d43 [Unity][TVMScript] Move tir/relax import in script out of 
__init__.py (#14033)
     new 814eb921c2 [Unity][Pass] Operator legalization (#14029)
     new 130e362430 [Unity][Op] Add ShapeExpr Tests for Reshape Op (#14035)
     new 85477ac489 [Unity] Initial PyTorch Frontend (#14037)
     new 96a9b6e4d8 [Unity][Pass] Block-level static memory planning (#14038)
     new 837a557210 [Unity] Disallow inline prim_func in relax IR (#14040)
     new d3494933fe [Unity] Update tests to adapt to latest TVMScript syntax 
(#14039)
     new 02cefd91a5 [Unity] Relax dataflow pattern language (matching) (#14041)
     new f428a4ae23 [Unity] Statement rewriter for DataflowBlock (#14043)
     new ac5bf3a76a [Unity][Pass] FuseOps FuseTIR fixes (#14044)
     new 81169f6576 [Unity][TVMScript] Overload `__neg__` for relax expr 
(#14045)
     new 2bd1581596 [Unity][VM] Add per-op profiling support  (#14053)
     new 591b800bfa [Unity][BYOC] Add pattern-based partitioning pass (#14054)
     new ff7d4950e0 [Unity] Relax op: collapse sum (#14059)
     new 828edeb5ea [Unity][Fix][Pass] Fix FuseOps for lack graph edges (#14058)
     new 7be4441569 [Unity][Pass] Remove Unused Function (#14061)
     new 0bd303c7c1 [Unity][BYOC] Add pass to merge composite functions to 
offload large subgraphs (#14062)
     new 466a004d6c [Unity][Frontend] Annotate number of non-static input of FX 
function (#14067)
     new 51b1ce1ec7 [Unity][Transform] Add LiftTransformParams pass (#14069)
     new 6ba5cac678 [Unity][BYOC][Pass] RunCodegen and TensorRT  (#14078)
     new 4b3794c24a [Unity][Pass] Canonicalize Bindings (#14079)
     new d5fa61fd46 [Unity] Add testcases for `expr_args_converter` (#14080)
     new fe7e0651ec [Unity][BYOC] Add CUTLASS backend (#14081)
     new f3ee944a58 [Unity][BYOC] Add DNNL backend (#14082)
     new 3a0f4c5eca [Unity][Op] `log_softmax` and `cross_entropy_with_logits` 
(#14083)
     new 5aecfe4121 [Unity][Analysis] TIR pattern kind analysis for 
multi-buffer write block (#14075)
     new 22c7b75834 [Unity][Fix][Pass] FoldConstant with DCE in dataflow block 
(#14087)
     new 99f6d67dd0 [Unity] Refactor Relax Build JIT UX (#14088)
     new f15b80a561 [Unity][Relax] Set Shape Function to Be Host Function 
(#14090)
     new 0eff29a505 [Unity] Fix typo in the comment (#14096)
     new 03799a50cb [Unity] Lower `shape_of` to a builtin (#14093)
     new 70c8debc7a [Unity] Relax Recursive function (#14092)
     new 9df23f7c67 [Unity][Layout] Add layout transformation analysis for 
PrimFunc (#14066)
     new a31c856de7 [Unity] Remove attributes of relax.print, assert and unique 
(#14101)
     new b168949441 [Unity][BYOC]Add relax backend pattern registry (#14106)
     new fbf56475d2 [Unity] Update tests again to adapt to latest TVMScript 
syntax (#14115)
     new 4e5e81a1b8 [Unity][Fix] Fix bug in MergeCompositeFunctions (#14117)
     new cf9beab753 [Unity][BlockBuilder] Add `name_hint` argument for `emit` 
and `emit_output` (#14126)
     new 133b4acaeb [Unity][WEB] Relax vm on web runtime (#14131)
     new fa47ee995f [Unity] Add Global info (#14132)
     new 5939a6e8c8 [Unity][BYOC] Add transposed matmul support to Relax 
CUTLASS BYOC (#14128)
     new 1004bdf02a [Unity][TVMScript] emit_te sugar (#14123)
     new a16021ace5 [Unity][BYOC] Assign group to unused bindings and ignroe 
PrimFunc (#14139)
     new 3bdd8013c1 [Unity] Add callback to FuseOpsByPattern to check match 
result is accepted (#14109)
     new 019ef59f2e [Unity][Legalize] Fix Scalar Constant Legalization (#14127)
     new 43c5f29813 [Unity][Pass] Enhance constant folding to fold relax ops by 
evaluating them. (#14146)
     new 82bfc57772 [Unity][Debugging] AST printer (#14152)
     new 7e96a3aeed [Unity][Pass] Support Symbolic Shape Deduction during 
BindParam (#14154)
     new 1cdc3d336b [Unity][Analysis] Checking function return struct info in 
well-formed check (#14155)
     new ef0f4481cf [Unity][BYOC] Use Relax legalize + CPU build for reference 
in tests (#14162)
     new 7cad6ef8d8 [Unity] Add bind_constants option to FuseOpsByPattern 
(#14151)
     new c56b17f4f6 [Unity][Analysis] Analysis for detecting recursion in Relax 
(#14149)
     new 1bbe881241 [Unity][BYOC] Add batch matmul support to Relax CUTLASS 
BYOC (#14166)
     new be532f28f2 [Unity][Op] Full support of Relax op `power` (#14171)
     new 1cc9bb014e [Unity][Analysis] Restore Python bindings for var analyses 
(#14180)
     new 8a46c21e33 [Unity][OP] Add an operator for fused multi head attention 
(#14150)
     new 281cc206cc [Unity][WEBGPU] Codegen improvements and WebRuntime (#14187)
     new cd88b0ab49 [Unity][Transform] LiftTransformParams handling multiple 
functions (#14192)
     new 1927d7d4aa [Unity][Op] Group normalization (#14194)
     new 50c1e7a147 [Unity][Op] Argmax and argmin (#14195)
     new 6731783749 [Unity][Op] Legalize `round`, `floor`, `ceil`, `sign` 
(#14198)
     new 439ec78118 [Unity][Frontend] FX translator supporting more ops (#14196)
     new ec6e26827b [Unity][Frontend] FX translator returning weights with 
`keep_params_as_input` (#14197)
     new fb6b1ea299 [Unity][Fix] FX translating dtype (#14201)
     new 3b7db40860 [Unity][TIR][Pass] ForceNarrowIndexToInt32 (#14203)
     new 044080ff93 [Unity][Frontend] FX translator support torch.baddbmm 
(#14202)
     new b7193cf056 [Unity] Introduce Default GPU Schedule Pass (#14182)
     new 399d9daf71 [Unity][Frontend] Attach imported model weights, deprecate 
ImporterOutput (#14211)
     new c4225052e9 [Unity][Frontend] FX translator supports unwrapping unit 
return tuple (#14212)
     new 1ffc31777e [Unity][WEB] Improve webgpu codegen options to skip 
readonly (#14213)
     new 841f8a0c03 [Unity][Frontend] from_fx keeps parameters in order (#14214)
     new 6cb1fe7a94 [Unity][BYOC] Add dynamic shape support to CUTLASS matmul 
(#14216)
     new 84c20b3abe [Unity][Transform] Memory plan across the IRModule (#14220)
     new 2531c7eaf5 [Unity][WEB] Simplify WebGPU Codegen per spec (#14225)
     new afdf218125 [Unity] ensure memory.alloc_tensor/storage roundtrippable 
(#14226)
     new 6b75a40036 [Unity] Improve implementation of FuseOps (#14229)
     new 13c8c673ba [Unity][Transform] SimplifyNormInference (#14221)
     new f2804d15f7 [Unity][Bugfix] Do not include `PrimFunc`s in the 
dependency graph when checking for recursion (#14228)
     new 198caa55d1 [Unity][TVMScript] Enable Context-Aware Parsing (#14234)
     new b270be88fe [Unity][TVMScript] Fix prim_func lost issue in 
relax.emit_te (#14189)
     new 81c38c5e1b [Unity][WEB] Update text prompts for syntactical 
correctness (#14237)
     new 4c90f052f6 [Unity][WEB] Improve ndarray cache (#14236)
     new 9b757c9f39 [Unity][Op][Tweak] Improve `StructInfo` inference for 
`shape_of` (#14243)
     new f5b6ac8fb4 [Unity][Op] Add repeat, tile, conv2d_transpose, avg_pool2d 
(#14238)
     new 24b8e7bef5 [Unity] Allow user defined func attrs in emit_te (#14255)
     new 6ae1c52610 [Unity][BYOC] Add conv2d and residual block patterns for 
Relax cutlass BYOC (#14252)
     new 1456f99a26 [Unity][Frontend] Import `tanh` and fix `layer_norm` 
(#14247)
     new 72c9510ae4 [Unity] Consider target context for Relay to Relax 
conversion (#14269)
     new 7f89e22406 [Unity] Introduce call_dps_packed (#14183)
     new e6f3db185a [Unity][ci] Use CPU-SMALL instances (#14256)
     new 581889aa6c [TVMScript][Unity] Improve PyLint Compatibility (#14276)
     new 8c34de2d7f [Unity] Mark tests that need python3.8 compact.
     new c7f40bd1d9 [Unity] Fix StructInfo Infer for `vm.alloc_tensor` (#14283)
     new c1783b83a7 [Unity][Op] Cumsum (#14297)
     new 15de2c2df7 [Unity][DEBUG] Add Instrument (#14302)
     new 765375f187 [Unity][Pass] Add a pass to alter the TIR implementation of 
an operator (#14215)
     new b10498b51c [Unity][Op] Enable special dimension value 0 in reshape 
(#14311)
     new 001f17814c [Unity][Web] WebGPU explicit max buffer size (#14321)
     new f5ee09795f [Unity] Support pattern-based rewriting (#14312)
     new 10b834f887 [Unity][BYOC] Support matmul + residual block fusion in 
CUTLASS BYOC (#14317)
     new 981a822bd3 [Unity][BYOC] Improve expressiveness of the pattern check 
function in FuseOpsByPattern (#14310)
     new 850e549b32 [Unity][Transform] Enhance RewriteDataflowReshape transform 
(#14265)
     new e2b1d93591 [Unity][Contrib] Introduce several features of cutlass 
profiler (#14275)
     new 5529830bf5 [Unity][TOPI] fp16 LayerNorm & GroupNorm (#14264)
     new fdf86e4c3e [Unity][Transform] Automatic Layout Conversion (#14257)
     new 3ca23b3378 [Unity][Transform] Simple Dead Code Elimination (#14262)
     new 795568e148 [Unity][Transform] Automatic Mixed Precision (#14242)
     new 1d35ef2135 [Unity][Lint] Fix cpplint casting (#14333)
     new 0b47f0bfe3 [Unity][Transform] DefaultSchedule pass (#14266)
     new 9aae685bf6 [Unity][Op] Fix Strided Slice Shape Inference (#14324)
     new a0bd29917c [Unity][Pass] Reuse prior infra to implement more complete 
DCE (#14334)
     new 06fe80be71 [Unity] Remove Python interface of RemoveUnusedFunction 
(#14336)
     new 6c6985940c [Unity][BYOC] Update testcases to follow recent changes 
(#14339)
     new 18c19fb830 [Unity][Frontend] FX exp and strided_slice fix (#14338)

This update added new revisions after undoing existing revisions.
That is to say, some revisions that were in the old version of the
branch are not in the new version.  This situation occurs
when a user force-pushes a change (git push --force) and generates a
repository containing something like this:

 * -- * -- B -- O -- O -- O   (6cb52821a0)
            \
             N -- N -- N   refs/heads/unity-staging (18c19fb830)

You should already have received notification emails for all of the O
revisions, and so the following emails describe only the N revisions
from the common base, B.

Any revisions marked "omit" are not gone; other references still
refer to them.  Any revisions marked "discard" are gone forever.

The 147 revisions listed above as "new" are entirely new to this
repository and will be described in separate emails.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 apps/bundle_deploy/Makefile                        |    2 +-
 ci/jenkins/docker-images.ini                       |    2 +-
 docker/Dockerfile.ci_gpu                           |    2 +-
 .../install/ubuntu_install_ethosn_driver_stack.sh  |    3 +-
 docs/contribute/release_process.rst                |   29 +-
 docs/how_to/deploy/adreno.rst                      |  718 ++++++++---
 docs/install/from_source.rst                       |    2 +-
 .../how_to/deploy_models/deploy_model_on_adreno.py |  309 +++--
 .../deploy_models/deploy_model_on_adreno_tvmc.py   |  198 +++
 .../how_to/work_with_microtvm/micro_custom_ide.py  |  361 ++++++
 include/tvm/meta_schedule/postproc.h               |    3 +-
 include/tvm/relax/attrs/datatype.h                 |    9 +
 include/tvm/relax/attrs/manipulate.h               |   15 +
 include/tvm/relax/nested_msg.h                     |   44 +
 include/tvm/relax/struct_info.h                    |    7 +
 include/tvm/relax/transform.h                      |  156 ++-
 include/tvm/runtime/relax_vm/vm.h                  |   30 +
 include/tvm/te/schedule.h                          |   43 +-
 include/tvm/tir/builtin.h                          |   37 +-
 include/tvm/tir/function.h                         |    7 +
 include/tvm/tir/schedule/schedule.h                |   13 +-
 include/tvm/tir/schedule/state.h                   |    9 +-
 include/tvm/topi/nn/group_norm.h                   |   31 +-
 include/tvm/topi/nn/layer_norm.h                   |   28 +-
 include/tvm/topi/reduction.h                       |    6 +-
 python/tvm/contrib/cutlass/build.py                |    9 +-
 python/tvm/contrib/cutlass/conv2d_operation.py     |   14 +-
 python/tvm/contrib/cutlass/gemm_operation.py       |  132 +-
 python/tvm/contrib/cutlass/gen_conv2d.py           |  119 +-
 python/tvm/contrib/cutlass/gen_gemm.py             |   48 +-
 python/tvm/contrib/cutlass/gen_tensor_op.py        |   28 +-
 python/tvm/contrib/tedd.py                         |   27 +-
 python/tvm/driver/tvmc/autotuner.py                |    1 +
 python/tvm/driver/tvmc/main.py                     |    7 +-
 python/tvm/driver/tvmc/runner.py                   |    1 +
 python/tvm/driver/tvmc/transform.py                |   42 +-
 .../postproc/disallow_async_strided_mem_copy.py    |   11 +-
 python/tvm/micro/testing/utils.py                  |    4 +-
 python/tvm/relax/__init__.py                       |    2 +-
 python/tvm/relax/analysis/analysis.py              |    8 +-
 python/tvm/relax/backend/contrib/cutlass.py        |  110 +-
 python/tvm/relax/backend/pattern_registry.py       |   75 +-
 python/tvm/relax/backend/patterns.py               |   49 +-
 python/tvm/relax/dpl/pattern.py                    |   40 +-
 python/tvm/relax/frontend/torch/fx_translator.py   |   47 +-
 python/tvm/relax/op/datatype.py                    |   17 +
 python/tvm/relax/op/manipulate.py                  |   51 +
 python/tvm/relax/testing/lib_comparator.py         |  128 ++
 .../tvm/relax/transform/legalize_ops/manipulate.py |    5 +
 python/tvm/relax/transform/transform.py            |  225 +++-
 python/tvm/relay/frontend/paddlepaddle.py          |   63 +-
 python/tvm/relay/op/strategy/arm_cpu.py            |   16 +-
 python/tvm/relay/quantize/_annotate.py             |   10 +
 python/tvm/relay/quantize/_partition.py            |    5 +
 python/tvm/runtime/relax_vm.py                     |   48 +
 python/tvm/script/ir_builder/relax/ir.py           |    4 +
 python/tvm/script/parser/tir/parser.py             |    1 +
 python/tvm/tir/schedule/schedule.py                |   29 +-
 python/tvm/tir/schedule/state.py                   |   14 +
 python/tvm/tir/tensor_intrin/hexagon.py            |   35 +-
 python/tvm/topi/nn/group_norm.py                   |    2 +
 python/tvm/topi/nn/layer_norm.py                   |    2 +
 python/tvm/topi/scan.py                            |    2 +-
 python/tvm/topi/testing/group_norm_python.py       |    5 +-
 python/tvm/topi/testing/layer_norm_python.py       |    3 +
 src/arith/int_set.cc                               |    1 +
 src/arith/iter_affine_map.cc                       |    3 +-
 src/driver/driver_api.cc                           |    1 -
 src/ir/module.cc                                   |   75 +-
 .../postproc/disallow_async_strided_mem_copy.cc    |    8 +-
 src/relax/analysis/tir_op_pattern_kind.cc          |   12 +-
 src/relax/backend/pattern_registry.cc              |   39 +-
 src/relax/backend/pattern_registry.h               |   59 +-
 src/relax/ir/dataflow_matcher.cc                   |   48 +
 src/relax/op/image/resize.cc                       |   17 +-
 src/relax/op/nn/convolution.cc                     |   55 +-
 src/relax/op/nn/nn.cc                              |  104 +-
 src/relax/op/nn/pooling.cc                         |   46 +-
 src/relax/op/op.cc                                 |    4 +-
 src/relax/op/op_common.cc                          |    8 +
 src/relax/op/op_common.h                           |   24 +-
 src/relax/op/tensor/binary.cc                      |   22 +
 src/relax/op/tensor/binary.h                       |   22 +-
 src/relax/op/tensor/create.cc                      |   12 +-
 src/relax/op/tensor/datatype.cc                    |   31 +-
 src/relax/op/tensor/datatype.h                     |    8 +
 src/relax/op/tensor/index.cc                       |   53 +-
 src/relax/op/tensor/linear_algebra.cc              |    8 +-
 src/relax/op/tensor/manipulate.cc                  |  295 ++++-
 src/relax/op/tensor/manipulate.h                   |   12 +
 src/relax/op/tensor/statistical.cc                 |   52 +
 src/relax/op/tensor/statistical.h                  |   27 +-
 src/relax/op/tensor/ternary.cc                     |   19 +-
 src/relax/transform/alter_op_impl.cc               |  312 +++++
 src/relax/transform/convert_layout.cc              |  309 +++++
 ...ve_unused_funcs.cc => dead_code_elimination.cc} |   48 +-
 src/relax/transform/fuse_ops.cc                    |   87 +-
 src/relax/transform/infer_amp_utils.cc             |   59 +
 src/relax/transform/infer_amp_utils.h              |   85 ++
 src/relax/transform/infer_layout_utils.cc          |  126 ++
 src/relax/transform/infer_layout_utils.h           |  244 ++++
 src/relax/transform/merge_composite_functions.cc   |    2 +-
 src/relax/transform/meta_schedule.cc               |    1 +
 src/relax/transform/run_codegen.cc                 |    2 +-
 src/relax/transform/to_mixed_precision.cc          |  538 ++++++++
 src/{auto_scheduler => relax/transform}/utils.cc   |   16 +-
 src/relax/transform/utils.h                        |   55 +-
 src/relay/backend/te_compiler.cc                   |   13 +-
 src/runtime/aot_executor/aot_executor.cc           |    2 +-
 src/runtime/hexagon/hexagon_device_api.cc          |   19 +-
 src/runtime/hexagon/hexagon_user_dma.cc            |   14 +-
 src/runtime/hexagon/hexagon_user_dma.h             |   31 +-
 src/runtime/hexagon/ring_buffer.h                  |   76 +-
 src/runtime/relax_vm/vm.cc                         |   70 +-
 src/target/llvm/codegen_llvm.cc                    |    7 +
 src/target/source/codegen_c.cc                     |   15 +-
 src/target/source/codegen_metal.cc                 |    6 +-
 src/target/source/codegen_metal.h                  |    3 +-
 src/target/source/codegen_opencl.cc                |   37 +
 src/target/source/codegen_opencl.h                 |    3 +
 src/target/source/codegen_webgpu.cc                |   26 +-
 src/target/spirv/intrin_rule_spirv.cc              |    5 +
 src/te/schedule/schedule_dataflow_rewrite.cc       |   14 +-
 src/te/schedule/schedule_lang.cc                   |   51 +-
 src/tir/op/builtin.cc                              |    6 +
 src/tir/schedule/analysis/analysis.cc              |    2 +-
 src/tir/schedule/concrete_schedule.cc              |    6 +-
 src/tir/schedule/primitive/compute_at.cc           |  179 ++-
 src/tir/schedule/primitive/for_kind.cc             |    4 +-
 src/tir/schedule/primitive/reduction.cc            |   30 +-
 src/tir/schedule/schedule.cc                       |   10 +-
 src/tir/schedule/state.cc                          |   23 +-
 src/tir/schedule/traced_schedule.cc                |    5 +-
 src/tir/transforms/default_gpu_schedule.cc         |    2 +-
 src/tir/transforms/inject_software_pipeline.cc     |   35 +-
 src/tir/transforms/lower_async_dma.cc              |  172 +--
 src/tir/transforms/lower_tvm_builtin.cc            |   26 +
 src/tir/transforms/narrow_datatype.cc              |   38 +
 .../cpp-runtime/hexagon/hexagon_user_dma_tests.cc  |    2 +-
 tests/cpp-runtime/hexagon/ring_buffer_tests.cc     |  203 ++-
 .../metaschedule_e2e/test_resnet50_int8.py         |    1 -
 .../test_hexagon/test_async_dma_pipeline.py        |    6 +-
 .../test_hexagon/test_software_pipeline_async.py   |    1 -
 tests/python/contrib/test_tedd.py                  |   58 +-
 tests/python/driver/tvmc/test_command_line.py      |   62 +
 tests/python/driver/tvmc/test_transform.py         |   86 +-
 tests/python/frontend/paddlepaddle/test_forward.py |   43 +
 tests/python/relax/test_ast_printer.py             |    4 +-
 tests/python/relax/test_codegen_cutlass.py         |  106 +-
 tests/python/relax/test_dataflow_pattern.py        |  118 ++
 tests/python/relax/test_frontend_from_fx.py        |   78 +-
 tests/python/relax/test_op_datatype.py             |   17 +
 tests/python/relax/test_op_index.py                |   41 +-
 tests/python/relax/test_op_manipulate.py           |   71 +-
 tests/python/relax/test_op_misc.py                 |    8 +
 tests/python/relax/test_transform_alter_op_impl.py |  342 +++++
 tests/python/relax/test_transform_codegen_pass.py  |   23 +-
 .../python/relax/test_transform_convert_layout.py  | 1352 ++++++++++++++++++++
 .../relax/test_transform_dead_code_elimination.py  |  452 +++++++
 .../relax/test_transform_fuse_ops_by_pattern.py    |   30 +-
 .../test_transform_legalize_ops_manipulate.py      |   73 ++
 .../python/relax/test_transform_legalize_ops_nn.py |  144 +++
 .../relax/test_transform_meta_schedule_tuning.py   |   71 +
 .../relax/test_transform_remove_unused_funcs.py    |  211 ---
 .../test_transform_rewrite_dataflow_reshape.py     |   93 +-
 .../relax/test_transform_to_mixed_precision.py     |  540 ++++++++
 .../relax/test_tvmscript_parser_op_manipulate.py   |   15 +
 .../{test_vm_profiler.py => test_vm_instrument.py} |  101 +-
 tests/python/relay/aot/test_cpp_aot.py             |   38 +
 tests/python/relay/opencl_texture/test_network.py  |   40 +-
 .../relay/strategy/test_select_implementation.py   |   56 +
 tests/python/relay/test_build_module.py            |   37 +
 tests/python/relay/test_op_level4.py               |   36 +-
 tests/python/topi/python/test_topi_group_norm.py   |    3 +-
 tests/python/topi/python/test_topi_layer_norm.py   |    3 +-
 tests/python/topi/python/test_topi_reduce.py       |   12 +-
 tests/python/unittest/test_arith_intset.py         |    4 +
 tests/python/unittest/test_target_codegen_llvm.py  |   24 +
 tests/python/unittest/test_target_codegen_metal.py |   54 +-
 tests/python/unittest/test_te_schedule_ops.py      |   53 +
 .../unittest/test_tir_schedule_compute_at.py       |   74 ++
 .../python/unittest/test_tir_schedule_set_scope.py |    9 +-
 .../unittest/test_tir_transform_narrow_datatype.py |   61 +
 .../python/unittest/test_tvmscript_syntax_sugar.py |   13 +
 tests/scripts/request_hook/request_hook.py         |    2 +
 tests/scripts/setup-adreno-env.sh                  |  113 ++
 tests/scripts/task_build_adreno_bins.sh            |    1 -
 tests/scripts/task_config_build_adreno.sh          |    1 -
 tests/scripts/task_config_build_gpu.sh             |    1 +
 tests/scripts/task_microtvm_cpp_tests.sh           |    4 +-
 web/apps/browser/rpc_server.html                   |    9 +-
 web/src/rpc_server.ts                              |    1 +
 web/src/runtime.ts                                 |   10 +
 web/src/webgpu.ts                                  |   49 +-
 194 files changed, 10845 insertions(+), 1597 deletions(-)
 create mode 100644 gallery/how_to/deploy_models/deploy_model_on_adreno_tvmc.py
 create mode 100644 gallery/how_to/work_with_microtvm/micro_custom_ide.py
 create mode 100644 python/tvm/relax/testing/lib_comparator.py
 create mode 100644 src/relax/transform/alter_op_impl.cc
 create mode 100644 src/relax/transform/convert_layout.cc
 rename src/relax/transform/{remove_unused_funcs.cc => dead_code_elimination.cc} (69%)
 create mode 100644 src/relax/transform/infer_amp_utils.cc
 create mode 100644 src/relax/transform/infer_amp_utils.h
 create mode 100644 src/relax/transform/infer_layout_utils.cc
 create mode 100644 src/relax/transform/infer_layout_utils.h
 create mode 100644 src/relax/transform/to_mixed_precision.cc
 copy src/{auto_scheduler => relax/transform}/utils.cc (76%)
 mode change 100755 => 100644
 create mode 100644 tests/python/relax/test_transform_alter_op_impl.py
 create mode 100644 tests/python/relax/test_transform_convert_layout.py
 create mode 100644 tests/python/relax/test_transform_dead_code_elimination.py
 delete mode 100644 tests/python/relax/test_transform_remove_unused_funcs.py
 create mode 100644 tests/python/relax/test_transform_to_mixed_precision.py
 copy tests/python/relax/{test_vm_profiler.py => test_vm_instrument.py} (53%)
 create mode 100644 tests/python/relay/strategy/test_select_implementation.py
 create mode 100755 tests/scripts/setup-adreno-env.sh

[tvm] branch unity-staging updated (6cb52821a0 -> 18c19fb830)

Reply via email to