This is an automated email from the ASF dual-hosted git repository.

tqchen pushed a change to branch unity-staging
in repository https://gitbox.apache.org/repos/asf/tvm.git


    from 6c38001fe1 [MERGE] Merge main into unity 2023-08-04
     add 9fb191159f [Unity] Commutative pattern match based on relax.Expr op 
(#15494)
     add 1a1af0c700 [Unity][NN] Allow nn.Module to generate subroutines (#15453)
     add e7cb3a848d [Unity][BYOC] Support ND A and 2D B batched matmul for 
cublasLt (#15499)
     add 38e2b886dd [Unity][BYOC] Integrate Flash attention v2 kernel into 
CUTLASS BYOC (#15467)
     add c0b3949765 [Unity][Frontend] Fix torch addmm op param alpha&beta 
(#15497)
     add 5b15824ec5 [Unity][Minor] Fix compilation warning (#15504)
     add c97753ea7c [Unity][Dlight] Matmul tensorization for SM70 (#15503)
     add 1afbf20129 [Unity][BYOC] Unify the interface between 
`FuseOpsByPattern(.., annotate_codegen=True)`and `MergeCompositeFunctions()` 
(#15491)
     add bd359685c7 [Unity][DLight] Update gemv rule (#15490)
     add cd14d4d7e2 [Unity][Hot fix] Flash attention offload bug due to typo 
(#15512)
     add f500a0dd1a [Unity] Update relax.Function.ret_struct_info when mutated 
(#15510)
     add a4cea71687 [Unity] Add batched A matrix support for FasterTransformer 
fp16A intB matmul (#15501)
     add 4ed34013de [Relax] CuDNN Fallback Support through BYOC. (#15462)
     add bcc9a68b3e [Unity] Remove unused local function definitions (#15507)
     add c8494dc004 [Unity][ONNX] Support ONNX Symbolic Shape Deduction (#15498)
     add 61c693dc42 [Unity] Recursive pattern match in rewrite_call (#15495)
     add 7cc7ea8ce6 [Unity] Propagate dynamic begin/end in strided_slice when 
legalizing (#15515)
     add 13398c1ada [Unity][Bugfix] Handle assignment of non-dataflow var to a 
dataflow var in CanonicalizeBindings (#15523)
     add a012fe0df4 [Unity][DLight] Update GEMV Rule for Mali GPUs (#15531)
     add 2226a1f558 [Unity] Multi-device support for Relax (#15447)
     add 92125d79cf [BugFix][Unity] Fix for TupleStructInfo (#15538)
     add 59b5f6d9ae [Unity][BYOC] Make `MergeCompositeFunctions` append codegen 
name at the end of function name (#15534)
     add aa21782828 [Unity][MSC][M0.1] Enable set name and layout for exprs 
(#15500)
     add 36af50439b [Unity] Fix the ambiguity issue in `vDevice` constructor on 
Mac with Apple clang version 14.0.3 (#15546)
     add fa0d35be2c [Unity][Relax][FuseOps] Add MatchCastNode to node_map of 
IndexedForwardGraph (#15544)
     add 0e4c99cdb8 [Unity][ONNX] Improved symbolic handling and reshape 
functionality (#15550)
     add 0092e97840 [Unity] Preserve shape parameters in LazyTransformParams 
(#15563)
     add 74a547b029 [Unity] Infer concatenated shape with multiplication where 
possible (#15562)
     add e4e3d1bc11 [Unity][Minor] Fix Compilation Warning (#15560)
     add 54423aa822 [Unity][TVMScript] In-line literals for int8, int16, and 
float16 (#15565)
     add 7b5a89eba1 [Unity] Fix handling of null values in cuda graph rewriting 
(#15567)
     add 2e2126f9e3 [Unity] Implement relax.Function.bind_symbolic_vars (#15509)
     add fd08c1c6cd [unity][frontend] add relax frontend torch op (#15557)
     add a66e1cd1f5 [WEB] Fix WebGPU runtime build (#15575)
     add 688d9dd068 [Unity][CUTLASS][Cherry-pick] Skip profiling all conv2d 
output alignments when possible (#15583)
     add 684218f72d [Unity][Topi] Handle variable begin/end axes in 
topi.strided_slice (#15520)
     add ac3078c7f5 [Unity][FX] Add fx support for functional linear and conv2d 
(#15566)
     add 4eabefbe85 [Minor][BugFix] Remove PyTest Dependency (#15585)
     add 497b7481d2 [Unity][Frontend][NN] Add GroupNorm Layer (#15592)
     add f922f7cca2 [Unity][Frontend][NN] gelu NN module (#15593)
     add b959645454 [Unity] nn.Module external modules (#15487)
     add 567848e3a0 [Unity] Avoid trivial `var2 = var1` bindings in pattern 
matcher (#15578)
     add f18e2976f9 [Unity][Frontend][NN] Conv2D NN module (#15586)
     add d9d8b19ecd [Unity][Frontend][NN] Conv1D/LayerNorm NN module (#15587)
     add b3658480e2 [Unity][MSC][M0.2] MSCGraph core (#15569)
     add b99f34d56a [Unity] Support float in vm executable (#15608)
     add 619cbeca84 [Unity] Prevent Relax VM from being corrupted after getting 
incorrect shape (#15612)
     add 71cdd460b4 [Unity][Frontend][NN] Op print_  (#15604)
     add 4a513825bc [relax][frontend]add relax frontend torch op: 
tan,asin,acos,atan,sinh,cosh,tanh,asinh,… (#15610)
     add 7d1a101e24 [Unity][Frontend][NN] Add Timesteps layer to NN Module API 
(#15603)
     add 0443482f7e [Unity] UpdateVDevice pass and infer vdevice (#15570)
     add 5d0ef94ae4 [Unity][Transform] Track callees from external functions in 
DeadCodeElimination (#15561)
     add d28613f3cf Change == to === operator in compact.ts file (#15620)
     add d5dcabf2b2 [Unity][Frontend][NN] Add diffusers style Attention layer 
(#15609)
     add 082d98e8ab Fix equality comparison and code consistency in web (#15621)
     add c9a73789e7 [Unity] fix masked_fill operation (#15617)
     add d2972f3b42 [Unity] Add instruments to relay translator (#15601)
     add d3856d34e7 [Unity] Disco: A Framework-Agnostic SPMD Runtime for 
Distributed Inference/Training (#15622)
     add 54d3fec26f [Unity][Dlight] GeMV rule skip cases of "outer dim being 
grouped" (#15571)
     add 7556592dd2 [Disco][Test] MultiHeadAttention Testcase (#15632)
     add 0230c77cb7 [Disco] Support ShapeTuple in Disco Protocol (#15634)
     add 06ae46e567 [Runtime] Fix ICE from Clang (#15635)
     add 71b81127a2 [Disco][Op] broadcast_from_worker0 (#15633)
     add 14d9cf804e [Unity][MSC][M0.3] MSCGraph Builder (#15615)
     add e0501b527d [Unity][Frontend][NN] Collection of new NN operators 
(#15642)
     add 07ee13f663 [Unity][Dlight] GeMV rule max_num_threads awareness (#15647)
     add 6e2ca87cf8 [Runtime] Support Loading Standalone `ndarray-cache.json` 
(#15654)
     add 11d5f677ec [Unity][Frontend][NN] Make effects optional in nn module. 
(#15650)
     add 71c6839b1a [Unity] Implement R.macro for Relax macros (#15455)
     add c604012310 [Runtime] ShapeTuple.Product and ShapeTuple Printing 
(#15652)
     add 1d56770213 [Runtime] Refactor NDArrayCache Support (#15659)
     add c2eaa1d370 [Disco] Add `Scatter-From-Worker0` (#15653)
     add f1246d0078 [Disco] Introduce ShardLoader (#15655)
     add 935c91314a [Unity][Disco] Correct document for CopyFromWorker0 and 
CopyToWorker0 (#15660)
     add 04211b3f69 [Unity][DLight] Improve the performance of matmul on Adreno 
GPU (#15661)
     add 4ad3f3389c [Unity][Frontend][NN] Add nn.MultiLinear (#15662)
     add a0e0390f0b [Unity] RealizeVDevice pass (#15636)
     add 37ad0497fe [Unity] Add support for Google Pixel OpenCL (#15675)
     add ac99367aa4 [Relay] [Bugfix] Fix some bugs of dominator pattern (#15473)
     add 9bcf0bc107 [Relay] add redirecting operation to dataflow pattern graph 
(#15392)
     add 18467c95dd [Script] Be more careful when generating ast.ExtSlice for 
Subscript (#15483)
     add 2bf127bdd3 [TVMScript] Allow use of Python builtins in script (#15492)
     add 93356cdcbe [Bugfix][TOPI] Fix a bug in arm_cpu int8 conv2d i8mm 
schedule (#15484)
     add 40bac5717a [CI] Remove cython version pin (#15485)
     add ae45b04772 [Relay][Strategy] Use x86 dense schedules for arm_cpu 
(#15470)
     add 8cadd1fbc5 [ARITH] Enhance Canonical Simplify for LE (#15471)
     add 34cacb0a64 [VM][Textures] Enable OpenCL textures for VM  (#15419)
     add 9ff71f4a9f [CodeGenC] Handle GlobalVar callee as internal function 
call (#15103)
     add 8b37d4d11a [Relay][Strategy] Use x86 pool schedules for arm_cpu 
(#15506)
     add 77b71fc830 [CMSIS-NN] Support for Softmax Int16 operator (#15407)
     add ff47aeac20 [docs] Add v0.13.0 docs to site (#15508)
     add 907b29e544 [TOPI] check empty array of x86 injective's iters (#15513)
     add ed8b82c5d6 Remove duplicate the word (#15524)
     add 624f8a73c7 [TEST] Run tests/python/relay/aot tests in ci-cortexm 
(#15519)
     add 326f8e7435 Fix typo mistake and change whethe to whether (#15525)
     add 81fd9f3476 Remove IRModule Dependency from Target (#15511)
     add 6c33787cd2 [MetaSchedule] Enable subprocess to stdout for DEBUG level 
(#15532)
     add 513bd1a198 Remove duplicate msg word and condition inside the function 
doc (#15530)
     add a1d6e82291 [Runtime][Minor] Suppress verbose logging in Metal device 
API (#15543)
     add 927df59662 [Relay] Disable exception for ADT in mixed precision pass 
(#15533)
     add 94f6b37b06 [Relay][TFLite] Fix in qnn.conv2d when parameter groups not 
equal to 1 (#15472)
     add 482b3c2c99 [Fix] Fix the typo in compile flag (#15542)
     add 2b718e5d00 Fixed search task comment (#15535)
     add 63592fd71d [quantize] fix bug of annotate for output of add op (#15529)
     add 1422c1826f fixed typo [TypoFix] (#15536)
     add 208e01f485 [TVMScript] Create loop var with min_val dtype in for frame 
(#15547)
     add 096b4a28c0 Bump tornado from 6.1 to 6.3.3 in /apps/microtvm/cmsisnn 
(#15553)
     add f34e7256a1 Bump tornado from 6.1 to 6.3.3 in /apps/microtvm/ethosu 
(#15552)
     add 325c7aef90 Bump tornado from 6.1 to 6.3.3 in /apps/microtvm (#15554)
     add 0c6fbb8923 [TVMScript] Use triple-quoted python strings for metadata 
(#15564)
     add 760c03085a [Arith] Fix handling of overlapping predicates (#15555)
     add 8afa6d2571 [CUTLASS][Cherry-pick] Introduce several features of 
cutlass profiler (#15573)
     add ee54e98c78 Remove duplicate 'from' word inside python script (#15582)
     add 5db18efab9 Remove duplicate load word inside .cc file (#15581)
     add f45ed308cb Fix "to" duplicate word in python and C header file (#15580)
     add 98320ab5bf [Testing] Allow Capitalized name in CompareBeforeAfter 
(#15568)
     add 925148e444 [TIR] Shuffle in PointerValueTypeRewrite for scalar reads  
(#15517)
     add 63b0c8fecc [Runtime] Enhance PackedFunc Metaprogramming with 
`PackArgs` (#15595)
     add 072a5c1bc0 [TVMScript] Optionally output the address as part of 
variable names (#15579)
     add a99bc9de29 [MetaSchedule] Fix metaschedule flop estimation for 
non-integer loop dimensions (#15574)
     add 32658f851a [ACL] Update Compute Library to v23.05.1 (#15600)
     add 483a8c3234 [Arith] Add tvm::arith::PresburgerSetNode to work with 
Presburger Set in MLIR (#14690)
     add d0c94d447b [Bugfix][Relay][Strategy] Enable compile time 
transformation of weights matrix for arm_cpu NHWC quantized conv2d (#15584)
     add 6166121de4 [CMake] Add NCCL to TVM and TVM Runtime (#15605)
     add 512b114322 [CPP_RPC] export listdir for RPC (#15537)
     add b5dae98ebf [Arith] Fix detect linear equation with uint var (#15558)
     add b9652a2db0 [Hopper TMA] CUDA codegen for async copy with barrier 
synchronization (#15616)
     add 220f57dc68 [DOCS] community strategy decision process (#15619)
     add 344fd2d20c [ONNX][BugFix] Support If body with free variable from 
graph input  (#15602)
     add aa805f2f86 [Runtime] Expose ModuleGetFunction as PackedFunc (#15623)
     add 909c8fab8c [CMake] Add RCCL to TVM and TVM Runtime (#15624)
     add 6554e2e082 [RPC] Enhance RPC Protocol to support TVM Object (#15631)
     add 0e2fabd0e4 [BugFix][VTA] tvm.tir.Call has no name attribute (#15629)
     add 0166400906 [Runtime] Utils to Stringify Device (#15630)
     add 8f60213cb5 [Runtime] Serialization/Deserialization of runtime module 
(#15244)
     add c921781c46 [TIR] Output DeclBuffer in SplitHostDevice (#15493)
     add 3c6f9c9bcc [Arith] Added simplification rule for multiple equality 
compares (#15628)
     add 34bd9be0a8 [Backport][Runtime] Fix ICE from Clang (#15637)
     add 79f9e577f7 Do not link LLVM libraries into cpptest binary (#15639)
     add 022299b51f [Arith] MLIR PresburgerSet compile fix mlir >= 160 (#15638)
     add f9e6018cfe [Runtime] Make `export_library` parameters after 
`file_name` keyword-only (#15658)
     add d75083cd97 [Relay] Fix an adaptive_max_pool1d operator conversion bug 
(#15386)
     add 476d9d1d41 Add output_data_sec section in corstone300.ld (#15649)
     add 04ee895d8d [IR] Use structural equal for Range equality (#15664)
     add d26fdcf3d1 [Hopper TMA] Add CUDA codegen support for bulk asynchronous 
copy (#15656)
     add ea388e5e74 [CI] Allow Limit CPUs in Docker (#15668)
     add 1e6e2b35ae [BugFix][Arith] IterMapRewriter abort rewriting once 
failure (#15677)
     new 90c64c6dce [MERGE] Merge main into unity 2023-09-06

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 CMakeLists.txt                                     |   59 +-
 apps/android_camera/models/prepare_model.py        |    2 +-
 apps/android_rpc/tests/android_rpc_test.py         |    6 +-
 apps/benchmark/adreno/adreno_gpu_bench_clml.py     |    2 +-
 apps/benchmark/adreno/adreno_gpu_bench_texture.py  |    2 +-
 apps/benchmark/arm_cpu_imagenet_bench.py           |    2 +-
 apps/benchmark/mobile_gpu_imagenet_bench.py        |    2 +-
 apps/cpp_rpc/rpc_env.cc                            |   16 +
 apps/hexagon_launcher/README.md                    |    2 +-
 apps/ios_rpc/tests/ios_rpc_mobilenet.py            |    2 +-
 apps/ios_rpc/tests/ios_rpc_test.py                 |    4 +-
 apps/microtvm/cmsisnn/requirements.txt             |   54 +-
 apps/microtvm/ethosu/corstone300.ld                |    1 +
 apps/microtvm/ethosu/requirements.txt              |   54 +-
 apps/microtvm/poetry.lock                          |  175 +-
 apps/microtvm/pyproject.toml                       |    2 +-
 apps/topi_recipe/gemm/android_gemm_square.py       |    2 +-
 cmake/config.cmake                                 |   19 +
 cmake/modules/CUDA.cmake                           |    2 +-
 cmake/modules/LLVM.cmake                           |    3 +
 cmake/modules/LibInfo.cmake                        |   10 +
 cmake/modules/contrib/CUTLASS.cmake                |    1 +
 .../__init__.py => cmake/modules/contrib/MSC.cmake |   20 +-
 cmake/utils/FindLLVM.cmake                         |   12 +
 cmake/utils/FindNCCL.cmake                         |   56 +
 cmake/utils/FindRCCL.cmake                         |   52 +
 docker/bash.sh                                     |   17 +-
 .../ubuntu_download_arm_compute_lib_binaries.sh    |    2 +-
 docker/install/ubuntu_install_python_package.sh    |    2 +-
 docs/conf.py                                       |   10 +-
 docs/contribute/community.rst                      |   11 +
 docs/how_to/deploy/android.rst                     |    2 +-
 .../how_to/deploy_models/deploy_model_on_adreno.py |    2 +-
 .../deploy_models/deploy_model_on_android.py       |    2 +-
 .../ci_logs/resnet-18-NHWC-B1-cuda.json            |   48 +-
 .../tune_with_autoscheduler/tune_network_arm.py    |    2 +-
 .../tune_with_autoscheduler/tune_network_mali.py   |    2 +-
 gallery/how_to/tune_with_autotvm/tune_relay_arm.py |    2 +-
 .../tune_with_autotvm/tune_relay_mobile_gpu.py     |    2 +-
 .../how_to/work_with_microtvm/micro_mlperftiny.py  |    2 +-
 include/tvm/arith/analyzer.h                       |    4 +-
 include/tvm/ir/global_info.h                       |   50 +-
 include/tvm/node/script_printer.h                  |    3 +
 .../tvm/relax/attrs/ccl.h                          |   30 +-
 include/tvm/relax/attrs/nn.h                       |   20 +-
 include/tvm/relax/attrs/op.h                       |   18 +
 include/tvm/relax/struct_info.h                    |   17 +-
 include/tvm/relax/transform.h                      |   32 +
 include/tvm/relax/utils.h                          |    5 +
 include/tvm/relay/dataflow_pattern.h               |    6 +
 include/tvm/relay/transform.h                      |   26 +-
 include/tvm/runtime/container/shape_tuple.h        |   24 +
 include/tvm/runtime/data_type.h                    |    1 +
 include/tvm/runtime/device_api.h                   |   50 +-
 include/tvm/runtime/disco/session.h                |  286 +++
 include/tvm/runtime/module.h                       |    5 +
 include/tvm/runtime/ndarray.h                      |    8 +-
 include/tvm/runtime/object.h                       |    2 +
 include/tvm/runtime/packed_func.h                  |  107 +-
 include/tvm/runtime/vm/bytecode.h                  |   19 +-
 include/tvm/runtime/vm/executable.h                |    5 +-
 include/tvm/script/printer/ir_docsifier.h          |    7 +
 include/tvm/target/codegen.h                       |   16 +
 include/tvm/target/target.h                        |   10 -
 include/tvm/target/target_kind.h                   |   27 +-
 include/tvm/tir/analysis.h                         |    4 +-
 include/tvm/tir/builtin.h                          |   63 +-
 include/tvm/tir/op.h                               |    1 +
 include/tvm/tir/schedule/schedule.h                |    2 +-
 include/tvm/tir/usmp/utils.h                       |    1 +
 include/tvm/topi/transform.h                       |   64 +-
 include/tvm/topi/x86/injective.h                   |    2 +-
 python/tvm/arith/__init__.py                       |    1 +
 python/tvm/arith/analyzer.py                       |   10 +-
 python/tvm/arith/int_set.py                        |    8 +
 python/tvm/auto_scheduler/measure.py               |    2 +-
 python/tvm/auto_scheduler/search_task.py           |    2 +-
 python/tvm/autotvm/measure/measure_methods.py      |    2 +-
 python/tvm/contrib/cc.py                           |    6 +-
 python/tvm/contrib/cudnn.py                        |   21 +-
 python/tvm/contrib/cutlass/attention_operation.py  |  105 +
 python/tvm/contrib/cutlass/build.py                |    6 +-
 python/tvm/contrib/cutlass/gemm_operation.py       |    2 +-
 python/tvm/contrib/cutlass/gen_conv2d.py           |    7 +-
 python/tvm/contrib/cutlass/gen_tensor_op.py        |  154 +-
 .../ir_builder/ir => contrib/msc}/__init__.py      |   11 +-
 .../ir_builder/ir => contrib/msc/core}/__init__.py |   11 +-
 .../__init__.py => contrib/msc/core/_ffi_api.py}   |   15 +-
 .../ir_builder => contrib/msc/core}/ir/__init__.py |   14 +-
 python/tvm/contrib/msc/core/ir/graph.py            |  657 +++++++
 python/tvm/contrib/msc/core/ir/translate.py        |  172 ++
 .../ir => contrib/msc/core/transform}/__init__.py  |   14 +-
 python/tvm/contrib/msc/core/transform/pattern.py   |  626 ++++++
 python/tvm/contrib/msc/core/transform/transform.py |   61 +
 .../ir => contrib/msc/core/utils}/__init__.py      |   14 +-
 python/tvm/contrib/msc/core/utils/expr.py          |  105 +
 python/tvm/contrib/msc/core/utils/info.py          |   98 +
 python/tvm/contrib/pipeline_executor_build.py      |    2 +-
 python/tvm/contrib/torch/optimize_torch.py         |   24 +-
 python/tvm/dlight/benchmark/bench.py               |    4 +-
 python/tvm/dlight/benchmark/extract.py             |    6 +
 python/tvm/dlight/gpu/gemv.py                      |  392 +++-
 python/tvm/dlight/gpu/matmul.py                    |  171 +-
 python/tvm/dlight/gpu/utils.py                     |    2 +
 python/tvm/driver/tvmc/model.py                    |    5 +-
 python/tvm/ir/__init__.py                          |    2 +-
 python/tvm/ir/expr.py                              |    6 +
 python/tvm/ir/global_info.py                       |   12 +
 python/tvm/ir/json_compact.py                      |   17 +
 python/tvm/meta_schedule/builder/local_builder.py  |    2 +-
 python/tvm/meta_schedule/runner/local_runner.py    |    7 +-
 .../meta_schedule/testing/custom_builder_runner.py |    2 +-
 python/tvm/relax/backend/contrib/cublas.py         |    7 +-
 python/tvm/relax/backend/contrib/cudnn.py          |  105 +
 python/tvm/relax/backend/patterns.py               |   36 +
 python/tvm/relax/block_builder.py                  |   98 +-
 python/tvm/relax/expr.py                           |   40 +-
 python/tvm/relax/frontend/nn/__init__.py           |    5 +-
 python/tvm/relax/frontend/nn/core.py               |  109 +-
 python/tvm/relax/frontend/nn/modules.py            |  516 ++++-
 python/tvm/relax/frontend/nn/op.py                 |  676 ++++++-
 python/tvm/relax/frontend/nn/spec.py               |  122 +-
 python/tvm/relax/frontend/nn/subroutine.py         |  179 ++
 python/tvm/relax/frontend/onnx/onnx_frontend.py    |  203 +-
 python/tvm/relax/frontend/torch/fx_translator.py   |  181 +-
 python/tvm/relax/op/__init__.py                    |    1 +
 python/tvm/relax/op/base.py                        |   41 +
 .../ir_builder/ir => relax/op/ccl}/__init__.py     |   13 +-
 .../ir/__init__.py => relax/op/ccl/_ffi_api.py}    |   14 +-
 python/tvm/relax/op/ccl/ccl.py                     |   60 +
 python/tvm/relax/op/image/image.py                 |    2 +-
 python/tvm/relax/op/mask.py                        |    3 +-
 python/tvm/relax/op/nn/nn.py                       |   31 +-
 python/tvm/relax/struct_info.py                    |   14 +-
 python/tvm/relax/testing/nn.py                     |   14 +-
 python/tvm/relax/testing/relay_translator.py       |   11 +-
 .../tvm/relax/transform/lazy_transform_params.py   |   16 +-
 .../tvm/relax/transform/legalize_ops/__init__.py   |    1 +
 python/tvm/relax/transform/legalize_ops/ccl.py     |   54 +
 python/tvm/relax/transform/legalize_ops/nn.py      |   16 +
 python/tvm/relax/transform/transform.py            |   65 +-
 python/tvm/relay/backend/contrib/ethosu/codegen.py |    2 +-
 python/tvm/relay/backend/executor_factory.py       |    4 +-
 python/tvm/relay/dataflow_pattern/__init__.py      |   13 +
 python/tvm/relay/frontend/onnx.py                  |    9 +-
 python/tvm/relay/frontend/pytorch.py               |   12 +-
 python/tvm/relay/op/contrib/cmsisnn.py             |   14 +-
 python/tvm/relay/op/contrib/dnnl.py                |    2 +-
 python/tvm/relay/op/strategy/arm_cpu.py            |   98 +-
 python/tvm/relay/quantize/_annotate.py             |    4 +-
 python/tvm/rpc/client.py                           |   17 +
 .../ir_builder/ir => runtime/disco}/__init__.py    |   12 +-
 .../ir/__init__.py => runtime/disco/_ffi_api.py}   |   14 +-
 python/tvm/runtime/disco/session.py                |  338 ++++
 python/tvm/runtime/module.py                       |    5 +-
 python/tvm/runtime/script_printer.py               |   11 +
 python/tvm/script/ir_builder/ir/__init__.py        |    2 +
 python/tvm/script/ir_builder/ir/ir.py              |   38 +-
 python/tvm/script/ir_builder/relax/ir.py           |   80 +-
 python/tvm/script/ir_builder/tir/ir.py             |   12 +
 python/tvm/script/parser/core/doc.py               |   21 +-
 python/tvm/script/parser/core/evaluator.py         |   21 +-
 python/tvm/script/parser/ir/__init__.py            |    2 +
 python/tvm/script/parser/relax/__init__.py         |    3 +-
 python/tvm/script/parser/relax/dist.py             |    8 +-
 python/tvm/script/parser/relax/entry.py            |   86 +-
 python/tvm/testing/runner.py                       |    2 +-
 python/tvm/testing/utils.py                        |   27 +-
 python/tvm/tir/__init__.py                         |   13 +-
 python/tvm/tir/op.py                               |  164 +-
 python/tvm/tir/schedule/schedule.py                |    2 +-
 python/tvm/tir/transform/transform.py              |   14 +
 python/tvm/topi/arm_cpu/conv2d_alter_op.py         |   23 +-
 python/tvm/topi/arm_cpu/conv2d_gemm.py             |   45 +-
 .../arm_cpu/mprofile/dsp/micro_kernel/avg_pool.py  |    8 +-
 .../topi/arm_cpu/mprofile/dsp/micro_kernel/gemm.py |   87 +-
 .../arm_cpu/mprofile/dsp/micro_kernel/max_pool.py  |   13 +-
 .../arm_cpu/mprofile/dsp/micro_kernel/tensordot.py |    7 +-
 python/tvm/topi/hexagon/compute_poolarea.py        |    2 +-
 python/tvm/topi/hexagon/slice_ops/max_pool2d.py    |    2 +-
 src/arith/canonical_simplify.cc                    |   58 +-
 src/arith/detect_linear_equation.cc                |    3 +-
 src/arith/iter_affine_map.cc                       |   35 +-
 src/arith/presburger_set.cc                        |  276 +++
 src/arith/presburger_set.h                         |  194 ++
 src/arith/rewrite_simplify.cc                      |    2 +
 src/contrib/msc/core/ir/graph.cc                   | 1075 +++++++++++
 src/contrib/msc/core/ir/graph.h                    |  751 ++++++++
 src/contrib/msc/core/ir/graph_builder.cc           |  695 +++++++
 src/contrib/msc/core/ir/graph_builder.h            |  325 ++++
 src/contrib/msc/core/printer/msc_base_printer.cc   |  171 ++
 src/contrib/msc/core/printer/msc_base_printer.h    |  232 +++
 src/contrib/msc/core/printer/print_utils.cc        |   75 +
 src/contrib/msc/core/printer/print_utils.h         |   95 +
 src/contrib/msc/core/printer/prototxt_printer.cc   |  112 ++
 src/contrib/msc/core/printer/prototxt_printer.h    |   79 +
 src/contrib/msc/core/printer/python_printer.cc     |  176 ++
 src/contrib/msc/core/printer/python_printer.h      |   87 +
 src/contrib/msc/core/transform/layout_utils.cc     |  194 ++
 src/contrib/msc/core/transform/layout_utils.h      |  110 ++
 src/contrib/msc/core/transform/set_expr_layout.cc  | 1214 ++++++++++++
 src/contrib/msc/core/transform/set_expr_name.cc    |  348 ++++
 src/contrib/msc/core/utils.cc                      |  314 +++
 src/contrib/msc/core/utils.h                       |  270 +++
 src/contrib/torch/base64.h                         |   75 -
 .../tvm_module_wrapper/RuntimeModuleWrapperTVM.cc  |  143 +-
 src/driver/driver_api.cc                           |   17 +
 src/ir/global_info.cc                              |   13 +
 src/node/container_printing.cc                     |   10 +-
 src/node/script_printer.cc                         |    3 +
 src/node/structural_hash.cc                        |   17 +
 src/relax/analysis/struct_info_analysis.cc         |  105 +-
 src/relax/backend/contrib/cudnn/codegen.cc         |  110 ++
 src/relax/backend/vm/vm_builtin_lower.cc           |   21 +
 src/relax/ir/binding_rewrite.cc                    |   22 +-
 src/relax/ir/dataflow_matcher.cc                   |   52 +-
 src/relax/ir/expr.cc                               |    2 +-
 src/relax/ir/expr_functor.cc                       |   20 +-
 src/relax/ir/struct_info.cc                        |   12 +-
 src/relax/ir/struct_info_functor.cc                |    4 +-
 src/relax/op/ccl/ccl.cc                            |   74 +
 src/{ir/global_info.cc => relax/op/ccl/ccl.h}      |   28 +-
 src/relax/op/image/resize.cc                       |    6 +
 src/relax/op/nn/attention.cc                       |    3 +
 src/relax/op/nn/convolution.cc                     |   28 +
 src/relax/op/nn/nn.cc                              |   84 +-
 src/relax/op/nn/pooling.cc                         |   12 +
 src/relax/op/op.cc                                 |   58 +
 src/relax/op/op_common.h                           |   26 +
 src/relax/op/tensor/binary.cc                      |   16 +
 src/relax/op/tensor/create.cc                      |    3 +
 src/relax/op/tensor/index.cc                       |   30 +
 src/relax/op/tensor/linear_algebra.cc              |   46 +
 src/relax/op/tensor/manipulate.cc                  |  244 ++-
 src/relax/op/tensor/search.cc                      |   43 +
 src/relax/op/tensor/set.cc                         |   38 +-
 src/relax/op/tensor/statistical.cc                 |   27 +
 src/relax/op/tensor/ternary.cc                     |   25 +-
 src/relax/transform/alter_op_impl.cc               |    4 +
 src/relax/transform/bind_symbolic_vars.cc          |  177 ++
 src/relax/transform/canonicalize_bindings.cc       |    2 +-
 src/relax/transform/convert_layout.cc              |    3 +-
 src/relax/transform/dead_code_elimination.cc       |   77 +-
 src/relax/transform/fold_dataflow_block_output.cc  |    2 +-
 src/relax/transform/fuse_ops.cc                    |   20 +-
 src/relax/transform/merge_composite_functions.cc   |   97 +-
 src/relax/transform/realize_vdevice.cc             |  296 +++
 src/relax/transform/rewrite_cuda_graph.cc          |    7 +-
 src/relax/transform/rewrite_dataflow_reshape.cc    |    9 +-
 src/relax/transform/to_mixed_precision.cc          |    6 +-
 src/relax/transform/to_non_dataflow.cc             |    1 +
 src/relax/transform/update_vdevice.cc              |  114 ++
 src/relax/transform/utils.h                        |    7 +
 src/relax/utils.cc                                 |   58 +-
 src/relay/backend/contrib/cmsisnn/compute_luts.cc  |   76 +
 src/relay/backend/contrib/cmsisnn/compute_luts.h   |   55 +
 src/relay/backend/contrib/cmsisnn/relay_to_tir.cc  |  151 +-
 src/relay/backend/contrib/cmsisnn/target.cc        |    3 +-
 .../backend/contrib/cmsisnn/tir_to_runtime.cc      |   84 +-
 src/relay/backend/contrib/codegen_c/target.cc      |    2 +-
 src/relay/backend/contrib/cutlass/target.cc        |    2 +-
 src/relay/backend/contrib/ethosu/codegen.cc        |    4 +-
 .../backend/contrib/example_target_hooks/target.cc |    5 +-
 .../contrib/example_target_hooks/tir_to_runtime.cc |   26 +-
 src/relay/backend/contrib/tensorrt/target.cc       |    2 +-
 src/relay/backend/contrib/uma/targets.cc           |    6 +-
 src/relay/backend/contrib/uma/tir_to_runtime.cc    |   34 +-
 src/relay/backend/vm/compiler.cc                   |   41 +-
 src/relay/backend/vm/manifest_lifetimes.cc         |    4 +-
 src/relay/ir/dataflow_matcher.cc                   |   21 +-
 src/relay/ir/dataflow_pattern.cc                   |   10 +
 src/relay/ir/dataflow_pattern_functor.cc           |    6 +-
 src/relay/ir/indexed_graph.cc                      |    7 +-
 src/relay/op/memory/memory.cc                      |   20 +-
 src/relay/op/memory/memory.h                       |    5 +-
 src/relay/op/nn/convolution.cc                     |    4 +-
 src/relay/qnn/op/convolution.cc                    |   17 +-
 src/relay/transforms/annotate_texture_storage.cc   |    9 +
 src/relay/transforms/device_domains.cc             |    7 +-
 src/relay/transforms/memory_alloc.cc               |    4 +-
 src/relay/transforms/to_mixed_precision.cc         |    9 +-
 src/runtime/c_runtime_api.cc                       |    4 +-
 src/runtime/contrib/cublas/cublas.cc               |   17 +-
 src/runtime/contrib/cudnn/conv_backward.cc         |   40 +-
 src/runtime/contrib/cudnn/conv_forward.cc          |   36 +-
 src/runtime/contrib/cudnn/cudnn_json_runtime.cc    |  197 ++
 src/runtime/contrib/cudnn/cudnn_utils.h            |   15 +
 src/runtime/contrib/json/json_node.h               |    2 +-
 src/runtime/contrib/papi/papi.cc                   |    7 +-
 src/runtime/disco/bcast_session.cc                 |  115 ++
 src/runtime/disco/bcast_session.h                  |   89 +
 src/runtime/disco/builtin.cc                       |  115 ++
 src/runtime/disco/builtin.h                        |   80 +
 src/runtime/disco/loader.cc                        |  191 ++
 src/runtime/disco/nccl/nccl.cc                     |  190 ++
 src/runtime/disco/nccl/utils.h                     |   93 +
 src/runtime/disco/session.cc                       |   68 +
 src/runtime/disco/threaded_session.cc              |  250 +++
 src/runtime/disco/utils.h                          |   79 +
 src/runtime/disco/worker.cc                        |  155 ++
 src/runtime/disco/worker.h                         |  113 ++
 src/runtime/hexagon/hexagon_device_api.cc          |    2 +-
 src/runtime/hexagon/ops/conv2d_fp16_hvx.cc         |    2 +-
 src/runtime/library_module.cc                      |    9 -
 src/runtime/library_module.h                       |   10 +
 src/runtime/metal/metal_device_api.mm              |    2 +-
 src/runtime/minrpc/minrpc_server.h                 |   10 +
 src/runtime/minrpc/minrpc_server_logging.h         |    4 +
 src/runtime/minrpc/rpc_reference.h                 |   13 +
 src/runtime/module.cc                              |    4 +
 src/runtime/opencl/opencl_device_api.cc            |    2 +-
 .../opencl/opencl_wrapper/opencl_wrapper.cc        |   28 +-
 src/runtime/profiling.cc                           |    6 +-
 src/runtime/relax_vm/builtin.cc                    |    6 +
 src/runtime/relax_vm/executable.cc                 |   10 +
 src/runtime/relax_vm/memory_manager.cc             |   18 +-
 src/runtime/relax_vm/ndarray_cache_support.cc      |  143 +-
 src/runtime/relax_vm/ndarray_cache_support.h       |   93 +
 src/runtime/relax_vm/vm.cc                         |   40 +-
 src/runtime/rpc/rpc_endpoint.cc                    |   10 +
 src/runtime/rpc/rpc_module.cc                      |    1 +
 src/runtime/vm/bytecode.cc                         |   56 +-
 src/runtime/vm/executable.cc                       |   34 +-
 src/runtime/vm/memory_manager.cc                   |   15 +-
 src/runtime/vm/profiler/vm.cc                      |   16 +-
 src/runtime/vm/vm.cc                               |   47 +-
 src/script/ir_builder/ir/ir.cc                     |   30 +
 src/script/ir_builder/relax/ir.cc                  |    9 +
 src/script/ir_builder/tir/ir.cc                    |    4 +-
 src/script/printer/ir/ir.cc                        |   10 +
 src/script/printer/ir_docsifier.cc                 |   16 +-
 src/script/printer/relax/call.cc                   |   51 +
 src/script/printer/relax/expr.cc                   |   36 +-
 src/script/printer/relax/struct_info.cc            |    7 +
 src/script/printer/relax/utils.h                   |   16 +
 src/script/printer/utils.h                         |    7 +-
 src/support/libinfo.cc                             |   17 +
 src/support/str_escape.h                           |   56 +-
 src/target/codegen.cc                              |   99 +-
 src/target/opt/build_cuda_on.cc                    |   18 +-
 src/target/source/codegen_aocl.cc                  |   19 +-
 src/target/source/codegen_c.cc                     |  153 +-
 src/target/source/codegen_c.h                      |   59 +-
 src/target/source/codegen_c_host.cc                |   93 +-
 src/target/source/codegen_c_host.h                 |    3 +-
 src/target/source/codegen_cuda.cc                  |   56 +-
 src/target/source/codegen_cuda.h                   |    4 +-
 src/target/source/codegen_metal.cc                 |   77 +-
 src/target/source/codegen_metal.h                  |    3 +-
 src/target/source/codegen_opencl.cc                |   24 +-
 src/target/source/codegen_vhls.cc                  |   34 +-
 src/target/source/ptx.cc                           |  145 +-
 src/target/source/ptx.h                            |   62 +
 src/target/source/source_module.cc                 |    6 +-
 src/target/spirv/codegen_spirv.cc                  |   11 +
 src/target/spirv/codegen_spirv.h                   |    1 +
 src/target/target.cc                               |   15 +-
 src/tir/analysis/estimate_flops.cc                 |   20 +-
 src/tir/op/builtin.cc                              |   16 +
 src/tir/op/op.cc                                   |   26 +
 src/tir/schedule/analysis.h                        |    2 +-
 src/tir/schedule/primitive.h                       |    2 +-
 .../schedule/primitive/layout_transformation.cc    |    2 +-
 src/tir/transforms/inject_virtual_thread.cc        |    2 +-
 src/tir/transforms/lower_tvm_builtin.cc            |    4 +-
 src/tir/transforms/split_host_device.cc            |    9 +-
 src/tir/transforms/storage_rewrite.cc              |  112 +-
 src/topi/transform.cc                              |   10 +-
 .../cpp/arith_integer_set_test.cc                  |   33 +-
 tests/cpp/target_test.cc                           |    3 +-
 tests/lint/pylint.sh                               |    3 +
 tests/python/contrib/test_clml/infrastructure.py   |    2 +-
 tests/python/contrib/test_cmsisnn/test_softmax.py  |   43 +
 tests/python/contrib/test_ethosn/test_codegen.py   |    4 +-
 tests/python/contrib/test_msc/test_graph_build.py  | 2037 ++++++++++++++++++++
 .../test_msc/test_transform_set_expr_layout.py     |   74 +
 .../test_msc/test_transform_set_expr_name.py       |  108 ++
 tests/python/disco/test_loader.py                  |  178 ++
 tests/python/disco/test_nccl.py                    |  362 ++++
 tests/python/disco/test_session.py                 |  254 +++
 tests/python/dlight/test_gpu_gemv.py               |  680 +++++--
 tests/python/dlight/test_gpu_matmul.py             |  180 +-
 tests/python/dlight/test_gpu_matmul_tensorize.py   |   36 +-
 tests/python/frontend/onnx/test_forward.py         |   95 +
 tests/python/frontend/pytorch/test_forward.py      |   10 +
 tests/python/frontend/tflite/test_forward.py       |   35 +-
 tests/python/relax/test_analysis.py                |   47 +
 .../relax/test_analysis_struct_info_analysis.py    |   60 +-
 tests/python/relax/test_bind_symbolic_vars.py      |  205 ++
 tests/python/relax/test_codegen_cublas.py          |    4 +-
 tests/python/relax/test_codegen_cudnn.py           |  252 +++
 tests/python/relax/test_codegen_cutlass.py         |  146 +-
 tests/python/relax/test_dataflow_pattern.py        |  157 +-
 tests/python/relax/test_expr_functor.py            |   31 +
 tests/python/relax/test_frontend_from_fx.py        |  489 ++++-
 .../python/relax/test_frontend_nn_extern_module.py |  117 ++
 tests/python/relax/test_frontend_nn_modules.py     |  403 +++-
 tests/python/relax/test_frontend_nn_op.py          |  310 ++-
 tests/python/relax/test_frontend_nn_subroutines.py |  101 +
 tests/python/relax/test_frontend_nn_tensor.py      |   15 +-
 tests/python/relax/test_frontend_onnx.py           |   73 +-
 .../python/relax/test_json_compact.py              |   50 +-
 tests/python/relax/test_op_binary.py               |   32 +-
 tests/python/relax/test_op_ccl.py                  |  164 ++
 tests/python/relax/test_op_create.py               |   17 +-
 tests/python/relax/test_op_image.py                |   11 +-
 tests/python/relax/test_op_index.py                |   57 +-
 tests/python/relax/test_op_linear_algebra.py       |   16 +-
 tests/python/relax/test_op_manipulate.py           |  114 +-
 tests/python/relax/test_op_nn.py                   |   47 +-
 tests/python/relax/test_op_nn_convolution.py       |   30 +-
 tests/python/relax/test_op_nn_pooling.py           |   19 +-
 tests/python/relax/test_op_search.py               |   14 +-
 tests/python/relax/test_op_set.py                  |   11 +-
 tests/python/relax/test_op_statistical.py          |   12 +-
 tests/python/relax/test_op_ternary.py              |    9 +-
 tests/python/relax/test_op_unary.py                |    8 +-
 tests/python/relax/test_relax_operators.py         |   40 +-
 tests/python/relax/test_relay_translator.py        |   26 +
 tests/python/relax/test_testing_nn.py              |   62 +-
 .../relax/test_transform_bind_symbolic_vars.py     |  270 +++
 .../relax/test_transform_canonicalize_bindings.py  |   34 +
 .../test_transform_combine_parallel_matmul.py      |  155 +-
 .../python/relax/test_transform_convert_layout.py  |    6 +-
 .../relax/test_transform_dead_code_elimination.py  |   71 +-
 tests/python/relax/test_transform_fuse_ops.py      |   18 +
 .../relax/test_transform_fuse_ops_by_pattern.py    |  106 +-
 .../relax/test_transform_lazy_transform_params.py  |   79 +
 .../relax/test_transform_legalize_ops_ccl.py       |   76 +
 ..._transform_legalize_ops_index_linear_algebra.py |    4 +-
 .../test_transform_legalize_ops_manipulate.py      |    4 +-
 .../python/relax/test_transform_legalize_ops_nn.py |   50 +-
 ...st_transform_legalize_ops_search_statistical.py |    2 +-
 .../test_transform_merge_composite_functions.py    |  773 ++++----
 .../python/relax/test_transform_realize_vdevice.py |  330 ++++
 .../relax/test_transform_rewrite_cuda_graph.py     |   24 +-
 .../test_transform_rewrite_dataflow_reshape.py     |   54 +
 .../relax/test_transform_to_mixed_precision.py     |   12 +-
 .../python/relax/test_transform_update_vdevice.py  |  128 ++
 tests/python/relax/test_tvmscript_ir_builder.py    |   82 +
 tests/python/relax/test_tvmscript_parser.py        |  174 +-
 .../relax/test_tvmscript_parser_op_manipulate.py   |   38 +-
 tests/python/relax/test_vm_build.py                |   35 +
 tests/python/relax/test_vm_codegen_only.py         |   27 +
 tests/python/relax/test_vm_execbuilder.py          |   35 +-
 tests/python/relay/aot/test_c_device_api.py        |   42 +-
 .../relay/aot/test_crt_forward_declarations.py     |   10 +-
 .../opencl_texture/test_conv2d_nchw_texture.py     |  361 +++-
 .../opencl_texture/test_conv2d_nhwc_texture.py     |  245 ++-
 .../test_depthwise_conv2d_nchw_texture.py          |   52 +-
 .../test_depthwise_conv2d_nhwc_texture.py          |   50 +-
 .../relay/opencl_texture/test_injection_texture.py |   33 +-
 tests/python/relay/opencl_texture/test_network.py  |   24 +-
 .../relay/opencl_texture/test_pool_texture.py      |   63 +-
 .../relay/opencl_texture/test_reduction_texture.py |   87 +-
 .../relay/opencl_texture/utils/adreno_utils.py     |   86 +-
 .../relay/strategy/test_select_implementation.py   |   89 +-
 tests/python/relay/test_dataflow_pattern.py        |   87 +
 tests/python/relay/test_pass_alter_op_layout.py    |    2 +-
 tests/python/relay/test_pass_auto_quantize.py      |   75 +
 .../relay/test_pass_dead_code_elimination.py       |   18 +-
 tests/python/relay/test_pass_plan_devices.py       |   11 +-
 tests/python/relay/test_to_mixed_precision.py      |   35 +-
 tests/python/topi/python/test_topi_conv2d_int8.py  |   11 +-
 .../topi/python/test_topi_conv2d_tensordot_opts.py |   28 +-
 tests/python/topi/python/test_topi_transform.py    |   46 +-
 .../unittest/test_arith_canonical_simplify.py      |   40 +
 .../unittest/test_arith_detect_linear_equation.py  |    4 +
 .../python/unittest/test_arith_iter_affine_map.py  |   27 +
 .../python/unittest/test_arith_rewrite_simplify.py |    2 +
 ...e_postproc_rewrite_parallel_vectorize_unroll.py |    6 +-
 .../test_meta_schedule_space_cuda_winograd.py      |    4 +-
 .../unittest/test_roundtrip_runtime_module.py      |  121 ++
 tests/python/unittest/test_runtime_module_load.py  |    2 +-
 tests/python/unittest/test_runtime_rpc.py          |    2 +-
 tests/python/unittest/test_target_codegen_blob.py  |    4 +-
 .../python/unittest/test_target_codegen_c_host.py  |   51 +-
 .../test_tir_analysis_estimate_tir_flops.py        |   28 +-
 tests/python/unittest/test_tir_op_types.py         |   35 +
 tests/python/unittest/test_tir_ptx_cp_async.py     |  112 ++
 .../test_tir_transform_inject_ptx_async_copy.py    |  109 +-
 .../test_tir_transform_lower_warp_memory.py        |   10 +-
 ...form_merge_dynamic_shared_memory_allocations.py |    2 +-
 ...est_tir_transform_pointer_value_type_rewrite.py |   73 +
 .../unittest/test_tir_transform_thread_sync.py     |    2 +-
 .../python/unittest/test_tvmscript_error_report.py |    8 -
 .../unittest/test_tvmscript_ir_builder_tir.py      |   20 +
 tests/python/unittest/test_tvmscript_parser_tir.py |   33 +
 .../python/unittest/test_tvmscript_printer_tir.py  |   41 +
 tests/scripts/task_config_build_cpu.sh             |    1 +
 tests/scripts/task_config_build_gpu.sh             |    1 +
 tests/scripts/task_python_integration.sh           |    2 +-
 tests/scripts/task_python_microtvm.sh              |    1 +
 tests/scripts/unity/task_python_relax.sh           |    3 +
 vta/python/vta/transform.py                        |    2 +-
 web/src/compact.ts                                 |    4 +-
 web/src/memory.ts                                  |    2 +-
 web/src/rpc_server.ts                              |   36 +-
 web/src/runtime.ts                                 |  104 +-
 web/src/support.ts                                 |    4 +-
 web/src/webgpu.ts                                  |   11 +-
 web/tests/python/prepare_test_libs.py              |    2 +-
 web/tests/python/webgpu_rpc_test.py                |    2 +-
 web/tests/python/websock_rpc_test.py               |    2 +-
 web/tsconfig.json                                  |    1 -
 505 files changed, 30748 insertions(+), 3310 deletions(-)
 copy python/tvm/script/parser/ir/__init__.py => cmake/modules/contrib/MSC.cmake (70%)
 create mode 100644 cmake/utils/FindNCCL.cmake
 create mode 100644 cmake/utils/FindRCCL.cmake
 copy src/ir/global_info.cc => include/tvm/relax/attrs/ccl.h (57%)
 create mode 100644 include/tvm/runtime/disco/session.h
 copy python/tvm/{script/ir_builder/ir => contrib/msc}/__init__.py (78%)
 copy python/tvm/{script/ir_builder/ir => contrib/msc/core}/__init__.py (78%)
 copy python/tvm/{script/ir_builder/ir/__init__.py => contrib/msc/core/_ffi_api.py} (78%)
 copy python/tvm/{script/ir_builder => contrib/msc/core}/ir/__init__.py (78%)
 create mode 100644 python/tvm/contrib/msc/core/ir/graph.py
 create mode 100644 python/tvm/contrib/msc/core/ir/translate.py
 copy python/tvm/{script/ir_builder/ir => contrib/msc/core/transform}/__init__.py (78%)
 create mode 100644 python/tvm/contrib/msc/core/transform/pattern.py
 create mode 100644 python/tvm/contrib/msc/core/transform/transform.py
 copy python/tvm/{script/ir_builder/ir => contrib/msc/core/utils}/__init__.py (78%)
 create mode 100644 python/tvm/contrib/msc/core/utils/expr.py
 create mode 100644 python/tvm/contrib/msc/core/utils/info.py
 create mode 100644 python/tvm/relax/backend/contrib/cudnn.py
 create mode 100644 python/tvm/relax/frontend/nn/subroutine.py
 copy python/tvm/{script/ir_builder/ir => relax/op/ccl}/__init__.py (78%)
 copy python/tvm/{script/ir_builder/ir/__init__.py => relax/op/ccl/_ffi_api.py} (78%)
 create mode 100644 python/tvm/relax/op/ccl/ccl.py
 create mode 100644 python/tvm/relax/transform/legalize_ops/ccl.py
 copy python/tvm/{script/ir_builder/ir => runtime/disco}/__init__.py (78%)
 copy python/tvm/{script/ir_builder/ir/__init__.py => runtime/disco/_ffi_api.py} (78%)
 create mode 100644 python/tvm/runtime/disco/session.py
 create mode 100644 src/arith/presburger_set.cc
 create mode 100644 src/arith/presburger_set.h
 create mode 100644 src/contrib/msc/core/ir/graph.cc
 create mode 100644 src/contrib/msc/core/ir/graph.h
 create mode 100644 src/contrib/msc/core/ir/graph_builder.cc
 create mode 100644 src/contrib/msc/core/ir/graph_builder.h
 create mode 100644 src/contrib/msc/core/printer/msc_base_printer.cc
 create mode 100644 src/contrib/msc/core/printer/msc_base_printer.h
 create mode 100644 src/contrib/msc/core/printer/print_utils.cc
 create mode 100644 src/contrib/msc/core/printer/print_utils.h
 create mode 100644 src/contrib/msc/core/printer/prototxt_printer.cc
 create mode 100644 src/contrib/msc/core/printer/prototxt_printer.h
 create mode 100644 src/contrib/msc/core/printer/python_printer.cc
 create mode 100644 src/contrib/msc/core/printer/python_printer.h
 create mode 100644 src/contrib/msc/core/transform/layout_utils.cc
 create mode 100644 src/contrib/msc/core/transform/layout_utils.h
 create mode 100644 src/contrib/msc/core/transform/set_expr_layout.cc
 create mode 100644 src/contrib/msc/core/transform/set_expr_name.cc
 create mode 100644 src/contrib/msc/core/utils.cc
 create mode 100644 src/contrib/msc/core/utils.h
 delete mode 100644 src/contrib/torch/base64.h
 create mode 100644 src/relax/backend/contrib/cudnn/codegen.cc
 create mode 100644 src/relax/op/ccl/ccl.cc
 copy src/{ir/global_info.cc => relax/op/ccl/ccl.h} (65%)
 create mode 100644 src/relax/transform/bind_symbolic_vars.cc
 create mode 100644 src/relax/transform/realize_vdevice.cc
 create mode 100644 src/relax/transform/update_vdevice.cc
 create mode 100644 src/relay/backend/contrib/cmsisnn/compute_luts.cc
 create mode 100644 src/relay/backend/contrib/cmsisnn/compute_luts.h
 create mode 100644 src/runtime/contrib/cudnn/cudnn_json_runtime.cc
 create mode 100644 src/runtime/disco/bcast_session.cc
 create mode 100644 src/runtime/disco/bcast_session.h
 create mode 100644 src/runtime/disco/builtin.cc
 create mode 100644 src/runtime/disco/builtin.h
 create mode 100644 src/runtime/disco/loader.cc
 create mode 100644 src/runtime/disco/nccl/nccl.cc
 create mode 100644 src/runtime/disco/nccl/utils.h
 create mode 100644 src/runtime/disco/session.cc
 create mode 100644 src/runtime/disco/threaded_session.cc
 create mode 100644 src/runtime/disco/utils.h
 create mode 100644 src/runtime/disco/worker.cc
 create mode 100644 src/runtime/disco/worker.h
 create mode 100644 src/runtime/relax_vm/ndarray_cache_support.h
 copy src/ir/global_info.cc => tests/cpp/arith_integer_set_test.cc (53%)
 create mode 100644 tests/python/contrib/test_msc/test_graph_build.py
 create mode 100644 tests/python/contrib/test_msc/test_transform_set_expr_layout.py
 create mode 100644 tests/python/contrib/test_msc/test_transform_set_expr_name.py
 create mode 100644 tests/python/disco/test_loader.py
 create mode 100644 tests/python/disco/test_nccl.py
 create mode 100644 tests/python/disco/test_session.py
 create mode 100644 tests/python/relax/test_bind_symbolic_vars.py
 create mode 100644 tests/python/relax/test_codegen_cudnn.py
 create mode 100644 tests/python/relax/test_frontend_nn_extern_module.py
 create mode 100644 tests/python/relax/test_frontend_nn_subroutines.py
 copy python/tvm/ir/global_info.py => tests/python/relax/test_json_compact.py (53%)
 create mode 100644 tests/python/relax/test_op_ccl.py
 create mode 100644 tests/python/relax/test_transform_bind_symbolic_vars.py
 create mode 100644 tests/python/relax/test_transform_legalize_ops_ccl.py
 create mode 100644 tests/python/relax/test_transform_realize_vdevice.py
 create mode 100644 tests/python/relax/test_transform_update_vdevice.py
 create mode 100644 tests/python/unittest/test_roundtrip_runtime_module.py
 create mode 100644 tests/python/unittest/test_tir_transform_pointer_value_type_rewrite.py

Reply via email to