This is an automated email from the ASF dual-hosted git repository.

mbrookhart pushed a change to branch ci-docker-staging
in repository https://gitbox.apache.org/repos/asf/tvm.git.


    from 9fc03b9  Change jenkinsfile
     add c8f54f9  [Bugfix, CuDNN] fix segfault when cudnnDestroy called with 
destroyed cuda context (#8267)
     add ae58f2c  [Topi][Unittests] Parametrized tests in `test_topi_dense.py`, 
split out gpu-independent implementations (#8336)
     add aa6fd43  [Build] Add CUDA_VERSION and GIT_COMMIT_TIME (#8372)
     add 4284e32  Fix compute library installation on AArch64 (#8371)
     add 9ed593e  [Unittests] Added a meta-test for tvm.testing.fixture 
behavior in case of a broken fixture. (#8343)
     add b1a946b  [Vulkan][Unittests] Add parametrization to vulkan unit tests. 
(#8348)
     add faadb7d  [FIX] Detect like cores by looking at scaling_max_freq 
instead of (#8370)
     add 6d1ced0  [Matmul] Add matmul op (#8234)
     add 8d4df91  Fix issue with importing models using Tensorflow Lite 2.4.x 
schema (#8375)
     add 073becd  [MyPy] Minimal type checking on TIR schedule (#8367)
     add 1d64fa5  Update the tvmc tutorial with additional requirements (#8334)
     add 39e1ffe  Support QLinearAdd from onnx runtime com.microsoft contrib 
ops. (#8305)
     add c989e4a  [Metal] Add pass for splitting kernel with huge number of 
args (#8313)
     add 578f617  [Tuning] Allow multiprocessing spawn to work (on macOS llvm 
at least) (#8363)
     add 7504a9c  fix ci-arm build process (#8377)
     add a66186b  [FIX] Fix depthwise conv2d on non-cuda GPU platforms (#8379)
     add 2e47947  [cuDNN] Add support for log_softmax (#8369)
     add 9112b6e  Allow tvmc to compile models with AOT executor in MLF (#8331)
     add 29e958d  [TIR][TVMScript] specialize (#8354)
     add 6f600f1  [Refactor] Remove dead code from depthwise_conv2d for Intel 
graphics (#8381)
     add ab01abc  [BugFix][Relay] Fix type relation for batch_matmul (#8376)
     add 22204be  fix keras install (#8391)
     add 7b898d0  Fix np.int and np.float usage in the tree. (#8389)
     add 354d996  Add missing annotation for requires_gpu in test_topi_dense.py 
Requires GPU (#8387)
     add 970aeff  Add "operator" style to Model Library Format (#8072)
     add 2e3d617  macOS is now supported (#8396)
     add 7e3f068  [microTVM] Add Nucleo stm32l4r5zi board to zephyr (#8386)
     add e19e979  [Torch] Remove unused conversion (#8397)
     add e32d47e  [Arith] Inverse affine map (#8384)
     add a00d211  Actually add Compute Library tests to the Jenkins File (#8394)
     add d17f753  Support aten::flip (#8398)
     add d3fc562  [Relay][TOPI] Resize 1D (#8346)
     add 6a3d950  [Docs] Fix for broken link in apps for wasm-standalone dir 
(#8045)
     add ec47129  add aten::masked_fill_ in pytorch frontend (#8403)
     add 6bcad2e  fix storage rewrite index remap (#8338)
     add bbfc52c  Cleanup more uses of np.bool and np.int. (#8399)
     add bd5cd9f  [Fix] Update stale relay.Module API in docs/comments (#8411)
     add 2628179  [ONNX] Wrap 'If' if it has multiple outputs (#8385)
     add a4775c2  [DOCS] Add docs for Pass Instrument (#8220)
     add 8fb4cdf  Revert "Actually add Compute Library tests to the Jenkins 
File (#8394)" (#8400)
     add 9c66587  Refactor the compile engine into a cleaner interface. (#7518)
     add 0b39af7  [Relay] Fix index order in conv2d computation for Arm CPU. 
(#8361)
     add e3e03df  [microTVM] Add fixture to zephyr test (#8393)
     add e7c5349  [Relay] Add support of conv2d with NHWC for Mali (#8422)
     add ee65ab7  [PyLint] Minor updates to pass pylint locally. (#8424)
     add 53cb8aa  [Frontend] Check LLVM enabled/installed (#8414)
     add 4b67e9d  [Bug] Fix x86 dense schedule extern ops (#8420)
     add e934b7e  [Doc] Fix Relay pattern rewrite (#8425)
     add 0fa4396  [CUDA] dense_tensorcore/batch_matmul_tensorcore support 
int8/int4 (#8402)
     add 683c5eb  [Arith] Simplify MatchFusePattern in InverseAffineMap (#8427)
     add e51f5bb  [TOPI] Bugfix for topi.prod (#8416)
     add f692fc7  Improve XGBTuner document (#8428)
     add 513fcf4  [TVMSCRIPT] TVMScript Parser support BufferSlice indices 
(#8408)
     add 6141cac  Replace RuntimeError in _lookup_task with deferred error. 
(#8421)
     add bdfbc86  fix flaky TF test (#8431)
     add 59b204d  [microTVM][RVM] Fix clock skew on virtualbox (#8395)
     add 972d7b5  [Relay] Add support of conv2d with NHWC for Bifrost (#8430)
     add 3a9a388  fix wrong log of tir pass VerifyMemory (#8445)
     add c3558a1  [Relay to onnx conversion fixes][Pool, Pad] (#8435)
     add 3424005  [Relay to onnx conversion][New ops] (#8436)
     add 1d7a9e9  [ROCM] Fix undefined symbols by adding library (#8446)
     add 15bdf28  Fix address and port reported by android_rpc to tracker 
(#8405)
     add c81d533  [Bugfix] Fix broadcast type func with incomplete type (#8438)
     add cd5a20e  [COMMUNITY] @junrushao1994 -> PMC (#8450)
     add 62adc77  [MyPy] Extend type checking and annotation for TIR (#8429)
     add d043cb9  [BugFix][TOPI] Fix the integer overflow problem of the 
scatter_nd op. (#8415)
     add 807373c  Add qnn batch_matmul operator (#8401)
     add 80f48c7  [microTVM] Fix Stack Size Issue for Zephyr AOT Demo on 
Physical Hardware (#8453)
     add 957cc12  [Relay] Modify create_executor to pass params (#8418)
     add d67514b  [PROFILING] Use PAPI to collect hardware performance counters 
on CPU and CUDA (#7983)
     add 136f218  [Relay][ONNX] Batch_matmul to dense optimization (#8440)
     add f62917e  [TOPI] Add support for arbitrary dtypes to CSRMV and CSRMM 
(#8437)
     add 1a26733  [Refactor] Enforce attaching storage scope to PointerType 
(#8366)
     add 73b38e8  [Fix] Explicitly retain `__hash__` of `StringImm` (#8449)
     add 5c1a1cf  [CUDA] Improve injective schedule to enable half2 (#8457)
     add c16d61b  [Fix] Remove unused variable in GraphExecutorCodegen (#8465)
     add a425d265 [Docs] Corrected typo in googletest build instructions. 
(#8459)
     add f15be8b  [RPC] Fix cpp_rpc connection to rpc_tracker (#8388)
     add e1b3ff4  [Relay][Frontend][ONNX] Add ConvInteger support. (#8456)
     add bbba5da  [COMMUNITY] comaniac added as new PMC member (#8470)
     add 1a9bcc5  [UnitTests] Minor fixes to unit tests for cudnn/vulkan 
targets (#8462)
     add 29f789f  [Codegen] Remove compile_enginer header (#8471)
     add 11c5b6d  [Relay][Onnx][Frontend] Add RandomUniform converter and tests 
to onnx frontend. (#8426)
     add 8a8c9b2  [AMP] Add default op attribute registration to __init__.py 
(#8460)
     add bd88ee2  Fix auto-scheduling after 9c6658721 (#8478)
     add ce15ca6  [Relay][Frontend][ONNX] Allow importing models with malformed 
Loop nodes. (#8475)
     add 63c6df8  FoldScaleAxis became non-recursive (#8325)
     add ea8b69b  DeviceType enums match dlpack (#8407)
     add 5bb01ef  fix typo (#8484)
     add c263f22  [TVMC][FIX] Compiler supports input with a slash (#8481)
     add 2b57907  fix minor misspelling (#8476)
     add cba9cf3  [VM] Fix the shape function of conv nhwc (#8480)
     add c8b9900  [BYOC] add multi functions support in partition pass (#8464)
     add 7388715  Fix _get_yolo_detections (#8477)
     add ff425c1  apps: microtvm: Disable `CONFIG_FPU ` for Zephyr runtime 
(#8055)
     add 44f1baf  [TVMSCRIPT] Support tir.abs node in tvm script (#8488)
     add c95d16e  [Frontend][Tensorflow2] Stridedslice and concat_v2 fix (#8483)
     add 5ecd6cd  [VM] Allow serialization of function attrs which are strings 
(#8485)
     add 7456cfc  [AutoTVM] Re-enable `ref_input` (#8113)
     add 0545962  [Bugfix] [tir] do not simplify 'Any() - Any()' to 0 (#8266)
     add 2c61afa  Switch from CompileEngine to TECompiler in Interpreter (#8486)
     add d3db5d6  Fix dynamic batching when use_implicit_batch=False (#8461)
     add 1534983  [ARITH] fix zero iter bug in arith (#8494)
     add f64ddff  Add missing shape functions for relay.nn operations (#8489)
     add 4b2ccde  src/runtime/module.cc (#8496)
     add 6d88bdd  Update Docker CI (#8193)
     add 2d1847c  [ETHOS-N] Re-enabled tests and updated module hashes (#8498)
     add 2b74f97  Keep CODEOWNERS file up to date. (#8500)
     add d5818b4  [Frontend, pytorch] Vc/pytorch lstm (#8447)
     add ca28dff  Rename runtime-config to executor-config and add 
documentation for Model Library Format (#8270)
     add 843d1c4  Merge branch 'main' into onnx_cpu_tests

No new revisions were added by this update.

Summary of changes:
 .github/CODEOWNERS                                 |  127 ++-
 CMakeLists.txt                                     |    2 +
 CONTRIBUTORS.md                                    |    4 +-
 Jenkinsfile                                        |    6 +-
 apps/README.md                                     |    2 +-
 apps/cpp_rpc/rpc_env.cc                            |    9 +-
 apps/cpp_rpc/rpc_server.cc                         |    2 +-
 apps/cpp_rpc/rpc_tracker_client.h                  |   15 +-
 apps/microtvm/reference-vm/zephyr/Vagrantfile      |    1 +
 .../aot_demo/boards/nrf5340dk_nrf5340_cpuapp.conf  |    3 +
 ...40dk_nrf5340_cpuapp.conf => nucleo_l4r5zi.conf} |    4 +-
 apps/microtvm/zephyr/aot_demo/boards/qemu_x86.conf |    3 +
 apps/microtvm/zephyr/aot_demo/prj.conf             |    3 -
 apps/microtvm/zephyr/aot_demo/src/zephyr_uart.c    |    6 +
 .../boards/nrf5340dk_nrf5340_cpuapp.conf           |    3 +
 .../zephyr/host_driven/boards/nucleo_f746zg.conf   |    3 +
 .../{nucleo_f746zg.conf => nucleo_l4r5zi.conf}     |    3 +-
 .../zephyr/host_driven/boards/qemu_riscv32.conf    |    3 +
 .../zephyr/host_driven/boards/qemu_riscv64.conf    |    3 +
 .../host_driven/boards/stm32f746g_disco.conf       |    3 +
 apps/microtvm/zephyr/host_driven/prj.conf          |    3 -
 cmake/modules/CUDA.cmake                           |    2 +-
 cmake/modules/Git.cmake                            |   15 +
 cmake/modules/LibInfo.cmake                        |    8 +
 cmake/modules/ROCM.cmake                           |    3 +
 .../modules/contrib/PAPI.cmake                     |   17 +-
 cmake/utils/FindROCM.cmake                         |    1 +
 .../ubuntu_download_arm_compute_lib_binaries.sh    |    2 +-
 docker/install/ubuntu_install_tensorflow.sh        |    2 +-
 docs/api/python/ir.rst                             |    8 +
 docs/conf.py                                       |    7 +-
 docs/dev/index.rst                                 |    1 +
 docs/dev/inferbound.rst                            |    2 -
 docs/dev/model_library_format.rst                  |  169 +++
 docs/dev/pass_infra.rst                            |  225 +++-
 docs/index.rst                                     |    1 +
 docs/install/from_source.rst                       |    2 +-
 docs/langref/relay_expr.rst                        |    2 +-
 docs/langref/relay_op.rst                          |    4 +-
 docs/langref/relay_pattern.rst                     |    6 +-
 .../{api/python/driver.rst => profiling/index.rst} |   10 +-
 docs/profiling/papi.rst                            |  114 ++
 include/tvm/arith/iter_affine_map.h                |   21 +
 include/tvm/relay/attrs/annotation.h               |   12 +
 include/tvm/relay/attrs/image.h                    |  107 +-
 include/tvm/relay/attrs/nn.h                       |   26 +
 include/tvm/relay/function.h                       |    2 +-
 include/tvm/relay/transform.h                      |    7 +
 .../tvm/runtime/contrib/papi.h                     |   39 +-
 include/tvm/runtime/device_api.h                   |   17 +-
 include/tvm/runtime/profiling.h                    |  102 +-
 include/tvm/runtime/threading_backend.h            |    8 +
 include/tvm/te/operation.h                         |   15 +-
 include/tvm/tir/analysis.h                         |    2 +-
 include/tvm/tir/buffer.h                           |    4 +-
 include/tvm/tir/function.h                         |   38 +
 include/tvm/tir/stmt.h                             |    9 +-
 .../tvm/rpc/ConnectTrackerServerProcessor.java     |    4 +-
 mypy.ini                                           |   20 +-
 python/gen_requirements.py                         |    2 +-
 python/setup.py                                    |    2 +-
 python/tvm/arith/__init__.py                       |    7 +-
 python/tvm/arith/iter_affine_map.py                |   27 +
 python/tvm/auto_scheduler/dispatcher.py            |    2 +-
 python/tvm/auto_scheduler/relay_integration.py     |   10 +
 python/tvm/auto_scheduler/task_scheduler.py        |    4 +-
 python/tvm/autotvm/measure/measure_methods.py      |  116 +-
 python/tvm/autotvm/task/task.py                    |   31 +-
 python/tvm/autotvm/tuner/xgboost_tuner.py          |    4 +-
 python/tvm/contrib/cudnn.py                        |   26 +
 python/tvm/contrib/debugger/debug_executor.py      |    9 +-
 python/tvm/contrib/target/onnx.py                  |   46 +-
 python/tvm/contrib/utils.py                        |   13 +
 python/tvm/driver/build_module.py                  |   31 +-
 python/tvm/driver/tvmc/common.py                   |    6 +-
 python/tvm/driver/tvmc/compiler.py                 |    6 +-
 python/tvm/driver/tvmc/frontends.py                |    2 +-
 python/tvm/driver/tvmc/model.py                    |   13 +-
 python/tvm/ir/instrument.py                        |   24 +-
 python/tvm/ir/transform.py                         |    4 +-
 python/tvm/micro/contrib/zephyr.py                 |    1 +
 python/tvm/micro/model_library_format.py           |  212 +++-
 python/tvm/relay/analysis/analysis.py              |    2 +-
 python/tvm/relay/backend/compile_engine.py         |    4 +-
 python/tvm/relay/backend/graph_executor_codegen.py |    6 +-
 python/tvm/relay/build_module.py                   |   35 +-
 python/tvm/relay/expr.py                           |   24 +-
 python/tvm/relay/frontend/caffe.py                 |    2 +-
 python/tvm/relay/frontend/common.py                |    5 +-
 python/tvm/relay/frontend/keras.py                 |    1 +
 python/tvm/relay/frontend/mxnet.py                 |    2 +-
 python/tvm/relay/frontend/onnx.py                  |  333 +++++-
 python/tvm/relay/frontend/pytorch.py               |  322 +++++-
 python/tvm/relay/frontend/pytorch_utils.py         |    2 +-
 python/tvm/relay/frontend/qnn_torch.py             |    2 +-
 python/tvm/relay/frontend/tensorflow.py            |   23 +-
 python/tvm/relay/frontend/tensorflow2.py           |    2 +-
 python/tvm/relay/frontend/tensorflow_ops.py        |   36 +-
 python/tvm/relay/frontend/tflite.py                |   33 +-
 python/tvm/relay/frontend/tflite_flexbuffer.py     |    2 +-
 python/tvm/relay/op/_tensor_grad.py                |   29 +
 python/tvm/relay/op/dyn/image/_image.py            |   26 +-
 python/tvm/relay/op/image/_image.py                |  149 ++-
 python/tvm/relay/op/image/image.py                 |  148 ++-
 python/tvm/relay/op/nn/_nn.py                      |  151 ++-
 python/tvm/relay/op/nn/nn.py                       |   44 +
 python/tvm/relay/op/op_attrs.py                    |   26 +-
 python/tvm/relay/op/strategy/bifrost.py            |    8 +
 python/tvm/relay/op/strategy/cuda.py               |   88 +-
 python/tvm/relay/op/strategy/generic.py            |   51 +-
 python/tvm/relay/op/strategy/hls.py                |   15 +-
 python/tvm/relay/op/strategy/mali.py               |   57 +-
 python/tvm/relay/op/strategy/x86.py                |   93 +-
 python/tvm/relay/qnn/op/qnn.py                     |   38 +
 python/tvm/relay/testing/yolo_detection.py         |    4 +-
 python/tvm/relay/transform/__init__.py             |    2 +-
 python/tvm/relay/transform/mixed_precision.py      |    9 +-
 python/tvm/relay/transform/transform.py            |   11 +
 python/tvm/runtime/profiler_vm.py                  |    8 +-
 python/tvm/runtime/profiling.py                    |   48 -
 python/tvm/runtime/profiling/__init__.py           |   85 ++
 .../tvm/{arith => runtime/profiling}/_ffi_api.py   |    7 +-
 python/tvm/script/diagnostics.py                   |    3 +-
 python/tvm/script/intrin.py                        |    5 +
 python/tvm/script/node.py                          |    6 +-
 python/tvm/script/scope_handler.py                 |    2 +-
 python/tvm/script/special_stmt.py                  |   16 +
 python/tvm/target/target.py                        |   16 +-
 python/tvm/te/hybrid/parser.py                     |    3 +-
 python/tvm/testing.py                              |    4 +-
 python/tvm/tir/analysis/analysis.py                |   34 +-
 python/tvm/tir/buffer.py                           |   12 +-
 python/tvm/tir/data_layout.py                      |   28 +-
 python/tvm/tir/expr.py                             |  160 +--
 python/tvm/tir/function.py                         |   57 +-
 python/tvm/tir/generic.py                          |   12 +-
 python/tvm/tir/ir_builder.py                       |   13 +-
 python/tvm/tir/op.py                               |   70 +-
 python/tvm/tir/schedule/block_scope.py             |   12 +-
 python/tvm/tir/schedule/schedule.py                |   28 +-
 python/tvm/tir/schedule/state.py                   |   10 +-
 python/tvm/tir/stmt.py                             |   67 +-
 python/tvm/tir/stmt_functor.py                     |    6 +-
 python/tvm/tir/transform/function_pass.py          |   17 +-
 python/tvm/tir/transform/transform.py              |  113 +-
 python/tvm/topi/__init__.py                        |    1 +
 python/tvm/topi/arm_cpu/conv2d_spatial_pack.py     |   21 +-
 python/tvm/topi/cuda/batch_matmul_tensorcore.py    |   83 +-
 python/tvm/topi/cuda/conv1d_transpose_ncw.py       |    2 +-
 python/tvm/topi/cuda/conv2d_nhwc.py                |    9 +-
 python/tvm/topi/cuda/dense.py                      |  237 +---
 python/tvm/topi/cuda/dense_tensorcore.py           |   81 +-
 python/tvm/topi/cuda/depthwise_conv2d.py           |   10 +-
 python/tvm/topi/cuda/injective.py                  |   36 +-
 python/tvm/topi/cuda/scatter.py                    |    6 +-
 python/tvm/topi/cuda/softmax.py                    |   10 +
 python/tvm/topi/cuda/tensorcore_alter_op.py        |  129 +--
 python/tvm/topi/generic/nn.py                      |   17 +
 python/tvm/{contrib/tf_op => topi/gpu}/__init__.py |    6 +-
 python/tvm/topi/{cuda => gpu}/dense.py             |  199 +---
 python/tvm/topi/image/resize.py                    | 1191 +++++++++++---------
 python/tvm/topi/intel_graphics/depthwise_conv2d.py |  183 ---
 python/tvm/topi/mali/conv2d.py                     |  124 +-
 python/tvm/topi/nn/batch_matmul.py                 |   28 +-
 python/tvm/topi/nn/dense.py                        |  138 ++-
 python/tvm/topi/nn/softmax.py                      |    5 +-
 python/tvm/topi/nn/sparse.py                       |   12 +-
 python/tvm/topi/nn/upsampling.py                   |    6 +-
 python/tvm/topi/random/kernel.py                   |    4 +-
 python/tvm/topi/scan.py                            |    6 +-
 python/tvm/topi/sparse/csrmm.py                    |   15 +-
 python/tvm/topi/sparse/csrmv.py                    |   15 +-
 python/tvm/topi/testing/__init__.py                |    5 +-
 python/tvm/topi/testing/bilinear_resize_python.py  |  105 --
 .../tvm/topi/testing/{batch_matmul.py => dense.py} |   31 +-
 python/tvm/topi/testing/depthwise_conv2d_python.py |   36 +-
 python/tvm/topi/testing/resize_python.py           |  294 +++++
 .../tvm/topi/testing/trilinear_resize3d_python.py  |  111 --
 python/tvm/topi/testing/upsampling_python.py       |  136 ---
 python/tvm/topi/utils.py                           |   10 +
 python/tvm/topi/x86/batch_matmul.py                |   25 +-
 python/tvm/topi/x86/dense.py                       |  103 +-
 rust/tvm-sys/src/device.rs                         |   16 +-
 rust/tvm/src/ir/relay/attrs/nn.rs                  |   12 +
 src/arith/iter_affine_map.cc                       |  126 ++-
 src/contrib/hybrid/codegen_hybrid.cc               |    9 +-
 src/contrib/hybrid/codegen_hybrid.h                |    2 -
 src/driver/driver_api.cc                           |   10 +-
 src/parser/source_map.cc                           |    1 -
 src/printer/model_library_format_printer.cc        |   81 ++
 src/printer/text_printer.h                         |   10 +
 src/printer/tir_text_printer.cc                    |   10 +
 src/printer/tvmscript_printer.cc                   |   14 +-
 src/relay/analysis/annotated_region_set.cc         |   18 +-
 src/relay/analysis/annotated_region_set.h          |   11 +-
 src/relay/backend/aot_executor_codegen.cc          |   21 +-
 src/relay/backend/build_module.cc                  |    6 +
 src/relay/backend/compile_engine.cc                |  663 +----------
 src/relay/backend/compile_engine.h                 |  211 +---
 src/relay/backend/graph_executor_codegen.cc        |  395 +++----
 src/relay/backend/graph_plan_memory.cc             |   60 +-
 src/relay/backend/interpreter.cc                   |   30 +-
 src/relay/backend/te_compiler.cc                   |  743 ++++++++++++
 src/relay/backend/te_compiler.h                    |  197 ++++
 .../{compile_engine.cc => te_compiler_cache.cc}    |  461 ++------
 .../{compile_engine.h => te_compiler_cache.h}      |  169 ++-
 src/relay/backend/utils.cc                         |   47 +
 src/relay/backend/utils.h                          |    4 +
 src/relay/backend/vm/compiler.cc                   |    7 +-
 src/relay/ir/function.cc                           |   14 +-
 src/relay/op/dyn/image/resize.cc                   |   30 +-
 src/relay/op/image/resize.cc                       |  129 ++-
 src/relay/op/make_op.h                             |    9 +-
 src/relay/op/nn/nn.cc                              |   94 +-
 src/relay/op/nn/nn.h                               |  126 ++-
 src/relay/op/tensor/transform.cc                   |   10 +-
 src/relay/qnn/op/batch_matmul.cc                   |  216 ++++
 src/relay/qnn/op/dense.cc                          |    2 +-
 .../transforms/auto_scheduler_layout_rewrite.cc    |   12 +-
 src/relay/transforms/device_annotation.cc          |   44 +-
 src/relay/transforms/dynamic_to_static.cc          |   10 +-
 src/relay/transforms/fold_scale_axis.cc            |  137 ++-
 src/relay/transforms/memory_alloc.cc               |   13 +-
 src/relay/transforms/partition_graph.cc            |   11 +-
 src/relay/transforms/split_args.cc                 |   95 ++
 src/relay/transforms/type_infer.cc                 |    9 +-
 src/runtime/contrib/cudnn/cudnn_utils.cc           |    6 +-
 src/runtime/contrib/cudnn/softmax.cc               |   91 +-
 src/runtime/contrib/papi/papi.cc                   |  299 +++++
 src/runtime/contrib/tensorrt/tensorrt_builder.cc   |   13 +-
 src/runtime/contrib/tensorrt/tensorrt_runtime.cc   |    8 +-
 .../graph_executor/debug/graph_executor_debug.cc   |   15 +-
 src/runtime/module.cc                              |    5 +-
 src/runtime/profiling.cc                           |   98 +-
 src/runtime/thread_pool.cc                         |   38 +-
 src/runtime/thread_storage_scope.h                 |    4 +-
 src/runtime/threading_backend.cc                   |    2 +-
 src/runtime/vm/executable.cc                       |   32 +-
 src/runtime/vm/profiler/vm.cc                      |   53 +-
 src/runtime/vm/profiler/vm.h                       |    5 +-
 src/support/libinfo.cc                             |   10 +
 src/target/llvm/codegen_amdgpu.cc                  |    5 +-
 src/target/llvm/codegen_llvm.cc                    |    8 +-
 src/target/llvm/codegen_llvm.h                     |    2 -
 src/target/llvm/codegen_nvptx.cc                   |    6 +-
 src/target/llvm/llvm_module.cc                     |   11 +-
 src/target/source/codegen_c.h                      |    1 +
 src/target/source/codegen_cuda.cc                  |    8 +-
 src/target/source/codegen_metal.cc                 |    9 +-
 src/target/source/codegen_metal.h                  |    3 +-
 src/target/spirv/codegen_spirv.cc                  |   40 +-
 src/target/spirv/codegen_spirv.h                   |    2 -
 src/target/target.cc                               |    3 -
 src/target/target_kind.cc                          |    5 +
 src/te/operation/compute_op.cc                     |    4 +-
 src/te/operation/create_primfunc.cc                |    5 +-
 src/te/operation/cross_thread_reduction.cc         |    5 +-
 src/te/operation/extern_op.cc                      |    4 +-
 src/te/operation/hybrid_op.cc                      |    4 +-
 src/te/operation/placeholder_op.cc                 |    2 +-
 src/te/operation/scan_op.cc                        |    4 +-
 src/te/schedule/schedule_ops.cc                    |   11 +-
 src/te/schedule/schedule_postproc_to_primfunc.cc   |   19 +-
 src/tir/analysis/deep_equal.cc                     |    3 +
 src/tir/analysis/verify_memory.cc                  |    2 +-
 src/tir/ir/buffer.cc                               |    5 +-
 src/tir/ir/specialize.cc                           |  337 ++++++
 src/tir/ir/stmt.cc                                 |   17 +-
 src/tir/transforms/ir_utils.cc                     |    6 +
 src/tir/transforms/ir_utils.h                      |    6 +
 src/tir/transforms/legalize_packed_calls.cc        |    2 +-
 src/tir/transforms/lower_thread_allreduce.cc       |   97 +-
 src/tir/transforms/lower_warp_memory.cc            |   12 +-
 src/tir/transforms/storage_access.cc               |   23 +-
 src/tir/transforms/storage_access.h                |    4 +-
 src/tir/transforms/storage_flatten.cc              |   20 +-
 src/tir/transforms/storage_rewrite.cc              |   36 +-
 src/tir/transforms/thread_storage_sync.cc          |   27 +-
 src/tir/transforms/update_pointer_storage_scope.cc |   91 ++
 .../transforms/update_pointer_storage_scope.h}     |   48 +-
 tests/lint/check_file_type.py                      |    2 +
 tests/micro/zephyr/conftest.py                     |    7 +-
 tests/micro/zephyr/test_zephyr.py                  |    7 +
 tests/micro/zephyr/test_zephyr_aot.py              |   10 +-
 .../contrib/test_bnns/test_conv2d_patterns.py      |    6 +-
 tests/python/contrib/test_cudnn.py                 |   30 +-
 tests/python/contrib/test_ethosn/test_networks.py  |   32 +-
 tests/python/contrib/test_onnx.py                  |  144 ++-
 tests/python/contrib/test_tensorrt.py              |   54 +-
 .../contrib/test_vitis_ai/test_vitis_ai_codegen.py |    4 +-
 tests/python/driver/tvmc/conftest.py               |   60 +-
 tests/python/driver/tvmc/test_mlf.py               |   52 +-
 tests/python/driver/tvmc/test_runner.py            |    5 +-
 tests/python/driver/tvmc/test_tvmc_common.py       |   21 +
 tests/python/frontend/coreml/test_forward.py       |   11 +-
 tests/python/frontend/onnx/test_forward.py         |  785 +++++++++++--
 tests/python/frontend/pytorch/test_forward.py      |   20 +
 tests/python/frontend/pytorch/test_lstms.py        |  363 ++++++
 tests/python/frontend/tensorflow/test_forward.py   |   23 +-
 .../frontend/tensorflow2/test_functional_models.py |    3 +-
 tests/python/frontend/tflite/test_forward.py       |    2 +-
 tests/python/integration/test_tuning.py            |  107 +-
 tests/python/relay/dyn/test_dynamic_op_level2.py   |   33 +-
 tests/python/relay/dyn/test_dynamic_op_level5.py   |   25 +-
 tests/python/relay/test_any.py                     |  166 ++-
 .../relay/test_auto_scheduler_task_extraction.py   |    1 +
 tests/python/relay/test_autotvm_task_extraction.py |    2 +-
 tests/python/relay/test_backend_graph_executor.py  |   27 +-
 tests/python/relay/test_op_grad_level2.py          |   17 +
 tests/python/relay/test_op_level1.py               |   63 +-
 tests/python/relay/test_op_level10.py              |   37 +-
 tests/python/relay/test_op_level2.py               |   34 +-
 tests/python/relay/test_op_level4.py               |    6 +-
 tests/python/relay/test_op_level5.py               |   89 +-
 tests/python/relay/test_op_qnn_batch_matmul.py     |  247 ++++
 tests/python/relay/test_pass_annotation.py         |   26 +-
 tests/python/relay/test_pass_convert_op_layout.py  |   20 +-
 tests/python/relay/test_pass_dynamic_to_static.py  |   29 +-
 tests/python/relay/test_pass_instrument.py         |    3 -
 .../python/relay/test_pass_legalize_tensorcore.py  |   68 +-
 tests/python/relay/test_pass_partition_graph.py    |  145 ++-
 tests/python/relay/test_pass_split_args.py         |   96 ++
 .../python/test_topi_batch_matmul_tensorcore.py    |   67 +-
 tests/python/topi/python/test_topi_conv2d_nhwc.py  |   26 +-
 tests/python/topi/python/test_topi_dense.py        |  211 ++--
 .../topi/python/test_topi_dense_tensorcore.py      |   96 +-
 .../topi/python/test_topi_depthwise_conv2d.py      |  121 +-
 .../test_topi_depthwise_conv2d_back_weight.py      |    4 +-
 tests/python/topi/python/test_topi_image.py        |   74 +-
 tests/python/topi/python/test_topi_matmul.py       |   26 +
 tests/python/topi/python/test_topi_sparse.py       |   35 +-
 tests/python/topi/python/test_topi_upsampling.py   |   32 +-
 .../python/unittest/test_arith_iter_affine_map.py  |   70 ++
 .../python/unittest/test_arith_rewrite_simplify.py |    3 +
 tests/python/unittest/test_autotvm_measure.py      |   24 +
 .../unittest/test_micro_model_library_format.py    |   70 +-
 tests/python/unittest/test_runtime_profiling.py    |   66 +-
 tests/python/unittest/test_runtime_vm_profiler.py  |    4 +-
 tests/python/unittest/test_target_codegen_bool.py  |   71 +-
 .../python/unittest/test_target_codegen_device.py  |    2 +-
 .../python/unittest/test_target_codegen_vulkan.py  |  312 ++---
 tests/python/unittest/test_target_target.py        |   22 +-
 tests/python/unittest/test_te_hybrid_script.py     |    4 +-
 .../python/unittest/test_te_schedule_tensorize.py  |    4 +-
 tests/python/unittest/test_te_tensor.py            |    2 +-
 tests/python/unittest/test_tir_nodes.py            |    4 +-
 tests/python/unittest/test_tir_specialize.py       |  199 ++++
 .../unittest/test_tir_transform_flatten_buffer.py  |    8 +-
 .../python/unittest/test_tir_transform_hoist_if.py |    2 +-
 .../unittest/test_tir_transform_loop_partition.py  |    4 +-
 .../test_tir_transform_lower_warp_memory.py        |    4 +-
 .../unittest/test_tir_transform_storage_rewrite.py |   42 +
 tests/python/unittest/test_tvm_testing_features.py |   37 +-
 tests/python/unittest/test_tvmscript_complete.py   |   66 ++
 .../python/unittest/test_tvmscript_error_report.py |   29 +
 tests/python/unittest/test_tvmscript_roundtrip.py  |   19 +-
 tests/scripts/task_mypy.sh                         |   12 +-
 tests/scripts/task_python_arm_compute_library.sh   |    3 +-
 tutorials/dev/use_pass_infra.py                    |   14 +-
 tutorials/dev/use_pass_instrument.py               |  372 ++++++
 tutorials/frontend/from_mxnet.py                   |    2 +-
 tutorials/frontend/from_onnx.py                    |    4 +-
 tutorials/get_started/tvmc_command_line_driver.py  |   25 +-
 363 files changed, 13099 insertions(+), 5743 deletions(-)
 copy apps/microtvm/zephyr/aot_demo/boards/{nrf5340dk_nrf5340_cpuapp.conf => nucleo_l4r5zi.conf} (92%)
 copy apps/microtvm/zephyr/host_driven/boards/{nucleo_f746zg.conf => nucleo_l4r5zi.conf} (92%)
 copy apps/microtvm/zephyr/aot_demo/boards/mps2_an521.conf => cmake/modules/contrib/PAPI.cmake (72%)
 create mode 100644 docs/dev/model_library_format.rst
 copy docs/{api/python/driver.rst => profiling/index.rst} (88%)
 create mode 100644 docs/profiling/papi.rst
 copy tests/cpp/profiling_test.cc => include/tvm/runtime/contrib/papi.h (54%)
 mode change 100644 => 100755 python/tvm/relay/frontend/common.py
 delete mode 100644 python/tvm/runtime/profiling.py
 create mode 100644 python/tvm/runtime/profiling/__init__.py
 copy python/tvm/{arith => runtime/profiling}/_ffi_api.py (89%)
 copy python/tvm/{contrib/tf_op => topi/gpu}/__init__.py (86%)
 copy python/tvm/topi/{cuda => gpu}/dense.py (59%)
 delete mode 100644 python/tvm/topi/testing/bilinear_resize_python.py
 copy python/tvm/topi/testing/{batch_matmul.py => dense.py} (68%)
 create mode 100644 python/tvm/topi/testing/resize_python.py
 delete mode 100644 python/tvm/topi/testing/trilinear_resize3d_python.py
 delete mode 100644 python/tvm/topi/testing/upsampling_python.py
 create mode 100644 src/printer/model_library_format_printer.cc
 create mode 100644 src/relay/backend/te_compiler.cc
 create mode 100644 src/relay/backend/te_compiler.h
 copy src/relay/backend/{compile_engine.cc => te_compiler_cache.cc} (60%)
 copy src/relay/backend/{compile_engine.h => te_compiler_cache.h} (73%)
 create mode 100644 src/relay/qnn/op/batch_matmul.cc
 create mode 100644 src/relay/transforms/split_args.cc
 create mode 100644 src/runtime/contrib/papi/papi.cc
 create mode 100644 src/tir/ir/specialize.cc
 create mode 100644 src/tir/transforms/update_pointer_storage_scope.cc
 copy src/{te/schedule/verify_compact_buffer.cc => tir/transforms/update_pointer_storage_scope.h} (54%)
 create mode 100644 tests/python/frontend/pytorch/test_lstms.py
 create mode 100644 tests/python/relay/test_op_qnn_batch_matmul.py
 create mode 100644 tests/python/relay/test_pass_split_args.py
 create mode 100644 tests/python/unittest/test_tir_specialize.py
 create mode 100644 tutorials/dev/use_pass_instrument.py

Reply via email to