This is an automated email from the ASF dual-hosted git repository.

vega pushed a change to branch ci-docker-staging
in repository https://gitbox.apache.org/repos/asf/tvm.git.


 discard 198f5ed  turn off test for now
 discard dad91eb  update images
 discard 04947e1  update jenkins
 discard 43ee197  update path
 discard 6af25b2  add verilator to ci
     add 374f809  Fix winograd infer tize (#7092)
     add dc51f17  Rollback changes to SSA begin/end scope for Store in (#7073)
     add 5782d2c  Fix missing header inclusion. (#7097)
     add 28647f2  clean standalone CRT files in microTVM VM rebuild script 
(#7095)
     add c213ac2  [metal] support int64 (#7105)
     add 6be4d0a  Update tune_relay_vta.py to support single board (#7100)
     add 054466b  [ONNX] NMS in ONNX (#6839)
     add 862655b   [TOPI] sparse_dense Op sparse_data input added  (#6889)
     add faed409  [ONNX] Fix a bug with reshape imports when an initialized 
target shape is used more than once (#7109)
     add 1da038e  [COMMUNITY] New reviewer @leandron (#7112)
     add 7a20b4a  [Relay][ConvertLayout] Support deformable conv2d (#7087)
     add 0a3e178  [metal] update language version (#7116)
     add 18cf9b9  [Relay][Frontend][Onnx] Auto extract onnx input shapes when 
possible. (#7115)
     add 053347c  [metal] Remove support of `double` type (#7118)
     add d5eb1da  [TF frontend] add support for StridedSlice to input a single 
constant (#6949)
     add 4060b4f  [BYOC] Added "include_non_call_ops" parameter to 
AnnotateTarget pass (#6655)
     add 829be98  [CI][ACL] Switched to ACL 20.11 (#7106)
     add fb8de5a  Add autoscheduler support to tvmc (#7070)
     add bad149e  [TOPI] Fix GPU Dynamic Op Schedule (#7117)
     add 9e766d9  [Relay][Topi]Add Sort Op to Relay (#6978)
     add 7cbc453  Fix spelling in some comments (#7124)
     add 6b9b898  Add ACL testing to the CI for AArch64. (#7122)
     add 833f238  Add `is_floating_point` and `div_` PyTorch ops (#7128)
     add 2036a0f  Fix a bug in batch_matmul that te.max should be used (#7111)
     add c7c9d86  [Frontend] Unnecessary default warning msg changed to debug 
(#7119)
     add 786a253  Update `is_floating_point` to handle bfloat16 (#7133)
     add 37af2d7  [CONTRIB] PopenPoolExecutor (#6959)
     add bf5e248  Added additional information to the from_onnx tutorial (#7127)
     add 38273ee  [CUDA] Parallel Cuda Mergesort (#7099)
     add 9914685  [TFLite] add support for float16 (#7093)
     add 82942fb  [TOPI] Simplify GPU NMS IR and optimize a bit (#7136)
     add 53c0641  Fix a few OpNode argument field descriptions when registered 
(#7140)
     add 9713d67  Created CSourceMetaData module for model metadata (#7002)
     add 24cd869  [COMMUNITY] @jcf94 -> Committer (#7141)
     add 98ca771  [Auto Scheduler] Mali Support (#7132)
     add 968b6f6  Add `is_floating_point()` test and better type support in 
`verify_model_vm()` (#7134)
     add 66744d9  [TFLite] pack operation extedned with const args (#6984)
     add bc43ed4  [BYOC] [ACL] include_non_call_ops = False (#7121)
     add 08a69d4  [Rust] Impl IsObjectRef for Array (#7138)
     add 4a7503d  Add a FunctionPattern, remove unused attributes in 
CallPattern (#7151)
     add 5a61089  [AutoScheduler] Improve SearchTask and ComputeDAG 
serialization (#7145)
     add e51bcdd  [AutoScheduler] Support string processing to records (#7144)
     add c000631  [TOPI] cuda reduction schedule (#7131)
     add 9956b5b  [TOPI] GPU sort IR refactor to enable sort by keys (#7157)
     add 0b2ab56  Support mode=instance, spatial for MXNet l2_normalize (#7062)
     add 68e7838  [AutoScheduler] Python based measure callbacks (#7143)
     add e27ad08  Slight optimize the default injective schedule (#7158)
     add 7dcafb0  [AutoScheduler] Add layout rewrite support for dense and 
batch matmul on CPU (#7161)
     add 6ffd740  [Relay] Add fast_softmax (#7163)
     add 3d8fd2a  [AutoScheduler] Fix the conflict of thread pool in 
measurement (#7166)
     add 4c13ae9  [Torch] Fix PyTorch NMS conversion for negative scores (#7137)
     add 592ecc0  [Doc][AutoScheduler] Improve hyperlinks in tutorials (#7167)
     add c1bca8e  [AutoScheduler] Enable winograd for conv2d and layout rewrite 
for conv3d (#7168)
     add 959669c  [Rust] More Rust bindings for Attrs (#7082)
     add 6fcc64c  [FIX] Fix using num_workers in omp (#7078)
     add 2f8187a  Update the docs stale links (#7169)
     add 2dec2dd  [AutoScheduler] Update layout rewrite option setting for 
measuring (#7156)
     add b8ac8d9  [µTVM] Add platform timer and RPCTimeEvaluator to enable 
AutoTVM (#6964)
     add d1399f3  [Torch] Support hard_swish op (#7174)
     add 59699a7  [TFLite] Reshape - support different qnn params for input and 
output (#7159)
     add cfdbf0ea Asymmetric padding and dilation in conv2d workload (#7142)
     add 466383a  [Relay][fix] Stack should take exprs that evaluate to tuples 
(#7130)
     add f2ab977  [AutoTVM-FIX] avoid unexpected value(1) of search space when 
get length for uninitiated search space (#7175)
     add 66e123f  [TOPI] Parallelize GPU NMS inner loop (#7172)
     add 6a4c51e  [AutoScheduler] Use VM to extract tasks for dynamic models 
(#7173)
     add f904d4f  [AutoScheduler] Fix policy for zero-rank output (#7180)
     add 712b4a5  [Auto Scheduler][fix] Add dense strategy for mali (#7181)
     add c6b766a  [Relay][Op] Remove reverse attribute from reshape and 
reverse_reshape operators. (#7086)
     add c02c9c5  Parallelize cumsum in get_valid_counts (#7123)
     add 6258fae  [Fix] Tensor core type issue for dense (#7187)
     add 76a9825  Remove seemingly invalid SoftPlus (#7189)
     add 86a8504  [Frontend][MXNet] add _npi_subtract_scalar (#7191)
     add 25f0252  Makes sure g_last_error is null terminated. (#7190)
     add eb64e25  Fix ICHECK_NOTNULL in logging.g (#7193)
     add 7053235  Fixed temporary lock_guard instances. (#7199)
     add 361f508  [CUBLAS, CUDNN] Support dynamic batch size (#7194)
     add 7163b5c  ReshapeAttrs no longer has reverse (#7205)
     add d052752  [ConvertLayout] slice_like support (#7184)
     add 23bd825  [AutoScheduler] Add custom build function (#7185)
     add 1812060  Fix prelu bug in onnx frontend. (#7208)
     add d3bb762  [PatternLang] Add Syntatic Sugar to the C++ pattern API and 
support DataType Attribute Matching (#7120)
     add 8b44741  [µTVM] Raise a better error when project_dir does not exist 
(#7165)
     add 197594b  Allow condition in if op to be an array. (#7215)
     add 3f0dc42  [Frontend][MXNet] add _npi_stack, issue #7186 (#7209)
     add 040afb0  [Fix][Autoscheduler] Costmodel enhancement & bug fix for 
graph debug runtime (#7197)
     add 1f931e3  [RELAY] Fix reshape header file (#7218)
     add 2dd5f8d  [µTVM] Add documentation (#7164)
     add d777e7c  [TIR][REFACTOR] Enforce allocate to use the correct var 
pointer hint. (#7216)
     add 93d79ba  [AutoScheduler][Relay] Control compile engine cache via 
PassContext (#7220)
     add 9815ae2  [Arith] Simplify cast (#7045)
     add 8910f72  [ConvertLayout] Support transpose (#7214)
     add 4911a08  [BUGFIX] Change debug_runtime to represent times in seconds 
internally (#7227)
     add 4e8cc4f  Fix Get Valid Counts when the number of boxes is zero (#7229)
     add 29da763  [CI] make sure submodule checkout in clean state (#7228)
     add 54c995d  [AutoScheduler] Do not return naive schedule in tracing mode 
(#7226)
     add 701bcc2  [RELAY,TOPI] Threefry PRNG: splittable and stateless (#7083)
     add 02ef6e6  [TOPI] Treat undefined elements as constants in Array (#7232)
     add d949d15  Revert "[AutoTVM-FIX] avoid unexpected value(1) of search 
space when get length for uninitiated search space (#7175)" (#7236)
     add 69a0628  [AutoTVM] Add index boundary check in ConfigSpace.get() 
(#7234)
     add 89e3688  [CUDA]batch_matmul tensorcore schedule (#7146)
     add 7f14769  [TFLite] Quantized version of unit test for Dense (#7113)
     add 77e4fd1  [BYOC][ACL] Depthwise convolution support (#7206)
     add 72c9a51  [FIX,TUTORIALS] Import tvm.testing in tutorials that use it 
(#7248)
     add 10b7929  add default value for leaky relu alpha (#7259)
     add b84eb16  [ONNX] Fix issues for Clip and RoiAlign (#7237)
     add e3b2984  Do not use ICHECK in nnvm (#7255)
     add ac684f9  Fix TRT weight conversion when first dim of weight shape is 1 
(#7253)
     add 4364386  Add op_name in error message for Pool (#7243)
     add b5a7de8  Remove check_correctness in AutoTVM, which is busted (#7250)
     add 86479ba  [Torch] Restore class-aware NMS for detection models by graph 
rewrite (#7154)
     add 1d07f1a  [THRUST] Faster multi dimensional argsort by segmented sort 
(#7195)
     add 1f2b40f  Unpack NMS inputs into bbox, scores and class ids (#7257)
     add 39ee8d5  [µTVM] Avoid listing links when probing serial ports (#7265)
     add 006b9b5  [Frontend][TFLite] Densify Op added (#7048)
     add 1410e68  Change the all #pragma once to ifdef include guard (#7264)
     add 384714b  Reorder dynamic to static and simplify inference, lower 
DynamicToStatic Opt Level (#7213)
     add 188c715  [DOCS] Fix figure links (#7268)
     add 35dabd6  [µTVM] Fix two warnings when deprecated forms are used (#7269)
     add 259652b  Adding aten::unsqueeze_ to PT Frontend (#7231)
     add af716e5  update vta-hw version (#7271)
     add c11959d  [FIX] Remove leftovers from check_correctness (#7272)
     add 8d3c0e7  [CUDA] [Codegen] Ensuring atleast one thread block for 
dynamism (#7273)
     add 7f4aa24  [AutoScheduler] Fix layout rewrite for axis with extent=1 
(#7279)
     add 51a2d66  [AutoScheduler] Fix typos in feature extraction and cost 
model (#7280)
     add 1677bb2  [PatternLang][Bugfix] Ensure CallNode attrs are not undefined 
before checking (#7278)
     add bb6c26b  switch to more portable bash pipeline syntax (#7274)
     add ac29624  Add MicroTVM support for the STM32F746 Discovery board (#7225)
     add f503d82  fix mcpu on os x (#7276)
     add d7a9a7c  [PatternLang] Add If pattern (#7282)
     add c947463  [Frontend][Tensorflow] Sparse_Dense Op CSR scheduling issue 
resolved for Cuda & X86 (#7148)
     add 4c5c086  [BYOC][bugfix] Handle empty tuples in annotation pass (#7288)
     add c3f50ff  [µTVM] Add ST STM32F746 disco board to tflite tutorial script 
(#7254)
     add 4f1f591  Bring back numbered lists to TVM docs. (#7290)
     add 2992e9b  [VM] Per-input, data dependence specification for shape func 
(#7210)
     add b52267e  [uTVM] Initial BYOC support with c-source module (#6950)
     add 637c9da  A few typo fixes in the uTVM design doc. (#7291)
     add ccccac2  Change const to used dtype if it is passed in (#7285)
     add 3f15d06  [TEST] Fix 
test_topi_batch_matmul_tensorcore.py:test_batch_matmul requirement (#7294)
     add 052ad3d  [TIR] Support Return in TIR (#7084)
     add 09bb60a  Add QEMU setup to uTVM tutorial. (#7296)
     add 6fb1019  [TUTORIAL] Add gpu instructions and results to deploy_sparse 
(#7298)
     add 5e92eed  [COMMUNITY] @Laurawly => PMC (#7307)
     add 5d95105  [AutoScheduler] Bug fix & Custom sketch support (#7260)
     add 7340c02  [TIR][REFACTOR] ForNode introduce thread binding and remove 
legacy field (#7306)
     add f91b51d  [Relay][Frontend][Onnx] Compare against onnxruntime more 
consistently during testing (#7300)
     add 2290cc0  [TOPI] Minor perf improvement for GPU scatter (#7233)
     add f8c55db  [TFLite] Added ability to infer shapes for arguments (#7293)
     add 62f251b  [TOPI] Make cumsum IR reusable, add thrust scan (#7303)
     add 969b77a  [BYOC][ACL] removed ACL 20.05 limitations (#7251)
     add 727345e  [COMMUNITY] tkonolige -> Reviewer (#7311)
     add e8ab607  [TFLite] Strided slice handling of shrink_axis_mask improved 
(#6998)
     add f829403  [TOPI] Rewrite GPU argwhere using exclusive scan (#7314)
     add 20e03bc  [COMMUNITY] @jwfromm -> Committer (#7316)
     add 8524b28  [µTVM] Add TVMPlatformGenerateRandom, a non-cryptographic 
random number generator. (#7266)
     add fc9e264  Made tensorflow IsNan actually work (#7320)
     add 7b6a1a7  Fix an issue with dynamic functions overwritting call arg 
types (#7295)
     add 17ae44d  add a shape function and dynamic test for round (#7324)
     add 790344c  relax tolerance for dlpack test (#7325)
     add 6787d74  get_top_results works on a copy of output (#7327)
     add af9d1d2  [BYOC][Verilator] add support to dynamically load hardware 
library (#7286)
     add 3ec67f0  [AutoScheduler] Fix conv3d's op strategy for auto-scheduler 
(#7328)
     add 75b565a  add Verilator to CI

This update added new revisions after undoing existing revisions.
That is to say, some revisions that were in the old version of the
branch are not in the new version.  This situation occurs
when a user force-pushes a change (git push --force) and generates a
repository history containing something like this:

 * -- * -- B -- O -- O -- O   (198f5ed)
            \
             N -- N -- N   refs/heads/ci-docker-staging (75b565a)

You should already have received notification emails for all of the O
revisions, and so the following emails describe only the N revisions
from the common base, B.

Any revisions marked "omit" are not gone; other references still
refer to them.  Any revisions marked "discard" are gone forever.

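No revisions that are new to the repository were added by this update;
the revisions marked "add" above already existed in the repository and
are new only to this branch.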

Summary of changes:
 CONTRIBUTORS.md                                    |  29 +-
 Jenkinsfile                                        |  22 +-
 apps/bundle_deploy/build_model.py                  |   2 -
 apps/bundle_deploy/bundle.c                        |   6 +
 apps/bundle_deploy/bundle_static.c                 |   6 +
 apps/cpp_rpc/rpc_server.cc                         |   1 +
 apps/microtvm/reference-vm/zephyr/pyproject.toml   |   4 +-
 apps/microtvm/reference-vm/zephyr/rebuild-tvm.sh   |   1 +
 cmake/config.cmake                                 |   4 +-
 cmake/modules/contrib/Verilator.cmake              |   8 +-
 docker/Dockerfile.ci_arm                           |   4 +
 docker/Dockerfile.demo_cpu                         |   2 +-
 docker/Dockerfile.demo_gpu                         |   2 +-
 docker/install/ubuntu_install_arm_compute_lib.sh   |   4 +-
 docker/install/ubuntu_install_qemu.sh              |   0
 docs/conf.py                                       |   7 +-
 docs/contribute/code_guide.rst                     |   2 +-
 docs/contribute/pull_request.rst                   |   4 +-
 docs/deploy/arm_compute_lib.rst                    |   8 +-
 docs/dev/benchmark.rst                             |   2 +-
 docs/dev/index.rst                                 |  12 +-
 docs/dev/microtvm_design.rst                       | 349 ++++++++
 docs/index.rst                                     |   1 +
 docs/langref/relay_pattern.rst                     |  35 +
 docs/microtvm/index.rst                            |  73 ++
 include/tvm/auto_scheduler/compute_dag.h           |  10 +-
 include/tvm/auto_scheduler/measure.h               |  29 +
 include/tvm/auto_scheduler/measure_record.h        |   6 +-
 include/tvm/auto_scheduler/search_task.h           |   6 +-
 include/tvm/relay/attrs/nn.h                       |  40 +-
 include/tvm/relay/attrs/{debug.h => random.h}      |  24 +-
 include/tvm/relay/attrs/transform.h                |   4 -
 include/tvm/relay/attrs/vision.h                   |  12 +-
 include/tvm/relay/dataflow_pattern.h               | 145 ++-
 include/tvm/relay/dataflow_pattern_functor.h       |   6 +
 include/tvm/relay/op_attr_types.h                  |   4 +-
 include/tvm/runtime/c_runtime_api.h                |   7 +
 include/tvm/runtime/crt/error_codes.h              |   5 +
 include/tvm/runtime/crt/platform.h                 |  38 +
 include/tvm/runtime/crt/rpc_common/session.h       |  10 +-
 include/tvm/runtime/data_type.h                    |   8 +-
 include/tvm/support/logging.h                      |   8 +-
 include/tvm/tir/builtin.h                          |   4 +
 include/tvm/tir/op.h                               |  11 +-
 include/tvm/tir/op_attr_types.h                    |   6 +-
 include/tvm/tir/stmt.h                             |  85 +-
 include/tvm/topi/detail/constant_utils.h           |   5 +-
 include/tvm/topi/transform.h                       |   1 +
 nnvm/include/nnvm/graph.h                          |   4 +-
 nnvm/include/nnvm/layout.h                         |  40 +-
 nnvm/include/nnvm/op.h                             |  12 +-
 nnvm/include/nnvm/tuple.h                          |   4 +-
 nnvm/src/core/graph.cc                             |  10 +-
 nnvm/src/core/op.cc                                |   2 +-
 nnvm/src/core/pass.cc                              |   2 +-
 nnvm/src/core/symbolic.cc                          |  22 +-
 nnvm/src/pass/correct_layout.cc                    |  12 +-
 nnvm/src/pass/gradient.cc                          |  16 +-
 nnvm/src/pass/graph_algorithm.h                    |  10 +-
 nnvm/src/pass/infer_shape_type.cc                  |  24 +-
 nnvm/src/pass/place_device.cc                      |  12 +-
 nnvm/src/pass/plan_memory.cc                       |   4 +-
 nnvm/src/pass/print_graph_ir.cc                    |   2 +-
 nnvm/src/pass/saveload_json.cc                     |  18 +-
 nnvm/tests/cpp/op_test.cc                          |   2 +-
 nnvm/tests/cpp/tuple_test.cc                       |   8 +-
 python/tvm/auto_scheduler/__init__.py              |   9 +-
 python/tvm/auto_scheduler/compute_dag.py           |  64 +-
 python/tvm/auto_scheduler/cost_model/cost_model.py |   4 +-
 python/tvm/auto_scheduler/cost_model/xgb_model.py  |  40 +-
 python/tvm/auto_scheduler/feature.py               |  10 +-
 python/tvm/auto_scheduler/measure.py               |  92 +-
 python/tvm/auto_scheduler/measure_record.py        |  37 +
 python/tvm/auto_scheduler/relay_integration.py     | 118 ++-
 python/tvm/auto_scheduler/search_policy.py         |  35 +-
 python/tvm/auto_scheduler/search_task.py           |  59 +-
 python/tvm/auto_scheduler/task_scheduler.py        |  25 +-
 python/tvm/auto_scheduler/utils.py                 |  64 +-
 python/tvm/autotvm/measure/measure_methods.py      |  66 +-
 python/tvm/autotvm/task/space.py                   |   8 +-
 python/tvm/autotvm/tuner/xgboost_tuner.py          |   2 +-
 python/tvm/contrib/cudnn.py                        |  81 +-
 python/tvm/contrib/debugger/debug_result.py        |   6 +-
 python/tvm/contrib/debugger/debug_runtime.py       |   2 +-
 python/tvm/contrib/nvcc.py                         |   2 +-
 python/tvm/contrib/popen_pool.py                   | 329 +++++++
 python/tvm/driver/build_module.py                  |  14 +-
 python/tvm/driver/tvmc/autotuner.py                | 259 +++++-
 python/tvm/driver/tvmc/compiler.py                 |  29 +-
 python/tvm/driver/tvmc/frontends.py                |  11 +-
 python/tvm/driver/tvmc/runner.py                   |   2 +-
 python/tvm/exec/popen_worker.py                    | 104 +++
 python/tvm/micro/build.py                          |  14 +-
 python/tvm/micro/compiler.py                       |   8 +-
 python/tvm/micro/contrib/zephyr.py                 |  14 +-
 python/tvm/micro/session.py                        |   1 +
 python/tvm/micro/transport/serial.py               |   2 +-
 python/tvm/relay/__init__.py                       |   1 +
 python/tvm/relay/dataflow_pattern/__init__.py      |  81 +-
 python/tvm/relay/expr.py                           |   8 +-
 python/tvm/relay/frontend/common.py                |   9 +-
 python/tvm/relay/frontend/mxnet.py                 |  24 +-
 python/tvm/relay/frontend/onnx.py                  | 430 +++++++--
 python/tvm/relay/frontend/pytorch.py               |  41 +-
 python/tvm/relay/frontend/pytorch_utils.py         | 153 +++-
 python/tvm/relay/frontend/tensorflow.py            |  33 +-
 python/tvm/relay/frontend/tflite.py                | 373 ++++++--
 python/tvm/relay/op/__init__.py                    |   1 +
 python/tvm/relay/op/_algorithm.py                  |   9 +-
 python/tvm/relay/op/_tensor.py                     |   1 +
 python/tvm/relay/op/algorithm.py                   |  24 +-
 python/tvm/relay/op/contrib/arm_compute_lib.py     | 150 +++-
 python/tvm/relay/op/contrib/tensorrt.py            |   6 +-
 python/tvm/relay/op/dyn/_transform.py              |  29 +-
 python/tvm/relay/op/nn/_nn.py                      |  93 +-
 python/tvm/relay/op/nn/nn.py                       |  69 +-
 python/tvm/relay/op/op.py                          |  12 +-
 python/tvm/relay/op/op_attrs.py                    |   5 +
 .../cuda/rcnn => relay/op/random}/__init__.py      |   6 +-
 .../__init__.py => relay/op/random/_kernel.py}     |  16 +-
 python/tvm/relay/op/{ => random}/_make.py          |   2 +-
 python/tvm/relay/op/random/kernel.py               | 134 +++
 python/tvm/relay/op/strategy/cuda.py               |  96 +-
 python/tvm/relay/op/strategy/generic.py            | 133 ++-
 python/tvm/relay/op/strategy/mali.py               |  72 +-
 python/tvm/relay/op/strategy/x86.py                | 117 ++-
 python/tvm/relay/op/tensor.py                      |   9 +-
 python/tvm/relay/op/vision/nms.py                  |   8 +-
 python/tvm/relay/testing/__init__.py               |   6 +-
 python/tvm/relay/transform/transform.py            |   9 +-
 python/tvm/runtime/module.py                       |  40 +-
 python/tvm/script/parser.py                        |  25 +-
 python/tvm/script/scope_handler.py                 |  21 +-
 python/tvm/support.py                              |  25 +
 python/tvm/target/target.py                        |   2 +-
 python/tvm/te/hybrid/calls.py                      |  20 +-
 python/tvm/te/hybrid/parser.py                     |  14 +-
 python/tvm/testing.py                              |  31 +
 python/tvm/tir/__init__.py                         |   4 +-
 python/tvm/tir/buffer.py                           |   5 +-
 python/tvm/tir/ir_builder.py                       |  28 +-
 python/tvm/tir/op.py                               |  28 +-
 python/tvm/tir/stmt.py                             |  58 +-
 python/tvm/topi/__init__.py                        |   1 +
 python/tvm/topi/arm_cpu/conv2d_int8.py             |   7 +-
 python/tvm/topi/cuda/__init__.py                   |   1 +
 python/tvm/topi/cuda/argwhere.py                   | 524 ++---------
 python/tvm/topi/cuda/batch_matmul.py               |  14 +-
 python/tvm/topi/cuda/batch_matmul_tensorcore.py    | 315 +++++++
 python/tvm/topi/cuda/conv2d.py                     |  24 +-
 python/tvm/topi/cuda/conv2d_int8.py                |   7 +-
 python/tvm/topi/cuda/conv2d_nhwc_tensorcore.py     |   1 +
 python/tvm/topi/cuda/conv2d_transpose_nchw.py      |   7 +-
 python/tvm/topi/cuda/conv2d_winograd.py            |  11 +-
 python/tvm/topi/cuda/conv3d.py                     |  26 +-
 python/tvm/topi/cuda/conv3d_ndhwc_tensorcore.py    |   1 +
 python/tvm/topi/cuda/dense.py                      |   3 +-
 python/tvm/topi/cuda/dense_tensorcore.py           |   1 +
 python/tvm/topi/cuda/injective.py                  |  19 +-
 python/tvm/topi/cuda/nms.py                        | 794 +++++++++++------
 python/tvm/topi/cuda/rcnn/proposal.py              |   6 +-
 python/tvm/topi/cuda/reduction.py                  |   9 +-
 python/tvm/topi/cuda/scan.py                       | 406 +++++++++
 python/tvm/topi/cuda/scatter.py                    | 191 ++--
 python/tvm/topi/cuda/sort.py                       | 691 ++++++++++-----
 python/tvm/topi/cuda/sparse.py                     |  70 +-
 python/tvm/topi/generic/conv2d.py                  |  15 +-
 python/tvm/topi/generic/sort.py                    |  17 +
 python/tvm/topi/mali/conv2d.py                     |  75 +-
 python/tvm/topi/nn/batch_matmul.py                 |  32 +-
 python/tvm/topi/nn/conv2d.py                       | 125 ++-
 python/tvm/topi/nn/conv3d.py                       |  40 +-
 python/tvm/topi/nn/dense.py                        |  30 +-
 python/tvm/topi/nn/depthwise_conv2d.py             |  33 +-
 python/tvm/topi/nn/softmax.py                      |  45 +-
 python/tvm/topi/nn/sparse.py                       | 152 +++-
 python/tvm/topi/{cuda/ssd => random}/__init__.py   |   6 +-
 python/tvm/topi/random/kernel.py                   | 410 +++++++++
 python/tvm/topi/sort.py                            |  42 +
 python/tvm/topi/sparse/csrmm.py                    |   4 +-
 python/tvm/topi/sparse/csrmv.py                    |   2 +-
 python/tvm/topi/sparse/dense.py                    |   8 +-
 python/tvm/topi/testing/depthwise_conv2d_python.py |   2 +-
 python/tvm/topi/utils.py                           |   5 +
 python/tvm/topi/vision/nms.py                      |  16 +-
 python/tvm/topi/vision/rcnn/proposal.py            |   6 +-
 python/tvm/topi/x86/batch_matmul.py                |   2 +-
 python/tvm/topi/x86/conv2d.py                      |  16 +-
 python/tvm/topi/x86/conv2d_alter_op.py             |  84 +-
 python/tvm/topi/x86/conv2d_avx_1x1.py              |  11 +-
 python/tvm/topi/x86/conv2d_avx_common.py           |  14 +-
 python/tvm/topi/x86/conv2d_int8.py                 |  14 +-
 python/tvm/topi/x86/depthwise_conv2d.py            |   9 +-
 python/tvm/topi/x86/scatter.py                     |   2 +-
 python/tvm/topi/x86/sparse.py                      |  18 +-
 rust/tvm-rt/src/array.rs                           |  34 +-
 rust/tvm/src/ir/relay/attrs/nn.rs                  |  36 +
 rust/tvm/src/ir/relay/attrs/transform.rs           |  52 ++
 src/arith/canonical_simplify.cc                    | 161 ++++
 src/auto_scheduler/compute_dag.cc                  |  65 +-
 src/auto_scheduler/feature.cc                      |  36 +-
 src/auto_scheduler/measure.cc                      |  27 +
 src/auto_scheduler/measure_record.cc               |  35 +-
 src/auto_scheduler/search_policy/sketch_policy.cc  | 105 ++-
 src/auto_scheduler/search_policy/sketch_policy.h   |  41 +
 .../search_policy/sketch_policy_rules.cc           |  40 +-
 .../search_policy/sketch_policy_rules.h            |  23 +
 src/auto_scheduler/search_policy/utils.h           |   7 +-
 src/auto_scheduler/search_task.cc                  |  26 +-
 src/auto_scheduler/transform_step.cc               |  35 +-
 src/autotvm/feature_visitor.cc                     |  14 +-
 src/driver/driver_api.cc                           |   3 +-
 src/printer/tir_text_printer.cc                    |  19 +-
 src/printer/tvmscript_printer.cc                   |  21 +-
 src/relay/analysis/type_solver.cc                  |  18 +-
 src/relay/analysis/type_solver.h                   |   3 +-
 src/relay/analysis/util.cc                         |  13 +-
 src/relay/backend/build_module.cc                  |  18 +-
 src/relay/backend/compile_engine.cc                |  52 +-
 .../backend/contrib/arm_compute_lib/codegen.cc     |  48 +-
 src/relay/backend/contrib/codegen_c/codegen.cc     |  53 +-
 src/relay/backend/contrib/codegen_c/codegen_c.h    | 104 ++-
 src/relay/backend/contrib/dnnl/codegen.cc          |   3 +-
 src/relay/backend/contrib/verilator/codegen.cc     |  30 +-
 src/relay/backend/utils.h                          |   9 +
 src/relay/backend/vm/compiler.cc                   |   7 +-
 src/relay/backend/vm/lambda_lift.cc                |  31 +-
 src/relay/ir/dataflow_matcher.cc                   |  74 +-
 src/relay/ir/dataflow_pattern.cc                   | 119 ++-
 src/relay/ir/dataflow_pattern_functor.cc           |  13 +
 src/relay/ir/indexed_graph.cc                      |  13 +
 src/relay/op/algorithm/{argsort.cc => sort.cc}     |  26 +-
 src/relay/op/annotation/annotation.cc              |   6 +
 src/relay/op/device_copy.cc                        |   1 +
 src/relay/op/dyn/tensor/transform.cc               |   1 -
 src/relay/op/image/grid_sample.cc                  |   3 +-
 src/relay/op/make_op.h                             |   2 +
 src/relay/op/memory/memory.cc                      |   2 +-
 src/relay/op/nn/convolution.cc                     |   4 +-
 src/relay/op/nn/convolution.h                      |  24 +-
 src/relay/op/nn/nn.cc                              |  61 +-
 src/relay/op/nn/nn.h                               |  10 +-
 src/relay/op/nn/pooling.cc                         |   2 +
 src/relay/op/nn/sparse.cc                          |  88 +-
 src/relay/op/random/kernel.cc                      |  89 ++
 src/relay/op/tensor/reduce.cc                      |   3 +-
 src/relay/op/tensor/transform.cc                   | 199 ++++-
 src/relay/op/tensor/transform.h                    |  20 +-
 src/relay/op/vision/nms.cc                         |  29 +-
 src/relay/op/vm/vm.cc                              |   4 +-
 src/relay/transforms/annotate_target.cc            | 188 +++-
 .../transforms/auto_scheduler_layout_rewrite.cc    |  26 +-
 .../transforms/combine_parallel_batch_matmul.cc    |   7 +-
 src/relay/transforms/combine_parallel_dense.cc     |  16 +
 src/relay/transforms/combine_parallel_op_batch.h   |   2 +-
 src/relay/transforms/convert_sparse_dense.cc       |   4 +-
 src/relay/transforms/dynamic_to_static.cc          |   2 +-
 src/relay/transforms/fuse_ops.cc                   |   2 +-
 src/relay/transforms/pass_utils.h                  |   6 +-
 src/relay/transforms/simplify_expr.cc              |   9 +-
 src/relay/transforms/type_infer.cc                 |  12 +-
 src/runtime/c_runtime_api.cc                       |   9 +
 src/runtime/contrib/arm_compute_lib/acl_runtime.cc |  69 +-
 src/runtime/contrib/arm_compute_lib/acl_utils.cc   |  10 +
 src/runtime/contrib/arm_compute_lib/acl_utils.h    |   9 +
 src/runtime/contrib/cblas/gemm_common.h            |   5 +-
 src/runtime/contrib/mps/conv.mm                    |   9 +-
 src/runtime/contrib/sort/sort.cc                   |  63 +-
 src/runtime/contrib/tensorrt/tensorrt_builder.cc   |  18 +-
 src/runtime/contrib/tensorrt/tensorrt_ops.cc       |   1 -
 src/runtime/contrib/thrust/thrust.cu               | 202 +++--
 src/runtime/contrib/verilator/verilator_runtime.cc |  69 +-
 src/runtime/crt/common/crt_runtime_api.c           | 174 +++-
 src/runtime/crt/host/crt_config.h                  |   2 +-
 src/runtime/crt/host/main.cc                       |  38 +-
 src/runtime/crt/utvm_rpc_common/session.cc         |   5 +-
 src/runtime/crt/utvm_rpc_server/rpc_server.cc      |  12 +-
 src/runtime/graph/debug/graph_runtime_debug.cc     | 132 ++-
 src/runtime/metal/metal_module.mm                  |   3 +-
 src/runtime/micro/micro_session.cc                 |  30 +-
 src/runtime/minrpc/minrpc_server.h                 |   1 +
 src/runtime/thread_pool.cc                         |   4 +-
 src/runtime/thread_storage_scope.h                 |   6 +-
 src/runtime/vm/vm.cc                               |  17 +-
 src/runtime/vulkan/vulkan_common.h                 |   5 +-
 src/runtime/vulkan/vulkan_module.h                 |   5 +-
 src/runtime/vulkan/vulkan_shader.h                 |   5 +-
 src/runtime/vulkan/vulkan_stream.h                 |   5 +-
 src/support/ffi_testing.cc                         |  42 +
 src/target/func_registry_generator.cc              |   2 +-
 src/target/func_registry_generator.h               |   7 +-
 src/target/generic_func.cc                         |   4 +-
 src/target/llvm/codegen_cpu.cc                     |  15 +-
 src/target/llvm/codegen_llvm.cc                    |  16 +-
 src/target/llvm/llvm_module.cc                     |  14 +-
 src/target/source/codegen_c.cc                     |   9 +-
 src/target/source/codegen_c_host.cc                |  31 +-
 src/target/source/codegen_c_host.h                 |   8 +-
 src/target/source/codegen_cuda.cc                  |  10 +-
 src/target/source/codegen_metal.cc                 |   3 +
 src/target/source/codegen_source_base.h            |  22 +-
 src/target/source/intrin_rule_cuda.cc              |  12 +
 src/target/source/source_module.cc                 | 183 +++-
 src/target/spirv/codegen_spirv.cc                  |   2 +-
 src/te/operation/cross_thread_reduction.cc         |   6 +-
 src/te/operation/hybrid_op.cc                      |  27 +-
 src/te/operation/op_utils.cc                       |  36 +-
 src/te/operation/op_utils.h                        |  10 +-
 .../schedule_postproc_rewrite_for_tensor_core.cc   |   3 +-
 src/tir/ir/buffer.cc                               |  14 +-
 src/tir/ir/stmt.cc                                 |  45 +-
 src/tir/ir/stmt_functor.cc                         |  14 +-
 src/tir/op/builtin.cc                              |   4 +
 src/tir/op/op.cc                                   |   4 +
 src/tir/transforms/combine_context_call.cc         |   2 +-
 src/tir/transforms/inject_double_buffer.cc         |   3 +-
 src/tir/transforms/inject_prefetch.cc              |   4 +-
 src/tir/transforms/inject_virtual_thread.cc        |   7 +-
 src/tir/transforms/ir_utils.cc                     |   3 +-
 src/tir/transforms/loop_partition.cc               |   4 +-
 src/tir/transforms/lower_custom_datatypes.cc       | 147 +++-
 src/tir/transforms/lower_thread_allreduce.cc       |  16 +-
 src/tir/transforms/make_packed_api.cc              |  66 +-
 src/tir/transforms/narrow_datatype.cc              |   4 +-
 src/tir/transforms/storage_flatten.cc              |   4 +-
 src/tir/transforms/storage_rewrite.cc              |  42 +-
 src/tir/transforms/unroll_loop.cc                  |  10 +-
 src/tir/transforms/vectorize_loop.cc               |  13 +-
 tests/cpp/dataflow_pattern_test.cc                 | 200 +++++
 tests/cpp/ir_functor_test.cc                       |  10 +-
 tests/cpp/relay_build_module_test.cc               |   4 +-
 tests/crt/session_test.cc                          |  14 +-
 tests/micro/qemu/test_zephyr.py                    | 197 ++++-
 tests/micro/qemu/zephyr-runtime/prj.conf           |   4 +
 tests/micro/qemu/zephyr-runtime/src/main.c         |  39 +-
 .../contrib/test_arm_compute_lib/infrastructure.py |  44 +-
 .../contrib/test_arm_compute_lib/test_conv2d.py    | 283 +++---
 .../contrib/test_arm_compute_lib/test_dense.py     | 102 ++-
 .../contrib/test_arm_compute_lib/test_network.py   |  29 +-
 .../contrib/test_arm_compute_lib/test_reshape.py   |   1 -
 tests/python/contrib/test_dlpack.py                |   2 +-
 tests/python/contrib/test_popen_pool.py            |  71 ++
 tests/python/contrib/test_sort.py                  |  56 +-
 tests/python/contrib/test_tensorrt.py              |   2 +
 tests/python/contrib/test_thrust.py                | 123 +++
 .../contrib/test_verilator/infrastructure.py       |  39 +-
 tests/python/driver/tvmc/test_autoscheduler.py     | 101 +++
 tests/python/driver/tvmc/test_autotuner.py         |   2 +-
 tests/python/frontend/mxnet/test_forward.py        |  54 +-
 tests/python/frontend/onnx/test_forward.py         | 974 +++++++++------------
 tests/python/frontend/pytorch/test_forward.py      | 111 ++-
 .../frontend/pytorch/test_object_detection.py      |  83 +-
 tests/python/frontend/tensorflow/test_forward.py   |  86 +-
 tests/python/frontend/tflite/test_forward.py       | 323 +++++--
 tests/python/integration/test_dot.py               |   4 +-
 tests/python/integration/test_reduce.py            |  14 +-
 tests/python/relay/test_any.py                     | 144 ++-
 .../relay/test_auto_scheduler_layout_rewrite.py    | 124 ++-
 .../relay/test_auto_scheduler_task_extraction.py   |  84 +-
 tests/python/relay/test_auto_scheduler_tuning.py   |  32 +-
 .../test_const.py}                                 |  36 +-
 tests/python/relay/test_dataflow_pattern.py        | 134 +++
 tests/python/relay/test_op_level1.py               |  10 +
 tests/python/relay/test_op_level3.py               |  60 +-
 tests/python/relay/test_op_level5.py               |   4 +-
 tests/python/relay/test_op_level6.py               |  68 +-
 tests/python/relay/test_pass_annotate_target.py    | 261 +++++-
 .../relay/test_pass_combine_parallel_dense.py      |   2 -
 tests/python/relay/test_pass_convert_op_layout.py  | 272 ++++++
 tests/python/relay/test_pass_lambda_lift.py        |   3 +
 tests/python/relay/test_pass_partition_graph.py    |  28 +-
 tests/python/relay/test_prng.py                    | 142 +++
 tests/python/relay/test_type_infer.py              |  14 +
 tests/python/topi/python/test_topi_argwhere.py     |   7 +-
 tests/python/topi/python/test_topi_batch_matmul.py |  42 +-
 ...mul.py => test_topi_batch_matmul_tensorcore.py} |  23 +-
 tests/python/topi/python/test_topi_conv2d_int8.py  |  23 +-
 tests/python/topi/python/test_topi_conv2d_nchw.py  |  17 +
 .../topi/python/test_topi_depthwise_conv2d.py      |  23 +-
 tests/python/topi/python/test_topi_math.py         |  10 +-
 tests/python/topi/python/test_topi_prng.py         | 116 +++
 tests/python/topi/python/test_topi_reduce.py       |  26 +
 tests/python/topi/python/test_topi_sort.py         |  53 ++
 tests/python/topi/python/test_topi_sparse.py       |  65 +-
 tests/python/topi/python/test_topi_transform.py    |   1 +
 tests/python/topi/python/test_topi_vision.py       |  21 +-
 .../unittest/test_arith_canonical_simplify.py      |  41 +
 tests/python/unittest/test_arith_domain_touched.py |   6 +-
 .../python/unittest/test_auto_scheduler_common.py  |  35 +-
 .../unittest/test_auto_scheduler_compute_dag.py    |  23 +-
 .../python/unittest/test_auto_scheduler_feature.py |   4 +-
 .../unittest/test_auto_scheduler_layout_rewrite.py |  16 +
 .../python/unittest/test_auto_scheduler_measure.py |  14 +-
 .../unittest/test_auto_scheduler_search_policy.py  | 201 +++--
 .../test_auto_scheduler_sketch_generation.py       |  61 +-
 tests/python/unittest/test_autotvm_common.py       |   2 +-
 tests/python/unittest/test_autotvm_measure.py      |  28 -
 tests/python/unittest/test_crt.py                  |  30 +-
 tests/python/unittest/test_link_params.py          |   4 +-
 tests/python/unittest/test_runtime_graph_debug.py  |  54 +-
 .../test_runtime_module_based_interface.py         |   3 +-
 .../python/unittest/test_runtime_module_export.py  |   2 +-
 tests/python/unittest/test_runtime_module_load.py  |   6 +-
 tests/python/unittest/test_target_codegen_cuda.py  |   2 +-
 tests/python/unittest/test_target_codegen_llvm.py  |   2 +-
 .../unittest/test_target_codegen_static_init.py    |   2 +-
 .../unittest/test_target_codegen_vm_basic.py       |   2 +-
 tests/python/unittest/test_te_hybrid_script.py     |   6 +-
 tests/python/unittest/test_tir_base.py             |  60 ++
 tests/python/unittest/test_tir_constructor.py      |   3 +-
 tests/python/unittest/test_tir_nodes.py            |   2 +-
 .../unittest/test_tir_transform_remove_no_op.py    |  11 +-
 .../unittest/test_tir_transform_storage_rewrite.py |   4 +-
 .../unittest/test_tir_transform_unroll_loop.py     |   8 +-
 .../unittest/test_tir_transform_vectorize.py       |  16 +-
 tests/scripts/task_ci_python_setup.sh              |   2 +-
 tests/scripts/task_config_build_arm.sh             |   1 +
 tests/scripts/task_config_build_cpu.sh             |   1 +
 tests/scripts/task_config_build_i386.sh            |   1 +
 ...tests.sh => task_python_arm_compute_library.sh} |   2 +-
 tests/scripts/task_sphinx_precheck.sh              |   2 +-
 tutorials/auto_scheduler/tune_conv2d_layer_cuda.py |  34 +-
 tutorials/auto_scheduler/tune_matmul_x86.py        |  31 +-
 tutorials/auto_scheduler/tune_network_cuda.py      |   2 +-
 .../{tune_network_x86.py => tune_network_mali.py}  | 143 ++-
 tutorials/auto_scheduler/tune_network_x86.py       |   2 +-
 tutorials/autotvm/tune_conv2d_cuda.py              |   1 +
 tutorials/autotvm/tune_relay_arm.py                |   2 +-
 tutorials/autotvm/tune_relay_cuda.py               |   8 +-
 tutorials/autotvm/tune_relay_mobile_gpu.py         |   5 +-
 tutorials/autotvm/tune_simple_template.py          |   3 +-
 tutorials/dev/low_level_custom_pass.py             |   4 +-
 tutorials/frontend/deploy_sparse.py                |  20 +-
 tutorials/frontend/from_onnx.py                    |  23 +-
 tutorials/frontend/using_external_lib.py           |   1 +
 tutorials/get_started/relay_quick_start.py         |   1 +
 tutorials/language/extern_op.py                    |   1 +
 tutorials/language/tensorize.py                    |   1 +
 tutorials/micro/README.txt                         |   4 +-
 tutorials/micro/micro_reference_vm.py              |  23 +-
 tutorials/micro/micro_tflite.py                    | 181 ++--
 tutorials/optimize/opt_matmul_auto_tensorcore.py   |   1 +
 vta/python/vta/transform.py                        |  12 +-
 vta/scripts/tune_conv2d.py                         |   2 +-
 vta/scripts/tune_conv2d_transpose.py               |   2 +-
 vta/scripts/tune_dense.py                          |   2 +-
 vta/scripts/tune_group_conv2d.py                   |   2 +-
 vta/scripts/tune_resnet.py                         |   2 +-
 .../python/integration/test_benchmark_gemm.py      |  23 +-
 vta/tutorials/autotvm/tune_relay_vta.py            |  33 +-
 450 files changed, 15036 insertions(+), 4621 deletions(-)
 mode change 100644 => 100755 docker/install/ubuntu_install_qemu.sh
 create mode 100644 docs/dev/microtvm_design.rst
 create mode 100644 docs/microtvm/index.rst
 copy include/tvm/relay/attrs/{debug.h => random.h} (64%)
 create mode 100644 python/tvm/contrib/popen_pool.py
 create mode 100644 python/tvm/exec/popen_worker.py
 copy python/tvm/{topi/cuda/rcnn => relay/op/random}/__init__.py (91%)
 copy python/tvm/{autotvm/graph_tuner/utils/__init__.py => relay/op/random/_kernel.py} (63%)
 copy python/tvm/relay/op/{ => random}/_make.py (93%)
 create mode 100644 python/tvm/relay/op/random/kernel.py
 create mode 100644 python/tvm/topi/cuda/batch_matmul_tensorcore.py
 create mode 100644 python/tvm/topi/cuda/scan.py
 copy python/tvm/topi/{cuda/ssd => random}/__init__.py (87%)
 create mode 100644 python/tvm/topi/random/kernel.py
 copy src/relay/op/algorithm/{argsort.cc => sort.cc} (69%)
 create mode 100644 src/relay/op/random/kernel.cc
 create mode 100644 tests/cpp/dataflow_pattern_test.cc
 create mode 100644 tests/python/contrib/test_popen_pool.py
 create mode 100644 tests/python/contrib/test_thrust.py
 create mode 100644 tests/python/driver/tvmc/test_autoscheduler.py
 copy tests/python/{unittest/test_runtime_vm_profiler.py => relay/test_const.py} (51%)
 create mode 100644 tests/python/relay/test_prng.py
 copy tests/python/topi/python/{test_topi_batch_matmul.py => test_topi_batch_matmul_tensorcore.py} (77%)
 create mode 100644 tests/python/topi/python/test_topi_prng.py
 create mode 100644 tests/python/unittest/test_tir_base.py
 copy tests/scripts/{task_python_ethosn_tests.sh => task_python_arm_compute_library.sh} (92%)
 copy tutorials/auto_scheduler/{tune_network_x86.py => tune_network_mali.py} (75%)

Reply via email to