This is an automated email from the ASF dual-hosted git repository.
ruihangl pushed a change to branch unity
in repository https://gitbox.apache.org/repos/asf/tvm.git
from 3fad0109bf [Unity][Contrib] Add vLLM paged attention kernel (#16350)
add 3190f284a3 [DOC] Add v0.14.0 docs to site (#16152)
add bce82432f9 [Relay][Pytorch] Add support for `aten::unflatten` (#16131)
add 1a2cc18091 [Relay] conv3d depthwise bug fix (#16151)
add f38dc146e4 [TOPI][Relay] Add conv2d NHWC hybrid schedule for `arm_cpu`
(#16106)
add 26aeaee046 [Community] Shuai Yuan -> Committers (#16162)
add 3fd3a63652 [Relay][Pytorch] Add support for `aten::linalg_vector_norm`
(#16123)
add e5c6f74460 [CI][ADRENO] Enhancements to Adreno specific CI utils
(#15991)
add f33722e59a [Community] Ruihang Lai -> PMC (#16165)
add d9d3bc585f [Community] Bohan Hou -> PMC (#16166)
add 79052574be [Community] Qiang Zhang -> Committer (#16164)
add 3eec10f16a [COMMUNITY] New Reviewer: Yixin Dong (#16172)
add 604b263dd5 [BugFix] [Relay][Pytorch] Fix missing `.dtype` (#16167)
add 3136ff4bb6 [FRONTEND][KERAS] Fix bug concat convert for NCHW (#16159)
add 97ddd667c8 [Relay][Pytorch] Add support for
`aten::scaled_dot_product_attention` (#16143)
add 1994f402e6 Enable ccache to accelerate contrib compilation (#16176)
add e9a3b60f49 [Relay][Pytorch] Fix bug when converting models with
torch.nn.ParameterList (#16180)
add 2eb17fa87f [Device][Metal] Fix metal warp size (#16192)
add 37329bf8c3 [BugFix] Fix the error of reloading the model library on
the ROCm platform: "MIOpen Error: No invoker was registered for convolution
forward." (#16190)
add 71081a8616 Fix IRModule initialization with attrs (#16202)
add fe27973da0 Bump cryptography from 37.0.2 to 41.0.6 in /docker/python
(#16174)
add bf071dea54 [Relay][Frontend] Preserve Pytorch Span Names (#16171)
add 65121c878a [Relay][Frontend] Add support for aten::concat (#16199)
add a9fcac1a47 [Python] Fix setup.py for inplace build (#16214)
add a59db03777 remove deprecated np.int in slice converter (pytorch)
(#16221)
add b0e146f767 [TIR] ConvertSSA process entry func first (#16236)
add c8bfdb21ab [BugFix][TIR] Fix dynamic smem merge leaf alloc (#16216)
add 37c38acc46 [Target] Add Jetson AGX Orin tags (#16231)
add b3eec91ee6 [TFLite] Add support for quantized mirror pad (#16243)
add 870246a369 [LoopPartition] Fix a bug of LoopPartition in single point
scenarios (#16104)
add 799e81036d [ARITH] Simplify nested if_then_else when constant is
appearing in then_expr (#16227)
add 3df798d422 [Relay][TOPI] Add support for group_conv1d_transpose_ncw
for generic (#16248)
add 943861c85c [TIR][Schedule] TileWithTensorIntrin skip incorrect
ComputeInline for input-padding (#16239)
add 759ee1236a [Support] Add Interrupt Handling in Pipe (#16255)
add 3a57a40c1b [RUNTIME][CLML] Fix for CLML ops and enable more test case
(#15896)
add f36a093c20 Update conv2d.py (#16262)
add d310d7db59 [TOPI] Add support for group_conv3d_transpose_ncdhw for
generic (#16259)
add a906504389 [TVMScript] Disable concise scoping when the scope stmt is
explicitly annotated (#16271)
add a050696ca5 replace deprecated np.int with int to avoid crash (#16279)
add 1c4538947b [BugFix] Fixed Inappropriate Logical Expression (#16272)
add 506eff23b0 [Relay][Frontend][QNN] fix access `param_debug_name_map` to
node output name in fx-quantized graph node replacement (#16217)
add 2da3798dd1 [Relay][Frontend][Torch] add aten:broadcast_to (#16319)
add 8b2cff8ab9 [Doc] Fix minor error in doc (Add an operator to Relay)
(#16282)
add c157ca1a52 [BugFix] Update pillow usage (#16269)
add 7445b8723c [release] Update version to 0.15.0 on main branch
add e69363ab1e [release] Update version to 0.16.dev0 on main branch
add 97f6e6507f [CI] Upgrade cmake version to 3.24.0 (#16336)
add 8eec0bff6b [TIR][Transform] Implement InlinePrivateFunctions (#16184)
add eb15d04c3b [TIR] In SplitHostDevice, check for variables in thread
extents (#16250)
add 42b4f213a7 [Hexagon][UnitTest] Disable flaky quantization test (#16337)
add 380147d059 [Doc] Fix minor error in "Expressions in Relay" (#16346)
add 56bdcee750 [CMake] Use ccache as CMAKE_CUDA_COMPILER_LAUNCHER (#16341)
add 5308739741 [TIR] Allow sync threads inside condition (#16345)
add ae7d9dbe06 [Codegen] Fix if_then_else codegen (#16242)
add e3d031bc7c [CMake][MSVC] Disable permissive mode for MSVC builds
(#16343)
add 51bdaec6e3 [Docker] Upgrade pip in i386 container (#16348)
add b47280b1fa Merge branch 'main' into 'unity'
add 155dd73ac6 Fix after merging 'main' into 'unity'
No new revisions were added by this update.
Summary of changes:
CMakeLists.txt | 5 +
CONTRIBUTORS.md | 8 +-
cmake/utils/CCache.cmake | 23 +-
conda/recipe/meta.yaml | 2 +-
docker/install/ubuntu2004_install_python.sh | 2 +-
docker/install/ubuntu_install_cmake_source.sh | 3 +-
docker/python/bootstrap-requirements.txt | 26 +-
docs/conf.py | 1 +
docs/dev/how_to/relay_add_op.rst | 4 +-
docs/reference/langref/relay_expr.rst | 2 +-
include/tvm/relay/attrs/nn.h | 10 +-
include/tvm/runtime/c_runtime_api.h | 2 +-
include/tvm/tir/analysis.h | 29 +-
include/tvm/tir/builtin.h | 6 +
include/tvm/tir/transform.h | 7 +
python/setup.py | 7 +-
python/tvm/_ffi/libinfo.py | 2 +-
python/tvm/ir/module.py | 2 -
python/tvm/relay/frontend/keras.py | 2 +-
python/tvm/relay/frontend/pytorch.py | 326 ++++++-
python/tvm/relay/frontend/qnn_torch.py | 11 +-
python/tvm/relay/frontend/tflite.py | 6 -
python/tvm/relay/op/contrib/clml.py | 118 ++-
python/tvm/relay/op/nn/_nn.py | 2 +-
python/tvm/relay/op/nn/nn.py | 12 +-
python/tvm/relay/op/strategy/arm_cpu.py | 123 ++-
python/tvm/relay/op/strategy/generic.py | 52 +-
python/tvm/relay/testing/yolo_detection.py | 6 +-
python/tvm/script/ir_builder/tir/ir.py | 2 +
python/tvm/tir/op.py | 17 +
python/tvm/tir/transform/transform.py | 11 +
python/tvm/topi/arm_cpu/arm_utils.py | 105 +--
python/tvm/topi/arm_cpu/conv2d.py | 111 +++
python/tvm/topi/arm_cpu/conv2d_alter_op.py | 57 +-
python/tvm/topi/arm_cpu/conv2d_gemm.py | 345 +++++---
python/tvm/topi/arm_cpu/conv2d_int8.py | 96 +--
python/tvm/topi/generic/nn.py | 34 +
python/tvm/topi/intel_graphics/conv2d.py | 3 +-
python/tvm/topi/nn/conv1d_transpose.py | 142 +++-
python/tvm/topi/nn/conv2d.py | 30 +-
python/tvm/topi/nn/conv3d_transpose.py | 75 ++
python/tvm/topi/testing/__init__.py | 5 +-
.../topi/testing/conv1d_transpose_ncw_python.py | 12 +
.../topi/testing/conv3d_transpose_ncdhw_python.py | 40 +-
src/arith/ir_mutator_with_analyzer.cc | 5 +-
src/relay/backend/contrib/clml/codegen.cc | 2 +-
src/relay/op/nn/convolution.cc | 99 ++-
src/runtime/contrib/clml/clml_runtime.cc | 521 +++++++-----
src/runtime/contrib/miopen/conv_forward.cc | 21 +
src/runtime/metal/metal_device_api.mm | 8 +-
src/script/printer/tir/stmt.cc | 22 +-
src/support/errno_handling.h | 69 ++
src/support/pipe.h | 42 +-
src/support/socket.h | 65 +-
src/target/source/codegen_c.cc | 43 +-
src/target/tag.cc | 24 +
src/te/operation/create_primfunc.cc | 19 +-
src/tir/analysis/verify_well_formed.cc | 214 +++++
src/tir/ir/specialize.cc | 106 ++-
src/tir/ir/tir_visitor_with_path.cc | 434 ++++++++++
src/tir/ir/tir_visitor_with_path.h | 210 +++++
src/tir/op/builtin.cc | 4 +
src/tir/schedule/transform.cc | 6 +-
src/tir/transforms/inline_private_functions.cc | 300 +++++++
src/tir/transforms/ir_utils.cc | 54 +-
src/tir/transforms/loop_partition.cc | 35 +
.../transforms/merge_shared_memory_allocations.cc | 12 +-
src/tir/transforms/split_host_device.cc | 2 +-
src/tir/transforms/storage_access.cc | 30 +-
tests/python/codegen/test_target_codegen_cuda.py | 46 +
tests/python/contrib/test_clml/conftest.py | 21 +-
tests/python/contrib/test_clml/infrastructure.py | 242 +++---
tests/python/contrib/test_clml/test_network.py | 249 +++---
tests/python/contrib/test_clml/test_ops.py | 942 +++++++++++++++------
.../test_hexagon/test_pass_fq2i_avg_pool2d.py | 115 ++-
tests/python/dlight/test_gpu_gemv.py | 3 +-
tests/python/frontend/keras/test_forward.py | 24 +
tests/python/frontend/pytorch/test_forward.py | 211 ++++-
tests/python/frontend/pytorch/test_span_naming.py | 106 +++
tests/python/frontend/tflite/test_forward.py | 24 +-
tests/python/integration/test_arm_aprofile.py | 1 +
.../relax/test_backend_dispatch_sort_scan.py | 6 +-
.../relax/test_transform_legalize_ops_grad.py | 4 -
.../test_transform_legalize_ops_manipulate.py | 12 -
...st_transform_legalize_ops_search_statistical.py | 4 -
.../relay/strategy/test_select_implementation.py | 128 ++-
tests/python/relay/test_op_level2.py | 24 +
.../test_tir_analysis_verify_well_formed.py | 148 +++-
tests/python/tir-base/test_tir_specialize.py | 138 ++-
.../test_tir_inline_private_functions.py | 253 ++++++
.../test_tir_transform_convert_ssa.py | 216 +++++
.../test_tir_transform_loop_partition.py | 228 +++++
...form_merge_dynamic_shared_memory_allocations.py | 61 +-
.../tir-transform/test_tir_transform_simplify.py | 12 +
.../test_tir_transform_split_host_device.py | 72 ++
tests/python/topi/test_topi_conv2d_nhwc.py | 39 +
....py => test_topi_group_conv1d_transpose_ncw.py} | 61 +-
...y => test_topi_group_conv3d_transpose_ncdhw.py} | 76 +-
.../tvmscript/test_tvmscript_printer_annotation.py | 25 +
tests/scripts/ci.py | 2 +
tests/scripts/setup-pytest-env.sh | 4 +-
tests/scripts/task_python_adreno.sh | 22 +-
version.py | 2 +-
web/package.json | 2 +-
104 files changed, 6164 insertions(+), 1590 deletions(-)
create mode 100644 src/support/errno_handling.h
create mode 100644 src/tir/ir/tir_visitor_with_path.cc
create mode 100644 src/tir/ir/tir_visitor_with_path.h
create mode 100644 src/tir/transforms/inline_private_functions.cc
create mode 100644 tests/python/frontend/pytorch/test_span_naming.py
create mode 100644 tests/python/tir-transform/test_tir_inline_private_functions.py
copy tests/python/topi/{test_topi_conv1d_transpose_ncw.py => test_topi_group_conv1d_transpose_ncw.py} (64%)
copy tests/python/topi/{test_topi_conv1d_transpose_ncw.py => test_topi_group_conv3d_transpose_ncdhw.py} (56%)