This is an automated email from the ASF dual-hosted git repository.
tqchen pushed a change to branch unity
in repository https://gitbox.apache.org/repos/asf/tvm.git
omit e5a4437413 [Unity][MSC][M0.4 && M0.5] Codegen && Test (#15645)
add ac99367aa4 [Relay] [Bugfix] Fix some bugs of dominator pattern (#15473)
add 9bcf0bc107 [Relay] add redirecting operation to dataflow pattern graph
(#15392)
add 18467c95dd [Script] Be more careful when generating ast.ExtSlice for
Subscript (#15483)
add 2bf127bdd3 [TVMScript] Allow use of Python builtins in script (#15492)
add 93356cdcbe [Bugfix][TOPI] Fix a bug in arm_cpu int8 conv2d i8mm
schedule (#15484)
add 40bac5717a [CI] Remove cython version pin (#15485)
add ae45b04772 [Relay][Strategy] Use x86 dense schedules for arm_cpu
(#15470)
add 8cadd1fbc5 [ARITH] Enhance Canonical Simplify for LE (#15471)
add 34cacb0a64 [VM][Textures] Enable OpenCL textures for VM (#15419)
add 9ff71f4a9f [CodeGenC] Handle GlobalVar callee as internal function
call (#15103)
add 8b37d4d11a [Relay][Strategy] Use x86 pool schedules for arm_cpu
(#15506)
add 77b71fc830 [CMSIS-NN] Support for Softmax Int16 operator (#15407)
add ff47aeac20 [docs] Add v0.13.0 docs to site (#15508)
add 907b29e544 [TOPI] check empty array of x86 injective's iters (#15513)
add ed8b82c5d6 Remove duplicate the word (#15524)
add 624f8a73c7 [TEST] Run tests/python/relay/aot tests in ci-cortexm
(#15519)
add 326f8e7435 Fix typo mistake and change whethe to whether (#15525)
add 81fd9f3476 Remove IRModule Dependency from Target (#15511)
add 6c33787cd2 [MetaSchedule] Enable subprocess to stdout for DEBUG level
(#15532)
add 513bd1a198 Remove duplicate msg word and condition inside the function
doc (#15530)
add a1d6e82291 [Runtime][Minor] Suppress verbose logging in Metal device
API (#15543)
add 927df59662 [Relay] Disable exception for ADT in mixed precision pass
(#15533)
add 94f6b37b06 [Relay][TFLite] Fix in qnn.conv2d when parameter groups not
equal to 1 (#15472)
add 482b3c2c99 [Fix] Fix the typo in compile flag (#15542)
add 2b718e5d00 Fixed search task comment (#15535)
add 63592fd71d [quantize] fix bug of annotate for output of add op (#15529)
add 1422c1826f fixed typo [TypoFix] (#15536)
add 208e01f485 [TVMScript] Create loop var with min_val dtype in for frame
(#15547)
add 096b4a28c0 Bump tornado from 6.1 to 6.3.3 in /apps/microtvm/cmsisnn
(#15553)
add f34e7256a1 Bump tornado from 6.1 to 6.3.3 in /apps/microtvm/ethosu
(#15552)
add 325c7aef90 Bump tornado from 6.1 to 6.3.3 in /apps/microtvm (#15554)
add 0c6fbb8923 [TVMScript] Use triple-quoted python strings for metadata
(#15564)
add 760c03085a [Arith] Fix handling of overlapping predicates (#15555)
add 8afa6d2571 [CUTLASS][Cherry-pick] Introduce several features of
cutlass profiler (#15573)
add ee54e98c78 Remove duplicate 'from' word inside python script (#15582)
add 5db18efab9 Remove duplicate load word inside .cc file (#15581)
add f45ed308cb Fix "to" duplicate word in python and C header file (#15580)
add 98320ab5bf [Testing] Allow Capitalized name in CompareBeforeAfter
(#15568)
add 925148e444 [TIR] Shuffle in PointerValueTypeRewrite for scalar reads
(#15517)
add 63b0c8fecc [Runtime] Enhance PackedFunc Metaprogramming with
`PackArgs` (#15595)
add 072a5c1bc0 [TVMScript] Optionally output the address as part of
variable names (#15579)
add a99bc9de29 [MetaSchedule] Fix metaschedule flop estimation for
non-integer loop dimensions (#15574)
add 32658f851a [ACL] Update Compute Library to v23.05.1 (#15600)
add 483a8c3234 [Arith] Add tvm::arith::PresburgerSetNode to work with
Presburger Set in MLIR (#14690)
add d0c94d447b [Bugfix][Relay][Strategy] Enable compile time
transformation of weights matrix for arm_cpu NHWC quantized conv2d (#15584)
add 6166121de4 [CMake] Add NCCL to TVM and TVM Runtime (#15605)
add 512b114322 [CPP_RPC] export listdir for RPC (#15537)
add b5dae98ebf [Arith] Fix detect linear equation with uint var (#15558)
add b9652a2db0 [Hopper TMA] CUDA codegen for async copy with barrier
synchronization (#15616)
add 220f57dc68 [DOCS] community strategy decision process (#15619)
add 344fd2d20c [ONNX][BugFix] Support If body with free variable from
graph input (#15602)
add aa805f2f86 [Runtime] Expose ModuleGetFunction as PackedFunc (#15623)
add 909c8fab8c [CMake] Add RCCL to TVM and TVM Runtime (#15624)
add 6554e2e082 [RPC] Enhance RPC Protocol to support TVM Object (#15631)
add 0e2fabd0e4 [BugFix][VTA] tvm.tir.Call has no name attribute (#15629)
add 0166400906 [Runtime] Utils to Stringify Device (#15630)
add 8f60213cb5 [Runtime] Serialization/Deserialization of runtime module
(#15244)
add c921781c46 [TIR] Output DeclBuffer in SplitHostDevice (#15493)
add 3c6f9c9bcc [Arith] Added simplification rule for multiple equality
compares (#15628)
add 34bd9be0a8 [Backport][Runtime] Fix ICE from Clang (#15637)
add 79f9e577f7 Do not link LLVM libraries into cpptest binary (#15639)
add 022299b51f [Arith] MLIR PresburgerSet compile fix mlir >= 160 (#15638)
add f9e6018cfe [Runtime] Make `export_library` parameters after
`file_name` keyword-only (#15658)
add d75083cd97 [Relay] Fix an adaptive_max_pool1d operator conversion bug
(#15386)
add 476d9d1d41 Add output_data_sec section in corstone300.ld (#15649)
add 04ee895d8d [IR] Use structural equal for Range equality (#15664)
add d26fdcf3d1 [Hopper TMA] Add CUDA codegen support for bulk asynchronous
copy (#15656)
add ea388e5e74 [CI] Allow Limit CPUs in Docker (#15668)
add 1e6e2b35ae [BugFix][Arith] IterMapRewriter abort rewriting once
failure (#15677)
add 90c64c6dce [MERGE] Merge main into unity 2023-09-06
add 3b6a788d77 Fix after merge
add 89c40611b3 [Unity][MSC][M0.4 && M0.5] Codegen && Test (#15645)
This update added new revisions after undoing existing revisions.
That is to say, some revisions that were in the old version of the
branch are not in the new version. This situation occurs
when a user --force pushes a change and generates a repository
containing something like this:
 * -- * -- B -- O -- O -- O   (e5a4437413)
            \
             N -- N -- N   refs/heads/unity (89c40611b3)
You should already have received notification emails for all of the O
revisions, and so the following emails describe only the N revisions
from the common base, B.
Any revisions marked "omit" are not gone; other references still
refer to them. Any revisions marked "discard" are gone forever.
No new revisions were added by this update.
Summary of changes:
CMakeLists.txt | 38 ++-
apps/android_camera/models/prepare_model.py | 2 +-
apps/android_rpc/tests/android_rpc_test.py | 6 +-
apps/benchmark/adreno/adreno_gpu_bench_clml.py | 2 +-
apps/benchmark/adreno/adreno_gpu_bench_texture.py | 2 +-
apps/benchmark/arm_cpu_imagenet_bench.py | 2 +-
apps/benchmark/mobile_gpu_imagenet_bench.py | 2 +-
apps/cpp_rpc/rpc_env.cc | 16 +
apps/hexagon_launcher/README.md | 2 +-
apps/ios_rpc/tests/ios_rpc_mobilenet.py | 2 +-
apps/ios_rpc/tests/ios_rpc_test.py | 4 +-
apps/microtvm/cmsisnn/requirements.txt | 54 +--
apps/microtvm/ethosu/corstone300.ld | 1 +
apps/microtvm/ethosu/requirements.txt | 54 +--
apps/microtvm/poetry.lock | 175 +---------
apps/microtvm/pyproject.toml | 2 +-
apps/topi_recipe/gemm/android_gemm_square.py | 2 +-
cmake/config.cmake | 10 +
cmake/modules/LLVM.cmake | 3 +
cmake/modules/LibInfo.cmake | 8 +
cmake/utils/FindLLVM.cmake | 12 +
cmake/utils/FindRCCL.cmake | 52 +++
docker/bash.sh | 17 +-
.../ubuntu_download_arm_compute_lib_binaries.sh | 2 +-
docker/install/ubuntu_install_python_package.sh | 2 +-
docs/conf.py | 10 +-
docs/contribute/community.rst | 11 +
docs/how_to/deploy/android.rst | 2 +-
.../how_to/deploy_models/deploy_model_on_adreno.py | 2 +-
.../deploy_models/deploy_model_on_android.py | 2 +-
.../ci_logs/resnet-18-NHWC-B1-cuda.json | 48 +--
.../tune_with_autoscheduler/tune_network_arm.py | 2 +-
.../tune_with_autoscheduler/tune_network_mali.py | 2 +-
gallery/how_to/tune_with_autotvm/tune_relay_arm.py | 2 +-
.../tune_with_autotvm/tune_relay_mobile_gpu.py | 2 +-
.../how_to/work_with_microtvm/micro_mlperftiny.py | 2 +-
include/tvm/arith/analyzer.h | 4 +-
include/tvm/node/script_printer.h | 3 +
include/tvm/relay/dataflow_pattern.h | 6 +
include/tvm/relay/transform.h | 14 +-
include/tvm/runtime/module.h | 5 +
include/tvm/runtime/ndarray.h | 8 +-
include/tvm/runtime/vm/bytecode.h | 19 +-
include/tvm/runtime/vm/executable.h | 5 +-
include/tvm/target/codegen.h | 16 +
include/tvm/target/target_kind.h | 2 +-
include/tvm/tir/analysis.h | 4 +-
include/tvm/tir/builtin.h | 63 +++-
include/tvm/tir/schedule/schedule.h | 2 +-
include/tvm/topi/x86/injective.h | 2 +-
python/tvm/arith/__init__.py | 1 +
python/tvm/arith/analyzer.py | 10 +-
python/tvm/arith/int_set.py | 8 +
python/tvm/auto_scheduler/measure.py | 2 +-
python/tvm/auto_scheduler/search_task.py | 2 +-
python/tvm/autotvm/measure/measure_methods.py | 2 +-
python/tvm/contrib/cc.py | 6 +-
python/tvm/contrib/pipeline_executor_build.py | 2 +-
python/tvm/contrib/torch/optimize_torch.py | 24 +-
python/tvm/driver/tvmc/model.py | 5 +-
python/tvm/ir/expr.py | 6 +
python/tvm/meta_schedule/builder/local_builder.py | 2 +-
python/tvm/meta_schedule/runner/local_runner.py | 7 +-
.../meta_schedule/testing/custom_builder_runner.py | 2 +-
python/tvm/relay/backend/contrib/ethosu/codegen.py | 2 +-
python/tvm/relay/backend/executor_factory.py | 4 +-
python/tvm/relay/dataflow_pattern/__init__.py | 13 +
python/tvm/relay/frontend/onnx.py | 9 +-
python/tvm/relay/frontend/pytorch.py | 12 +-
python/tvm/relay/op/contrib/cmsisnn.py | 14 +-
python/tvm/relay/op/contrib/dnnl.py | 2 +-
python/tvm/relay/op/strategy/arm_cpu.py | 98 +++---
python/tvm/relay/quantize/_annotate.py | 4 +-
python/tvm/rpc/client.py | 17 +
python/tvm/runtime/module.py | 5 +-
python/tvm/runtime/script_printer.py | 13 +
python/tvm/script/ir_builder/tir/ir.py | 12 +
python/tvm/script/parser/core/doc.py | 21 +-
python/tvm/script/parser/core/evaluator.py | 21 +-
python/tvm/testing/runner.py | 2 +-
python/tvm/testing/utils.py | 27 +-
python/tvm/tir/__init__.py | 13 +-
python/tvm/tir/op.py | 164 +++++++++-
python/tvm/tir/schedule/schedule.py | 2 +-
python/tvm/tir/transform/transform.py | 14 +
python/tvm/topi/arm_cpu/conv2d_alter_op.py | 23 +-
python/tvm/topi/arm_cpu/conv2d_gemm.py | 45 ++-
.../arm_cpu/mprofile/dsp/micro_kernel/avg_pool.py | 8 +-
.../topi/arm_cpu/mprofile/dsp/micro_kernel/gemm.py | 87 ++++-
.../arm_cpu/mprofile/dsp/micro_kernel/max_pool.py | 13 +-
.../arm_cpu/mprofile/dsp/micro_kernel/tensordot.py | 7 +-
python/tvm/topi/hexagon/compute_poolarea.py | 2 +-
python/tvm/topi/hexagon/slice_ops/max_pool2d.py | 2 +-
src/arith/canonical_simplify.cc | 58 +++-
src/arith/detect_linear_equation.cc | 3 +-
src/arith/iter_affine_map.cc | 35 +-
src/arith/presburger_set.cc | 276 ++++++++++++++++
src/arith/presburger_set.h | 194 +++++++++++
src/arith/rewrite_simplify.cc | 2 +
src/contrib/torch/base64.h | 75 -----
.../tvm_module_wrapper/RuntimeModuleWrapperTVM.cc | 143 +++++---
src/node/script_printer.cc | 3 +
src/node/structural_hash.cc | 17 +
src/relay/backend/contrib/cmsisnn/compute_luts.cc | 76 +++++
src/relay/backend/contrib/cmsisnn/compute_luts.h | 55 ++++
src/relay/backend/contrib/cmsisnn/relay_to_tir.cc | 151 +++++++--
src/relay/backend/contrib/cmsisnn/target.cc | 3 +-
.../backend/contrib/cmsisnn/tir_to_runtime.cc | 84 ++++-
src/relay/backend/contrib/ethosu/codegen.cc | 4 +-
.../backend/contrib/example_target_hooks/target.cc | 5 +-
.../contrib/example_target_hooks/tir_to_runtime.cc | 26 +-
src/relay/backend/contrib/uma/targets.cc | 30 +-
src/relay/backend/contrib/uma/tir_to_runtime.cc | 34 +-
src/relay/backend/vm/compiler.cc | 41 ++-
src/relay/backend/vm/manifest_lifetimes.cc | 4 +-
src/relay/ir/dataflow_matcher.cc | 21 +-
src/relay/ir/dataflow_pattern.cc | 10 +
src/relay/ir/dataflow_pattern_functor.cc | 6 +-
src/relay/ir/indexed_graph.cc | 7 +-
src/relay/op/memory/memory.cc | 20 +-
src/relay/op/memory/memory.h | 5 +-
src/relay/op/nn/convolution.cc | 4 +-
src/relay/qnn/op/convolution.cc | 17 +-
src/relay/transforms/annotate_texture_storage.cc | 9 +
src/relay/transforms/device_domains.cc | 7 +-
src/relay/transforms/memory_alloc.cc | 4 +-
src/relay/transforms/to_mixed_precision.cc | 9 +-
src/runtime/c_runtime_api.cc | 2 +-
src/runtime/contrib/papi/papi.cc | 7 +-
src/runtime/hexagon/hexagon_device_api.cc | 2 +-
src/runtime/hexagon/ops/conv2d_fp16_hvx.cc | 2 +-
src/runtime/library_module.cc | 9 -
src/runtime/library_module.h | 10 +
src/runtime/metal/metal_device_api.mm | 2 +-
src/runtime/opencl/opencl_device_api.cc | 2 +-
src/runtime/vm/bytecode.cc | 56 +++-
src/runtime/vm/executable.cc | 34 +-
src/runtime/vm/profiler/vm.cc | 16 +-
src/runtime/vm/vm.cc | 47 ++-
src/script/ir_builder/tir/ir.cc | 4 +-
src/script/printer/ir_docsifier.cc | 10 +-
src/script/printer/utils.h | 7 +-
src/support/libinfo.cc | 11 +
src/support/str_escape.h | 56 +++-
src/target/codegen.cc | 102 ++++--
src/target/opt/build_cuda_on.cc | 18 +-
src/target/source/codegen_aocl.cc | 19 +-
src/target/source/codegen_c.cc | 153 ++++++---
src/target/source/codegen_c.h | 59 +++-
src/target/source/codegen_c_host.cc | 93 +++---
src/target/source/codegen_c_host.h | 3 +-
src/target/source/codegen_cuda.cc | 56 +++-
src/target/source/codegen_cuda.h | 4 +-
src/target/source/codegen_metal.cc | 77 +++--
src/target/source/codegen_metal.h | 3 +-
src/target/source/codegen_opencl.cc | 24 +-
src/target/source/codegen_vhls.cc | 34 +-
src/target/source/ptx.cc | 145 ++++++++-
src/target/source/ptx.h | 62 ++++
src/target/source/source_module.cc | 6 +-
src/target/spirv/codegen_spirv.cc | 11 +
src/target/spirv/codegen_spirv.h | 1 +
src/tir/analysis/estimate_flops.cc | 20 +-
src/tir/op/builtin.cc | 16 +
src/tir/op/op.cc | 26 ++
src/tir/schedule/analysis.h | 2 +-
src/tir/schedule/primitive.h | 2 +-
.../schedule/primitive/layout_transformation.cc | 2 +-
src/tir/transforms/inject_virtual_thread.cc | 2 +-
src/tir/transforms/split_host_device.cc | 9 +-
src/tir/transforms/storage_rewrite.cc | 112 +++++--
...is_side_effect.cc => arith_integer_set_test.cc} | 27 +-
tests/cpp/target_test.cc | 3 +-
tests/python/contrib/test_clml/infrastructure.py | 2 +-
tests/python/contrib/test_cmsisnn/test_softmax.py | 43 +++
tests/python/contrib/test_ethosn/test_codegen.py | 4 +-
tests/python/frontend/onnx/test_forward.py | 95 ++++++
tests/python/frontend/pytorch/test_forward.py | 10 +
tests/python/frontend/tflite/test_forward.py | 35 +-
tests/python/relay/aot/test_c_device_api.py | 42 +--
.../relay/aot/test_crt_forward_declarations.py | 10 +-
.../opencl_texture/test_conv2d_nchw_texture.py | 361 ++++++++++++++++-----
.../opencl_texture/test_conv2d_nhwc_texture.py | 245 ++++++++++----
.../test_depthwise_conv2d_nchw_texture.py | 52 ++-
.../test_depthwise_conv2d_nhwc_texture.py | 50 ++-
.../relay/opencl_texture/test_injection_texture.py | 33 +-
tests/python/relay/opencl_texture/test_network.py | 24 +-
.../relay/opencl_texture/test_pool_texture.py | 63 +++-
.../relay/opencl_texture/test_reduction_texture.py | 87 +++--
.../relay/opencl_texture/utils/adreno_utils.py | 86 ++++-
.../relay/strategy/test_select_implementation.py | 89 ++++-
tests/python/relay/test_dataflow_pattern.py | 87 +++++
tests/python/relay/test_pass_alter_op_layout.py | 2 +-
tests/python/relay/test_pass_auto_quantize.py | 75 +++++
.../relay/test_pass_dead_code_elimination.py | 18 +-
tests/python/relay/test_pass_plan_devices.py | 11 +-
tests/python/relay/test_to_mixed_precision.py | 35 +-
tests/python/topi/python/test_topi_conv2d_int8.py | 11 +-
.../topi/python/test_topi_conv2d_tensordot_opts.py | 28 +-
.../unittest/test_arith_canonical_simplify.py | 40 +++
.../unittest/test_arith_detect_linear_equation.py | 4 +
.../python/unittest/test_arith_iter_affine_map.py | 27 ++
.../python/unittest/test_arith_rewrite_simplify.py | 2 +
...e_postproc_rewrite_parallel_vectorize_unroll.py | 6 +-
.../test_meta_schedule_space_cuda_winograd.py | 4 +-
.../unittest/test_roundtrip_runtime_module.py | 121 +++++++
tests/python/unittest/test_runtime_module_load.py | 2 +-
tests/python/unittest/test_runtime_rpc.py | 2 +-
tests/python/unittest/test_target_codegen_blob.py | 4 +-
.../python/unittest/test_target_codegen_c_host.py | 51 ++-
.../test_tir_analysis_estimate_tir_flops.py | 28 +-
tests/python/unittest/test_tir_op_types.py | 35 ++
tests/python/unittest/test_tir_ptx_cp_async.py | 112 +++++++
.../test_tir_transform_inject_ptx_async_copy.py | 109 ++++---
.../test_tir_transform_lower_warp_memory.py | 10 +-
...form_merge_dynamic_shared_memory_allocations.py | 2 +-
...est_tir_transform_pointer_value_type_rewrite.py | 73 +++++
.../unittest/test_tir_transform_thread_sync.py | 2 +-
.../python/unittest/test_tvmscript_error_report.py | 8 -
.../unittest/test_tvmscript_ir_builder_tir.py | 20 ++
tests/python/unittest/test_tvmscript_parser_tir.py | 33 ++
.../python/unittest/test_tvmscript_printer_tir.py | 41 +++
tests/scripts/task_python_integration.sh | 2 +-
tests/scripts/task_python_microtvm.sh | 1 +
vta/python/vta/transform.py | 2 +-
web/src/runtime.ts | 2 +-
web/src/support.ts | 4 +-
web/tests/python/prepare_test_libs.py | 2 +-
web/tests/python/webgpu_rpc_test.py | 2 +-
web/tests/python/websock_rpc_test.py | 2 +-
230 files changed, 5162 insertions(+), 1440 deletions(-)
create mode 100644 cmake/utils/FindRCCL.cmake
create mode 100644 src/arith/presburger_set.cc
create mode 100644 src/arith/presburger_set.h
delete mode 100644 src/contrib/torch/base64.h
create mode 100644 src/relay/backend/contrib/cmsisnn/compute_luts.cc
create mode 100644 src/relay/backend/contrib/cmsisnn/compute_luts.h
copy tests/cpp/{tir_analysis_side_effect.cc => arith_integer_set_test.cc} (59%)
create mode 100644 tests/python/unittest/test_roundtrip_runtime_module.py
create mode 100644 tests/python/unittest/test_tir_transform_pointer_value_type_rewrite.py