This is an automated email from the ASF dual-hosted git repository.
tqchen pushed a change to branch unity in repository https://gitbox.apache.org/repos/asf/tvm.git.
from f360556a0f [Unity][CUTLASS] Fix circular import bug in relax cutlass
backend (#15001)
add 68800fa810 [Contrib] Use f-strings for string formatting, NFC (#14893)
add 5a094bce74 [TIR] Expand unit tests for ConvertSSA (#14892)
add 53cee4bca3 [TVMScript] Round-trip DeclBuffer with undefined data
pointer (#14900)
add cd4551353b [LLVM] Codegen subroutine call when CallNode::op is
GlobalVar (#14901)
add dddc339d2f [Test Cases][BugFix] Add some version check to make test
cases run in all PyTorch versions (#14903)
add 4f99750611 [TypoFix] fix some typo problem in keras frontend (#14916)
add d776bccc1e [Relay] add a dimension check to reject invalid input
(#14925)
add 35ac2262eb [Docker] Add polly package (#14912)
add 172120a952 [TestCases] fix unreachable test cases due to outside the
for-loop (#14934)
add d9c1ba60f4 [microNPU][ETHOSU] Add offloading to the NPU the
nn.avg_pool2d operator with a stride > 3 (#14861)
add 1b9678cee0 [TVMScript] Allow T.target("device", host="host") to
specify host (#14915)
add dbcd198670 [BUILD] Enable log before throw message in windows (#14937)
add e11913be06 [Target] Add target to all TVM callbacks (#14939)
add 1c39613811 [LLVM] Expose Host CPU Feature Detection (#14946)
add bcf7abba29 Fix pytorch axis (#14930)
add 41a616ffba [TIR] Handle subroutine calls in MakePackedAPI (#14913)
add 94f4e25a40 [TIR] Handle subroutine calls in MakeUnpackedAPI (#14914)
add 5fd49f78ad [TIR] Restrict tir.transform.CombineContextCall to host
functions (#14945)
add 86ba26d854 [Bugfix][TIR] Avoid symbol conflicts in
MakePackedAPI/MakeUnpackedAPI (#14950)
add 81056cccd5 [TIR] Preserve existing kTarget function attribute in
BindTarget (#14942)
add 875217c79b [TIR] Restrict tir.transform.InstallDebugSpans to host
functions (#14943)
add 3a15eafd14 [TVMScript] Prevent bool to int conversion in T.Assert
condition (#14941)
add 1aeb34af33 [Codegen][LLVM] Allow void return type from PackedFunc
(#14958)
add 6198c7fd8a [METAL] Fix int8 vectorized cast (#14962)
add 6eb0779442 [TIR] SplitHostDevice, handle subroutines (#14918)
add 227c4e87f3 [CI] Update the expected CI jobs list in the update_branch
script (#14908)
add 4f041c96e7 [Runtime,RPC] Use f-strings for string formatting, NFC
(#14967)
add c47d36e77c [Hexagon] Add support for v73, make v68 default (#14965)
add 5f1421dd0f [Bugfix][PyTorch] Support use_input_stats in instance_norm
(#14963)
add 94c1b89abc [TVMScript][TIR] Parse subroutine calls with no arguments
(#14919)
add 43f06ca42a [TIR] Avoid re-defining `var = arg_var` in ArgBinder
(#14952)
add 4267fbf6a1 [IR][SIBuilder] (#14574)
add 7131411f0a [Bugfix][TIR][VTA] Update host-side target, even without
device func (#14982)
add c8f97aa877 [TIR] Fix incorrect construction of block frames (#14993)
add c98e29bbf4 [Bugfix] Make ThreadAllReduce pass compatible with int64
(#14991)
add ea57778087 [CMAKE] Add a dummy target to defer libtvm dep (#14975)
add 4eb1a4fdf0 [Hexagon] Update instructions to compile hexagon runtime
(#14948)
add 7fe58a181c [OpenCL] Improve diagnostic message (#14995)
add 443fa20e20 [RUNTIME] Update Module and Registry to use String
Container (#14902)
add 9da026194f [OpenCL][Adreno] Fix conv2d when output channels < 4
(#14996)
add 8543cec133 [Hexagon] Remove "c" as aot_host_target
tvm/contrib/hexagon/pytest_pl… (#14997)
add b13be936a0 [DataType] Initial support of fp8 (e4m3/e5m2) (#14863)
add 1608ca82ce [CMAKE] Introduce dummy build as an option (#15000)
add ca30b13668 [DOCKER][ADRENO] Docker setup changes for multi user
environments (#15003)
add 7f02606af2 [OPENCL] Always use convert_T for type conversion (#14972)
add 153f70aa60 [MERGE] Merge main into unity 2023-06-01
No new revisions were added by this update.
Summary of changes:
CMakeLists.txt | 21 +-
apps/dso_plugin_module/plugin_module.cc | 3 +-
apps/hexagon_launcher/README.md | 14 +-
apps/ios_rpc/tests/ios_rpc_mobilenet.py | 27 +-
apps/ios_rpc/tests/ios_rpc_test.py | 13 +-
apps/topi_recipe/broadcast/test_broadcast_map.py | 12 +-
apps/topi_recipe/conv/depthwise_conv2d_test.py | 13 +-
apps/topi_recipe/conv/test_conv2d_hwcn_map.py | 9 +-
apps/topi_recipe/reduce/test_reduce_map.py | 10 +-
apps/topi_recipe/rnn/lstm.py | 9 +-
apps/topi_recipe/rnn/matexp.py | 13 +-
ci/scripts/github/update_branch.py | 19 +-
cmake/config.cmake | 4 +-
cmake/modules/HexagonSDK.cmake | 9 +-
cmake/modules/LibInfo.cmake | 1 +
docker/install/ubuntu2204_install_llvm.sh | 2 +-
docker/install/ubuntu_install_python_package.sh | 3 +-
include/tvm/ir/si_builder.h | 103 ++++++
include/tvm/ir/source_map.h | 46 ++-
include/tvm/runtime/data_type.h | 34 ++
include/tvm/runtime/logging.h | 6 +-
include/tvm/runtime/module.h | 19 +-
include/tvm/runtime/packed_func.h | 2 +-
include/tvm/runtime/registry.h | 12 +-
include/tvm/runtime/relax_vm/executable.h | 6 +-
include/tvm/runtime/vm/executable.h | 4 +-
include/tvm/runtime/vm/vm.h | 2 +-
include/tvm/target/target.h | 13 +
include/tvm/tir/op.h | 3 +-
include/tvm/tir/transform.h | 52 +++
jvm/core/src/test/scripts/test_add_gpu.py | 4 +-
python/gen_requirements.py | 1 +
python/tvm/_ffi/runtime_ctypes.py | 22 ++
python/tvm/contrib/clang.py | 6 +-
python/tvm/contrib/cudnn.py | 6 +-
python/tvm/contrib/cutlass/build.py | 29 +-
python/tvm/contrib/cutlass/conv2d_operation.py | 11 +-
python/tvm/contrib/cutlass/gemm_operation.py | 22 +-
python/tvm/contrib/cutlass/gen_conv2d.py | 2 +-
python/tvm/contrib/cutlass/gen_gemm.py | 2 +-
python/tvm/contrib/cutlass/gen_tensor_op.py | 49 +--
python/tvm/contrib/cutlass/library.py | 23 +-
python/tvm/contrib/graph_executor.py | 2 +-
.../contrib/hexagon/profiling/process_lwp_data.py | 2 +-
python/tvm/contrib/hexagon/pytest_plugin.py | 2 +-
python/tvm/contrib/hexagon/tools.py | 8 +-
python/tvm/contrib/nvcc.py | 29 +-
python/tvm/contrib/peak.py | 4 +-
python/tvm/contrib/pickle_memoize.py | 4 +-
python/tvm/contrib/pipeline_executor.py | 20 +-
python/tvm/contrib/pipeline_executor_build.py | 22 +-
python/tvm/contrib/rocm.py | 4 +-
python/tvm/contrib/sdaccel.py | 10 +-
python/tvm/contrib/sparse.py | 8 +-
python/tvm/contrib/tar.py | 2 +-
python/tvm/contrib/target/coreml.py | 23 +-
python/tvm/contrib/target/onnx.py | 79 ++--
python/tvm/contrib/target/vitis_ai.py | 2 +-
python/tvm/contrib/tf_op/module.py | 2 +-
python/tvm/contrib/utils.py | 2 +-
python/tvm/contrib/xcode.py | 6 +-
python/tvm/ir/__init__.py | 1 +
python/tvm/ir/base.py | 17 +
python/tvm/relay/__init__.py | 1 +
.../tvm/relay/backend/contrib/ethosu/legalize.py | 11 +-
python/tvm/relay/base.py | 2 +-
python/tvm/relay/frontend/keras.py | 4 +-
python/tvm/relay/frontend/pytorch.py | 26 +-
python/tvm/relay/op/contrib/ethosu.py | 27 +-
python/tvm/relay/op/strategy/adreno.py | 47 ++-
python/tvm/rpc/base.py | 6 +-
python/tvm/rpc/client.py | 23 +-
python/tvm/rpc/minrpc.py | 4 +-
python/tvm/rpc/proxy.py | 22 +-
python/tvm/rpc/server.py | 12 +-
python/tvm/rpc/testing.py | 4 +-
python/tvm/rpc/tracker.py | 16 +-
python/tvm/runtime/container.py | 14 +-
python/tvm/runtime/executor/aot_executor.py | 2 +-
python/tvm/runtime/module.py | 38 +-
python/tvm/runtime/ndarray.py | 36 +-
python/tvm/runtime/object.py | 7 +-
python/tvm/runtime/object_generic.py | 6 +-
python/tvm/runtime/vm.py | 15 +-
python/tvm/script/ir_builder/tir/ir.py | 24 +-
python/tvm/script/parser/tir/parser.py | 14 +-
python/tvm/target/target.py | 8 +-
python/tvm/tir/op.py | 2 +-
python/tvm/tir/transform/transform.py | 84 +++++
src/driver/driver_api.cc | 5 +
src/ir/expr.cc | 14 +-
src/ir/si_builder.cc | 325 +++++++++++++++++
src/ir/source_map.cc | 60 ++++
src/relay/backend/aot_executor_codegen.cc | 2 +-
src/relay/backend/build_module.cc | 2 +-
src/relay/backend/contrib/ethosu/source_module.cc | 8 +-
src/relay/backend/graph_executor_codegen.cc | 2 +-
src/relay/backend/vm/compiler.cc | 2 +-
src/relay/backend/vm/compiler.h | 2 +-
src/relay/printer/model_library_format_printer.cc | 2 +-
src/runtime/aot_executor/aot_executor.cc | 3 +-
src/runtime/aot_executor/aot_executor.h | 2 +-
src/runtime/aot_executor/aot_executor_factory.cc | 2 +-
src/runtime/aot_executor/aot_executor_factory.h | 2 +-
src/runtime/const_loader_module.cc | 2 +-
src/runtime/contrib/coreml/coreml_runtime.h | 2 +-
src/runtime/contrib/coreml/coreml_runtime.mm | 3 +-
src/runtime/contrib/dnnl/dnnl_json_runtime.cc | 2 +-
src/runtime/contrib/ethosn/ethosn_runtime.cc | 5 +-
src/runtime/contrib/ethosn/ethosn_runtime.h | 4 +-
src/runtime/contrib/json/json_runtime.h | 4 +-
src/runtime/contrib/libtorch/libtorch_runtime.cc | 4 +-
src/runtime/contrib/onnx/onnx_module.cc | 6 +-
src/runtime/contrib/tflite/tflite_runtime.cc | 3 +-
src/runtime/contrib/tflite/tflite_runtime.h | 2 +-
src/runtime/contrib/vitis_ai/vitis_ai_runtime.cc | 3 +-
src/runtime/contrib/vitis_ai/vitis_ai_runtime.h | 2 +-
src/runtime/cuda/cuda_module.cc | 11 +-
.../cuda_graph/graph_runtime_cuda_graph.cc | 4 +-
.../graph_executor/debug/graph_executor_debug.cc | 2 +-
.../graph_executor/debug/graph_executor_debug.h | 2 +-
src/runtime/graph_executor/graph_executor.cc | 3 +-
src/runtime/graph_executor/graph_executor.h | 2 +-
.../graph_executor/graph_executor_factory.cc | 2 +-
.../graph_executor/graph_executor_factory.h | 2 +-
src/runtime/hexagon/README.md | 9 +-
src/runtime/hexagon/hexagon_module.cc | 6 +-
src/runtime/hexagon/hexagon_module.h | 6 +-
src/runtime/hexagon/rpc/simulator/session.cc | 30 ++
src/runtime/library_module.cc | 5 +-
src/runtime/metadata.cc | 2 +-
src/runtime/metal/metal_module.mm | 9 +-
src/runtime/module.cc | 15 +-
src/runtime/opencl/opencl_common.h | 8 +-
src/runtime/opencl/opencl_module.cc | 14 +-
src/runtime/opencl/opencl_module_spirv.cc | 6 +-
src/runtime/opencl/sdaccel/sdaccel_module.cc | 2 +-
src/runtime/pipeline/pipeline_executor.cc | 2 +-
src/runtime/pipeline/pipeline_executor.h | 2 +-
src/runtime/registry.cc | 18 +-
src/runtime/relax_vm/executable.cc | 6 +-
src/runtime/relax_vm/ndarray_cache_support.cc | 4 +-
src/runtime/relax_vm/vm.cc | 6 +-
src/runtime/rocm/rocm_module.cc | 9 +-
src/runtime/rpc/rpc_module.cc | 4 +-
src/runtime/stackvm/stackvm_module.cc | 9 +-
src/runtime/static_library.cc | 4 +-
src/runtime/vm/executable.cc | 6 +-
src/runtime/vm/profiler/vm.cc | 2 +-
src/runtime/vm/profiler/vm.h | 2 +-
src/runtime/vm/vm.cc | 3 +-
src/runtime/vulkan/vulkan_module.cc | 2 +-
src/runtime/vulkan/vulkan_wrapped_func.cc | 6 +-
src/runtime/vulkan/vulkan_wrapped_func.h | 6 +-
src/script/printer/tir/block.cc | 3 +-
src/script/printer/tir/buffer.cc | 29 +-
src/script/printer/tir/function.cc | 6 +-
src/script/printer/tir/stmt.cc | 24 +-
src/script/printer/tir/utils.h | 28 +-
src/support/ffi_testing.cc | 4 +-
src/support/libinfo.cc | 1 +
src/support/scalars.h | 12 +
src/target/llvm/codegen_amdgpu.cc | 10 +-
src/target/llvm/codegen_cpu.cc | 51 +--
src/target/llvm/codegen_cpu.h | 2 +-
src/target/llvm/codegen_hexagon.cc | 4 +-
src/target/llvm/codegen_llvm.cc | 110 +++++-
src/target/llvm/codegen_llvm.h | 61 +++-
src/target/llvm/codegen_nvptx.cc | 14 +-
src/target/llvm/llvm_module.cc | 33 +-
src/target/opt/build_cuda_on.cc | 4 +-
src/target/source/codegen_aocl.cc | 2 +-
src/target/source/codegen_cuda.cc | 17 +
src/target/source/codegen_cuda.h | 5 +-
src/target/source/codegen_metal.cc | 7 +-
src/target/source/codegen_opencl.cc | 12 +-
src/target/source/codegen_vhls.cc | 5 +-
src/target/source/codegen_webgpu.cc | 8 +-
src/target/source/interface_c.cc | 4 +-
src/target/source/source_module.cc | 28 +-
src/target/spirv/spirv_utils.cc | 2 +-
src/target/target.cc | 15 +
src/tir/ir/stmt.cc | 3 +
src/tir/op/op.cc | 7 +
src/tir/schedule/state.cc | 2 +-
src/tir/transforms/annotate_device_regions.cc | 81 +++++
src/tir/transforms/combine_context_call.cc | 7 +-
src/tir/transforms/dtype_conversion.cc | 101 ++++++
src/tir/transforms/dtype_conversion.h | 165 +++++++++
src/tir/transforms/install_debug_spans.cc | 36 +-
src/tir/transforms/ir_utils.cc | 32 +-
src/tir/transforms/ir_utils.h | 13 +
src/tir/transforms/lower_device_kernel_launch.cc | 305 ++++++++++++++++
src/tir/transforms/lower_thread_allreduce.cc | 3 +-
src/tir/transforms/make_packed_api.cc | 160 ++++++---
src/tir/transforms/make_unpacked_api.cc | 113 +++++-
src/tir/transforms/primfunc_utils.cc | 30 +-
src/tir/transforms/split_host_device.cc | 272 ++++----------
...6_legalize.cc => unsupported_dtype_legalize.cc} | 264 +++++++++-----
tests/cpp/si_builder_test.cc | 399 +++++++++++++++++++++
tests/python/ci/test_ci.py | 49 +--
tests/python/contrib/test_ethosu/test_codegen.py | 32 ++
tests/python/contrib/test_ethosu/test_legalize.py | 211 +++++++++++
tests/python/contrib/test_hexagon/README.md | 2 +-
tests/python/frontend/keras/test_forward.py | 2 +-
tests/python/frontend/pytorch/test_forward.py | 29 +-
tests/python/integration/test_ewise.py | 5 +-
tests/python/integration/test_ewise_fpga.py | 3 +-
.../opencl_texture/test_conv2d_nchw_texture.py | 30 ++
.../opencl_texture/test_conv2d_nhwc_texture.py | 30 ++
.../test_depthwise_conv2d_nchw_texture.py | 31 ++
.../test_depthwise_conv2d_nhwc_texture.py | 31 ++
tests/python/tir/test_debug_info.py | 50 ++-
tests/python/unittest/test_datatype_nv_fp8.py | 104 ++++++
tests/python/unittest/test_target_codegen_llvm.py | 89 ++++-
tests/python/unittest/test_target_codegen_metal.py | 30 +-
.../python/unittest/test_target_codegen_opencl.py | 80 ++---
.../unittest/test_target_texture_codegen_opencl.py | 38 +-
.../unittest/test_te_schedule_bound_inference.py | 56 ++-
.../test_tir_transform_annotate_device_regions.py | 58 +++
.../unittest/test_tir_transform_bf16_legalize.py | 5 +-
.../test_tir_transform_combine_context_call.py | 102 ++++--
.../unittest/test_tir_transform_convert_ssa.py | 253 +++++++++++++
.../test_tir_transform_device_kernel_launch.py | 193 ++++++++++
.../unittest/test_tir_transform_fp8_legalize.py | 224 ++++++++++++
.../python/unittest/test_tir_transform_helpers.py | 112 ++++++
.../test_tir_transform_inject_ptx_async_copy.py | 4 +-
.../python/unittest/test_tir_transform_ir_utils.py | 40 ---
.../test_tir_transform_lower_warp_memory.py | 37 +-
.../unittest/test_tir_transform_make_packed_api.py | 128 ++++++-
.../test_tir_transform_make_unpacked_api.py | 158 +++++++-
.../test_tir_transform_split_host_device.py | 129 ++++++-
.../unittest/test_tir_transform_thread_sync.py | 5 +-
.../python/unittest/test_tvmscript_printer_tir.py | 7 +-
tests/python/unittest/test_tvmscript_roundtrip.py | 80 +++++
.../python/unittest/test_tvmscript_syntax_sugar.py | 35 ++
tests/scripts/setup-adreno-env.sh | 13 +-
tests/scripts/task_config_build_cpu.sh | 3 -
tests/scripts/task_config_build_i386.sh | 3 -
tests/scripts/task_python_adreno.sh | 13 +-
tests/scripts/task_python_vta_fsim.sh | 3 +
web/emcc/webgpu_runtime.cc | 6 +-
242 files changed, 5684 insertions(+), 1394 deletions(-)
create mode 100644 include/tvm/ir/si_builder.h
create mode 100644 src/ir/si_builder.cc
create mode 100644 src/tir/transforms/annotate_device_regions.cc
create mode 100644 src/tir/transforms/dtype_conversion.cc
create mode 100644 src/tir/transforms/dtype_conversion.h
create mode 100644 src/tir/transforms/lower_device_kernel_launch.cc
rename src/tir/transforms/{bf16_legalize.cc => unsupported_dtype_legalize.cc} (69%)
create mode 100644 tests/cpp/si_builder_test.cc
create mode 100644 tests/python/unittest/test_datatype_nv_fp8.py
create mode 100644 tests/python/unittest/test_tir_transform_annotate_device_regions.py
create mode 100644 tests/python/unittest/test_tir_transform_convert_ssa.py
create mode 100644 tests/python/unittest/test_tir_transform_device_kernel_launch.py
create mode 100644 tests/python/unittest/test_tir_transform_fp8_legalize.py
delete mode 100644 tests/python/unittest/test_tir_transform_ir_utils.py