This is an automated email from the ASF dual-hosted git repository.
github-bot pushed a change to branch nightly in repository https://gitbox.apache.org/repos/asf/tvm.git.
from 95d1268982 [REFACTOR] Introduce and modernize FFI system (#17920)
add bcb68b1303 [Relax][PyTorch] Add div.Tensor_mode and trunc Op Support for Exported Program and FX graph (#17924)
add 16b2783702 [REFACTOR][FFI] Cleanup PackedFunc related redirection (#17923)
add da6d510ce0 [FFI][FEAT] AutoDLPack for taking external tensor objects (#17927)
add 914590a22d [Relax][PyTorch] Add tests for all the dtypes supported in the PyTorch frontend (#17926)
No new revisions were added by this update.
Summary of changes:
apps/cpp_rpc/rpc_env.cc | 8 +-
apps/hexagon_launcher/launcher_core.cc | 22 +-
apps/hexagon_launcher/launcher_core.h | 7 +-
apps/hexagon_launcher/launcher_hexagon.cc | 14 +-
apps/ios_rpc/tvmrpc/RPCServer.mm | 4 +-
apps/ios_rpc/tvmrpc/TVMRuntime.mm | 15 +-
docs/arch/index.rst | 2 +-
docs/arch/runtime.rst | 10 +-
ffi/include/tvm/ffi/function.h | 56 +--
ffi/include/tvm/ffi/function_details.h | 4 +-
ffi/include/tvm/ffi/rvalue_ref.h | 2 +-
ffi/scripts/benchmark_dlpack.py | 345 +++++++++++++++
ffi/src/ffi/container.cc | 2 +-
ffi/src/ffi/function.cc | 2 +-
ffi/tests/cpp/test_array.cc | 3 +-
ffi/tests/cpp/test_function.cc | 16 +-
ffi/tests/cpp/test_map.cc | 6 +-
ffi/tests/cpp/test_rvalue_ref.cc | 8 +-
ffi/tests/cpp/test_tuple.cc | 4 +-
ffi/tests/cpp/test_variant.cc | 6 +-
golang/src/gotvm.cc | 2 +-
include/tvm/ir/attrs.h | 18 +-
include/tvm/ir/diagnostic.h | 8 +-
include/tvm/ir/env_func.h | 6 +-
include/tvm/ir/function.h | 8 +-
include/tvm/ir/op.h | 2 +-
include/tvm/meta_schedule/builder.h | 2 +-
include/tvm/meta_schedule/cost_model.h | 14 +-
include/tvm/meta_schedule/database.h | 28 +-
include/tvm/meta_schedule/feature_extractor.h | 4 +-
include/tvm/meta_schedule/measure_callback.h | 13 +-
include/tvm/meta_schedule/mutator.h | 8 +-
include/tvm/meta_schedule/postproc.h | 8 +-
include/tvm/meta_schedule/profiler.h | 6 +-
include/tvm/meta_schedule/runner.h | 6 +-
include/tvm/meta_schedule/schedule_rule.h | 10 +-
include/tvm/meta_schedule/search_strategy.h | 16 +-
include/tvm/meta_schedule/space_generator.h | 10 +-
include/tvm/meta_schedule/task_scheduler.h | 32 +-
include/tvm/meta_schedule/tune_context.h | 4 +-
include/tvm/node/attr_registry_map.h | 4 +-
include/tvm/node/node.h | 18 +-
include/tvm/node/reflection.h | 4 +-
include/tvm/relax/analysis.h | 2 +-
include/tvm/relax/dataflow_matcher.h | 5 +-
include/tvm/relax/exec_builder.h | 4 +-
include/tvm/relax/op_attr_types.h | 13 +-
include/tvm/relax/tir_pattern.h | 2 +-
include/tvm/relax/transform.h | 8 +-
include/tvm/relax/tuning_api.h | 8 +-
include/tvm/runtime/c_backend_api.h | 2 +-
include/tvm/runtime/c_runtime_api.h | 2 +-
include/tvm/runtime/disco/disco_worker.h | 4 +-
include/tvm/runtime/disco/session.h | 32 +-
include/tvm/runtime/module.h | 28 +-
include/tvm/runtime/object.h | 3 +-
include/tvm/runtime/packed_func.h | 55 +--
include/tvm/runtime/profiling.h | 18 +-
include/tvm/runtime/registry.h | 2 +-
include/tvm/runtime/relax_vm/executable.h | 2 +-
include/tvm/runtime/relax_vm/vm.h | 18 +-
include/tvm/script/ir_builder/base.h | 4 +-
include/tvm/script/ir_builder/tir/frame.h | 2 +-
include/tvm/script/printer/ir_docsifier.h | 2 +-
include/tvm/script/printer/ir_docsifier_functor.h | 28 +-
include/tvm/target/codegen.h | 6 +-
include/tvm/target/target_kind.h | 10 +-
include/tvm/tir/builtin.h | 20 +-
include/tvm/tir/function.h | 2 +-
include/tvm/tir/index_map.h | 2 +-
include/tvm/tir/op_attr_types.h | 4 +-
include/tvm/tir/schedule/instruction.h | 10 +-
include/tvm/tir/schedule/trace.h | 4 +-
include/tvm/tir/stmt_functor.h | 3 +-
include/tvm/tir/transform.h | 2 +-
include/tvm/topi/detail/extern.h | 8 +-
python/tvm/contrib/msc/plugin/codegen/sources.py | 18 +-
python/tvm/ffi/convert.py | 5 +
python/tvm/ffi/cython/function.pxi | 16 +
python/tvm/ffi/cython/ndarray.pxi | 2 +
python/tvm/ir/base.py | 2 +-
.../frontend/torch/base_fx_graph_translator.py | 26 ++
.../frontend/torch/exported_program_translator.py | 2 +
python/tvm/relax/frontend/torch/fx_translator.py | 2 +
python/tvm/relax/op/__init__.py | 1 +
python/tvm/relax/op/unary.py | 14 +
python/tvm/relax/transform/legalize_ops/unary.py | 1 +
python/tvm/script/ir_builder/relax/ir.py | 2 +
python/tvm/tir/transform/transform.py | 4 +-
src/arith/analyzer.cc | 176 ++++----
src/arith/int_constraints.cc | 2 +-
src/arith/solve_linear_equation.cc | 2 +-
src/arith/solve_linear_inequality.cc | 6 +-
.../msc/framework/tensorrt/transform_tensorrt.cc | 4 +-
src/contrib/msc/plugin/tvm_codegen.cc | 6 +-
src/ir/attrs.cc | 2 +-
src/ir/diagnostic.cc | 10 +-
src/ir/env_func.cc | 10 +-
src/ir/instrument.cc | 47 +-
src/ir/op.cc | 22 +-
src/ir/transform.cc | 27 +-
src/meta_schedule/database/schedule_fn_database.cc | 4 +-
src/meta_schedule/profiler.cc | 8 +-
.../schedule_rule/apply_custom_rule.cc | 2 +-
.../schedule_rule/multi_level_tiling.cc | 2 +-
.../schedule_rule/multi_level_tiling.h | 4 +-
.../space_generator/post_order_apply.cc | 4 +-
src/meta_schedule/space_generator/schedule_fn.cc | 6 +-
src/meta_schedule/task_scheduler/gradient_based.cc | 2 +-
src/meta_schedule/task_scheduler/round_robin.cc | 2 +-
src/meta_schedule/task_scheduler/task_scheduler.cc | 4 +-
src/meta_schedule/tune_context.cc | 5 +-
src/meta_schedule/utils.h | 13 +-
src/node/attr_registry.h | 6 +-
src/node/reflection.cc | 35 +-
src/relax/backend/vm/exec_builder.cc | 4 +-
src/relax/ir/dataflow_block_rewriter.cc | 11 +-
src/relax/ir/dataflow_expr_rewriter.cc | 16 +-
src/relax/ir/dataflow_matcher.cc | 4 +-
src/relax/ir/dataflow_rewriter.h | 14 +-
src/relax/ir/expr_functor.cc | 7 +-
src/relax/ir/py_expr_functor.cc | 155 +++----
src/relax/ir/transform.cc | 21 +-
src/relax/op/tensor/unary.cc | 1 +
src/relax/op/tensor/unary.h | 3 +
src/relax/transform/adjust_matmul_order.cc | 6 +-
src/relax/transform/combine_parallel_matmul.cc | 4 +-
src/relax/transform/convert_layout.cc | 2 +-
src/relax/transform/expand_matmul_of_sum.cc | 2 +-
src/relax/transform/fold_constant.cc | 16 +-
src/relax/transform/fuse_ops.cc | 10 +-
src/relax/transform/infer_amp_utils.h | 2 +-
src/relax/transform/infer_layout_utils.h | 2 +-
src/relax/transform/legalize_ops.cc | 6 +-
src/relax/transform/meta_schedule.cc | 2 +-
.../transform/reorder_permute_dims_after_concat.cc | 2 +-
src/relax/transform/reorder_take_after_matmul.cc | 2 +-
src/relax/transform/update_param_struct_info.cc | 6 +-
src/runtime/c_runtime_api.cc | 91 ++--
src/runtime/const_loader_module.cc | 10 +-
src/runtime/contrib/amx/amx_config.cc | 25 +-
src/runtime/contrib/cblas/cblas.cc | 21 +-
src/runtime/contrib/cblas/dnnl_blas.cc | 11 +-
src/runtime/contrib/cblas/gemm_common.h | 10 +-
src/runtime/contrib/cblas/mkl.cc | 23 +-
src/runtime/contrib/coreml/coreml_runtime.h | 6 +-
src/runtime/contrib/coreml/coreml_runtime.mm | 24 +-
src/runtime/contrib/cublas/cublas.cc | 13 +-
src/runtime/contrib/cublas/cublas_json_runtime.cc | 8 +-
src/runtime/contrib/cudnn/conv_backward.cc | 12 +-
src/runtime/contrib/cudnn/conv_forward.cc | 10 +-
src/runtime/contrib/cudnn/cudnn_json_runtime.cc | 2 +-
src/runtime/contrib/cudnn/cudnn_utils.h | 2 +-
src/runtime/contrib/cudnn/softmax.cc | 6 +-
src/runtime/contrib/dnnl/dnnl.cc | 65 +--
src/runtime/contrib/dnnl/dnnl_json_runtime.cc | 8 +-
src/runtime/contrib/edgetpu/edgetpu_runtime.cc | 2 +-
src/runtime/contrib/edgetpu/edgetpu_runtime.h | 4 +-
src/runtime/contrib/hipblas/hipblas.cc | 8 +-
.../contrib/hipblas/hipblas_json_runtime.cc | 8 +-
src/runtime/contrib/json/json_runtime.h | 22 +-
src/runtime/contrib/miopen/conv_forward.cc | 4 +-
src/runtime/contrib/miopen/softmax.cc | 6 +-
src/runtime/contrib/mps/conv.mm | 166 ++++----
src/runtime/contrib/mps/gemm.mm | 131 +++---
src/runtime/contrib/mrvl/mrvl_hw_runtime.cc | 24 +-
src/runtime/contrib/mrvl/mrvl_runtime.cc | 16 +-
src/runtime/contrib/mrvl/mrvl_sw_runtime_lib.cc | 13 +-
src/runtime/contrib/mrvl/mrvl_sw_runtime_lib.h | 2 +-
src/runtime/contrib/random/random.cc | 10 +-
src/runtime/contrib/rocblas/rocblas.cc | 4 +-
src/runtime/contrib/sort/sort.cc | 472 +++++++++++----------
src/runtime/contrib/tflite/tflite_runtime.cc | 21 +-
src/runtime/contrib/tflite/tflite_runtime.h | 6 +-
src/runtime/contrib/thrust/thrust.cu | 7 +-
src/runtime/cpu_device_api.cc | 4 +-
src/runtime/cuda/cuda_device_api.cc | 6 +-
src/runtime/cuda/cuda_module.cc | 13 +-
src/runtime/cuda/l2_cache_flush.cc | 2 +-
src/runtime/disco/bcast_session.cc | 4 +-
src/runtime/disco/bcast_session.h | 16 +-
src/runtime/disco/builtin.cc | 6 +-
src/runtime/disco/disco_worker.cc | 16 +-
src/runtime/disco/distributed/socket_session.cc | 28 +-
src/runtime/disco/loader.cc | 12 +-
src/runtime/disco/message_queue.h | 4 +-
src/runtime/disco/process_session.cc | 30 +-
src/runtime/disco/protocol.h | 8 +-
src/runtime/disco/session.cc | 4 +-
src/runtime/disco/threaded_session.cc | 20 +-
src/runtime/hexagon/hexagon_common.cc | 2 +-
src/runtime/hexagon/hexagon_device_api.cc | 24 +-
src/runtime/hexagon/hexagon_device_api.h | 2 +-
src/runtime/hexagon/hexagon_module.cc | 4 +-
src/runtime/hexagon/hexagon_module.h | 2 +-
src/runtime/hexagon/rpc/android/session.cc | 2 +-
src/runtime/hexagon/rpc/hexagon/rpc_server.cc | 12 +-
src/runtime/hexagon/rpc/simulator/rpc_server.cc | 6 +-
src/runtime/hexagon/rpc/simulator/session.cc | 2 +-
src/runtime/library_module.cc | 14 +-
src/runtime/library_module.h | 11 +-
src/runtime/metal/metal_common.h | 2 +-
src/runtime/metal/metal_device_api.mm | 4 +-
src/runtime/metal/metal_module.mm | 11 +-
src/runtime/module.cc | 12 +-
src/runtime/opencl/opencl_common.h | 6 +-
src/runtime/opencl/opencl_device_api.cc | 42 +-
src/runtime/opencl/opencl_module.cc | 16 +-
src/runtime/pack_args.h | 39 +-
src/runtime/packed_func.cc | 4 +-
src/runtime/profiling.cc | 25 +-
src/runtime/regex.cc | 2 +-
src/runtime/registry.cc | 2 +-
src/runtime/relax_vm/attn_backend.cc | 22 +-
src/runtime/relax_vm/attn_backend.h | 71 ++--
src/runtime/relax_vm/builtin.cc | 8 +-
src/runtime/relax_vm/cuda/cuda_graph_builtin.cc | 16 +-
src/runtime/relax_vm/executable.cc | 12 +-
src/runtime/relax_vm/hexagon/builtin.cc | 4 +-
src/runtime/relax_vm/kv_state.cc | 2 +-
src/runtime/relax_vm/lm_support.cc | 2 +-
src/runtime/relax_vm/ndarray_cache_support.cc | 6 +-
src/runtime/relax_vm/paged_kv_cache.cc | 47 +-
src/runtime/relax_vm/rnn_state.cc | 26 +-
src/runtime/relax_vm/vm.cc | 141 +++---
src/runtime/rocm/rocm_device_api.cc | 6 +-
src/runtime/rocm/rocm_module.cc | 10 +-
src/runtime/rpc/rpc_channel.h | 6 +-
src/runtime/rpc/rpc_device_api.cc | 4 +-
src/runtime/rpc/rpc_endpoint.cc | 93 ++--
src/runtime/rpc/rpc_endpoint.h | 12 +-
src/runtime/rpc/rpc_event_impl.cc | 9 +-
src/runtime/rpc/rpc_local_session.cc | 2 +-
src/runtime/rpc/rpc_local_session.h | 2 +-
src/runtime/rpc/rpc_module.cc | 58 +--
src/runtime/rpc/rpc_pipe_impl.cc | 4 +-
src/runtime/rpc/rpc_server_env.cc | 39 +-
src/runtime/rpc/rpc_session.cc | 2 +-
src/runtime/rpc/rpc_session.h | 18 +-
src/runtime/rpc/rpc_socket_impl.cc | 13 +-
src/runtime/static_library.cc | 5 +-
src/runtime/system_library.cc | 4 +-
src/runtime/thread_pool.cc | 29 +-
src/runtime/thread_storage_scope.h | 4 +-
src/runtime/vulkan/vulkan_device_api.cc | 8 +-
src/runtime/vulkan/vulkan_device_api.h | 4 +-
src/runtime/vulkan/vulkan_wrapped_func.cc | 8 +-
src/runtime/vulkan/vulkan_wrapped_func.h | 4 +-
src/script/ir_builder/base.cc | 2 +-
src/script/printer/ir_docsifier.cc | 4 +-
src/support/array.h | 12 +-
src/support/ffi_testing.cc | 56 +--
src/target/datatype/registry.cc | 12 +-
src/target/intrin_rule.cc | 3 +
src/target/llvm/codegen_aarch64.cc | 2 +-
src/target/llvm/codegen_amdgpu.cc | 4 +-
src/target/llvm/codegen_arm.cc | 2 +-
src/target/llvm/codegen_cpu.cc | 4 +-
src/target/llvm/codegen_hexagon.cc | 2 +-
src/target/llvm/codegen_nvptx.cc | 2 +-
src/target/llvm/codegen_x86_64.cc | 2 +-
src/target/llvm/llvm_module.cc | 30 +-
src/target/source/codegen_webgpu.cc | 4 +-
src/target/source/source_module.cc | 30 +-
src/target/target.cc | 10 +-
src/target/target_kind.cc | 18 +-
src/te/operation/create_primfunc.cc | 2 +-
src/tir/analysis/var_use_def_analysis.cc | 2 +-
src/tir/ir/buffer.cc | 2 +-
src/tir/ir/expr.cc | 3 +-
src/tir/ir/index_map.cc | 2 +-
src/tir/ir/stmt_functor.cc | 14 +-
src/tir/ir/transform.cc | 3 +-
src/tir/op/op.cc | 4 +-
src/tir/schedule/analysis.h | 2 +-
src/tir/schedule/analysis/reducer.cc | 2 +-
src/tir/schedule/instruction_traits.h | 8 +-
src/tir/schedule/primitive/reduction.cc | 17 +-
src/tir/schedule/trace.cc | 6 +-
src/tir/transforms/ir_utils.h | 2 +-
src/tir/transforms/lower_device_kernel_launch.cc | 2 +-
src/tir/transforms/lower_intrin.cc | 2 +-
src/tir/transforms/make_packed_api.cc | 2 +-
src/tir/transforms/primfunc_utils.cc | 2 +-
src/topi/broadcast.cc | 28 +-
src/topi/einsum.cc | 2 +-
src/topi/elemwise.cc | 68 +--
src/topi/nn.cc | 141 +++---
src/topi/reduction.cc | 18 +-
src/topi/transform.cc | 74 ++--
src/topi/utils.cc | 11 +-
src/topi/vision.cc | 2 +-
tests/cpp-runtime/hexagon/run_all_tests.cc | 39 +-
tests/cpp-runtime/hexagon/run_unit_tests.cc | 73 ++--
tests/python/contrib/test_hexagon/README_RPC.md | 10 +-
tests/python/ffi/test_ndarray.py | 27 ++
.../relax/test_frontend_from_exported_program.py | 99 ++++-
tests/python/relax/test_frontend_from_fx.py | 107 ++++-
web/emcc/tvmjs_support.cc | 50 +--
web/emcc/wasm_runtime.cc | 50 ++-
web/emcc/webgpu_runtime.cc | 35 +-
301 files changed, 3028 insertions(+), 2361 deletions(-)
create mode 100644 ffi/scripts/benchmark_dlpack.py