This is an automated email from the ASF dual-hosted git repository.
github-bot pushed a change to branch nightly
in repository https://gitbox.apache.org/repos/asf/tvm.git
from 4ef582a331 [Relax][PyTorch] Add support for linspace op in fx graph
(#17915)
add fa26a05162 [Relax][PyTorch] Add Meshgrid Op Support for Exported
Program and FX graph (#17904)
add 95d1268982 [REFACTOR] Introduce and modernize FFI system (#17920)
No new revisions were added by this update.
Summary of changes:
.github/workflows/main.yml | 139 +-
.gitmodules | 3 +
3rdparty/cutlass_fpA_intB_gemm | 2 +-
CMakeLists.txt | 43 +-
apps/android_rpc/app/src/main/jni/Android.mk | 3 +-
apps/android_rpc/app/src/main/jni/tvm_runtime.h | 9 +
apps/cpp_rpc/rpc_env.cc | 70 +-
apps/cpp_rpc/rpc_server.cc | 4 +-
apps/hexagon_api/CMakeLists.txt | 6 +-
apps/hexagon_launcher/cmake/android/CMakeLists.txt | 2 +-
apps/hexagon_launcher/cmake/hexagon/CMakeLists.txt | 2 +-
apps/hexagon_launcher/launcher_core.cc | 6 +-
apps/hexagon_launcher/launcher_main.cc | 4 +-
apps/ios_rpc/tvmrpc/RPCServer.mm | 4 +-
apps/ios_rpc/tvmrpc/TVMRuntime.mm | 55 +-
cmake/modules/Logging.cmake | 87 -
cmake/modules/contrib/CUTLASS.cmake | 3 +
cmake/utils/FindLLVM.cmake | 5 +-
docs/arch/device_target_interactions.rst | 2 +-
docs/arch/pass_infra.rst | 20 +-
docs/arch/runtime.rst | 8 +-
.../tensor_ir/tutorials/tir_transformation.py | 2 +-
{3rdparty => ffi/3rdparty}/dlpack | 0
ffi/CMakeLists.txt | 133 +
ffi/cmake/Utils/AddGoogleTest.cmake | 59 +
ffi/cmake/Utils/AddLibbacktrace.cmake | 67 +
.../BNNS.cmake => ffi/cmake/Utils/CxxWarning.cmake | 24 +-
ffi/cmake/Utils/Library.cmake | 67 +
.../Makefile => ffi/cmake/Utils/Sanitizer.cmake | 35 +-
ffi/include/tvm/ffi/any.h | 492 ++++
ffi/include/tvm/ffi/base_details.h | 274 ++
ffi/include/tvm/ffi/c_api.h | 718 +++++
ffi/include/tvm/ffi/cast.h | 160 ++
.../include/tvm/ffi}/container/array.h | 538 ++--
.../include/tvm/ffi/container/container_details.h | 137 +-
.../include/tvm/ffi}/container/map.h | 883 +++---
ffi/include/tvm/ffi/container/ndarray.h | 337 +++
ffi/include/tvm/ffi/container/shape.h | 219 ++
ffi/include/tvm/ffi/container/tuple.h | 279 ++
ffi/include/tvm/ffi/container/variant.h | 198 ++
ffi/include/tvm/ffi/dtype.h | 177 ++
ffi/include/tvm/ffi/endian.h | 89 +
ffi/include/tvm/ffi/error.h | 294 ++
ffi/include/tvm/ffi/function.h | 914 ++++++
ffi/include/tvm/ffi/function_details.h | 210 ++
.../tvm/runtime => ffi/include/tvm/ffi}/memory.h | 56 +-
ffi/include/tvm/ffi/object.h | 798 ++++++
ffi/include/tvm/ffi/optional.h | 299 ++
ffi/include/tvm/ffi/reflection/reflection.h | 155 +
ffi/include/tvm/ffi/rvalue_ref.h | 151 +
ffi/include/tvm/ffi/string.h | 662 +++++
ffi/include/tvm/ffi/type_traits.h | 683 +++++
.../scripts/run_tests.sh | 15 +-
ffi/src/ffi/container.cc | 95 +
ffi/src/ffi/dtype.cc | 328 +++
ffi/src/ffi/error.cc | 80 +
ffi/src/ffi/function.cc | 291 ++
ffi/src/ffi/ndarray.cc | 78 +
ffi/src/ffi/object.cc | 312 ++
ffi/src/ffi/testing.cc | 65 +
ffi/src/ffi/traceback.cc | 174 ++
ffi/src/ffi/traceback.h | 166 ++
ffi/src/ffi/traceback_win.cc | 133 +
ffi/tests/cpp/CMakeLists.txt | 26 +
ffi/tests/cpp/test_any.cc | 342 +++
ffi/tests/cpp/test_array.cc | 286 ++
.../tests/cpp/test_c_ffi_abi.cc | 22 +-
ffi/tests/cpp/test_dtype.cc | 129 +
ffi/tests/cpp/test_error.cc | 70 +
ffi/tests/cpp/test_function.cc | 246 ++
ffi/tests/cpp/test_map.cc | 359 +++
ffi/tests/cpp/test_ndarray.cc | 111 +
ffi/tests/cpp/test_object.cc | 106 +
ffi/tests/cpp/test_optional.cc | 173 ++
ffi/tests/cpp/test_reflection.cc | 52 +
ffi/tests/cpp/test_rvalue_ref.cc | 97 +
ffi/tests/cpp/test_shape.cc | 72 +
ffi/tests/cpp/test_string.cc | 371 +++
ffi/tests/cpp/test_tuple.cc | 139 +
ffi/tests/cpp/test_variant.cc | 137 +
ffi/tests/cpp/testing_object.h | 144 +
include/tvm/ir/attrs.h | 117 +-
include/tvm/ir/env_func.h | 12 +-
include/tvm/ir/expr.h | 193 +-
include/tvm/ir/function.h | 5 +-
include/tvm/ir/module.h | 7 +-
include/tvm/ir/name_supply.h | 1 +
include/tvm/ir/op.h | 8 +-
include/tvm/ir/source_map.h | 12 +-
include/tvm/ir/transform.h | 54 +-
include/tvm/meta_schedule/schedule_rule.h | 28 +-
include/tvm/meta_schedule/search_strategy.h | 64 +-
include/tvm/meta_schedule/space_generator.h | 64 +-
include/tvm/meta_schedule/task_scheduler.h | 64 +-
include/tvm/node/attr_registry_map.h | 14 +-
include/tvm/node/object_path.h | 6 +-
include/tvm/node/reflection.h | 8 +-
include/tvm/node/repr_printer.h | 20 +-
include/tvm/node/script_printer.h | 5 +-
include/tvm/node/serialization.h | 4 +-
include/tvm/node/structural_equal.h | 56 +-
include/tvm/node/structural_hash.h | 51 +-
include/tvm/relax/analysis.h | 6 +-
include/tvm/relax/attrs/index.h | 2 +-
include/tvm/relax/attrs/manipulate.h | 13 +-
include/tvm/relax/attrs/nn.h | 2 +-
include/tvm/relax/attrs/op.h | 2 +-
include/tvm/relax/attrs/search.h | 2 +-
include/tvm/relax/attrs/statistical.h | 2 +-
include/tvm/relax/block_builder.h | 1 -
include/tvm/relax/dataflow_pattern.h | 2 +-
include/tvm/relax/distributed/transform.h | 2 +
include/tvm/relax/exec_builder.h | 1 -
include/tvm/relax/expr_functor.h | 4 +-
include/tvm/relax/nested_msg.h | 4 +-
include/tvm/relax/tir_pattern.h | 2 +-
include/tvm/relax/transform.h | 19 +-
include/tvm/relax/tuning_api.h | 48 +-
include/tvm/runtime/container/array.h | 894 +-----
include/tvm/runtime/container/base.h | 23 -
include/tvm/runtime/container/boxed_primitive.h | 143 -
include/tvm/runtime/container/map.h | 1453 +---------
include/tvm/runtime/container/optional.h | 147 +-
include/tvm/runtime/container/shape_tuple.h | 166 +-
include/tvm/runtime/container/string.h | 515 +---
include/tvm/runtime/container/variant.h | 94 +-
include/tvm/runtime/data_type.h | 221 +-
include/tvm/runtime/device_api.h | 59 +-
include/tvm/runtime/disco/session.h | 22 +-
include/tvm/runtime/logging.h | 106 +-
include/tvm/runtime/memory.h | 182 +-
include/tvm/runtime/memory/memory_manager.h | 7 -
include/tvm/runtime/module.h | 13 +-
include/tvm/runtime/ndarray.h | 296 +-
include/tvm/runtime/object.h | 926 +-----
include/tvm/runtime/packed_func.h | 2976 +++-----------------
include/tvm/runtime/profiling.h | 21 +-
include/tvm/runtime/registry.h | 299 +-
.../tvm/runtime/relax_vm/ndarray_cache_support.h | 1 +
include/tvm/runtime/relax_vm/vm.h | 8 +-
include/tvm/runtime/threading_backend.h | 2 +-
include/tvm/script/ir_builder/base.h | 2 -
include/tvm/script/ir_builder/ir/frame.h | 2 +-
include/tvm/script/ir_builder/relax/frame.h | 2 +-
include/tvm/script/ir_builder/relax/ir.h | 2 +-
include/tvm/script/ir_builder/tir/frame.h | 8 +-
include/tvm/script/ir_builder/tir/ir.h | 27 +-
include/tvm/script/printer/doc.h | 14 +-
include/tvm/script/printer/ir_docsifier.h | 37 +-
include/tvm/script/printer/ir_docsifier_functor.h | 6 +-
include/tvm/target/tag.h | 12 +-
include/tvm/target/target.h | 35 +-
include/tvm/target/target_kind.h | 21 +-
include/tvm/te/operation.h | 26 +-
include/tvm/te/tensor.h | 6 +-
include/tvm/tir/buffer.h | 11 +-
include/tvm/tir/builtin.h | 55 +-
include/tvm/tir/data_type_rewriter.h | 2 +-
include/tvm/tir/expr.h | 61 +-
include/tvm/tir/expr_functor.h | 1 +
include/tvm/tir/function.h | 4 +-
include/tvm/tir/schedule/instruction.h | 29 +-
include/tvm/tir/schedule/schedule.h | 10 +-
include/tvm/tir/schedule/trace.h | 18 +-
include/tvm/tir/stmt.h | 44 +-
include/tvm/tir/stmt_functor.h | 1 +
include/tvm/tir/transform.h | 12 +-
include/tvm/tir/utils.h | 2 +-
include/tvm/tir/var.h | 10 +-
include/tvm/topi/detail/extern.h | 2 +-
include/tvm/topi/nn/group_norm.h | 4 +-
include/tvm/topi/nn/pooling.h | 4 +-
include/tvm/topi/nn/softmax.h | 2 +-
include/tvm/topi/reduction.h | 34 +-
include/tvm/topi/transform.h | 28 +-
include/tvm/topi/utils.h | 11 +-
jvm/README.md | 2 +-
jvm/core/src/main/java/org/apache/tvm/Device.java | 42 +-
jvm/native/linux-x86_64/pom.xml | 1 +
jvm/native/osx-x86_64/pom.xml | 1 +
python/setup.py | 13 +-
python/tvm/__init__.py | 7 +-
python/tvm/_ffi/__init__.py | 7 +-
python/tvm/_ffi/_cy3/__init__.py | 17 -
python/tvm/_ffi/_cython/base.pxi | 226 --
python/tvm/_ffi/_cython/ndarray.pxi | 180 --
python/tvm/_ffi/_cython/object.pxi | 162 --
python/tvm/_ffi/_cython/packed_func.pxi | 378 ---
python/tvm/_ffi/base.py | 421 +--
python/tvm/_ffi/libinfo.py | 2 +
python/tvm/_ffi/registry.py | 305 +-
python/tvm/_ffi/runtime_ctypes.py | 696 -----
python/tvm/contrib/cutlass/attention_operation.py | 20 +-
python/tvm/contrib/cutlass/conv2d_operation.py | 5 +-
python/tvm/contrib/cutlass/gemm_operation.py | 10 +-
python/tvm/contrib/cutlass/gen_conv2d.py | 1 -
python/tvm/contrib/cutlass/layer_norm_operation.py | 5 +-
python/tvm/contrib/cutlass/rms_norm_operation.py | 5 +-
python/tvm/contrib/download.py | 1 -
python/tvm/contrib/msc/core/frontend/translate.py | 4 +-
python/tvm/contrib/msc/core/tools/prune/pruner.py | 2 +-
python/tvm/contrib/msc/core/utils/info.py | 4 +-
.../framework/tensorrt/tools/quantize/quantizer.py | 2 +-
.../msc/framework/tensorrt/tools/track/tracker.py | 4 +-
.../contrib/msc/framework/torch/codegen/codegen.py | 2 +-
python/tvm/contrib/tvmjs.py | 4 +-
python/tvm/dlight/base/transform.py | 4 +-
python/tvm/dlight/base/utils.py | 2 +-
python/tvm/dlight/benchmark/bench.py | 2 +-
python/tvm/error.py | 17 +-
python/tvm/ffi/.gitignore | 2 +
python/tvm/ffi/__init__.py | 68 +
.../tvm/ffi/_ffi_api.py | 7 +-
python/tvm/ffi/container.py | 200 ++
python/tvm/ffi/convert.py | 63 +
python/tvm/ffi/cython/base.pxi | 243 ++
python/tvm/{_ffi/_cython => ffi/cython}/core.pyx | 7 +-
python/tvm/ffi/cython/device.pxi | 168 ++
python/tvm/ffi/cython/dtype.pxi | 109 +
python/tvm/ffi/cython/error.pxi | 170 ++
python/tvm/ffi/cython/function.pxi | 291 ++
python/tvm/ffi/cython/ndarray.pxi | 290 ++
python/tvm/ffi/cython/object.pxi | 282 ++
python/tvm/ffi/cython/string.pxi | 85 +
python/tvm/ffi/dtype.py | 135 +
python/tvm/ffi/error.py | 125 +
python/tvm/ffi/ndarray.py | 255 ++
python/tvm/ffi/registry.py | 178 ++
python/tvm/ir/container.py | 98 +-
python/tvm/ir/instrument.py | 2 -
python/tvm/ir/transform.py | 2 -
python/tvm/meta_schedule/cost_model/mlp_model.py | 3 +-
.../meta_schedule/post_optimization/post_opt.py | 1 -
.../tvm/meta_schedule/testing/space_generation.py | 1 +
python/tvm/meta_schedule/tune_context.py | 3 +-
python/tvm/meta_schedule/utils.py | 3 +-
python/tvm/relax/backend/adreno/clml.py | 7 -
python/tvm/relax/backend/cuda/cublas.py | 4 +-
python/tvm/relax/backend/dispatch_sort_scan.py | 4 +-
python/tvm/relax/backend/gpu_generic/cumsum.py | 2 +-
python/tvm/relax/backend/gpu_generic/sampling.py | 2 +-
python/tvm/relax/expr.py | 10 +-
python/tvm/relax/frontend/nn/core.py | 2 +-
python/tvm/relax/frontend/nn/extern.py | 3 +-
python/tvm/relax/frontend/nn/llm/kv_cache.py | 34 +-
.../relax/frontend/nn/llm/position_embedding.py | 6 +-
python/tvm/relax/frontend/nn/llm/tree_attn.py | 4 +-
.../frontend/torch/base_fx_graph_translator.py | 55 +
.../frontend/torch/exported_program_translator.py | 2 +
python/tvm/relax/frontend/torch/fx_translator.py | 1 +
python/tvm/relax/op/__init__.py | 1 +
python/tvm/relax/op/manipulate.py | 23 +
.../tvm/relax/transform/lazy_transform_params.py | 4 +-
.../tvm/relax/transform/legalize_ops/inspect_op.py | 6 +-
.../tvm/relax/transform/legalize_ops/manipulate.py | 21 +-
python/tvm/relax/transform/legalize_ops/search.py | 2 +-
python/tvm/relax/transform/transform.py | 4 -
.../transform/tuning_api/default_functions.py | 1 +
python/tvm/relax/utils.py | 6 +-
python/tvm/rpc/base.py | 1 +
python/tvm/rpc/client.py | 6 +-
python/tvm/rpc/proxy.py | 1 -
python/tvm/rpc/server.py | 1 -
python/tvm/runtime/__init__.py | 12 +-
python/tvm/runtime/_ffi_node_api.py | 7 +-
python/tvm/runtime/container.py | 142 +-
python/tvm/runtime/device.py | 337 +++
python/tvm/runtime/disco/session.py | 8 +-
python/tvm/runtime/executable.py | 3 +-
python/tvm/runtime/module.py | 51 +-
python/tvm/runtime/ndarray.py | 458 +--
python/tvm/runtime/object.py | 115 +-
python/tvm/runtime/object_generic.py | 95 +-
python/tvm/runtime/packed_func.py | 35 +-
python/tvm/runtime/params.py | 2 +-
.../tvm/script/ir_builder/relax/distributed/ir.py | 2 +-
python/tvm/script/ir_builder/relax/ir.py | 4 +-
python/tvm/script/ir_builder/tir/ir.py | 4 +-
python/tvm/script/parser/tir/operation.py | 3 +-
python/tvm/target/detect_target.py | 4 +-
python/tvm/target/target.py | 8 +-
python/tvm/te/tensor.py | 5 +-
python/tvm/testing/__init__.py | 2 +-
python/tvm/testing/plugin.py | 1 -
python/tvm/tir/__init__.py | 3 +-
python/tvm/tir/expr.py | 6 +-
python/tvm/tir/op.py | 68 +-
python/tvm/tir/schedule/schedule.py | 8 +-
python/tvm/tir/schedule/trace.py | 2 +-
python/tvm/tir/transform/function_pass.py | 3 +-
python/tvm/tir/transform/transform.py | 11 -
python/tvm/topi/image/resize.py | 3 -
python/tvm/topi/testing/crop_and_resize_python.py | 1 -
rust/tvm-rt/src/array.rs | 219 --
rust/tvm-rt/src/lib.rs | 2 -
rust/tvm-rt/src/map.rs | 275 --
rust/tvm-rt/src/ndarray.rs | 4 +-
src/arith/analyzer.cc | 55 +-
src/arith/int_constraints.cc | 6 +-
src/arith/solve_linear_equation.cc | 25 +-
src/arith/solve_linear_inequality.cc | 39 +-
src/contrib/msc/core/codegen/codegen_utils.h | 66 +-
src/contrib/msc/core/codegen/cpp_codegen.h | 25 +-
src/contrib/msc/core/codegen/py_codegen.h | 4 +-
src/contrib/msc/core/ir/graph.cc | 22 +-
src/contrib/msc/core/ir/graph_builder.cc | 4 +-
src/contrib/msc/core/ir/graph_builder.h | 18 +-
src/contrib/msc/core/printer/msc_base_printer.cc | 23 +-
src/contrib/msc/core/printer/prototxt_printer.cc | 20 +-
src/contrib/msc/core/printer/prototxt_printer.h | 10 +-
.../msc/core/transform/bind_named_params.cc | 3 +-
src/contrib/msc/core/transform/bind_shape.cc | 3 +-
src/contrib/msc/core/transform/fuse_tuple.cc | 9 +-
src/contrib/msc/core/transform/inline_params.cc | 5 +-
src/contrib/msc/core/transform/set_byoc_attrs.cc | 5 +-
src/contrib/msc/core/transform/set_expr_layout.cc | 24 +-
src/contrib/msc/core/transform/set_expr_name.cc | 6 +-
src/contrib/msc/core/utils.cc | 8 +-
src/contrib/msc/framework/tensorflow/codegen.cc | 2 +-
src/contrib/msc/framework/tensorrt/codegen.cc | 22 +-
.../msc/framework/tensorrt/transform_tensorrt.cc | 15 +-
src/contrib/msc/framework/torch/codegen.cc | 2 +-
src/contrib/msc/framework/tvm/codegen.cc | 2 +-
src/ir/apply_pass_to_function.cc | 2 +-
src/ir/attr_functor.h | 4 +-
src/ir/attrs.cc | 91 +-
src/ir/diagnostic.cc | 8 +-
src/ir/env_func.cc | 10 +-
src/ir/expr.cc | 37 -
src/ir/function.cc | 18 +-
src/ir/global_var_supply.cc | 6 +-
src/ir/module.cc | 27 +-
src/ir/name_supply.cc | 9 +-
src/ir/op.cc | 18 +-
src/ir/replace_global_vars.cc | 1 +
src/ir/source_map.cc | 4 +-
src/ir/transform.cc | 106 +-
src/meta_schedule/arg_info.cc | 16 +-
src/meta_schedule/builder/builder.cc | 2 +-
src/meta_schedule/cost_model/cost_model.cc | 7 +-
src/meta_schedule/database/database.cc | 68 +-
src/meta_schedule/database/database_utils.cc | 66 +-
src/meta_schedule/database/json_database.cc | 21 +-
.../feature_extractor/feature_extractor.cc | 2 +-
.../measure_callback/measure_callback.cc | 2 +-
.../measure_callback/remove_build_artifact.cc | 5 +-
.../mutator/mutate_compute_location.cc | 16 +-
src/meta_schedule/mutator/mutate_parallel.cc | 6 +-
src/meta_schedule/mutator/mutate_thread_binding.cc | 6 +-
src/meta_schedule/mutator/mutate_tile_size.cc | 16 +-
src/meta_schedule/mutator/mutate_unroll.cc | 7 +-
src/meta_schedule/mutator/mutator.cc | 4 +-
src/meta_schedule/postproc/postproc.cc | 6 +-
src/meta_schedule/postproc/rewrite_layout.cc | 6 +-
.../postproc/rewrite_parallel_vectorize_unroll.cc | 16 +-
src/meta_schedule/postproc/verify_gpu_code.cc | 4 +-
src/meta_schedule/profiler.cc | 4 +-
src/meta_schedule/runner/runner.cc | 11 +-
src/meta_schedule/schedule/cuda/thread_bind.cc | 6 +-
.../schedule_rule/apply_custom_rule.cc | 7 +-
.../schedule_rule/cross_thread_reduction.cc | 12 +-
.../schedule_rule/multi_level_tiling.cc | 11 +-
.../schedule_rule/multi_level_tiling.h | 6 +-
.../multi_level_tiling_tensor_core.cc | 2 +-
.../multi_level_tiling_wide_vector.cc | 8 +-
.../multi_level_tiling_with_intrin.cc | 10 +-
.../schedule_rule/parallel_vectorize_unroll.cc | 6 +-
src/meta_schedule/schedule_rule/schedule_rule.cc | 110 +-
src/meta_schedule/search_strategy/replay_trace.cc | 8 +-
.../search_strategy/search_strategy.cc | 16 +-
src/meta_schedule/space_generator/schedule_fn.cc | 10 +-
.../space_generator/space_generator.cc | 20 +-
src/meta_schedule/task_scheduler/task_scheduler.cc | 13 +-
src/meta_schedule/trace_apply.cc | 26 +-
src/meta_schedule/tune_context.cc | 5 +-
src/meta_schedule/utils.h | 66 +-
src/node/attr_registry.h | 22 +-
src/node/boxed_primitive.cc | 134 -
src/node/container_printing.cc | 12 +-
src/node/object_path.cc | 36 +-
src/node/reflection.cc | 90 +-
src/node/repr_printer.cc | 50 +-
src/node/script_printer.cc | 36 +-
src/node/serialization.cc | 286 +-
src/node/structural_equal.cc | 135 +-
src/node/structural_hash.cc | 234 +-
src/relax/analysis/graph_partitioner.h | 2 +-
src/relax/analysis/udchain.cc | 4 +-
src/relax/analysis/well_formed.cc | 6 +-
src/relax/backend/contrib/clml/codegen.cc | 8 +-
.../backend/contrib/codegen_json/codegen_json.h | 22 +-
src/relax/backend/contrib/cublas/codegen.cc | 7 +-
src/relax/backend/contrib/cudnn/codegen.cc | 7 +-
src/relax/backend/contrib/cutlass/codegen.cc | 42 +-
src/relax/backend/contrib/dnnl/codegen.cc | 7 +-
src/relax/backend/contrib/hipblas/codegen.cc | 5 +-
src/relax/backend/contrib/nnapi/codegen.cc | 9 +-
src/relax/backend/contrib/tensorrt/codegen.cc | 8 +-
src/relax/backend/contrib/utils.h | 35 +-
src/relax/backend/task_extraction.cc | 8 +-
src/relax/backend/vm/codegen_vm.cc | 4 +-
src/relax/backend/vm/codegen_vm_tir.cc | 3 -
src/relax/backend/vm/exec_builder.cc | 24 +-
src/relax/backend/vm/lower_runtime_builtin.cc | 7 +-
src/relax/backend/vm/vm_shape_lower.cc | 9 +-
.../distributed/transform/legalize_redistribute.cc | 5 +-
src/relax/distributed/transform/lower_distir.cc | 3 +-
.../transform/lower_global_view_to_local_view.cc | 3 +-
.../distributed/transform/propagate_sharding.cc | 5 +-
src/relax/ir/binding_rewrite.cc | 2 +-
src/relax/ir/block_builder.cc | 29 +-
src/relax/ir/dataflow_matcher.cc | 54 +-
src/relax/ir/dataflow_pattern.cc | 4 +-
src/relax/ir/expr.cc | 2 +-
src/relax/ir/py_expr_functor.cc | 2 +-
src/relax/ir/struct_info.cc | 9 +-
src/relax/ir/transform.cc | 62 +-
src/relax/op/image/resize.cc | 4 +-
src/relax/op/image/resize.h | 2 +-
src/relax/op/nn/convolution.cc | 20 +-
src/relax/op/nn/convolution.h | 10 +-
src/relax/op/nn/nn.cc | 2 +-
src/relax/op/op.cc | 2 +-
src/relax/op/op_common.h | 2 +-
src/relax/op/tensor/create.cc | 27 +-
src/relax/op/tensor/create.h | 11 +-
src/relax/op/tensor/grad.cc | 2 +-
src/relax/op/tensor/grad.h | 2 +-
src/relax/op/tensor/index.cc | 11 +-
src/relax/op/tensor/index.h | 2 +-
src/relax/op/tensor/inspect.cc | 4 +-
src/relax/op/tensor/linear_algebra.cc | 4 +-
src/relax/op/tensor/linear_algebra.h | 2 +-
src/relax/op/tensor/manipulate.cc | 142 +-
src/relax/op/tensor/manipulate.h | 13 +-
src/relax/op/tensor/search.cc | 12 +-
src/relax/op/tensor/search.h | 4 +-
src/relax/op/tensor/sorting.cc | 1 +
src/relax/op/tensor/statistical.cc | 10 +-
src/relax/op/tensor/statistical.h | 4 +-
src/relax/training/utils.cc | 3 +-
src/relax/transform/adjust_matmul_order.cc | 2 +-
src/relax/transform/allocate_workspace.cc | 3 +-
src/relax/transform/alter_op_impl.cc | 15 +-
src/relax/transform/annotate_tir_op_pattern.cc | 2 +-
.../transform/attach_attr_layout_free_buffers.cc | 7 +-
src/relax/transform/attach_global_symbol.cc | 3 +-
src/relax/transform/bind_params.cc | 5 +-
src/relax/transform/bundle_model_params.cc | 3 +-
src/relax/transform/call_tir_rewrite.cc | 3 +-
src/relax/transform/combine_parallel_matmul.cc | 11 +-
src/relax/transform/compute_prim_value.cc | 5 +-
src/relax/transform/convert_dataflow.cc | 7 +-
src/relax/transform/dead_code_elimination.cc | 5 +-
src/relax/transform/eliminate_common_subexpr.cc | 7 +-
src/relax/transform/expand_tuple_arguments.cc | 3 +-
src/relax/transform/few_shot_tuning.cc | 23 +-
src/relax/transform/fold_constant.cc | 24 +-
src/relax/transform/fuse_ops.cc | 21 +-
src/relax/transform/fuse_tir.cc | 8 +-
src/relax/transform/gradient.cc | 10 +-
src/relax/transform/infer_amp_utils.cc | 8 +-
src/relax/transform/infer_amp_utils.h | 4 +-
src/relax/transform/kill_after_last_use.cc | 7 +-
src/relax/transform/lazy_transform_params.cc | 4 +-
src/relax/transform/legalize_ops.cc | 5 +-
src/relax/transform/lift_transform_params.cc | 18 +-
src/relax/transform/lower_alloc_tensor.cc | 8 +-
src/relax/transform/merge_composite_functions.cc | 7 +-
src/relax/transform/meta_schedule.cc | 27 +-
src/relax/transform/normalize.cc | 10 +-
src/relax/transform/realize_vdevice.cc | 4 +-
src/relax/transform/remove_purity_checking.cc | 9 +-
src/relax/transform/remove_unused_outputs.cc | 3 +-
src/relax/transform/remove_unused_parameters.cc | 3 +-
.../transform/reorder_permute_dims_after_concat.cc | 10 +-
src/relax/transform/reorder_take_after_matmul.cc | 8 +-
src/relax/transform/rewrite_cuda_graph.cc | 6 +-
src/relax/transform/rewrite_dataflow_reshape.cc | 9 +-
src/relax/transform/run_codegen.cc | 25 +-
src/relax/transform/split_call_tir_by_pattern.cc | 6 +-
.../transform/split_layout_rewrite_preproc.cc | 4 +-
src/relax/transform/static_plan_block_memory.cc | 16 +-
src/relax/transform/to_mixed_precision.cc | 9 +-
src/relax/transform/to_non_dataflow.cc | 5 +-
src/relax/transform/tuning_api/database.cc | 64 +-
src/relax/transform/tuning_api/primitives.cc | 92 +-
src/relax/transform/update_vdevice.cc | 3 +-
src/runtime/boxed_primitive.cc | 65 -
src/runtime/c_runtime_api.cc | 167 +-
src/runtime/const_loader_module.cc | 5 +-
src/runtime/container.cc | 104 +-
src/runtime/contrib/amx/amx_config.cc | 8 +-
src/runtime/contrib/cblas/cblas.cc | 27 +-
src/runtime/contrib/cblas/dnnl_blas.cc | 4 +-
src/runtime/contrib/cblas/gemm_common.h | 42 +-
src/runtime/contrib/cblas/mkl.cc | 46 +-
src/runtime/contrib/clml/clml_runtime.cc | 8 +-
src/runtime/contrib/coreml/coreml_runtime.mm | 2 +-
src/runtime/contrib/cublas/cublas.cc | 162 +-
src/runtime/contrib/cublas/cublas_json_runtime.cc | 13 +-
src/runtime/contrib/cudnn/conv_backward.cc | 108 +-
src/runtime/contrib/cudnn/conv_forward.cc | 108 +-
src/runtime/contrib/cudnn/cudnn_json_runtime.cc | 2 +-
src/runtime/contrib/cudnn/cudnn_utils.cc | 6 +-
src/runtime/contrib/cudnn/softmax.cc | 12 +-
src/runtime/contrib/curand/curand.cc | 5 +-
src/runtime/contrib/cutlass/fp16_group_gemm.cu | 5 +-
.../contrib/cutlass/fp8_blockwise_scaled_gemm.cu | 12 +-
src/runtime/contrib/cutlass/fp8_gemm.cu | 6 +-
src/runtime/contrib/cutlass/fp8_group_gemm.cu | 5 +-
src/runtime/contrib/dnnl/dnnl.cc | 19 +-
src/runtime/contrib/dnnl/dnnl_json_runtime.cc | 9 +-
src/runtime/contrib/edgetpu/edgetpu_runtime.cc | 7 +-
src/runtime/contrib/hipblas/hipblas.cc | 69 +-
.../contrib/hipblas/hipblas_json_runtime.cc | 8 +-
src/runtime/contrib/json/json_node.h | 2 +-
src/runtime/contrib/json/json_runtime.h | 18 +-
src/runtime/contrib/miopen/conv_forward.cc | 251 +-
src/runtime/contrib/miopen/miopen_utils.cc | 4 +-
src/runtime/contrib/miopen/softmax.cc | 12 +-
src/runtime/contrib/mps/conv.mm | 118 +-
src/runtime/contrib/mps/gemm.mm | 12 +-
src/runtime/contrib/mps/mps_utils.mm | 4 +-
src/runtime/contrib/mrvl/mrvl_runtime.cc | 2 +-
src/runtime/contrib/mrvl/mrvl_sw_runtime_lib.cc | 12 +-
src/runtime/contrib/msc/tensorrt_runtime.cc | 8 +-
src/runtime/contrib/nnapi/nnapi_ops.cc | 2 +-
src/runtime/contrib/papi/papi.cc | 4 +-
src/runtime/contrib/random/mt_random_engine.cc | 4 +-
src/runtime/contrib/random/random.cc | 118 +-
src/runtime/contrib/rocblas/rocblas.cc | 84 +-
src/runtime/contrib/sort/sort.cc | 175 +-
src/runtime/contrib/tflite/tflite_runtime.cc | 15 +-
src/runtime/contrib/thrust/thrust.cu | 41 +-
src/runtime/cpu_device_api.cc | 2 +-
src/runtime/cuda/cuda_device_api.cc | 4 +-
src/runtime/cuda/l2_cache_flush.cc | 2 +-
.../contrib/papi.h => src/runtime/debug_compile.cc | 49 +-
src/runtime/disco/bcast_session.cc | 54 +-
src/runtime/disco/bcast_session.h | 2 +-
src/runtime/disco/builtin.cc | 17 +-
src/runtime/disco/cuda_ipc/cuda_ipc_memory.cc | 2 +-
src/runtime/disco/disco_worker.cc | 81 +-
src/runtime/disco/distributed/socket_session.cc | 111 +-
src/runtime/disco/loader.cc | 20 +-
src/runtime/disco/message_queue.h | 25 +-
src/runtime/disco/nccl/nccl.cc | 5 +-
src/runtime/disco/nccl/nccl_context.h | 6 +-
src/runtime/disco/process_session.cc | 37 +-
src/runtime/disco/protocol.h | 49 +-
src/runtime/disco/session.cc | 31 +-
src/runtime/disco/threaded_session.cc | 14 +-
src/runtime/dso_library.cc | 9 +-
src/runtime/file_utils.cc | 12 +-
src/runtime/hexagon/hexagon_common.cc | 9 +-
src/runtime/hexagon/hexagon_device_api.cc | 153 +-
src/runtime/hexagon/ops/conv2d.h | 2 +-
src/runtime/hexagon/ops/conv2d_fp16_hvx.cc | 3 +-
src/runtime/hexagon/ops/conv2d_quant_hvx.cc | 2 +-
src/runtime/hexagon/ops/conv_utils.cc | 4 +-
src/runtime/hexagon/rpc/android/session.cc | 8 +-
src/runtime/hexagon/rpc/hexagon/rpc_server.cc | 20 +-
src/runtime/hexagon/rpc/simulator/rpc_server.cc | 22 +-
src/runtime/hexagon/rpc/simulator/session.cc | 8 +-
src/runtime/library_module.cc | 53 +-
src/runtime/library_module.h | 8 +-
src/runtime/memory/memory_manager.cc | 161 +-
src/runtime/memory/pooled_allocator.h | 2 +-
src/runtime/metal/metal_device_api.mm | 2 +-
src/runtime/metal/metal_module.mm | 2 +-
src/runtime/minrpc/minrpc_server.h | 18 +-
src/runtime/minrpc/rpc_reference.h | 10 +-
src/runtime/module.cc | 59 +-
src/runtime/ndarray.cc | 238 +-
src/runtime/object.cc | 212 +-
src/runtime/object_internal.h | 26 +-
src/runtime/opencl/opencl_device_api.cc | 77 +-
src/runtime/opencl/opencl_module.cc | 4 +-
src/runtime/pack_args.h | 35 +-
src/runtime/profiling.cc | 94 +-
src/runtime/regex.cc | 11 +-
src/runtime/registry.cc | 91 +-
src/runtime/relax_vm/attn_backend.h | 31 +-
src/runtime/relax_vm/attn_utils.h | 2 +-
src/runtime/relax_vm/builtin.cc | 209 +-
src/runtime/relax_vm/cuda/cuda_graph_builtin.cc | 37 +-
src/runtime/relax_vm/executable.cc | 60 +-
src/runtime/relax_vm/kv_state.cc | 52 +-
src/runtime/relax_vm/kv_state.h | 6 +-
src/runtime/relax_vm/lm_support.cc | 8 +-
src/runtime/relax_vm/ndarray_cache_support.cc | 53 +-
src/runtime/relax_vm/paged_kv_cache.cc | 89 +-
src/runtime/relax_vm/rnn_state.cc | 1 -
src/runtime/relax_vm/vm.cc | 226 +-
src/runtime/rocm/rocm_device_api.cc | 4 +-
src/runtime/rpc/rpc_channel.cc | 18 +-
src/runtime/rpc/rpc_device_api.cc | 2 +-
src/runtime/rpc/rpc_endpoint.cc | 245 +-
src/runtime/rpc/rpc_endpoint.h | 10 +-
src/runtime/rpc/rpc_event_impl.cc | 2 +-
src/runtime/rpc/rpc_local_session.cc | 125 +-
src/runtime/rpc/rpc_local_session.h | 6 +-
src/runtime/rpc/rpc_module.cc | 165 +-
src/runtime/rpc/rpc_pipe_impl.cc | 6 +-
src/runtime/rpc/rpc_server_env.cc | 27 +-
src/runtime/rpc/rpc_session.cc | 30 +-
src/runtime/rpc/rpc_session.h | 23 +-
src/runtime/rpc/rpc_socket_impl.cc | 23 +-
src/runtime/static_library.h | 1 +
src/runtime/system_library.cc | 4 +-
src/runtime/thread_pool.cc | 32 +-
src/runtime/thread_storage_scope.h | 8 +-
src/runtime/threading_backend.cc | 2 +-
src/runtime/vulkan/vulkan_device_api.cc | 2 +-
src/runtime/vulkan/vulkan_wrapped_func.cc | 2 +-
src/script/ir_builder/base.cc | 8 +-
src/script/ir_builder/ir/ir.cc | 4 +-
src/script/ir_builder/relax/ir.cc | 2 +-
src/script/ir_builder/tir/frame.cc | 2 +-
src/script/ir_builder/tir/ir.cc | 40 +-
src/script/printer/doc.cc | 7 +-
.../printer/doc_printer/python_doc_printer.cc | 2 +-
src/script/printer/ir/ir.cc | 2 +-
src/script/printer/ir/misc.cc | 29 +-
src/script/printer/ir_docsifier.cc | 25 +-
src/script/printer/legacy_repr.cc | 18 +-
src/script/printer/relax/call.cc | 23 +-
src/script/printer/relax/function.cc | 2 +-
src/script/printer/relax/tir.cc | 1 +
src/script/printer/tir/buffer.cc | 1 +
src/script/printer/tir/expr.cc | 5 +-
src/script/printer/tir/function.cc | 2 +-
src/script/printer/tir/ir.cc | 2 +-
src/script/printer/utils.h | 4 +-
src/support/array.h | 12 +-
src/support/errno_handling.h | 4 +-
src/support/ffi_testing.cc | 108 +-
src/support/libinfo.cc | 6 +-
src/support/scalars.cc | 10 +-
src/support/utils.h | 2 +-
src/target/codegen.cc | 16 +-
src/target/datatype/registry.cc | 55 +-
src/target/datatype/registry.h | 23 +-
src/target/llvm/codegen_aarch64.cc | 2 +-
src/target/llvm/codegen_amdgpu.cc | 19 +-
src/target/llvm/codegen_arm.cc | 2 +-
src/target/llvm/codegen_cpu.cc | 221 +-
src/target/llvm/codegen_cpu.h | 28 +-
src/target/llvm/codegen_hexagon.cc | 98 +-
src/target/llvm/codegen_llvm.cc | 14 +-
src/target/llvm/codegen_llvm.h | 6 +-
src/target/llvm/codegen_nvptx.cc | 8 +-
src/target/llvm/codegen_x86_64.cc | 2 +-
src/target/llvm/intrin_rule_hexagon.cc | 36 +-
src/target/llvm/llvm_instance.cc | 20 +-
src/target/llvm/llvm_module.cc | 9 +-
src/target/opt/build_cuda_on.cc | 13 +-
src/target/parsers/aprofile.cc | 10 +-
src/target/parsers/cpu.cc | 10 +-
src/target/parsers/mprofile.cc | 6 +-
src/target/source/codegen_c.cc | 52 +-
src/target/source/codegen_c_host.cc | 128 +-
src/target/source/codegen_c_host.h | 15 +-
src/target/source/codegen_cuda.cc | 6 +-
src/target/source/codegen_metal.cc | 5 +-
src/target/source/codegen_opencl.cc | 5 +-
src/target/source/source_module.cc | 6 +-
src/target/spirv/ir_builder.h | 2 +-
src/target/spirv/spirv_utils.cc | 9 +-
src/target/tag.cc | 130 +-
src/target/target.cc | 308 +-
src/target/target_info.cc | 6 +-
src/target/target_kind.cc | 158 +-
src/te/operation/compute_op.cc | 15 +-
src/te/operation/create_primfunc.cc | 49 +-
src/te/operation/extern_op.cc | 9 +-
src/te/operation/placeholder_op.cc | 1 +
src/te/operation/scan_op.cc | 14 +-
src/te/tensor.cc | 6 +-
src/tir/analysis/deep_equal.cc | 2 +-
src/tir/analysis/stmt_finding.cc | 1 +
src/tir/analysis/var_use_def_analysis.cc | 19 +-
src/tir/analysis/verify_gpu_code.cc | 2 +-
src/tir/analysis/verify_memory.cc | 2 +-
src/tir/analysis/verify_ssa.cc | 2 +-
src/tir/ir/block_dependence_info.cc | 2 +-
src/tir/ir/block_scope.cc | 6 +-
src/tir/ir/buffer.cc | 23 +-
src/tir/ir/data_type_rewriter.cc | 16 +-
src/tir/ir/expr.cc | 71 +-
src/tir/ir/function.cc | 6 -
src/tir/ir/specialize.cc | 14 +-
src/tir/ir/stmt.cc | 72 +-
src/tir/ir/stmt_functor.cc | 8 +-
src/tir/ir/transform.cc | 43 +-
src/tir/ir/utils.cc | 24 +-
src/tir/ir/utils.h | 6 +-
src/tir/op/builtin.cc | 8 +-
src/tir/op/op.cc | 38 +-
src/tir/schedule/analysis/analysis.cc | 4 +-
src/tir/schedule/concrete_schedule.cc | 84 +-
src/tir/schedule/concrete_schedule.h | 11 +-
src/tir/schedule/error.h | 2 +-
src/tir/schedule/instruction.cc | 24 +-
src/tir/schedule/instruction_traits.h | 168 +-
src/tir/schedule/ir_comparator.cc | 35 +-
src/tir/schedule/ir_comparator.h | 10 +-
src/tir/schedule/primitive.h | 7 +-
src/tir/schedule/primitive/annotate.cc | 20 +-
.../schedule/primitive/annotate_buffer_access.cc | 4 +-
src/tir/schedule/primitive/block_annotate.cc | 32 +-
src/tir/schedule/primitive/blockize_tensorize.cc | 2 +-
src/tir/schedule/primitive/cache_read_write.cc | 8 +-
src/tir/schedule/primitive/for_kind.cc | 1 -
.../schedule/primitive/layout_transformation.cc | 30 +-
src/tir/schedule/primitive/loop_transformation.cc | 53 +-
src/tir/schedule/primitive/read_write_at.cc | 10 +-
src/tir/schedule/primitive/sampling.cc | 16 +-
src/tir/schedule/schedule.cc | 117 +-
src/tir/schedule/state.cc | 6 +-
src/tir/schedule/trace.cc | 284 +-
src/tir/schedule/traced_schedule.cc | 50 +-
src/tir/schedule/traced_schedule.h | 9 +-
src/tir/schedule/transform.cc | 2 +-
src/tir/schedule/utils.h | 8 +-
src/tir/transforms/compact_buffer_region.cc | 9 +-
src/tir/transforms/default_gpu_schedule.cc | 5 +-
src/tir/transforms/extract_constants.cc | 2 +-
src/tir/transforms/inject_permuted_layout.cc | 6 +-
src/tir/transforms/inject_software_pipeline.cc | 10 +-
src/tir/transforms/ir_utils.cc | 6 +-
src/tir/transforms/ir_utils.h | 3 +-
src/tir/transforms/legalize_packed_calls.cc | 138 -
src/tir/transforms/lift_thread_binding.cc | 12 +-
src/tir/transforms/lower_custom_datatypes.cc | 8 +-
src/tir/transforms/lower_device_kernel_launch.cc | 10 +-
src/tir/transforms/lower_opaque_block.cc | 19 +-
src/tir/transforms/lower_tvm_builtin.cc | 129 +-
src/tir/transforms/make_packed_api.cc | 180 +-
src/tir/transforms/memhammer_lower_auto_copy.cc | 2 +-
src/tir/transforms/memhammer_rewrite_rule.h | 6 +-
src/tir/transforms/primfunc_utils.cc | 6 +-
src/tir/transforms/split_host_device.cc | 4 +-
src/tir/transforms/transform_mma_buffer_layout.cc | 8 +-
src/tir/transforms/unify_thread_binding.cc | 2 +-
src/tir/transforms/unsupported_dtype_legalize.cc | 12 +-
src/topi/broadcast.cc | 30 +-
src/topi/einsum.cc | 4 +-
src/topi/elemwise.cc | 136 +-
src/topi/nn.cc | 168 +-
src/topi/reduction.cc | 38 +-
src/topi/transform.cc | 220 +-
src/topi/utils.cc | 22 +-
src/topi/vision.cc | 4 +-
tests/cpp-runtime/hexagon/hexagon_buffer_tests.cc | 42 +-
.../cpp-runtime/hexagon/hexagon_conv_utils_test.h | 2 +-
.../hexagon/hexagon_device_api_tests.cc | 6 +-
.../cpp-runtime/hexagon/hexagon_user_dma_tests.cc | 4 +-
tests/cpp-runtime/hexagon/run_all_tests.cc | 4 +-
tests/cpp-runtime/hexagon/run_unit_tests.cc | 4 +-
tests/cpp-runtime/opencl/opencl_compile_to_bin.cc | 2 +-
tests/cpp/container_test.cc | 888 ------
tests/cpp/llvm_codegen_registry_test.cc | 18 +-
tests/cpp/nested_msg_test.cc | 2 +-
tests/cpp/object_protocol_test.cc | 4 -
tests/cpp/packed_func_test.cc | 321 ---
tests/cpp/target_test.cc | 81 +-
tests/cpp/tir_scalable_datatype.cc | 12 +-
tests/lint/check_file_type.py | 3 +
tests/lint/cppdocs.sh | 2 +-
tests/lint/cpplint.sh | 1 +
tests/lint/pylintrc | 3 +-
.../test_runtime_ndarray.py | 2 +-
.../test_runtime_packed_func.py | 37 +-
tests/python/codegen/test_target_codegen_cuda.py | 28 -
.../python/codegen/test_target_codegen_cuda_fp4.py | 10 +-
.../python/codegen/test_target_codegen_cuda_fp8.py | 12 +-
tests/python/codegen/test_target_codegen_llvm.py | 21 +-
tests/python/contrib/test_coreml_runtime.py | 1 -
tests/python/contrib/test_hexagon/README_RPC.md | 18 +-
.../python/contrib/test_hexagon/infrastructure.py | 2 +-
.../test_hexagon/test_async_dma_pipeline.py | 3 +-
.../test_hexagon/test_benchmark_elemwise_add.py | 7 +-
.../contrib/test_hexagon/test_dma_builtin.py | 6 +-
.../contrib/test_hexagon/test_parallel_hvx.py | 2 +-
.../test_hexagon/test_parallel_hvx_load_vtcm.py | 4 +-
.../contrib/test_hexagon/test_parallel_scalar.py | 2 +-
.../contrib/test_hexagon/test_vtcm_bandwidth.py | 2 +-
tests/python/contrib/test_msc/test_plugin.py | 6 +-
.../contrib/test_msc/test_translate_tensorrt.py | 4 +-
tests/python/dlight/test_benchmark.py | 8 +-
tests/python/dlight/test_cpu_gemv.py | 32 +-
tests/python/dlight/test_gpu_conv.py | 2 +-
tests/python/dlight/test_gpu_fallback.py | 8 +-
tests/python/dlight/test_gpu_gemv.py | 32 +-
tests/python/dlight/test_gpu_general_reduction.py | 24 +-
tests/python/dlight/test_gpu_low_batch_gemv.py | 16 +-
tests/python/dlight/test_gpu_matmul.py | 22 +-
tests/python/dlight/test_gpu_matmul_tensorize.py | 24 +-
tests/python/dlight/test_gpu_reduction.py | 60 +-
tests/python/dlight/test_gpu_rmsnorm.py | 8 +-
tests/python/dlight/test_gpu_transpose.py | 12 +-
tests/python/dlight/test_primitives.py | 2 +-
tests/python/driver/test_compile.py | 1 +
tests/python/ffi/test_container.py | 81 +
tests/python/{target => ffi}/test_device.py | 43 +-
tests/python/{ir => ffi}/test_dtype.py | 41 +-
tests/python/ffi/test_error.py | 113 +
tests/python/ffi/test_function.py | 165 ++
tests/python/ffi/test_ndarray.py | 49 +
tests/python/ffi/test_string.py | 55 +
tests/python/ir/test_dtype.py | 4 +-
tests/python/ir/test_ir_attrs.py | 4 +-
tests/python/ir/test_ir_container.py | 12 +-
tests/python/ir/test_node_reflection.py | 2 +-
.../meta_schedule/test_meta_schedule_database.py | 2 +-
.../test_meta_schedule_post_order_apply.py | 3 +-
.../meta_schedule/test_meta_schedule_runner.py | 4 +-
.../test_meta_schedule_schedule_rule_mlt.py | 4 +-
.../test_meta_schedule_schedule_rule_mlt_intrin.py | 8 +-
.../test_meta_schedule_schedule_rule_mlt_tc.py | 14 +-
.../meta_schedule/test_meta_schedule_space_cpu.py | 88 +-
.../meta_schedule/test_meta_schedule_space_cuda.py | 34 +-
.../test_meta_schedule_space_cuda_async.py | 8 +-
.../test_meta_schedule_trace_apply.py | 17 +-
tests/python/nightly/test_nnapi/conftest.py | 1 -
tests/python/relax/backend/clml/utils.py | 1 -
.../test_distributed_transform_lower_distir.py | 14 +-
...ributed_transform_lower_global_to_local_view.py | 88 +-
...est_distributed_transform_propagate_sharding.py | 56 +-
.../test_distributed_tvmscript_printer.py | 2 +-
...est_runtime_builtin_kv_cache_transfer_kernel.py | 2 +-
tests/python/relax/test_analysis.py | 6 +-
.../python/relax/test_backend_dispatch_sampling.py | 2 +-
.../relax/test_backend_transform_shape_lower.py | 20 +-
tests/python/relax/test_blockbuilder_emit_te.py | 4 +-
tests/python/relax/test_codegen_cutlass.py | 20 +-
tests/python/relax/test_dataflow_inplace.py | 14 +-
...eliminate_pad_branch_using_buffer_assumption.py | 12 +-
.../relax/test_frontend_from_exported_program.py | 53 +
tests/python/relax/test_frontend_from_fx.py | 60 +
tests/python/relax/test_frontend_nn_op.py | 9 +-
tests/python/relax/test_frontend_onnx.py | 11 -
tests/python/relax/test_op_create.py | 8 +-
tests/python/relax/test_op_grad.py | 8 +-
tests/python/relax/test_op_gradient_numeric.py | 4 +-
tests/python/relax/test_op_index.py | 4 +-
tests/python/relax/test_op_inspect.py | 53 +-
tests/python/relax/test_op_manipulate.py | 89 +-
tests/python/relax/test_op_misc.py | 4 +-
tests/python/relax/test_op_nn.py | 8 +-
tests/python/relax/test_op_set.py | 4 +-
tests/python/relax/test_op_sort.py | 2 +-
tests/python/relax/test_op_statistical.py | 2 +-
.../python/relax/test_optimize_layout_transform.py | 4 +-
.../relax/test_runtime_sampling_flashinfer.py | 2 +-
tests/python/relax/test_struct_info.py | 6 +-
.../relax/test_transform_allocate_workspace.py | 6 +-
tests/python/relax/test_transform_alter_op_impl.py | 2 +-
tests/python/relax/test_transform_codegen_pass.py | 2 +-
tests/python/relax/test_transform_fuse_ops.py | 68 +-
.../relax/test_transform_fuse_ops_by_pattern.py | 30 +-
tests/python/relax/test_transform_fuse_tir.py | 86 +-
.../relax/test_transform_fuse_transpose_matmul.py | 4 +-
.../relax/test_transform_gradient_te_register.py | 16 +-
tests/python/relax/test_transform_legalize_ops.py | 4 +-
.../relax/test_transform_legalize_ops_ccl.py | 4 +-
.../test_transform_legalize_ops_create_datatype.py | 2 +-
.../test_transform_legalize_ops_distributed.py | 2 +-
.../relax/test_transform_legalize_ops_grad.py | 6 +-
..._transform_legalize_ops_index_linear_algebra.py | 16 +-
.../test_transform_legalize_ops_manipulate.py | 21 +-
.../python/relax/test_transform_legalize_ops_nn.py | 22 +-
.../relax/test_transform_legalize_ops_qdq.py | 22 +-
.../relax/test_transform_lift_transform_params.py | 8 +-
.../test_transform_merge_composite_functions.py | 52 +-
.../test_transform_meta_schedule_apply_database.py | 8 +-
.../relax/test_transform_meta_schedule_tuning.py | 5 +-
.../relax/test_transform_rewrite_cuda_graph.py | 8 +-
.../test_transform_rewrite_dataflow_reshape.py | 20 +-
.../test_transform_split_layout_rewrite_preproc.py | 14 +-
.../test_transform_static_plan_block_memory.py | 14 +-
tests/python/relax/test_tvmscript_ir_builder.py | 4 +-
tests/python/relax/test_tvmscript_parser.py | 12 +-
tests/python/relax/test_vm_build.py | 8 +-
tests/python/relax/test_vm_callback_function.py | 11 +-
tests/python/relax/test_vm_codegen_only.py | 7 +-
tests/python/relax/test_vm_codegen_tir.py | 4 +-
tests/python/relax/test_vm_cuda_graph.py | 2 +-
tests/python/relax/test_vm_multi_device.py | 2 +-
tests/python/runtime/test_runtime_container.py | 17 +-
tests/python/runtime/test_runtime_error.py | 11 +-
tests/python/runtime/test_runtime_extension.py | 15 +-
tests/python/runtime/test_runtime_rpc.py | 7 +-
tests/python/target/test_llvm_features_info.py | 2 -
tests/python/target/test_target_target.py | 10 +-
tests/python/target/test_x86_features.py | 11 +-
tests/python/te/test_te_create_primfunc.py | 10 +-
.../test_tir_analysis_verify_well_formed.py | 3 -
tests/python/tir-base/test_tir_base.py | 2 +-
tests/python/tir-base/test_tir_host_func.py | 2 +-
tests/python/tir-base/test_tir_nodes.py | 1 -
.../python/tir-base/test_tir_scalable_datatype.py | 3 +-
.../test_tir_schedule_annotate_buffer_access.py | 14 +-
.../tir-schedule/test_tir_schedule_compute_at.py | 2 +-
.../test_tir_schedule_compute_inline.py | 10 +-
.../python/tir-schedule/test_tir_schedule_error.py | 2 +-
.../tir-schedule/test_tir_schedule_pad_einsum.py | 8 +-
.../test_tir_schedule_read_write_at.py | 12 +-
.../tir-schedule/test_tir_schedule_sampling.py | 2 +-
.../python/tir-schedule/test_tir_schedule_trace.py | 6 +-
.../test_tir_schedule_transform_layout.py | 8 +-
.../test_tir_transform_inject_double_buffer.py | 1 -
.../test_tir_transform_lower_tvm_builtin.py | 194 +-
.../test_tir_transform_make_packed_api.py | 164 +-
...test_tir_transform_memhammer_lower_auto_copy.py | 38 +-
.../test_tir_transform_split_host_device.py | 8 +-
.../tir-transform/test_tir_transform_vectorize.py | 12 +-
.../test_transform_default_gpu_schedule.py | 14 +-
tests/python/tvmscript/test_tvmscript_ops.py | 1 -
.../python/tvmscript/test_tvmscript_printer_doc.py | 2 -
.../test_tvmscript_printer_structural_equal.py | 5 +-
tests/python/tvmscript/test_tvmscript_roundtrip.py | 4 +-
tests/scripts/task_python_unittest.sh | 1 +
tests/scripts/task_rust.sh | 3 +
web/Makefile | 3 +-
web/emcc/tvmjs_support.cc | 103 +-
web/emcc/wasm_runtime.cc | 84 +-
web/emcc/webgpu_runtime.cc | 20 +-
web/src/runtime.ts | 50 +-
web/tests/node/test_object.js | 7 +-
932 files changed, 26117 insertions(+), 23446 deletions(-)
copy {3rdparty => ffi/3rdparty}/dlpack (100%)
create mode 100644 ffi/CMakeLists.txt
create mode 100644 ffi/cmake/Utils/AddGoogleTest.cmake
create mode 100644 ffi/cmake/Utils/AddLibbacktrace.cmake
copy cmake/modules/contrib/BNNS.cmake => ffi/cmake/Utils/CxxWarning.cmake (63%)
create mode 100644 ffi/cmake/Utils/Library.cmake
copy golang/sample/Makefile => ffi/cmake/Utils/Sanitizer.cmake (53%)
create mode 100644 ffi/include/tvm/ffi/any.h
create mode 100644 ffi/include/tvm/ffi/base_details.h
create mode 100644 ffi/include/tvm/ffi/c_api.h
create mode 100644 ffi/include/tvm/ffi/cast.h
copy {include/tvm/runtime => ffi/include/tvm/ffi}/container/array.h (59%)
copy include/tvm/runtime/container/base.h =>
ffi/include/tvm/ffi/container/container_details.h (74%)
copy {include/tvm/runtime => ffi/include/tvm/ffi}/container/map.h (62%)
create mode 100644 ffi/include/tvm/ffi/container/ndarray.h
create mode 100644 ffi/include/tvm/ffi/container/shape.h
create mode 100644 ffi/include/tvm/ffi/container/tuple.h
create mode 100644 ffi/include/tvm/ffi/container/variant.h
create mode 100644 ffi/include/tvm/ffi/dtype.h
create mode 100644 ffi/include/tvm/ffi/endian.h
create mode 100644 ffi/include/tvm/ffi/error.h
create mode 100644 ffi/include/tvm/ffi/function.h
create mode 100644 ffi/include/tvm/ffi/function_details.h
copy {include/tvm/runtime => ffi/include/tvm/ffi}/memory.h (85%)
create mode 100644 ffi/include/tvm/ffi/object.h
create mode 100644 ffi/include/tvm/ffi/optional.h
create mode 100644 ffi/include/tvm/ffi/reflection/reflection.h
create mode 100644 ffi/include/tvm/ffi/rvalue_ref.h
create mode 100644 ffi/include/tvm/ffi/string.h
create mode 100644 ffi/include/tvm/ffi/type_traits.h
copy docker/install/ubuntu_install_tensorflow_aarch64.sh =>
ffi/scripts/run_tests.sh (66%)
create mode 100644 ffi/src/ffi/container.cc
create mode 100644 ffi/src/ffi/dtype.cc
create mode 100644 ffi/src/ffi/error.cc
create mode 100644 ffi/src/ffi/function.cc
create mode 100644 ffi/src/ffi/ndarray.cc
create mode 100644 ffi/src/ffi/object.cc
create mode 100644 ffi/src/ffi/testing.cc
create mode 100644 ffi/src/ffi/traceback.cc
create mode 100644 ffi/src/ffi/traceback.h
create mode 100644 ffi/src/ffi/traceback_win.cc
create mode 100644 ffi/tests/cpp/CMakeLists.txt
create mode 100644 ffi/tests/cpp/test_any.cc
create mode 100644 ffi/tests/cpp/test_array.cc
copy tests/cpp/topi_ewise_test.cc => ffi/tests/cpp/test_c_ffi_abi.cc (74%)
create mode 100644 ffi/tests/cpp/test_dtype.cc
create mode 100644 ffi/tests/cpp/test_error.cc
create mode 100644 ffi/tests/cpp/test_function.cc
create mode 100644 ffi/tests/cpp/test_map.cc
create mode 100644 ffi/tests/cpp/test_ndarray.cc
create mode 100644 ffi/tests/cpp/test_object.cc
create mode 100644 ffi/tests/cpp/test_optional.cc
create mode 100644 ffi/tests/cpp/test_reflection.cc
create mode 100644 ffi/tests/cpp/test_rvalue_ref.cc
create mode 100644 ffi/tests/cpp/test_shape.cc
create mode 100644 ffi/tests/cpp/test_string.cc
create mode 100644 ffi/tests/cpp/test_tuple.cc
create mode 100644 ffi/tests/cpp/test_variant.cc
create mode 100644 ffi/tests/cpp/testing_object.h
delete mode 100644 include/tvm/runtime/container/boxed_primitive.h
delete mode 100644 python/tvm/_ffi/_cy3/__init__.py
delete mode 100644 python/tvm/_ffi/_cython/base.pxi
delete mode 100644 python/tvm/_ffi/_cython/ndarray.pxi
delete mode 100644 python/tvm/_ffi/_cython/object.pxi
delete mode 100644 python/tvm/_ffi/_cython/packed_func.pxi
delete mode 100644 python/tvm/_ffi/runtime_ctypes.py
create mode 100644 python/tvm/ffi/.gitignore
create mode 100644 python/tvm/ffi/__init__.py
copy conda/recipe/conda_build_config.yaml => python/tvm/ffi/_ffi_api.py (91%)
create mode 100644 python/tvm/ffi/container.py
create mode 100644 python/tvm/ffi/convert.py
create mode 100644 python/tvm/ffi/cython/base.pxi
rename python/tvm/{_ffi/_cython => ffi/cython}/core.pyx (88%)
create mode 100644 python/tvm/ffi/cython/device.pxi
create mode 100644 python/tvm/ffi/cython/dtype.pxi
create mode 100644 python/tvm/ffi/cython/error.pxi
create mode 100644 python/tvm/ffi/cython/function.pxi
create mode 100644 python/tvm/ffi/cython/ndarray.pxi
create mode 100644 python/tvm/ffi/cython/object.pxi
create mode 100644 python/tvm/ffi/cython/string.pxi
create mode 100644 python/tvm/ffi/dtype.py
create mode 100644 python/tvm/ffi/error.py
create mode 100644 python/tvm/ffi/ndarray.py
create mode 100644 python/tvm/ffi/registry.py
create mode 100644 python/tvm/runtime/device.py
delete mode 100644 rust/tvm-rt/src/array.rs
delete mode 100644 rust/tvm-rt/src/map.rs
delete mode 100644 src/node/boxed_primitive.cc
delete mode 100644 src/runtime/boxed_primitive.cc
copy include/tvm/runtime/contrib/papi.h => src/runtime/debug_compile.cc (53%)
delete mode 100644 src/tir/transforms/legalize_packed_calls.cc
delete mode 100644 tests/cpp/container_test.cc
delete mode 100644 tests/cpp/packed_func_test.cc
create mode 100644 tests/python/ffi/test_container.py
rename tests/python/{target => ffi}/test_device.py (62%)
copy tests/python/{ir => ffi}/test_dtype.py (52%)
create mode 100644 tests/python/ffi/test_error.py
create mode 100644 tests/python/ffi/test_function.py
create mode 100644 tests/python/ffi/test_ndarray.py
create mode 100644 tests/python/ffi/test_string.py