This is an automated email from the ASF dual-hosted git repository.
github-actions[bot] pushed a change to branch nightly
in repository https://gitbox.apache.org/repos/asf/tvm.git
from 730459c4ce [Relax][Frontend][TFLite] Support dynamic RANGE scalar
bounds (#19867)
add 5a8dae4d95 [Relax][Frontend][TFLite] Support static hashtable find
(#19879)
add 1e1920bcbd [REFACTOR][IR] Unify PrimExpr type mechanism to PrimType
instead of DataType (#19875)
add 68429af88b [DOCS] Add PyPI install guidance and update install command
(#19883)
No new revisions were added by this update.
Summary of changes:
docs/install/index.rst | 14 +-
.../api/stmt_functor.rst => install/pypi.rst} | 23 +-
docs/tirx/install.rst | 2 +-
include/tvm/ir/base_expr.h | 320 +++++++++
include/tvm/ir/expr.h | 135 +---
include/tvm/ir/type.h | 65 +-
include/tvm/relax/attrs/create.h | 2 +-
include/tvm/relax/attrs/datatype.h | 4 +-
include/tvm/relax/attrs/image.h | 4 +-
include/tvm/relax/attrs/linear_algebra.h | 2 +-
include/tvm/relax/attrs/nn.h | 12 +-
include/tvm/relax/attrs/qdq.h | 2 +-
include/tvm/relax/attrs/sampling.h | 4 +-
include/tvm/relax/attrs/sorting.h | 8 +-
include/tvm/relax/attrs/statistical.h | 2 +-
include/tvm/relax/dataflow_pattern.h | 8 +-
include/tvm/relax/distributed/global_info.h | 1 +
include/tvm/relax/expr.h | 4 +-
include/tvm/relax/transform.h | 5 +-
include/tvm/relax/type.h | 14 +-
include/tvm/runtime/data_type.h | 522 --------------
include/tvm/runtime/disco/builtin.h | 4 +-
include/tvm/runtime/tensor.h | 4 +-
include/tvm/runtime/vm/bytecode.h | 2 +-
include/tvm/runtime/vm/tensor_cache_support.h | 2 +-
include/tvm/s_tir/data_layout.h | 4 +-
include/tvm/s_tir/meta_schedule/arg_info.h | 6 +-
include/tvm/script/printer/config.h | 9 +-
include/tvm/script/printer/doc.h | 10 +-
include/tvm/script/printer/ir_docsifier.h | 2 +-
include/tvm/te/operation.h | 25 +-
include/tvm/te/tensor.h | 8 +-
include/tvm/tirx/buffer.h | 47 +-
include/tvm/tirx/expr.h | 8 +-
include/tvm/tirx/op.h | 166 ++---
include/tvm/tirx/script/builder/ir.h | 147 ++--
include/tvm/tirx/stmt.h | 2 +-
include/tvm/tirx/var.h | 10 +-
include/tvm/topi/broadcast.h | 12 +-
include/tvm/topi/contrib/cublas.h | 4 +-
include/tvm/topi/detail/broadcast.h | 16 +-
include/tvm/topi/detail/extern.h | 13 +-
include/tvm/topi/detail/strided_slice.h | 6 +-
include/tvm/topi/detail/tensor_utils.h | 8 +-
include/tvm/topi/elemwise.h | 111 +--
include/tvm/topi/nn.h | 40 +-
include/tvm/topi/nn/bnn.h | 8 +-
include/tvm/topi/nn/dense.h | 2 +-
include/tvm/topi/nn/dilate.h | 2 +-
include/tvm/topi/nn/group_norm.h | 14 +-
include/tvm/topi/nn/instance_norm.h | 22 +-
include/tvm/topi/nn/layer_norm.h | 21 +-
include/tvm/topi/nn/local_response_norm.h | 9 +-
include/tvm/topi/nn/pooling.h | 31 +-
include/tvm/topi/nn/rms_norm.h | 10 +-
include/tvm/topi/reduction.h | 15 +-
include/tvm/topi/transform.h | 112 +--
python/tvm/backend/cuda/op.py | 8 +-
.../tvm/backend/cuda/operator/intrinsics/math.py | 4 +-
.../tvm/backend/cuda/operator/intrinsics/memory.py | 6 +-
.../tvm/backend/cuda/operator/intrinsics/misc.py | 2 +-
.../tile_primitive/elementwise/ops/unary.py | 4 +-
.../operator/tile_primitive/elementwise/smem.py | 4 +-
.../operator/tile_primitive/gemm_async/tcgen05.py | 2 +-
python/tvm/backend/cuda/script.py | 12 +-
.../tvm/backend/trn/transform/naive_allocator.py | 2 +-
python/tvm/ir/expr.py | 2 -
python/tvm/ir/type.py | 29 +
python/tvm/relax/analysis/analysis.py | 2 +-
python/tvm/relax/backend/adreno/clml.py | 16 +-
python/tvm/relax/backend/dispatch_sampling.py | 2 +-
python/tvm/relax/backend/dispatch_sort_scan.py | 4 +-
python/tvm/relax/expr.py | 3 +
python/tvm/relax/frontend/nn/core.py | 2 +-
python/tvm/relax/frontend/nn/extern.py | 2 +-
python/tvm/relax/frontend/onnx/onnx_frontend.py | 36 +-
.../tvm/relax/frontend/tflite/tflite_frontend.py | 141 +++-
.../frontend/torch/base_fx_graph_translator.py | 38 +-
.../frontend/torch/exported_program_translator.py | 36 +-
python/tvm/relax/frontend/torch/fx_translator.py | 8 +-
python/tvm/relax/op/_op_gradient.py | 3 +
python/tvm/relax/op/create.py | 25 +-
python/tvm/relax/op/datatype.py | 13 +-
python/tvm/relax/op/manipulate.py | 11 +-
python/tvm/relax/op/mask.py | 2 +-
python/tvm/relax/op/statistical.py | 9 +-
python/tvm/relax/script/parser/entry.py | 4 +-
python/tvm/relax/script/parser/parser.py | 5 +-
python/tvm/relax/training/loss.py | 2 +-
python/tvm/relax/training/optimizer.py | 8 +-
python/tvm/relax/training/trainer.py | 4 +-
python/tvm/relax/transform/legalize_ops/common.py | 13 +-
.../relax/transform/legalize_ops/linear_algebra.py | 17 +-
.../tvm/relax/transform/legalize_ops/manipulate.py | 8 +-
python/tvm/relax/transform/legalize_ops/qdq.py | 7 +-
python/tvm/relax/type.py | 6 +-
python/tvm/relax/utils.py | 2 +-
.../tvm/s_tir/dlight/analysis/common_analysis.py | 3 +-
python/tvm/s_tir/dlight/base/utils.py | 5 +-
python/tvm/s_tir/dlight/benchmark/extract.py | 6 +-
python/tvm/s_tir/dlight/benchmark/utils.py | 8 +-
python/tvm/s_tir/dlight/cpu/reduction.py | 4 +-
python/tvm/s_tir/dlight/gpu/gemv.py | 2 +-
python/tvm/s_tir/dlight/gpu/general_reduction.py | 8 +-
python/tvm/s_tir/dlight/gpu/low_batch_gemv.py | 2 +-
python/tvm/s_tir/dlight/gpu/matmul.py | 4 +-
python/tvm/s_tir/schedule/schedule.py | 18 +-
python/tvm/script/parser/core/evaluator.py | 6 +-
python/tvm/target/intrin.py | 12 +-
python/tvm/te/tensor.py | 13 +
python/tvm/tirx/buffer.py | 15 +-
python/tvm/tirx/expr.py | 57 +-
python/tvm/tirx/expr_functor.py | 4 +-
python/tvm/tirx/layout.py | 4 +-
python/tvm/tirx/op.py | 114 +--
python/tvm/tirx/script/builder/external_kernel.py | 7 +-
python/tvm/tirx/script/builder/ir.py | 41 +-
python/tvm/tirx/script/parser/entry.py | 2 +-
python/tvm/tirx/script/parser/operation.py | 73 +-
python/tvm/tirx/script/parser/parser.py | 14 +-
python/tvm/tirx/stmt.py | 4 +-
python/tvm/topi/math.py | 66 +-
python/tvm/topi/nn/elemwise.py | 6 +-
python/tvm/topi/nn/upsampling.py | 40 +-
python/tvm/topi/scatter.py | 4 +-
python/tvm/topi/sort.py | 2 +-
python/tvm/topi/transform.py | 8 +-
src/arith/analyzer.cc | 11 +-
src/arith/bound_deducer.cc | 3 +-
src/arith/canonical_simplify.cc | 106 +--
src/arith/const_fold.h | 134 ++--
src/arith/const_int_bound.cc | 41 +-
src/arith/detect_linear_equation.cc | 24 +-
src/arith/int_constraints.cc | 8 +-
src/arith/int_set.cc | 46 +-
src/arith/ir_mutator_with_analyzer.cc | 9 +-
src/arith/ir_visitor_with_analyzer.cc | 2 +-
src/arith/iter_affine_map.cc | 72 +-
src/arith/pattern_match.h | 26 +-
src/arith/product_normal_form.h | 5 +-
src/arith/rewrite_simplify.cc | 119 ++--
src/arith/solve_linear_equation.cc | 28 +-
src/arith/solve_linear_inequality.cc | 25 +-
src/arith/transitive_comparison_analyzer.cc | 3 +-
src/arith/unwrap_vector_expr.cc | 6 +-
src/arith/z3_prover.cc | 51 +-
src/backend/cuda/codegen/codegen_cuda.cc | 558 ++++++++-------
src/backend/cuda/codegen/codegen_cuda.h | 21 +-
src/backend/cuda/codegen/intrin_rule_cuda.cc | 36 +-
src/backend/cuda/codegen/llvm/codegen_nvptx.cc | 12 +-
src/backend/cuda/codegen/llvm/intrin_rule_nvptx.cc | 9 +-
src/backend/cuda/runtime/cuda_device_api.cc | 37 +-
.../hexagon/codegen/llvm/codegen_hexagon.cc | 42 +-
.../hexagon/codegen/llvm/intrin_rule_hexagon.cc | 40 +-
src/backend/hexagon/runtime/ops/conv2d_fp16_hvx.cc | 4 +-
src/backend/metal/codegen/codegen_metal.cc | 77 +-
src/backend/metal/codegen/codegen_metal.h | 7 +-
src/backend/metal/codegen/intrin_rule_metal.cc | 6 +-
src/backend/opencl/codegen/codegen_opencl.cc | 122 ++--
src/backend/opencl/codegen/codegen_opencl.h | 23 +-
src/backend/opencl/codegen/intrin_rule_opencl.cc | 4 +-
src/backend/opencl/runtime/opencl_common.h | 21 +-
src/backend/opencl/runtime/opencl_device_api.cc | 6 +-
src/backend/opencl/runtime/texture.h | 10 +-
src/backend/rocm/codegen/llvm/codegen_amdgpu.cc | 9 +-
src/backend/rocm/codegen/llvm/intrin_rule_rocm.cc | 22 +-
src/backend/trn/codegen/codegen_trn.cc | 20 +-
src/backend/trn/codegen/codegen_trn.h | 4 +-
src/backend/trn/transform/lower_trainium_layout.cc | 20 +-
src/backend/vulkan/codegen/codegen_spirv.cc | 112 +--
src/backend/vulkan/codegen/codegen_spirv.h | 14 +-
src/backend/vulkan/codegen/intrin_rule_spirv.cc | 13 +-
src/backend/vulkan/codegen/ir_builder.cc | 219 +++---
src/backend/vulkan/codegen/ir_builder.h | 10 +-
src/backend/webgpu/codegen/codegen_webgpu.cc | 150 ++--
src/backend/webgpu/codegen/codegen_webgpu.h | 17 +-
src/backend/webgpu/codegen/intrin_rule_webgpu.cc | 10 +-
src/ir/expr.cc | 150 ++--
src/ir/type.cc | 115 ++-
src/relax/analysis/tir_op_pattern_kind.cc | 7 +-
src/relax/analysis/type_analysis.cc | 18 +-
src/relax/analysis/well_formed.cc | 4 +-
src/relax/backend/contrib/codegen_c/codegen_c.h | 15 +-
.../backend/contrib/codegen_json/codegen_json.h | 10 +-
src/relax/backend/contrib/cublas/codegen.cc | 4 +-
src/relax/backend/contrib/cutlass/codegen.cc | 6 +-
src/relax/backend/contrib/utils.h | 4 +-
src/relax/backend/vm/codegen_vm_tir.cc | 32 +-
src/relax/backend/vm/lower_runtime_builtin.cc | 4 +-
src/relax/backend/vm/vm_shape_lower.cc | 21 +-
src/relax/ir/dataflow_expr_rewriter.cc | 2 +-
src/relax/ir/dataflow_matcher.cc | 3 +-
src/relax/ir/dataflow_pattern.cc | 10 +-
src/relax/ir/dependent_type.cc | 15 +-
src/relax/ir/emit_te.cc | 2 +-
src/relax/ir/expr.cc | 14 +-
src/relax/op/ccl/ccl.cc | 4 +-
src/relax/op/distributed/binary.cc | 2 +-
src/relax/op/distributed/binary.h | 4 +-
src/relax/op/distributed/distributed.cc | 2 +-
src/relax/op/distributed/linear_algebra.cc | 4 +-
src/relax/op/distributed/nn.cc | 4 +-
src/relax/op/distributed/unary.cc | 2 +-
src/relax/op/distributed/unary.h | 7 +-
src/relax/op/image/resize.cc | 20 +-
src/relax/op/image/resize.h | 4 +-
src/relax/op/memory/view.cc | 30 +-
src/relax/op/nn/attention.cc | 2 +-
src/relax/op/nn/convolution.cc | 70 +-
src/relax/op/nn/convolution.h | 16 +-
src/relax/op/nn/nn.cc | 68 +-
src/relax/op/nn/pooling.cc | 6 +-
src/relax/op/op.cc | 20 +-
src/relax/op/op_common.h | 31 +-
src/relax/op/tensor/binary.cc | 8 +-
src/relax/op/tensor/create.cc | 73 +-
src/relax/op/tensor/create.h | 20 +-
src/relax/op/tensor/datatype.cc | 8 +-
src/relax/op/tensor/datatype.h | 4 +-
src/relax/op/tensor/index.cc | 26 +-
src/relax/op/tensor/inspect.cc | 122 ++--
src/relax/op/tensor/inspect.h | 16 +-
src/relax/op/tensor/linear_algebra.cc | 24 +-
src/relax/op/tensor/linear_algebra.h | 2 +-
src/relax/op/tensor/manipulate.cc | 132 ++--
src/relax/op/tensor/qdq.cc | 63 +-
src/relax/op/tensor/qdq.h | 4 +-
src/relax/op/tensor/sampling.cc | 18 +-
src/relax/op/tensor/sampling.h | 3 +-
src/relax/op/tensor/search.cc | 15 +-
src/relax/op/tensor/set.cc | 14 +-
src/relax/op/tensor/sorting.cc | 10 +-
src/relax/op/tensor/sorting.h | 4 +-
src/relax/op/tensor/statistical.cc | 16 +-
src/relax/op/tensor/statistical.h | 4 +-
src/relax/op/tensor/ternary.cc | 4 +-
src/relax/op/tensor/unary.cc | 2 +-
src/relax/op/vision/nms.cc | 38 +-
src/relax/script/printer/dependent_type.cc | 2 +-
src/relax/script/printer/distributed.cc | 4 +-
src/relax/script/printer/expr.cc | 18 +-
src/relax/script/printer/tir.cc | 7 +-
src/relax/transform/adjust_matmul_order.cc | 20 +-
src/relax/transform/allocate_workspace.cc | 4 +-
src/relax/transform/alter_op_impl.cc | 12 +-
src/relax/transform/call_tir_rewrite.cc | 16 +-
src/relax/transform/combine_parallel_matmul.cc | 2 +-
src/relax/transform/compute_prim_value.cc | 7 +-
src/relax/transform/convert_layout.cc | 2 +-
src/relax/transform/dataflow_inplace.cc | 2 +-
src/relax/transform/decompose_ops.cc | 10 +-
src/relax/transform/expand_matmul_of_sum.cc | 3 +-
src/relax/transform/fold_constant.cc | 10 +-
src/relax/transform/fuse_tir.cc | 38 +-
src/relax/transform/gradient.cc | 7 +-
src/relax/transform/infer_amp_utils.cc | 22 +-
src/relax/transform/infer_amp_utils.h | 11 +-
src/relax/transform/lazy_transform_params.cc | 5 +-
src/relax/transform/legalize_ops.cc | 2 +-
src/relax/transform/lower_alloc_tensor.cc | 7 +-
src/relax/transform/remove_unused_outputs.cc | 2 +-
src/relax/transform/remove_unused_parameters.cc | 2 +-
src/relax/transform/reorder_take_after_matmul.cc | 4 +-
src/relax/transform/split_call_tir_by_pattern.cc | 14 +-
.../transform/split_layout_rewrite_preproc.cc | 4 +-
src/relax/transform/static_plan_block_memory.cc | 56 +-
src/relax/transform/to_mixed_precision.cc | 53 +-
src/relax/transform/utils.h | 32 +-
src/relax/utils.cc | 8 +-
src/runtime/extra/contrib/cblas/cblas.cc | 66 +-
src/runtime/extra/contrib/cblas/dnnl_blas.cc | 5 +-
src/runtime/extra/contrib/cblas/gemm_common.h | 17 +-
src/runtime/extra/contrib/cblas/mkl.cc | 57 +-
src/runtime/extra/contrib/coreml/coreml_runtime.mm | 16 +-
src/runtime/extra/contrib/cublas/cublas.cc | 73 +-
src/runtime/extra/contrib/cudnn/conv_backward.cc | 4 +-
src/runtime/extra/contrib/cudnn/conv_forward.cc | 4 +-
src/runtime/extra/contrib/cudnn/cudnn_utils.cc | 2 +-
src/runtime/extra/contrib/cudnn/softmax.cc | 2 -
.../extra/contrib/cutlass/fp16_group_gemm.cuh | 12 +-
.../contrib/cutlass/fp8_groupwise_scaled_gemm.cuh | 50 +-
.../fp8_groupwise_scaled_group_gemm_sm100.cu | 17 +-
src/runtime/extra/contrib/dnnl/dnnl_utils.cc | 8 +-
src/runtime/extra/contrib/dnnl/dnnl_utils.h | 2 +-
src/runtime/extra/contrib/hipblas/hipblas.cc | 56 +-
src/runtime/extra/contrib/json/json_node.h | 2 +-
.../extra/contrib/nvshmem/memory_allocator.cc | 4 +-
src/runtime/extra/contrib/random/random.cc | 4 +-
src/runtime/extra/contrib/sort/sort.cc | 4 +-
src/runtime/extra/contrib/vllm/cache_alloc.cc | 4 +-
src/runtime/extra/contrib/vllm/cache_kernels.cu | 6 +-
src/runtime/extra/disco/builtin.cc | 4 +-
.../extra/disco/cuda_ipc/cuda_ipc_memory.cc | 10 +-
.../extra/disco/cuda_ipc/custom_allreduce.cc | 2 +-
src/runtime/extra/disco/loader.cc | 9 +-
src/runtime/extra/disco/nccl/nccl.cc | 20 +-
src/runtime/extra/disco/nccl/nccl_context.h | 26 +-
src/runtime/tensor.cc | 8 +-
src/runtime/vm/attn_backend.h | 4 +-
src/runtime/vm/attn_utils.h | 8 +-
src/runtime/vm/builtin.cc | 31 +-
src/runtime/vm/executable.cc | 3 +-
src/runtime/vm/lm_support.cc | 23 +-
src/runtime/vm/paged_kv_cache.cc | 22 +-
src/runtime/vm/rnn_state.cc | 2 +-
src/runtime/vm/tensor_cache_support.cc | 4 +-
src/s_tir/analysis/calculate_allocated_memory.cc | 2 +-
src/s_tir/analysis/estimate_flops.cc | 10 +-
.../analysis/sblock_access_region_detector.cc | 2 +-
src/s_tir/analysis/verify_gpu_code.cc | 57 +-
src/s_tir/backend/adreno/inject_texture_alloc.cc | 4 +-
src/s_tir/backend/adreno/texture_flatten.cc | 4 +-
src/s_tir/data_layout.cc | 42 +-
src/s_tir/meta_schedule/arg_info.cc | 13 +-
src/s_tir/meta_schedule/database/database_utils.cc | 5 +-
.../feature_extractor/per_store_feature.cc | 24 +-
.../measure_callback/add_to_database.cc | 2 +-
src/s_tir/meta_schedule/mutator/mutator.cc | 22 +-
.../postproc/rewrite_cooperative_fetch.cc | 6 +-
src/s_tir/meta_schedule/profiler.cc | 2 +-
.../meta_schedule/schedule/cuda/thread_bind.cc | 2 +-
.../schedule_rule/cross_thread_reduction.cc | 2 +-
.../schedule_rule/multi_level_tiling.cc | 10 +-
.../multi_level_tiling_tensor_core.cc | 8 +-
.../schedule_rule/parallel_vectorize_unroll.cc | 2 +-
.../meta_schedule/schedule_rule/schedule_rule.cc | 2 +-
src/s_tir/meta_schedule/utils.h | 2 +-
src/s_tir/schedule/analysis/layout.cc | 6 +-
src/s_tir/schedule/analysis/reducer.cc | 2 +-
src/s_tir/schedule/concrete_schedule.cc | 8 +-
src/s_tir/schedule/concrete_schedule.h | 2 +-
src/s_tir/schedule/ir_comparator.cc | 18 +-
src/s_tir/schedule/primitive/block_annotate.cc | 8 +-
src/s_tir/schedule/primitive/blockize_tensorize.cc | 12 +-
src/s_tir/schedule/primitive/cache_index.cc | 24 +-
src/s_tir/schedule/primitive/cache_read_write.cc | 30 +-
src/s_tir/schedule/primitive/compute_at.cc | 8 +-
src/s_tir/schedule/primitive/compute_inline.cc | 4 +-
src/s_tir/schedule/primitive/decompose_padding.cc | 4 +-
src/s_tir/schedule/primitive/for_kind.cc | 6 +-
.../schedule/primitive/layout_transformation.cc | 42 +-
.../schedule/primitive/loop_transformation.cc | 34 +-
src/s_tir/schedule/primitive/pad_einsum.cc | 16 +-
src/s_tir/schedule/primitive/reduction.cc | 38 +-
src/s_tir/schedule/transform.cc | 5 +-
src/s_tir/schedule/transform.h | 2 +-
src/s_tir/schedule/utils.h | 4 +-
src/s_tir/transform/bound_checker.cc | 13 +-
src/s_tir/transform/canonicalize_loop.cc | 4 +-
src/s_tir/transform/compact_buffer_region.cc | 23 +-
src/s_tir/transform/default_gpu_schedule.cc | 8 +-
src/s_tir/transform/inject_double_buffer.cc | 18 +-
src/s_tir/transform/inject_permuted_layout.cc | 2 +-
src/s_tir/transform/inject_ptx_async_copy.cc | 10 +-
src/s_tir/transform/inject_ptx_ldg32.cc | 4 +-
src/s_tir/transform/inject_software_pipeline.cc | 8 +-
src/s_tir/transform/inject_virtual_thread.cc | 15 +-
src/s_tir/transform/lift_thread_binding.cc | 2 +-
src/s_tir/transform/loop_partition.cc | 8 +-
src/s_tir/transform/lower_async_dma.cc | 18 +-
.../transform/lower_cross_thread_reduction.cc | 12 +-
src/s_tir/transform/lower_match_buffer.cc | 21 +-
src/s_tir/transform/lower_opaque_block.cc | 4 +-
src/s_tir/transform/lower_thread_allreduce.cc | 79 ++-
src/s_tir/transform/lower_vtcm_alloc.cc | 4 +-
.../transform/memhammer_tensorcore_rewrite.cc | 53 +-
.../transform/merge_shared_memory_allocations.cc | 36 +-
src/s_tir/transform/profile_instrumentation.cc | 8 +-
src/s_tir/transform/renew_defs.cc | 4 +-
src/s_tir/transform/renormalize_split_pattern.cc | 26 +-
src/s_tir/transform/rewrite_unsafe_select.cc | 5 +-
src/s_tir/transform/storage_access.cc | 6 +-
src/s_tir/transform/storage_access.h | 2 +-
src/s_tir/transform/thread_storage_sync.cc | 4 +-
src/s_tir/transform/unify_thread_binding.cc | 18 +-
.../printer/doc_printer/python_doc_printer.cc | 3 +-
src/script/printer/ir/distributed.cc | 1 +
src/script/printer/script_printer.cc | 6 +-
src/script/printer/utils.h | 6 +-
src/target/build_common.h | 2 +-
src/target/intrin_rule.cc | 63 +-
src/target/intrin_rule.h | 16 +-
src/target/llvm/codegen_arm.cc | 34 +-
src/target/llvm/codegen_cpu.cc | 47 +-
src/target/llvm/codegen_cpu.h | 4 +-
src/target/llvm/codegen_llvm.cc | 317 +++++----
src/target/llvm/codegen_llvm.h | 32 +-
src/target/llvm/codegen_params.cc | 46 +-
src/target/llvm/codegen_x86_64.cc | 15 +-
src/target/llvm/intrin_rule_llvm.cc | 15 +-
src/target/llvm/intrin_rule_llvm.h | 8 +-
src/target/source/codegen_c.cc | 212 +++---
src/target/source/codegen_c.h | 35 +-
src/target/source/codegen_c_host.cc | 29 +-
src/target/source/codegen_c_host.h | 4 +-
src/target/source/codegen_params.cc | 57 +-
src/target/source/codegen_source_base.cc | 21 +-
src/target/source/codegen_source_base.h | 11 +-
src/target/source/source_module.cc | 2 +-
src/te/operation/compute_op.cc | 12 +-
src/te/operation/create_primfunc.cc | 24 +-
src/te/operation/create_primfunc.h | 4 +-
src/te/operation/extern_op.cc | 4 +-
src/te/operation/placeholder_op.cc | 8 +-
src/te/operation/scan_op.cc | 2 +-
src/te/tensor.cc | 15 +-
src/tirx/analysis/deep_equal.cc | 41 +-
src/tirx/ir/buffer.cc | 104 +--
src/tirx/ir/buffer_common.h | 8 +-
src/tirx/ir/data_type_rewriter.cc | 152 ++--
src/tirx/ir/data_type_rewriter.h | 6 +-
src/tirx/ir/exec_scope.cc | 6 +-
src/tirx/ir/expr.cc | 283 ++++----
src/tirx/ir/expr_functor.cc | 4 +-
src/tirx/ir/function.cc | 18 +-
src/tirx/ir/index_map.cc | 14 +-
src/tirx/ir/layout/axis_registry.cc | 2 +-
src/tirx/ir/layout/tile_slice.cc | 4 +-
src/tirx/ir/layout/utils.cc | 2 +-
src/tirx/ir/script/script_complete.cc | 5 +-
src/tirx/ir/stmt.cc | 97 +--
src/tirx/ir/stmt_functor.cc | 7 +-
src/tirx/op/op.cc | 776 +++++++++++----------
src/tirx/script/builder/ir.cc | 113 +--
src/tirx/script/builder/utils.h | 2 +-
src/tirx/script/printer/block.cc | 4 +-
src/tirx/script/printer/buffer.cc | 10 +-
src/tirx/script/printer/expr.cc | 25 +-
src/tirx/script/printer/for_loop.cc | 4 +-
src/tirx/script/printer/ir.cc | 6 +-
src/tirx/script/printer/stmt.cc | 6 +-
src/tirx/transform/common_subexpr_elim.cc | 5 +-
src/tirx/transform/dtype_conversion.cc | 31 +-
src/tirx/transform/dtype_conversion.h | 54 +-
src/tirx/transform/flatten_buffer.cc | 25 +-
src/tirx/transform/force_narrow_index_to_i32.cc | 8 +-
src/tirx/transform/ir_utils.cc | 9 +-
src/tirx/transform/ir_utils.h | 39 +-
src/tirx/transform/lower_intrin.cc | 38 +-
src/tirx/transform/lower_tirx_cleanup.cc | 20 +-
src/tirx/transform/lower_tirx_opaque.cc | 4 +-
src/tirx/transform/lower_tvm_builtin.cc | 110 +--
src/tirx/transform/lower_warp_memory.cc | 24 +-
src/tirx/transform/make_packed_api.cc | 40 +-
src/tirx/transform/narrow_datatype.cc | 97 +--
src/tirx/transform/split_host_device.cc | 20 +-
src/tirx/transform/storage_rewrite.cc | 156 +++--
src/tirx/transform/tile_primitive_dispatch.cc | 21 +-
src/tirx/transform/tvm_ffi_binder.cc | 135 ++--
src/tirx/transform/tvm_ffi_binder.h | 6 +-
src/tirx/transform/unroll_loop.cc | 2 +-
src/tirx/transform/unsupported_dtype_legalize.cc | 183 +++--
src/tirx/transform/vectorize_loop.cc | 244 ++++---
src/topi/einsum.cc | 10 +-
src/topi/elemwise.cc | 6 +-
src/topi/nn.cc | 2 +-
src/topi/transform.cc | 8 +-
tests/cpp/arith_simplify_test.cc | 8 +-
tests/cpp/expr_test.cc | 15 +-
tests/cpp/ir_functor_test.cc | 24 +-
tests/cpp/ndarray_test.cc | 8 +-
tests/cpp/nested_msg_test.cc | 27 +-
tests/cpp/pattern_match_test.cc | 36 +-
tests/cpp/te_compute_test.cc | 10 +-
tests/cpp/tir_analysis_side_effect.cc | 8 +-
tests/cpp/tir_scalable_datatype.cc | 132 ++--
tests/cpp/topi_ewise_test.cc | 2 +-
tests/python/arith/test_arith_deduce_bound.py | 2 +-
tests/python/codegen/test_target_codegen_llvm.py | 2 +-
tests/python/contrib/test_sort.py | 16 +-
tests/python/ir/test_node_reflection.py | 8 +-
tests/python/relax/frontend_nn_extern_module.cc | 16 +-
tests/python/relax/test_analysis_well_formed.py | 2 +-
tests/python/relax/test_frontend_tflite.py | 229 +++++-
tests/python/relax/test_op_manipulate.py | 6 +-
tests/python/s_tir/base/test_tir_data_layout.py | 24 +-
.../s_tir/schedule/test_tir_schedule_tensorize.py | 2 +-
.../schedule/test_tir_schedule_transform_layout.py | 2 +-
tests/python/te/test_te_create_primfunc.py | 16 +-
tests/python/te/test_te_tensor.py | 2 +-
tests/python/tirx-base/test_tir_buffer.py | 4 +-
tests/python/tirx-base/test_tir_constructor.py | 14 +-
tests/python/tirx-base/test_tir_imm_values.py | 2 +-
tests/python/tirx-base/test_tir_intrin.py | 24 +-
tests/python/tirx-base/test_tir_nodes.py | 40 +-
tests/python/tirx-base/test_tir_ops.py | 34 +-
.../python/tirx-base/test_tir_scalable_datatype.py | 2 +-
tests/python/tirx-base/test_tir_specialize.py | 4 +-
tests/python/tirx-base/test_tir_stmt_functor.py | 2 +-
.../test_tir_transform_lower_intrin.py | 8 +-
.../test_tir_transform_lower_tvm_builtin.py | 2 +-
.../test_tir_transform_narrow_datatype.py | 18 +-
.../python/tvmscript/test_tvmscript_parser_tir.py | 20 +-
.../python/tvmscript/test_tvmscript_regression.py | 2 +-
tests/python/tvmscript/test_tvmscript_roundtrip.py | 4 +-
495 files changed, 7318 insertions(+), 6255 deletions(-)
copy docs/{tirx/api/stmt_functor.rst => install/pypi.rst} (57%)
create mode 100644 include/tvm/ir/base_expr.h
delete mode 100644 include/tvm/runtime/data_type.h