This is an automated email from the ASF dual-hosted git repository. mousius pushed a commit to branch target-parser-aprofile-rollout in repository https://gitbox.apache.org/repos/asf/tvm.git
commit df0ad058805b87999f477aee245cc04b444931bc Author: Chris Sidebottom <[email protected]> AuthorDate: Tue Aug 16 09:34:06 2022 +0000 [Target] Replace utility functions with target.features Following on from #12454, this patch removes the utility functions in favour of the centralised `target.features` property. --- python/tvm/relay/op/strategy/arm_cpu.py | 7 ++-- python/tvm/relay/qnn/op/legalizations.py | 23 ++---------- python/tvm/topi/arm_cpu/arm_utils.py | 58 +++-------------------------- python/tvm/topi/arm_cpu/conv2d_gemm.py | 23 +++++++----- python/tvm/topi/arm_cpu/conv2d_int8.py | 10 +++-- python/tvm/topi/arm_cpu/depthwise_conv2d.py | 5 ++- src/target/parsers/cpu.cc | 5 +++ 7 files changed, 39 insertions(+), 92 deletions(-) diff --git a/python/tvm/relay/op/strategy/arm_cpu.py b/python/tvm/relay/op/strategy/arm_cpu.py index e56e7ba12e..9dc62d6736 100644 --- a/python/tvm/relay/op/strategy/arm_cpu.py +++ b/python/tvm/relay/op/strategy/arm_cpu.py @@ -207,8 +207,8 @@ def conv2d_strategy_arm_cpu(attrs, inputs, out_type, target): name="conv2d_nhwc_dsp.arm_cpu", ) elif kernel_layout == "HWIO": - is_aarch64 = topi.arm_cpu.arm_utils.is_aarch64_arm() - has_dot_prod = topi.arm_cpu.arm_utils.is_dotprod_available() + is_aarch64 = target.features.is_aarch64 + has_dot_prod = target.features.has_dotprod if has_dot_prod and data.dtype in ["int8", "uint8"]: strategy.add_implementation( wrap_compute_conv2d(topi.arm_cpu.compute_conv2d_NHWC_quantized_native), @@ -281,8 +281,7 @@ def conv2d_strategy_arm_cpu(attrs, inputs, out_type, target): ) elif layout == "NHWC": assert kernel_layout == "HWOI" - is_aarch64 = topi.arm_cpu.arm_utils.is_aarch64_arm() - if is_aarch64 or "+neon" in target.mattr: + if target.features.has_asimd: strategy.add_implementation( wrap_compute_conv2d(topi.arm_cpu.compute_depthwise_conv2d_nhwc), wrap_topi_schedule(topi.arm_cpu.schedule_depthwise_conv2d_nhwc), diff --git a/python/tvm/relay/qnn/op/legalizations.py b/python/tvm/relay/qnn/op/legalizations.py
index 9bc6efdad0..86fa293c90 100644 --- a/python/tvm/relay/qnn/op/legalizations.py +++ b/python/tvm/relay/qnn/op/legalizations.py @@ -405,18 +405,6 @@ def is_fast_int8_on_intel(): return target_has_sse42(target.mcpu) -def is_fast_int8_on_arm(): - """Checks whether the hardware has support for fast Int8 arithmetic operations.""" - target = tvm.target.Target.current(allow_none=False) - return "+v8.2a" in target.mattr and "+dotprod" in target.mattr - - -def is_aarch64_arm(): - """Checks whether we are compiling for an AArch64 target.""" - target = tvm.target.Target.current(allow_none=False) - return "aarch64" in target.attrs.get("mtriple", "") - - ######################## # ARM CPU legalizations. ######################## @@ -425,7 +413,6 @@ def is_aarch64_arm(): @qnn_conv2d_legalize.register("arm_cpu") def _qnn_conv2d_legalize_arm_cpu(attrs, inputs, types): target = tvm.target.Target.current(allow_none=False) - has_asimd = is_aarch64_arm() or "+neon" in target.mattr is_depthwise = relay.op.strategy.is_depthwise_conv2d( types[0].shape, attrs["data_layout"], @@ -434,9 +421,8 @@ def _qnn_conv2d_legalize_arm_cpu(attrs, inputs, types): attrs["groups"], ) use_int8_on_arm = (not is_depthwise) and attrs["data_layout"] == "NHWC" - has_dotprod = is_fast_int8_on_arm() - other_options = use_int8_on_arm or has_dotprod - if has_asimd and not other_options: + other_options = use_int8_on_arm or target.features.has_dotprod + if target.features.has_asimd and not other_options: return helper_no_fast_int8_hw_legalization(attrs, inputs, types, relay.nn.conv2d) # ARM prefers the dtypes to be same. return helper_change_dtypes_to_be_same(attrs, inputs, types, relay.qnn.op.conv2d) @@ -444,11 +430,10 @@ def _qnn_conv2d_legalize_arm_cpu(attrs, inputs, types): @qnn_dense_legalize.register("arm_cpu") def _qnn_dense_legalize_arm_cpu(attrs, inputs, types): + # ARM prefers the dtypes to be same. 
target = tvm.target.Target.current(allow_none=False) - has_asimd = is_aarch64_arm() or "+neon" in target.mattr - if has_asimd and not is_fast_int8_on_arm(): + if target.features.has_asimd and not target.features.has_dotprod: return helper_no_fast_int8_hw_legalization(attrs, inputs, types, relay.nn.dense) - # ARM prefers the dtypes to be same. return helper_change_dtypes_to_be_same(attrs, inputs, types, relay.qnn.op.dense) diff --git a/python/tvm/topi/arm_cpu/arm_utils.py b/python/tvm/topi/arm_cpu/arm_utils.py index 4ab72178b3..1b2efc61ea 100644 --- a/python/tvm/topi/arm_cpu/arm_utils.py +++ b/python/tvm/topi/arm_cpu/arm_utils.py @@ -17,57 +17,7 @@ # pylint: disable=invalid-name,unused-variable,unused-argument,no-member """Arm target utility functions""" -import re -import tvm - - -def get_arch_version(target_mattr): - """Parse the LLVM target -mattr, and return - the architecture version in a decimal representation - (e.g., if -mattr=v8.4a, return 8.4) - """ - - arch_version = 8.0 - m = re.compile(r"\+v(.*)\.(.*)a") - for attr in target_mattr: - match_obj = m.match(attr) - if match_obj: - major = int(match_obj.group(1)) - minor = int(match_obj.group(2)) - decimal = 10 - if minor >= 10: - decimal = 100 - arch_version = major + float(minor) / decimal - - return arch_version - - -def is_dotprod_available(): - """Checks whether the hardware has support for udot/sdot instructions.""" - target = tvm.target.Target.current(allow_none=False) - arch_version = get_arch_version(target.mattr) - return arch_version >= 8.4 or ((arch_version in (8.2, 8.3)) and "+dotprod" in target.mattr) - - -def is_mmla_available(): - """Checks whether the hardware has support for ummla/smmla instructions.""" - target = tvm.target.Target.current(allow_none=False) - arch_version = get_arch_version(target.mattr) - return arch_version >= 8.6 or ( - (arch_version in (8.2, 8.3, 8.4, 8.5)) and "+i8mm" in target.mattr - ) - - -def is_aarch64_arm(): - """Checks whether we are compiling for an AArch64 
target.""" - target = tvm.target.Target.current(allow_none=False) - return "aarch64" in target.attrs.get("mtriple", "") - - -def is_neon_available(): - """Check if neon instructions are available""" - target = tvm.target.Target.current(allow_none=False) - return "+neon" in target.mattr +from tvm.target import Target def get_tiling_B_interleaved_t(interleave_A): @@ -94,13 +44,15 @@ def get_tiling_B_interleaved_t(interleave_A): tile_rows_B: the output tile rows of B' tile_cols_B: the output tile columns of B' """ - if is_mmla_available(): + target = Target.current(allow_none=False) + + if target.features.has_matmul_i8: # If smmla/ummla is available, A must be interleaved. # Each load from B' will contain 8 elements # and we are loading 12 rows of B' (i.e., 12 columns of B) tile_rows_B = 12 tile_cols_B = 8 - elif is_dotprod_available(): + elif target.features.has_dotprod: # The number of tile rows of B' vary depending on the # strategy: # * If we are interleaving A, then we select 12 columns from B'(i.e., diff --git a/python/tvm/topi/arm_cpu/conv2d_gemm.py b/python/tvm/topi/arm_cpu/conv2d_gemm.py index 8e416be8da..1dac8b8f95 100644 --- a/python/tvm/topi/arm_cpu/conv2d_gemm.py +++ b/python/tvm/topi/arm_cpu/conv2d_gemm.py @@ -18,6 +18,7 @@ # pylint: disable=unused-argument, redefined-builtin """GEMM Convolution schedule on ARM""" import tvm +from tvm.target import Target from tvm import te from tvm.topi import nn from tvm.autotvm.task.space import AnnotateEntity, ReorderEntity, OtherOptionEntity @@ -29,10 +30,9 @@ from .tensor_intrin import ( gemm_acc_nx16_int8_int8_int32, gemm_acc_2x2_int8_int8_int32, ) -from .arm_utils import is_aarch64_arm, is_dotprod_available, is_mmla_available -def configure_knobs(cfg, M, K): +def configure_knobs(cfg, M, K, target): """Configure auto-tuning knobs for the interleaved strategy""" x, y = cfg.axis(M // 4), cfg.axis(K // 16) @@ -48,7 +48,7 @@ def configure_knobs(cfg, M, K): cfg["reorder_gemm"] = ReorderEntity([0, 1]) 
cfg["A_interleaved_unroll_vec"] = AnnotateEntity(["unroll", "vec"]) - if not is_dotprod_available(): + if not target.features.has_dotprod: cfg.define_knob("gemm_quantized_unroll", [True, False]) if cfg.is_fallback: cfg["gemm_quantized_unroll"] = OtherOptionEntity(False) @@ -133,12 +133,13 @@ def compute_conv2d_gemm_without_weight_transform( # - Conv2DGemmWeightTransformRel in src/relay/op/nn/convolution.h # In order to have more information # - if is_mmla_available(): + target = Target.current(allow_none=False) + if target.features.has_matmul_i8: # If smmla/ummla is enabled, we are loading 8 rows from A. Each row # will contain 8 elements tile_rows_A = 8 tile_cols_A = 8 - elif is_dotprod_available() and interleave_A: + elif target.features.has_dotprod and interleave_A: # If dot product has been enabled, and we are interleaving A # tile size should be 8x4 tile_rows_A = 8 @@ -173,7 +174,7 @@ def compute_conv2d_gemm_without_weight_transform( if interleave_A: # Configuration space - configure_knobs(cfg, M_padded, K_padded) + configure_knobs(cfg, M_padded, K_padded, target) # Pack the input data A_interleaved = te.compute( @@ -181,7 +182,8 @@ def compute_conv2d_gemm_without_weight_transform( lambda b, x, y, z, w: A[b, z + tile_rows_A * x, w + tile_cols_A * y], name="A_interleaved", ) - if is_mmla_available(): + target = Target.current(allow_none=False) + if target.features.has_matmul_i8: # Execute GEMM. In the case of mmla, we need to enforce the tiling # from the compute. This is because mmla is doing a tiled computation # as well. 
So we have a big 8x12 tile, with small 2x2 sub-tiles @@ -323,7 +325,8 @@ def schedule_conv2d_gemm_interleaved(cfg, s, out, final_out): k = C_interleaved.op.reduce_axis[0] _, M, N = C.shape if in_type in ["int8", "uint8"]: - if is_mmla_available(): + target = Target.current(allow_none=False) + if target.features.has_matmul_i8: gemm_acc = gemm_acc_2x2_int8_int8_int32(in_type) xi_inner, yi_inner = C_interleaved.op.axis[-2:] k_outer, k_inner = s[C_interleaved].split(k, 8) @@ -333,7 +336,7 @@ def schedule_conv2d_gemm_interleaved(cfg, s, out, final_out): s[C_interleaved].tensorize(xi_inner, gemm_acc) s[C_interleaved].unroll(xi) s[C_interleaved].unroll(yi) - elif is_dotprod_available(): + elif target.features.has_dotprod: gemm_acc = gemm_acc_4x4_int8_int8_int32(in_type) xi_outer, yi_outer, xi_inner, yi_inner = s[C_interleaved].tile( xi, yi, x_factor=8, y_factor=4 @@ -354,7 +357,7 @@ def schedule_conv2d_gemm_interleaved(cfg, s, out, final_out): s[C_interleaved].tensorize(xi_inner_inner, gemm_acc) s[C_interleaved].unroll(xi_inner_outer) - elif is_aarch64_arm(): + elif target.features.is_aarch64: s[C_interleaved].reorder(yi, xi) K = A_interleaved_input.shape[2] assert in_type in ["int8", "uint8"], "Only int8 and uint8 gemm are supported" diff --git a/python/tvm/topi/arm_cpu/conv2d_int8.py b/python/tvm/topi/arm_cpu/conv2d_int8.py index 224d21b34d..6f69582435 100644 --- a/python/tvm/topi/arm_cpu/conv2d_int8.py +++ b/python/tvm/topi/arm_cpu/conv2d_int8.py @@ -30,7 +30,7 @@ from .conv2d_gemm import ( schedule_conv2d_gemm_interleaved, schedule_conv2d_gemm_native, ) -from .arm_utils import get_tiling_B_interleaved_t, is_dotprod_available, is_neon_available +from .arm_utils import get_tiling_B_interleaved_t def _get_default_config(cfg, data, kernel, strides, padding, dilation, out_dtype): @@ -124,7 +124,8 @@ def is_int8_hw_support(data_dtype, kernel_dtype): is_llvm_support = llvm_version >= 8 # 3) Check target - is_target_support = is_neon_available() or is_dotprod_available() + 
current_target = target.Target.current(allow_none=False) + is_target_support = current_target.features.has_asimd or current_target.features.has_dotprod return is_dtype_support and is_llvm_support and is_target_support @@ -154,9 +155,10 @@ def schedule_conv2d_NCHWc_int8(cfg, outs): _, _, kh, kw, _, _, n_elems = get_const_tuple(kernel_vec.shape) assert n_elems == 4 dtype = "uint" if data.dtype == "uint8" else "int" - if is_dotprod_available(): + current_target = target.Target.current(allow_none=False) + if current_target.features.has_dotprod: intrin = dot_int8_int8_int32_neon_82(int32_lanes=4, dtype=dtype) - elif is_neon_available(): + elif current_target.features.has_asimd: assert dtype == "int", "uint8 not supported if dot product is not available" intrin = dot_int8_int8_int32_neon() else: diff --git a/python/tvm/topi/arm_cpu/depthwise_conv2d.py b/python/tvm/topi/arm_cpu/depthwise_conv2d.py index 58cd11e8cc..a2161453c5 100644 --- a/python/tvm/topi/arm_cpu/depthwise_conv2d.py +++ b/python/tvm/topi/arm_cpu/depthwise_conv2d.py @@ -18,6 +18,7 @@ """Depthwise convolution schedule for ARM CPU""" import tvm +from tvm.target import Target from tvm import te from tvm import autotvm from tvm.autotvm.task.space import SplitEntity, OtherOptionEntity @@ -26,7 +27,6 @@ from .. 
import nn from ..utils import traverse_inline, get_const_tuple, get_const_int from ..nn.utils import get_pad_tuple from .tensor_intrin import smlal_int16_int32 -from .arm_utils import is_aarch64_arm from .mprofile.dsp.depthwise_conv2d import ( depthwise_conv2d_nhwc_dsp_compute, depthwise_conv2d_nhwc_dsp_schedule, @@ -333,12 +333,13 @@ def schedule_depthwise_conv2d_nhwc(cfg, outs): co, ci = cfg["tile_c"].apply(s, conv, c) split_val = cfg["tile_c"].size[-1] + target = Target.current(allow_none=False) use_tensorization = ( (in_type == "int16") and (split_val == 8) and (IC % split_val == 0) and (channel_multiplier == 1) - and is_aarch64_arm() + and target.features.is_aarch64 ) data_pad_value = -1 diff --git a/src/target/parsers/cpu.cc b/src/target/parsers/cpu.cc index fbf55f4683..3cfabb7639 100644 --- a/src/target/parsers/cpu.cc +++ b/src/target/parsers/cpu.cc @@ -20,6 +20,7 @@ #include <string> +#include "aprofile.h" #include "mprofile.h" namespace tvm { @@ -32,6 +33,10 @@ TargetJSON ParseTarget(TargetJSON target) { return mprofile::ParseTarget(target); } + if (aprofile::IsArch(target)) { + return aprofile::ParseTarget(target); + } + return target; }
