This is an automated email from the ASF dual-hosted git repository.

mousius pushed a commit to branch target-parser-aprofile-rollout
in repository https://gitbox.apache.org/repos/asf/tvm.git

commit df0ad058805b87999f477aee245cc04b444931bc
Author: Chris Sidebottom <[email protected]>
AuthorDate: Tue Aug 16 09:34:06 2022 +0000

    [Target] Replace utility functions with target.features
    
    Following on from #12454 this patch removes the utility functions in favour 
of the centralised `target.features` property.
---
 python/tvm/relay/op/strategy/arm_cpu.py     |  7 ++--
 python/tvm/relay/qnn/op/legalizations.py    | 23 ++----------
 python/tvm/topi/arm_cpu/arm_utils.py        | 58 +++--------------------------
 python/tvm/topi/arm_cpu/conv2d_gemm.py      | 23 +++++++-----
 python/tvm/topi/arm_cpu/conv2d_int8.py      | 10 +++--
 python/tvm/topi/arm_cpu/depthwise_conv2d.py |  5 ++-
 src/target/parsers/cpu.cc                   |  5 +++
 7 files changed, 39 insertions(+), 92 deletions(-)

diff --git a/python/tvm/relay/op/strategy/arm_cpu.py 
b/python/tvm/relay/op/strategy/arm_cpu.py
index e56e7ba12e..9dc62d6736 100644
--- a/python/tvm/relay/op/strategy/arm_cpu.py
+++ b/python/tvm/relay/op/strategy/arm_cpu.py
@@ -207,8 +207,8 @@ def conv2d_strategy_arm_cpu(attrs, inputs, out_type, 
target):
                     name="conv2d_nhwc_dsp.arm_cpu",
                 )
             elif kernel_layout == "HWIO":
-                is_aarch64 = topi.arm_cpu.arm_utils.is_aarch64_arm()
-                has_dot_prod = topi.arm_cpu.arm_utils.is_dotprod_available()
+                is_aarch64 = target.features.is_aarch64
+                has_dot_prod = target.features.has_dotprod
                 if has_dot_prod and data.dtype in ["int8", "uint8"]:
                     strategy.add_implementation(
                         
wrap_compute_conv2d(topi.arm_cpu.compute_conv2d_NHWC_quantized_native),
@@ -281,8 +281,7 @@ def conv2d_strategy_arm_cpu(attrs, inputs, out_type, 
target):
                 )
         elif layout == "NHWC":
             assert kernel_layout == "HWOI"
-            is_aarch64 = topi.arm_cpu.arm_utils.is_aarch64_arm()
-            if is_aarch64 or "+neon" in target.mattr:
+            if target.features.has_asimd:
                 strategy.add_implementation(
                     
wrap_compute_conv2d(topi.arm_cpu.compute_depthwise_conv2d_nhwc),
                     
wrap_topi_schedule(topi.arm_cpu.schedule_depthwise_conv2d_nhwc),
diff --git a/python/tvm/relay/qnn/op/legalizations.py 
b/python/tvm/relay/qnn/op/legalizations.py
index 9bc6efdad0..86fa293c90 100644
--- a/python/tvm/relay/qnn/op/legalizations.py
+++ b/python/tvm/relay/qnn/op/legalizations.py
@@ -405,18 +405,6 @@ def is_fast_int8_on_intel():
     return target_has_sse42(target.mcpu)
 
 
-def is_fast_int8_on_arm():
-    """Checks whether the hardware has support for fast Int8 arithmetic 
operations."""
-    target = tvm.target.Target.current(allow_none=False)
-    return "+v8.2a" in target.mattr and "+dotprod" in target.mattr
-
-
-def is_aarch64_arm():
-    """Checks whether we are compiling for an AArch64 target."""
-    target = tvm.target.Target.current(allow_none=False)
-    return "aarch64" in target.attrs.get("mtriple", "")
-
-
 ########################
 # ARM CPU legalizations.
 ########################
@@ -425,7 +413,6 @@ def is_aarch64_arm():
 @qnn_conv2d_legalize.register("arm_cpu")
 def _qnn_conv2d_legalize_arm_cpu(attrs, inputs, types):
     target = tvm.target.Target.current(allow_none=False)
-    has_asimd = is_aarch64_arm() or "+neon" in target.mattr
     is_depthwise = relay.op.strategy.is_depthwise_conv2d(
         types[0].shape,
         attrs["data_layout"],
@@ -434,9 +421,8 @@ def _qnn_conv2d_legalize_arm_cpu(attrs, inputs, types):
         attrs["groups"],
     )
     use_int8_on_arm = (not is_depthwise) and attrs["data_layout"] == "NHWC"
-    has_dotprod = is_fast_int8_on_arm()
-    other_options = use_int8_on_arm or has_dotprod
-    if has_asimd and not other_options:
+    other_options = use_int8_on_arm or target.features.has_dotprod
+    if target.features.has_asimd and not other_options:
         return helper_no_fast_int8_hw_legalization(attrs, inputs, types, 
relay.nn.conv2d)
     # ARM prefers the dtypes to be same.
     return helper_change_dtypes_to_be_same(attrs, inputs, types, 
relay.qnn.op.conv2d)
@@ -444,11 +430,10 @@ def _qnn_conv2d_legalize_arm_cpu(attrs, inputs, types):
 
 @qnn_dense_legalize.register("arm_cpu")
 def _qnn_dense_legalize_arm_cpu(attrs, inputs, types):
+    # ARM prefers the dtypes to be same.
     target = tvm.target.Target.current(allow_none=False)
-    has_asimd = is_aarch64_arm() or "+neon" in target.mattr
-    if has_asimd and not is_fast_int8_on_arm():
+    if target.features.has_asimd and not target.features.has_dotprod:
         return helper_no_fast_int8_hw_legalization(attrs, inputs, types, 
relay.nn.dense)
-    # ARM prefers the dtypes to be same.
     return helper_change_dtypes_to_be_same(attrs, inputs, types, 
relay.qnn.op.dense)
 
 
diff --git a/python/tvm/topi/arm_cpu/arm_utils.py 
b/python/tvm/topi/arm_cpu/arm_utils.py
index 4ab72178b3..1b2efc61ea 100644
--- a/python/tvm/topi/arm_cpu/arm_utils.py
+++ b/python/tvm/topi/arm_cpu/arm_utils.py
@@ -17,57 +17,7 @@
 # pylint: disable=invalid-name,unused-variable,unused-argument,no-member
 """Arm target utility functions"""
 
-import re
-import tvm
-
-
-def get_arch_version(target_mattr):
-    """Parse the LLVM target -mattr, and return
-    the architecture version in a decimal representation
-    (e.g., if -mattr=v8.4a, return 8.4)
-    """
-
-    arch_version = 8.0
-    m = re.compile(r"\+v(.*)\.(.*)a")
-    for attr in target_mattr:
-        match_obj = m.match(attr)
-        if match_obj:
-            major = int(match_obj.group(1))
-            minor = int(match_obj.group(2))
-            decimal = 10
-            if minor >= 10:
-                decimal = 100
-            arch_version = major + float(minor) / decimal
-
-    return arch_version
-
-
-def is_dotprod_available():
-    """Checks whether the hardware has support for udot/sdot instructions."""
-    target = tvm.target.Target.current(allow_none=False)
-    arch_version = get_arch_version(target.mattr)
-    return arch_version >= 8.4 or ((arch_version in (8.2, 8.3)) and "+dotprod" 
in target.mattr)
-
-
-def is_mmla_available():
-    """Checks whether the hardware has support for ummla/smmla instructions."""
-    target = tvm.target.Target.current(allow_none=False)
-    arch_version = get_arch_version(target.mattr)
-    return arch_version >= 8.6 or (
-        (arch_version in (8.2, 8.3, 8.4, 8.5)) and "+i8mm" in target.mattr
-    )
-
-
-def is_aarch64_arm():
-    """Checks whether we are compiling for an AArch64 target."""
-    target = tvm.target.Target.current(allow_none=False)
-    return "aarch64" in target.attrs.get("mtriple", "")
-
-
-def is_neon_available():
-    """Check if neon instructions are available"""
-    target = tvm.target.Target.current(allow_none=False)
-    return "+neon" in target.mattr
+from tvm.target import Target
 
 
 def get_tiling_B_interleaved_t(interleave_A):
@@ -94,13 +44,15 @@ def get_tiling_B_interleaved_t(interleave_A):
     tile_rows_B: the output tile rows of B'
     tile_cols_B: the output tile columns of B'
     """
-    if is_mmla_available():
+    target = Target.current(allow_none=False)
+
+    if target.features.has_matmul_i8:
         # If smmla/ummla is available,  A must be interleaved.
         # Each load from B' will contain 8 elements
         # and we are loading 12 rows of B' (i.e., 12 columns of B)
         tile_rows_B = 12
         tile_cols_B = 8
-    elif is_dotprod_available():
+    elif target.features.has_dotprod:
         # The number of tile rows of B' vary depending on the
         # strategy:
         # * If we are interleaving A, then we select 12 columns from B'(i.e.,
diff --git a/python/tvm/topi/arm_cpu/conv2d_gemm.py 
b/python/tvm/topi/arm_cpu/conv2d_gemm.py
index 8e416be8da..1dac8b8f95 100644
--- a/python/tvm/topi/arm_cpu/conv2d_gemm.py
+++ b/python/tvm/topi/arm_cpu/conv2d_gemm.py
@@ -18,6 +18,7 @@
 # pylint: disable=unused-argument, redefined-builtin
 """GEMM Convolution schedule on ARM"""
 import tvm
+from tvm.target import Target
 from tvm import te
 from tvm.topi import nn
 from tvm.autotvm.task.space import AnnotateEntity, ReorderEntity, 
OtherOptionEntity
@@ -29,10 +30,9 @@ from .tensor_intrin import (
     gemm_acc_nx16_int8_int8_int32,
     gemm_acc_2x2_int8_int8_int32,
 )
-from .arm_utils import is_aarch64_arm, is_dotprod_available, is_mmla_available
 
 
-def configure_knobs(cfg, M, K):
+def configure_knobs(cfg, M, K, target):
     """Configure auto-tuning knobs for the interleaved strategy"""
 
     x, y = cfg.axis(M // 4), cfg.axis(K // 16)
@@ -48,7 +48,7 @@ def configure_knobs(cfg, M, K):
         cfg["reorder_gemm"] = ReorderEntity([0, 1])
         cfg["A_interleaved_unroll_vec"] = AnnotateEntity(["unroll", "vec"])
 
-    if not is_dotprod_available():
+    if not target.features.has_dotprod:
         cfg.define_knob("gemm_quantized_unroll", [True, False])
         if cfg.is_fallback:
             cfg["gemm_quantized_unroll"] = OtherOptionEntity(False)
@@ -133,12 +133,13 @@ def compute_conv2d_gemm_without_weight_transform(
     # - Conv2DGemmWeightTransformRel in src/relay/op/nn/convolution.h
     # In order to have more information
     #
-    if is_mmla_available():
+    target = Target.current(allow_none=False)
+    if target.features.has_matmul_i8:
         # If smmla/ummla is enabled, we are loading 8 rows from A. Each row
         # will contain 8 elements
         tile_rows_A = 8
         tile_cols_A = 8
-    elif is_dotprod_available() and interleave_A:
+    elif target.features.has_dotprod and interleave_A:
         # If dot product has been enabled, and we are interleaving A
         # tile size should be 8x4
         tile_rows_A = 8
@@ -173,7 +174,7 @@ def compute_conv2d_gemm_without_weight_transform(
 
     if interleave_A:
         # Configuration space
-        configure_knobs(cfg, M_padded, K_padded)
+        configure_knobs(cfg, M_padded, K_padded, target)
 
         # Pack the input data
         A_interleaved = te.compute(
@@ -181,7 +182,8 @@ def compute_conv2d_gemm_without_weight_transform(
             lambda b, x, y, z, w: A[b, z + tile_rows_A * x, w + tile_cols_A * 
y],
             name="A_interleaved",
         )
-        if is_mmla_available():
+        target = Target.current(allow_none=False)
+        if target.features.has_matmul_i8:
             # Execute GEMM. In the case of mmla, we need to enforce the tiling
             # from the compute. This is because mmla is doing a tiled 
computation
             # as well. So we have a big 8x12 tile, with small 2x2 sub-tiles
@@ -323,7 +325,8 @@ def schedule_conv2d_gemm_interleaved(cfg, s, out, 
final_out):
     k = C_interleaved.op.reduce_axis[0]
     _, M, N = C.shape
     if in_type in ["int8", "uint8"]:
-        if is_mmla_available():
+        target = Target.current(allow_none=False)
+        if target.features.has_matmul_i8:
             gemm_acc = gemm_acc_2x2_int8_int8_int32(in_type)
             xi_inner, yi_inner = C_interleaved.op.axis[-2:]
             k_outer, k_inner = s[C_interleaved].split(k, 8)
@@ -333,7 +336,7 @@ def schedule_conv2d_gemm_interleaved(cfg, s, out, 
final_out):
             s[C_interleaved].tensorize(xi_inner, gemm_acc)
             s[C_interleaved].unroll(xi)
             s[C_interleaved].unroll(yi)
-        elif is_dotprod_available():
+        elif target.features.has_dotprod:
             gemm_acc = gemm_acc_4x4_int8_int8_int32(in_type)
             xi_outer, yi_outer, xi_inner, yi_inner = s[C_interleaved].tile(
                 xi, yi, x_factor=8, y_factor=4
@@ -354,7 +357,7 @@ def schedule_conv2d_gemm_interleaved(cfg, s, out, 
final_out):
             s[C_interleaved].tensorize(xi_inner_inner, gemm_acc)
             s[C_interleaved].unroll(xi_inner_outer)
 
-        elif is_aarch64_arm():
+        elif target.features.is_aarch64:
             s[C_interleaved].reorder(yi, xi)
             K = A_interleaved_input.shape[2]
             assert in_type in ["int8", "uint8"], "Only int8 and uint8 gemm are 
supported"
diff --git a/python/tvm/topi/arm_cpu/conv2d_int8.py 
b/python/tvm/topi/arm_cpu/conv2d_int8.py
index 224d21b34d..6f69582435 100644
--- a/python/tvm/topi/arm_cpu/conv2d_int8.py
+++ b/python/tvm/topi/arm_cpu/conv2d_int8.py
@@ -30,7 +30,7 @@ from .conv2d_gemm import (
     schedule_conv2d_gemm_interleaved,
     schedule_conv2d_gemm_native,
 )
-from .arm_utils import get_tiling_B_interleaved_t, is_dotprod_available, 
is_neon_available
+from .arm_utils import get_tiling_B_interleaved_t
 
 
 def _get_default_config(cfg, data, kernel, strides, padding, dilation, 
out_dtype):
@@ -124,7 +124,8 @@ def is_int8_hw_support(data_dtype, kernel_dtype):
     is_llvm_support = llvm_version >= 8
 
     # 3) Check target
-    is_target_support = is_neon_available() or is_dotprod_available()
+    current_target = target.Target.current(allow_none=False)
+    is_target_support = current_target.features.has_asimd or 
current_target.features.has_dotprod
 
     return is_dtype_support and is_llvm_support and is_target_support
 
@@ -154,9 +155,10 @@ def schedule_conv2d_NCHWc_int8(cfg, outs):
             _, _, kh, kw, _, _, n_elems = get_const_tuple(kernel_vec.shape)
             assert n_elems == 4
             dtype = "uint" if data.dtype == "uint8" else "int"
-            if is_dotprod_available():
+            current_target = target.Target.current(allow_none=False)
+            if current_target.features.has_dotprod:
                 intrin = dot_int8_int8_int32_neon_82(int32_lanes=4, 
dtype=dtype)
-            elif is_neon_available():
+            elif current_target.features.has_asimd:
                 assert dtype == "int", "uint8 not supported if dot product is 
not available"
                 intrin = dot_int8_int8_int32_neon()
             else:
diff --git a/python/tvm/topi/arm_cpu/depthwise_conv2d.py 
b/python/tvm/topi/arm_cpu/depthwise_conv2d.py
index 58cd11e8cc..a2161453c5 100644
--- a/python/tvm/topi/arm_cpu/depthwise_conv2d.py
+++ b/python/tvm/topi/arm_cpu/depthwise_conv2d.py
@@ -18,6 +18,7 @@
 """Depthwise convolution schedule for ARM CPU"""
 
 import tvm
+from tvm.target import Target
 from tvm import te
 from tvm import autotvm
 from tvm.autotvm.task.space import SplitEntity, OtherOptionEntity
@@ -26,7 +27,6 @@ from .. import nn
 from ..utils import traverse_inline, get_const_tuple, get_const_int
 from ..nn.utils import get_pad_tuple
 from .tensor_intrin import smlal_int16_int32
-from .arm_utils import is_aarch64_arm
 from .mprofile.dsp.depthwise_conv2d import (
     depthwise_conv2d_nhwc_dsp_compute,
     depthwise_conv2d_nhwc_dsp_schedule,
@@ -333,12 +333,13 @@ def schedule_depthwise_conv2d_nhwc(cfg, outs):
         co, ci = cfg["tile_c"].apply(s, conv, c)
 
         split_val = cfg["tile_c"].size[-1]
+        target = Target.current(allow_none=False)
         use_tensorization = (
             (in_type == "int16")
             and (split_val == 8)
             and (IC % split_val == 0)
             and (channel_multiplier == 1)
-            and is_aarch64_arm()
+            and target.features.is_aarch64
         )
 
         data_pad_value = -1
diff --git a/src/target/parsers/cpu.cc b/src/target/parsers/cpu.cc
index fbf55f4683..3cfabb7639 100644
--- a/src/target/parsers/cpu.cc
+++ b/src/target/parsers/cpu.cc
@@ -20,6 +20,7 @@
 
 #include <string>
 
+#include "aprofile.h"
 #include "mprofile.h"
 
 namespace tvm {
@@ -32,6 +33,10 @@ TargetJSON ParseTarget(TargetJSON target) {
     return mprofile::ParseTarget(target);
   }
 
+  if (aprofile::IsArch(target)) {
+    return aprofile::ParseTarget(target);
+  }
+
   return target;
 }
 

Reply via email to