This is an automated email from the ASF dual-hosted git repository.
ekalda pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tvm.git
The following commit(s) were added to refs/heads/main by this push:
new 63a95d54f4 [Relay][Strategy] Fix `arm_cpu` int8 conv2d schedule
selection for 32-bit targets (#15468)
63a95d54f4 is described below
commit 63a95d54f44032eb5c0658a4ca6f775119722239
Author: Luke Hutton <[email protected]>
AuthorDate: Fri Aug 4 16:26:03 2023 +0100
[Relay][Strategy] Fix `arm_cpu` int8 conv2d schedule selection for 32-bit
targets (#15468)
[Relay][Strategy] Fix `arm_cpu` int8 conv2d schedule selection for
32-bit targets
https://github.com/apache/tvm/pull/12455 slightly altered the behaviour
when selecting an int8 conv2d schedule. Previously conditions that
decide which schedule to select used `is_aarch64` which checks for the
existence of `aarch64` in the target triple. However, the conditions now
use `has_asimd` which is true if `aarch64` exists in the target triple
OR `+neon` is used in the mattr.
Both `conv2d_NHWC_quantized_interleaved.arm_cpu` and
`depthwise_conv2d_nhwc.arm_cpu` make calls to LLVM intrinsics that
require both `aarch64` and `+neon`. But in the case of the target
`rasp4b`, the updated conditions result in compilation failure since
the target has `+neon` but doesn't have `aarch64` in the target triple.
The conditions have been updated to fix the compilation failure.
Likewise, the previous behaviour of the condition for
`conv2d_nhwc_spatial_pack.arm_cpu` has been restored to ensure a program
with a 32-bit target can still be compiled.
Finally, we should only select the `depthwise_conv2d_nhwc_dsp.arm_cpu`
schedule when a backend that understands `pragma_import_c` has been
selected, i.e. "c".
For a more detailed discussion of the issue please see:
https://discuss.tvm.apache.org/t/tflite-llvm-llvm-error-when-compiling-tflite-model/15411
---
python/tvm/relay/op/strategy/arm_cpu.py | 10 ++-
.../relay/strategy/test_select_implementation.py | 97 ++++++++++++++++++++++
2 files changed, 103 insertions(+), 4 deletions(-)
diff --git a/python/tvm/relay/op/strategy/arm_cpu.py
b/python/tvm/relay/op/strategy/arm_cpu.py
index dc3b16aa82..bbbe5bb732 100644
--- a/python/tvm/relay/op/strategy/arm_cpu.py
+++ b/python/tvm/relay/op/strategy/arm_cpu.py
@@ -211,21 +211,23 @@ def conv2d_strategy_arm_cpu(attrs, inputs, out_type,
target):
name="conv2d_nhwc_dsp.arm_cpu",
)
elif kernel_layout == "HWIO":
+ is_aarch64 = target.features.is_aarch64
has_asimd = target.features.has_asimd
has_dot_prod = target.features.has_dotprod
+
if has_dot_prod and data.dtype in ["int8", "uint8"]:
strategy.add_implementation(
wrap_compute_conv2d(topi.arm_cpu.compute_conv2d_NHWC_quantized_native),
wrap_topi_schedule(topi.arm_cpu.schedule_conv2d_NHWC_quantized_native),
name="conv2d_NHWC_quantized_native.arm_cpu",
)
- if has_asimd and data.dtype in ["int8", "uint8"]:
+ if is_aarch64 and has_asimd and data.dtype in ["int8",
"uint8"]:
strategy.add_implementation(
wrap_compute_conv2d(topi.arm_cpu.compute_conv2d_NHWC_quantized_interleaved),
wrap_topi_schedule(topi.arm_cpu.schedule_conv2d_NHWC_quantized_interleaved),
name="conv2d_NHWC_quantized_interleaved.arm_cpu",
)
- if (not has_asimd) or (data.dtype not in ["int8", "uint8"]):
+ if (not is_aarch64) or (data.dtype not in ["int8", "uint8"]):
# TODO(@giuseros)
# This strategy errors out for quantized data types when
tuning.
# Let's use this only for non-aarch64 or non-quantized
cases
@@ -285,7 +287,7 @@ def conv2d_strategy_arm_cpu(attrs, inputs, out_type,
target):
)
elif layout == "NHWC":
assert kernel_layout == "HWOI"
- if target.features.has_asimd:
+ if target.features.is_aarch64 and target.features.has_asimd:
strategy.add_implementation(
wrap_compute_conv2d(topi.arm_cpu.compute_depthwise_conv2d_nhwc),
wrap_topi_schedule(topi.arm_cpu.schedule_depthwise_conv2d_nhwc),
@@ -298,7 +300,6 @@ def conv2d_strategy_arm_cpu(attrs, inputs, out_type,
target):
# The int8 implementation DOES need the DSP unit (for SXTB16), but
it is not
# possible to use the DSP unit to speed up a NHWC depthwise
convolution (though
# an NCHW convolution would benefit).
-
elif (
dilation_w == dilation_h == 1
and kernel.shape[3] == 1 # channel_multiplier == 1
@@ -308,6 +309,7 @@ def conv2d_strategy_arm_cpu(attrs, inputs, out_type,
target):
or (data.shape[3] % 2 == 0 and data.dtype == "int16")
)
and (padding != "SAME" or data.shape[1] % stride_h ==
data.shape[2] % stride_w == 0)
+ and target.kind.name == "c"
# Ideally we should check that kernel is a Relay constant, but
strategy functions
# don't have access to the data needed to check this.
):
diff --git a/tests/python/relay/strategy/test_select_implementation.py
b/tests/python/relay/strategy/test_select_implementation.py
index 3e63bc4751..eae186524c 100644
--- a/tests/python/relay/strategy/test_select_implementation.py
+++ b/tests/python/relay/strategy/test_select_implementation.py
@@ -52,5 +52,102 @@ def test_concatenate(target, expected_implementation):
assert impl.name == expected_implementation
[email protected](
+ "target,expected_impl",
+ [
+ ("llvm -device=arm_cpu", "conv2d_nhwc_spatial_pack.arm_cpu"),
+ (
+ "llvm -device=arm_cpu -mtriple=aarch64-linux-gnu -mattr=+neon",
+ "conv2d_NHWC_quantized_interleaved.arm_cpu",
+ ),
+ (
+ "llvm -device=arm_cpu -mtriple=armv8l-linux-gnu -mattr=+neon",
+ "conv2d_nhwc_spatial_pack.arm_cpu",
+ ),
+ ],
+)
+def test_int8_conv2d(target, expected_impl):
+ target = tvm.target.Target(target)
+
+ dtype = "int8"
+ data_shape = (1, 1, 1, 4)
+ weight_shape = (1, 1, 4, 4)
+ data_layout = "NHWC"
+ kernel_layout = "HWIO"
+ channels = 4
+ kernel_size = (1, 1)
+
+ out = relay.nn.conv2d(
+ relay.var("data", shape=data_shape, dtype=dtype),
+ relay.var("weight", shape=weight_shape, dtype=dtype),
+ kernel_size=kernel_size,
+ channels=channels,
+ data_layout=data_layout,
+ kernel_layout=kernel_layout,
+ )
+ out = run_infer_type(out)
+
+ with target:
+ impl, _ = relay.backend.te_compiler.select_implementation(
+ out.op,
+ out.attrs,
+ [te.placeholder(data_shape, dtype), te.placeholder(weight_shape,
dtype)],
+ out.checked_type,
+ target,
+ )
+
+ assert impl.name == expected_impl
+
+
[email protected](
+ "target,expected_impl",
+ [
+ ("llvm -device=arm_cpu", "depthwise_conv2d_nhwc.generic"),
+ (
+ "llvm -device=arm_cpu -mtriple=aarch64-linux-gnu -mattr=+neon",
+ "depthwise_conv2d_nhwc.arm_cpu",
+ ),
+ (
+ "llvm -device=arm_cpu -mtriple=armv8l-linux-gnu -mattr=+neon",
+ "depthwise_conv2d_nhwc.generic",
+ ),
+ ("c -device=arm_cpu -mcpu=cortex-m55",
"depthwise_conv2d_nhwc_dsp.arm_cpu"),
+ ],
+)
+def test_int8_depthwise_conv2d(target, expected_impl):
+ target = tvm.target.Target(target)
+
+ dtype = "int8"
+ out_dtype = "int32"
+ data_shape = (2, 2, 4, 8)
+ weight_shape = (2, 2, 8, 1)
+ data_layout = "NHWC"
+ kernel_layout = "HWOI"
+ groups = 8
+ kernel_size = (2, 2)
+
+ out = relay.nn.conv2d(
+ relay.var("data", shape=data_shape, dtype=dtype),
+ relay.var("weight", shape=weight_shape, dtype=dtype),
+ kernel_size=kernel_size,
+ data_layout=data_layout,
+ kernel_layout=kernel_layout,
+ groups=groups,
+ out_dtype=out_dtype,
+ )
+ out = run_infer_type(out)
+
+ with target:
+ impl, _ = relay.backend.te_compiler.select_implementation(
+ out.op,
+ out.attrs,
+ [te.placeholder(data_shape, dtype), te.placeholder(weight_shape,
dtype)],
+ out.checked_type,
+ target,
+ )
+
+ assert impl.name == expected_impl
+
+
if __name__ == "__main__":
tvm.testing.main()