This patch adds support for auto-vectorization of absolute value computation using the vabs instruction.
We use a similar pattern to what is used in neon.md and extend the existing neg<mode>2 expander to match both 'neg' and 'abs'. This implies renaming the existing abs<mode>2 define_insn in neon.md to avoid a clash with the new expander with the same name. 2021-05-26 Christophe Lyon <christophe.l...@linaro.org> gcc/ * config/arm/mve.md (mve_vabsq_f<mode>): Use 'abs' instead of unspec. (mve_vabsq_s<mode>): Likewise. * config/arm/neon.md (abs<mode>2): Rename to neon_abs<mode>2. * config/arm/unspecs.md (VABSQ_F, VABSQ_S): Delete. * config/arm/vec-common.md (neg<mode>2): Rename to <absneg_str><mode>2. gcc/testsuite/ * gcc.target/arm/simd/mve-vabs.c: New test. --- gcc/config/arm/mve.md | 6 +-- gcc/config/arm/neon.md | 2 +- gcc/config/arm/unspecs.md | 2 - gcc/config/arm/vec-common.md | 4 +- gcc/testsuite/gcc.target/arm/simd/mve-vabs.c | 44 ++++++++++++++++++++ 5 files changed, 49 insertions(+), 9 deletions(-) create mode 100644 gcc/testsuite/gcc.target/arm/simd/mve-vabs.c diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md index 0a6ba80c99d..0bfa6a91d55 100644 --- a/gcc/config/arm/mve.md +++ b/gcc/config/arm/mve.md @@ -269,8 +269,7 @@ (define_insn "mve_vdupq_n_f<mode>" (define_insn "mve_vabsq_f<mode>" [ (set (match_operand:MVE_0 0 "s_register_operand" "=w") - (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w")] - VABSQ_F)) + (abs:MVE_0 (match_operand:MVE_0 1 "s_register_operand" "w"))) ] "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT" "vabs.f%#<V_sz_elem> %q0, %q1" @@ -481,8 +480,7 @@ (define_insn "@mve_vaddvq_<supf><mode>" (define_insn "mve_vabsq_s<mode>" [ (set (match_operand:MVE_2 0 "s_register_operand" "=w") - (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "w")] - VABSQ_S)) + (abs:MVE_2 (match_operand:MVE_2 1 "s_register_operand" "w"))) ] "TARGET_HAVE_MVE" "vabs.s%#<V_sz_elem>\t%q0, %q1" diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index 6a6573317cf..077c62ffd20 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md 
@@ -739,7 +739,7 @@ (define_insn "one_cmpl<mode>2_neon" [(set_attr "type" "neon_move<q>")] ) -(define_insn "abs<mode>2" +(define_insn "neon_abs<mode>2" [(set (match_operand:VDQW 0 "s_register_operand" "=w") (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))] "TARGET_NEON" diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md index 0778db1bf4f..ed1bc293b78 100644 --- a/gcc/config/arm/unspecs.md +++ b/gcc/config/arm/unspecs.md @@ -538,7 +538,6 @@ (define_c_enum "unspec" [ VRNDAQ_F VREV64Q_F VDUPQ_N_F - VABSQ_F VREV32Q_F VCVTTQ_F32_F16 VCVTBQ_F32_F16 @@ -562,7 +561,6 @@ (define_c_enum "unspec" [ VCLSQ_S VADDVQ_S VADDVQ_U - VABSQ_S VREV32Q_U VREV32Q_S VMOVLTQ_U diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md index 8e35151da46..80b273229f5 100644 --- a/gcc/config/arm/vec-common.md +++ b/gcc/config/arm/vec-common.md @@ -208,9 +208,9 @@ (define_expand "one_cmpl<mode>2" "ARM_HAVE_<MODE>_ARITH && !TARGET_REALLY_IWMMXT" ) -(define_expand "neg<mode>2" +(define_expand "<absneg_str><mode>2" [(set (match_operand:VDQWH 0 "s_register_operand" "") - (neg:VDQWH (match_operand:VDQWH 1 "s_register_operand" "")))] + (ABSNEG:VDQWH (match_operand:VDQWH 1 "s_register_operand" "")))] "ARM_HAVE_<MODE>_ARITH && !TARGET_REALLY_IWMMXT" ) diff --git a/gcc/testsuite/gcc.target/arm/simd/mve-vabs.c b/gcc/testsuite/gcc.target/arm/simd/mve-vabs.c new file mode 100644 index 00000000000..64cd1c2eb4a --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/mve-vabs.c @@ -0,0 +1,44 @@ +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O3 -funsafe-math-optimizations" } */ + +#include <stdint.h> +#include <arm_mve.h> + +#define ABS(a) ((a < 0) ? 
-a : a) + +#define FUNC(SIGN, TYPE, BITS, NB, NAME) \ + void test_ ## NAME ##_ ## SIGN ## BITS ## x ## NB (TYPE##BITS##_t * __restrict__ dest, TYPE##BITS##_t *a) { \ + int i; \ + for (i=0; i<NB; i++) { \ + dest[i] = ABS(a[i]); \ + } \ +} + +#define FUNC_FLOAT(SIGN, TYPE, BITS, NB, NAME) \ + void test_ ## NAME ##_ ## SIGN ## BITS ## x ## NB (TYPE * __restrict__ dest, TYPE *a) { \ + int i; \ + for (i=0; i<NB; i++) { \ + dest[i] = ABS(a[i]); \ + } \ +} + +/* 128-bit vectors. */ +FUNC(s, int, 32, 4, vabs) +FUNC(u, uint, 32, 4, vabs) +FUNC(s, int, 16, 8, vabs) +FUNC(u, uint, 16, 8, vabs) +FUNC(s, int, 8, 16, vabs) +FUNC(u, uint, 8, 16, vabs) +FUNC_FLOAT(f, float, 32, 4, vabs) +FUNC(f, float, 16, 8, vabs) + +/* Taking the absolute value of an unsigned value is a no-op, so half of the + integer optimizations actually generate a call to memmove, the other ones a + 'vabs'. */ +/* { dg-final { scan-assembler-times {vabs.s[0-9]+\tq[0-9]+, q[0-9]+} 3 } } */ +/* { dg-final { scan-assembler-times {vabs.f[0-9]+ q[0-9]+, q[0-9]+} 2 } } */ +/* { dg-final { scan-assembler-times {vldr[bhw].[0-9]+\tq[0-9]+} 5 } } */ +/* { dg-final { scan-assembler-times {vstr[bhw].[0-9]+\tq[0-9]+} 5 } } */ +/* { dg-final { scan-assembler-times {memmove} 3 } } */ -- 2.25.1