Spencer Abson <spencer.ab...@arm.com> writes:
> This patch extends the splitting patterns for combining FP comparisons
> with predicated logical operations such that they cover all of SVE_F.
>
> gcc/ChangeLog:
>
>       * config/aarch64/aarch64-sve.md (*fcm<cmp_op><mode>_and_combine):
>       Extend from SVE_FULL_F to SVE_F.
>       (*fcmuo<mode>_and_combine): Likewise.
>       (*fcm<cmp_op><mode>_bic_combine): Likewise.
>       (*fcm<cmp_op><mode>_nor_combine): Likewise.
>       (*fcmuo<mode>_bic_combine): Likewise.
>       (*fcmuo<mode>_nor_combine): Likewise.  Move the comment here to
>       above fcmuo<mode>_bic_combine, since it applies to both patterns.
>
> gcc/testsuite/ChangeLog:
>
>       * gcc.target/aarch64/sve/unpacked_fcm_combines_1.c: New test.
>       * gcc.target/aarch64/sve/unpacked_fcm_combines_2.c: Likewise.

OK.  Thanks for catching the extra optimisations.

Richard

> ---
>  gcc/config/aarch64/aarch64-sve.md             | 26 +++++++-------
>  .../aarch64/sve/unpacked_fcm_combines_1.c     | 17 +++++++++
>  .../aarch64/sve/unpacked_fcm_combines_2.c     | 35 +++++++++++++++++++
>  3 files changed, 65 insertions(+), 13 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_fcm_combines_1.c
>  create mode 100644 gcc/testsuite/gcc.target/aarch64/sve/unpacked_fcm_combines_2.c
>
> diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
> index 6b5113eb70f..10aecf1f190 100644
> --- a/gcc/config/aarch64/aarch64-sve.md
> +++ b/gcc/config/aarch64/aarch64-sve.md
> @@ -8690,8 +8690,8 @@
>         (unspec:<VPRED>
>           [(match_operand:<VPRED> 1)
>            (const_int SVE_KNOWN_PTRUE)
> -          (match_operand:SVE_FULL_F 2 "register_operand" "w, w")
> -          (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "Dz, w")]
> +          (match_operand:SVE_F 2 "register_operand" "w, w")
> +          (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "Dz, w")]
>           SVE_COND_FP_CMP_I0)
>         (match_operand:<VPRED> 4 "register_operand" "Upl, Upl")))]
>    "TARGET_SVE"
> @@ -8713,8 +8713,8 @@
>         (unspec:<VPRED>
>           [(match_operand:<VPRED> 1)
>            (const_int SVE_KNOWN_PTRUE)
> -          (match_operand:SVE_FULL_F 2 "register_operand" "w")
> -          (match_operand:SVE_FULL_F 3 "register_operand" "w")]
> +          (match_operand:SVE_F 2 "register_operand" "w")
> +          (match_operand:SVE_F 3 "register_operand" "w")]
>           UNSPEC_COND_FCMUO)
>         (match_operand:<VPRED> 4 "register_operand" "Upl")))]
>    "TARGET_SVE"
> @@ -8740,8 +8740,8 @@
>             (unspec:<VPRED>
>               [(match_operand:<VPRED> 1)
>                (const_int SVE_KNOWN_PTRUE)
> -              (match_operand:SVE_FULL_F 2 "register_operand" "w")
> -              (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "wDz")]
> +              (match_operand:SVE_F 2 "register_operand" "w")
> +              (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "wDz")]
>               SVE_COND_FP_CMP_I0))
>           (match_operand:<VPRED> 4 "register_operand" "Upa"))
>         (match_dup:<VPRED> 1)))
> @@ -8777,8 +8777,8 @@
>             (unspec:<VPRED>
>               [(match_operand:<VPRED> 1)
>                (const_int SVE_KNOWN_PTRUE)
> -              (match_operand:SVE_FULL_F 2 "register_operand" "w")
> -              (match_operand:SVE_FULL_F 3 "aarch64_simd_reg_or_zero" "wDz")]
> +              (match_operand:SVE_F 2 "register_operand" "w")
> +              (match_operand:SVE_F 3 "aarch64_simd_reg_or_zero" "wDz")]
>               SVE_COND_FP_CMP_I0))
>           (not:<VPRED>
>             (match_operand:<VPRED> 4 "register_operand" "Upa")))
> @@ -8808,6 +8808,7 @@
>  }
>  )
>  
> +;; Same for unordered comparisons.
>  (define_insn_and_split "*fcmuo<mode>_bic_combine"
>    [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
>       (and:<VPRED>
> @@ -8816,8 +8817,8 @@
>             (unspec:<VPRED>
>               [(match_operand:<VPRED> 1)
>                (const_int SVE_KNOWN_PTRUE)
> -              (match_operand:SVE_FULL_F 2 "register_operand" "w")
> -              (match_operand:SVE_FULL_F 3 "register_operand" "w")]
> +              (match_operand:SVE_F 2 "register_operand" "w")
> +              (match_operand:SVE_F 3 "register_operand" "w")]
>               UNSPEC_COND_FCMUO))
>           (match_operand:<VPRED> 4 "register_operand" "Upa"))
>         (match_dup:<VPRED> 1)))
> @@ -8843,7 +8844,6 @@
>  }
>  )
>  
> -;; Same for unordered comparisons.
>  (define_insn_and_split "*fcmuo<mode>_nor_combine"
>    [(set (match_operand:<VPRED> 0 "register_operand" "=Upa")
>       (and:<VPRED>
> @@ -8852,8 +8852,8 @@
>             (unspec:<VPRED>
>               [(match_operand:<VPRED> 1)
>                (const_int SVE_KNOWN_PTRUE)
> -              (match_operand:SVE_FULL_F 2 "register_operand" "w")
> -              (match_operand:SVE_FULL_F 3 "register_operand" "w")]
> +              (match_operand:SVE_F 2 "register_operand" "w")
> +              (match_operand:SVE_F 3 "register_operand" "w")]
>               UNSPEC_COND_FCMUO))
>           (not:<VPRED>
>             (match_operand:<VPRED> 4 "register_operand" "Upa")))
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fcm_combines_1.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fcm_combines_1.c
> new file mode 100644
> index 00000000000..c1f729e9f0a
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fcm_combines_1.c
> @@ -0,0 +1,17 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -moverride=sve_width=2048 --param=aarch64-autovec-preference=sve-only -fno-trapping-math" } */
> +
> +#include "unpacked_fcm_1.c"
> +
> +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 32 } } */
> +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 32 } } */
> +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 32 } } */
> +
> +/* Drop a PTRUE predicated AND with the loop mask and comparison result in
> +   favour of predicating the comparison with the loop mask.  */
> +/* { dg-final { scan-assembler-not {\tand\t} } } */
> +
> +/* Similarly, for codes that are implemented via an inversion, prefer
> +   NOT (predicated with the loop mask) over BIC+PTRUE.  */
> +/* { dg-final { scan-assembler-not {\tbic\t} } } */
> +/* { dg-final { scan-assembler-times {\tnot\t} 15 } } */
> diff --git a/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fcm_combines_2.c b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fcm_combines_2.c
> new file mode 100644
> index 00000000000..e7f7680ce53
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/aarch64/sve/unpacked_fcm_combines_2.c
> @@ -0,0 +1,35 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -moverride=sve_width=2048 --param=aarch64-autovec-preference=sve-only -fno-trapping-math" } */
> +
> +#include <stdint.h>
> +
> +/* Ensure that we still emit NOR here, rather than two NOTs.  */
> +
> +#define TEST_FCM_NOR(TYPE0, TYPE1, CMP, COUNT)               \
> +  void                                                       \
> +  f_##TYPE0##_##TYPE1##_##CMP (TYPE0 *__restrict out,        \
> +                            TYPE1 *__restrict a,     \
> +                            TYPE1 *__restrict b,     \
> +                            TYPE1 *__restrict c)     \
> +  {                                                              \
> +    for (unsigned int i = 0; i < COUNT; i++)                     \
> +      out[i] = !(CMP (a[i], c[i]) | CMP (b[i], c[i])) ? 3 : out[i]; \
> +  }
> +
> +#define GT(A, B) ((A) > (B))
> +
> +TEST_FCM_NOR (uint64_t, float, GT, 32)
> +TEST_FCM_NOR (uint64_t, _Float16, GT, 32)
> +TEST_FCM_NOR (uint32_t, _Float16, GT, 64)
> +
> +TEST_FCM_NOR (uint64_t, float, __builtin_isunordered, 32)
> +TEST_FCM_NOR (uint64_t, _Float16, __builtin_isunordered, 32)
> +TEST_FCM_NOR (uint32_t, _Float16, __builtin_isunordered, 64)
> +
> +/* { dg-final { scan-assembler-times {\tld1w\tz[0-9]+\.d} 6 } } */
> +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.s} 6 } } */
> +/* { dg-final { scan-assembler-times {\tld1h\tz[0-9]+\.d} 6 } } */
> +
> +/* { dg-final { scan-assembler-not {\tbic\t} } } */
> +/* { dg-final { scan-assembler-not {\tnot\t} } } */
> +/* { dg-final { scan-assembler-times {\tnor\tp[0-9]+\.b, p[0-9]+/z, p[0-9]+\.b, p[0-9]+\.b\n} 6 } } */

Reply via email to