On 07/11/2025 13:26, Christophe Lyon wrote:
> Hi,
> 
> Is this combination of commits
> 027205879733933ec991c230795da6c01ac50029 and
> 697ccadd7217316ea91ddd978ddc944e6df09522 OK for gcc-15?
> 
> Thanks,
> 
> Christophe
> 
> The vadcq and vsbcq patterns had two problems:
> - the adc / sbc part of the pattern did not mention the use of vfpcc
> - the carry calculation part should use a different unspec code
> 
> In addition, the get_fpscr_nzcvqc and set_fpscr_nzcvqc patterns were
> over-cautious in using unspec_volatile when unspec is really what they
> need.  Making them unspec makes it possible to remove redundant
> accesses to FPSCR_nzcvqc.
> 
> With unspec_volatile, we used to generate:
> test_2:
>       @ args = 0, pretend = 0, frame = 8
>       @ frame_needed = 0, uses_anonymous_args = 0
>       vmov.i32        q0, #0x1  @ v4si
>       push    {lr}
>       sub     sp, sp, #12
>       vmrs    r3, FPSCR_nzcvqc    ;; [1]
>       bic     r3, r3, #536870912
>       vmsr    FPSCR_nzcvqc, r3
>       vadc.i32        q3, q0, q0
>       vmrs    r3, FPSCR_nzcvqc     ;; [2]
>       vmrs    r3, FPSCR_nzcvqc
>       orr     r3, r3, #536870912
>       vmsr    FPSCR_nzcvqc, r3
>       vadc.i32        q0, q0, q0
>       vmrs    r3, FPSCR_nzcvqc
>       ldr     r0, .L8
>       ubfx    r3, r3, #29, #1
>       str     r3, [sp, #4]
>       bl      print_uint32x4_t
>       add     sp, sp, #12
>       @ sp needed
>       pop     {pc}
> .L9:
>       .align  2
> .L8:
>       .word   .LC1
> 
> with unspec, we generate:
> test_2:
>       @ args = 0, pretend = 0, frame = 8
>       @ frame_needed = 0, uses_anonymous_args = 0
>       vmrs    r3, FPSCR_nzcvqc     ;; [1]
>       bic     r3, r3, #536870912   ;; [3]
>       vmov.i32        q0, #0x1  @ v4si
>       vmsr    FPSCR_nzcvqc, r3
>       vadc.i32        q3, q0, q0
>       vmrs    r3, FPSCR_nzcvqc
>       orr     r3, r3, #536870912
>       vmsr    FPSCR_nzcvqc, r3
>       vadc.i32        q0, q0, q0
>       vmrs    r3, FPSCR_nzcvqc
>       push    {lr}
>       ubfx    r3, r3, #29, #1
>       sub     sp, sp, #12
>       ldr     r0, .L8
>       str     r3, [sp, #4]
>       bl      print_uint32x4_t
>       add     sp, sp, #12
>       @ sp needed
>       pop     {pc}
> .L9:
>       .align  2
> .L8:
>       .word   .LC1
> 
> That is, using unspec in get_fpscr_nzcvqc makes it possible to:
> - move [1] earlier
> - delete the redundant [2]
> 
> and using unspec in set_fpscr_nzcvqc makes it possible to move push {lr}
> and the stack manipulation later.
> 
> gcc/ChangeLog:
> 
>       PR target/122189
>       * config/arm/iterators.md (VxCIQ_carry, VxCIQ_M_carry, VxCQ_carry)
>       (VxCQ_M_carry): New iterators.
>       * config/arm/mve.md (get_fpscr_nzcvqc, set_fpscr_nzcvqc): Use
>       unspec instead of unspec_volatile.
>       (vadciq, vadciq_m, vadcq, vadcq_m): Use vfpcc in operation.  Use a
>       different unspec code for carry calculation.
>       * config/arm/unspecs.md (VADCQ_U_carry, VADCQ_M_U_carry)
>       (VADCQ_S_carry, VADCQ_M_S_carry, VSBCIQ_U_carry ,VSBCIQ_S_carry
>       ,VSBCIQ_M_U_carry ,VSBCIQ_M_S_carry ,VSBCQ_U_carry ,VSBCQ_S_carry
>       ,VSBCQ_M_U_carry ,VSBCQ_M_S_carry ,VADCIQ_U_carry
>       ,VADCIQ_M_U_carry ,VADCIQ_S_carry ,VADCIQ_M_S_carry): New unspec
>       codes.
> 
> gcc/testsuite/ChangeLog:
> 
>       PR target/122189
>       * gcc.target/arm/mve/intrinsics/vadcq-check-carry.c: New test.
>       * gcc.target/arm/mve/intrinsics/vadcq_m_s32.c: Adjust instructions
>       order.
>       * gcc.target/arm/mve/intrinsics/vadcq_m_u32.c: Likewise.
>       * gcc.target/arm/mve/intrinsics/vsbcq_m_s32.c: Likewise.
>       * gcc.target/arm/mve/intrinsics/vsbcq_m_u32.c: Likewise.
> 
>       (cherry picked from commits
>       027205879733933ec991c230795da6c01ac50029 and
>       697ccadd7217316ea91ddd978ddc944e6df09522)
> ---
>  gcc/config/arm/iterators.md                   | 17 +++++++
>  gcc/config/arm/mve.md                         | 36 +++++++++-----
>  gcc/config/arm/unspecs.md                     | 16 +++++++
>  .../arm/mve/intrinsics/vadcq-check-carry.c    | 48 +++++++++++++++++++
>  .../arm/mve/intrinsics/vadcq_m_s32.c          |  2 +-
>  .../arm/mve/intrinsics/vadcq_m_u32.c          |  2 +-
>  .../arm/mve/intrinsics/vsbcq_m_s32.c          |  2 +-
>  .../arm/mve/intrinsics/vsbcq_m_u32.c          |  2 +-
>  8 files changed, 109 insertions(+), 16 deletions(-)
>  create mode 100644 
> gcc/testsuite/gcc.target/arm/mve/intrinsics/vadcq-check-carry.c
> 
> diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
> index 743fe48e6cc..d1126e76720 100644
> --- a/gcc/config/arm/iterators.md
> +++ b/gcc/config/arm/iterators.md
> @@ -3022,3 +3022,20 @@ (define_int_iterator VCMLA_OP [UNSPEC_VCMLA
>  ;; Define iterators for VCMLA operations as MUL
>  (define_int_iterator VCMUL_OP [UNSPEC_VCMUL
>                              UNSPEC_VCMUL_CONJ])
> +
> +(define_int_attr VxCIQ_carry   [(VADCIQ_U "VADCIQ_U_carry")
> +                             (VADCIQ_S "VADCIQ_S_carry")
> +                             (VSBCIQ_U "VSBCIQ_U_carry")
> +                             (VSBCIQ_S "VSBCIQ_S_carry")])
> +(define_int_attr VxCIQ_M_carry [(VADCIQ_M_U "VADCIQ_M_U_carry")
> +                             (VADCIQ_M_S "VADCIQ_M_S_carry")
> +                             (VSBCIQ_M_U "VSBCIQ_M_U_carry")
> +                             (VSBCIQ_M_S "VSBCIQ_M_S_carry")])
> +(define_int_attr VxCQ_carry [(VADCQ_U "VADCQ_U_carry")
> +                          (VADCQ_S "VADCQ_S_carry")
> +                          (VSBCQ_U "VSBCQ_U_carry")
> +                          (VSBCQ_S "VSBCQ_S_carry")])
> +(define_int_attr VxCQ_M_carry [(VADCQ_M_U "VADCQ_M_U_carry")
> +                            (VADCQ_M_S "VADCQ_M_S_carry")
> +                            (VSBCQ_M_U "VSBCQ_M_U_carry")
> +                            (VSBCQ_M_S "VSBCQ_M_S_carry")])
> diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
> index cc266f89cdf..1ec3b2900f9 100644
> --- a/gcc/config/arm/mve.md
> +++ b/gcc/config/arm/mve.md
> @@ -3965,14 +3965,14 @@ (define_insn "@mve_vldrq_gather_base_wb_z_<mode>"
>  
>  (define_insn "get_fpscr_nzcvqc"
>   [(set (match_operand:SI 0 "register_operand" "=r")
> -   (unspec_volatile:SI [(reg:SI VFPCC_REGNUM)] UNSPEC_GET_FPSCR_NZCVQC))]
> +   (unspec:SI [(reg:SI VFPCC_REGNUM)] UNSPEC_GET_FPSCR_NZCVQC))]
>   "TARGET_HAVE_MVE"
>   "vmrs\\t%0, FPSCR_nzcvqc"
>   [(set_attr "type" "mve_move")])
>  
>  (define_insn "set_fpscr_nzcvqc"
>   [(set (reg:SI VFPCC_REGNUM)
> -   (unspec_volatile:SI [(match_operand:SI 0 "register_operand" "r")]
> +   (unspec:SI [(match_operand:SI 0 "register_operand" "r")]
>      VUNSPEC_SET_FPSCR_NZCVQC))]
>   "TARGET_HAVE_MVE"
>   "vmsr\\tFPSCR_nzcvqc, %0"
> @@ -3988,8 +3988,9 @@ (define_insn "@mve_<mve_insn>q_<supf>v4si"
>                     (match_operand:V4SI 2 "s_register_operand" "w")]
>        VxCIQ))
>     (set (reg:SI VFPCC_REGNUM)
> -     (unspec:SI [(const_int 0)]
> -      VxCIQ))
> +     (unspec:SI [(match_dup 1)
> +                 (match_dup 2)]
> +      <VxCIQ_carry>))
>    ]
>    "TARGET_HAVE_MVE"
>    "<mve_insn>.i32\t%q0, %q1, %q2"
> @@ -4009,8 +4010,11 @@ (define_insn "@mve_<mve_insn>q_m_<supf>v4si"
>                     (match_operand:V4BI 4 "vpr_register_operand" "Up")]
>        VxCIQ_M))
>     (set (reg:SI VFPCC_REGNUM)
> -     (unspec:SI [(const_int 0)]
> -      VxCIQ_M))
> +    (unspec:SI [(match_dup 1)
> +             (match_dup 2)
> +             (match_dup 3)
> +             (match_dup 4)]
> +      <VxCIQ_M_carry>))
>    ]
>    "TARGET_HAVE_MVE"
>    "vpst\;<mve_insn>t.i32\t%q0, %q2, %q3"
> @@ -4025,11 +4029,14 @@ (define_insn "@mve_<mve_insn>q_m_<supf>v4si"
>  (define_insn "@mve_<mve_insn>q_<supf>v4si"
>    [(set (match_operand:V4SI 0 "s_register_operand" "=w")
>       (unspec:V4SI [(match_operand:V4SI 1 "s_register_operand" "w")
> -                    (match_operand:V4SI 2 "s_register_operand" "w")]
> +                   (match_operand:V4SI 2 "s_register_operand" "w")
> +                   (reg:SI VFPCC_REGNUM)]
>        VxCQ))
>     (set (reg:SI VFPCC_REGNUM)
> -     (unspec:SI [(reg:SI VFPCC_REGNUM)]
> -      VxCQ))
> +    (unspec:SI [(match_dup 1)
> +             (match_dup 2)
> +             (reg:SI VFPCC_REGNUM)]
> +      <VxCQ_carry>))
>    ]
>    "TARGET_HAVE_MVE"
>    "<mve_insn>.i32\t%q0, %q1, %q2"
> @@ -4047,11 +4054,16 @@ (define_insn "@mve_<mve_insn>q_m_<supf>v4si"
>       (unspec:V4SI [(match_operand:V4SI 1 "s_register_operand" "0")
>                     (match_operand:V4SI 2 "s_register_operand" "w")
>                     (match_operand:V4SI 3 "s_register_operand" "w")
> -                   (match_operand:V4BI 4 "vpr_register_operand" "Up")]
> +                   (match_operand:V4BI 4 "vpr_register_operand" "Up")
> +                   (reg:SI VFPCC_REGNUM)]
>        VxCQ_M))
>     (set (reg:SI VFPCC_REGNUM)
> -     (unspec:SI [(reg:SI VFPCC_REGNUM)]
> -      VxCQ_M))
> +    (unspec:SI [(match_dup 1)
> +             (match_dup 2)
> +             (match_dup 3)
> +             (match_dup 4)
> +             (reg:SI VFPCC_REGNUM)]
> +      <VxCQ_M_carry>))
>    ]
>    "TARGET_HAVE_MVE"
>    "vpst\;<mve_insn>t.i32\t%q0, %q2, %q3"
> diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md
> index a03609d1de4..ecc6d611529 100644
> --- a/gcc/config/arm/unspecs.md
> +++ b/gcc/config/arm/unspecs.md
> @@ -1189,21 +1189,37 @@ (define_c_enum "unspec" [
>    VLDRGBWBQ
>    VLDRGBWBQ_Z
>    VADCQ_U
> +  VADCQ_U_carry
>    VADCQ_M_U
> +  VADCQ_M_U_carry
>    VADCQ_S
> +  VADCQ_S_carry
>    VADCQ_M_S
> +  VADCQ_M_S_carry
>    VSBCIQ_U
> +  VSBCIQ_U_carry
>    VSBCIQ_S
> +  VSBCIQ_S_carry
>    VSBCIQ_M_U
> +  VSBCIQ_M_U_carry
>    VSBCIQ_M_S
> +  VSBCIQ_M_S_carry
>    VSBCQ_U
> +  VSBCQ_U_carry
>    VSBCQ_S
> +  VSBCQ_S_carry
>    VSBCQ_M_U
> +  VSBCQ_M_U_carry
>    VSBCQ_M_S
> +  VSBCQ_M_S_carry
>    VADCIQ_U
> +  VADCIQ_U_carry
>    VADCIQ_M_U
> +  VADCIQ_M_U_carry
>    VADCIQ_S
> +  VADCIQ_S_carry
>    VADCIQ_M_S
> +  VADCIQ_M_S_carry
>    VLD2Q
>    VLD4Q
>    VST2Q
> diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vadcq-check-carry.c 
> b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vadcq-check-carry.c
> new file mode 100644
> index 00000000000..3a9b8debf98
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vadcq-check-carry.c
> @@ -0,0 +1,48 @@
> +/* { dg-do run } */
> +/* { dg-require-effective-target arm_v8_1m_mve_ok } */
> +/* { dg-require-effective-target arm_mve_hw } */
> +/* { dg-options "-O2" } */
> +/* { dg-add-options arm_v8_1m_mve } */
> +
> +#include "arm_mve.h"
> +
> +#ifdef __cplusplus
> +extern "C" {
> +#endif
> +
> +#include <inttypes.h>
> +#include <stdio.h>
> +
> +__attribute((noinline)) void print_uint32x4_t(const char *name, uint32x4_t 
> val)
> +{
> +  printf("%s: %u, %u, %u, %u\n",
> +      name,
> +      vgetq_lane_u32(val, 0),
> +         vgetq_lane_u32(val, 1),
> +      vgetq_lane_u32(val, 2),
> +         vgetq_lane_u32(val, 3));
> +}
> +
> +void __attribute__ ((noinline))  test_2(void)
> +{
> +  uint32x4_t v12, v18, v108;
> +  unsigned v17 = 0;
> +  v12 = vdupq_n_u32(1);
> +  v18 = vadcq_u32(v12, v12, &v17);
> +  v17 = 1;
> +  v108 = vadcq_u32(v12, v12, &v17);
> +  print_uint32x4_t("v108", v108);
> +}
> +
> +int main()
> +{
> +  test_2();
> +  return 0;
> +}
> +  
> +#ifdef __cplusplus
> +}
> +#endif
> +
> +/* { dg-output "v108: 3, 2, 2, 2" } */
> +/* { dg-final { scan-assembler-times {\tvmrs\t(?:ip|fp|r[0-9]+), 
> FPSCR_nzcvqc} 3 } } */
> diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vadcq_m_s32.c 
> b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vadcq_m_s32.c
> index 0d4cb779254..1802c20a397 100644
> --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vadcq_m_s32.c
> +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vadcq_m_s32.c
> @@ -1,6 +1,6 @@
>  /* { dg-require-effective-target arm_v8_1m_mve_ok } */
>  /* { dg-add-options arm_v8_1m_mve } */
> -/* { dg-additional-options "-O2" } */
> +/* { dg-additional-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } 
> */
>  /* { dg-final { check-function-bodies "**" "" } } */
>  
>  #include "arm_mve.h"
> diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vadcq_m_u32.c 
> b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vadcq_m_u32.c
> index a0ba6825d8c..64f221df868 100644
> --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vadcq_m_u32.c
> +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vadcq_m_u32.c
> @@ -1,6 +1,6 @@
>  /* { dg-require-effective-target arm_v8_1m_mve_ok } */
>  /* { dg-add-options arm_v8_1m_mve } */
> -/* { dg-additional-options "-O2" } */
> +/* { dg-additional-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } 
> */
>  /* { dg-final { check-function-bodies "**" "" } } */
>  
>  #include "arm_mve.h"
> diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsbcq_m_s32.c 
> b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsbcq_m_s32.c
> index 7a332610c69..da36d694ddf 100644
> --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsbcq_m_s32.c
> +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsbcq_m_s32.c
> @@ -1,6 +1,6 @@
>  /* { dg-require-effective-target arm_v8_1m_mve_ok } */
>  /* { dg-add-options arm_v8_1m_mve } */
> -/* { dg-additional-options "-O2" } */
> +/* { dg-additional-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } 
> */
>  /* { dg-final { check-function-bodies "**" "" } } */
>  
>  #include "arm_mve.h"
> diff --git a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsbcq_m_u32.c 
> b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsbcq_m_u32.c
> index 60902196502..555690f5fb8 100644
> --- a/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsbcq_m_u32.c
> +++ b/gcc/testsuite/gcc.target/arm/mve/intrinsics/vsbcq_m_u32.c
> @@ -1,6 +1,6 @@
>  /* { dg-require-effective-target arm_v8_1m_mve_ok } */
>  /* { dg-add-options arm_v8_1m_mve } */
> -/* { dg-additional-options "-O2" } */
> +/* { dg-additional-options "-O2 -fno-schedule-insns -fno-schedule-insns2" } 
> */
>  /* { dg-final { check-function-bodies "**" "" } } */
>  
>  #include "arm_mve.h"


OK.

R.

Reply via email to