LGTM
> From: "Robin Dapp"<[email protected]>
> Date:  Wed, May 6, 2026, 02:38
> Subject:  [PATCH] RISC-V: Use more whole-reg loads/stores.
> To: "gcc-patches"<[email protected]>
> Cc: <[email protected]>, <[email protected]>, <[email protected]>, 
> <[email protected]>, <[email protected]>
> Hi,
> 
> This patch allows pred_mov, which usually results in vle/vse insns, to
> split off whole-register loads and stores so we can emit more of them.
> The advantage of whole-reg operations is that they don't require a vtype
> and therefore allow more freedom in vsetvl placement.
> 
> Regtested on rv64gcv_zvl512b.
> 
> Regards
>  Robin
> 
> gcc/ChangeLog:
> 
>         * config/riscv/riscv-protos.h (whole_reg_to_reg_move_p):
>         Rename from this...
>         (whole_reg_move_p): ...to this.
>         (whole_reg_loadstore_p): Declare.
>         * config/riscv/riscv-v.cc (whole_reg_to_reg_move_p):
>         Rename from this...
>         (whole_reg_move_p): ...to this.
>         (whole_reg_loadstore_p): New function.
>         * config/riscv/thead-vector.md: Use renamed function.
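>         * config/riscv/vector.md (*pred_mov<mode>): Use renamed and new
>         function.
>         (@pred_store<mode>): Turn into define_insn_and_split and use new
>         function.
>         (@pred_mov<mode>): Use renamed function.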
> 
> gcc/testsuite/ChangeLog:
> 
>         * gcc.target/riscv/rvv/autovec/reduc/reduc_call-4.c: Disable
>         instruction scheduling.
>         * gcc.target/riscv/rvv/base/vle-vl1r.c: New test.
> ---
>  gcc/config/riscv/riscv-protos.h               |  4 ++-
>  gcc/config/riscv/riscv-v.cc                   | 28 ++++++++++++++++++-
>  gcc/config/riscv/thead-vector.md              |  2 +-
>  gcc/config/riscv/vector.md                    | 21 ++++++++++++--
>  .../riscv/rvv/autovec/reduc/reduc_call-4.c    |  2 +-
>  .../gcc.target/riscv/rvv/base/vle-vl1r.c      | 14 ++++++++++
>  6 files changed, 64 insertions(+), 7 deletions(-)
>  create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vle-vl1r.c
> 
> diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
> index e2858a8b19f..8b362e323d9 100644
> --- a/gcc/config/riscv/riscv-protos.h
> +++ b/gcc/config/riscv/riscv-protos.h
> @@ -790,7 +790,9 @@ bool can_be_broadcast_p (rtx);
>  bool strided_broadcast_p (rtx);
>  bool gather_scatter_valid_offset_p (machine_mode);
>  HOST_WIDE_INT estimated_poly_value (poly_int64, unsigned int);
> -bool whole_reg_to_reg_move_p (rtx *, machine_mode, int);
> +bool whole_reg_move_p (rtx *, machine_mode, int);
> +bool whole_reg_loadstore_p (rtx dest, rtx src, rtx mask, rtx avl, rtx
> +                            avl_type);
>  bool splat_to_scalar_move_p (rtx *);
>  rtx get_fp_rounding_coefficient (machine_mode);
>  }
> diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
> index 3c7e749cb60..2103764da06 100644
> --- a/gcc/config/riscv/riscv-v.cc
> +++ b/gcc/config/riscv/riscv-v.cc
> @@ -6451,7 +6451,7 @@ estimated_poly_value (poly_int64 val, unsigned int kind)
>  
>  /* Return true it is whole register-register move.  */
>  bool
> -whole_reg_to_reg_move_p (rtx *ops, machine_mode mode, int avl_type_index)
> +whole_reg_move_p (rtx *ops, machine_mode mode, int avl_type_index)
>  {
>    /* An operation is a whole-register move if either
>       (1) Its vlmax operand equals VLMAX
> @@ -6469,6 +6469,32 @@ whole_reg_to_reg_move_p (rtx *ops, machine_mode mode, 
> int avl_type_index)
>                 && known_eq (INTVAL (ops[4]), GET_MODE_NUNITS (mode)))
>          return true;
>      }
> +
> +  return false;
> +}
> +
> +/* Same but for a whole-register load or store.  */
> +bool
> +whole_reg_loadstore_p (rtx dest, rtx src, rtx mask, rtx avl, rtx avl_type)
> +{
> +  machine_mode mode = GET_MODE (dest);
> +  if (!multiple_p (GET_MODE_SIZE (mode), BYTES_PER_RISCV_VECTOR))
> +    return false;
> +
> +  if (((memory_operand (dest, mode)
> +       && register_operand (src, mode))
> +      || (register_operand (dest, mode)
> +          && memory_operand (src, mode)))
> +      && satisfies_constraint_Wc1 (mask))
> +    {
> +      if (INTVAL (avl_type) == VLMAX)
> +        return true;
> +      /* AVL propagation PASS will transform FIXED-VLMAX with NUNITS < 32
> +         into NON-VLMAX with LEN = NUNITS.  */
> +      else if (CONST_INT_P (avl)
> +               && known_eq (INTVAL (avl), GET_MODE_NUNITS (mode)))
> +        return true;
> +    }
>    return false;
>  }
>  
> diff --git a/gcc/config/riscv/thead-vector.md 
> b/gcc/config/riscv/thead-vector.md
> index 5a02debdd20..4ad37bb441d 100644
> --- a/gcc/config/riscv/thead-vector.md
> +++ b/gcc/config/riscv/thead-vector.md
> @@ -228,7 +228,7 @@ (define_insn_and_split 
> "*pred_mov_width<vlmem_op_attr><mode>"
>     vs<vlmem_op_attr>.v\t%3,%0%p1
>     vmv.v.v\t%0,%3
>     vmv.v.v\t%0,%3"
> -  "&& riscv_vector::whole_reg_to_reg_move_p (operands, <MODE>mode, 7)"
> +  "&& riscv_vector::whole_reg_move_p (operands, <MODE>mode, 7)"
>    [(set (match_dup 0) (match_dup 3))]
>    ""
>    [(set_attr "type" "vlde,vlde,vlde,vste,vimov,vimov")
> diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
> index 45be9e6fe17..136ecdc787e 100644
> --- a/gcc/config/riscv/vector.md
> +++ b/gcc/config/riscv/vector.md
> @@ -1967,7 +1967,16 @@ (define_insn_and_split "*pred_mov<mode>"
>     vse<sew>.v\t%3,%0%p1
>     vmv.v.v\t%0,%3
>     vmv.v.v\t%0,%3"
> -  "&& riscv_vector::whole_reg_to_reg_move_p (operands, <MODE>mode, 7)"
> +  "&& (register_operand (operands[0], <MODE>mode)
> +       && register_operand (operands[3], <MODE>mode)
> +       && riscv_vector::whole_reg_move_p (operands, <MODE>mode, 7))
> +      || ((memory_operand (operands[0], <MODE>mode)
> +          || memory_operand (operands[3], <MODE>mode))
> +         && operands[2] != operands[0]
> +         && !reload_completed
> +         && riscv_vector::whole_reg_loadstore_p (operands[0], operands[3],
> +                                                 operands[1], operands[4],
> +                                                 operands[7]))"
>    [(set (match_dup 0) (match_dup 3))]
>    ""
>    [(set_attr "type" "vlde,vlde,vlde,vste,vimov,vimov")
> @@ -1975,7 +1984,7 @@ (define_insn_and_split "*pred_mov<mode>"
>  
>  ;; Dedicated pattern for vse.v instruction since we can't reuse pred_mov 
> pattern to include
>  ;; memory operand as input which will produce inferior codegen.
> -(define_insn "@pred_store<mode>"
> +(define_insn_and_split "@pred_store<mode>"
>    [(set (match_operand:V_VLS 0 "memory_operand"                 "+m")
>          (if_then_else:V_VLS
>            (unspec:<VM>
> @@ -1988,6 +1997,12 @@ (define_insn "@pred_store<mode>"
>            (match_dup 0)))]
>    "TARGET_VECTOR"
>    "vse<sew>.v\t%2,%0%p1"
> +  "&& !reload_completed
> +  && riscv_vector::whole_reg_loadstore_p (operands[0], operands[2],
> +                                          operands[1], operands[3],
> +                                          operands[4])"
> +  [(set (match_dup 0) (match_dup 2))]
> +  ""
>    [(set_attr "type" "vste")
>     (set_attr "mode" "<MODE>")
>     (set (attr "avl_type_idx") (const_int 4))
> @@ -2016,7 +2031,7 @@ (define_insn_and_split "@pred_mov<mode>"
>     vmmv.m\t%0,%3
>     vmclr.m\t%0
>     vmset.m\t%0"
> -  "&& riscv_vector::whole_reg_to_reg_move_p (operands, <MODE>mode, 5)"
> +  "&& riscv_vector::whole_reg_move_p (operands, <MODE>mode, 5)"
>    [(set (match_dup 0) (match_dup 3))]
>    ""
>    [(set_attr "type" "vldm,vstm,vmalu,vmalu,vmalu")
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_call-4.c 
> b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_call-4.c
> index 1a99df6adf6..498ede9d10d 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_call-4.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_call-4.c
> @@ -1,5 +1,5 @@
>  /* { dg-do compile } */
> -/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d 
> -mrvv-vector-bits=zvl -fno-vect-cost-model -ffast-math" } */
> +/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d 
> -mrvv-vector-bits=zvl -fno-vect-cost-model -ffast-math -fno-schedule-insns 
> -fno-schedule-insns2" } */
>  
>  #include "reduc_call-1.c"
>  
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/vle-vl1r.c 
> b/gcc/testsuite/gcc.target/riscv/rvv/base/vle-vl1r.c
> new file mode 100644
> index 00000000000..0dc3ff5b91c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/vle-vl1r.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -march=rv64gcv -mabi=lp64d -mrvv-vector-bits=zvl" } */
> +
> +#include <riscv_vector.h>
> +
> +vfloat32m1_t
> +foo (float *a)
> +{
> +    vfloat32m1_t a0 = __riscv_vle32_v_f32m1 (a, 4);
> +    return a0;
> +}
> +
> +/* { dg-final { scan-assembler-not "vle32" } } */
> +/* { dg-final { scan-assembler-times "vl1re32.v" 1 } } */
> -- 
> 2.53.0
> 

Reply via email to