LGTM
> From: "Robin Dapp"<[email protected]>
> Date: Wed, May 6, 2026, 02:38
> Subject: [PATCH] RISC-V: Use more whole-reg loads/stores.
> To: "gcc-patches"<[email protected]>
> Cc: <[email protected]>, <[email protected]>, <[email protected]>,
> <[email protected]>, <[email protected]>
> Hi,
>
> This patch allows pred_mov, which usually results in vle/vse insns, to
> split off whole-register loads and stores so we can emit more of them.
> The advantage of whole-reg operations is that they don't require a vtype
> and therefore allow more freedom in vsetvl placement.
>
> Regtested on rv64gcv_zvl512b.
>
> Regards
> Robin
>
> gcc/ChangeLog:
>
> * config/riscv/riscv-protos.h (whole_reg_to_reg_move_p):
> Rename from this...
> (whole_reg_move_p): ...to this.
> (whole_reg_loadstore_p): Declare.
> * config/riscv/riscv-v.cc (whole_reg_to_reg_move_p): Ditto.
> (whole_reg_move_p): Ditto.
> (whole_reg_loadstore_p): New function.
> * config/riscv/thead-vector.md: Use renamed function.
> * config/riscv/vector.md (@pred_store<mode>): Use new function.
>
> gcc/testsuite/ChangeLog:
>
> * gcc.target/riscv/rvv/autovec/reduc/reduc_call-4.c: Disable
> instruction scheduling.
> * gcc.target/riscv/rvv/base/vle-vl1r.c: New test.
> ---
> gcc/config/riscv/riscv-protos.h | 4 ++-
> gcc/config/riscv/riscv-v.cc | 28 ++++++++++++++++++-
> gcc/config/riscv/thead-vector.md | 2 +-
> gcc/config/riscv/vector.md | 21 ++++++++++++--
> .../riscv/rvv/autovec/reduc/reduc_call-4.c | 2 +-
> .../gcc.target/riscv/rvv/base/vle-vl1r.c | 14 ++++++++++
> 6 files changed, 64 insertions(+), 7 deletions(-)
> create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vle-vl1r.c
>
> diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
> index e2858a8b19f..8b362e323d9 100644
> --- a/gcc/config/riscv/riscv-protos.h
> +++ b/gcc/config/riscv/riscv-protos.h
> @@ -790,7 +790,9 @@ bool can_be_broadcast_p (rtx);
> bool strided_broadcast_p (rtx);
> bool gather_scatter_valid_offset_p (machine_mode);
> HOST_WIDE_INT estimated_poly_value (poly_int64, unsigned int);
> -bool whole_reg_to_reg_move_p (rtx *, machine_mode, int);
> +bool whole_reg_move_p (rtx *, machine_mode, int);
> +bool whole_reg_loadstore_p (rtx dest, rtx src, rtx mask, rtx avl, rtx
> + avl_type);
> bool splat_to_scalar_move_p (rtx *);
> rtx get_fp_rounding_coefficient (machine_mode);
> }
> diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
> index 3c7e749cb60..2103764da06 100644
> --- a/gcc/config/riscv/riscv-v.cc
> +++ b/gcc/config/riscv/riscv-v.cc
> @@ -6451,7 +6451,7 @@ estimated_poly_value (poly_int64 val, unsigned int kind)
>
> /* Return true it is whole register-register move. */
> bool
> -whole_reg_to_reg_move_p (rtx *ops, machine_mode mode, int avl_type_index)
> +whole_reg_move_p (rtx *ops, machine_mode mode, int avl_type_index)
> {
> /* An operation is a whole-register move if either
> (1) Its vlmax operand equals VLMAX
> @@ -6469,6 +6469,32 @@ whole_reg_to_reg_move_p (rtx *ops, machine_mode mode, int avl_type_index)
> && known_eq (INTVAL (ops[4]), GET_MODE_NUNITS (mode)))
> return true;
> }
> +
> + return false;
> +}
> +
> +/* Same but for a whole-register load or store. */
> +bool
> +whole_reg_loadstore_p (rtx dest, rtx src, rtx mask, rtx avl, rtx avl_type)
> +{
> + machine_mode mode = GET_MODE (dest);
> + if (!multiple_p (GET_MODE_SIZE (mode), BYTES_PER_RISCV_VECTOR))
> + return false;
> +
> + if (((memory_operand (dest, mode)
> + && register_operand (src, mode))
> + || (register_operand (dest, mode)
> + && memory_operand (src, mode)))
> + && satisfies_constraint_Wc1 (mask))
> + {
> + if (INTVAL (avl_type) == VLMAX)
> + return true;
> + /* AVL propagation PASS will transform FIXED-VLMAX with NUNITS < 32
> + into NON-VLMAX with LEN = NUNITS. */
> + else if (CONST_INT_P (avl)
> + && known_eq (INTVAL (avl), GET_MODE_NUNITS (mode)))
> + return true;
> + }
> return false;
> }
>
> diff --git a/gcc/config/riscv/thead-vector.md b/gcc/config/riscv/thead-vector.md
> index 5a02debdd20..4ad37bb441d 100644
> --- a/gcc/config/riscv/thead-vector.md
> +++ b/gcc/config/riscv/thead-vector.md
> @@ -228,7 +228,7 @@ (define_insn_and_split "*pred_mov_width<vlmem_op_attr><mode>"
> vs<vlmem_op_attr>.v\t%3,%0%p1
> vmv.v.v\t%0,%3
> vmv.v.v\t%0,%3"
> - "&& riscv_vector::whole_reg_to_reg_move_p (operands, <MODE>mode, 7)"
> + "&& riscv_vector::whole_reg_move_p (operands, <MODE>mode, 7)"
> [(set (match_dup 0) (match_dup 3))]
> ""
> [(set_attr "type" "vlde,vlde,vlde,vste,vimov,vimov")
> diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
> index 45be9e6fe17..136ecdc787e 100644
> --- a/gcc/config/riscv/vector.md
> +++ b/gcc/config/riscv/vector.md
> @@ -1967,7 +1967,16 @@ (define_insn_and_split "*pred_mov<mode>"
> vse<sew>.v\t%3,%0%p1
> vmv.v.v\t%0,%3
> vmv.v.v\t%0,%3"
> - "&& riscv_vector::whole_reg_to_reg_move_p (operands, <MODE>mode, 7)"
> + "&& (register_operand (operands[0], <MODE>mode)
> + && register_operand (operands[3], <MODE>mode)
> + && riscv_vector::whole_reg_move_p (operands, <MODE>mode, 7))
> + || ((memory_operand (operands[0], <MODE>mode)
> + || memory_operand (operands[3], <MODE>mode))
> + && operands[2] != operands[0]
> + && !reload_completed
> + && riscv_vector::whole_reg_loadstore_p (operands[0], operands[3],
> + operands[1], operands[4],
> + operands[7]))"
> [(set (match_dup 0) (match_dup 3))]
> ""
> [(set_attr "type" "vlde,vlde,vlde,vste,vimov,vimov")
> @@ -1975,7 +1984,7 @@ (define_insn_and_split "*pred_mov<mode>"
>
> ;; Dedicated pattern for vse.v instruction since we can't reuse pred_mov pattern to include
> ;; memory operand as input which will produce inferior codegen.
> -(define_insn "@pred_store<mode>"
> +(define_insn_and_split "@pred_store<mode>"
> [(set (match_operand:V_VLS 0 "memory_operand" "+m")
> (if_then_else:V_VLS
> (unspec:<VM>
> @@ -1988,6 +1997,12 @@ (define_insn "@pred_store<mode>"
> (match_dup 0)))]
> "TARGET_VECTOR"
> "vse<sew>.v\t%2,%0%p1"
> + "&& !reload_completed
> + && riscv_vector::whole_reg_loadstore_p (operands[0], operands[2],
> + operands[1], operands[3],
> + operands[4])"
> + [(set (match_dup 0) (match_dup 2))]
> + ""
> [(set_attr "type" "vste")
> (set_attr "mode" "<MODE>")
> (set (attr "avl_type_idx") (const_int 4))
> @@ -2016,7 +2031,7 @@ (define_insn_and_split "@pred_mov<mode>"
> vmmv.m\t%0,%3
> vmclr.m\t%0
> vmset.m\t%0"
> - "&& riscv_vector::whole_reg_to_reg_move_p (operands, <MODE>mode, 5)"
> + "&& riscv_vector::whole_reg_move_p (operands, <MODE>mode, 5)"
> [(set (match_dup 0) (match_dup 3))]
> ""
> [(set_attr "type" "vldm,vstm,vmalu,vmalu,vmalu")
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_call-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_call-4.c
> index 1a99df6adf6..498ede9d10d 100644
> --- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_call-4.c
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_call-4.c
> @@ -1,5 +1,5 @@
> /* { dg-do compile } */
> -/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d -mrvv-vector-bits=zvl -fno-vect-cost-model -ffast-math" } */
> +/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d -mrvv-vector-bits=zvl -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
>
> #include "reduc_call-1.c"
>
> diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/vle-vl1r.c b/gcc/testsuite/gcc.target/riscv/rvv/base/vle-vl1r.c
> new file mode 100644
> index 00000000000..0dc3ff5b91c
> --- /dev/null
> +++ b/gcc/testsuite/gcc.target/riscv/rvv/base/vle-vl1r.c
> @@ -0,0 +1,14 @@
> +/* { dg-do compile } */
> +/* { dg-options "-O2 -march=rv64gcv -mabi=lp64d -mrvv-vector-bits=zvl" } */
> +
> +#include <riscv_vector.h>
> +
> +vfloat32m1_t
> +foo (float *a)
> +{
> + vfloat32m1_t a0 = __riscv_vle32_v_f32m1 (a, 4);
> + return a0;
> +}
> +
> +/* { dg-final { scan-assembler-not "vle32" } } */
> +/* { dg-final { scan-assembler-times "vl1re32.v" 1 } } */
> --
> 2.53.0
>