Hi,
This patch allows pred_mov and pred_store, which usually result in
vle/vse insns, to be split into plain whole-register loads and stores
before reload so that we can emit more of them.
The advantage of whole-register operations is that they don't require a
vtype and therefore allow more freedom in vsetvl placement.
Regtested on rv64gcv_zvl512b.
Regards
Robin
gcc/ChangeLog:
* config/riscv/riscv-protos.h (whole_reg_to_reg_move_p):
Rename from this...
(whole_reg_move_p): ...to this.
(whole_reg_loadstore_p): Declare.
* config/riscv/riscv-v.cc (whole_reg_to_reg_move_p): Rename from
this...
(whole_reg_move_p): ...to this.
(whole_reg_loadstore_p): New function.
* config/riscv/thead-vector.md: Use renamed function.
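* config/riscv/vector.md (*pred_mov<mode>): Use renamed function and
split off whole-register loads and stores.
(@pred_store<mode>): Turn into define_insn_and_split and use new
function.
(@pred_mov<mode>): Use renamed function.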
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/reduc/reduc_call-4.c: Disable
instruction scheduling.
* gcc.target/riscv/rvv/base/vle-vl1r.c: New test.
---
gcc/config/riscv/riscv-protos.h | 4 ++-
gcc/config/riscv/riscv-v.cc | 28 ++++++++++++++++++-
gcc/config/riscv/thead-vector.md | 2 +-
gcc/config/riscv/vector.md | 21 ++++++++++++--
.../riscv/rvv/autovec/reduc/reduc_call-4.c | 2 +-
.../gcc.target/riscv/rvv/base/vle-vl1r.c | 14 ++++++++++
6 files changed, 64 insertions(+), 7 deletions(-)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/base/vle-vl1r.c
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index e2858a8b19f..8b362e323d9 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -790,7 +790,9 @@ bool can_be_broadcast_p (rtx);
bool strided_broadcast_p (rtx);
bool gather_scatter_valid_offset_p (machine_mode);
HOST_WIDE_INT estimated_poly_value (poly_int64, unsigned int);
-bool whole_reg_to_reg_move_p (rtx *, machine_mode, int);
+bool whole_reg_move_p (rtx *, machine_mode, int);
+bool whole_reg_loadstore_p (rtx dest, rtx src, rtx mask, rtx avl, rtx
+ avl_type);
bool splat_to_scalar_move_p (rtx *);
rtx get_fp_rounding_coefficient (machine_mode);
}
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index 3c7e749cb60..2103764da06 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -6451,7 +6451,7 @@ estimated_poly_value (poly_int64 val, unsigned int kind)
/* Return true it is whole register-register move. */
bool
-whole_reg_to_reg_move_p (rtx *ops, machine_mode mode, int avl_type_index)
+whole_reg_move_p (rtx *ops, machine_mode mode, int avl_type_index)
{
/* An operation is a whole-register move if either
(1) Its vlmax operand equals VLMAX
@@ -6469,6 +6469,32 @@ whole_reg_to_reg_move_p (rtx *ops, machine_mode mode, int avl_type_index)
&& known_eq (INTVAL (ops[4]), GET_MODE_NUNITS (mode)))
return true;
}
+
+ return false;
+}
+
+/* Same but for a whole-register load or store. */
+bool
+whole_reg_loadstore_p (rtx dest, rtx src, rtx mask, rtx avl, rtx avl_type)
+{
+ machine_mode mode = GET_MODE (dest);
+ if (!multiple_p (GET_MODE_SIZE (mode), BYTES_PER_RISCV_VECTOR))
+ return false;
+
+ if (((memory_operand (dest, mode)
+ && register_operand (src, mode))
+ || (register_operand (dest, mode)
+ && memory_operand (src, mode)))
+ && satisfies_constraint_Wc1 (mask))
+ {
+ if (INTVAL (avl_type) == VLMAX)
+ return true;
+ /* AVL propagation PASS will transform FIXED-VLMAX with NUNITS < 32
+ into NON-VLMAX with LEN = NUNITS. */
+ else if (CONST_INT_P (avl)
+ && known_eq (INTVAL (avl), GET_MODE_NUNITS (mode)))
+ return true;
+ }
return false;
}
diff --git a/gcc/config/riscv/thead-vector.md b/gcc/config/riscv/thead-vector.md
index 5a02debdd20..4ad37bb441d 100644
--- a/gcc/config/riscv/thead-vector.md
+++ b/gcc/config/riscv/thead-vector.md
@@ -228,7 +228,7 @@ (define_insn_and_split "*pred_mov_width<vlmem_op_attr><mode>"
vs<vlmem_op_attr>.v\t%3,%0%p1
vmv.v.v\t%0,%3
vmv.v.v\t%0,%3"
- "&& riscv_vector::whole_reg_to_reg_move_p (operands, <MODE>mode, 7)"
+ "&& riscv_vector::whole_reg_move_p (operands, <MODE>mode, 7)"
[(set (match_dup 0) (match_dup 3))]
""
[(set_attr "type" "vlde,vlde,vlde,vste,vimov,vimov")
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 45be9e6fe17..136ecdc787e 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -1967,7 +1967,16 @@ (define_insn_and_split "*pred_mov<mode>"
vse<sew>.v\t%3,%0%p1
vmv.v.v\t%0,%3
vmv.v.v\t%0,%3"
- "&& riscv_vector::whole_reg_to_reg_move_p (operands, <MODE>mode, 7)"
+ "&& (register_operand (operands[0], <MODE>mode)
+ && register_operand (operands[3], <MODE>mode)
+ && riscv_vector::whole_reg_move_p (operands, <MODE>mode, 7))
+ || ((memory_operand (operands[0], <MODE>mode)
+ || memory_operand (operands[3], <MODE>mode))
+ && operands[2] != operands[0]
+ && !reload_completed
+ && riscv_vector::whole_reg_loadstore_p (operands[0], operands[3],
+ operands[1], operands[4],
+ operands[7]))"
[(set (match_dup 0) (match_dup 3))]
""
[(set_attr "type" "vlde,vlde,vlde,vste,vimov,vimov")
@@ -1975,7 +1984,7 @@ (define_insn_and_split "*pred_mov<mode>"
;; Dedicated pattern for vse.v instruction since we can't reuse pred_mov pattern to include
;; memory operand as input which will produce inferior codegen.
-(define_insn "@pred_store<mode>"
+(define_insn_and_split "@pred_store<mode>"
[(set (match_operand:V_VLS 0 "memory_operand" "+m")
(if_then_else:V_VLS
(unspec:<VM>
@@ -1988,6 +1997,12 @@ (define_insn "@pred_store<mode>"
(match_dup 0)))]
"TARGET_VECTOR"
"vse<sew>.v\t%2,%0%p1"
+ "&& !reload_completed
+ && riscv_vector::whole_reg_loadstore_p (operands[0], operands[2],
+ operands[1], operands[3],
+ operands[4])"
+ [(set (match_dup 0) (match_dup 2))]
+ ""
[(set_attr "type" "vste")
(set_attr "mode" "<MODE>")
(set (attr "avl_type_idx") (const_int 4))
@@ -2016,7 +2031,7 @@ (define_insn_and_split "@pred_mov<mode>"
vmmv.m\t%0,%3
vmclr.m\t%0
vmset.m\t%0"
- "&& riscv_vector::whole_reg_to_reg_move_p (operands, <MODE>mode, 5)"
+ "&& riscv_vector::whole_reg_move_p (operands, <MODE>mode, 5)"
[(set (match_dup 0) (match_dup 3))]
""
[(set_attr "type" "vldm,vstm,vmalu,vmalu,vmalu")
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_call-4.c b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_call-4.c
index 1a99df6adf6..498ede9d10d 100644
--- a/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_call-4.c
+++ b/gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_call-4.c
@@ -1,5 +1,5 @@
/* { dg-do compile } */
-/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d -mrvv-vector-bits=zvl -fno-vect-cost-model -ffast-math" } */
+/* { dg-additional-options "-march=rv32gcv_zvfh -mabi=ilp32d -mrvv-vector-bits=zvl -fno-vect-cost-model -ffast-math -fno-schedule-insns -fno-schedule-insns2" } */
#include "reduc_call-1.c"
diff --git a/gcc/testsuite/gcc.target/riscv/rvv/base/vle-vl1r.c b/gcc/testsuite/gcc.target/riscv/rvv/base/vle-vl1r.c
new file mode 100644
index 00000000000..0dc3ff5b91c
--- /dev/null
+++ b/gcc/testsuite/gcc.target/riscv/rvv/base/vle-vl1r.c
@@ -0,0 +1,14 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -march=rv64gcv -mabi=lp64d -mrvv-vector-bits=zvl" } */
+
+#include <riscv_vector.h>
+
+vfloat32m1_t
+foo (float *a)
+{
+ vfloat32m1_t a0 = __riscv_vle32_v_f32m1 (a, 4);
+ return a0;
+}
+
+/* { dg-final { scan-assembler-not "vle32" } } */
+/* { dg-final { scan-assembler-times "vl1re32.v" 1 } } */
--
2.53.0