This patch enables SLP unordered reduction auto-vectorization.
Consider the following case:
int __attribute__((noipa))
add_loop (int *x, int n, int res) /* returns res plus the sum of x[0] .. x[2*n-1] */
{
for (int i = 0; i < n; ++i)
{
res += x[i * 2]; /* first element of pair i */
res += x[i * 2 + 1]; /* second element of pair i; both feed the same accumulator */
}
return res;
}
Before this patch, --param riscv-autovec-preference=scalable -fopt-info-vec-missed reports:
:4:21: missed: couldn't vectorize loop
:4:21: missed: unsupported SLP instances
After this patch:
add_loop:
ble a1,zero,.L5
csrr a6,vlenb
srli a4,a6,2
slli a1,a1,1
neg a7,a4
vsetvli t1,zero,e32,m1,ta,ma
vmv.v.i v2,0
vslide1up.vx v1,v2,a2 ---> generated by VEC_SHL_INSERT
.L4:
mv a3,a1
mv a5,a1
bleu a1,a4,.L3
mv a5,a4
.L3:
vsetvli zero,a5,e32,m1,tu,ma
add a1,a1,a7
vle32.v v2,0(a0)
add a0,a0,a6
vadd.vv v1,v1,v2
bgtu a3,a4,.L4
vsetivli zero,1,e32,m1,ta,ma
vmv.v.i v2,0
vsetvli t1,zero,e32,m1,ta,ma
vredsum.vs v1,v1,v2
vmv.x.s a0,v1
ret
.L5:
mv a0,a2
ret
gcc/ChangeLog:
* config/riscv/autovec.md (vec_shl_insert_<mode>): New patterns.
* config/riscv/riscv-v.cc (shuffle_compress_patterns): Fix bugs.
gcc/testsuite/ChangeLog:
* gcc.target/riscv/rvv/autovec/reduc/reduc-5.c: New test.
* gcc.target/riscv/rvv/autovec/reduc/reduc-6.c: New test.
* gcc.target/riscv/rvv/autovec/reduc/reduc-7.c: New test.
* gcc.target/riscv/rvv/autovec/reduc/reduc-8.c: New test.
* gcc.target/riscv/rvv/autovec/reduc/reduc-9.c: New test.
* gcc.target/riscv/rvv/autovec/reduc/reduc_run-5.c: New test.
* gcc.target/riscv/rvv/autovec/reduc/reduc_run-6.c: New test.
* gcc.target/riscv/rvv/autovec/reduc/reduc_run-7.c: New test.
* gcc.target/riscv/rvv/autovec/reduc/reduc_run-8.c: New test.
* gcc.target/riscv/rvv/autovec/reduc/reduc_run-9.c: New test.
---
gcc/config/riscv/autovec.md | 32 +++
gcc/config/riscv/riscv-v.cc | 4 +
.../riscv/rvv/autovec/reduc/reduc-5.c | 88
.../riscv/rvv/autovec/reduc/reduc-6.c | 6 +
.../riscv/rvv/autovec/reduc/reduc-7.c | 88
.../riscv/rvv/autovec/reduc/reduc-8.c | 16 ++
.../riscv/rvv/autovec/reduc/reduc-9.c | 16 ++
.../riscv/rvv/autovec/reduc/reduc_run-5.c | 61 ++
.../riscv/rvv/autovec/reduc/reduc_run-6.c | 6 +
.../riscv/rvv/autovec/reduc/reduc_run-7.c | 188 ++
.../riscv/rvv/autovec/reduc/reduc_run-8.c | 22 ++
.../riscv/rvv/autovec/reduc/reduc_run-9.c | 22 ++
12 files changed, 549 insertions(+)
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-5.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-6.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-7.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-8.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc-9.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_run-5.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_run-6.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_run-7.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_run-8.c
create mode 100644 gcc/testsuite/gcc.target/riscv/rvv/autovec/reduc/reduc_run-9.c
diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index 8cdec75bacf..a85821ada9c 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -1692,3 +1692,35 @@
riscv_vector::expand_reduction (SMIN, operands, f);
DONE;
})
+
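+;; -------------------------------------------------------------------------
+;; [INT,FP] Initialize from individual elements
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - vslide1up.vx/vfslide1up.vf
+;; -------------------------------------------------------------------------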
+
+;; Slide integer vector op 1 up one element; insert scalar op 2 at element 0.
+(define_expand "vec_shl_insert_<mode>"
+ [(match_operand:VI 0 "register_operand")
+ (match_operand:VI 1 "register_operand")
+ (match_operand:<VEL> 2 "reg_or_0_operand")]
+ "TARGET_VECTOR"
+{
+ insn_code icode = code_for_pred_slide (UNSPEC_VSLIDE1UP, <MODE>mode);
+ rtx ops[] = {operands[0], RVV_VUNDEF (<MODE>mode), operands[1], operands[2]};
+ riscv_vector::emit_vlmax_slide_insn (icode, ops);
+ DONE;
+})
+
+(define_expand "vec_shl_insert_"
+ [(match_operand:VF 0 "register_operand")
+ (match_operand:VF 1 "register_operand")
+ (match_operand: 2 "register_operand")]
+ "TARGET_VECTOR"
+{
+ insn_code icode = code_for_pred_slide (UNSPEC_VFSLIDE1UP, mode);
+ rtx ops[] = {operands[0], RVV_VUNDEF (mode), operands[1],