https://gcc.gnu.org/bugzilla/show_bug.cgi?id=123755
Jeffrey A. Law <law at gcc dot gnu.org> changed:
What |Removed |Added
----------------------------------------------------------------------------
Status|RESOLVED |REOPENED
Resolution|FIXED |---
--- Comment #4 from Jeffrey A. Law <law at gcc dot gnu.org> ---
That patch seems to have caused notable code quality regressions on riscv64.
unix/-march=rv64gc_zba_zbb_zbs_zicond: gcc:
gcc.target/riscv/rvv/autovec/reduc/reduc_call-5.c -ftree-vectorize -O2
-mrvv-max-lmul=dynamic scan-assembler-times
vfredosum\\.vs\\s+v[0-9]+,v[0-9]+,v[0-9]+ 1
Code generated before the referenced change:
.L2:
vsetvli a5,a4,e8,mf4,ta,ma
vle64.v v6,0(a1)
vle64.v v2,0(a2)
vle64.v v4,0(a3)
vsetivli zero,1,e64,m1,ta,ma
vfmv.s.f v1,fa0
vsetvli zero,a5,e64,m2,ta,ma
slli a0,a5,3
sub a4,a4,a5
vfmacc.vv v4,v2,v6
add a1,a1,a0
add a2,a2,a0
add a3,a3,a0
vfmadd.vv v2,v6,v4
vfredosum.vs v1,v2,v1
vfmv.f.s fa0,v1
bne a4,zero,.L2
The code generated after the change is just atrocious:
csrr a7,vlenb
srli t6,a7,2
addi a4,t6,-1
li a5,1023
fmv.d fa5,fa0
bgtu a4,a5,.L8
neg t5,t6
slli a7,a7,1
addi t5,t5,1024
mv t4,a3
mv t3,a2
mv t1,a1
li a6,0
.L3:
vl2re64.v v2,0(t1)
vl2re64.v v6,0(t3)
vl2re64.v v4,0(t4)
vsetvli a5,zero,e64,m2,ta,ma
vfmv.s.f v1,fa5
add a6,a6,t6
add t1,t1,a7
add t3,t3,a7
vfmacc.vv v4,v2,v6
add t4,t4,a7
vfmadd.vv v2,v6,v4
vfredosum.vs v2,v2,v1
vfmv.f.s fa5,v2
bleu a6,t5,.L3
li a5,1024
fmv.d fa0,fa5
beq a6,a5,.L14
sub a0,a5,a6
mv t5,a6
.L2:
csrr t6,vlenb
srli a5,t6,3
li a7,1023
addi a4,a5,-1
sub a7,a7,a6
bltu a7,a4,.L10
slli a4,a6,3
add t2,a1,a4
add t0,a2,a4
add a4,a3,a4
vl1re64.v v1,0(t2)
vl1re64.v v3,0(t0)
vl1re64.v v2,0(a4)
vsetvli a7,zero,e64,m1,ta,ma
vfmv.s.f v4,fa5
neg a7,a5
addi a7,a7,1024
sub a7,a7,a6
vfmacc.vv v2,v1,v3
li t1,1024
sub a6,t1,a6
vfmadd.vv v1,v3,v2
vfredosum.vs v1,v1,v4
vfmv.f.s fa0,v1
bltu a7,a5,.L6
add t2,t2,t6
add t0,t0,t6
add a4,a4,t6
vl1re64.v v1,0(t2)
vl1re64.v v3,0(t0)
vl1re64.v v2,0(a4)
vfmv.s.f v4,fa0
srli a5,t6,2
vfmacc.vv v2,v1,v3
vfmadd.vv v1,v3,v2
vfredosum.vs v1,v1,v4
vfmv.f.s fa0,v1
.L6:
beq a6,a5,.L14
add t5,t5,a5
sub a0,a0,a5
.L5:
add a0,t5,a0
slli a4,t5,3
slli a0,a0,3
add a5,a1,a4
add a2,a2,a4
add a0,a1,a0
add a4,a3,a4
.L7:
fld fa5,0(a5)
fld fa3,0(a2)
fld fa4,0(a4)
addi a5,a5,8
addi a2,a2,8
fmadd.d fa4,fa5,fa3,fa4
addi a4,a4,8
fmadd.d fa5,fa5,fa3,fa4
fadd.d fa0,fa0,fa5
bne a0,a5,.L7
.L14:
ret
.L8:
li a6,0
li a0,1024
li t5,0
j .L2
.L10:
fmv.d fa0,fa5
j .L5