https://gcc.gnu.org/bugzilla/show_bug.cgi?id=123755

Jeffrey A. Law <law at gcc dot gnu.org> changed:

           What    |Removed                     |Added
----------------------------------------------------------------------------
             Status|RESOLVED                    |REOPENED
         Resolution|FIXED                       |---

--- Comment #4 from Jeffrey A. Law <law at gcc dot gnu.org> ---
That patch seems to have caused notable code quality regressions on riscv64.

unix/-march=rv64gc_zba_zbb_zbs_zicond: gcc:
gcc.target/riscv/rvv/autovec/reduc/reduc_call-5.c -ftree-vectorize -O2
-mrvv-max-lmul=dynamic  scan-assembler-times
vfredosum\\.vs\\s+v[0-9]+,v[0-9]+,v[0-9]+ 1

Before the referenced change, the generated code was:
.L2:
        vsetvli a5,a4,e8,mf4,ta,ma
        vle64.v v6,0(a1)
        vle64.v v2,0(a2)
        vle64.v v4,0(a3)
        vsetivli        zero,1,e64,m1,ta,ma
        vfmv.s.f        v1,fa0
        vsetvli zero,a5,e64,m2,ta,ma
        slli    a0,a5,3
        sub     a4,a4,a5
        vfmacc.vv       v4,v2,v6
        add     a1,a1,a0
        add     a2,a2,a0
        add     a3,a3,a0
        vfmadd.vv       v2,v6,v4
        vfredosum.vs    v1,v2,v1
        vfmv.f.s        fa0,v1
        bne     a4,zero,.L2

After is just atrocious:

        csrr    a7,vlenb
        srli    t6,a7,2
        addi    a4,t6,-1
        li      a5,1023
        fmv.d   fa5,fa0
        bgtu    a4,a5,.L8
        neg     t5,t6
        slli    a7,a7,1
        addi    t5,t5,1024
        mv      t4,a3
        mv      t3,a2
        mv      t1,a1
        li      a6,0
.L3:
        vl2re64.v       v2,0(t1)
        vl2re64.v       v6,0(t3)
        vl2re64.v       v4,0(t4)
        vsetvli a5,zero,e64,m2,ta,ma
        vfmv.s.f        v1,fa5
        add     a6,a6,t6
        add     t1,t1,a7
        add     t3,t3,a7
        vfmacc.vv       v4,v2,v6
        add     t4,t4,a7
        vfmadd.vv       v2,v6,v4
        vfredosum.vs    v2,v2,v1
        vfmv.f.s        fa5,v2
        bleu    a6,t5,.L3
        li      a5,1024
        fmv.d   fa0,fa5
        beq     a6,a5,.L14
        sub     a0,a5,a6
        mv      t5,a6
.L2:
        csrr    t6,vlenb
        srli    a5,t6,3
        li      a7,1023
        addi    a4,a5,-1
        sub     a7,a7,a6
        bltu    a7,a4,.L10
        slli    a4,a6,3
        add     t2,a1,a4
        add     t0,a2,a4
        add     a4,a3,a4
        vl1re64.v       v1,0(t2)
        vl1re64.v       v3,0(t0)
        vl1re64.v       v2,0(a4)
        vsetvli a7,zero,e64,m1,ta,ma
        vfmv.s.f        v4,fa5
        neg     a7,a5
        addi    a7,a7,1024
        sub     a7,a7,a6
        vfmacc.vv       v2,v1,v3
        li      t1,1024
        sub     a6,t1,a6
        vfmadd.vv       v1,v3,v2
        vfredosum.vs    v1,v1,v4
        vfmv.f.s        fa0,v1
        bltu    a7,a5,.L6
        add     t2,t2,t6
        add     t0,t0,t6
        add     a4,a4,t6
        vl1re64.v       v1,0(t2)
        vl1re64.v       v3,0(t0)
        vl1re64.v       v2,0(a4)
        vfmv.s.f        v4,fa0
        srli    a5,t6,2
        vfmacc.vv       v2,v1,v3
        vfmadd.vv       v1,v3,v2
        vfredosum.vs    v1,v1,v4
        vfmv.f.s        fa0,v1
.L6:
        beq     a6,a5,.L14
        add     t5,t5,a5
        sub     a0,a0,a5
.L5:
        add     a0,t5,a0
        slli    a4,t5,3
        slli    a0,a0,3
        add     a5,a1,a4
        add     a2,a2,a4
        add     a0,a1,a0
        add     a4,a3,a4
.L7:
        fld     fa5,0(a5)
        fld     fa3,0(a2)
        fld     fa4,0(a4)
        addi    a5,a5,8
        addi    a2,a2,8
        fmadd.d fa4,fa5,fa3,fa4
        addi    a4,a4,8
        fmadd.d fa5,fa5,fa3,fa4
        fadd.d  fa0,fa0,fa5
        bne     a0,a5,.L7
.L14:
        ret
.L8:
        li      a6,0
        li      a0,1024
        li      t5,0
        j       .L2
.L10:
        fmv.d   fa0,fa5
        j       .L5

Reply via email to