https://gcc.gnu.org/bugzilla/show_bug.cgi?id=111313

            Bug ID: 111313
           Summary: RISC-V: Incorrect code gen for 2 level loop
           Product: gcc
           Version: 14.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: c
          Assignee: unassigned at gcc dot gnu.org
          Reporter: pan2.li at intel dot com
  Target Milestone: ---

Created attachment 55846
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=55846&action=edit
Reproduce code

Consider the example code below:

#define K 32

signed short in[2*K][K] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));
signed short coeff[K][K] __attribute__ ((__aligned__(__BIGGEST_ALIGNMENT__)));

__attribute__ ((noinline)) void
test ()
{
  for (int j = 0; j < K; j++)
  {
    for (int i = 0; i < 2*K; i++)
      in[i][j] = i+j;

    for (int i = 0; i < K; i++)
      coeff[i][j] = i + 2;
  }
}

When compiled with options similar to "-march=rv64imafdcv -mabi=lp64d
-mcmodel=medlow -fdiagnostics-plain-output -flto -ffat-lto-objects
--param riscv-autovec-preference=scalable -Wno-psabi -ftree-vectorize
-fno-tree-loop-distribute-patterns -fno-vect-cost-model -fno-common
-fdump-tree-vect-details", the generated assembly code is:

init_in:
        lui     t1,%hi(coeff)
        lui     a7,%hi(in)
        csrr    a0,vlenb
        addi    t1,t1,%lo(coeff)
        addi    a7,a7,%lo(in)
        srli    a0,a0,2
        li      a6,0
        li      t3,32
        vsetvli a1,zero,e16,mf2,ta,ma
        vid.v   v3
        vsll.vi v3,v3,6
.L2:
        mv      a2,a7
        li      a4,64
        vmv.v.x v4,a6  <= this insn executes with e16 the first time through,
                           but with e32 when the outer loop branches back to .L2
        vsetvli zero,zero,e32,m1,ta,ma
        vid.v   v2
.L3:
        vsetvli zero,zero,e16,mf2,ta,ma
        vmv1r.v v1,v2
        vncvt.x.x.w     v1,v1
        vsetvli a5,a4,e8,mf4,ta,ma
        vsetvli a3,zero,e16,mf2,ta,ma
        sub     a4,a4,a5
        vadd.vv v1,v1,v4
        vsetvli zero,a5,e16,mf2,ta,ma
        slli    a5,a5,6
        vsuxei16.v      v1,(a2),v3
        vsetvli a1,zero,e32,m1,ta,ma
        add     a2,a2,a5
        vmv.v.x v1,a0
        vadd.vv v2,v2,v1
        bne     a4,zero,.L3
        mv      a2,t1
        li      a4,32
        vid.v   v2
.L4:
        vsetvli zero,zero,e16,mf2,ta,ma
        vmv1r.v v1,v2
        vncvt.x.x.w     v1,v1
        vsetvli a5,a4,e8,mf4,ta,ma
        vsetvli a3,zero,e16,mf2,ta,ma
        sub     a4,a4,a5
        vadd.vi v1,v1,2
        vsetvli zero,a5,e16,mf2,ta,ma
        slli    a5,a5,6
        vsuxei16.v      v1,(a2),v3
        vsetvli a1,zero,e32,m1,ta,ma  <= last vsetvli before the branch back
                                         to .L2, so vtype is still e32 there
        add     a2,a2,a5
        vmv.v.x v1,a0
        vadd.vv v2,v2,v1
        bne     a4,zero,.L4
        addiw   a6,a6,1
        addi    t1,t1,2
        addi    a7,a7,2
        bne     a6,t3,.L2
        ret
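
For reference, a minimal runtime check along the following lines (only a
sketch to be appended to the snippet above; the actual verification lives in
the attached reproducer) returns normally when the arrays hold the expected
values and aborts if the mis-typed vmv.v.x corrupts the stored results:

/* Hypothetical checker, not part of the attachment: verify the values
   test () is supposed to store into in[][] and coeff[][].  */
int
main ()
{
  test ();

  for (int i = 0; i < 2 * K; i++)
    for (int j = 0; j < K; j++)
      if (in[i][j] != i + j)
        __builtin_abort ();

  for (int i = 0; i < K; i++)
    for (int j = 0; j < K; j++)
      if (coeff[i][j] != i + 2)
        __builtin_abort ();

  return 0;
}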
