https://gcc.gnu.org/bugzilla/show_bug.cgi?id=123748

            Bug ID: 123748
           Summary: Conversion from long to float is not vectorized for
                    aarch64 target
           Product: gcc
           Version: 16.0
            Status: UNCONFIRMED
          Keywords: missed-optimization
          Severity: normal
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: pzheng at gcc dot gnu.org
  Target Milestone: ---
            Target: aarch64

void foo(float *restrict f, long *restrict l) {
  for (int i = 0; i < 128; i++)
    f[i] = l[i];
}

With -O2, GCC generates:

foo:
        mov     x2, 0
.L2:
        ldr     x3, [x1, x2, lsl 3]
        scvtf   s31, x3
        str     s31, [x0, x2, lsl 2]
        add     x2, x2, 1
        cmp     x2, 128
        bne     .L2
        ret

LLVM, by contrast, converts four elements per iteration and writes them with a single 128-bit store:

foo:
        mov     x8, xzr
.LBB0_1:
        ldp     x9, x10, [x1]
        scvtf   s0, x10
        scvtf   s1, x9
        ldp     x9, x10, [x1, #16]
        add     x1, x1, #32
        mov     v1.s[1], v0.s[0]
        scvtf   s0, x9
        mov     v1.s[2], v0.s[0]
        scvtf   s0, x10
        mov     v1.s[3], v0.s[0]
        str     q1, [x0, x8]
        add     x8, x8, #16
        cmp     x8, #512
        b.ne    .LBB0_1
        ret

Reply via email to