Issue 130872
Summary [aarch64] clang/LLVM fails to vectorize simple loop
Labels
Assignees
Reporter haasn
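    I believe this to be a bug (a missed vectorization): for the loop below, clang emits fully scalar code, which is significantly slower than the vectorized code GCC generates for the same source.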

## Code
```c
/*
 * De-interleave 64 bytes from `in` into two 32-byte outputs:
 * even-indexed bytes (in[0], in[2], ..., in[62]) go to `a`,
 * odd-indexed bytes  (in[1], in[3], ..., in[63]) go to `b`.
 *
 * All three pointers are `restrict`-qualified, so the caller promises the
 * buffers do not overlap. The trip count is the compile-time constant 32.
 */
void read32x2(char *restrict a, char *restrict b, const char *restrict in)
{
    for (int i = 0; i < 32; i++) {
        a[i] = in[2 * i + 0];   /* even byte of the i-th pair */
        b[i] = in[2 * i + 1];   /* odd byte of the i-th pair */
    }
}
```

## clang trunk `-O3`:
```asm
// clang output for read32x2(a, b, in): fully unrolled, purely scalar.
// Per the C signature and AAPCS64 argument order: x0 = a, x1 = b, x2 = in.
// The listing is 64 byte loads (ldrb from [x2, #0..#63]) and 64 byte stores
// (strb to [x0, #0..#31] and [x1, #0..#31]); no vector instructions appear.
// w8 carries the even-offset input bytes (stored via x0), w9 carries the
// odd-offset input bytes (stored via x1). Each store is scheduled after the
// next load into the other scratch register.
read32x2:
        ldrb    w8, [x2]
        ldrb w9, [x2, #1]
        strb    w8, [x0]
        ldrb    w8, [x2, #2]
 strb    w9, [x1]
        ldrb    w9, [x2, #3]
        strb    w8, [x0, #1]
        ldrb    w8, [x2, #4]
        strb    w9, [x1, #1]
        ldrb w9, [x2, #5]
        strb    w8, [x0, #2]
        ldrb    w8, [x2, #6]
 strb    w9, [x1, #2]
        ldrb    w9, [x2, #7]
        strb w8, [x0, #3]
        ldrb    w8, [x2, #8]
        strb    w9, [x1, #3]
 ldrb    w9, [x2, #9]
        strb    w8, [x0, #4]
        ldrb    w8, [x2, #10]
        strb    w9, [x1, #4]
        ldrb    w9, [x2, #11]
 strb    w8, [x0, #5]
        ldrb    w8, [x2, #12]
        strb    w9, [x1, #5]
        ldrb    w9, [x2, #13]
        strb    w8, [x0, #6]
 ldrb    w8, [x2, #14]
        strb    w9, [x1, #6]
        ldrb    w9, [x2, #15]
        strb    w8, [x0, #7]
        ldrb    w8, [x2, #16]
 strb    w9, [x1, #7]
        ldrb    w9, [x2, #17]
        strb    w8, [x0, #8]
        ldrb    w8, [x2, #18]
        strb    w9, [x1, #8]
 ldrb    w9, [x2, #19]
        strb    w8, [x0, #9]
        ldrb    w8, [x2, #20]
        strb    w9, [x1, #9]
        ldrb    w9, [x2, #21]
 strb    w8, [x0, #10]
        ldrb    w8, [x2, #22]
        strb    w9, [x1, #10]
        ldrb    w9, [x2, #23]
        strb    w8, [x0, #11]
 ldrb    w8, [x2, #24]
        strb    w9, [x1, #11]
        ldrb    w9, [x2, #25]
        strb    w8, [x0, #12]
        ldrb    w8, [x2, #26]
 strb    w9, [x1, #12]
        ldrb    w9, [x2, #27]
        strb    w8, [x0, #13]
        ldrb    w8, [x2, #28]
        strb    w9, [x1, #13]
 ldrb    w9, [x2, #29]
        strb    w8, [x0, #14]
        ldrb    w8, [x2, #30]
        strb    w9, [x1, #14]
        ldrb    w9, [x2, #31]
 strb    w8, [x0, #15]
        ldrb    w8, [x2, #32]
        strb    w9, [x1, #15]
        ldrb    w9, [x2, #33]
        strb    w8, [x0, #16]
 ldrb    w8, [x2, #34]
        strb    w9, [x1, #16]
        ldrb    w9, [x2, #35]
        strb    w8, [x0, #17]
        ldrb    w8, [x2, #36]
 strb    w9, [x1, #17]
        ldrb    w9, [x2, #37]
        strb    w8, [x0, #18]
        ldrb    w8, [x2, #38]
        strb    w9, [x1, #18]
 ldrb    w9, [x2, #39]
        strb    w8, [x0, #19]
        ldrb    w8, [x2, #40]
        strb    w9, [x1, #19]
        ldrb    w9, [x2, #41]
 strb    w8, [x0, #20]
        ldrb    w8, [x2, #42]
        strb    w9, [x1, #20]
        ldrb    w9, [x2, #43]
        strb    w8, [x0, #21]
 ldrb    w8, [x2, #44]
        strb    w9, [x1, #21]
        ldrb    w9, [x2, #45]
        strb    w8, [x0, #22]
        ldrb    w8, [x2, #46]
 strb    w9, [x1, #22]
        ldrb    w9, [x2, #47]
        strb    w8, [x0, #23]
        ldrb    w8, [x2, #48]
        strb    w9, [x1, #23]
 ldrb    w9, [x2, #49]
        strb    w8, [x0, #24]
        ldrb    w8, [x2, #50]
        strb    w9, [x1, #24]
        ldrb    w9, [x2, #51]
 strb    w8, [x0, #25]
        ldrb    w8, [x2, #52]
        strb    w9, [x1, #25]
        ldrb    w9, [x2, #53]
        strb    w8, [x0, #26]
 ldrb    w8, [x2, #54]
        strb    w9, [x1, #26]
        ldrb    w9, [x2, #55]
        strb    w8, [x0, #27]
        ldrb    w8, [x2, #56]
 strb    w9, [x1, #27]
        ldrb    w9, [x2, #57]
        strb    w8, [x0, #28]
        ldrb    w8, [x2, #58]
        strb    w9, [x1, #28]
 ldrb    w9, [x2, #59]
        strb    w8, [x0, #29]
        ldrb    w8, [x2, #60]
        strb    w9, [x1, #29]
        ldrb    w9, [x2, #61]
 strb    w8, [x0, #30]
        ldrb    w8, [x2, #62]
        strb    w9, [x1, #30]
        ldrb    w9, [x2, #63]
        strb    w8, [x0, #31]
 strb    w9, [x1, #31]
        ret
```

## GCC trunk `-O3`:
```asm
// GCC output for read32x2(a, b, in): vectorized with de-interleaving loads.
// Per the C signature and AAPCS64 argument order: x0 = a, x1 = b, x2 = in.
read32x2:
        // Load in[0..31] as 16 two-byte structures: even bytes -> v28,
        // odd bytes -> v29; then post-increment x2 by 32.
        ld2     {v28.16b - v29.16b}, [x2], 32
        // Load in[32..63] the same way: even bytes -> v30, odd bytes -> v31.
 ld2     {v30.16b - v31.16b}, [x2]
        // a[0..15] = v28, a[16..31] = v30 (all even-indexed input bytes).
        stp     q28, q30, [x0]
        // b[0..15] = v29, b[16..31] = v31 (all odd-indexed input bytes).
 stp     q29, q31, [x1]
        ret
```

## See Also
https://godbolt.org/z/5aWdbjTEx
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to