Issue |
130872
|
Summary |
[aarch64] clang/LLVM fails to vectorize simple loop
|
Labels |
|
Assignees |
|
Reporter |
haasn
|
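I believe this to be a bug (a missed optimization): at `-O3` on aarch64, clang fully unrolls the loop below into scalar byte loads and stores instead of vectorizing it, whereas GCC emits two `ld2` de-interleaving loads. The scalar version is significantly slower.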
## Code
```c
/*
 * De-interleave a 64-byte input into two 32-byte outputs.
 *
 * a:  receives the even-indexed bytes of `in` (in[0], in[2], ..., in[62]).
 * b:  receives the odd-indexed bytes of `in`  (in[1], in[3], ..., in[63]).
 * in: source buffer; bytes 0..63 are read.
 *
 * All three pointers are restrict-qualified, i.e. the buffers are declared
 * not to alias one another. The trip count is the compile-time constant 32.
 */
void read32x2(char *restrict a, char *restrict b, const char *restrict in)
{
for (int i = 0; i < 32; i++) {
a[i] = in[2 * i + 0]; /* even source byte -> a */
b[i] = in[2 * i + 1]; /* odd source byte  -> b */
}
}
```
## clang trunk `-O3`:
```asm
// clang output: the 32-iteration loop is fully unrolled into scalar code.
// It performs 64 single-byte loads (ldrb) from [x2 + 0..63] and 64
// single-byte stores (strb): 32 to [x0 + 0..31] and 32 to [x1 + 0..31].
// No vector (NEON) instructions are used.
// x0/x1/x2 correspond to the arguments a/b/in (first three pointer
// arguments under the AArch64 procedure call standard).
// w8 carries the even-offset byte (destined for x0), w9 the odd-offset byte
// (destined for x1); loads for the next element are interleaved with the
// stores of the previous one.
read32x2:
ldrb w8, [x2]        // in[0]
ldrb w9, [x2, #1]    // in[1]
strb w8, [x0]        // a[0] = in[0]
ldrb w8, [x2, #2]
strb w9, [x1]        // b[0] = in[1]
ldrb w9, [x2, #3]
strb w8, [x0, #1]
ldrb w8, [x2, #4]
strb w9, [x1, #1]
ldrb w9, [x2, #5]
strb w8, [x0, #2]
ldrb w8, [x2, #6]
strb w9, [x1, #2]
ldrb w9, [x2, #7]
strb w8, [x0, #3]
ldrb w8, [x2, #8]
strb w9, [x1, #3]
ldrb w9, [x2, #9]
strb w8, [x0, #4]
ldrb w8, [x2, #10]
strb w9, [x1, #4]
ldrb w9, [x2, #11]
strb w8, [x0, #5]
ldrb w8, [x2, #12]
strb w9, [x1, #5]
ldrb w9, [x2, #13]
strb w8, [x0, #6]
ldrb w8, [x2, #14]
strb w9, [x1, #6]
ldrb w9, [x2, #15]
strb w8, [x0, #7]
ldrb w8, [x2, #16]
strb w9, [x1, #7]
ldrb w9, [x2, #17]
strb w8, [x0, #8]
ldrb w8, [x2, #18]
strb w9, [x1, #8]
ldrb w9, [x2, #19]
strb w8, [x0, #9]
ldrb w8, [x2, #20]
strb w9, [x1, #9]
ldrb w9, [x2, #21]
strb w8, [x0, #10]
ldrb w8, [x2, #22]
strb w9, [x1, #10]
ldrb w9, [x2, #23]
strb w8, [x0, #11]
ldrb w8, [x2, #24]
strb w9, [x1, #11]
ldrb w9, [x2, #25]
strb w8, [x0, #12]
ldrb w8, [x2, #26]
strb w9, [x1, #12]
ldrb w9, [x2, #27]
strb w8, [x0, #13]
ldrb w8, [x2, #28]
strb w9, [x1, #13]
ldrb w9, [x2, #29]
strb w8, [x0, #14]
ldrb w8, [x2, #30]
strb w9, [x1, #14]
ldrb w9, [x2, #31]
strb w8, [x0, #15]
ldrb w8, [x2, #32]
strb w9, [x1, #15]
ldrb w9, [x2, #33]
strb w8, [x0, #16]
ldrb w8, [x2, #34]
strb w9, [x1, #16]
ldrb w9, [x2, #35]
strb w8, [x0, #17]
ldrb w8, [x2, #36]
strb w9, [x1, #17]
ldrb w9, [x2, #37]
strb w8, [x0, #18]
ldrb w8, [x2, #38]
strb w9, [x1, #18]
ldrb w9, [x2, #39]
strb w8, [x0, #19]
ldrb w8, [x2, #40]
strb w9, [x1, #19]
ldrb w9, [x2, #41]
strb w8, [x0, #20]
ldrb w8, [x2, #42]
strb w9, [x1, #20]
ldrb w9, [x2, #43]
strb w8, [x0, #21]
ldrb w8, [x2, #44]
strb w9, [x1, #21]
ldrb w9, [x2, #45]
strb w8, [x0, #22]
ldrb w8, [x2, #46]
strb w9, [x1, #22]
ldrb w9, [x2, #47]
strb w8, [x0, #23]
ldrb w8, [x2, #48]
strb w9, [x1, #23]
ldrb w9, [x2, #49]
strb w8, [x0, #24]
ldrb w8, [x2, #50]
strb w9, [x1, #24]
ldrb w9, [x2, #51]
strb w8, [x0, #25]
ldrb w8, [x2, #52]
strb w9, [x1, #25]
ldrb w9, [x2, #53]
strb w8, [x0, #26]
ldrb w8, [x2, #54]
strb w9, [x1, #26]
ldrb w9, [x2, #55]
strb w8, [x0, #27]
ldrb w8, [x2, #56]
strb w9, [x1, #27]
ldrb w9, [x2, #57]
strb w8, [x0, #28]
ldrb w8, [x2, #58]
strb w9, [x1, #28]
ldrb w9, [x2, #59]
strb w8, [x0, #29]
ldrb w8, [x2, #60]
strb w9, [x1, #29]
ldrb w9, [x2, #61]
strb w8, [x0, #30]
ldrb w8, [x2, #62]
strb w9, [x1, #30]
ldrb w9, [x2, #63]
strb w8, [x0, #31]   // a[31] = in[62]
strb w9, [x1, #31]   // b[31] = in[63]
ret
```
## GCC trunk `-O3`:
```asm
// GCC output: the whole loop is vectorized into two de-interleaving loads
// and two paired 128-bit stores.
// x0/x1/x2 correspond to the arguments a/b/in (first three pointer
// arguments under the AArch64 procedure call standard).
read32x2:
// ld2 reads 32 bytes and de-interleaves them: even-offset bytes go to the
// first register, odd-offset bytes to the second. Post-index advances x2 by 32.
ld2 {v28.16b - v29.16b}, [x2], 32
// Second half of the input: bytes 32..63, split the same way into v30/v31.
ld2 {v30.16b - v31.16b}, [x2]
// Even-offset bytes (v28 then v30) form the 32 bytes written to a.
stp q28, q30, [x0]
// Odd-offset bytes (v29 then v31) form the 32 bytes written to b.
stp q29, q31, [x1]
ret
```
## See Also
https://godbolt.org/z/5aWdbjTEx
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs