| Issue |
71522
|
| Summary |
[AArch64] Missed doubly nested loop fmla vectorisation (tsvc, s235)
|
| Labels |
backend:AArch64,
vectorization
|
| Assignees |
|
| Reporter |
sjoerdmeijer
|
GCC 12 vectorises the statements in both the outer and the inner loop, whereas Clang doesn't do any vectorisation. As a result, Clang is about 90% behind GCC on kernel s235 in TSVC.
Compile this input with `-O3 -mcpu=neoverse-v2 -ffast-math`:
```
__attribute__((aligned(64))) float a[32000],b[32000],c[32000],d[32000],e[32000],
aa[256][256],bb[256][256],cc[256][256],tt[256][256];
int dummy(float[32000], float[32000], float[32000], float[32000], float[32000], float[256][256], float[256][256], float[256][256], float);
float s235()
{
for (int nl = 0; nl < 200*(100000/256); nl++) {
for (int i = 0; i < 256; i++) {
a[i] += b[i] * c[i];
for (int j = 1; j < 256; j++) {
aa[j][i] = aa[j-1][i] + bb[j][i] * a[i];
}
}
dummy(a, b, c, d, e, aa, bb, cc, 0.);
}
return aa[1][2];
}
```
Clang's scalar codegen:
```
.LBB0_2: // Parent Loop BB0_1 Depth=1
ldr s0, [x21, x8, lsl #2]
ldr s1, [x22, x8, lsl #2]
ldr s2, [x23, x8, lsl #2]
mov w11, #255 // =0xff
mov x12, x9
mov x13, x10
fmadd s0, s1, s0, s2
ldr s1, [x20, x8, lsl #2]
str s0, [x23, x8, lsl #2]
.LBB0_3: // Parent Loop BB0_1 Depth=1
ldr s2, [x13, #1024]
subs x11, x11, #3
fmadd s1, s2, s0, s1
ldr s2, [x13, #2048]
str s1, [x12, #1024]
fmadd s1, s2, s0, s1
ldr s2, [x13, #3072]
add x13, x13, #3072
str s1, [x12, #2048]
fmadd s1, s2, s0, s1
str s1, [x12, #3072]
add x12, x12, #3072
b.ne .LBB0_3
add x8, x8, #1
add x10, x10, #4
add x9, x9, #4
cmp x8, #256
b.ne .LBB0_2
```
vs. GCC's vector code:
```
.L4:
add x10, x22, x11
sub x9, x8, #1024
ldr q29, [x21, x11]
mov x0, 0
ldr q30, [x2, x11]
ldr q31, [x28, x11]
fmla v29.4s, v30.4s, v31.4s
str q29, [x21, x11]
.L3:
ldr q30, [x10, x0]
ldr q31, [x9, x0]
fmla v31.4s, v30.4s, v29.4s
str q31, [x8, x0]
add x0, x0, 1024
cmp x0, x19
bne .L3
add x11, x11, 16
add x8, x8, 16
cmp x11, 1024
bne .L4
```
See also:
https://godbolt.org/z/5fG1bffqz
TODO:
Root cause analysis.
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs