| Issue |
108052
|
| Summary |
Missed optimization, failure to remove residual empty loop
|
| Labels |
new issue
|
| Assignees |
|
| Reporter |
jeffplaisance
|
Clang correctly optimizes this to a memset and a multiplication, but it fails to remove the residual empty loop:
https://godbolt.org/z/j3EcrMvjh
code:
```
#include <cstddef>
// Calls f(i) for every i in [0, n) and returns the int sum of the results.
// Marked noinline, so the loop stays in its own out-of-line instantiation
// rather than being merged into the caller.
template <typename F>
__attribute__((noinline)) int loopFunction(F f, size_t n) {
int sum = 0;
for (size_t i = 0; i < n; ++i) {
// Each iteration's effect comes entirely from the callback.
sum += f(i);
}
return sum;
}
// Reproducer entry point: runs loopFunction over [0, n) with a callback that
// stores 0 to a[i] and returns *b, so the result is the sum of n reads of *b.
// a is __restrict__-qualified both here and on the inner lambda's parameter.
int f(char* __restrict__ a, int* b, size_t n) {
// Outer lambda captures a and b by copy; i is the index supplied by loopFunction.
return loopFunction([=](auto i) __attribute__((always_inline)) {
// Inner lambda captures i by reference and is invoked immediately with (a, b).
return [&i](char* __restrict__ a, int* b) __attribute__((always_inline)) {
a[i] = 0;
return *b;
}(a, b);
}, n);
}
```
AArch64 assembly at -O1; the residual empty loop is at .LBB1_2:
```
f(char*, int*, unsigned long):
b int loopFunction<f(char*, int*, unsigned long)::$_0>(f(char*, int*, unsigned long)::$_0, unsigned long)
int loopFunction<f(char*, int*, unsigned long)::$_0>(f(char*, int*, unsigned long)::$_0, unsigned long):
cbz x2, .LBB1_4
stp x29, x30, [sp, #-32]!
stp x20, x19, [sp, #16]
mov x29, sp
ldr w20, [x1]
mov w1, wzr
mov x19, x2
bl memset
mov x8, x19
.LBB1_2:
subs x8, x8, #1
b.ne .LBB1_2
mul w0, w20, w19
ldp x20, x19, [sp, #16]
ldp x29, x30, [sp], #32
ret
.LBB1_4:
mov w0, wzr
ret
```
This still occurs at -O2 and -O3, and on x86-64 as well, so it is not specific to aarch64 or -O1.
_______________________________________________
llvm-bugs mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs