Issue 108052
Summary Missed optimization, failure to remove residual empty loop
Labels new issue
Assignees
Reporter jeffplaisance
    Clang correctly optimizes this loop into a memset and a multiplication, but it fails to remove the residual empty loop that is left behind:

https://godbolt.org/z/j3EcrMvjh

code:
```
#include <cstddef>

// Calls f(i) for every i in [0, n) and returns the sum of the results as an int.
// Marked noinline so the loop stays in its own function instead of being merged
// into the caller; the instantiation appears as a separate symbol in the
// assembly listing of this report.
template <typename F>
__attribute__((noinline)) int loopFunction(F f, size_t n) {
    int sum = 0;
    for (size_t i = 0; i < n; ++i) {
        sum += f(i);
    }
    return sum;
}

// Reproducer entry point: runs loopFunction over [0, n) with a callback that
// stores 0 to a[i] and returns *b on every iteration.
int f(char* __restrict__ a, int* b, size_t n) {
 // Outer lambda: captures a and b by copy and receives the loop index i.
 return loopFunction([=](auto i) __attribute__((always_inline)) {
 // Inner lambda: captures i by reference, takes the pointers again as
 // parameters (with `a` re-declared __restrict__), and is invoked immediately.
 return [&i](char* __restrict__ a, int* b) __attribute__((always_inline)) {
            a[i] = 0;
            return *b;
        }(a, b);
 }, n);
}

```

AArch64 assembly at -O1; the residual empty loop is at .LBB1_2:
```
// f is reduced to a single tail branch into the noinline loopFunction instantiation.
f(char*, int*, unsigned long):
        b int loopFunction<f(char*, int*, unsigned long)::$_0>(f(char*, int*, unsigned long)::$_0, unsigned long)

// Out-of-line instantiation of loopFunction for the lambda in f.
// As used below: x0 is forwarded untouched to memset as the destination,
// x1 is dereferenced once, and x2 is the trip count.
int loopFunction<f(char*, int*, unsigned long)::$_0>(f(char*, int*, unsigned long)::$_0, unsigned long):
 // Zero trip count: jump to the path that returns 0.
 cbz     x2, .LBB1_4
        stp     x29, x30, [sp, #-32]!
        stp x20, x19, [sp, #16]
        mov     x29, sp
        // Single load through x1, hoisted out of the loop and kept in w20.
        ldr     w20, [x1]
        // memset(x0, 0, x2): the per-iteration byte stores collapsed into one call.
        mov     w1, wzr
        mov     x19, x2
        bl memset
        mov     x8, x19
// Residual loop: only counts x8 down to zero; nothing else happens in its body.
.LBB1_2:
        subs    x8, x8, #1
        b.ne    .LBB1_2
        // Result = loaded value (w20) * trip count (w19).
        mul     w0, w20, w19
        ldp x20, x19, [sp, #16]
        ldp     x29, x30, [sp], #32
 ret
// Zero-trip path: return 0.
.LBB1_4:
        mov     w0, wzr
        ret
```

This still occurs at -O2 and -O3, and on x86-64 as well, so it is not specific to AArch64 or -O1.
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to