https://gcc.gnu.org/bugzilla/show_bug.cgi?id=89134

            Bug ID: 89134
           Summary: A missing optimization opportunity for a simple branch
                    in loop
           Product: gcc
           Version: 9.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: tree-optimization
          Assignee: unassigned at gcc dot gnu.org
          Reporter: jiangning.liu at amperecomputing dot com
  Target Milestone: ---

For this simple case,

/*
 * Loop-step helper for the reproducer: returns i + 1.
 *
 * The pure attribute tells GCC that a call has no observable effect other
 * than its return value; noinline keeps the call from being inlined, so it
 * remains a real call in the generated loop.
 */
__attribute__((pure)) __attribute__((noinline)) int inc(int i)
{
        /* Do something else here */

        return i+1;
}
extern int do_something(void);
extern int b;
/*
 * Reproducer loop: advances i from 0 via inc() while i < n.  On each
 * iteration, if the global b is nonzero it is replaced by the result of
 * do_something().  Once b is zero the loop body no longer writes it, so the
 * remaining iterations only call the pure function inc().
 */
void test(int n)
{
        /* The loop step is a call to the pure, non-inlined inc(). */
        for (int i=0; i<n; i=inc(i))
        {
                /* Only this branch has side effects: it calls out and stores to b. */
                if (b) {
                        b = do_something();
                }
        }
}


"GCC -O3" is generating a loop as below,

// Excerpt of the code GCC emits for test(); the prologue and initial loads are
// not part of this listing.  From the uses below: w19 carries i, w20 is
// compared as n, w1 carries the current value of b, and x21 is the address
// stored to for b (presumably &b -- setup not shown).
.L6:                                    // loop head for the path where b may be nonzero
        cbz     w1, .L5                 // b == 0: go to the loop at .L5 that only steps i
        bl      do_something
        mov     w1, w0                  // w1 = do_something()
        mov     w0, w19                 // argument for inc: i
        str     w1, [x21]               // b = do_something()
        bl      inc                     // NOTE(review): w1 is live across this call although it is caller-saved; presumably GCC knows inc leaves it intact (same TU) -- confirm
        mov     w19, w0                 // i = inc(i)
        cmp     w20, w0
        bgt     .L6                     // keep looping while n > i
.L3:                                    // common exit: restore saved registers and return
        ldp     x19, x20, [sp, 16]
        ldr     x21, [sp, 32]
        ldp     x29, x30, [sp], 48
        ret
.L5:                                    // b == 0 path: the body is only i = inc(i); two copies per trip
        mov     w0, w19
        bl      inc
        mov     w19, w0                 // i = inc(i), first copy
        cmp     w20, w0
        ble     .L3                     // exit once n <= i
        mov     w0, w19
        bl      inc
        mov     w19, w0                 // i = inc(i), second copy
        cmp     w20, w0
        bgt     .L5                     // keep looping while n > i
        b       .L3
.L13:                                   // NOTE(review): no branch to .L13 appears in this excerpt; presumably an early exit taken before the frame is set up -- confirm
        ret

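However, the loop headed by label .L5 can be optimized away completely. It is
entered only when b is zero, so do_something is never called and b is never
written again; and because inc is a pure function whose result is not used
after the loop, the loop is essentially an empty loop doing nothing.
Therefore the code could be optimized to: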

// Desired output: same as the loop above, except that b == 0 now exits
// directly instead of entering a second loop that only calls inc.
.L6:                                    // loop head; w1 carries the current value of b
        cbz     w1, .L3                 // b == 0: nothing left to do, leave the loop
        bl      do_something
        mov     w1, w0                  // w1 = do_something()
        mov     w0, w19                 // argument for inc: i (kept in w19)
        str     w1, [x21]               // b = do_something(); x21 presumably holds &b (setup not shown)
        bl      inc
        mov     w19, w0                 // i = inc(i)
        cmp     w20, w0
        bgt     .L6                     // keep looping while n > i
.L3:                                    // exit: restore saved registers and return
        ldp     x19, x20, [sp, 16]
        ldr     x21, [sp, 32]
        ldp     x29, x30, [sp], 48
        ret

Reply via email to