https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109849

            Bug ID: 109849
           Summary: suboptimal code for vector walking loop
           Product: gcc
           Version: 13.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: middle-end
          Assignee: unassigned at gcc dot gnu.org
          Reporter: hubicka at gcc dot gnu.org
  Target Milestone: ---

jan@localhost:/tmp> cat t.C
#include <vector>
// Alias for unsigned int, used as the element type of both pair members.
typedef unsigned int uint32_t;
// Namespace-scope vector of (uint32_t, uint32_t) pairs that test() pops from.
std::vector<std::pair<uint32_t, uint32_t>> stack;
// Pops entries off the global `stack` until it is empty or until a popped
// entry has a non-zero `second` member.
void
test()
{
        while (!stack.empty()) {
                // Copy the last element before removing it; only `second` is examined below.
                std::pair<uint32_t, uint32_t> cur = stack.back();
                stack.pop_back();
                // Stop as soon as the entry just popped has a non-zero `second`.
                if (cur.second)
                        break;
        }
}
jan@localhost:/tmp> gcc t.C -O3 -S 

yields:

# Output of `gcc t.C -O3 -S` for test() (mangled name _Z4testv).
_Z4testv:
.LFB1264:
        .cfi_startproc
        # %rcx = 8 bytes at stack+0, %rax = 8 bytes at stack+8; presumably the
        # vector's begin and end pointers (libstdc++ layout) -- confirm.
        movq    stack(%rip), %rcx
        movq    stack+8(%rip), %rax
        # Enter the loop at its exit test (.L5) rather than falling into it;
        # this is the jump the report suggests avoiding.
        jmp     .L5
        .p2align 4,,10
        .p2align 3
.L6:
        # Loop body: load the 32-bit word just below %rax; with 8-byte
        # elements this matches cur.second of the last element.
        movl    -4(%rax), %edx
        # Step %rax down by one 8-byte element and store it back to stack+8
        # on every iteration (the pop_back()).
        subq    $8, %rax
        movq    %rax, stack+8(%rip)
        # Leave the loop if the loaded value is non-zero (the `break`).
        testl   %edx, %edx
        jne     .L4
.L5:
        # Loop test: keep iterating while %rcx != %rax (the !stack.empty() check).
        cmpq    %rax, %rcx
        jne     .L6
.L4:
        ret

We really should order the basic blocks so that the cmpq comes before .L6,
saving a jump. Moreover, clang produces:

        .p2align        4, 0x90
.LBB1_1:                                # =>This Inner Loop Header: Depth=1
        cmpq    %rax, %rcx
        je      .LBB1_3
# %bb.2:                                #   in Loop: Header=BB1_1 Depth=1
        cmpl    $0, -4(%rcx)
        leaq    -8(%rcx), %rcx
        movq    %rcx, stack+8(%rip)
        je      .LBB1_1
.LBB1_3:
        retq

saving an instruction. Why do we not move the stack+8 update out of the loop?

Reply via email to