https://gcc.gnu.org/bugzilla/show_bug.cgi?id=91320

            Bug ID: 91320
           Summary: [9.1] x86-64 code generation / register allocation
                    regressed.
           Product: gcc
           Version: unknown
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: target
          Assignee: unassigned at gcc dot gnu.org
          Reporter: maxim.yegorushkin at gmail dot com
  Target Milestone: ---

The following code:

    #include <boost/dynamic_bitset.hpp>

    size_t g(boost::dynamic_bitset<> const& a, bool value) {
        auto count = a.count();
        return value ? count : a.size() - count;
    }

When compiled with `gcc-8.3 -O3 -std=gnu++17 -mpopcnt`, the code generates the
following assembly:

    g(boost::dynamic_bitset<unsigned long, std::allocator<unsigned long> > const&, bool):
            mov     r9, QWORD PTR [rdi]
            mov     r8, QWORD PTR [rdi+8]
            sub     r8, r9
            sar     r8, 3
            je      .L5
            xor     edx, edx
            xor     eax, eax
    .L3:
            xor     ecx, ecx
            popcnt  rcx, QWORD PTR [r9+rdx*8]
            add     rdx, 1
            add     rax, rcx
            cmp     r8, rdx
            jne     .L3
    .L2:
            test    sil, sil
            jne     .L1
            mov     rdx, QWORD PTR [rdi+24]
            sub     rdx, rax
            mov     rax, rdx
    .L1:
            ret
    .L5:
            xor     eax, eax
            jmp     .L2


When compiled with `gcc-9.1 -O3 -std=gnu++17 -mpopcnt`, the code generates the
following assembly:

    g(boost::dynamic_bitset<unsigned long, std::allocator<unsigned long> > const&, bool):
            mov     r9, QWORD PTR [rdi]
            mov     rcx, QWORD PTR [rdi+8]
            sub     rcx, r9
            sar     rcx, 3
            je      .L5
            xor     eax, eax
            xor     r8d, r8d
    .L3:
            xor     edx, edx
            popcnt  rdx, QWORD PTR [r9+rax*8]
            add     rax, 1
            add     r8, rdx
            cmp     rcx, rax
            jne     .L3
    .L2:
            test    sil, sil
            jne     .L1
            mov     rax, QWORD PTR [rdi+24]
            sub     rax, r8
            mov     r8, rax
    .L1:
            mov     rax, r8
            ret
    .L5:
            xor     r8d, r8d
            jmp     .L2

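Note the extra `mov rax, r8` instruction at `.L1` in the gcc-9.1 output.
gcc-8.3 allocates registers better: it accumulates the bit count directly in
`rax` (the return register) and uses `rdx` as the loop index, so that extra
instruction is not necessary. gcc-9.1 instead accumulates the count in `r8` and
uses `rax` as the loop index, so it has to copy `r8` into `rax` before
returning.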

Reply via email to