https://gcc.gnu.org/bugzilla/show_bug.cgi?id=80293

            Bug ID: 80293
           Summary: g++ 5.4 -> 6.1 regression: unnecessary code at -O2
                    (-O1 is fine)
           Product: gcc
           Version: 6.2.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: c++
          Assignee: unassigned at gcc dot gnu.org
          Reporter: alex at weej dot com
  Target Milestone: ---

Created attachment 41110
  --> https://gcc.gnu.org/bugzilla/attachment.cgi?id=41110&action=edit
repro.cpp

Apologies for the poor summary, I really can't think of a good way to describe
this.

I'm using a couple of functions to round-trip a value through a `std::array`
representation of the variable's memory (similar to type-punning through
`std::memcpy`). Even at -O2, the compiler emits a lot of unnecessary code
for this.

---

// g++ repro.cpp -c -o repro.o -std=gnu++11 -O2 -save-temps

#include <type_traits>
#include <array>
#include <cstring>
#include <cstdint>

// Return a copy of the underlying memory of an arbitrary value.
//
// T must be trivially copyable: the defaulted second template parameter uses
// std::enable_if so that the template is not viable for other types.
// The result is a std::array of exactly sizeof(T) chars holding the bytes of
// `value`, copied with std::memcpy.
template <
    typename T,
    typename = typename
std::enable_if<std::is_trivially_copyable<T>::value>::type
>
auto getMem(
    T const & value
) -> std::array<char, sizeof(T)> {
    // The buffer is zero-initialised by `{}` and then fully overwritten.
    auto ret = std::array<char, sizeof(T)>{};
    std::memcpy(ret.data(), &value, sizeof(T));
    return ret;
}

// Rebuild a value of type T from a byte buffer of exactly sizeof(T) chars.
//
// T must be trivially copyable (enforced through std::enable_if on the
// defaulted second template parameter) and must be named explicitly by the
// caller, since it cannot be deduced from sizeof(T) in the parameter type.
// Returns a T whose bytes are those stored in `buf`.
template <
    typename T,
    typename = typename
std::enable_if<std::is_trivially_copyable<T>::value>::type
>
auto fromMem(
    std::array<char, sizeof(T)> const & buf
) -> T {
    // Start from a `T{}` object, then overwrite all of its bytes from buf.
    auto ret = T{};
    std::memcpy(&ret, buf.data(), sizeof(T));
    return ret;
}

// Reinterpret the bytes of `arg` as a double by copying them into a
// std::array with getMem and back out with fromMem<double>.
// Only compiles when sizeof(double) == sizeof(std::uint64_t), because the
// array types must match.
// This is the variant for which the report shows extra instructions at -O2.
double foo1(std::uint64_t arg) {
    return fromMem<double>(getMem(arg));
}

// Comparison case: read the storage of `arg` directly through a double*.
// NOTE(review): this accesses a std::uint64_t object through a double lvalue,
// which the C++ aliasing rules do not permit; it appears to be kept here only
// as a codegen reference point, not as a recommended technique.
double foo2(std::uint64_t arg) {
    return *reinterpret_cast<double*>(&arg);
}

// Comparison case: copy the bytes of `arg` straight into a double with a
// single std::memcpy and return it.
double foo3(std::uint64_t arg) {
    double ret;
    // Copies sizeof(arg) bytes, so this relies on double being at least as
    // large as std::uint64_t.
    std::memcpy(&ret, &arg, sizeof(arg));
    return ret;
}

---

In GCC 5.4 and older, as well as all versions of clang that I tested, all three
`foo*` functions emit identical and extremely short code. But in GCC 6.1 and
newer, `foo1` emits a load of extra instructions *only when -O2 or above is
used*.

---

Disassembly of section .text:

0000000000000000 <_Z4foo1m>:
   0:   48 83 ec 28             sub    $0x28,%rsp
   4:   64 48 8b 04 25 28 00    mov    %fs:0x28,%rax
   b:   00 00 
   d:   48 89 44 24 18          mov    %rax,0x18(%rsp)
  12:   31 c0                   xor    %eax,%eax
  14:   89 f8                   mov    %edi,%eax
  16:   66 c1 e8 08             shr    $0x8,%ax
  1a:   89 c1                   mov    %eax,%ecx
  1c:   89 f8                   mov    %edi,%eax
  1e:   c1 e8 18                shr    $0x18,%eax
  21:   89 c2                   mov    %eax,%edx
  23:   31 c0                   xor    %eax,%eax
  25:   40 88 f8                mov    %dil,%al
  28:   0f b6 d2                movzbl %dl,%edx
  2b:   88 cc                   mov    %cl,%ah
  2d:   48 89 f9                mov    %rdi,%rcx
  30:   48 c1 e2 18             shl    $0x18,%rdx
  34:   81 e1 00 00 ff 00       and    $0xff0000,%ecx
  3a:   48 25 ff ff 00 ff       and    $0xffffffffff00ffff,%rax
  40:   48 09 c8                or     %rcx,%rax
  43:   48 b9 ff ff ff 00 ff    movabs $0xffffffff00ffffff,%rcx
  4a:   ff ff ff 
  4d:   48 21 c8                and    %rcx,%rax
  50:   48 b9 ff ff ff ff 00    movabs $0xffffff00ffffffff,%rcx
  57:   ff ff ff 
  5a:   48 09 d0                or     %rdx,%rax
  5d:   48 ba 00 00 00 00 ff    movabs $0xff00000000,%rdx
  64:   00 00 00 
  67:   48 21 fa                and    %rdi,%rdx
  6a:   48 21 c8                and    %rcx,%rax
  6d:   48 b9 ff ff ff ff ff    movabs $0xffff00ffffffffff,%rcx
  74:   00 ff ff 
  77:   48 09 d0                or     %rdx,%rax
  7a:   48 ba 00 00 00 00 00    movabs $0xff0000000000,%rdx
  81:   ff 00 00 
  84:   48 21 fa                and    %rdi,%rdx
  87:   48 21 c8                and    %rcx,%rax
  8a:   48 b9 ff ff ff ff ff    movabs $0xff00ffffffffffff,%rcx
  91:   ff 00 ff 
  94:   48 09 d0                or     %rdx,%rax
  97:   48 ba 00 00 00 00 00    movabs $0xff000000000000,%rdx
  9e:   00 ff 00 
  a1:   48 21 fa                and    %rdi,%rdx
  a4:   48 c1 ef 38             shr    $0x38,%rdi
  a8:   48 21 c8                and    %rcx,%rax
  ab:   48 c1 e7 38             shl    $0x38,%rdi
  af:   48 09 d0                or     %rdx,%rax
  b2:   48 89 fa                mov    %rdi,%rdx
  b5:   48 bf ff ff ff ff ff    movabs $0xffffffffffffff,%rdi
  bc:   ff ff 00 
  bf:   48 21 c7                and    %rax,%rdi
  c2:   48 09 d7                or     %rdx,%rdi
  c5:   48 8b 44 24 18          mov    0x18(%rsp),%rax
  ca:   64 48 33 04 25 28 00    xor    %fs:0x28,%rax
  d1:   00 00 
  d3:   48 89 7c 24 08          mov    %rdi,0x8(%rsp)
  d8:   f2 0f 10 44 24 08       movsd  0x8(%rsp),%xmm0
  de:   75 05                   jne    e5 <_Z4foo1m+0xe5>
  e0:   48 83 c4 28             add    $0x28,%rsp
  e4:   c3                      retq   
  e5:   e8 00 00 00 00          callq  ea <_Z4foo1m+0xea>
  ea:   66 0f 1f 44 00 00       nopw   0x0(%rax,%rax,1)

00000000000000f0 <_Z4foo2m>:
  f0:   48 89 7c 24 f8          mov    %rdi,-0x8(%rsp)
  f5:   f2 0f 10 44 24 f8       movsd  -0x8(%rsp),%xmm0
  fb:   c3                      retq   
  fc:   0f 1f 40 00             nopl   0x0(%rax)

0000000000000100 <_Z4foo3m>:
 100:   48 89 7c 24 f8          mov    %rdi,-0x8(%rsp)
 105:   f2 0f 10 44 24 f8       movsd  -0x8(%rsp),%xmm0
 10b:   c3                      retq   

---

Tested this on g++ (Ubuntu 6.2.0-5ubuntu12) 6.2.0 20161005 x86-64. Also
verified many versions with godbolt.org.

Reply via email to