__uint128_t foo1(__uint128_t x, __uint128_t y) { return x + y; }

0x0000000000000520 <+0>:  mov %rdx,%rax
0x0000000000000523 <+3>:  mov %rcx,%rdx
0x0000000000000526 <+6>:  push %rbx
0x0000000000000527 <+7>:  add %rdi,%rax
0x000000000000052a <+10>: adc %rsi,%rdx
0x000000000000052d <+13>: pop %rbx
0x000000000000052e <+14>: retq

%rbx isn't used, yet is saved and restored.

__uint128_t foo2(__uint128_t x, unsigned long long y) { return x + y; }

0x0000000000000550 <+0>:  mov %rdx,%rax
0x0000000000000553 <+3>:  push %rbx
0x0000000000000554 <+4>:  xor %edx,%edx
0x0000000000000556 <+6>:  mov %rsi,%rbx
0x0000000000000559 <+9>:  add %rdi,%rax
0x000000000000055c <+12>: adc %rbx,%rdx
0x000000000000055f <+15>: pop %rbx
0x0000000000000560 <+16>: retq

%rbx is used, but doesn't need to be. %rcx can be used instead, saving a push-pop pair.

__uint128_t foo3(unsigned long long x, __uint128_t y) { return x + y; }

0x0000000000000580 <+0>:  mov %rdi,%rax
0x0000000000000583 <+3>:  push %rbx
0x0000000000000584 <+4>:  mov %rdx,%rbx
0x0000000000000587 <+7>:  xor %edx,%edx
0x0000000000000589 <+9>:  add %rsi,%rax
0x000000000000058c <+12>: adc %rbx,%rdx
0x000000000000058f <+15>: pop %rbx
0x0000000000000590 <+16>: retq

This has similar problems to the previous two functions. In addition, %rdx can now be used in place as an output, avoiding one of the mov instructions. That is, the function could be optimized to:

mov %rdi,%rax
xor %ecx,%ecx
add %rsi,%rax
adc %rcx,%rdx
retq

--
Summary: __uint128_t missed optimizations.
Product: gcc
Version: 4.5.0
Status: UNCONFIRMED
Severity: enhancement
Priority: P3
Component: c
AssignedTo: unassigned at gcc dot gnu dot org
ReportedBy: svfuerst at gmail dot com
GCC build triplet: x86_64-linux
GCC host triplet: x86_64-linux
GCC target triplet: x86_64-linux

http://gcc.gnu.org/bugzilla/show_bug.cgi?id=43644