these new builtins allow clang to optimize operator new/delete operations far better than before
__builtin_operator_new __builtin_operator_delete patches in review for clang 3.5 clang: http://reviews.llvm.org/rL210137 libc++: http://reviews.llvm.org/rL210211 the libc++ patch is very small, and maybe the Qt container library can also benefit from a clang-specific specialization #example 1 #include <vector> #include <numeric> int main() { const std::vector<int> a{1,2}; const std::vector<int> b{4,5}; const std::vector<int> ints { std::accumulate(a.begin(),a.end(),1), std::accumulate(b.begin(),b.end(),2), }; return std::accumulate(ints.begin(),ints.end(),100); } clang 3.4.1 main: # @main pushq %rbp pushq %r15 pushq %r14 pushq %rbx pushq %rax movl $8, %edi callq operator new(unsigned long) movq %rax, %r14 movabsq $8589934593, %rax # imm = 0x200000001 movq %rax, (%r14) movl $8, %edi callq operator new(unsigned long) movq %rax, %rbx movabsq $21474836484, %rax # imm = 0x500000004 movq %rax, (%rbx) movl (%r14), %r15d movl 4(%r14), %ebp movl $8, %edi callq operator new(unsigned long) leal 1(%r15,%rbp), %ebp testq %rax, %rax movl %ebp, (%rax) movl $11, 4(%rax) je .LBB0_5 movq %rax, %rdi callq operator delete(void*) .LBB0_5: # %_ZNSt6vectorIiSaIiEED2Ev.exit25 testq %rbx, %rbx je .LBB0_7 movq %rbx, %rdi callq operator delete(void*) .LBB0_7: # %_ZNSt6vectorIiSaIiEED2Ev.exit23 addl $111, %ebp testq %r14, %r14 je .LBB0_9 movq %r14, %rdi callq operator delete(void*) .LBB0_9: # %_ZNSt6vectorIiSaIiEED2Ev.exit21 movl %ebp, %eax addq $8, %rsp popq %rbx popq %r14 popq %r15 popq %rbp ret movq %rax, %rbp movq %rbp, %rdi callq _Unwind_Resume movq %rax, %rbp jmp .LBB0_14 movq %rax, %rbp testq %rbx, %rbx je .LBB0_14 movq %rbx, %rdi callq operator delete(void*) .LBB0_14: # %_ZNSt6vectorIiSaIiEED2Ev.exit15 testq %r14, %r14 je .LBB0_16 movq %r14, %rdi callq operator delete(void*) .LBB0_16: # %_ZNSt6vectorIiSaIiEED2Ev.exit movq %rbp, %rdi callq _Unwind_Resume GCC_except_table0: .byte 255 # @LPStart Encoding = omit .byte 3 # @TType Encoding = udata4 .asciz "\266\200\200" # @TType base offset 
.byte 3 # Call site Encoding = udata4 .byte 52 # Call site table length .long .Lset0 .long .Lset1 .long .Lset2 .byte 0 # On action: cleanup .long .Lset3 .long .Lset4 .long .Lset5 .byte 0 # On action: cleanup .long .Lset6 .long .Lset7 .long .Lset8 .byte 0 # On action: cleanup .long .Lset9 .long .Lset10 .long 0 # has no landing pad .byte 0 # On action: cleanup Ralph Smith patched clang/libc++ main: # @main movl $115, %eax retq #example 2 #include <string> int main() { return std::string("hello").size(); } clang 3.4.1 main: # @main pushq %rbx subq $32, %rsp leaq 16(%rsp), %rdi leaq 8(%rsp), %rdx movl $.L.str, %esi callq std::basic_string<char, std::char_traits<char>, std::allocator<char> >::basic_string(char const*, std::allocator<char> const&) movq 16(%rsp), %rax leaq -24(%rax), %rdi movl std::basic_string<char, std::char_traits<char>, std::allocator<char> >::_Rep::_S_empty_rep_storage, %ecx cmpq %rcx, %rdi movl -24(%rax), %ebx jne .LBB0_1 .LBB0_6: # %_ZNSsD1Ev.exit movl %ebx, %eax addq $32, %rsp popq %rbx ret .LBB0_1: addq $-8, %rax movl $__pthread_key_create, %ecx testq %rcx, %rcx je .LBB0_3 movl $-1, %ecx lock xaddl %ecx, (%rax) movl %ecx, 28(%rsp) movl 28(%rsp), %ecx jmp .LBB0_4 .LBB0_3: movl (%rax), %ecx leal -1(%rcx), %edx movl %edx, (%rax) .LBB0_4: # %_ZN9__gnu_cxxL27__exchange_and_add_dispatchEPii.exit.i.i.i testl %ecx, %ecx jg .LBB0_6 leaq 24(%rsp), %rsi callq std::basic_string<char, std::char_traits<char>, std::allocator<char> >::_Rep::_M_destroy(std::allocator<char> const&) jmp .LBB0_6 .L.str: .asciz "hello" Ralph Smith patched clang/libc++ main: # @main movl $5, %eax retq the results of gcc and VS2013 optimizations fall far short of what clang can do with these patches _______________________________________________ Development mailing list [email protected] http://lists.qt-project.org/mailman/listinfo/development
