Couldn't some of the overhead come from the overhead of calling back()?

Andrei

David Simcha wrote:
Sorry, the list wouldn't post my accidental double paste anyhow because it was too big. Here's the disassembly of partition, showing only back() is getting called and pred seems to be getting inlined, contrary to what I had believed before. So changing enforce() in back() to assert() gets rid of about half the overhead, but I have no idea where the other half is coming from.

_D3std9algorithm129__T8sortImplS793std10functional54__T13binaryFunImplVAyaa5_61203c2062VAyaa1_610889BE3F182A6EA5575A2D298A20D100 PROC NEAR ; COMDEF _D3std9algorithm129__T8sortImplS793std10functional54__T13binaryFunImplVAyaa5_61203c2062VAyaa1_610889BE3F182A6EA5575A2D298A20D100
        sub     esp, 36                                 ; 0000 _ 83. EC, 24
mov ecx, dword ptr [esp+2CH] ; 0003 _ 8B. 4C 24, 2C
        push    ebx                                     ; 0007 _ 53
mov ebx, dword ptr [esp+2CH] ; 0008 _ 8B. 5C 24, 2C
        test    ebx, ebx                                ; 000C _ 85. DB
        push    ebp                                     ; 000E _ 55
        push    esi                                     ; 000F _ 56
        mov     esi, eax                                ; 0010 _ 89. C6
        push    edi                                     ; 0012 _ 57
        jnz     ?_020                                   ; 0013 _ 75, 0E
        pop     edi                                     ; 0015 _ 5F
        mov     eax, ebx                                ; 0016 _ 8B. C3
        mov     edx, ecx                                ; 0018 _ 8B. D1
        pop     esi                                     ; 001A _ 5E
        pop     ebp                                     ; 001B _ 5D
        pop     ebx                                     ; 001C _ 5B
        add     esp, 36                                 ; 001D _ 83. C4, 24
        ret     8                                       ; 0020 _ C2, 0008

?_020: mov dword ptr [esp+3CH], ecx ; 0023 _ 89. 4C 24, 3C mov edx, dword ptr [esp+3CH] ; 0027 _ 8B. 54 24, 3C mov dword ptr [esp+38H], ebx ; 002B _ 89. 5C 24, 38 mov ebx, dword ptr [esp+38H] ; 002F _ 8B. 5C 24, 38 mov dword ptr [esp+14H], ebx ; 0033 _ 89. 5C 24, 14 mov dword ptr [esp+18H], edx ; 0037 _ 89. 54 24, 18 cmp dword ptr [esp+38H], 0 ; 003B _ 83. 7C 24, 38, 00 je ?_023 ; 0040 _ 0F 84, 00000088 ?_021: mov edx, dword ptr [esp+3CH] ; 0046 _ 8B. 54 24, 3C mov eax, dword ptr [esp+38H] ; 004A _ 8B. 44 24, 38
        mov     ebx, dword ptr [edx]                    ; 004E _ 8B. 1A
        push    dword ptr [esi+0CH]                     ; 0050 _ FF. 76, 0C
        push    dword ptr [esi+8H]                      ; 0053 _ FF. 76, 08
mov dword ptr [esp+18H], edx ; 0056 _ 89. 54 24, 18 call _D3std5array12__T4backTAiZ4backFNcAiZi ; 005A _ E8, 00000000(rel)
        cmp     dword ptr [eax], ebx                    ; 005F _ 39. 18
        jle     ?_022                                   ; 0061 _ 7E, 31
mov edi, dword ptr [esp+38H] ; 0063 _ 8B. 7C 24, 38 mov ecx, dword ptr [esp+10H] ; 0067 _ 8B. 4C 24, 10
        dec     edi                                     ; 006B _ 4F
        lea     edx, [ecx+4H]                           ; 006C _ 8D. 51, 04
mov ebx, dword ptr [esp+14H] ; 006F _ 8B. 5C 24, 14
        dec     ebx                                     ; 0073 _ 4B
mov dword ptr [esp+3CH], edx ; 0074 _ 89. 54 24, 3C mov edx, dword ptr [esp+18H] ; 0078 _ 8B. 54 24, 18 mov eax, dword ptr [esp+14H] ; 007C _ 8B. 44 24, 14 mov dword ptr [esp+38H], edi ; 0080 _ 89. 7C 24, 38
        add     edx, 4                                  ; 0084 _ 83. C2, 04
mov dword ptr [esp+14H], ebx ; 0087 _ 89. 5C 24, 14 mov dword ptr [esp+18H], edx ; 008B _ 89. 54 24, 18 jmp ?_025 ; 008F _ E9, 000000AA

?_022: push dword ptr [esp+3CH] ; 0094 _ FF. 74 24, 3C push dword ptr [esp+3CH] ; 0098 _ FF. 74 24, 3C call _D3std5array12__T4backTAiZ4backFNcAiZi ; 009C _ E8, 00000000(rel)
        mov     ebx, dword ptr [eax]                    ; 00A1 _ 8B. 18
        push    dword ptr [esi+0CH]                     ; 00A3 _ FF. 76, 0C
        push    dword ptr [esi+8H]                      ; 00A6 _ FF. 76, 08
call _D3std5array12__T4backTAiZ4backFNcAiZi ; 00A9 _ E8, 00000000(rel)
        cmp     dword ptr [eax], ebx                    ; 00AE _ 39. 18
        jg      ?_024                                   ; 00B0 _ 7F, 2E
mov edi, dword ptr [esp+38H] ; 00B2 _ 8B. 7C 24, 38
        dec     edi                                     ; 00B6 _ 4F
mov eax, dword ptr [esp+38H] ; 00B7 _ 8B. 44 24, 38 mov dword ptr [esp+38H], edi ; 00BB _ 89. 7C 24, 38 mov edx, dword ptr [esp+3CH] ; 00BF _ 8B. 54 24, 3C mov dword ptr [esp+3CH], edx ; 00C3 _ 89. 54 24, 3C cmp dword ptr [esp+38H], 0 ; 00C7 _ 83. 7C 24, 38, 00
        jnz     ?_022                                   ; 00CC _ 75, C6
?_023: mov edx, dword ptr [esp+18H] ; 00CE _ 8B. 54 24, 18 mov eax, dword ptr [esp+14H] ; 00D2 _ 8B. 44 24, 14
        pop     edi                                     ; 00D6 _ 5F
        pop     esi                                     ; 00D7 _ 5E
        pop     ebp                                     ; 00D8 _ 5D
        pop     ebx                                     ; 00D9 _ 5B
        add     esp, 36                                 ; 00DA _ 83. C4, 24
        ret     8                                       ; 00DD _ C2, 0008

?_024: push dword ptr [esp+3CH] ; 00E0 _ FF. 74 24, 3C push dword ptr [esp+3CH] ; 00E4 _ FF. 74 24, 3C call _D3std5array12__T4backTAiZ4backFNcAiZi ; 00E8 _ E8, 00000000(rel)
        mov     edi, eax                                ; 00ED _ 89. C7
mov edx, dword ptr [esp+3CH] ; 00EF _ 8B. 54 24, 3C
        mov     ebp, edx                                ; 00F3 _ 89. D5
mov eax, dword ptr [esp+38H] ; 00F5 _ 8B. 44 24, 38
; Note: Zero displacement could be omitted
        mov     ecx, dword ptr [edx]                    ; 00F9 _ 8B. 4A, 00
        mov     ebx, dword ptr [edi]                    ; 00FC _ 8B. 1F
        lea     edx, [edx+4H]                           ; 00FE _ 8D. 52, 04
        mov     dword ptr [ebp], ebx                    ; 0101 _ 89. 5D, 00
mov eax, dword ptr [esp+38H] ; 0104 _ 8B. 44 24, 38
        dec     eax                                     ; 0108 _ 48
        mov     dword ptr [edi], ecx                    ; 0109 _ 89. 0F
mov ebx, dword ptr [esp+14H] ; 010B _ 8B. 5C 24, 14 mov ecx, dword ptr [esp+18H] ; 010F _ 8B. 4C 24, 18 mov dword ptr [esp+38H], eax ; 0113 _ 89. 44 24, 38
        dec     ebx                                     ; 0117 _ 4B
mov eax, dword ptr [esp+14H] ; 0118 _ 8B. 44 24, 14 mov dword ptr [esp+14H], ebx ; 011C _ 89. 5C 24, 14
        add     ecx, 4                                  ; 0120 _ 83. C1, 04
mov ebx, dword ptr [esp+38H] ; 0123 _ 8B. 5C 24, 38 mov dword ptr [esp+18H], ecx ; 0127 _ 89. 4C 24, 18
        dec     ebx                                     ; 012B _ 4B
mov eax, dword ptr [esp+38H] ; 012C _ 8B. 44 24, 38 mov dword ptr [esp+3CH], edx ; 0130 _ 89. 54 24, 3C
        mov     ecx, edx                                ; 0134 _ 8B. CA
mov dword ptr [esp+38H], ebx ; 0136 _ 89. 5C 24, 38 mov dword ptr [esp+3CH], ecx ; 013A _ 89. 4C 24, 3C ?_025: cmp dword ptr [esp+38H], 0 ; 013E _ 83. 7C 24, 38, 00 jne ?_021 ; 0143 _ 0F 85, FFFFFEFD
; Note: Immediate operand could be made smaller by sign extension
jmp ?_023 ; 0149 _ E9, FFFFFF80 _D3std9algorithm129__T8sortImplS793std10functional54__T13binaryFunImplVAyaa5_61203c2062VAyaa1_610889BE3F182A6EA5575A2D298A20D100 ENDP

On Fri, Jul 2, 2010 at 5:25 PM, David Simcha <[email protected] <mailto:[email protected]>> wrote:

    Sorry for the double paste.  Please ignore everything before the
    second time you see
    
_D3std9algorithm129__T8sortImplS793std10functional54__T13binaryFunImplVAyaa5_61203c2...




------------------------------------------------------------------------

_______________________________________________
phobos mailing list
[email protected]
http://lists.puremagic.com/mailman/listinfo/phobos
_______________________________________________
phobos mailing list
[email protected]
http://lists.puremagic.com/mailman/listinfo/phobos

Reply via email to