On Thu, 25 Sep 2008, Przemyslaw Czerpak wrote:
> With my GCC, the final code generated with -O3 looks like this:
>       func:
>               pushl   %ebp
>               movl    %esp, %ebp
>               subl    $8, %esp
>               movl    hb_stack_ptr, %ecx
>               movl    4(%ecx), %eax
>               addl    $4, %eax
>               cmpl    8(%ecx), %eax
>               movl    %eax, 4(%ecx)
>               je      .L6
>       .L2:
>               movl    4(%ecx), %edx
>               leal    -4(%edx), %eax
>               movl    %eax, 4(%ecx)
>               movl    -4(%edx), %eax
>               testw   $-19451, (%eax)
>               jne     .L7
>               leave
>               ret
>       .L7:
>               movl    %eax, (%esp)
>               call    hb_itemClear
>               leave
>               ret
>       .L6:
>               call    hb_stackIncrease
>               movl    hb_stack_ptr, %ecx
>               jmp     .L2
> 
> It accesses hb_stack_ptr only _ONCE_ during normal code execution.

And another hint. I've just tried the same code in MT mode with real
native TLS access. GCC gives:

      func2:
        pushl   %ebp
        movl    %esp, %ebp
        subl    $24, %esp
        movl    %ebx, -8(%ebp)
        movl    hb_stack_ptr@INDNTPOFF, %ebx
        movl    %esi, -4(%ebp)
        movl    %gs:0, %esi
        movl    (%esi,%ebx), %edx
        movl    (%edx), %eax
        addl    $4, %eax
        cmpl    4(%edx), %eax
        movl    %eax, (%edx)
        je      .L6
      .L2:
        movl    (%esi,%ebx), %eax
        movl    (%eax), %ecx
        leal    -4(%ecx), %edx
        movl    %edx, (%eax)
        movl    -4(%ecx), %eax
        testw   $-19451, (%eax)
        je      .L4
        movl    %eax, (%esp)
        call    hb_itemClear
      .L4:
        movl    -8(%ebp), %ebx
        movl    -4(%ebp), %esi
        movl    %ebp, %esp
        popl    %ebp
        ret
      .L6:
        call    hb_stackIncrease
        jmp     .L2

It stores the TLS pointer in the ESI[EBX] registers and then simply reuses
it, so even in very complicated functions it accesses TLS only once.
This means that using HB_STACK_TLS_PRELOAD only reduces the indirect
addressing overhead and the saving/restoring of the ESI register:

      func2:
        pushl   %ebp
        movl    %gs:0, %eax
        movl    %esp, %ebp
        pushl   %ebx
        subl    $4, %esp
        movl    hb_stack_ptr@INDNTPOFF, %edx
        movl    (%eax,%edx), %ebx
        movl    (%ebx), %edx
        addl    $4, %edx
        cmpl    4(%ebx), %edx
        movl    %edx, (%ebx)
        je      .L6
      .L2:
        leal    -4(%edx), %eax
        movl    %eax, (%ebx)
        movl    -4(%edx), %eax
        testw   $-19451, (%eax)
        je      .L4
        movl    %eax, (%esp)
        call    hb_itemClear
      .L4:
        addl    $4, %esp
        popl    %ebx
        popl    %ebp
        ret
      .L6:
        call    hb_stackIncrease
        movl    (%ebx), %edx
        jmp     .L2

Now it's clear why the results are so different.
Any compiler that does not perform this optimization will produce
noticeably slower code in MT mode.

best regards,
Przemek
_______________________________________________
Harbour mailing list
Harbour@harbour-project.org
http://lists.harbour-project.org/mailman/listinfo/harbour

Reply via email to