Martijn van Oosterhout wrote:
> On Tue, Jan 24, 2006 at 05:24:28PM -0500, Seneca Cunningham wrote:
> 
>>After reading the post on -patches proposing that MemSet be changed to
>>use long instead of int32 on the grounds that a pair of x86-64 linux
>>boxes took less time to execute the long code 64*10^6 times[1], I took a
>>look at how the testcode performed on AIX with gcc.  While the switch to
>>long did result in a minor performance improvement, dropping the
>>MemSetLoop in favour of the native memset resulted in the tests taking
>>~25% of the time taken by the MemSetLoop-like int loop. The 32-bit linux
>>system I ran the expanded tests on showed that, for the buffer sizes where
>>postgres can use the looping MemSet instead of memset (size <= 1024
>>bytes), MemSet generally had better performance.
> 
> 
> Could you please check the asm output to see what's going on. We've had
> tests like these produce odd results in the past because the compiler
> optimised away stuff that didn't have any effect. Since every memset
> after the first is a no-op, you want to make sure it's still actually
> doing the work...

Well, on both linux and AIX, all 30 of the 64000000-iteration loops
from the source exist (10 int, 10 long, 10 memset).  According to my
understanding of the assembler output, memset itself is only called for
sizes >= 64 bytes on both platforms, and that call is made in each
iteration.

The assembler output for the 64-byte loops follows, with line numbers
prepended.  In each listing, the first loop is the MemSetLoop int variant,
the second loop is memset, and the third loop is the MemSetLoop long
variant:

64-bit AIX:

    419     addi 3,1,112
    420     li 4,0
    421     bl .gettimeofday
    422     nop
    423     lis 10,0x3d0
    424     cmpld 6,26,16
    425     li 11,0
    426     ori 10,10,36864
    427 L..41:
    428     bge 6,L..42
    429     mr 9,26
    430     li 0,0
    431 L..44:
    432     stw 0,0(9)
    433     addi 9,9,4
    434     cmpld 7,16,9
    435     bgt 7,L..44
    436 L..42:
    437     addi 0,11,1
    438     extsw 11,0
    439     cmpw 7,11,10
    440     bne+ 7,L..41
    441     li 4,0
    442     mr 3,22
    443     lis 25,0x3d0
    444     li 28,0
    445     bl .gettimeofday
    446     nop
    447     li 4,64
    448     addi 5,1,112
    449     ld 3,LC..9(2)
    450     mr 6,22
    451     ori 25,25,36864
    452     bl .print_time
    453     addi 3,1,112
    454     li 4,0
    455     bl .gettimeofday
    456     nop
    457 L..46:
    458     mr 3,26
    459     li 4,0
    460     li 5,64
    461     bl .memset
    462     nop
    463     addi 0,28,1
    464     extsw 28,0
    465     cmpw 7,28,25
    466     bne+ 7,L..46
    467     li 4,0
    468     mr 3,22
    469     bl .gettimeofday
    470     nop
    471     li 4,64
    472     addi 5,1,112
    473     ld 3,LC..11(2)
    474     mr 6,22
    475     bl .print_time
    476     addi 3,1,112
    477     li 4,0
    478     bl .gettimeofday
    479     nop
    480     lis 10,0x3d0
    481     cmpld 6,26,16
    482     li 11,0
    483     ori 10,10,36864
    484 L..48:
    485     bge 6,L..49
    486     mr 9,26
    487     li 0,0
    488 L..51:
    489     std 0,0(9)
    490     addi 9,9,8
    491     cmpld 7,9,16
    492     blt 7,L..51
    493 L..49:
    494     addi 0,11,1
    495     extsw 11,0
    496     cmpw 7,11,10
    497     bne+ 7,L..48
    498     li 4,0
    499     mr 3,22
    500     bl .gettimeofday
    501     nop
    502     li 4,64
    503     addi 5,1,112
    504     ld 3,LC..13(2)
    505     mr 6,22
    506     bl .print_time


32-bit Linux:

    387     popl    %ecx
    388     popl    %edi
    389     pushl   $0
    390     leal    -20(%ebp), %edx
    391     pushl   %edx
    392     call    gettimeofday
    393     xorl    %edx, %edx
    394     addl    $16, %esp
    395 .L41:
    396     movl    -4160(%ebp), %eax
    397     cmpl    %eax, -4144(%ebp)
    398     jae .L42
    399     movl    -4144(%ebp), %eax
    400 .L44:
    401     movl    $0, (%eax)
    402     addl    $4, %eax
    403     cmpl    %eax, -4160(%ebp)
    404     ja  .L44
    405 .L42:
    406     incl    %edx
    407     cmpl    $64000000, %edx
    408     jne .L41
    409     subl    $8, %esp
    410     pushl   $0
    411     leal    -28(%ebp), %edx
    412     pushl   %edx
    413     call    gettimeofday
    414     leal    -28(%ebp), %eax
    415     movl    %eax, (%esp)
    416     leal    -20(%ebp), %ecx
    417     movl    $64, %edx
    418     movl    $.LC5, %eax
    419     call    print_time
    420     popl    %eax
    421     popl    %edx
    422     pushl   $0
    423     leal    -20(%ebp), %edx
    424     pushl   %edx
    425     call    gettimeofday
    426     xorl    %edi, %edi
    427     addl    $16, %esp
    428 .L46:
    429     pushl   %eax
    430     pushl   $64
    431     pushl   $0
    432     movl    -4144(%ebp), %ecx
    433     pushl   %ecx
    434     call    memset
    435     incl    %edi
    436     addl    $16, %esp
    437     cmpl    $64000000, %edi
    438     jne .L46
    439     subl    $8, %esp
    440     pushl   $0
    441     leal    -28(%ebp), %eax
    442     pushl   %eax
    443     call    gettimeofday
    444     leal    -28(%ebp), %edx
    445     movl    %edx, (%esp)
    446     leal    -20(%ebp), %ecx
    447     movl    $64, %edx
    448     movl    $.LC6, %eax
    449     call    print_time
    450     popl    %eax
    451     popl    %edx
    452     pushl   $0
    453     leal    -20(%ebp), %eax
    454     pushl   %eax
    455     call    gettimeofday
    456     xorl    %edx, %edx
    457     addl    $16, %esp
    458 .L48:
    459     movl    -4160(%ebp), %eax
    460     cmpl    %eax, -4144(%ebp)
    461     jae .L49
    462     movl    -4144(%ebp), %eax
    463 .L51:
    464     movl    $0, (%eax)
    465     addl    $4, %eax
    466     cmpl    -4160(%ebp), %eax
    467     jb  .L51
    468 .L49:
    469     incl    %edx
    470     cmpl    $64000000, %edx
    471     jne .L48
    472     subl    $8, %esp
    473     pushl   $0
    474     leal    -28(%ebp), %edx
    475     pushl   %edx
    476     call    gettimeofday
    477     leal    -28(%ebp), %eax
    478     movl    %eax, (%esp)
    479     leal    -20(%ebp), %ecx
    480     movl    $64, %edx
    481     movl    $.LC7, %eax
    482     call    print_time

-- 
Seneca Cunningham
[EMAIL PROTECTED]

---------------------------(end of broadcast)---------------------------
TIP 5: don't forget to increase your free space map settings

Reply via email to