Am 02.06.2011 18:45, schrieb José Mejuto:
> Hello FPC,
> 
>                              PlainCompare  CompareByte    XCompareByte
>                              ------------  -----------    ------------
> Equal arrays 1000 elements   16250 ms      625 ms         656 ms
> Diff. arrays 1000 elements      62 ms      640 ms         656 ms
> 
> Equal arrays 32 elements       547 ms      625 ms         547 ms
> Diff. arrays 32 elements        62 ms      640 ms          62 ms
> 

I improved the original CompareByte; please tell me how it works for you.
{ CompareByte compares len bytes starting at buf1 with len bytes starting
  at buf2 (i386, AT&T syntax inline assembler).

  Result: 0 when len = 0 or when all compared bytes are equal; otherwise
  (byte from buf1) - (byte from buf2), both zero-extended, taken at the
  first position where the buffers differ.

  With REGCALL the arguments arrive as eax = buf1, edx = buf2, ecx = len.
  Lengths up to 57 are handled by a plain byte loop that avoids the setup
  cost of the string instructions; longer buffers use the aligned
  rep cmpsl path.  Both paths return the same kind of value.

  Registers: the short path borrows bl (saved in savebl), the long path
  borrows esi/edi (saved in saveesi/saveedi).  eax, ecx and edx are
  overwritten freely. }
function CompareByte(Const buf1,buf2;len:SizeInt):SizeInt; assembler;
var
  saveesi,saveedi : longint;
  savebl : byte;
asm
{$ifdef REGCALL}
        cmpl    $57,%ecx
        jg      .LCmpbyteFull
        testl   %ecx,%ecx       { len=0: nothing to compare, do not touch the buffers }
        je      .LCmpbyteEqual

        movb    %bl,savebl      { bl is the only scratch byte register left }
.LCmpbyteLoop:
        movb    (%eax),%bl
        cmpb    (%edx),%bl
        leal    1(%eax),%eax    { lea advances the pointers without touching the flags of cmpb }
        leal    1(%edx),%edx
        jne     .LCmpbyteDiff
        decl    %ecx
        jne     .LCmpbyteLoop
        movb    savebl,%bl      { all len bytes matched }
.LCmpbyteEqual:
        xorl    %eax,%eax       { equal buffers compare as 0, like the long path }
        jmp     .LCmpbyteDone
.LCmpbyteDiff:
        movb    savebl,%bl
        movzbl  -1(%eax),%eax   { both pointers are one past the mismatching byte }
        movzbl  -1(%edx),%edx
        subl    %edx,%eax       { buf1 byte - buf2 byte, full 32-bit result }
        jmp     .LCmpbyteDone   { esi/edi were never touched on this path }
.LCmpbyteFull:
        movl    %edi,saveedi
        movl    %esi,saveesi
        cld
        movl    %eax,%edi
        movl    %edx,%esi
        movl    %ecx,%eax
{$else}
        movl    %edi,saveedi
        movl    %esi,saveesi
        cld
        movl    buf2,%esi       { Load params }
        movl    buf1,%edi
        movl    len,%eax
{$endif}
        testl   %eax,%eax       { We address -1(%esi), so we have to deal with len=0 }
        je      .LCmpbyteExit
        cmpl    $10,%eax        { <10 not worth aligning and going through all the trouble }
        jl      .LCmpbyte2
        movl    %edi,%ecx       { Align on 32 bits }
        negl    %ecx            { ecx = (-edi) and 3 = bytes until edi is 4-byte aligned }
        andl    $3,%ecx
        subl    %ecx,%eax       { Subtract from number of bytes to go }
        orl     %ecx,%ecx       { Sets zero flag if ecx=0, so a skipped rep reads as equal }
        rep
        cmpsb                   { The actual 32-bit aligning }
        jne     .LCmpbyte3
        movl    %eax,%ecx       { bytes to do, divide by 4 }
        andl    $3,%eax         { remainder }
        shrl    $2,%ecx         { The actual division }
        orl     %ecx,%ecx       { Sets zero flag if ecx=0 -> no cmp }
        rep
        cmpsl
        je      .LCmpbyte2      { All equal? then on to the left over bytes }
        movl    $4,%eax         { Not equal. Rescan the last 4 bytes bytewise }
        subl    %eax,%esi
        subl    %eax,%edi
.LCmpbyte2:
        movl    %eax,%ecx       { bytes still to (re)scan }
        orl     %eax,%eax       { prevent disaster in case %eax=0 }
        .balign 4
        rep
        cmpsb
.LCmpbyte3:
        movzbl  -1(%esi),%ecx
        movzbl  -1(%edi),%eax   { Compare failing (or equal) position }
        subl    %ecx,%eax
.LCmpbyteExit:
        movl    saveedi,%edi
        movl    saveesi,%esi
.LCmpbyteDone:                  { common exit; the short REGCALL path jumps here directly }
end;
_______________________________________________
fpc-devel maillist  -  fpc-devel@lists.freepascal.org
http://lists.freepascal.org/mailman/listinfo/fpc-devel

Reply via email to