Initial: 58 seconds.

Eliminated the switch in popFront: 53s.

Replaced emplace with assignment: 23s.

Specialized emplace for non-struct types, reinserted: 23s.

Eliminated the loop in empty (replaced with return ranges[0].empty;): 17s.

I'm sure there are ways to further improve this, but there are a few difficulties. Each pass through the loop the code must transport values from the two arrays into a specific format and then distribute them for further calculation. Then, upon each popFront two words must be touched because arrays hold pointer+length, not pointer+pointer (as probably would be better since ranges are around).


Nice analysis!

Andrei

On 2/15/11 6:24 PM, bearophile wrote:
While doing some benchmarks on the Clay language I've found something 
interesting.

I have timed a zip() done on two short arrays, and I have found this D version 
too much slow compared to normal array code. Higher order functions like zip, 
map, filter, etc are going to stay, they aren't toys, so I think the D compiler 
has to digest them better.

(There is lot of of asm at the tail of this post, I don't know if some of t 
will get cut away.)

Bye,
bearophile


Timings, seconds, best of 3:
   #1:  1.95
   #2: 61.50
   #3:  2.25
   #4:  1.52

----------------------

Binary sizes, bytes:
   #1:  33_016
   #2: 284_188
   #3: 103_964
   #4: 103_964

----------------------

Compilers and command line used:

DMD 2.051
clay compiler (hg id dec41ba07d58 tip, llvm r122866, Jan  5 2011)

clay -inline test1.clay -o test1.exe
dmd -O -release -inline test2.d
dmd -O -release -inline test3.d

To produce asm code with Clay:
clay -inline -asm test1.clay -o test1.s

Output: -1149672960 for all versions.

----------------------

Code used:


// program #1 (Clay)
main() {
     var a = 
Vector([18,10,17,14,19,23,11,0,6,0,17,25,5,4,19,21,17,13,5,7,11,22,23,17,24,7,11,11,1,28]);
     var b = 
Vector([9,12,1,4,1,18,11,6,5,18,24,15,26,14,24,8,17,26,23,17,3,28,27,0,9,27,0,19,13,25]);
     var tot = 0; // 32 bit signed int
     for (i in range(0, 50*1000*1000))
         for (x, y in zipped(a, b))
             tot += x + y;
     println(tot);
}

----------------------

// program #2 (D2)
import std.c.stdio: printf;
import std.range: zip;
void main() {
     auto a = 
[18,10,17,14,19,23,11,0,6,0,17,25,5,4,19,21,17,13,5,7,11,22,23,17,24,7,11,11,1,28];
     auto b = 
[9,12,1,4,1,18,11,6,5,18,24,15,26,14,24,8,17,26,23,17,3,28,27,0,9,27,0,19,13,25];
     int tot = 0;
     foreach (i; 0 .. 50_000_000)
         foreach (xy; zip(a, b))
             tot += xy[0] + xy[1];
     printf("%d\n", tot);
}

----------------------

// program #3 (D2)
import std.c.stdio: printf;
void main() {
     auto a = 
[18,10,17,14,19,23,11,0,6,0,17,25,5,4,19,21,17,13,5,7,11,22,23,17,24,7,11,11,1,28];
     auto b = 
[9,12,1,4,1,18,11,6,5,18,24,15,26,14,24,8,17,26,23,17,3,28,27,0,9,27,0,19,13,25];
     int tot = 0;
     foreach (i; 0 .. 50_000_000)
         foreach (j; 0 .. a.length)
             tot += a[j] + b[j];
     printf("%d\n", tot);
}

----------------------

// program #4 (D2)
import std.c.stdio: printf;
void main() {
     auto a = 
[18,10,17,14,19,23,11,0,6,0,17,25,5,4,19,21,17,13,5,7,11,22,23,17,24,7,11,11,1,28];
     auto b = 
[9,12,1,4,1,18,11,6,5,18,24,15,26,14,24,8,17,26,23,17,3,28,27,0,9,27,0,19,13,25];
     int tot = 0;
     foreach (i; 0 .. 50_000_000)
         foreach (j; 0 .. 30)
             tot += a[j] + b[j];
     printf("%d\n", tot);
}

----------------------

Just loops program #1:

     movl    $50000000, %eax         # imm = 0x2FAF080
     .align    16, 0x90
LBB2_4:                                 # %bb.nph.i.i
                                         # =>This Loop Header: Depth=1
                                         #     Child Loop BB2_5 Depth 2
     xorl    %edx, %edx
     .align    16, 0x90
LBB2_5:                                 # %return410.i.i
                                         #   Parent Loop BB2_4 Depth=1
                                         # =>   This Inner Loop Header: Depth=2
     addl    (%esi,%edx,4), %ecx
     addl    (%edi,%edx,4), %ecx
     incl    %edx
     cmpl    $30, %edx
     jne    LBB2_5
# BB#3:                                 # %return272.loopexit.i.i
                                         #   in Loop: Header=BB2_4 Depth=1
     decl    %eax
     jne    LBB2_4

----------------------

Just loops program #2:

LCE:        push    dword ptr 018h[ESP]
         mov    ESI,offset FLAT:_D3std5range14__T3ZipTAiTAiZ3Zip6__initZ
         push    dword ptr 018h[ESP]
         push    dword ptr 028h[ESP]
         push    dword ptr 028h[ESP]
         push    0
         lea    EDI,078h[ESP]
         movsd
         movsd
         movsd
         movsd
         movsd
         lea    EAX,078h[ESP]
         call    near ptr 
_D3std5range14__T3ZipTAiTAiZ3Zip6__ctorMFNcAiAiE3std5range14StoppingPolicyZS3std5range14__T3ZipTAiTAiZ3Zip
         mov    ESI,EAX
         lea    EDI,044h[ESP]
         movsd
         movsd
         movsd
         movsd
         movsd
         lea    ESI,044h[ESP]
         lea    EDI,024h[ESP]
         movsd
         movsd
         movsd
         movsd
         movsd
         lea    EAX,024h[ESP]
         call    near ptr _D3std5range14__T3ZipTAiTAiZ3Zip5emptyMFZb
         xor    AL,1
         je    L153
L11C:        lea    EAX,024h[ESP]
         call    near ptr 
_D3std5range14__T3ZipTAiTAiZ3Zip5frontMFNdZS3std8typecons14__T5TupleTiTiZ5Tuple
         mov    07Ch[ESP],EAX
         mov    ECX,07Ch[ESP]
         lea    EAX,024h[ESP]
         mov    080h[ESP],EDX
         add    ECX,080h[ESP]
         add    EBX,ECX
         call    near ptr _D3std5range14__T3ZipTAiTAiZ3Zip8popFrontMFZv
         lea    EAX,024h[ESP]
         call    near ptr _D3std5range14__T3ZipTAiTAiZ3Zip5emptyMFZb
         xor    AL,1
         jne    L11C
L153:        inc    EBP
         cmp    EBP,02FAF080h
         jb    LCE


_D3std5range14__T3ZipTAiTAiZ3Zip6__ctorMFNcAiAiE3std5range14StoppingPolicyZS3std5range14__T3ZipTAiTAiZ3Zip
    comdat
         push    EBX
         mov    ECX,8[ESP]
         mov    EBX,014h[ESP]
         push    ESI
         mov    ESI,EAX
         mov    EDX,01Ch[ESP]
         mov    4[ESI],EDX
         mov    EDX,014h[ESP]
         mov    [ESI],EBX
         mov    EBX,010h[ESP]
         mov    010h[ESI],ECX
         mov    8[ESI],EBX
         mov    0Ch[ESI],EDX
         pop    ESI
         pop    EBX
         ret    014h


_D3std5range14__T3ZipTAiTAiZ3Zip5emptyMFZb    comdat
L0:        sub    ESP,05Ch
         mov    ECX,010h[EAX]
         test    ECX,ECX
         push    EBX
         push    ESI
         mov    ESI,EAX
         je    L21
         cmp    ECX,1
         je    LC3
         cmp    ECX,2
         je    L55
         jmp    near ptr LC3
L21:        mov    EBX,[ESI]
         mov    EDX,4[ESI]
         mov    0Ch[ESP],EBX
         mov    010h[ESP],EDX
         cmp    dword ptr 0Ch[ESP],0
         je    L4A
         mov    EBX,8[ESI]
         mov    EDX,0Ch[ESI]
         mov    014h[ESP],EBX
         mov    018h[ESP],EDX
         cmp    dword ptr 014h[ESP],0
         jne    LC3
L4A:        pop    ESI
         mov    EAX,1
         pop    EBX
         add    ESP,05Ch
         ret
L55:        mov    EBX,[ESI]
         mov    EDX,4[ESI]
         mov    ECX,offset 
FLAT:_D3std5range14__T3ZipTAiTAiZ3Zip5emptyMFZb14__dgliteral828MFZAxa
         mov    01Ch[ESP],EBX
         mov    020h[ESP],EDX
         mov    02Ch[ESP],EAX
         mov    030h[ESP],ECX
         cmp    dword ptr 01Ch[ESP],0
         setz    DL
         mov    EBX,8[ESI]
         mov    024h[ESP],EBX
         mov    ECX,0Ch[ESI]
         mov    028h[ESP],ECX
         cmp    dword ptr 024h[ESP],0
         setz    CL
         cmp    DL,CL
         mov    EDX,1
         je    L98
         xor    EDX,EDX
L98:        xor    DL,1
         je    LC3
         push    dword ptr FLAT:_DATA[054h]
         push    dword ptr FLAT:_DATA[050h]
         push    0ADEh
         mov    EAX,038h[ESP]
         mov    EDX,03Ch[ESP]
         mov    EBX,038h[ESP]
         call    EDX
         push    EDX
         push    EAX
         call    near ptr _D3std9exception7bailOutFAyaixAaZv
LC3:        pop    ESI
         xor    EAX,EAX
         pop    EBX
         add    ESP,05Ch
         ret


_D3std5range14__T3ZipTAiTAiZ3Zip5frontMFNdZS3std8typecons14__T5TupleTiTiZ5Tuple 
   comdat
     assume    
CS:_D3std5range14__T3ZipTAiTAiZ3Zip5frontMFNdZS3std8typecons14__T5TupleTiTiZ5Tuple
L0:        sub    ESP,028h
         mov    EDX,4[EAX]
         push    EBX
         push    ESI
         mov    ESI,EAX
         mov    EBX,[ESI]
         push    EDI
         mov    014h[ESP],EBX
         mov    018h[ESP],EDX
         cmp    dword ptr 014h[ESP],0
         je    L31
         lea    EDX,0Ch[ESP]
         mov    EBX,[ESI]
         push    EDX
         mov    EDX,4[ESI]
         mov    EAX,[EDX]
         push    4
         call    near ptr _D3std4conv16__T7emplaceTiTiZ7emplaceFAviZPi
         jmp short    L41
L31:        lea    ECX,0Ch[ESP]
         mov    EDI,4
         push    ECX
         push    EDI
         call    near ptr _D3std4conv14__T7emplaceTiZ7emplaceFAvZPi
L41:        mov    EBX,8[ESI]
         mov    ECX,0Ch[ESI]
         test    EBX,EBX
         je    L61
         lea    EDI,010h[ESP]
         mov    EDX,0Ch[ESI]
         mov    EAX,8[ESI]
         push    EDI
         mov    EAX,[EDX]
         push    4
         call    near ptr _D3std4conv16__T7emplaceTiTiZ7emplaceFAviZPi
         jmp short    L71
L61:        lea    ECX,010h[ESP]
         mov    ESI,4
         push    ECX
         push    ESI
         call    near ptr _D3std4conv14__T7emplaceTiZ7emplaceFAvZPi
L71:        mov    EDX,010h[ESP]
         mov    EAX,0Ch[ESP]
         pop    EDI
         pop    ESI
         pop    EBX
         add    ESP,028h
         ret


_D3std5range14__T3ZipTAiTAiZ3Zip8popFrontMFZv    comdat
L0:        sub    ESP,04Ch
         mov    ECX,010h[EAX]
         test    ECX,ECX
         push    EBX
         push    EBP
         push    ESI
         push    EDI
         mov    EDI,EAX
         je    L1F
         cmp    ECX,1
         je    L4A
         cmp    ECX,2
         je    L8C
         jmp    near ptr L142
L1F:        mov    ESI,[EDI]
         mov    EDX,4[EDI]
         dec    ESI
         mov    EBX,[EDI]
         add    EDX,4
         lea    EBP,8[EDI]
         mov    [EDI],ESI
         mov    4[EDI],EDX
         mov    ESI,0[EBP]
         mov    EDX,4[EBP]
         dec    ESI
         mov    EBX,0[EBP]
         add    EDX,4
         mov    0[EBP],ESI
         mov    4[EBP],EDX
         jmp    near ptr L142
L4A:        mov    ESI,[EDI]
         mov    ECX,4[EDI]
         test    ESI,ESI
         je    L63
         mov    ESI,[EDI]
         mov    EDX,4[EDI]
         dec    ESI
         mov    EBX,[EDI]
         add    EDX,4
         mov    [EDI],ESI
         mov    4[EDI],EDX
L63:        mov    ESI,8[EDI]
         mov    ECX,0Ch[EDI]
         test    ESI,ESI
         je    L142
         lea    EBP,8[EDI]
         mov    ESI,0[EBP]
         mov    EDX,4[EBP]
         dec    ESI
         mov    EBX,0[EBP]
         add    EDX,4
         mov    0[EBP],ESI
         mov    4[EBP],EDX
         jmp    near ptr L142
L8C:        mov    EBX,[EDI]
         mov    EDX,4[EDI]
         mov    ECX,offset 
FLAT:_D3std5range14__T3ZipTAiTAiZ3Zip8popFrontMFZv14__dgliteral831MFZAxa
         mov    014h[ESP],EBX
         mov    018h[ESP],EDX
         mov    01Ch[ESP],EAX
         mov    020h[ESP],ECX
         cmp    dword ptr 014h[ESP],0
         setnz    DL
         xor    DL,1
         je    LD7
         mov    EDX,ECX
         push    dword ptr FLAT:_DATA[054h]
         push    dword ptr FLAT:_DATA[050h]
         push    0B86h
         mov    EAX,028h[ESP]
         mov    EBX,028h[ESP]
         call    EDX
         push    EDX
         push    EAX
         call    near ptr _D3std9exception7bailOutFAyaixAaZv
LD7:        mov    EAX,[EDI]
         mov    EDX,4[EDI]
         dec    EAX
         mov    EBX,[EDI]
         add    EDX,4
         mov    4[EDI],EDX
         mov    EDX,0Ch[EDI]
         mov    EBX,EDI
         mov    [EDI],EAX
         mov    EAX,8[EDI]
         mov    ECX,offset 
FLAT:_D3std5range14__T3ZipTAiTAiZ3Zip8popFrontMFZv14__dgliteral832MFZAxa
         mov    024h[ESP],EAX
         mov    028h[ESP],EDX
         mov    02Ch[ESP],EBX
         mov    030h[ESP],ECX
         cmp    dword ptr 024h[ESP],0
         setnz    DL
         xor    DL,1
         je    L12F
         push    dword ptr FLAT:_DATA[054h]
         push    dword ptr FLAT:_DATA[050h]
         push    0B86h
         mov    EAX,038h[ESP]
         call    ECX
         push    EDX
         push    EAX
         call    near ptr _D3std9exception7bailOutFAyaixAaZv
L12F:        lea    ESI,8[EDI]
         mov    EBX,[ESI]
         mov    EDX,4[ESI]
         dec    EBX
         mov    EAX,[ESI]
         mov    [ESI],EBX
         add    EDX,4
         mov    4[ESI],EDX
L142:        pop    EDI
         pop    ESI
         pop    EBP
         pop    EBX
         add    ESP,04Ch
         ret


_D3std5range14__T3ZipTAiTAiZ3Zip5emptyMFZb    comdat
L0:        sub    ESP,05Ch
         mov    ECX,010h[EAX]
         test    ECX,ECX
         push    EBX
         push    ESI
         mov    ESI,EAX
         je    L21
         cmp    ECX,1
         je    LC3
         cmp    ECX,2
         je    L55
         jmp    near ptr LC3
L21:        mov    EBX,[ESI]
         mov    EDX,4[ESI]
         mov    0Ch[ESP],EBX
         mov    010h[ESP],EDX
         cmp    dword ptr 0Ch[ESP],0
         je    L4A
         mov    EBX,8[ESI]
         mov    EDX,0Ch[ESI]
         mov    014h[ESP],EBX
         mov    018h[ESP],EDX
         cmp    dword ptr 014h[ESP],0
         jne    LC3
L4A:        pop    ESI
         mov    EAX,1
         pop    EBX
         add    ESP,05Ch
         ret
L55:        mov    EBX,[ESI]
         mov    EDX,4[ESI]
         mov    ECX,offset 
FLAT:_D3std5range14__T3ZipTAiTAiZ3Zip5emptyMFZb14__dgliteral828MFZAxa
         mov    01Ch[ESP],EBX
         mov    020h[ESP],EDX
         mov    02Ch[ESP],EAX
         mov    030h[ESP],ECX
         cmp    dword ptr 01Ch[ESP],0
         setz    DL
         mov    EBX,8[ESI]
         mov    024h[ESP],EBX
         mov    ECX,0Ch[ESI]
         mov    028h[ESP],ECX
         cmp    dword ptr 024h[ESP],0
         setz    CL
         cmp    DL,CL
         mov    EDX,1
         je    L98
         xor    EDX,EDX
L98:        xor    DL,1
         je    LC3
         push    dword ptr FLAT:_DATA[054h]
         push    dword ptr FLAT:_DATA[050h]
         push    0ADEh
         mov    EAX,038h[ESP]
         mov    EDX,03Ch[ESP]
         mov    EBX,038h[ESP]
         call    EDX
         push    EDX
         push    EAX
         call    near ptr _D3std9exception7bailOutFAyaixAaZv
LC3:        pop    ESI
         xor    EAX,EAX
         pop    EBX
         add    ESP,05Ch
         ret


_D3std4conv16__T7emplaceTiTiZ7emplaceFAviZPi    comdat
L0:        push    EAX
         mov    EDX,offset 
FLAT:_D3std4conv16__T7emplaceTiTiZ7emplaceFAviZPi14__dgliteral829MFZC6object9Throwable
         push    EBX
         push    ESI
         cmp    dword ptr 010h[ESP],4
         sbb    ECX,ECX
         inc    ECX
         push    ECX
         lea    EBX,0Ch[ESP]
         push    EDX
         push    EBX
         call    near ptr 
_D3std9exception14__T7enforceTbZ7enforceFbLC6object9ThrowableZb
         mov    ECX,014h[ESP]
         mov    EAX,014h[ESP]
         mov    ESI,8[ESP]
         mov    [ECX],ESI
         pop    ESI
         pop    EBX
         pop    ECX
         ret    8


(I have not included all the code, like _D3std9exception7bailOutFAyaixAaZv).

----------------------

Just loops program #3:

LDE:    xor    EBX,EBX
         cmp    010h[ESP],BL
         je    LF5
LE6:    mov    ECX,[EBX*4][EAX]
         add    ECX,[EBX*4][EDI]
         add    ESI,ECX
         inc    EBX
         cmp    EBX,014h[ESP]
         jb    LE6
LF5:    inc    EDX
         cmp    EDX,02FAF080h
         jb    LDE

----------------------

Just loops program #4:

LBF:    xor    EBX,EBX
LC1:    mov    ECX,[EBX*4][EAX]
         add    ECX,[EBX*4][EDI]
         add    ESI,ECX
         inc    EBX
         cmp    EBX,01Eh
         jb    LC1
         inc    EDX
         cmp    EDX,02FAF080h
         jb    LBF

----------------------

Reply via email to