Re: Code speed

bearophile Wed, 14 Apr 2010 06:50:14 -0700

> - the two nested loops in the main are more efficient as ref double,
> this is something dmd will need to fix;


A test shows that on ldc the two nested loops are a little faster without the 
ref. I'd like the compiler to use a "const ref" when a foreach iterates over 
array items bigger than a word.

I've done a related small test:


version (Tango)
    import tango.stdc.stdio: printf;
else
    import std.stdio: printf;

// Micro-benchmark: accumulates el * index over a 1000-element double array
// and prints the total, so the code generated for the foreach loop can be
// compared between compilers and between the `ref` / non-`ref` variants.
void main() {
    // NOTE(review): the elements are never assigned; D presumably
    // default-initializes doubles to NaN, so the printed total is likely
    // NaN -- irrelevant for comparing the generated loop code, but confirm.
    auto data = new double[1_000];
    double tot = 0.0;
    // `uint i` is the element index, `el` the element; this is the
    // "with ref" variant of the loop.
    foreach (uint i, ref el; data)
        tot += el * i;
    // Print the accumulated total (presumably this also keeps the loop
    // from being discarded as dead code -- TODO confirm).
    printf("%f\n", tot);
}


Below is just the asm of the loop.
Note how ldc keeps two iteration indexes, one (as a double) in xmm0 and one 
(as an integer) in ESI, because it's faster this way. .LCPI1_0 is 
.quad 4607182418800017408, which is the bit pattern of the double value 1.0.

(Here the presence of ref doesn't change the code produced by ldc, but in the 
two loops of the original code it makes a little difference).

ldc -O5 -release -inline -enable-unsafe-fp-math -output-s test.d
dmd -O -release -inline test.d

-------------------------

ldc, with ref:

# Loop body of `tot += el * i` as emitted by ldc (AT&T syntax: op src, dst).
# Two copies of the index are kept: %esi as an integer, %xmm0 as a double.
# Presumably %eax holds the array base and 16(%esp) the running total `tot`.
.LBB1_1:
    movapd  %xmm0, %xmm1             # xmm1 = index as a double
    mulsd   (%eax,%esi,8), %xmm1     # xmm1 *= 8-byte element at eax + esi*8  (el * i)
    movsd   16(%esp), %xmm2          # reload the accumulator from its stack slot
    addsd   %xmm1, %xmm2             # accumulator += el * i
    movsd   %xmm2, 16(%esp)          # store the accumulator back every iteration
    incl    %esi                     # integer index++
    cmpl    $1000, %esi              # 1000 = array length used in the test program
    addsd   .LCPI1_0, %xmm0          # double index += 1.0 (.LCPI1_0 is the constant 1.0)
    jne .LBB1_1                      # loop until the integer index reaches 1000

-------------------------

ldc, without ref:

# Loop body of `tot += el * i` as emitted by ldc for the variant without `ref`
# (AT&T syntax: op src, dst). The index is kept twice: %esi as an integer,
# %xmm0 as a double. Presumably %eax holds the array base and 16(%esp) `tot`.
.LBB1_1:
    movapd  %xmm0, %xmm1             # xmm1 = index as a double
    mulsd   (%eax,%esi,8), %xmm1     # xmm1 *= 8-byte element at eax + esi*8  (el * i)
    movsd   16(%esp), %xmm2          # reload the accumulator from its stack slot
    addsd   %xmm1, %xmm2             # accumulator += el * i
    movsd   %xmm2, 16(%esp)          # store the accumulator back every iteration
    incl    %esi                     # integer index++
    cmpl    $1000, %esi              # 1000 = array length used in the test program
    addsd   .LCPI1_0, %xmm0          # double index += 1.0 (.LCPI1_0 is the constant 1.0)
    jne .LBB1_1                      # loop until the integer index reaches 1000

-------------------------

dmd, with ref:

; Loop body of `tot += el * i` as emitted by dmd, using the x87 FPU.
; EDX is the index; the uint index is widened to a 64-bit integer in the
; stack temp at 8[ESP]..0Ch[ESP] so that fild can convert it.
; Presumably ECX is the array base, 010h[ESP] the length and 018h[ESP] `tot`.
L45:    mov 8[ESP],EDX              ; low dword of the 64-bit temp = index
        xor ESI,ESI                 ; ESI = 0
        fld qword ptr [EDX*8][ECX]  ; push the double at ECX + EDX*8 (el)
        mov 0Ch[ESP],ESI            ; high dword of the temp = 0 (zero-extends the index)
        mov EBX,EDX                 ; copy of the index; EBX is not read again in this listing
        inc EDX                     ; index++ (the temp still holds the old value)
        fild    long64 ptr 8[ESP]   ; push the 64-bit integer temp converted to floating point
        fmulp   ST(1),ST            ; ST(1) *= ST, then pop: el * i
        fadd    qword ptr 018h[ESP] ; add the accumulator from its stack slot
        cmp EDX,010h[ESP]           ; compare the new index with the loop bound
        fstp    qword ptr 018h[ESP] ; store the new accumulator and pop
        jb  L45                     ; unsigned "below": loop while index < bound

-------------------------

dmd, without ref:

; Loop body of `tot += el * i` as emitted by dmd for the variant without `ref`,
; using the x87 FPU. EDX is the index; the uint index is widened to a 64-bit
; integer in the stack temp at 8[ESP]..0Ch[ESP] so that fild can convert it.
; Presumably ECX is the array base, 010h[ESP] the length and 018h[ESP] `tot`.
L45:    mov 8[ESP],EDX              ; low dword of the 64-bit temp = index
        xor ESI,ESI                 ; ESI = 0
        mov EBX,EDX                 ; copy of the index; EBX is not read again in this listing
        mov 0Ch[ESP],ESI            ; high dword of the temp = 0 (zero-extends the index)
        fild    long64 ptr 8[ESP]   ; push the 64-bit integer temp converted to floating point
        fmul    qword ptr [EDX*8][ECX] ; ST *= double at ECX + EDX*8: i * el
        inc EDX                     ; index++
        cmp EDX,010h[ESP]           ; compare the new index with the loop bound
        fadd    qword ptr 018h[ESP] ; add the accumulator from its stack slot
        fstp    qword ptr 018h[ESP] ; store the new accumulator and pop
        jb  L45                     ; unsigned "below": loop while index < bound

Bye,
bearophile

Re: Code speed

Reply via email to