Through Reddit I found an article about the vector calling convention added in VS2013:
http://blogs.msdn.com/b/vcblog/archive/2013/07/12/introducing-vector-calling-convention.aspx


So I have written what I think is a similar D program:


import core.stdc.stdio, core.simd;

/// A pair of four-lane single-precision SIMD vectors, `x` and `y`.
struct Particle {
    float4 x;
    float4 y;
}

/// Returns a new Particle whose `x` and `y` vectors are the lane-wise sums
/// of the corresponding vectors of `p1` and `p2`. Neither input is modified.
Particle addParticles(in Particle p1, in Particle p2) pure nothrow {
    auto sumX = p1.x + p2.x;
    auto sumY = p1.y + p2.y;
    return Particle(sumX, sumY);
}

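// Alternative version using a fixed-size array of two float4 vectors instead
// of a struct; disabled because of bugs 10627 and 10523.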
//alias Particle2 = float4[2];
//Particle2 addParticles(in Particle2 p1, in Particle2 p2) {
//    return p1[] + p2[];
//}

// Builds two particles, prints the eight lanes of each, then prints the
// eight lanes of their sum as computed by addParticles.
void main() {
    auto lhs = Particle([1, 2, 3, 4], [10, 20, 30, 40]);
    printf("%f %f %f %f %f %f %f %f\n",
           lhs.x.array[0], lhs.x.array[1], lhs.x.array[2], lhs.x.array[3],
           lhs.y.array[0], lhs.y.array[1], lhs.y.array[2], lhs.y.array[3]);

    auto rhs = Particle([100, 200, 300, 400], [1000, 2000, 3000, 4000]);
    printf("%f %f %f %f %f %f %f %f\n",
           rhs.x.array[0], rhs.x.array[1], rhs.x.array[2], rhs.x.array[3],
           rhs.y.array[0], rhs.y.array[1], rhs.y.array[2], rhs.y.array[3]);

    auto total = addParticles(lhs, rhs);
    printf("%f %f %f %f %f %f %f %f\n",
           total.x.array[0], total.x.array[1], total.x.array[2], total.x.array[3],
           total.y.array[0], total.y.array[1], total.y.array[2], total.y.array[3]);
}


I compiled it with the latest ldc2 (32-bit Windows):

ldc2 -O5 -disable-inlining -release -vectorize-slp -vectorize-slp-aggressive -output-s test.d


The resulting X86 asm:

__D4test12addParticlesFNaNbxS4test8ParticlexS4test8ParticleZS4test8Particle:
        pushl   %ebp
        movl    %esp, %ebp
        andl    $-16, %esp
        subl    $16, %esp
        movaps  40(%ebp), %xmm0
        movaps  56(%ebp), %xmm1
        addps   8(%ebp), %xmm0
        addps   24(%ebp), %xmm1
        movups  %xmm1, 16(%eax)
        movups  %xmm0, (%eax)
        movl    %ebp, %esp
        popl    %ebp
        ret     $64

__Dmain:
...
        movaps  160(%esp), %xmm0
        movaps  176(%esp), %xmm1
        movaps  %xmm1, 48(%esp)
        movaps  %xmm0, 32(%esp)
        movaps  128(%esp), %xmm0
        movaps  144(%esp), %xmm1
        movaps  %xmm1, 16(%esp)
        movaps  %xmm0, (%esp)
        leal    96(%esp), %eax
        calll   __D4test12addParticlesFNaNbxS4test8ParticlexS4test8ParticleZS4test8Particle
        subl    $64, %esp
        movss   96(%esp), %xmm0
        movss   100(%esp), %xmm1
        movss   104(%esp), %xmm2
        movss   108(%esp), %xmm3
        movss   112(%esp), %xmm4
        movss   116(%esp), %xmm5
        movss   120(%esp), %xmm6
        movss   124(%esp), %xmm7
        cvtss2sd        %xmm7, %xmm7
        movsd   %xmm7, 60(%esp)
        cvtss2sd        %xmm6, %xmm6
        movsd   %xmm6, 52(%esp)
        cvtss2sd        %xmm5, %xmm5
        movsd   %xmm5, 44(%esp)
        cvtss2sd        %xmm4, %xmm4
        movsd   %xmm4, 36(%esp)
        cvtss2sd        %xmm3, %xmm3
        movsd   %xmm3, 28(%esp)
        cvtss2sd        %xmm2, %xmm2
        movsd   %xmm2, 20(%esp)
        cvtss2sd        %xmm1, %xmm1
        movsd   %xmm1, 12(%esp)
        cvtss2sd        %xmm0, %xmm0
        movsd   %xmm0, 4(%esp)
        movl    $_.str3, (%esp)
        calll   ___mingw_printf
        xorl    %eax, %eax
        movl    %ebp, %esp
        popl    %ebp
        ret


Are those vector calling conventions useful for D too?

Bye,
bearophile

Reply via email to