Philippe Sigaud:

Now, what I found more confusing is that, compiling with DMD or LDC, I got different results. Since Phobos code defining sin and cos in std.math and core.stdc.math is the same for DMD and LDC (duh!), I guess that means different intrinsics are used?

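LDC2 optimizes this code even worse than DMD: as the listings below show, DMD emits inline fsin/fcos instructions, while LDC2 with std.math emits calls to std.math.sin and std.math.cos.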

I opened a related thread:
http://forum.dlang.org/thread/[email protected]

--------------------------

import core.stdc.stdio: printf;
import std.math: sin, cos;

/// Returns sin(2.3 * x) + cos(3.7 * x), using sin and cos as imported from std.math.
double g(in double x) pure nothrow {
    return sin(2.3 * x) + cos(3.7 * x);
}

/// Benchmark driver: starting from x = 0, replaces x with g(x)
/// 100_000_000 times, then prints the final value with printf.
void main() {
    double x = 0;
    // Each iteration feeds the previous result back into g (x.g is the UFCS form of g(x)).
    foreach (immutable _; 0 .. 100_000_000)
        x = x.g;

    printf("%f\n", x);
}



/*
-O -release -inline -noboundscheck

DMD:

; DMD output for g(double): x is read from the stack at 4[ESP]; the sum is left in ST(0).
; The constants at _DATA[00h] and _DATA[08h] are presumably 2.3 and 3.7 (data section not shown).
_D4test1gFNaNbxdZd  comdat
        ; ST(0) = x * constant at _DATA[00h], then inline x87 fsin
        fld qword ptr 4[ESP]
        fmul    qword ptr FLAT:_DATA[00h]
        fsin
        ; ST(0) = x * constant at _DATA[08h], then inline x87 fcos
        fld qword ptr 4[ESP]
        fmul    qword ptr FLAT:_DATA[08h]
        fcos
        ; add the two terms and pop, leaving the sum in ST(0)
        faddp   ST(1),ST
        ; return and pop the 8-byte double argument
        ret 8

; DMD output for main: g has been inlined into the loop; x lives in the 8 bytes at 4[ESP].
__Dmain comdat
        ; reserve 12 bytes of stack, EAX = loop counter, x = 0 (both dwords zeroed)
L0:     sub ESP,0Ch
        xor EAX,EAX
        mov dword ptr 4[ESP],0
        mov dword ptr 8[ESP],0
        ; loop body: counter is bumped and compared against 05F5E100h (= 100000000) up front;
        ; the jb at the bottom consumes those flags (the x87 instructions in between leave EFLAGS alone)
L15:    fld qword ptr 4[ESP]
        inc EAX
        cmp EAX,05F5E100h
        fmul    qword ptr FLAT:_DATA[00h]
        fsin
        fld qword ptr 4[ESP]
        fmul    qword ptr FLAT:_DATA[08h]
        fcos
        faddp   ST(1),ST
        ; store the new x back to its stack slot
        fstp    qword ptr 4[ESP]
        jb  L15
        ; push x as two dwords (high half first; after the first push 8[ESP] addresses the low half),
        ; then the address at _DATA[010h] (presumably the format string), and call printf
        push    dword ptr 8[ESP]
        mov EAX,offset FLAT:_DATA[010h]
        push    dword ptr 8[ESP]
        push    EAX
        call    near ptr _printf
        ; first add drops the 12 bytes of printf arguments, second releases the 12 bytes reserved at L0
        add ESP,0Ch
        add ESP,0Ch
        ; return 0
        xor EAX,EAX
        ret

// -------------------------

LDC2:

# LDC2 output for g(double) built against std.math: sin and cos are real calls, not inline x87 instructions.
# In the callee names, the trailing "eZe" denotes a real parameter and real return in D name mangling.
__D4test1gFNaNbxdZd:
    # frame setup; stack aligned down to 8 bytes, 56 bytes reserved
    pushl   %ebp
    movl    %esp, %ebp
    andl    $-8, %esp
    subl    $56, %esp
    # xmm0 = LCPI0_0 * x (SSE2 double), spilled to 40(%esp)
    movsd   LCPI0_0, %xmm0
    mulsd   8(%ebp), %xmm0
    movsd   %xmm0, 40(%esp)
    # reload through the x87 unit and store as an 80-bit value at (%esp): the argument for sin
    fldl    40(%esp)
    fstpt   (%esp)
    calll   __D3std4math3sinFNaNbNfeZe
    # presumably re-reserves the argument bytes popped by the callee -- TODO confirm against the D x86 calling convention
    subl    $12, %esp
    # park the sin result (80-bit) at 12(%esp) while the cos term is computed
    fstpt   12(%esp)
    # same sequence for the second term: x * LCPI0_1 passed to cos
    movsd   8(%ebp), %xmm0
    mulsd   LCPI0_1, %xmm0
    movsd   %xmm0, 48(%esp)
    fldl    48(%esp)
    fstpt   (%esp)
    calll   __D3std4math3cosFNaNbNfeZe
    subl    $12, %esp
    # reload the saved sin result and add it to the cos result
    fldt    12(%esp)
    faddp   %st(1)
    # store as a 64-bit double, copy it through xmm0 to another slot, and reload it so it is in st(0) at return
    fstpl   32(%esp)
    movsd   32(%esp), %xmm0
    movsd   %xmm0, 24(%esp)
    fldl    24(%esp)
    # tear down the frame; ret $8 also pops the 8-byte double argument
    movl    %ebp, %esp
    popl    %ebp
    ret $8

# LDC2 output for main built against std.math; g has been inlined into the loop.
__Dmain:
    pushl   %ebp
    movl    %esp, %ebp
    # esi is saved here and restored in the epilogue; it is the loop counter below
    pushl   %esi
    andl    $-8, %esp
    subl    $72, %esp
    # xmm0 = 0.0 (x), esi = 100000000 iterations remaining
    xorps   %xmm0, %xmm0
    movl    $100000000, %esi
    .align  16, 0x90
LBB1_1:
    # keep a copy of x at 16(%esp) for the cos term
    movsd   %xmm0, 16(%esp)
    # first term: LCPI1_0 * x, converted to an 80-bit value at (%esp) and passed to sin
    mulsd   LCPI1_0, %xmm0
    movsd   %xmm0, 48(%esp)
    fldl    48(%esp)
    fstpt   (%esp)
    calll   __D3std4math3sinFNaNbNfeZe
    # presumably re-reserves the argument bytes popped by the callee -- TODO confirm
    subl    $12, %esp
    # park the sin result (80-bit) at 28(%esp)
    fstpt   28(%esp)
    # second term: saved x * LCPI1_1, converted to 80-bit and passed to cos
    movsd   16(%esp), %xmm0
    mulsd   LCPI1_1, %xmm0
    movsd   %xmm0, 56(%esp)
    fldl    56(%esp)
    fstpt   (%esp)
    calll   __D3std4math3cosFNaNbNfeZe
    subl    $12, %esp
    # add the saved sin result, round to a 64-bit double in memory, and reload into xmm0 as the new x
    fldt    28(%esp)
    faddp   %st(1)
    fstpl   40(%esp)
    movsd   40(%esp), %xmm0
    # count down; loop until esi reaches zero
    decl    %esi
    jne LBB1_1
    # printf arguments: address of _.str (presumably the format string) at (%esp), x as a double at 4(%esp)
    movsd   %xmm0, 4(%esp)
    movl    $_.str, (%esp)
    calll   ___mingw_printf
    # return 0
    xorl    %eax, %eax
    # point esp at the saved esi, then restore esi and ebp
    leal    -4(%ebp), %esp
    popl    %esi
    popl    %ebp
    ret

--------------------------

import core.stdc.stdio: printf;
import core.stdc.math: sin, cos;

/// Returns sin(2.3 * x) + cos(3.7 * x), using sin and cos as imported from core.stdc.math.
double g(in double x) pure nothrow {
    return sin(2.3 * x) + cos(3.7 * x);
}

/// Benchmark driver: iterates x = g(x) 100_000_000 times from x = 0
/// and prints the final x with printf.
void main() {
    double x = 0;
    // The loop variable is unused; only the iteration count matters.
    foreach (immutable _; 0 .. 100_000_000)
        x = x.g;

    printf("%f\n", x);
}



/*
-O -release -inline -noboundscheck

LDC2:

# LDC2 output for g(double) built against core.stdc.math: calls the external symbols _sin and _cos on doubles.
__D5test21gFNaNbxdZd:
    # frame setup; stack aligned down to 8 bytes, 40 bytes reserved
    pushl   %ebp
    movl    %esp, %ebp
    andl    $-8, %esp
    subl    $40, %esp
    # argument for _sin: LCPI0_0 * x stored as a double at (%esp)
    movsd   LCPI0_0, %xmm0
    mulsd   8(%ebp), %xmm0
    movsd   %xmm0, (%esp)
    calll   _sin
    # pop the x87 result to memory as a double and keep a copy at 8(%esp)
    fstpl   32(%esp)
    movsd   32(%esp), %xmm0
    movsd   %xmm0, 8(%esp)
    # argument for _cos: x * LCPI0_1 stored as a double at (%esp)
    movsd   8(%ebp), %xmm0
    mulsd   LCPI0_1, %xmm0
    movsd   %xmm0, (%esp)
    calll   _cos
    fstpl   24(%esp)
    # add the two doubles with SSE2, then load the sum so it is in st(0) at return
    movsd   8(%esp), %xmm0
    addsd   24(%esp), %xmm0
    movsd   %xmm0, 16(%esp)
    fldl    16(%esp)
    # tear down the frame; ret $8 also pops the 8-byte double argument
    movl    %ebp, %esp
    popl    %ebp
    ret $8

# LDC2 output for main built against core.stdc.math; g has been inlined into the loop.
__Dmain:
    pushl   %ebp
    movl    %esp, %ebp
    # esi is saved here and restored in the epilogue; it is the loop counter below
    pushl   %esi
    andl    $-8, %esp
    subl    $56, %esp
    # xmm0 = 0.0 (x), esi = 100000000 iterations remaining
    xorps   %xmm0, %xmm0
    movl    $100000000, %esi
    .align  16, 0x90
LBB1_1:
    # keep a copy of x at 16(%esp) for the cos term
    movsd   %xmm0, 16(%esp)
    # first term: LCPI1_0 * x passed to _sin as a double at (%esp)
    mulsd   LCPI1_0, %xmm0
    movsd   %xmm0, (%esp)
    calll   _sin
    # pop the x87 result to memory as a double and keep a copy at 24(%esp)
    fstpl   40(%esp)
    movsd   40(%esp), %xmm0
    movsd   %xmm0, 24(%esp)
    # second term: saved x * LCPI1_1 passed to _cos
    movsd   16(%esp), %xmm0
    mulsd   LCPI1_1, %xmm0
    movsd   %xmm0, (%esp)
    calll   _cos
    fstpl   32(%esp)
    # new x = sin term + cos term, summed with SSE2
    movsd   24(%esp), %xmm0
    addsd   32(%esp), %xmm0
    # count down; loop until esi reaches zero
    decl    %esi
    jne LBB1_1
    # printf arguments: address of _.str (presumably the format string) at (%esp), x as a double at 4(%esp)
    movsd   %xmm0, 4(%esp)
    movl    $_.str, (%esp)
    calll   ___mingw_printf
    # return 0
    xorl    %eax, %eax
    # point esp at the saved esi, then restore esi and ebp
    leal    -4(%ebp), %esp
    popl    %esi
    popl    %ebp
    ret

--------------------------

import core.stdc.stdio: printf;

version(LDC) {
    import ldc.intrinsics;

    /// Returns sin(2.3 * x) + cos(3.7 * x), computed with the LLVM
    /// intrinsics llvm_sin and llvm_cos from ldc.intrinsics.
    double g(in double x) pure nothrow {
        return llvm_sin(2.3 * x) + llvm_cos(3.7 * x);
    }
} else {
    import std.math: sin, cos;

    /// Fallback for compilers other than LDC, where ldc.intrinsics does not
    /// exist: same formula, using std.math. Without this branch g would be
    /// undeclared and the program would not compile outside LDC.
    double g(in double x) pure nothrow {
        return sin(2.3 * x) + cos(3.7 * x);
    }
}

/// Benchmark driver: applies g to its own result 100_000_000 times,
/// starting from x = 0, and prints the final x with printf.
void main() {
    double x = 0;
    // x.g is the UFCS form of g(x); every pass depends on the previous one.
    foreach (immutable _; 0 .. 100_000_000)
        x = x.g;

    printf("%f\n", x);
}



/*
-O -release -inline -noboundscheck

LDC2:

# LDC2 output for g(double) written with llvm_sin/llvm_cos: the listing shows plain calls to _sin and _cos.
__D5test31gFNaNbxdZd:
    # frame setup; stack aligned down to 8 bytes, 40 bytes reserved
    pushl   %ebp
    movl    %esp, %ebp
    andl    $-8, %esp
    subl    $40, %esp
    # argument for _sin: LCPI0_0 * x stored as a double at (%esp)
    movsd   LCPI0_0, %xmm0
    mulsd   8(%ebp), %xmm0
    movsd   %xmm0, (%esp)
    calll   _sin
    # pop the x87 result to memory as a double and keep a copy at 8(%esp)
    fstpl   24(%esp)
    movsd   24(%esp), %xmm0
    movsd   %xmm0, 8(%esp)
    # argument for _cos: x * LCPI0_1 stored as a double at (%esp)
    movsd   8(%ebp), %xmm0
    mulsd   LCPI0_1, %xmm0
    movsd   %xmm0, (%esp)
    calll   _cos
    fstpl   16(%esp)
    # add the two doubles with SSE2, then load the sum so it is in st(0) at return
    movsd   8(%esp), %xmm0
    addsd   16(%esp), %xmm0
    movsd   %xmm0, 32(%esp)
    fldl    32(%esp)
    # tear down the frame; ret $8 also pops the 8-byte double argument
    movl    %ebp, %esp
    popl    %ebp
    ret $8

# LDC2 output for main of the llvm_sin/llvm_cos version; g has been inlined into the loop.
__Dmain:
    pushl   %ebp
    movl    %esp, %ebp
    # esi is saved here and restored in the epilogue; it is the loop counter below
    pushl   %esi
    andl    $-8, %esp
    subl    $56, %esp
    # xmm0 = 0.0 (x), esi = 100000000 iterations remaining
    xorps   %xmm0, %xmm0
    movl    $100000000, %esi
    .align  16, 0x90
LBB1_1:
    # keep a copy of x at 16(%esp) for the cos term
    movsd   %xmm0, 16(%esp)
    # first term: LCPI1_0 * x passed to _sin as a double at (%esp)
    mulsd   LCPI1_0, %xmm0
    movsd   %xmm0, (%esp)
    calll   _sin
    # pop the x87 result to memory as a double and keep a copy at 24(%esp)
    fstpl   40(%esp)
    movsd   40(%esp), %xmm0
    movsd   %xmm0, 24(%esp)
    # second term: saved x * LCPI1_1 passed to _cos
    movsd   16(%esp), %xmm0
    mulsd   LCPI1_1, %xmm0
    movsd   %xmm0, (%esp)
    calll   _cos
    fstpl   32(%esp)
    # new x = sin term + cos term, summed with SSE2
    movsd   24(%esp), %xmm0
    addsd   32(%esp), %xmm0
    # count down; loop until esi reaches zero
    decl    %esi
    jne LBB1_1
    # printf arguments: address of _.str (presumably the format string) at (%esp), x as a double at 4(%esp)
    movsd   %xmm0, 4(%esp)
    movl    $_.str, (%esp)
    calll   ___mingw_printf
    # return 0
    xorl    %eax, %eax
    # point esp at the saved esi, then restore esi and ebp
    leal    -4(%ebp), %esp
    popl    %esi
    popl    %ebp
    ret

--------------------------

// C99 code
#include <stdio.h>
#include <math.h>

/* Returns sin(2.3 * x) + cos(3.7 * x), using sin and cos from <math.h>. */
double g(const double x) {
    return sin(2.3 * x) + cos(3.7 * x);
}

/* Benchmark driver: starting from x = 0, replaces x with g(x) 100000000
 * times, prints the final x with printf, and returns 0. */
int main() {
    double x = 0;
    /* Each iteration feeds the previous result back into g. */
    for (int i = 0; i < 100000000; i++)
        x = g(x);

    printf("%f\n", x);
    return 0;
}


/*
gcc -fkeep-inline-functions -std=c99 -flto -S -Ofast test4.c -o test4.s

# GCC output for g (x87, AT&T syntax): the argument is read from 4(%esp); the sum is left in st(0).
_g:
    # after these three instructions: st(0) = LC0 * x, st(1) = x
    fldl    4(%esp)
    fldl    LC0
    fmul    %st(1), %st
    fsin
    # swap so x is on top, scale it by LC1, take the cosine
    fxch    %st(1)
    fmull   LC1
    fcos
    # add the two terms and pop, leaving the sum in st(0)
    faddp   %st, %st(1)
    ret

# GCC output for main: g has been inlined and x stays on the x87 stack for the whole loop.
_main:
    # frame setup; stack aligned down to 16 bytes, 16 bytes reserved
    pushl   %ebp
    movl    %esp, %ebp
    andl    $-16, %esp
    subl    $16, %esp
    call    ___main
    # eax = iteration counter
    movl    $100000000, %eax
    # x87 stack after the next four instructions (top first): 1.0, 0.0, LC0
    fld1
    fldz
    fldl    LC0
    fxch    %st(2)
    # enter the loop at its tail: the first pass only adds 1.0 and 0.0, no fsin/fcos is executed for it
    jmp L19
    .p2align 4,,7
L22:
    # loop body; x87 stack on entry (top first): x, LC0
    fld %st(0)
    fmul    %st(2), %st
    fsin
    fxch    %st(1)
    fmull   LC1
    fcos
L19:
    # decrement the counter and add the two terms; jne tests the result of the subl (faddp leaves EFLAGS alone)
    subl    $1, %eax
    faddp   %st, %st(1)
    jne L22
    # drop LC0 from the x87 stack, then store x as the double argument for printf
    fstp    %st(1)
    fstpl   4(%esp)
    movl    $LC5, (%esp)
    call    _printf
    # return 0
    xorl    %eax, %eax
    leave
    .cfi_restore 5
    .cfi_def_cfa 4, 4
    ret

Bye,
bearophile

Reply via email to