[Issue 3751] New: Optimalization error in some floating point code

d-bugmail Thu, 28 Jan 2010 20:20:10 -0800

http://d.puremagic.com/issues/show_bug.cgi?id=3751


           Summary: Optimalization error in some floating point code
           Product: D
           Version: 2.039
          Platform: x86
        OS/Version: Linux
            Status: NEW
          Severity: major
          Priority: P2
         Component: DMD
        AssignedTo: [email protected]
        ReportedBy: [email protected]


--- Comment #0 from Witold Baryluk <[email protected]> 2010-01-28 
20:17:58 PST ---
I tested this in 2.039 and 2.028, so probably all versions in between are
affected.
I don't know how far back this bug was introduced.

Here is a simplified test case (the algorithm itself is functionally flawed,
but it is sufficient to show the bug):

import std.math;
import std.stdio;

/**
 * Reduced test case for the reported `-O` code-generation problem.
 *
 * Bisects the interval [-7.0, 7.0] on the expression
 * exp(-0.5*half*half) * (half + 0.7) - z: when the expression is > 0 the
 * upper bound moves to the midpoint, when it is <= 0 the lower bound does.
 *
 * Params:
 *     z = value subtracted from the expression being bisected
 * Returns:
 *     the midpoint `half`, as soon as it compares equal to `left` or `right`
 *
 * The two equality tests are the only exits from the loop, so the function
 * terminates only if one of them eventually evaluates to true. If `fhalf`
 * is NaN neither assignment branch runs and the bounds stay unchanged.
 */
double bisect(double z) {
    double left = -7.0, right = 7.0, half;
    while (true) { // do {} while(true); // also has this problem
        // Midpoint of the current interval.
        half = (left+right)*0.5;
version(something) {
        writefln("%s", half); // adding this solves problem
}

        // Exit 1: the midpoint is no longer distinguishable from the lower bound.
        if (left == half) {
            return half; // or break
        }
        // Exit 2: the midpoint is no longer distinguishable from the upper bound.
        if (half == right) {
            return half; // or break
        }
/+
        // the same effect as two if statements.
        if ((left == half) || (half == right)) {
            return half; // or break
        }
+/
        // Value of the bisected expression at the midpoint.
        double fhalf = exp(-0.5*half*half) * (half + 0.7) - z;
        if (fhalf > 0.0) {
            right = half;
        } else if (fhalf <= 0.0) {
            left = half;
        }
    };
    //return half; // not reachable, irrelevant
}

/// Driver for the test case: prints x and bisect(x) for x = 0.1 + 0.001*i.
void main() {
    // i runs from 1 to 9; the upper bound of the range is exclusive.
    foreach (i; 1 .. 10) {
        auto x = 0.1 + 0.001*i;
        writefln("%g %g", x, bisect(x));
    }
}


Compile without optimization:
# dmd blad.d; ./blad
0.101 -0.580467
0.102 -0.579361
0.103 -0.578256
0.104 -0.577153
0.105 -0.57605
0.106 -0.574949
0.107 -0.573849
0.108 -0.57275
0.109 -0.571653
#

Compile with optimization:
# dmd -O blad.d; ./blad
0.101 -0.580467
.... nothing happens, CPU usage 100%
^C
#

Compile with optimization and the dummy write enabled:
# dmd -O -version=something blad.d; ./blad
...
...
0.109 -0.571653
# // terminates correctly, just like without optimization



You can check the assembler code here:

Version without optimization:
; Unoptimized code for bisect(double z) (32-bit x86, x87 FPU).
; Stack slots, as matched against the D source of the test case:
;   -020h[EBP] = left    -018h[EBP] = right    -010h[EBP] = half
;   -8[EBP]    = fhalf    8[EBP]    = z (the argument)
; Every comparison in this listing reloads both operands from these
; 64-bit memory slots before comparing them.
.text._D4blad6bisectFdZd        segment
        assume  CS:.text._D4blad6bisectFdZd
_D4blad6bisectFdZd:
                ; Prologue: set up the frame and reserve 020h bytes of locals.
                push    EBP
                mov     EBP,ESP
                sub     ESP,020h
                ; Initialise left, right and half from constants in .rodata.
                fld     qword ptr FLAT:.rodata[08h]
                fstp    qword ptr -020h[EBP]
                fld     qword ptr FLAT:.rodata[019h]
                fstp    qword ptr -018h[EBP]
                fld     qword ptr FLAT:.rodata[02Ah]
                fstp    qword ptr -010h[EBP]
                ; Loop head: half = (left + right) * constant, stored to memory
                ; and popped from the FPU stack.
L21:            fld     qword ptr -020h[EBP]
                fadd    qword ptr -018h[EBP]
                fmul    qword ptr _t...@sym32[09h]
                fstp    qword ptr -010h[EBP]
                ; if (left == half): both operands are loaded from memory.
                fld     qword ptr -020h[EBP]
                fld     qword ptr -010h[EBP]
                fucompp ST(1),ST
                fstsw   AX
                sahf
                ; Not equal, or unordered (parity set): continue at L46.
                jne     L46
                jp      L46
                ; Equal: return half in ST0; ret 8 also pops the 8-byte argument.
                fld     qword ptr -010h[EBP]
                leave
                ret     8
                ; if (half == right): again both operands come from memory.
L46:            fld     qword ptr -010h[EBP]
                fld     qword ptr -018h[EBP]
                fucompp ST(1),ST
                fstsw   AX
                sahf
                jne     L5C
                jp      L5C
                fld     qword ptr -010h[EBP]
                leave
                ret     8
                ; fhalf: half * constant * half is passed on the stack to exp,
                ; the result is multiplied by (half + constant), z is
                ; subtracted, and the value is stored to the fhalf slot.
L5C:            fld     qword ptr -010h[EBP]
                fmul    qword ptr _t...@sym32[049h]
                fmul    qword ptr -010h[EBP]
                sub     ESP,8
                fstp    qword ptr [ESP]
                call    near ptr _d3std4math3expfnanb...@pc32
                fld     qword ptr -010h[EBP]
                fadd    qword ptr _tm...@sym32
                fmulp   ST(1),ST
                fsub    qword ptr 8[EBP]
                fstp    qword ptr -8[EBP]
                ; if (fhalf > 0.0): ftst compares ST0 against 0.0.
                fld     qword ptr -8[EBP]
                ftst
                fstsw   AX
                sahf
                fstp    ST
                ; Below, equal or unordered: go to the else-if branch at L98.
                jbe     L98
                ; right = half
                fld     qword ptr -010h[EBP]
                fstp    qword ptr -018h[EBP]
                jmp short       L21
                ; else if (fhalf <= 0.0): fhalf is reloaded and tested again.
L98:            fld     qword ptr -8[EBP]
                ftst
                fstsw   AX
                sahf
                fstp    ST
                ; Greater than zero or unordered: skip the assignment.
                ja      L21
                jp      L21
                ; left = half
                fld     qword ptr -010h[EBP]
                fstp    qword ptr -020h[EBP]
                jmp     near ptr L21
                nop
                nop
                nop
.text._D4blad6bisectFdZd        ends


Version with optimization:

; Optimized (-O) code for bisect(double z); this is the variant reported to hang.
; Stack slots, as matched against the D source of the test case:
;   -020h[EBP] = left    -018h[EBP] = right    -010h[EBP] = half
;   -8[EBP]    = fhalf    8[EBP]    = z (the argument)
; The loop is rotated: the midpoint computation and the left == half test
; sit at the bottom of the loop (L77), the half == right test at the top (L28).
_D4blad6bisectFdZd:
                push    EBP
                mov     EBP,ESP
                sub     ESP,020h
                ; Both dwords of the half slot are set to 0.
                ; NOTE(review): presumably the first midpoint (-7 + 7) * 0.5
                ; folded at compile time - confirm.
                mov     dword ptr -010h[EBP],0
                fld     qword ptr FLAT:.rodata[0Fh]
                fld     qword ptr FLAT:.rodata[01Dh]
                fxch    ST1
                mov     dword ptr -0Ch[EBP],0
                ; After the exchange the first constant loaded goes to left,
                ; the second to right.
                fstp    qword ptr -020h[EBP]
                fstp    qword ptr -018h[EBP]
                ; if (half == right): both operands are loaded from memory.
L28:            fld     qword ptr -010h[EBP]
                fld     qword ptr -018h[EBP]
                fucompp ST(1),ST
                fstsw   AX
                sahf
                jne     L40
                jp      L40
                ; Common exit: return half in ST0; ret 8 pops the 8-byte argument.
L37:            fld     qword ptr -010h[EBP]
                mov     ESP,EBP
                pop     EBP
                ret     8
                ; fhalf: half * constant * half is passed on the stack to exp,
                ; the result is multiplied by (half + constant) and z is subtracted.
L40:            fld     qword ptr -010h[EBP]
                fmul    qword ptr _t...@sym32[025h]
                fmul    qword ptr -010h[EBP]
                sub     ESP,8
                fstp    qword ptr [ESP]
                call    near ptr _d3std4math3expfnanb...@pc32
                fld     qword ptr -010h[EBP]
                fadd    qword ptr _t...@sym32[044h]
                fmulp   ST(1),ST
                fsub    qword ptr 8[EBP]
                ; fst stores fhalf without popping; the value left in ST0 is
                ; then tested against 0.0 and popped.
                fst     qword ptr -8[EBP]
                ftst
                fstsw   AX
                fstp    ST
                sahf
                ; Below, equal or unordered: go to the else-if branch at L93.
                jbe     L93
                ; right = half
                fld     qword ptr -010h[EBP]
                fstp    qword ptr -018h[EBP]
                ; Next midpoint: left is loaded and duplicated, then
                ; (left + right) * constant is formed in ST0.
L77:            fld     qword ptr -020h[EBP]
                fld     ST0
                fadd    qword ptr -018h[EBP]
                fmul    qword ptr _tm...@sym32[09h]
                ; half is written to memory with fst (no pop), and the
                ; left == half comparison below uses the value still held in
                ; ST0, not the qword just stored.
                ; NOTE(review): this is the visible difference from the other
                ; two listings, which reload half from memory before
                ; comparing. If the x87 register carries more precision than
                ; the stored double, ST0 can differ from left even when the
                ; stored half equals left, so this exit is never taken -
                ; likely cause of the hang, confirm against FPU precision
                ; control.
                fst     qword ptr -010h[EBP]
                fucompp ST(1),ST
                fstsw   AX
                sahf
                ; Unordered: back to L28. Equal: return via L37. Otherwise loop.
                jp      L28
                je      L37
                jmp short       L28
                ; else if (fhalf <= 0.0): fhalf is reloaded from memory and tested.
L93:            fld     qword ptr -8[EBP]
                ftst
                fstsw   AX
                fstp    ST
                sahf
                ; Greater than zero or unordered: skip the assignment.
                ja      L77
                jp      L77
                ; left = half
                fld     qword ptr -010h[EBP]
                fstp    qword ptr -020h[EBP]
                jmp short       L77
                nop
                nop
                nop
.text._D4blad6bisectFdZd        ends

Version with "something" added artificially:
; Optimized (-O -version=something) code for bisect(double z), i.e. with the
; extra writefln call compiled in; this variant is reported to terminate.
; Stack slots, as matched against the D source of the test case:
;   -020h[EBP] = left    -018h[EBP] = right    -010h[EBP] = half
;   -8[EBP]    = fhalf    8[EBP]    = z (the argument)
.text._D4blad6bisectFdZd        segment
        assume  CS:.text._D4blad6bisectFdZd
_D4blad6bisectFdZd:
                push    EBP
                mov     EBP,ESP
                sub     ESP,020h
                ; Both dwords of the half slot are set to 0.
                mov     dword ptr -010h[EBP],0
                fld     qword ptr _t...@sym32[017h]
                fld     qword ptr _t...@sym32[025h]
                fxch    ST1
                mov     dword ptr -0Ch[EBP],0
                ; After the exchange the first constant loaded goes to left,
                ; the second to right.
                fstp    qword ptr -020h[EBP]
                fstp    qword ptr -018h[EBP]
                ; writefln call made once before the loop is entered.
                ; NOTE(review): the two zero dwords pushed last presumably
                ; form the double 0.0 for the first half value - confirm.
                push    dword ptr _t...@sym32[02eh]
                push    dword ptr _t...@sym32[030h]
                push    0
                push    0
                call    near ptr ...writeflntayatdz8writeflnfaya...@pc32
                ; if (half == right): both operands are loaded from memory.
L3D:            fld     qword ptr -010h[EBP]
                fld     qword ptr -018h[EBP]
                fucompp ST(1),ST
                fstsw   AX
                sahf
                jne     L55
                jp      L55
                ; Common exit: return half in ST0; ret 8 pops the 8-byte argument.
L4C:            fld     qword ptr -010h[EBP]
                mov     ESP,EBP
                pop     EBP
                ret     8
                ; fhalf: half * constant * half is passed on the stack to exp,
                ; the result is multiplied by (half + constant) and z is subtracted.
L55:            fld     qword ptr -010h[EBP]
                fmul    qword ptr _t...@sym32[03ah]
                fmul    qword ptr -010h[EBP]
                sub     ESP,8
                fstp    qword ptr [ESP]
                call    near ptr _d3std4math3expfnanb...@pc32
                fld     qword ptr -010h[EBP]
                fadd    qword ptr _tm...@sym32[09h]
                fmulp   ST(1),ST
                fsub    qword ptr 8[EBP]
                ; fst stores fhalf without popping; the value left in ST0 is
                ; then tested against 0.0 and popped.
                fst     qword ptr -8[EBP]
                ftst
                fstsw   AX
                fstp    ST
                sahf
                ; Below, equal or unordered: go to the else-if branch at LCA.
                jbe     LCA
                ; right = half
                fld     qword ptr -010h[EBP]
                fstp    qword ptr -018h[EBP]
                ; Next midpoint, with the writefln call in between.
L8C:            push    dword ptr _tm...@sym32[02h]
                push    dword ptr _tm...@sym32[04h]
                fld     qword ptr -020h[EBP]
                fadd    qword ptr -018h[EBP]
                fmul    qword ptr _tm...@sym32[028h]
                ; half is stored to its slot, then the register copy is popped
                ; onto the stack as the writefln argument.
                fst     qword ptr -010h[EBP]
                sub     ESP,8
                fstp    qword ptr [ESP]
                call    near ptr  ...writeflntayatdz8writeflnfaya...@pc32
                ; if (left == half): after the call both operands are reloaded
                ; from their 64-bit memory slots, unlike the -O listing
                ; without the writefln call.
                fld     qword ptr -020h[EBP]
                fld     qword ptr -010h[EBP]
                fucompp ST(1),ST
                fstsw   AX
                sahf
                ; Unordered: back to L3D. Equal: return via L4C. Otherwise loop.
                jp      L3D
                je      L4C
                jmp     near ptr L3D
                ; else if (fhalf <= 0.0): fhalf is reloaded from memory and tested.
LCA:            fld     qword ptr -8[EBP]
                ftst
                fstsw   AX
                fstp    ST
                sahf
                ; Greater than zero or unordered: skip the assignment.
                ja      L8C
                jp      L8C
                ; left = half
                fld     qword ptr -010h[EBP]
                fstp    qword ptr -020h[EBP]
                jmp short       L8C
.text._D4blad6bisectFdZd        ends



What is interesting is that adding just a single writefln call changes this
assembler code in many more places than just the call to that function.

-- 
Configure issuemail: http://d.puremagic.com/issues/userprefs.cgi?tab=email
------- You are receiving this mail because: -------

[Issue 3751] New: Optimalization error in some floating point code

Reply via email to