https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109221

            Bug ID: 109221
           Summary: std.math.floor, core.math.ldexp, std.math.poly poor
                    inlining
           Product: gcc
           Version: 13.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: d
          Assignee: ibuclaw at gdcproject dot org
          Reporter: witold.baryluk+gcc at gmail dot com
  Target Milestone: ---

Example:

/**
 * Reproducer entry point: evaluates `1.055f * expImpl(x) - 0.055f`.
 *
 * Uses the local `expImpl` copy instead of `std.math.exp` (see the
 * commented-out import) so that the inlining behaviour can be observed.
 *
 * Params:
 *     x = exponent forwarded unchanged to `expImpl`
 * Returns:
 *     `1.055f * expImpl(x) - 0.055f`
 */
static float sRGB_case4(float x) {
    // import std.math : exp;
    // Reported observation: expImpl is not inlined here by default. It is
    // inlined when expImpl is marked pragma(inline, true), but that fails to
    // inline in DMD.
    return 1.055f * expImpl(x) - 0.055f;
}


// pragma(inline, true)
// This is borrowed from phobos/exponential.d to help gcc inline it fully.
// Only T == float case is here (as some traits are private to phobos).
// Also isNaN and range checks are removed, as sRGB performs own checks.
/**
 * Approximates e^^x using range reduction followed by a degree-5 polynomial.
 *
 * The argument is split as x = g + n * ln(2) with integer n, e^^g is
 * evaluated from the polynomial, and the result is scaled by 2^^n.
 *
 * The NaN, overflow and underflow guards are commented out below, so this
 * function performs no special-case handling itself; the caller is expected
 * to have done those checks.
 *
 * Params:
 *     x = exponent (only the `float` coefficient set is provided here)
 * Returns:
 *     the approximation of e^^x
 */
static private T expImpl(T)(T x) @safe pure nothrow @nogc
{
    //import std.math : floatTraits, RealFormat;
    //import std.math.traits : isNaN;
    //import std.math.rounding : floor;
    //import std.math.algebraic : poly;
    //import std.math.constants : LOG2E;
    import std.math;
    import core.math;

    // Polynomial coefficients passed to poly() for the reduced argument.
    static immutable T[6] P = [
        5.0000001201E-1,
        1.6666665459E-1,
        4.1665795894E-2,
        8.3334519073E-3,
        1.3981999507E-3,
        1.9875691500E-4,
    ];

    // C1 + C2 sums to approximately ln(2); the reduction below subtracts
    // xx * C1 and xx * C2 in two separate steps.
    enum T C1 = 0.693359375;
    enum T C2 = -2.12194440e-4;

    // Overflow and Underflow limits.
    // Only referenced by the disabled special-case checks below.
    enum T OF = 88.72283905206835;
    enum T UF = -103.278929903431851103; // ln(2^-149)

    // Special cases.
    //if (isNaN(x))
    //    return x;
    //if (x > OF)
    //    return real.infinity;
    //if (x < UF)
    //    return 0.0;

    // Express: e^^x = e^^g * 2^^n
    //   = e^^g * e^^(n * LN2)
    //   = e^^(g + n * LN2)
    // where n = floor(x * LOG2E + 0.5) and LN2 is approximated by C1 + C2.
    T xx = floor((cast(T) LOG2E) * x + cast(T) 0.5);   // NOT INLINED!
    const int n = cast(int) xx;
    x -= xx * C1;
    x -= xx * C2;

    // x now holds the reduced argument g; evaluate e^^g.
    xx = x * x;
    // poly is generated optimally, but not inlined
    x = poly(x, P) * xx + x + 1.0f;

    // Scale by power of 2.
    // Qualified with core.math because the function imports both std.math
    // and core.math.
    x = core.math.ldexp(x, n);    // NOT INLINED

    return x;
}


Compiler: gdc (Compiler-Explorer-Build-gcc-454a4d5041f53cd1f7d902f6c0017b7ce95b36df-binutils-2.38) 13.0.1 20230318 (experimental)
gdc -O3 -march=znver2 -frelease -fbounds-check=off


pure nothrow @nogc @safe float std.math.algebraic.poly!(float, float, 6).poly(float, ref const(float[6])):
        vmovss  xmm1, DWORD PTR [rdi+20]
        vfmadd213ss     xmm1, xmm0, DWORD PTR [rdi+16]
        vfmadd213ss     xmm1, xmm0, DWORD PTR [rdi+12]
        vfmadd213ss     xmm1, xmm0, DWORD PTR [rdi+8]
        vfmadd213ss     xmm1, xmm0, DWORD PTR [rdi+4]
        vfmadd213ss     xmm0, xmm1, DWORD PTR [rdi]
        ret
pure nothrow @nogc @safe float example.expImpl!(float).expImpl(float):
        push    rbx
        vmovaps xmm1, xmm0
        sub     rsp, 16
        vmovss  xmm0, DWORD PTR .LC0[rip]
        vfmadd213ss     xmm0, xmm1, DWORD PTR .LC1[rip]
        vmovss  DWORD PTR [rsp+8], xmm1
        call    pure nothrow @nogc @trusted float std.math.rounding.floor(float)
        vmovss  xmm1, DWORD PTR [rsp+8]
        mov     edi, OFFSET FLAT:immutable(float[6]) example.expImpl!(float).expImpl(float).P
        vfnmadd231ss    xmm1, xmm0, DWORD PTR .LC2[rip]
        vmovss  DWORD PTR [rsp+12], xmm0
        vfnmadd231ss    xmm1, xmm0, DWORD PTR .LC3[rip]
        vmulss  xmm3, xmm1, xmm1
        vmovaps xmm0, xmm1
        vmovss  DWORD PTR [rsp+8], xmm1
        vmovd   ebx, xmm3
        call    pure nothrow @nogc @safe float std.math.algebraic.poly!(float, float, 6).poly(float, ref const(float[6]))
        vmovss  xmm1, DWORD PTR [rsp+8]
        vmovd   xmm4, ebx
        vmovss  xmm2, DWORD PTR [rsp+12]
        vfmadd132ss     xmm0, xmm1, xmm4
        vaddss  xmm0, xmm0, DWORD PTR .LC4[rip]
        add     rsp, 16
        pop     rbx
        vcvttss2si      edi, xmm2
        jmp     ldexpf
float example.sRGB_case4(float):
        sub     rsp, 8
        call    pure nothrow @nogc @safe float example.expImpl!(float).expImpl(float)
        vmovss  xmm1, DWORD PTR .LC6[rip]
        vfmadd132ss     xmm0, xmm1, DWORD PTR .LC5[rip]
        add     rsp, 8
        ret


https://godbolt.org/z/YMoMPdjn5


Additionally

std.math.exp itself is never inlined by gcc either. This matters because, with
proper inlining, some of the early checks in exp (the isNaN, OF and UF checks)
could be removed.

Reply via email to