[Bug d/109221] New: std.math.floor, core.math.ldexp, std.math.poly poor inlining

witold.baryluk+gcc at gmail dot com via Gcc-bugs Mon, 20 Mar 2023 18:09:38 -0700

https://gcc.gnu.org/bugzilla/show_bug.cgi?id=109221


            Bug ID: 109221
           Summary: std.math.floor, core.math.ldexp, std.math.poly poor
                    inlining
           Product: gcc
           Version: 13.0
            Status: UNCONFIRMED
          Severity: normal
          Priority: P3
         Component: d
          Assignee: ibuclaw at gdcproject dot org
          Reporter: witold.baryluk+gcc at gmail dot com
  Target Milestone: ---

Example:

/// Reproducer entry point: returns `1.055f * expImpl(x) - 0.055f`.
///
/// The call to `expImpl` is the one whose inlining behaviour is under
/// discussion; the multiply/subtract around it is kept as-is.
static float sRGB_case4(float x) {
    // import std.math : exp;
    return 1.055f * expImpl(x) - 0.055f;  // expImpl not inlined by default
    // (inlined when using pragma(inline, true), but that fails to inline in DMD)
}


// pragma(inline, true)
// This is borrowed from phobos/exponential.d to help gcc inline it fully.
// Only T == float case is here (as some traits are private to phobos).
// Also isNaN and range checks are removed, as sRGB performs own checks.
/**
 * Approximates e^^x with range reduction followed by a polynomial.
 *
 * The argument is reduced to `g = x - n * ln(2)`, e^^g is evaluated from the
 * coefficient table `P`, and the result is scaled by 2^^n via `ldexp`.
 *
 * Params:
 *     x = exponent; no NaN, overflow or underflow handling is performed here
 *         (the corresponding checks are commented out below).
 *
 * Returns: the approximation of e^^x as a value of type `T`.
 */
static private T expImpl(T)(T x) @safe pure nothrow @nogc
{
    //import std.math : floatTraits, RealFormat;
    //import std.math.traits : isNaN;
    //import std.math.rounding : floor;
    //import std.math.algebraic : poly;
    //import std.math.constants : LOG2E;
    import std.math;
    import core.math;

    // Polynomial coefficients handed to poly() below.
    static immutable T[6] P = [
        5.0000001201E-1,
        1.6666665459E-1,
        4.1665795894E-2,
        8.3334519073E-3,
        1.3981999507E-3,
        1.9875691500E-4,
    ];

    // C1 + C2 sums to approximately ln(2); the reduction below subtracts
    // xx * C1 and xx * C2 as two separate steps.
    enum T C1 = 0.693359375;
    enum T C2 = -2.12194440e-4;

    // Overflow and Underflow limits.
    // Only referenced by the disabled special-case checks below.
    enum T OF = 88.72283905206835;
    enum T UF = -103.278929903431851103; // ln(2^-149)

    // Special cases.
    //if (isNaN(x))
    //    return x;
    //if (x > OF)
    //    return real.infinity;
    //if (x < UF)
    //    return 0.0;

    // Express: e^^x = e^^g * 2^^n
    //   = e^^g * e^^(n * LN2)      (LN2 == 1 / LOG2E)
    //   = e^^(g + n * LN2)
    // n is x * LOG2E rounded to the nearest integer (floor of value + 0.5).
    T xx = floor((cast(T) LOG2E) * x + cast(T) 0.5);   // NOT INLINED!
    const int n = cast(int) xx;
    x -= xx * C1;
    x -= xx * C2;

    // x now holds the reduced argument g; compute g + g^^2 * poly(g, P) + 1.
    xx = x * x;
    // poly is generated optimally, but not inlined
    x = poly(x, P) * xx + x + 1.0f;

    // Scale by power of 2.
    x = core.math.ldexp(x, n);    // NOT INLINED

    return x;
}


gdc gdc
(Compiler-Explorer-Build-gcc-454a4d5041f53cd1f7d902f6c0017b7ce95b36df-binutils-2.38)
13.0.1 20230318 (experimental)
gdc -O3 -march=znver2 -frelease -fbounds-check=off


pure nothrow @nogc @safe float std.math.algebraic.poly!(float, float,
6).poly(float, ref const(float[6])):
        vmovss  xmm1, DWORD PTR [rdi+20]
        vfmadd213ss     xmm1, xmm0, DWORD PTR [rdi+16]
        vfmadd213ss     xmm1, xmm0, DWORD PTR [rdi+12]
        vfmadd213ss     xmm1, xmm0, DWORD PTR [rdi+8]
        vfmadd213ss     xmm1, xmm0, DWORD PTR [rdi+4]
        vfmadd213ss     xmm0, xmm1, DWORD PTR [rdi]
        ret
pure nothrow @nogc @safe float example.expImpl!(float).expImpl(float):
        push    rbx
        vmovaps xmm1, xmm0
        sub     rsp, 16
        vmovss  xmm0, DWORD PTR .LC0[rip]
        vfmadd213ss     xmm0, xmm1, DWORD PTR .LC1[rip]
        vmovss  DWORD PTR [rsp+8], xmm1
        call    pure nothrow @nogc @trusted float
std.math.rounding.floor(float)
        vmovss  xmm1, DWORD PTR [rsp+8]
        mov     edi, OFFSET FLAT:immutable(float[6])
example.expImpl!(float).expImpl(float).P
        vfnmadd231ss    xmm1, xmm0, DWORD PTR .LC2[rip]
        vmovss  DWORD PTR [rsp+12], xmm0
        vfnmadd231ss    xmm1, xmm0, DWORD PTR .LC3[rip]
        vmulss  xmm3, xmm1, xmm1
        vmovaps xmm0, xmm1
        vmovss  DWORD PTR [rsp+8], xmm1
        vmovd   ebx, xmm3
        call    pure nothrow @nogc @safe float std.math.algebraic.poly!(float,
float, 6).poly(float, ref const(float[6]))
        vmovss  xmm1, DWORD PTR [rsp+8]
        vmovd   xmm4, ebx
        vmovss  xmm2, DWORD PTR [rsp+12]
        vfmadd132ss     xmm0, xmm1, xmm4
        vaddss  xmm0, xmm0, DWORD PTR .LC4[rip]
        add     rsp, 16
        pop     rbx
        vcvttss2si      edi, xmm2
        jmp     ldexpf
float example.sRGB_case4(float):
        sub     rsp, 8
        call    pure nothrow @nogc @safe float
example.expImpl!(float).expImpl(float)
        vmovss  xmm1, DWORD PTR .LC6[rip]
        vfmadd132ss     xmm0, xmm1, DWORD PTR .LC5[rip]
        add     rsp, 8
        ret


https://godbolt.org/z/YMoMPdjn5


Additionally

std.math.exp itself is never inlined by gcc. This matters because some of its
early checks (the isNaN, OF and UF checks) could be removed after inlining.

[Bug d/109221] New: std.math.floor, core.math.ldexp, std.math.poly poor inlining

Reply via email to