I have only verified that the CRT builds with {i686,x86_64}-w64-mingw32-gcc; I have not checked whether anything goes wrong at runtime. Martin, would you please include this patch in your nightly tests? Note that `-masm=intel` requires the latest Clang: older versions do not accept Intel syntax in inline assembly (although they can output it).
-- Best regards, LIU Hao
From c019d44f8a06e4105967d5a2de4c8109a2c6edb5 Mon Sep 17 00:00:00 2001 From: LIU Hao <[email protected]> Date: Wed, 22 Mar 2023 10:47:43 +0800 Subject: [PATCH] crt: Make CRT buildable with `-masm=intel` The unofficial AT&T syntax will likely be kept the default in foreseeable future. However my long-term plan is to transition to Intel syntax, even after decades. This is the zwischenzug for allowing the CRT to be built with GCC and latest Clang with the `-masm=intel` option, just like our headers. Signed-off-by: LIU Hao <[email protected]> --- mingw-w64-crt/cfguard/mingw_cfguard_support.c | 4 + mingw-w64-crt/math/llrint.c | 2 +- mingw-w64-crt/math/llrintf.c | 2 +- mingw-w64-crt/math/llrintl.c | 2 +- mingw-w64-crt/math/lrint.c | 2 +- mingw-w64-crt/math/lrintf.c | 2 +- mingw-w64-crt/math/lrintl.c | 2 +- mingw-w64-crt/math/modf.c | 42 +++++---- mingw-w64-crt/math/modff.c | 43 +++++---- mingw-w64-crt/math/modfl.c | 42 +++++---- mingw-w64-crt/math/x86/cossin.c | 81 +++++++++-------- mingw-w64-crt/math/x86/exp.def.h | 88 ++++++++++--------- mingw-w64-crt/math/x86/pow.def.h | 45 ++++++---- mingw-w64-crt/misc/feclearexcept.c | 6 +- mingw-w64-crt/misc/fegetround.c | 2 +- mingw-w64-crt/misc/winbs_uint64.c | 6 +- mingw-w64-crt/misc/winbs_ulong.c | 2 +- mingw-w64-crt/misc/winbs_ushort.c | 2 +- mingw-w64-crt/stdio/_scprintf.c | 12 ++- 19 files changed, 214 insertions(+), 173 deletions(-) diff --git a/mingw-w64-crt/cfguard/mingw_cfguard_support.c b/mingw-w64-crt/cfguard/mingw_cfguard_support.c index cf4535afd..2c6caeb4b 100644 --- a/mingw-w64-crt/cfguard/mingw_cfguard_support.c +++ b/mingw-w64-crt/cfguard/mingw_cfguard_support.c @@ -21,7 +21,11 @@ static void __guard_check_icall_dummy(void) {} // is passed via %rax. 
__asm__( "__guard_dispatch_icall_dummy:\n" +#if 0 __REGISTER_PREFIX__ + 1 == 0 // `0 % + 1 == 0` for at&t " jmp *%rax\n" +#else + " jmp rax\n" +#endif ); // This is intentionally declared as _not_ a function pointer, so that the diff --git a/mingw-w64-crt/math/llrint.c b/mingw-w64-crt/math/llrint.c index 4ba7af7e6..2d05878ba 100644 --- a/mingw-w64-crt/math/llrint.c +++ b/mingw-w64-crt/math/llrint.c @@ -10,7 +10,7 @@ long long llrint (double x) { long long retval = 0ll; #if defined(_AMD64_) || defined(__x86_64__) || defined(_X86_) || defined(__i386__) - __asm__ __volatile__ ("fistpll %0" : "=m" (retval) : "t" (x) : "st"); + __asm__ __volatile__ ("fistp{ll} %0" : "=m" (retval) : "t" (x) : "st"); #else int mode = fegetround(); if (mode == FE_DOWNWARD) diff --git a/mingw-w64-crt/math/llrintf.c b/mingw-w64-crt/math/llrintf.c index e8085ec4e..f5cd19fc2 100644 --- a/mingw-w64-crt/math/llrintf.c +++ b/mingw-w64-crt/math/llrintf.c @@ -10,7 +10,7 @@ long long llrintf (float x) { long long retval = 0ll; #if defined(_AMD64_) || defined(__x86_64__) || defined(_X86_) || defined(__i386__) - __asm__ __volatile__ ("fistpll %0" : "=m" (retval) : "t" (x) : "st"); + __asm__ __volatile__ ("fistp{ll} %0" : "=m" (retval) : "t" (x) : "st"); #else int mode = fegetround(); if (mode == FE_DOWNWARD) diff --git a/mingw-w64-crt/math/llrintl.c b/mingw-w64-crt/math/llrintl.c index 6a2bf73d8..d3651059c 100644 --- a/mingw-w64-crt/math/llrintl.c +++ b/mingw-w64-crt/math/llrintl.c @@ -10,7 +10,7 @@ long long llrintl (long double x) { long long retval = 0ll; #if defined(_AMD64_) || defined(__x86_64__) || defined(_X86_) || defined(__i386__) - __asm__ __volatile__ ("fistpll %0" : "=m" (retval) : "t" (x) : "st"); + __asm__ __volatile__ ("fistp{ll} %0" : "=m" (retval) : "t" (x) : "st"); #else int mode = fegetround(); if (mode == FE_DOWNWARD) diff --git a/mingw-w64-crt/math/lrint.c b/mingw-w64-crt/math/lrint.c index 7831446be..1bf489b85 100644 --- a/mingw-w64-crt/math/lrint.c +++ 
b/mingw-w64-crt/math/lrint.c @@ -15,7 +15,7 @@ long lrint (double x) #if defined(_AMD64_) || defined(__x86_64__) retval = _mm_cvtsd_si32(_mm_load_sd(&x)); #elif defined(_X86_) || defined(__i386__) - __asm__ __volatile__ ("fistpl %0" : "=m" (retval) : "t" (x) : "st"); + __asm__ __volatile__ ("fistp{l} %0" : "=m" (retval) : "t" (x) : "st"); #elif defined(__arm__) || defined(_ARM_) float temp; __asm__ __volatile__ ( diff --git a/mingw-w64-crt/math/lrintf.c b/mingw-w64-crt/math/lrintf.c index 1e8902f03..5a9dcfe83 100644 --- a/mingw-w64-crt/math/lrintf.c +++ b/mingw-w64-crt/math/lrintf.c @@ -15,7 +15,7 @@ long lrintf (float x) #if defined(_AMD64_) || defined(__x86_64__) retval = _mm_cvtss_si32(_mm_load_ss(&x)); #elif defined(_X86_) || defined(__i386__) - __asm__ __volatile__ ("fistpl %0" : "=m" (retval) : "t" (x) : "st"); + __asm__ __volatile__ ("fistp{l} %0" : "=m" (retval) : "t" (x) : "st"); #elif defined(__arm__) || defined(_ARM_) __asm__ __volatile__ ( "vcvtr.s32.f32 %[src], %[src]\n\t" diff --git a/mingw-w64-crt/math/lrintl.c b/mingw-w64-crt/math/lrintl.c index d710fac05..ad476052a 100644 --- a/mingw-w64-crt/math/lrintl.c +++ b/mingw-w64-crt/math/lrintl.c @@ -9,7 +9,7 @@ long lrintl (long double x) { long retval = 0l; #if defined(_AMD64_) || defined(__x86_64__) || defined(_X86_) || defined(__i386__) - __asm__ __volatile__ ("fistpl %0" : "=m" (retval) : "t" (x) : "st"); + __asm__ __volatile__ ("fistp{l} %0" : "=m" (retval) : "t" (x) : "st"); #elif defined(__arm__) || defined(_ARM_) || defined(__aarch64__) || defined(_ARM64_) retval = lrint(x); #endif diff --git a/mingw-w64-crt/math/modf.c b/mingw-w64-crt/math/modf.c index d2623095c..17bc3260d 100644 --- a/mingw-w64-crt/math/modf.c +++ b/mingw-w64-crt/math/modf.c @@ -13,25 +13,31 @@ modf (double value, double* iptr) double int_part = 0.0; /* truncate */ #if defined(_AMD64_) || defined(__x86_64__) - asm volatile ("subq $8, %%rsp\n" - "fnstcw 4(%%rsp)\n" - "movzwl 4(%%rsp), %%eax\n" - "orb $12, %%ah\n" - "movw %%ax, 
(%%rsp)\n" - "fldcw (%%rsp)\n" - "frndint\n" - "fldcw 4(%%rsp)\n" - "addq $8, %%rsp\n" : "=t" (int_part) : "0" (value) : "eax"); /* round */ + asm volatile ( + "{ subq $8, %%rsp | sub rsp, 8 }\n" + "{ fnstcw 4(%%rsp) | fnstcw word ptr [rsp + 4] }\n" + "{ movzwl 4(%%rsp), %%eax | movzx eax, word ptr [rsp + 4] }\n" + "{ orb $12, %%ah | or eax, 0x0C00 }\n" + "{ movw %%ax, (%%rsp) | mov word ptr [rsp], ax }\n" + "{ fldcw (%%rsp) | fldcw word ptr [rsp] }\n" + "{ frndint | frndint }\n" + "{ fldcw 4(%%rsp) | fldcw word ptr [rsp + 4] }\n" + "{ addq $8, %%rsp | add rsp, 8 }\n" + : "=t" (int_part) : "0" (value) : "eax"); /* round */ #elif defined(_X86_) || defined(__i386__) - asm volatile ("push %%eax\n\tsubl $8, %%esp\n" - "fnstcw 4(%%esp)\n" - "movzwl 4(%%esp), %%eax\n" - "orb $12, %%ah\n" - "movw %%ax, (%%esp)\n" - "fldcw (%%esp)\n" - "frndint\n" - "fldcw 4(%%esp)\n" - "addl $8, %%esp\n\tpop %%eax\n" : "=t" (int_part) : "0" (value) : "eax"); /* round */ + asm volatile ( + "{ push %%eax | push eax }\n" + "{ subl $8, %%esp | sub esp, 8 }\n" + "{ fnstcw 4(%%esp) | fnstcw word ptr [esp + 4] }\n" + "{ movzwl 4(%%esp), %%eax | movzx eax, word ptr [esp + 4] }\n" + "{ orb $12, %%ah | or eax, 0x0C00 }\n" + "{ movw %%ax, (%%esp) | mov word ptr [esp], ax }\n" + "{ fldcw (%%esp) | fldcw word ptr [esp] }\n" + "{ frndint | frndint }\n" + "{ fldcw 4(%%esp) | fldcw word ptr [esp + 4] }\n" + "{ addl $8, %%esp | add esp, 8 }\n" + "{ pop %%eax | pop eax }\n" + : "=t" (int_part) : "0" (value) : "eax"); /* round */ #else int_part = trunc(value); #endif diff --git a/mingw-w64-crt/math/modff.c b/mingw-w64-crt/math/modff.c index dcf19cfed..f80d8092c 100644 --- a/mingw-w64-crt/math/modff.c +++ b/mingw-w64-crt/math/modff.c @@ -11,28 +11,33 @@ float modff (float value, float* iptr) { float int_part = 0.0F; - /* truncate */ /* truncate */ #if defined(_AMD64_) || defined(__x86_64__) - asm volatile ("subq $8, %%rsp\n" - "fnstcw 4(%%rsp)\n" - "movzwl 4(%%rsp), %%eax\n" - "orb $12, %%ah\n" - "movw %%ax, 
(%%rsp)\n" - "fldcw (%%rsp)\n" - "frndint\n" - "fldcw 4(%%rsp)\n" - "addq $8, %%rsp\n" : "=t" (int_part) : "0" (value) : "eax"); /* round */ + asm volatile ( + "{ subq $8, %%rsp | sub rsp, 8 }\n" + "{ fnstcw 4(%%rsp) | fnstcw word ptr [rsp + 4] }\n" + "{ movzwl 4(%%rsp), %%eax | movzx eax, word ptr [rsp + 4] }\n" + "{ orb $12, %%ah | or eax, 0x0C00 }\n" + "{ movw %%ax, (%%rsp) | mov word ptr [rsp], ax }\n" + "{ fldcw (%%rsp) | fldcw word ptr [rsp] }\n" + "{ frndint | frndint }\n" + "{ fldcw 4(%%rsp) | fldcw word ptr [rsp + 4] }\n" + "{ addq $8, %%rsp | add rsp, 8 }\n" + : "=t" (int_part) : "0" (value) : "eax"); /* round */ #elif defined(_X86_) || defined(__i386__) - asm volatile ("push %%eax\n\tsubl $8, %%esp\n" - "fnstcw 4(%%esp)\n" - "movzwl 4(%%esp), %%eax\n" - "orb $12, %%ah\n" - "movw %%ax, (%%esp)\n" - "fldcw (%%esp)\n" - "frndint\n" - "fldcw 4(%%esp)\n" - "addl $8, %%esp\n\tpop %%eax\n" : "=t" (int_part) : "0" (value) : "eax"); /* round */ + asm volatile ( + "{ push %%eax | push eax }\n" + "{ subl $8, %%esp | sub esp, 8 }\n" + "{ fnstcw 4(%%esp) | fnstcw word ptr [esp + 4] }\n" + "{ movzwl 4(%%esp), %%eax | movzx eax, word ptr [esp + 4] }\n" + "{ orb $12, %%ah | or eax, 0x0C00 }\n" + "{ movw %%ax, (%%esp) | mov word ptr [esp], ax }\n" + "{ fldcw (%%esp) | fldcw word ptr [esp] }\n" + "{ frndint | frndint }\n" + "{ fldcw 4(%%esp) | fldcw word ptr [esp + 4] }\n" + "{ addl $8, %%esp | add esp, 8 }\n" + "{ pop %%eax | pop eax }\n" + : "=t" (int_part) : "0" (value) : "eax"); /* round */ #else int_part = truncf(value); #endif diff --git a/mingw-w64-crt/math/modfl.c b/mingw-w64-crt/math/modfl.c index 33593e6de..a87b5a1d1 100644 --- a/mingw-w64-crt/math/modfl.c +++ b/mingw-w64-crt/math/modfl.c @@ -13,25 +13,31 @@ modfl (long double value, long double* iptr) { long double int_part = 0.0L; /* truncate */ #if defined(_AMD64_) || defined(__x86_64__) - asm volatile ("subq $8, %%rsp\n" - "fnstcw 4(%%rsp)\n" - "movzwl 4(%%rsp), %%eax\n" - "orb $12, %%ah\n" - "movw %%ax, 
(%%rsp)\n" - "fldcw (%%rsp)\n" - "frndint\n" - "fldcw 4(%%rsp)\n" - "addq $8, %%rsp\n" : "=t" (int_part) : "0" (value) : "eax"); /* round */ + asm volatile ( + "{ subq $8, %%rsp | sub rsp, 8 }\n" + "{ fnstcw 4(%%rsp) | fnstcw word ptr [rsp + 4] }\n" + "{ movzwl 4(%%rsp), %%eax | movzx eax, word ptr [rsp + 4] }\n" + "{ orb $12, %%ah | or eax, 0x0C00 }\n" + "{ movw %%ax, (%%rsp) | mov word ptr [rsp], ax }\n" + "{ fldcw (%%rsp) | fldcw word ptr [rsp] }\n" + "{ frndint | frndint }\n" + "{ fldcw 4(%%rsp) | fldcw word ptr [rsp + 4] }\n" + "{ addq $8, %%rsp | add rsp, 8 }\n" + : "=t" (int_part) : "0" (value) : "eax"); /* round */ #elif defined(_X86_) || defined(__i386__) - asm volatile ("push %%eax\n\tsubl $8, %%esp\n" - "fnstcw 4(%%esp)\n" - "movzwl 4(%%esp), %%eax\n" - "orb $12, %%ah\n" - "movw %%ax, (%%esp)\n" - "fldcw (%%esp)\n" - "frndint\n" - "fldcw 4(%%esp)\n" - "addl $8, %%esp\n\tpop %%eax\n" : "=t" (int_part) : "0" (value) : "eax"); /* round */ + asm volatile ( + "{ push %%eax | push eax }\n" + "{ subl $8, %%esp | sub esp, 8 }\n" + "{ fnstcw 4(%%esp) | fnstcw word ptr [esp + 4] }\n" + "{ movzwl 4(%%esp), %%eax | movzx eax, word ptr [esp + 4] }\n" + "{ orb $12, %%ah | or eax, 0x0C00 }\n" + "{ movw %%ax, (%%esp) | mov word ptr [esp], ax }\n" + "{ fldcw (%%esp) | fldcw word ptr [esp] }\n" + "{ frndint | frndint }\n" + "{ fldcw 4(%%esp) | fldcw word ptr [esp + 4] }\n" + "{ addl $8, %%esp | add esp, 8 }\n" + "{ pop %%eax | pop eax }\n" + : "=t" (int_part) : "0" (value) : "eax"); /* round */ #else int_part = truncl(value); #endif diff --git a/mingw-w64-crt/math/x86/cossin.c b/mingw-w64-crt/math/x86/cossin.c index cb3340545..6eebb6255 100644 --- a/mingw-w64-crt/math/x86/cossin.c +++ b/mingw-w64-crt/math/x86/cossin.c @@ -12,19 +12,20 @@ void sincos (double __x, double *p_sin, double *p_cos) { long double c, s; - __asm__ __volatile__ ("fsincos\n\t" - "fnstsw %%ax\n\t" - "testl $0x400, %%eax\n\t" - "jz 1f\n\t" - "fldpi\n\t" - "fadd %%st(0)\n\t" - "fxch %%st(1)\n\t" - "2: 
fprem1\n\t" - "fnstsw %%ax\n\t" - "testl $0x400, %%eax\n\t" - "jnz 2b\n\t" - "fstp %%st(1)\n\t" - "fsincos\n\t" + __asm__ __volatile__ ( + " { fsincos | fsincos }\n\t" + " { fnstsw %%ax | fnstsw ax }\n\t" + " { testl $0x400, %%eax | test eax, 0x400 }\n\t" + " { jz 1f | jz 1f }\n\t" + " { fldpi | fldpi }\n\t" + " { fadd %%st(0) | fadd st(0) }\n\t" + " { fxch %%st(1) | fxch st(1) }\n\t" + "2:{ fprem1 | fprem1 }\n\t" + " { fnstsw %%ax | fnstsw ax }\n\t" + " { testl $0x400, %%eax | test eax, 0x400 }\n\t" + " { jnz 2b | jnz 2b }\n\t" + " { fstp %%st(1) | fstp st(1) }\n\t" + " { fsincos | fsincos }\n\t" "1:" : "=t" (c), "=u" (s) : "0" (__x) : "eax"); *p_sin = (double) s; *p_cos = (double) c; @@ -34,19 +35,20 @@ void sincosf (float __x, float *p_sin, float *p_cos) { long double c, s; - __asm__ __volatile__ ("fsincos\n\t" - "fnstsw %%ax\n\t" - "testl $0x400, %%eax\n\t" - "jz 1f\n\t" - "fldpi\n\t" - "fadd %%st(0)\n\t" - "fxch %%st(1)\n\t" - "2: fprem1\n\t" - "fnstsw %%ax\n\t" - "testl $0x400, %%eax\n\t" - "jnz 2b\n\t" - "fstp %%st(1)\n\t" - "fsincos\n\t" + __asm__ __volatile__ ( + " { fsincos | fsincos }\n\t" + " { fnstsw %%ax | fnstsw ax }\n\t" + " { testl $0x400, %%eax | test eax, 0x400 }\n\t" + " { jz 1f | jz 1f }\n\t" + " { fldpi | fldpi }\n\t" + " { fadd %%st(0) | fadd st(0) }\n\t" + " { fxch %%st(1) | fxch st(1) }\n\t" + "2:{ fprem1 | fprem1 }\n\t" + " { fnstsw %%ax | fnstsw ax }\n\t" + " { testl $0x400, %%eax | test eax, 0x400 }\n\t" + " { jnz 2b | jnz 2b }\n\t" + " { fstp %%st(1) | fstp st(1) }\n\t" + " { fsincos | fsincos }\n\t" "1:" : "=t" (c), "=u" (s) : "0" (__x) : "eax"); *p_sin = (float) s; *p_cos = (float) c; @@ -56,19 +58,20 @@ void sincosl (long double __x, long double *p_sin, long double *p_cos) { long double c, s; - __asm__ __volatile__ ("fsincos\n\t" - "fnstsw %%ax\n\t" - "testl $0x400, %%eax\n\t" - "jz 1f\n\t" - "fldpi\n\t" - "fadd %%st(0)\n\t" - "fxch %%st(1)\n\t" - "2: fprem1\n\t" - "fnstsw %%ax\n\t" - "testl $0x400, %%eax\n\t" - "jnz 2b\n\t" - "fstp 
%%st(1)\n\t" - "fsincos\n\t" + __asm__ __volatile__ ( + " { fsincos | fsincos }\n\t" + " { fnstsw %%ax | fnstsw ax }\n\t" + " { testl $0x400, %%eax | test eax, 0x400 }\n\t" + " { jz 1f | jz 1f }\n\t" + " { fldpi | fldpi }\n\t" + " { fadd %%st(0) | fadd st(0) }\n\t" + " { fxch %%st(1) | fxch st(1) }\n\t" + "2:{ fprem1 | fprem1 }\n\t" + " { fnstsw %%ax | fnstsw ax }\n\t" + " { testl $0x400, %%eax | test eax, 0x400 }\n\t" + " { jnz 2b | jnz 2b }\n\t" + " { fstp %%st(1) | fstp st(1) }\n\t" + " { fsincos | fsincos }\n\t" "1:" : "=t" (c), "=u" (s) : "0" (__x) : "eax"); *p_sin = s; *p_cos = c; diff --git a/mingw-w64-crt/math/x86/exp.def.h b/mingw-w64-crt/math/x86/exp.def.h index 33a176aec..78b52f653 100644 --- a/mingw-w64-crt/math/x86/exp.def.h +++ b/mingw-w64-crt/math/x86/exp.def.h @@ -53,53 +53,55 @@ __expl_internal (long double x) { long double res = 0.0L; asm volatile ( - "fldl2e\n\t" /* 1 log2(e) */ - "fmul %%st(1),%%st\n\t" /* 1 x log2(e) */ + "{ fldl2e | fldl2e }\n\t" /* 1 log2(e) */ + "{ fmul %%st(1),%%st | fmul st(1), st }\n\t" /* 1 x log2(e) */ #ifdef __x86_64__ - "subq $8, %%rsp\n" - "fnstcw 4(%%rsp)\n" - "movzwl 4(%%rsp), %%eax\n" - "orb $12, %%ah\n" - "movw %%ax, (%%rsp)\n" - "fldcw (%%rsp)\n" - "frndint\n\t" /* 1 i */ - "fld %%st(1)\n\t" /* 2 x */ - "frndint\n\t" /* 2 xi */ - "fldcw 4(%%rsp)\n" - "addq $8, %%rsp\n" + "{ subq $8, %%rsp | sub rsp, 8 }\n\t" + "{ fnstcw 4(%%rsp) | fnstcw word ptr [rsp + 4] }\n\t" + "{ movzwl 4(%%rsp), %%eax | movzx eax, word ptr [rsp + 4] }\n\t" + "{ orb $12, %%ah | or eax, 0x0C00 }\n\t" + "{ movw %%ax, (%%rsp) | mov word ptr [rsp], ax }\n\t" + "{ fldcw (%%rsp) | fldcw word ptr [rsp] }\n\t" + "{ frndint | frndint }\n\t" /* 1 i */ + "{ fld %%st(1) | fld st(1) }\n\t" /* 2 x */ + "{ frndint | frndint }\n\t" /* 2 xi */ + "{ fldcw 4(%%rsp) | fldcw word ptr [rsp + 4] }\n\t" + "{ addq $8, %%rsp | add rsp, 8 }\n\t" #else - "push %%eax\n\tsubl $8, %%esp\n" - "fnstcw 4(%%esp)\n" - "movzwl 4(%%esp), %%eax\n" - "orb $12, %%ah\n" - "movw 
%%ax, (%%esp)\n" - "fldcw (%%esp)\n" - "frndint\n\t" /* 1 i */ - "fld %%st(1)\n\t" /* 2 x */ - "frndint\n\t" /* 2 xi */ - "fldcw 4(%%esp)\n" - "addl $8, %%esp\n\tpop %%eax\n" + "{ push %%eax | push eax }\n\t" + "{ subl $8, %%esp | sub esp, 8 }\n\t" + "{ fnstcw 4(%%esp) | fnstcw word ptr [esp + 4] }\n\t" + "{ movzwl 4(%%esp), %%eax | movzx eax, word ptr [esp + 4] }\n\t" + "{ orb $12, %%ah | or eax, 0x0C00 }\n\t" + "{ movw %%ax, (%%esp) | mov word ptr [esp], ax }\n\t" + "{ fldcw (%%esp) | fldcw word ptr [esp] }\n\t" + "{ frndint | frndint }\n\t" /* 1 i */ + "{ fld %%st(1) | fld st(1) }\n\t" /* 2 x */ + "{ frndint | frndint }\n\t" /* 2 xi */ + "{ fldcw 4(%%esp) | fldcw word ptr [esp + 4] }\n\t" + "{ addl $8, %%esp | add esp, 8 }\n\t" + "{ pop %%eax | pop eax }\n\t" #endif - "fld %%st(1)\n\t" /* 3 i */ - "fldt %2\n\t" /* 4 c0 */ - "fld %%st(2)\n\t" /* 5 xi */ - "fmul %%st(1),%%st\n\t" /* 5 c0 xi */ - "fsubp %%st,%%st(2)\n\t" /* 4 f = c0 xi - i */ - "fld %%st(4)\n\t" /* 5 x */ - "fsub %%st(3),%%st\n\t" /* 5 xf = x - xi */ - "fmulp %%st,%%st(1)\n\t" /* 4 c0 xf */ - "faddp %%st,%%st(1)\n\t" /* 3 f = f + c0 xf */ - "fldt %3\n\t" /* 4 */ - "fmul %%st(4),%%st\n\t" /* 4 c1 * x */ - "faddp %%st,%%st(1)\n\t" /* 3 f = f + c1 * x */ - "f2xm1\n\t" /* 3 2^(fract(x * log2(e))) - 1 */ - "fld1\n\t" /* 4 1.0 */ - "faddp\n\t" /* 3 2^(fract(x * log2(e))) */ - "fstp %%st(1)\n\t" /* 2 */ - "fscale\n\t" /* 2 scale factor is st(1); e^x */ - "fstp %%st(1)\n\t" /* 1 */ - "fstp %%st(1)\n\t" /* 0 */ + "{ fld %%st(1) | fld st(1) }\n\t" /* 3 i */ + "{ fldt %2 | fld %2 }\n\t" /* 4 c0 */ + "{ fld %%st(2) | fld st(2) }\n\t" /* 5 xi */ + "{ fmul %%st(1),%%st | fmul st, st(1) }\n\t" /* 5 c0 xi */ + "{ fsubp %%st,%%st(2) | fsubp st(2), st }\n\t" /* 4 f = c0 xi - i */ + "{ fld %%st(4) | fld st(4) }\n\t" /* 5 x */ + "{ fsub %%st(3),%%st | fsub st, st(3) }\n\t" /* 5 xf = x - xi */ + "{ fmulp %%st,%%st(1) | fmulp st(1), st }\n\t" /* 4 c0 xf */ + "{ faddp %%st,%%st(1) | faddp st(1), st }\n\t" /* 3 f = f + c0 
xf */ + "{ fldt %3 | fld %3 }\n\t" /* 4 */ + "{ fmul %%st(4),%%st | fmul st, st(4) }\n\t" /* 4 c1 * x */ + "{ faddp %%st,%%st(1) | faddp st(1), st }\n\t" /* 3 f = f + c1 * x */ + "{ f2xm1 | f2xm1 }\n\t" /* 3 2^(fract(x * log2(e))) - 1 */ + "{ fld1 | fld1 }\n\t" /* 4 1.0 */ + "{ faddp | faddp }\n\t" /* 3 2^(fract(x * log2(e))) */ + "{ fstp %%st(1) | fstp st(1) }\n\t" /* 2 */ + "{ fscale | fscale }\n\t" /* 2 scale factor is st(1); e^x */ + "{ fstp %%st(1) | fstp st(1) }\n\t" /* 1 */ + "{ fstp %%st(1) | fstp st(1) }\n\t" /* 0 */ : "=t" (res) : "0" (x), "m" (c0), "m" (c1) : "ax", "dx"); return res; } diff --git a/mingw-w64-crt/math/x86/pow.def.h b/mingw-w64-crt/math/x86/pow.def.h index 0cf0739fe..8f6953307 100644 --- a/mingw-w64-crt/math/x86/pow.def.h +++ b/mingw-w64-crt/math/x86/pow.def.h @@ -79,28 +79,35 @@ static __FLT_TYPE internal_modf (__FLT_TYPE value, __FLT_TYPE *iptr) { __FLT_TYPE int_part = (__FLT_TYPE) 0.0; - /* truncate */ /* truncate */ #ifdef __x86_64__ - asm volatile ("pushq %%rax\n\tsubq $8, %%rsp\n" - "fnstcw 4(%%rsp)\n" - "movzwl 4(%%rsp), %%eax\n" - "orb $12, %%ah\n" - "movw %%ax, (%%rsp)\n" - "fldcw (%%rsp)\n" - "frndint\n" - "fldcw 4(%%rsp)\n" - "addq $8, %%rsp\npopq %%rax" : "=t" (int_part) : "0" (value)); /* round */ + asm volatile ( + "{ pushq %%rax | push rax }\n" + "{ subq $8, %%rsp | sub rsp, 8 }\n" + "{ fnstcw 4(%%rsp) | fnstcw word ptr [rsp + 4] }\n" + "{ movzwl 4(%%rsp), %%eax | movzx eax, word ptr [rsp + 4] }\n" + "{ orb $12, %%ah | or eax, 0x0C00 }\n" + "{ movw %%ax, (%%rsp) | mov word ptr [rsp], ax }\n" + "{ fldcw (%%rsp) | fldcw word ptr [rsp] }\n" + "{ frndint | frndint }\n" + "{ fldcw 4(%%rsp) | fldcw word ptr [rsp + 4] }\n" + "{ addq $8, %%rsp | add rsp, 8 }\n" + "{ popq %%rax | pop rax }\n" + : "=t" (int_part) : "0" (value)); /* round */ #else - asm volatile ("push %%eax\n\tsubl $8, %%esp\n" - "fnstcw 4(%%esp)\n" - "movzwl 4(%%esp), %%eax\n" - "orb $12, %%ah\n" - "movw %%ax, (%%esp)\n" - "fldcw (%%esp)\n" - "frndint\n" - "fldcw 
4(%%esp)\n" - "addl $8, %%esp\n\tpop %%eax\n" : "=t" (int_part) : "0" (value)); /* round */ + asm volatile ( + "{ push %%eax | push eax }\n" + "{ subl $8, %%esp | sub esp, 8 }\n" + "{ fnstcw 4(%%esp) | fnstcw word ptr [esp + 4] }\n" + "{ movzwl 4(%%esp), %%eax | movzx eax, word ptr [esp + 4] }\n" + "{ orb $12, %%ah | or eax, 0x0C00 }\n" + "{ movw %%ax, (%%esp) | mov word ptr [esp], ax }\n" + "{ fldcw (%%esp) | fldcw word ptr [esp] }\n" + "{ frndint | frndint }\n" + "{ fldcw 4(%%esp) | fldcw word ptr [esp + 4] }\n" + "{ addl $8, %%esp | add esp, 8 }\n" + "{ pop %%eax | pop eax }\n" + : "=t" (int_part) : "0" (value)); /* round */ #endif if (iptr) *iptr = int_part; diff --git a/mingw-w64-crt/misc/feclearexcept.c b/mingw-w64-crt/misc/feclearexcept.c index 673528b20..32d8627fb 100644 --- a/mingw-w64-crt/misc/feclearexcept.c +++ b/mingw-w64-crt/misc/feclearexcept.c @@ -15,10 +15,10 @@ int __mingw_has_sse(void) #ifndef _WIN64 int o_flag, n_flag; - __asm__ volatile ("pushfl\n\tpopl %0" : "=mr" (o_flag)); + __asm__ volatile ("pushf{l}\n\tpop{l} %0" : "=mr" (o_flag)); n_flag = o_flag ^ 0x200000; - __asm__ volatile ("pushl %0\n\tpopfl" : : "g" (n_flag)); - __asm__ volatile ("pushfl\n\tpopl %0" : "=mr" (n_flag)); + __asm__ volatile ("push{l} %0\n\tpopf{l}" : : "g" (n_flag)); + __asm__ volatile ("pushf{l}\n\tpop{l} %0" : "=mr" (n_flag)); if (n_flag == o_flag) return 0; #endif diff --git a/mingw-w64-crt/misc/fegetround.c b/mingw-w64-crt/misc/fegetround.c index d2fdb7725..f17bd0421 100644 --- a/mingw-w64-crt/misc/fegetround.c +++ b/mingw-w64-crt/misc/fegetround.c @@ -21,7 +21,7 @@ fegetround (void) __asm__ volatile ("mrs %0, fpcr" : "=r" (fpcr)); return (fpcr & (FE_TONEAREST | FE_DOWNWARD | FE_UPWARD | FE_TOWARDZERO)); #else - int _control; + unsigned short _control; __asm__ volatile ("fnstcw %0" : "=m" (*&_control)); return (_control & (FE_TONEAREST | FE_DOWNWARD | FE_UPWARD | FE_TOWARDZERO)); #endif /* defined(_ARM_) || defined(__arm__) || defined(_ARM64_) || 
defined(__aarch64__) */ diff --git a/mingw-w64-crt/misc/winbs_uint64.c b/mingw-w64-crt/misc/winbs_uint64.c index c0b316221..c089bb5f3 100644 --- a/mingw-w64-crt/misc/winbs_uint64.c +++ b/mingw-w64-crt/misc/winbs_uint64.c @@ -4,7 +4,7 @@ unsigned long long __cdecl _byteswap_uint64(unsigned long long _Int64) { #if defined(_AMD64_) || defined(__x86_64__) unsigned long long retval; - __asm__ __volatile__ ("bswapq %[retval]" : [retval] "=rm" (retval) : "[retval]" (_Int64)); + __asm__ __volatile__ ("bswap{q} %[retval]" : [retval] "=rm" (retval) : "[retval]" (_Int64)); return retval; #elif defined(_X86_) || defined(__i386__) union { @@ -15,8 +15,8 @@ unsigned long long __cdecl _byteswap_uint64(unsigned long long _Int64) }; } retval; retval.int64part = _Int64; - __asm__ __volatile__ ("bswapl %[lowpart]\n" - "bswapl %[hipart]\n" + __asm__ __volatile__ ("bswap{l} %[lowpart]\n" + "bswap{l} %[hipart]\n" : [lowpart] "=rm" (retval.hipart), [hipart] "=rm" (retval.lowpart) : "[lowpart]" (retval.lowpart), "[hipart]" (retval.hipart)); return retval.int64part; #else diff --git a/mingw-w64-crt/misc/winbs_ulong.c b/mingw-w64-crt/misc/winbs_ulong.c index 9cd6b2907..e63f4aebe 100644 --- a/mingw-w64-crt/misc/winbs_ulong.c +++ b/mingw-w64-crt/misc/winbs_ulong.c @@ -4,7 +4,7 @@ unsigned long __cdecl _byteswap_ulong (unsigned long _Long) { #if defined(_AMD64_) || defined(__x86_64__) || defined(_X86_) || defined(__i386__) unsigned long retval; - __asm__ __volatile__ ("bswapl %[retval]" : [retval] "=rm" (retval) : "[retval]" (_Long)); + __asm__ __volatile__ ("bswap{l} %[retval]" : [retval] "=rm" (retval) : "[retval]" (_Long)); return retval; #else unsigned char *b = (void*)&_Long; diff --git a/mingw-w64-crt/misc/winbs_ushort.c b/mingw-w64-crt/misc/winbs_ushort.c index 46b57fda4..a7747c363 100644 --- a/mingw-w64-crt/misc/winbs_ushort.c +++ b/mingw-w64-crt/misc/winbs_ushort.c @@ -4,7 +4,7 @@ unsigned short __cdecl _byteswap_ushort(unsigned short _Short) { #if defined(_AMD64_) || 
defined(__x86_64__) || defined(_X86_) || defined(__i386__) unsigned short retval; - __asm__ __volatile__ ("rorw $8, %w[retval]" : [retval] "=rm" (retval) : "[retval]" (_Short)); + __asm__ __volatile__ ("{ rorw $8, %w[retval] | ror %w[retval], 8 }" : [retval] "=rm" (retval) : "[retval]" (_Short)); return retval; #else unsigned char *b = (void*)&_Short; diff --git a/mingw-w64-crt/stdio/_scprintf.c b/mingw-w64-crt/stdio/_scprintf.c index bcee08a95..55ed01bd4 100644 --- a/mingw-w64-crt/stdio/_scprintf.c +++ b/mingw-w64-crt/stdio/_scprintf.c @@ -31,7 +31,11 @@ asm ( ".globl\t" ASM_SYM(_scprintf) "\n\t" ".def\t" ASM_SYM(_scprintf) ";\t.scl\t2;\t.type\t32;\t.endef\n" ASM_SYM(_scprintf) ":\n\t" +#if 0 __REGISTER_PREFIX__ + 1 == 0 // `0 % + 1 == 0` for at&t "jmp\t*" ASM_SYM(__MINGW_IMP_SYMBOL(_scprintf)) +#else + "jmp [" ASM_SYM(__MINGW_IMP_SYMBOL(_scprintf)) "]" +#endif ); #else @@ -63,14 +67,18 @@ static void resolve_scprintf(void) asm ( ".def\t" ASM_SYM(init_scprintf) ";\t.scl\t3;\t.type\t32;\t.endef\n" ASM_SYM(init_scprintf) ":\n\t" - "pushal\n\t" + "pusha\n\t" "call\t" ASM_SYM(resolve_scprintf) "\n\t" - "popal\n\t" + "popa\n\t" /* fallthrough */ ".globl\t" ASM_SYM(_scprintf) "\n\t" ".def\t" ASM_SYM(_scprintf) ";\t.scl\t2;\t.type\t32;\t.endef\n" ASM_SYM(_scprintf) ":\n\t" +#if 0 __REGISTER_PREFIX__ + 1 == 0 // `0 % + 1 == 0` for at&t "jmp\t*" ASM_SYM(__MINGW_IMP_SYMBOL(_scprintf)) +#else + "jmp [" ASM_SYM(__MINGW_IMP_SYMBOL(_scprintf)) "]" +#endif ); #endif -- 2.34.1
OpenPGP_signature
Description: OpenPGP digital signature
_______________________________________________ Mingw-w64-public mailing list [email protected] https://lists.sourceforge.net/lists/listinfo/mingw-w64-public
