Some fixes:

1. Fixed the order of operands of an FMUL.

2. Introduced dummy static functions for `__guard_dispatch_icall_dummy` and 
`_scprintf`.
   Extended asm statements are only allowed within function scopes.

3. There were conditional `#if`s, where the x86 path pushed and popped EAX,
   but the x86_64 path did not. When the EAX register is declared as
   clobbered, there is no need to save and restore it; and when it is used
   temporarily, it should always be declared as clobbered.


The file 'mingw_cfguard_support.c' has been compiled with GCC and Clang, targeting x86_64-w64-mingw32, with `-masm=att` and `-masm=intel`. No errors have been observed.

The file '_scprintf.c' has been compiled with GCC, target i686-w64-mingw32 (it's i386-only and MSYS2 doesn't have an i386 clang environment), with `-masm=att` and `-masm=intel`. No errors have been observed.



--
Best regards,
LIU Hao

From d3c0001bc57ef2558c1cc2966908a6bdd75c1c35 Mon Sep 17 00:00:00 2001
From: LIU Hao <[email protected]>
Date: Wed, 22 Mar 2023 10:47:43 +0800
Subject: [PATCH] crt: Make CRT buildable with `-masm=intel`

The unofficial AT&T syntax will likely be kept as the default for the
foreseeable future. However, my long-term plan is to transition to Intel
syntax, even if it takes decades. This is the zwischenzug for allowing the CRT to be built
with GCC and latest Clang with the `-masm=intel` option, just like our
headers.

Signed-off-by: LIU Hao <[email protected]>
---
 mingw-w64-crt/cfguard/mingw_cfguard_support.c | 11 ++-
 mingw-w64-crt/math/llrint.c                   |  2 +-
 mingw-w64-crt/math/llrintf.c                  |  2 +-
 mingw-w64-crt/math/llrintl.c                  |  2 +-
 mingw-w64-crt/math/lrint.c                    |  2 +-
 mingw-w64-crt/math/lrintf.c                   |  2 +-
 mingw-w64-crt/math/lrintl.c                   |  2 +-
 mingw-w64-crt/math/modf.c                     | 40 +++++----
 mingw-w64-crt/math/modff.c                    | 41 +++++----
 mingw-w64-crt/math/modfl.c                    | 40 +++++----
 mingw-w64-crt/math/x86/cossin.c               | 81 +++++++++--------
 mingw-w64-crt/math/x86/exp.def.h              | 88 +++++++++----------
 mingw-w64-crt/math/x86/pow.def.h              | 41 +++++----
 mingw-w64-crt/misc/feclearexcept.c            |  6 +-
 mingw-w64-crt/misc/fegetround.c               |  2 +-
 mingw-w64-crt/misc/winbs_uint64.c             |  6 +-
 mingw-w64-crt/misc/winbs_ulong.c              |  2 +-
 mingw-w64-crt/misc/winbs_ushort.c             |  2 +-
 mingw-w64-crt/stdio/_scprintf.c               | 54 ++++++++----
 19 files changed, 233 insertions(+), 193 deletions(-)

diff --git a/mingw-w64-crt/cfguard/mingw_cfguard_support.c 
b/mingw-w64-crt/cfguard/mingw_cfguard_support.c
index cf4535afd..c350409e8 100644
--- a/mingw-w64-crt/cfguard/mingw_cfguard_support.c
+++ b/mingw-w64-crt/cfguard/mingw_cfguard_support.c
@@ -19,10 +19,15 @@ static void __guard_check_icall_dummy(void) {}
 
 // When CFGuard is not active, directly tail-call the target address, which
 // is passed via %rax.
-__asm__(
+__attribute__((used))
+static void dummy_scope_1(void)
+{
+    __asm__(
     "__guard_dispatch_icall_dummy:\n"
-    "    jmp *%rax\n"
-);
+    "    jmp {*%%}rax\n"
+        :  // extended asm
+    );
+}
 
 // This is intentionally declared as _not_ a function pointer, so that the
 // jmp instruction is not included as a valid call target for CFGuard.
diff --git a/mingw-w64-crt/math/llrint.c b/mingw-w64-crt/math/llrint.c
index 4ba7af7e6..2d05878ba 100644
--- a/mingw-w64-crt/math/llrint.c
+++ b/mingw-w64-crt/math/llrint.c
@@ -10,7 +10,7 @@ long long llrint (double x)
 {
   long long retval = 0ll;
 #if defined(_AMD64_) || defined(__x86_64__) || defined(_X86_) || 
defined(__i386__)
-  __asm__ __volatile__ ("fistpll %0"  : "=m" (retval) : "t" (x) : "st");
+  __asm__ __volatile__ ("fistp{ll} %0"  : "=m" (retval) : "t" (x) : "st");
 #else
   int mode = fegetround();
   if (mode == FE_DOWNWARD)
diff --git a/mingw-w64-crt/math/llrintf.c b/mingw-w64-crt/math/llrintf.c
index e8085ec4e..f5cd19fc2 100644
--- a/mingw-w64-crt/math/llrintf.c
+++ b/mingw-w64-crt/math/llrintf.c
@@ -10,7 +10,7 @@ long long llrintf (float x)
 {
   long long retval = 0ll;
 #if defined(_AMD64_) || defined(__x86_64__) || defined(_X86_) || 
defined(__i386__)
-  __asm__ __volatile__ ("fistpll %0"  : "=m" (retval) : "t" (x) : "st");
+  __asm__ __volatile__ ("fistp{ll} %0"  : "=m" (retval) : "t" (x) : "st");
 #else
   int mode = fegetround();
   if (mode == FE_DOWNWARD)
diff --git a/mingw-w64-crt/math/llrintl.c b/mingw-w64-crt/math/llrintl.c
index 6a2bf73d8..d3651059c 100644
--- a/mingw-w64-crt/math/llrintl.c
+++ b/mingw-w64-crt/math/llrintl.c
@@ -10,7 +10,7 @@ long long llrintl (long double x)
 {
   long long retval = 0ll;
 #if defined(_AMD64_) || defined(__x86_64__) || defined(_X86_) || 
defined(__i386__)
-  __asm__ __volatile__ ("fistpll %0"  : "=m" (retval) : "t" (x) : "st");
+  __asm__ __volatile__ ("fistp{ll} %0"  : "=m" (retval) : "t" (x) : "st");
 #else
   int mode = fegetround();
   if (mode == FE_DOWNWARD)
diff --git a/mingw-w64-crt/math/lrint.c b/mingw-w64-crt/math/lrint.c
index 7831446be..1bf489b85 100644
--- a/mingw-w64-crt/math/lrint.c
+++ b/mingw-w64-crt/math/lrint.c
@@ -15,7 +15,7 @@ long lrint (double x)
 #if defined(_AMD64_) || defined(__x86_64__)
   retval = _mm_cvtsd_si32(_mm_load_sd(&x));
 #elif defined(_X86_) || defined(__i386__)
-  __asm__ __volatile__ ("fistpl %0"  : "=m" (retval) : "t" (x) : "st");
+  __asm__ __volatile__ ("fistp{l} %0"  : "=m" (retval) : "t" (x) : "st");
 #elif defined(__arm__) || defined(_ARM_)
   float temp;
   __asm__ __volatile__ (
diff --git a/mingw-w64-crt/math/lrintf.c b/mingw-w64-crt/math/lrintf.c
index 1e8902f03..5a9dcfe83 100644
--- a/mingw-w64-crt/math/lrintf.c
+++ b/mingw-w64-crt/math/lrintf.c
@@ -15,7 +15,7 @@ long lrintf (float x)
 #if defined(_AMD64_) || defined(__x86_64__)
   retval = _mm_cvtss_si32(_mm_load_ss(&x));
 #elif defined(_X86_) || defined(__i386__)
-  __asm__ __volatile__ ("fistpl %0"  : "=m" (retval) : "t" (x) : "st");
+  __asm__ __volatile__ ("fistp{l} %0"  : "=m" (retval) : "t" (x) : "st");
 #elif defined(__arm__) || defined(_ARM_)
   __asm__ __volatile__ (
     "vcvtr.s32.f32    %[src], %[src]\n\t"
diff --git a/mingw-w64-crt/math/lrintl.c b/mingw-w64-crt/math/lrintl.c
index d710fac05..ad476052a 100644
--- a/mingw-w64-crt/math/lrintl.c
+++ b/mingw-w64-crt/math/lrintl.c
@@ -9,7 +9,7 @@ long lrintl (long double x)
 {
   long retval = 0l;
 #if defined(_AMD64_) || defined(__x86_64__) || defined(_X86_) || 
defined(__i386__)
-  __asm__ __volatile__ ("fistpl %0"  : "=m" (retval) : "t" (x) : "st");
+  __asm__ __volatile__ ("fistp{l} %0"  : "=m" (retval) : "t" (x) : "st");
 #elif defined(__arm__) || defined(_ARM_) || defined(__aarch64__) || 
defined(_ARM64_)
     retval = lrint(x);
 #endif
diff --git a/mingw-w64-crt/math/modf.c b/mingw-w64-crt/math/modf.c
index d2623095c..9f1d8a220 100644
--- a/mingw-w64-crt/math/modf.c
+++ b/mingw-w64-crt/math/modf.c
@@ -13,25 +13,29 @@ modf (double value, double* iptr)
   double int_part = 0.0;
   /* truncate */
 #if defined(_AMD64_) || defined(__x86_64__)
-  asm volatile ("subq $8, %%rsp\n"
-    "fnstcw 4(%%rsp)\n"
-    "movzwl 4(%%rsp), %%eax\n"
-    "orb $12, %%ah\n"
-    "movw %%ax, (%%rsp)\n"
-    "fldcw (%%rsp)\n"
-    "frndint\n"
-    "fldcw 4(%%rsp)\n"
-    "addq $8, %%rsp\n" : "=t" (int_part) : "0" (value) : "eax"); /* round */
+  asm volatile (
+    "{ subq $8, %%rsp          | sub rsp, 8                     }\n"
+    "{ fnstcw 4(%%rsp)         | fnstcw word ptr [rsp + 4]      }\n"
+    "{ movzwl 4(%%rsp), %%eax  | movzx eax, word ptr [rsp + 4]  }\n"
+    "{ orb $12, %%ah           | or eax, 0x0C00                 }\n"
+    "{ movw %%ax, (%%rsp)      | mov word ptr [rsp], ax         }\n"
+    "{ fldcw (%%rsp)           | fldcw word ptr [rsp]           }\n"
+    "{ frndint                 | frndint                        }\n"
+    "{ fldcw 4(%%rsp)          | fldcw word ptr [rsp + 4]       }\n"
+    "{ addq $8, %%rsp          | add rsp, 8                     }\n"
+    : "=t" (int_part) : "0" (value) : "eax"); /* round */
 #elif defined(_X86_) || defined(__i386__)
-  asm volatile ("push %%eax\n\tsubl $8, %%esp\n"
-    "fnstcw 4(%%esp)\n"
-    "movzwl 4(%%esp), %%eax\n"
-    "orb $12, %%ah\n"
-    "movw %%ax, (%%esp)\n"
-    "fldcw (%%esp)\n"
-    "frndint\n"
-    "fldcw 4(%%esp)\n"
-    "addl $8, %%esp\n\tpop %%eax\n" : "=t" (int_part) : "0" (value) : "eax"); 
/* round */
+  asm volatile (
+    "{ subl $8, %%esp          | sub esp, 8                     }\n"
+    "{ fnstcw 4(%%esp)         | fnstcw word ptr [esp + 4]      }\n"
+    "{ movzwl 4(%%esp), %%eax  | movzx eax, word ptr [esp + 4]  }\n"
+    "{ orb $12, %%ah           | or eax, 0x0C00                 }\n"
+    "{ movw %%ax, (%%esp)      | mov word ptr [esp], ax         }\n"
+    "{ fldcw (%%esp)           | fldcw word ptr [esp]           }\n"
+    "{ frndint                 | frndint                        }\n"
+    "{ fldcw 4(%%esp)          | fldcw word ptr [esp]           }\n"
+    "{ addl $8, %%esp          | add esp, 8                     }\n"
+    : "=t" (int_part) : "0" (value) : "eax"); /* round */
 #else
   int_part = trunc(value);
 #endif
diff --git a/mingw-w64-crt/math/modff.c b/mingw-w64-crt/math/modff.c
index dcf19cfed..7a25468e5 100644
--- a/mingw-w64-crt/math/modff.c
+++ b/mingw-w64-crt/math/modff.c
@@ -11,28 +11,31 @@ float
 modff (float value, float* iptr)
 {
   float int_part = 0.0F;
-  /* truncate */ 
   /* truncate */
 #if defined(_AMD64_) || defined(__x86_64__)
-  asm volatile ("subq $8, %%rsp\n"
-    "fnstcw 4(%%rsp)\n"
-    "movzwl 4(%%rsp), %%eax\n"
-    "orb $12, %%ah\n"
-    "movw %%ax, (%%rsp)\n"
-    "fldcw (%%rsp)\n"
-    "frndint\n"
-    "fldcw 4(%%rsp)\n"
-    "addq $8, %%rsp\n" : "=t" (int_part) : "0" (value) : "eax"); /* round */
+  asm volatile (
+    "{ subq $8, %%rsp          | sub rsp, 8                     }\n"
+    "{ fnstcw 4(%%rsp)         | fnstcw word ptr [rsp + 4]      }\n"
+    "{ movzwl 4(%%rsp), %%eax  | movzx eax, word ptr [rsp + 4]  }\n"
+    "{ orb $12, %%ah           | or eax, 0x0C00                 }\n"
+    "{ movw %%ax, (%%rsp)      | mov word ptr [rsp], ax         }\n"
+    "{ fldcw (%%rsp)           | fldcw word ptr [rsp]           }\n"
+    "{ frndint                 | frndint                        }\n"
+    "{ fldcw 4(%%rsp)          | fldcw word ptr [rsp + 4]       }\n"
+    "{ addq $8, %%rsp          | add rsp, 8                     }\n"
+    : "=t" (int_part) : "0" (value) : "eax"); /* round */
 #elif defined(_X86_) || defined(__i386__)
-  asm volatile ("push %%eax\n\tsubl $8, %%esp\n"
-    "fnstcw 4(%%esp)\n"
-    "movzwl 4(%%esp), %%eax\n"
-    "orb $12, %%ah\n"
-    "movw %%ax, (%%esp)\n"
-    "fldcw (%%esp)\n"
-    "frndint\n"
-    "fldcw 4(%%esp)\n"
-    "addl $8, %%esp\n\tpop %%eax\n" : "=t" (int_part) : "0" (value) : "eax"); 
/* round */
+  asm volatile (
+    "{ subl $8, %%esp          | sub esp, 8                     }\n"
+    "{ fnstcw 4(%%esp)         | fnstcw word ptr [esp + 4]      }\n"
+    "{ movzwl 4(%%esp), %%eax  | movzx eax, word ptr [esp + 4]  }\n"
+    "{ orb $12, %%ah           | or eax, 0x0C00                 }\n"
+    "{ movw %%ax, (%%esp)      | mov word ptr [esp], ax         }\n"
+    "{ fldcw (%%esp)           | fldcw word ptr [esp]           }\n"
+    "{ frndint                 | frndint                        }\n"
+    "{ fldcw 4(%%esp)          | fldcw word ptr [esp]           }\n"
+    "{ addl $8, %%esp          | add esp, 8                     }\n"
+    : "=t" (int_part) : "0" (value) : "eax"); /* round */
 #else
   int_part = truncf(value);
 #endif
diff --git a/mingw-w64-crt/math/modfl.c b/mingw-w64-crt/math/modfl.c
index 33593e6de..f903b2246 100644
--- a/mingw-w64-crt/math/modfl.c
+++ b/mingw-w64-crt/math/modfl.c
@@ -13,25 +13,29 @@ modfl (long double value, long double* iptr)
   long double int_part = 0.0L;
   /* truncate */
 #if defined(_AMD64_) || defined(__x86_64__)
-  asm volatile ("subq $8, %%rsp\n"
-    "fnstcw 4(%%rsp)\n"
-    "movzwl 4(%%rsp), %%eax\n"
-    "orb $12, %%ah\n"
-    "movw %%ax, (%%rsp)\n"
-    "fldcw (%%rsp)\n"
-    "frndint\n"
-    "fldcw 4(%%rsp)\n"
-    "addq $8, %%rsp\n" : "=t" (int_part) : "0" (value) : "eax"); /* round */
+  asm volatile (
+    "{ subq $8, %%rsp          | sub rsp, 8                     }\n"
+    "{ fnstcw 4(%%rsp)         | fnstcw word ptr [rsp + 4]      }\n"
+    "{ movzwl 4(%%rsp), %%eax  | movzx eax, word ptr [rsp + 4]  }\n"
+    "{ orb $12, %%ah           | or eax, 0x0C00                 }\n"
+    "{ movw %%ax, (%%rsp)      | mov word ptr [rsp], ax         }\n"
+    "{ fldcw (%%rsp)           | fldcw word ptr [rsp]           }\n"
+    "{ frndint                 | frndint                        }\n"
+    "{ fldcw 4(%%rsp)          | fldcw word ptr [rsp + 4]       }\n"
+    "{ addq $8, %%rsp          | add rsp, 8                     }\n"
+    : "=t" (int_part) : "0" (value) : "eax"); /* round */
 #elif defined(_X86_) || defined(__i386__)
-  asm volatile ("push %%eax\n\tsubl $8, %%esp\n"
-    "fnstcw 4(%%esp)\n"
-    "movzwl 4(%%esp), %%eax\n"
-    "orb $12, %%ah\n"
-    "movw %%ax, (%%esp)\n"
-    "fldcw (%%esp)\n"
-    "frndint\n"
-    "fldcw 4(%%esp)\n"
-    "addl $8, %%esp\n\tpop %%eax\n" : "=t" (int_part) : "0" (value) : "eax"); 
/* round */
+  asm volatile (
+    "{ subl $8, %%esp          | sub esp, 8                     }\n"
+    "{ fnstcw 4(%%esp)         | fnstcw word ptr [esp + 4]      }\n"
+    "{ movzwl 4(%%esp), %%eax  | movzx eax, word ptr [esp + 4]  }\n"
+    "{ orb $12, %%ah           | or eax, 0x0C00                 }\n"
+    "{ movw %%ax, (%%esp)      | mov word ptr [esp], ax         }\n"
+    "{ fldcw (%%esp)           | fldcw word ptr [esp]           }\n"
+    "{ frndint                 | frndint                        }\n"
+    "{ fldcw 4(%%esp)          | fldcw word ptr [esp]           }\n"
+    "{ addl $8, %%esp          | add esp, 8                     }\n"
+    : "=t" (int_part) : "0" (value) : "eax"); /* round */
 #else
   int_part = truncl(value);
 #endif
diff --git a/mingw-w64-crt/math/x86/cossin.c b/mingw-w64-crt/math/x86/cossin.c
index cb3340545..6eebb6255 100644
--- a/mingw-w64-crt/math/x86/cossin.c
+++ b/mingw-w64-crt/math/x86/cossin.c
@@ -12,19 +12,20 @@ void sincos (double __x, double *p_sin, double *p_cos)
 {
   long double c, s;
 
-  __asm__ __volatile__ ("fsincos\n\t"
-    "fnstsw    %%ax\n\t"
-    "testl     $0x400, %%eax\n\t"
-    "jz        1f\n\t"
-    "fldpi\n\t"
-    "fadd      %%st(0)\n\t"
-    "fxch      %%st(1)\n\t"
-    "2: fprem1\n\t"
-    "fnstsw    %%ax\n\t"
-    "testl     $0x400, %%eax\n\t"
-    "jnz       2b\n\t"
-    "fstp      %%st(1)\n\t"
-    "fsincos\n\t"
+  __asm__ __volatile__ (
+    "  { fsincos              | fsincos          }\n\t"
+    "  { fnstsw %%ax          | fnstsw ax        }\n\t"
+    "  { testl $0x400, %%eax  | test eax, 0x400  }\n\t"
+    "  { jz 1f                | jz 1f            }\n\t"
+    "  { fldpi                | fldpi            }\n\t"
+    "  { fadd %%st(0)         | fadd st(0)       }\n\t"
+    "  { fxch %%st(1)         | fxch st(1)       }\n\t"
+    "2:{ fprem1               | fprem1           }\n\t"
+    "  { fnstsw %%ax          | fnstsw ax        }\n\t"
+    "  { testl $0x400, %%eax  | test eax, 0x400  }\n\t"
+    "  { jnz 2b               | jnz 2b           }\n\t"
+    "  { fstp %%st(1)         | fstp st(1)       }\n\t"
+    "  { fsincos              | fsincos          }\n\t"
     "1:" : "=t" (c), "=u" (s) : "0" (__x) : "eax");
   *p_sin = (double) s;
   *p_cos = (double) c;
@@ -34,19 +35,20 @@ void sincosf (float __x, float *p_sin, float *p_cos)
 {
   long double c, s;
 
-  __asm__ __volatile__ ("fsincos\n\t"
-    "fnstsw    %%ax\n\t"
-    "testl     $0x400, %%eax\n\t"
-    "jz        1f\n\t"
-    "fldpi\n\t"
-    "fadd      %%st(0)\n\t"
-    "fxch      %%st(1)\n\t"
-    "2: fprem1\n\t"
-    "fnstsw    %%ax\n\t"
-    "testl     $0x400, %%eax\n\t"
-    "jnz       2b\n\t"
-    "fstp      %%st(1)\n\t"
-    "fsincos\n\t"
+  __asm__ __volatile__ (
+    "  { fsincos              | fsincos          }\n\t"
+    "  { fnstsw %%ax          | fnstsw ax        }\n\t"
+    "  { testl $0x400, %%eax  | test eax, 0x400  }\n\t"
+    "  { jz 1f                | jz 1f            }\n\t"
+    "  { fldpi                | fldpi            }\n\t"
+    "  { fadd %%st(0)         | fadd st(0)       }\n\t"
+    "  { fxch %%st(1)         | fxch st(1)       }\n\t"
+    "2:{ fprem1               | fprem1           }\n\t"
+    "  { fnstsw %%ax          | fnstsw ax        }\n\t"
+    "  { testl $0x400, %%eax  | test eax, 0x400  }\n\t"
+    "  { jnz 2b               | jnz 2b           }\n\t"
+    "  { fstp %%st(1)         | fstp st(1)       }\n\t"
+    "  { fsincos              | fsincos          }\n\t"
     "1:" : "=t" (c), "=u" (s) : "0" (__x) : "eax");
   *p_sin = (float) s;
   *p_cos = (float) c;
@@ -56,19 +58,20 @@ void sincosl (long double __x, long double *p_sin, long 
double *p_cos)
 {
   long double c, s;
 
-  __asm__ __volatile__ ("fsincos\n\t"
-    "fnstsw    %%ax\n\t"
-    "testl     $0x400, %%eax\n\t"
-    "jz        1f\n\t"
-    "fldpi\n\t"
-    "fadd      %%st(0)\n\t"
-    "fxch      %%st(1)\n\t"
-    "2: fprem1\n\t"
-    "fnstsw    %%ax\n\t"
-    "testl     $0x400, %%eax\n\t"
-    "jnz       2b\n\t"
-    "fstp      %%st(1)\n\t"
-    "fsincos\n\t"
+  __asm__ __volatile__ (
+    "  { fsincos              | fsincos          }\n\t"
+    "  { fnstsw %%ax          | fnstsw ax        }\n\t"
+    "  { testl $0x400, %%eax  | test eax, 0x400  }\n\t"
+    "  { jz 1f                | jz 1f            }\n\t"
+    "  { fldpi                | fldpi            }\n\t"
+    "  { fadd %%st(0)         | fadd st(0)       }\n\t"
+    "  { fxch %%st(1)         | fxch st(1)       }\n\t"
+    "2:{ fprem1               | fprem1           }\n\t"
+    "  { fnstsw %%ax          | fnstsw ax        }\n\t"
+    "  { testl $0x400, %%eax  | test eax, 0x400  }\n\t"
+    "  { jnz 2b               | jnz 2b           }\n\t"
+    "  { fstp %%st(1)         | fstp st(1)       }\n\t"
+    "  { fsincos              | fsincos          }\n\t"
     "1:" : "=t" (c), "=u" (s) : "0" (__x) : "eax");
   *p_sin = s;
   *p_cos = c;
diff --git a/mingw-w64-crt/math/x86/exp.def.h b/mingw-w64-crt/math/x86/exp.def.h
index 33a176aec..7ca7c4396 100644
--- a/mingw-w64-crt/math/x86/exp.def.h
+++ b/mingw-w64-crt/math/x86/exp.def.h
@@ -53,54 +53,54 @@ __expl_internal (long double x)
 {
   long double res = 0.0L;
   asm volatile (
-       "fldl2e\n\t"             /* 1  log2(e)         */
-       "fmul %%st(1),%%st\n\t"  /* 1  x log2(e)       */
+       "{ fldl2e                  | fldl2e                }\n\t"  /* 1  
log2(e)         */
+       "{ fmul %%st(1),%%st       | fmul st, st(1)        }\n\t"  /* 1  x 
log2(e)       */
 
 #ifdef __x86_64__
-    "subq $8, %%rsp\n"
-    "fnstcw 4(%%rsp)\n"
-    "movzwl 4(%%rsp), %%eax\n"
-    "orb $12, %%ah\n"
-    "movw %%ax, (%%rsp)\n"
-    "fldcw (%%rsp)\n"
-    "frndint\n\t"            /* 1  i               */
-    "fld %%st(1)\n\t"        /* 2  x               */
-    "frndint\n\t"            /* 2  xi              */
-    "fldcw 4(%%rsp)\n"
-    "addq $8, %%rsp\n"
+       "{ subq $8, %%rsp          | sub rsp, 8                     }\n\t"
+       "{ fnstcw 4(%%rsp)         | fnstcw word ptr [rsp + 4]      }\n\t"
+       "{ movzwl 4(%%rsp), %%eax  | movzx eax, word ptr [rsp + 4]  }\n\t"
+       "{ orb $12, %%ah           | or eax, 0x0C00                 }\n\t"
+       "{ movw %%ax, (%%rsp)      | mov word ptr [rsp], ax         }\n\t"
+       "{ fldcw (%%rsp)           | fldcw word ptr [rsp]           }\n\t"
+       "{ frndint                 | frndint                        }\n\t"  /* 
1  i               */
+       "{ fld %%st(1)             | fld st(1)                      }\n\t"  /* 
2  x               */
+       "{ frndint                 | frndint                        }\n\t"  /* 
2  xi              */
+       "{ fldcw 4(%%rsp)          | fldcw word ptr [rsp + 4]       }\n\t"
+       "{ addq $8, %%rsp          | add rsp, 8                     }\n\t"
 #else
-    "push %%eax\n\tsubl $8, %%esp\n"
-    "fnstcw 4(%%esp)\n"
-    "movzwl 4(%%esp), %%eax\n"
-    "orb $12, %%ah\n"
-    "movw %%ax, (%%esp)\n"
-    "fldcw (%%esp)\n"
-    "frndint\n\t"            /* 1  i               */
-    "fld %%st(1)\n\t"        /* 2  x               */
-    "frndint\n\t"            /* 2  xi              */
-    "fldcw 4(%%esp)\n"
-    "addl $8, %%esp\n\tpop %%eax\n"
+       "{ subl $8, %%esp          | sub esp, 8                     }\n\t"
+       "{ fnstcw 4(%%esp)         | fnstcw word ptr [esp + 4]      }\n\t"
+       "{ movzwl 4(%%esp), %%eax  | movzx eax, word ptr [esp + 4]  }\n\t"
+       "{ orb $12, %%ah           | or eax, 0x0C00                 }\n\t"
+       "{ movw %%ax, (%%esp)      | mov word ptr [esp], ax         }\n\t"
+       "{ fldcw (%%esp)           | fldcw word ptr [esp]           }\n\t"
+       "{ frndint                 | frndint                        }\n\t"  /* 
1  i               */
+       "{ fld %%st(1)             | fld st(1)                      }\n\t"  /* 
2  x               */
+       "{ frndint                 | frndint                        }\n\t"  /* 
2  xi              */
+       "{ fldcw 4(%%esp)          | fldcw word ptr [esp + 4]       }\n\t"
+       "{ addl $8, %%esp          | add esp, 8                     }\n\t"
 #endif
-       "fld %%st(1)\n\t"        /* 3  i               */
-       "fldt %2\n\t"            /* 4  c0              */
-       "fld %%st(2)\n\t"        /* 5  xi              */
-       "fmul %%st(1),%%st\n\t"  /* 5  c0 xi           */
-       "fsubp %%st,%%st(2)\n\t" /* 4  f = c0 xi  - i  */
-       "fld %%st(4)\n\t"        /* 5  x               */
-       "fsub %%st(3),%%st\n\t"  /* 5  xf = x - xi     */
-       "fmulp %%st,%%st(1)\n\t" /* 4  c0 xf           */
-       "faddp %%st,%%st(1)\n\t" /* 3  f = f + c0 xf   */
-       "fldt %3\n\t"            /* 4                  */
-       "fmul %%st(4),%%st\n\t"  /* 4  c1 * x          */
-       "faddp %%st,%%st(1)\n\t" /* 3  f = f + c1 * x  */
-       "f2xm1\n\t"             /* 3 2^(fract(x * log2(e))) - 1 */
-       "fld1\n\t"               /* 4 1.0              */
-       "faddp\n\t"             /* 3 2^(fract(x * log2(e))) */
-       "fstp   %%st(1)\n\t"    /* 2  */
-       "fscale\n\t"            /* 2 scale factor is st(1); e^x */
-       "fstp   %%st(1)\n\t"    /* 1  */
-       "fstp   %%st(1)\n\t"    /* 0  */
-       : "=t" (res) : "0" (x), "m" (c0), "m" (c1) : "ax", "dx");
+       "{ fld %%st(1)             | fld st(1)                      }\n\t"  /* 
3  i               */
+       "{ fldt %2                 | fld %2                         }\n\t"  /* 
4  c0              */
+       "{ fld %%st(2)             | fld st(2)                      }\n\t"  /* 
5  xi              */
+       "{ fmul %%st(1),%%st       | fmul st, st(1)                 }\n\t"  /* 
5  c0 xi           */
+       "{ fsubp %%st,%%st(2)      | fsubp st(2), st                }\n\t"  /* 
4  f = c0 xi  - i  */
+       "{ fld %%st(4)             | fld st(4)                      }\n\t"  /* 
5  x               */
+       "{ fsub %%st(3),%%st       | fsub st, st(3)                 }\n\t"  /* 
5  xf = x - xi     */
+       "{ fmulp %%st,%%st(1)      | fmulp st(1), st                }\n\t"  /* 
4  c0 xf           */
+       "{ faddp %%st,%%st(1)      | faddp st(1), st                }\n\t"  /* 
3  f = f + c0 xf   */
+       "{ fldt %3                 | fld %3                         }\n\t"  /* 
4                  */
+       "{ fmul %%st(4),%%st       | fmul st, st(4)                 }\n\t"  /* 
4  c1 * x          */
+       "{ faddp %%st,%%st(1)      | faddp st(1), st                }\n\t"  /* 
3  f = f + c1 * x  */
+       "{ f2xm1                   | f2xm1                          }\n\t"  /* 
3 2^(fract(x * log2(e))) - 1 */
+       "{ fld1                    | fld1                           }\n\t"  /* 
4 1.0              */
+       "{ faddp                   | faddp                          }\n\t"  /* 
3 2^(fract(x * log2(e))) */
+       "{ fstp %%st(1)           | fstp st(1)                     }\n\t"  /* 2 
 */
+       "{ fscale                  | fscale                         }\n\t"  /* 
2 scale factor is st(1); e^x */
+       "{ fstp %%st(1)           | fstp st(1)                     }\n\t"  /* 1 
 */
+       "{ fstp %%st(1)           | fstp st(1)                     }\n\t"  /* 0 
 */
+       : "=t" (res) : "0" (x), "m" (c0), "m" (c1) : "eax");
   return res;
 }
 
diff --git a/mingw-w64-crt/math/x86/pow.def.h b/mingw-w64-crt/math/x86/pow.def.h
index 0cf0739fe..3da9e063f 100644
--- a/mingw-w64-crt/math/x86/pow.def.h
+++ b/mingw-w64-crt/math/x86/pow.def.h
@@ -79,28 +79,31 @@ static __FLT_TYPE
 internal_modf (__FLT_TYPE value, __FLT_TYPE *iptr)
 {
   __FLT_TYPE int_part = (__FLT_TYPE) 0.0;
-  /* truncate */ 
   /* truncate */
 #ifdef __x86_64__
-  asm volatile ("pushq %%rax\n\tsubq $8, %%rsp\n"
-    "fnstcw 4(%%rsp)\n"
-    "movzwl 4(%%rsp), %%eax\n"
-    "orb $12, %%ah\n"
-    "movw %%ax, (%%rsp)\n"
-    "fldcw (%%rsp)\n"
-    "frndint\n"
-    "fldcw 4(%%rsp)\n"
-    "addq $8, %%rsp\npopq %%rax" : "=t" (int_part) : "0" (value)); /* round */
+  asm volatile (
+    "{ subq $8, %%rsp          | sub rsp, 8                     }\n"
+    "{ fnstcw 4(%%rsp)         | fnstcw word ptr [rsp + 4]      }\n"
+    "{ movzwl 4(%%rsp), %%eax  | movzx eax, word ptr [rsp + 4]  }\n"
+    "{ orb $12, %%ah           | or eax, 0x0C00                 }\n"
+    "{ movw %%ax, (%%rsp)      | mov word ptr [rsp], ax         }\n"
+    "{ fldcw (%%rsp)           | fldcw word ptr [rsp]           }\n"
+    "{ frndint                 | frndint                        }\n"
+    "{ fldcw 4(%%rsp)          | fldcw word ptr [rsp + 4]       }\n"
+    "{ addq $8, %%rsp          | add rsp, 8                     }\n"
+    : "=t" (int_part) : "0" (value)); /* round */
 #else
-  asm volatile ("push %%eax\n\tsubl $8, %%esp\n"
-    "fnstcw 4(%%esp)\n"
-    "movzwl 4(%%esp), %%eax\n"
-    "orb $12, %%ah\n"
-    "movw %%ax, (%%esp)\n"
-    "fldcw (%%esp)\n"
-    "frndint\n"
-    "fldcw 4(%%esp)\n"
-    "addl $8, %%esp\n\tpop %%eax\n" : "=t" (int_part) : "0" (value)); /* round 
*/
+  asm volatile (
+    "{ subl $8, %%esp          | sub esp, 8                     }\n"
+    "{ fnstcw 4(%%esp)         | fnstcw word ptr [esp + 4]      }\n"
+    "{ movzwl 4(%%esp), %%eax  | movzx eax, word ptr [esp + 4]  }\n"
+    "{ orb $12, %%ah           | or eax, 0x0C00                 }\n"
+    "{ movw %%ax, (%%esp)      | mov word ptr [esp], ax         }\n"
+    "{ fldcw (%%esp)           | fldcw word ptr [esp]           }\n"
+    "{ frndint                 | frndint                        }\n"
+    "{ fldcw 4(%%esp)          | fldcw word ptr [esp + 4]       }\n"
+    "{ addl $8, %%esp          | add esp, 8                     }\n"
+    : "=t" (int_part) : "0" (value) : "eax"); /* round */
 #endif
   if (iptr)
     *iptr = int_part;
diff --git a/mingw-w64-crt/misc/feclearexcept.c 
b/mingw-w64-crt/misc/feclearexcept.c
index 673528b20..32d8627fb 100644
--- a/mingw-w64-crt/misc/feclearexcept.c
+++ b/mingw-w64-crt/misc/feclearexcept.c
@@ -15,10 +15,10 @@ int __mingw_has_sse(void)
 #ifndef _WIN64
   int o_flag, n_flag;
   
-  __asm__ volatile ("pushfl\n\tpopl %0" : "=mr" (o_flag));
+  __asm__ volatile ("pushf{l}\n\tpop{l} %0" : "=mr" (o_flag));
   n_flag = o_flag ^ 0x200000;
-  __asm__ volatile ("pushl %0\n\tpopfl" : : "g" (n_flag));
-  __asm__ volatile ("pushfl\n\tpopl %0" : "=mr" (n_flag));
+  __asm__ volatile ("push{l} %0\n\tpopf{l}" : : "g" (n_flag));
+  __asm__ volatile ("pushf{l}\n\tpop{l} %0" : "=mr" (n_flag));
   if (n_flag == o_flag)
     return 0;
 #endif
diff --git a/mingw-w64-crt/misc/fegetround.c b/mingw-w64-crt/misc/fegetround.c
index d2fdb7725..f17bd0421 100644
--- a/mingw-w64-crt/misc/fegetround.c
+++ b/mingw-w64-crt/misc/fegetround.c
@@ -21,7 +21,7 @@ fegetround (void)
   __asm__ volatile ("mrs %0, fpcr" : "=r" (fpcr));
   return (fpcr & (FE_TONEAREST | FE_DOWNWARD |  FE_UPWARD | FE_TOWARDZERO));
 #else
-  int _control;
+  unsigned short _control;
   __asm__ volatile ("fnstcw %0" : "=m" (*&_control));
   return (_control & (FE_TONEAREST | FE_DOWNWARD |  FE_UPWARD | 
FE_TOWARDZERO));
 #endif /* defined(_ARM_) || defined(__arm__) || defined(_ARM64_) || 
defined(__aarch64__) */
diff --git a/mingw-w64-crt/misc/winbs_uint64.c 
b/mingw-w64-crt/misc/winbs_uint64.c
index c0b316221..c089bb5f3 100644
--- a/mingw-w64-crt/misc/winbs_uint64.c
+++ b/mingw-w64-crt/misc/winbs_uint64.c
@@ -4,7 +4,7 @@ unsigned long long __cdecl _byteswap_uint64(unsigned long long 
_Int64)
 {
 #if defined(_AMD64_) || defined(__x86_64__)
   unsigned long long retval;
-  __asm__ __volatile__ ("bswapq %[retval]" : [retval] "=rm" (retval) : 
"[retval]" (_Int64));
+  __asm__ __volatile__ ("bswap{q} %[retval]" : [retval] "=rm" (retval) : 
"[retval]" (_Int64));
   return retval;
 #elif defined(_X86_) || defined(__i386__)
   union {
@@ -15,8 +15,8 @@ unsigned long long __cdecl _byteswap_uint64(unsigned long 
long _Int64)
     };
   } retval;
   retval.int64part = _Int64;
-  __asm__ __volatile__ ("bswapl %[lowpart]\n"
-    "bswapl %[hipart]\n"
+  __asm__ __volatile__ ("bswap{l} %[lowpart]\n"
+    "bswap{l} %[hipart]\n"
     : [lowpart] "=rm" (retval.hipart), [hipart] "=rm" (retval.lowpart)  : 
"[lowpart]" (retval.lowpart), "[hipart]" (retval.hipart));
   return retval.int64part;
 #else
diff --git a/mingw-w64-crt/misc/winbs_ulong.c b/mingw-w64-crt/misc/winbs_ulong.c
index 9cd6b2907..e63f4aebe 100644
--- a/mingw-w64-crt/misc/winbs_ulong.c
+++ b/mingw-w64-crt/misc/winbs_ulong.c
@@ -4,7 +4,7 @@ unsigned long __cdecl _byteswap_ulong (unsigned long _Long)
 {
 #if defined(_AMD64_) || defined(__x86_64__) || defined(_X86_) || 
defined(__i386__)
   unsigned long retval;
-  __asm__ __volatile__ ("bswapl %[retval]" : [retval] "=rm" (retval) : 
"[retval]" (_Long));
+  __asm__ __volatile__ ("bswap{l} %[retval]" : [retval] "=rm" (retval) : 
"[retval]" (_Long));
   return retval;
 #else
   unsigned char *b = (void*)&_Long;
diff --git a/mingw-w64-crt/misc/winbs_ushort.c 
b/mingw-w64-crt/misc/winbs_ushort.c
index 46b57fda4..a7747c363 100644
--- a/mingw-w64-crt/misc/winbs_ushort.c
+++ b/mingw-w64-crt/misc/winbs_ushort.c
@@ -4,7 +4,7 @@ unsigned short __cdecl _byteswap_ushort(unsigned short _Short)
 {
 #if defined(_AMD64_) || defined(__x86_64__) || defined(_X86_) || 
defined(__i386__)
   unsigned short retval;
-  __asm__ __volatile__ ("rorw $8, %w[retval]" : [retval] "=rm" (retval) : 
"[retval]" (_Short));
+  __asm__ __volatile__ ("{ rorw $8, %w[retval] | ror %w[retval], 8 }" : 
[retval] "=rm" (retval) : "[retval]" (_Short));
   return retval;
 #else
   unsigned char *b = (void*)&_Short;
diff --git a/mingw-w64-crt/stdio/_scprintf.c b/mingw-w64-crt/stdio/_scprintf.c
index bcee08a95..3e45ef5ab 100644
--- a/mingw-w64-crt/stdio/_scprintf.c
+++ b/mingw-w64-crt/stdio/_scprintf.c
@@ -26,13 +26,22 @@ int (__cdecl *__MINGW_IMP_SYMBOL(_scprintf))(const char * 
__restrict__, ...) = e
 
 /* gcc does not provide an easy way to call another variadic function with 
reusing current arguments
  * this source file is used only on i386, so do this function redirect via 
inline i386 assembly */
+__attribute__((used))
+static void dummy_scope_1(void)
+{
 #define ASM_SYM(sym) __MINGW64_STRINGIFY(__MINGW_USYMBOL(sym))
-asm (
-".globl\t" ASM_SYM(_scprintf) "\n\t"
-".def\t" ASM_SYM(_scprintf) ";\t.scl\t2;\t.type\t32;\t.endef\n"
-ASM_SYM(_scprintf) ":\n\t"
-    "jmp\t*" ASM_SYM(__MINGW_IMP_SYMBOL(_scprintf))
-);
+    __asm__ (
+    ".globl\t" ASM_SYM(_scprintf) "\n\t"
+    ".def\t" ASM_SYM(_scprintf) ";\t.scl\t2;\t.type\t32;\t.endef\n"
+    ASM_SYM(_scprintf) ":\n\t"
+        "{ jmp\t*" ASM_SYM(__MINGW_IMP_SYMBOL(_scprintf))
+        /* below is Intel syntax, complicated because clang doesn't support
+         * indirect jumps with memory operands at the moment.  */
+        "  | mov eax, dword ptr [" ASM_SYM(__MINGW_IMP_SYMBOL(_scprintf)) 
"]\n\t"
+        "    jmp eax }"
+        :  /* extended asm  */
+    );
+}
 
 #else
 
@@ -59,18 +68,27 @@ static void resolve_scprintf(void)
 
 /* gcc does not provide an easy way to call another variadic function with 
reusing current arguments
  * this source file is used only on i386, so do this function redirect via 
inline i386 assembly */
+__attribute__((used))
+static void dummy_scope_2(void)
+{
 #define ASM_SYM(sym) __MINGW64_STRINGIFY(__MINGW_USYMBOL(sym))
-asm (
-".def\t" ASM_SYM(init_scprintf) ";\t.scl\t3;\t.type\t32;\t.endef\n"
-ASM_SYM(init_scprintf) ":\n\t"
-    "pushal\n\t"
-    "call\t" ASM_SYM(resolve_scprintf) "\n\t"
-    "popal\n\t"
-    /* fallthrough */
-".globl\t" ASM_SYM(_scprintf) "\n\t"
-".def\t" ASM_SYM(_scprintf) ";\t.scl\t2;\t.type\t32;\t.endef\n"
-ASM_SYM(_scprintf) ":\n\t"
-    "jmp\t*" ASM_SYM(__MINGW_IMP_SYMBOL(_scprintf))
-);
+    __asm__ (
+    ".def\t" ASM_SYM(init_scprintf) ";\t.scl\t3;\t.type\t32;\t.endef\n"
+    ASM_SYM(init_scprintf) ":\n\t"
+        "pusha\n\t"
+        "call\t" ASM_SYM(resolve_scprintf) "\n\t"
+        "popa\n\t"
+        /* fallthrough */
+    ".globl\t" ASM_SYM(_scprintf) "\n\t"
+    ".def\t" ASM_SYM(_scprintf) ";\t.scl\t2;\t.type\t32;\t.endef\n"
+    ASM_SYM(_scprintf) ":\n\t"
+        "{ jmp\t*" ASM_SYM(__MINGW_IMP_SYMBOL(_scprintf))
+        /* below is Intel syntax, complicated because clang doesn't support
+         * indirect jumps with memory operands at the moment.  */
+        "  | mov eax, dword ptr [" ASM_SYM(__MINGW_IMP_SYMBOL(_scprintf)) 
"]\n\t"
+        "    jmp eax }"
+        :  /* extended asm  */
+    );
+}
 
 #endif
-- 
2.40.0

Attachment: OpenPGP_signature
Description: OpenPGP digital signature

_______________________________________________
Mingw-w64-public mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/mingw-w64-public

Reply via email to