This is the first attempt to allow the CRT to be built with `-masm=intel`. This patch itself may look messy because it contains a lot of inline changes. It should be examined with `git show --color-words='\S'`.

I have only verified that the CRT builds with {i686,x86_64}-w64-mingw32-gcc; I have not checked whether the resulting code behaves correctly at run time. Martin, would you please include this patch in your nightly tests? Note that `-masm=intel` requires the latest Clang: older versions do not accept Intel syntax in inline assembly (although they can output Intel syntax).


--
Best regards,
LIU Hao
From c019d44f8a06e4105967d5a2de4c8109a2c6edb5 Mon Sep 17 00:00:00 2001
From: LIU Hao <[email protected]>
Date: Wed, 22 Mar 2023 10:47:43 +0800
Subject: [PATCH] crt: Make CRT buildable with `-masm=intel`

The unofficial AT&T syntax will likely remain the default for the
foreseeable future. However, my long-term plan is to transition to Intel
syntax, even if that takes decades. This is the zwischenzug (intermediate
step) that allows the CRT to be built with GCC and the latest Clang using
the `-masm=intel` option, just like our headers.

Signed-off-by: LIU Hao <[email protected]>
---
 mingw-w64-crt/cfguard/mingw_cfguard_support.c |  4 +
 mingw-w64-crt/math/llrint.c                   |  2 +-
 mingw-w64-crt/math/llrintf.c                  |  2 +-
 mingw-w64-crt/math/llrintl.c                  |  2 +-
 mingw-w64-crt/math/lrint.c                    |  2 +-
 mingw-w64-crt/math/lrintf.c                   |  2 +-
 mingw-w64-crt/math/lrintl.c                   |  2 +-
 mingw-w64-crt/math/modf.c                     | 42 +++++----
 mingw-w64-crt/math/modff.c                    | 43 +++++----
 mingw-w64-crt/math/modfl.c                    | 42 +++++----
 mingw-w64-crt/math/x86/cossin.c               | 81 +++++++++--------
 mingw-w64-crt/math/x86/exp.def.h              | 88 ++++++++++---------
 mingw-w64-crt/math/x86/pow.def.h              | 45 ++++++----
 mingw-w64-crt/misc/feclearexcept.c            |  6 +-
 mingw-w64-crt/misc/fegetround.c               |  2 +-
 mingw-w64-crt/misc/winbs_uint64.c             |  6 +-
 mingw-w64-crt/misc/winbs_ulong.c              |  2 +-
 mingw-w64-crt/misc/winbs_ushort.c             |  2 +-
 mingw-w64-crt/stdio/_scprintf.c               | 12 ++-
 19 files changed, 214 insertions(+), 173 deletions(-)

diff --git a/mingw-w64-crt/cfguard/mingw_cfguard_support.c b/mingw-w64-crt/cfguard/mingw_cfguard_support.c
index cf4535afd..2c6caeb4b 100644
--- a/mingw-w64-crt/cfguard/mingw_cfguard_support.c
+++ b/mingw-w64-crt/cfguard/mingw_cfguard_support.c
@@ -21,7 +21,11 @@ static void __guard_check_icall_dummy(void) {}
 // is passed via %rax.
 __asm__(
     "__guard_dispatch_icall_dummy:\n"
+#if 0 __REGISTER_PREFIX__ + 1 == 0  // `0 % + 1 == 0` for at&t
     "    jmp *%rax\n"
+#else
+    "    jmp rax\n"
+#endif
 );
 
 // This is intentionally declared as _not_ a function pointer, so that the
diff --git a/mingw-w64-crt/math/llrint.c b/mingw-w64-crt/math/llrint.c
index 4ba7af7e6..2d05878ba 100644
--- a/mingw-w64-crt/math/llrint.c
+++ b/mingw-w64-crt/math/llrint.c
@@ -10,7 +10,7 @@ long long llrint (double x)
 {
   long long retval = 0ll;
 #if defined(_AMD64_) || defined(__x86_64__) || defined(_X86_) || defined(__i386__)
-  __asm__ __volatile__ ("fistpll %0"  : "=m" (retval) : "t" (x) : "st");
+  __asm__ __volatile__ ("fistp{ll} %0"  : "=m" (retval) : "t" (x) : "st");
 #else
   int mode = fegetround();
   if (mode == FE_DOWNWARD)
diff --git a/mingw-w64-crt/math/llrintf.c b/mingw-w64-crt/math/llrintf.c
index e8085ec4e..f5cd19fc2 100644
--- a/mingw-w64-crt/math/llrintf.c
+++ b/mingw-w64-crt/math/llrintf.c
@@ -10,7 +10,7 @@ long long llrintf (float x)
 {
   long long retval = 0ll;
 #if defined(_AMD64_) || defined(__x86_64__) || defined(_X86_) || defined(__i386__)
-  __asm__ __volatile__ ("fistpll %0"  : "=m" (retval) : "t" (x) : "st");
+  __asm__ __volatile__ ("fistp{ll} %0"  : "=m" (retval) : "t" (x) : "st");
 #else
   int mode = fegetround();
   if (mode == FE_DOWNWARD)
diff --git a/mingw-w64-crt/math/llrintl.c b/mingw-w64-crt/math/llrintl.c
index 6a2bf73d8..d3651059c 100644
--- a/mingw-w64-crt/math/llrintl.c
+++ b/mingw-w64-crt/math/llrintl.c
@@ -10,7 +10,7 @@ long long llrintl (long double x)
 {
   long long retval = 0ll;
 #if defined(_AMD64_) || defined(__x86_64__) || defined(_X86_) || defined(__i386__)
-  __asm__ __volatile__ ("fistpll %0"  : "=m" (retval) : "t" (x) : "st");
+  __asm__ __volatile__ ("fistp{ll} %0"  : "=m" (retval) : "t" (x) : "st");
 #else
   int mode = fegetround();
   if (mode == FE_DOWNWARD)
diff --git a/mingw-w64-crt/math/lrint.c b/mingw-w64-crt/math/lrint.c
index 7831446be..1bf489b85 100644
--- a/mingw-w64-crt/math/lrint.c
+++ b/mingw-w64-crt/math/lrint.c
@@ -15,7 +15,7 @@ long lrint (double x)
 #if defined(_AMD64_) || defined(__x86_64__)
   retval = _mm_cvtsd_si32(_mm_load_sd(&x));
 #elif defined(_X86_) || defined(__i386__)
-  __asm__ __volatile__ ("fistpl %0"  : "=m" (retval) : "t" (x) : "st");
+  __asm__ __volatile__ ("fistp{l} %0"  : "=m" (retval) : "t" (x) : "st");
 #elif defined(__arm__) || defined(_ARM_)
   float temp;
   __asm__ __volatile__ (
diff --git a/mingw-w64-crt/math/lrintf.c b/mingw-w64-crt/math/lrintf.c
index 1e8902f03..5a9dcfe83 100644
--- a/mingw-w64-crt/math/lrintf.c
+++ b/mingw-w64-crt/math/lrintf.c
@@ -15,7 +15,7 @@ long lrintf (float x)
 #if defined(_AMD64_) || defined(__x86_64__)
   retval = _mm_cvtss_si32(_mm_load_ss(&x));
 #elif defined(_X86_) || defined(__i386__)
-  __asm__ __volatile__ ("fistpl %0"  : "=m" (retval) : "t" (x) : "st");
+  __asm__ __volatile__ ("fistp{l} %0"  : "=m" (retval) : "t" (x) : "st");
 #elif defined(__arm__) || defined(_ARM_)
   __asm__ __volatile__ (
     "vcvtr.s32.f32    %[src], %[src]\n\t"
diff --git a/mingw-w64-crt/math/lrintl.c b/mingw-w64-crt/math/lrintl.c
index d710fac05..ad476052a 100644
--- a/mingw-w64-crt/math/lrintl.c
+++ b/mingw-w64-crt/math/lrintl.c
@@ -9,7 +9,7 @@ long lrintl (long double x)
 {
   long retval = 0l;
 #if defined(_AMD64_) || defined(__x86_64__) || defined(_X86_) || defined(__i386__)
-  __asm__ __volatile__ ("fistpl %0"  : "=m" (retval) : "t" (x) : "st");
+  __asm__ __volatile__ ("fistp{l} %0"  : "=m" (retval) : "t" (x) : "st");
 #elif defined(__arm__) || defined(_ARM_) || defined(__aarch64__) || defined(_ARM64_)
     retval = lrint(x);
 #endif
diff --git a/mingw-w64-crt/math/modf.c b/mingw-w64-crt/math/modf.c
index d2623095c..17bc3260d 100644
--- a/mingw-w64-crt/math/modf.c
+++ b/mingw-w64-crt/math/modf.c
@@ -13,25 +13,31 @@ modf (double value, double* iptr)
   double int_part = 0.0;
   /* truncate */
 #if defined(_AMD64_) || defined(__x86_64__)
-  asm volatile ("subq $8, %%rsp\n"
-    "fnstcw 4(%%rsp)\n"
-    "movzwl 4(%%rsp), %%eax\n"
-    "orb $12, %%ah\n"
-    "movw %%ax, (%%rsp)\n"
-    "fldcw (%%rsp)\n"
-    "frndint\n"
-    "fldcw 4(%%rsp)\n"
-    "addq $8, %%rsp\n" : "=t" (int_part) : "0" (value) : "eax"); /* round */
+  asm volatile (
+    "{ subq $8, %%rsp          | sub rsp, 8                     }\n"
+    "{ fnstcw 4(%%rsp)         | fnstcw word ptr [rsp + 4]      }\n"
+    "{ movzwl 4(%%rsp), %%eax  | movzx eax, word ptr [rsp + 4]  }\n"
+    "{ orb $12, %%ah           | or eax, 0x0C00                 }\n"
+    "{ movw %%ax, (%%rsp)      | mov word ptr [rsp], ax         }\n"
+    "{ fldcw (%%rsp)           | fldcw word ptr [rsp]           }\n"
+    "{ frndint                 | frndint                        }\n"
+    "{ fldcw 4(%%rsp)          | fldcw word ptr [rsp + 4]       }\n"
+    "{ addq $8, %%rsp          | add rsp, 8                     }\n"
+    : "=t" (int_part) : "0" (value) : "eax"); /* round */
 #elif defined(_X86_) || defined(__i386__)
-  asm volatile ("push %%eax\n\tsubl $8, %%esp\n"
-    "fnstcw 4(%%esp)\n"
-    "movzwl 4(%%esp), %%eax\n"
-    "orb $12, %%ah\n"
-    "movw %%ax, (%%esp)\n"
-    "fldcw (%%esp)\n"
-    "frndint\n"
-    "fldcw 4(%%esp)\n"
-    "addl $8, %%esp\n\tpop %%eax\n" : "=t" (int_part) : "0" (value) : "eax"); /* round */
+  asm volatile (
+    "{ push %%eax              | push eax                       }\n"
+    "{ subl $8, %%esp          | sub esp, 8                     }\n"
+    "{ fnstcw 4(%%esp)         | fnstcw word ptr [esp + 4]      }\n"
+    "{ movzwl 4(%%esp), %%eax  | movzx eax, word ptr [esp + 4]  }\n"
+    "{ orb $12, %%ah           | or eax, 0x0C00                 }\n"
+    "{ movw %%ax, (%%esp)      | mov word ptr [esp], ax         }\n"
+    "{ fldcw (%%esp)           | fldcw word ptr [esp]           }\n"
+    "{ frndint                 | frndint                        }\n"
+    "{ fldcw 4(%%esp)          | fldcw word ptr [esp + 4]       }\n"  /* reload the control word saved by fnstcw */
+    "{ addl $8, %%esp          | add esp, 8                     }\n"
+    "{ pop %%eax               | pop eax                       }\n"
+    : "=t" (int_part) : "0" (value) : "eax"); /* round */
 #else
   int_part = trunc(value);
 #endif
diff --git a/mingw-w64-crt/math/modff.c b/mingw-w64-crt/math/modff.c
index dcf19cfed..f80d8092c 100644
--- a/mingw-w64-crt/math/modff.c
+++ b/mingw-w64-crt/math/modff.c
@@ -11,28 +11,33 @@ float
 modff (float value, float* iptr)
 {
   float int_part = 0.0F;
-  /* truncate */ 
   /* truncate */
 #if defined(_AMD64_) || defined(__x86_64__)
-  asm volatile ("subq $8, %%rsp\n"
-    "fnstcw 4(%%rsp)\n"
-    "movzwl 4(%%rsp), %%eax\n"
-    "orb $12, %%ah\n"
-    "movw %%ax, (%%rsp)\n"
-    "fldcw (%%rsp)\n"
-    "frndint\n"
-    "fldcw 4(%%rsp)\n"
-    "addq $8, %%rsp\n" : "=t" (int_part) : "0" (value) : "eax"); /* round */
+  asm volatile (
+    "{ subq $8, %%rsp          | sub rsp, 8                     }\n"
+    "{ fnstcw 4(%%rsp)         | fnstcw word ptr [rsp + 4]      }\n"
+    "{ movzwl 4(%%rsp), %%eax  | movzx eax, word ptr [rsp + 4]  }\n"
+    "{ orb $12, %%ah           | or eax, 0x0C00                 }\n"
+    "{ movw %%ax, (%%rsp)      | mov word ptr [rsp], ax         }\n"
+    "{ fldcw (%%rsp)           | fldcw word ptr [rsp]           }\n"
+    "{ frndint                 | frndint                        }\n"
+    "{ fldcw 4(%%rsp)          | fldcw word ptr [rsp + 4]       }\n"
+    "{ addq $8, %%rsp          | add rsp, 8                     }\n"
+    : "=t" (int_part) : "0" (value) : "eax"); /* round */
 #elif defined(_X86_) || defined(__i386__)
-  asm volatile ("push %%eax\n\tsubl $8, %%esp\n"
-    "fnstcw 4(%%esp)\n"
-    "movzwl 4(%%esp), %%eax\n"
-    "orb $12, %%ah\n"
-    "movw %%ax, (%%esp)\n"
-    "fldcw (%%esp)\n"
-    "frndint\n"
-    "fldcw 4(%%esp)\n"
-    "addl $8, %%esp\n\tpop %%eax\n" : "=t" (int_part) : "0" (value) : "eax"); /* round */
+  asm volatile (
+    "{ push %%eax              | push eax                       }\n"
+    "{ subl $8, %%esp          | sub esp, 8                     }\n"
+    "{ fnstcw 4(%%esp)         | fnstcw word ptr [esp + 4]      }\n"
+    "{ movzwl 4(%%esp), %%eax  | movzx eax, word ptr [esp + 4]  }\n"
+    "{ orb $12, %%ah           | or eax, 0x0C00                 }\n"
+    "{ movw %%ax, (%%esp)      | mov word ptr [esp], ax         }\n"
+    "{ fldcw (%%esp)           | fldcw word ptr [esp]           }\n"
+    "{ frndint                 | frndint                        }\n"
+    "{ fldcw 4(%%esp)          | fldcw word ptr [esp + 4]       }\n"  /* reload the control word saved by fnstcw */
+    "{ addl $8, %%esp          | add esp, 8                     }\n"
+    "{ pop %%eax               | pop eax                       }\n"
+    : "=t" (int_part) : "0" (value) : "eax"); /* round */
 #else
   int_part = truncf(value);
 #endif
diff --git a/mingw-w64-crt/math/modfl.c b/mingw-w64-crt/math/modfl.c
index 33593e6de..a87b5a1d1 100644
--- a/mingw-w64-crt/math/modfl.c
+++ b/mingw-w64-crt/math/modfl.c
@@ -13,25 +13,31 @@ modfl (long double value, long double* iptr)
   long double int_part = 0.0L;
   /* truncate */
 #if defined(_AMD64_) || defined(__x86_64__)
-  asm volatile ("subq $8, %%rsp\n"
-    "fnstcw 4(%%rsp)\n"
-    "movzwl 4(%%rsp), %%eax\n"
-    "orb $12, %%ah\n"
-    "movw %%ax, (%%rsp)\n"
-    "fldcw (%%rsp)\n"
-    "frndint\n"
-    "fldcw 4(%%rsp)\n"
-    "addq $8, %%rsp\n" : "=t" (int_part) : "0" (value) : "eax"); /* round */
+  asm volatile (
+    "{ subq $8, %%rsp          | sub rsp, 8                     }\n"
+    "{ fnstcw 4(%%rsp)         | fnstcw word ptr [rsp + 4]      }\n"
+    "{ movzwl 4(%%rsp), %%eax  | movzx eax, word ptr [rsp + 4]  }\n"
+    "{ orb $12, %%ah           | or eax, 0x0C00                 }\n"
+    "{ movw %%ax, (%%rsp)      | mov word ptr [rsp], ax         }\n"
+    "{ fldcw (%%rsp)           | fldcw word ptr [rsp]           }\n"
+    "{ frndint                 | frndint                        }\n"
+    "{ fldcw 4(%%rsp)          | fldcw word ptr [rsp + 4]       }\n"
+    "{ addq $8, %%rsp          | add rsp, 8                     }\n"
+    : "=t" (int_part) : "0" (value) : "eax"); /* round */
 #elif defined(_X86_) || defined(__i386__)
-  asm volatile ("push %%eax\n\tsubl $8, %%esp\n"
-    "fnstcw 4(%%esp)\n"
-    "movzwl 4(%%esp), %%eax\n"
-    "orb $12, %%ah\n"
-    "movw %%ax, (%%esp)\n"
-    "fldcw (%%esp)\n"
-    "frndint\n"
-    "fldcw 4(%%esp)\n"
-    "addl $8, %%esp\n\tpop %%eax\n" : "=t" (int_part) : "0" (value) : "eax"); /* round */
+  asm volatile (
+    "{ push %%eax              | push eax                       }\n"
+    "{ subl $8, %%esp          | sub esp, 8                     }\n"
+    "{ fnstcw 4(%%esp)         | fnstcw word ptr [esp + 4]      }\n"
+    "{ movzwl 4(%%esp), %%eax  | movzx eax, word ptr [esp + 4]  }\n"
+    "{ orb $12, %%ah           | or eax, 0x0C00                 }\n"
+    "{ movw %%ax, (%%esp)      | mov word ptr [esp], ax         }\n"
+    "{ fldcw (%%esp)           | fldcw word ptr [esp]           }\n"
+    "{ frndint                 | frndint                        }\n"
+    "{ fldcw 4(%%esp)          | fldcw word ptr [esp + 4]       }\n"  /* reload the control word saved by fnstcw */
+    "{ addl $8, %%esp          | add esp, 8                     }\n"
+    "{ pop %%eax               | pop eax                       }\n"
+    : "=t" (int_part) : "0" (value) : "eax"); /* round */
 #else
   int_part = truncl(value);
 #endif
diff --git a/mingw-w64-crt/math/x86/cossin.c b/mingw-w64-crt/math/x86/cossin.c
index cb3340545..6eebb6255 100644
--- a/mingw-w64-crt/math/x86/cossin.c
+++ b/mingw-w64-crt/math/x86/cossin.c
@@ -12,19 +12,20 @@ void sincos (double __x, double *p_sin, double *p_cos)
 {
   long double c, s;
 
-  __asm__ __volatile__ ("fsincos\n\t"
-    "fnstsw    %%ax\n\t"
-    "testl     $0x400, %%eax\n\t"
-    "jz        1f\n\t"
-    "fldpi\n\t"
-    "fadd      %%st(0)\n\t"
-    "fxch      %%st(1)\n\t"
-    "2: fprem1\n\t"
-    "fnstsw    %%ax\n\t"
-    "testl     $0x400, %%eax\n\t"
-    "jnz       2b\n\t"
-    "fstp      %%st(1)\n\t"
-    "fsincos\n\t"
+  __asm__ __volatile__ (
+    "  { fsincos              | fsincos          }\n\t"
+    "  { fnstsw %%ax          | fnstsw ax        }\n\t"
+    "  { testl $0x400, %%eax  | test eax, 0x400  }\n\t"
+    "  { jz 1f                | jz 1f            }\n\t"
+    "  { fldpi                | fldpi            }\n\t"
+    "  { fadd %%st(0)         | fadd st(0)       }\n\t"
+    "  { fxch %%st(1)         | fxch st(1)       }\n\t"
+    "2:{ fprem1               | fprem1           }\n\t"
+    "  { fnstsw %%ax          | fnstsw ax        }\n\t"
+    "  { testl $0x400, %%eax  | test eax, 0x400  }\n\t"
+    "  { jnz 2b               | jnz 2b           }\n\t"
+    "  { fstp %%st(1)         | fstp st(1)       }\n\t"
+    "  { fsincos              | fsincos          }\n\t"
     "1:" : "=t" (c), "=u" (s) : "0" (__x) : "eax");
   *p_sin = (double) s;
   *p_cos = (double) c;
@@ -34,19 +35,20 @@ void sincosf (float __x, float *p_sin, float *p_cos)
 {
   long double c, s;
 
-  __asm__ __volatile__ ("fsincos\n\t"
-    "fnstsw    %%ax\n\t"
-    "testl     $0x400, %%eax\n\t"
-    "jz        1f\n\t"
-    "fldpi\n\t"
-    "fadd      %%st(0)\n\t"
-    "fxch      %%st(1)\n\t"
-    "2: fprem1\n\t"
-    "fnstsw    %%ax\n\t"
-    "testl     $0x400, %%eax\n\t"
-    "jnz       2b\n\t"
-    "fstp      %%st(1)\n\t"
-    "fsincos\n\t"
+  __asm__ __volatile__ (
+    "  { fsincos              | fsincos          }\n\t"
+    "  { fnstsw %%ax          | fnstsw ax        }\n\t"
+    "  { testl $0x400, %%eax  | test eax, 0x400  }\n\t"
+    "  { jz 1f                | jz 1f            }\n\t"
+    "  { fldpi                | fldpi            }\n\t"
+    "  { fadd %%st(0)         | fadd st(0)       }\n\t"
+    "  { fxch %%st(1)         | fxch st(1)       }\n\t"
+    "2:{ fprem1               | fprem1           }\n\t"
+    "  { fnstsw %%ax          | fnstsw ax        }\n\t"
+    "  { testl $0x400, %%eax  | test eax, 0x400  }\n\t"
+    "  { jnz 2b               | jnz 2b           }\n\t"
+    "  { fstp %%st(1)         | fstp st(1)       }\n\t"
+    "  { fsincos              | fsincos          }\n\t"
     "1:" : "=t" (c), "=u" (s) : "0" (__x) : "eax");
   *p_sin = (float) s;
   *p_cos = (float) c;
@@ -56,19 +58,20 @@ void sincosl (long double __x, long double *p_sin, long double *p_cos)
 {
   long double c, s;
 
-  __asm__ __volatile__ ("fsincos\n\t"
-    "fnstsw    %%ax\n\t"
-    "testl     $0x400, %%eax\n\t"
-    "jz        1f\n\t"
-    "fldpi\n\t"
-    "fadd      %%st(0)\n\t"
-    "fxch      %%st(1)\n\t"
-    "2: fprem1\n\t"
-    "fnstsw    %%ax\n\t"
-    "testl     $0x400, %%eax\n\t"
-    "jnz       2b\n\t"
-    "fstp      %%st(1)\n\t"
-    "fsincos\n\t"
+  __asm__ __volatile__ (
+    "  { fsincos              | fsincos          }\n\t"
+    "  { fnstsw %%ax          | fnstsw ax        }\n\t"
+    "  { testl $0x400, %%eax  | test eax, 0x400  }\n\t"
+    "  { jz 1f                | jz 1f            }\n\t"
+    "  { fldpi                | fldpi            }\n\t"
+    "  { fadd %%st(0)         | fadd st(0)       }\n\t"
+    "  { fxch %%st(1)         | fxch st(1)       }\n\t"
+    "2:{ fprem1               | fprem1           }\n\t"
+    "  { fnstsw %%ax          | fnstsw ax        }\n\t"
+    "  { testl $0x400, %%eax  | test eax, 0x400  }\n\t"
+    "  { jnz 2b               | jnz 2b           }\n\t"
+    "  { fstp %%st(1)         | fstp st(1)       }\n\t"
+    "  { fsincos              | fsincos          }\n\t"
     "1:" : "=t" (c), "=u" (s) : "0" (__x) : "eax");
   *p_sin = s;
   *p_cos = c;
diff --git a/mingw-w64-crt/math/x86/exp.def.h b/mingw-w64-crt/math/x86/exp.def.h
index 33a176aec..78b52f653 100644
--- a/mingw-w64-crt/math/x86/exp.def.h
+++ b/mingw-w64-crt/math/x86/exp.def.h
@@ -53,53 +53,55 @@ __expl_internal (long double x)
 {
   long double res = 0.0L;
   asm volatile (
-       "fldl2e\n\t"             /* 1  log2(e)         */
-       "fmul %%st(1),%%st\n\t"  /* 1  x log2(e)       */
+       "{ fldl2e                  | fldl2e                }\n\t"  /* 1  log2(e)         */
+       "{ fmul %%st(1),%%st       | fmul st, st(1)        }\n\t"  /* 1  x log2(e)       */
 
 #ifdef __x86_64__
-    "subq $8, %%rsp\n"
-    "fnstcw 4(%%rsp)\n"
-    "movzwl 4(%%rsp), %%eax\n"
-    "orb $12, %%ah\n"
-    "movw %%ax, (%%rsp)\n"
-    "fldcw (%%rsp)\n"
-    "frndint\n\t"            /* 1  i               */
-    "fld %%st(1)\n\t"        /* 2  x               */
-    "frndint\n\t"            /* 2  xi              */
-    "fldcw 4(%%rsp)\n"
-    "addq $8, %%rsp\n"
+       "{ subq $8, %%rsp          | sub rsp, 8                     }\n\t"
+       "{ fnstcw 4(%%rsp)         | fnstcw word ptr [rsp + 4]      }\n\t"
+       "{ movzwl 4(%%rsp), %%eax  | movzx eax, word ptr [rsp + 4]  }\n\t"
+       "{ orb $12, %%ah           | or eax, 0x0C00                 }\n\t"
+       "{ movw %%ax, (%%rsp)      | mov word ptr [rsp], ax         }\n\t"
+       "{ fldcw (%%rsp)           | fldcw word ptr [rsp]           }\n\t"
+       "{ frndint                 | frndint                        }\n\t"  /* 1  i               */
+       "{ fld %%st(1)             | fld st(1)                      }\n\t"  /* 2  x               */
+       "{ frndint                 | frndint                        }\n\t"  /* 2  xi              */
+       "{ fldcw 4(%%rsp)          | fldcw word ptr [rsp + 4]       }\n\t"
+       "{ addq $8, %%rsp          | add rsp, 8                     }\n\t"
 #else
-    "push %%eax\n\tsubl $8, %%esp\n"
-    "fnstcw 4(%%esp)\n"
-    "movzwl 4(%%esp), %%eax\n"
-    "orb $12, %%ah\n"
-    "movw %%ax, (%%esp)\n"
-    "fldcw (%%esp)\n"
-    "frndint\n\t"            /* 1  i               */
-    "fld %%st(1)\n\t"        /* 2  x               */
-    "frndint\n\t"            /* 2  xi              */
-    "fldcw 4(%%esp)\n"
-    "addl $8, %%esp\n\tpop %%eax\n"
+       "{ push %%eax              | push eax                       }\n\t"
+       "{ subl $8, %%esp          | sub esp, 8                     }\n\t"
+       "{ fnstcw 4(%%esp)         | fnstcw word ptr [esp + 4]      }\n\t"
+       "{ movzwl 4(%%esp), %%eax  | movzx eax, word ptr [esp + 4]  }\n\t"
+       "{ orb $12, %%ah           | or eax, 0x0C00                 }\n\t"
+       "{ movw %%ax, (%%esp)      | mov word ptr [esp], ax         }\n\t"
+       "{ fldcw (%%esp)           | fldcw word ptr [esp]           }\n\t"
+       "{ frndint                 | frndint                        }\n\t"  /* 1  i               */
+       "{ fld %%st(1)             | fld st(1)                      }\n\t"  /* 2  x               */
+       "{ frndint                 | frndint                        }\n\t"  /* 2  xi              */
+       "{ fldcw 4(%%esp)          | fldcw word ptr [esp + 4]       }\n\t"
+       "{ addl $8, %%esp          | add esp, 8                     }\n\t"
+       "{ pop %%eax               | pop eax                        }\n\t"
 #endif
-       "fld %%st(1)\n\t"        /* 3  i               */
-       "fldt %2\n\t"            /* 4  c0              */
-       "fld %%st(2)\n\t"        /* 5  xi              */
-       "fmul %%st(1),%%st\n\t"  /* 5  c0 xi           */
-       "fsubp %%st,%%st(2)\n\t" /* 4  f = c0 xi  - i  */
-       "fld %%st(4)\n\t"        /* 5  x               */
-       "fsub %%st(3),%%st\n\t"  /* 5  xf = x - xi     */
-       "fmulp %%st,%%st(1)\n\t" /* 4  c0 xf           */
-       "faddp %%st,%%st(1)\n\t" /* 3  f = f + c0 xf   */
-       "fldt %3\n\t"            /* 4                  */
-       "fmul %%st(4),%%st\n\t"  /* 4  c1 * x          */
-       "faddp %%st,%%st(1)\n\t" /* 3  f = f + c1 * x  */
-       "f2xm1\n\t"		/* 3 2^(fract(x * log2(e))) - 1 */
-       "fld1\n\t"               /* 4 1.0              */
-       "faddp\n\t"		/* 3 2^(fract(x * log2(e))) */
-       "fstp	%%st(1)\n\t"    /* 2  */
-       "fscale\n\t"	        /* 2 scale factor is st(1); e^x */
-       "fstp	%%st(1)\n\t"    /* 1  */
-       "fstp	%%st(1)\n\t"    /* 0  */
+       "{ fld %%st(1)             | fld st(1)                      }\n\t"  /* 3  i               */
+       "{ fldt %2                 | fld %2                         }\n\t"  /* 4  c0              */
+       "{ fld %%st(2)             | fld st(2)                      }\n\t"  /* 5  xi              */
+       "{ fmul %%st(1),%%st       | fmul st, st(1)                 }\n\t"  /* 5  c0 xi           */
+       "{ fsubp %%st,%%st(2)      | fsubrp st(2), st               }\n\t"  /* 4  f = c0 xi  - i  */
+       "{ fld %%st(4)             | fld st(4)                      }\n\t"  /* 5  x               */
+       "{ fsub %%st(3),%%st       | fsub st, st(3)                 }\n\t"  /* 5  xf = x - xi     */
+       "{ fmulp %%st,%%st(1)      | fmulp st(1), st                }\n\t"  /* 4  c0 xf           */
+       "{ faddp %%st,%%st(1)      | faddp st(1), st                }\n\t"  /* 3  f = f + c0 xf   */
+       "{ fldt %3                 | fld %3                         }\n\t"  /* 4                  */
+       "{ fmul %%st(4),%%st       | fmul st, st(4)                 }\n\t"  /* 4  c1 * x          */
+       "{ faddp %%st,%%st(1)      | faddp st(1), st                }\n\t"  /* 3  f = f + c1 * x  */
+       "{ f2xm1                   | f2xm1                          }\n\t"  /* 3 2^(fract(x * log2(e))) - 1 */
+       "{ fld1                    | fld1                           }\n\t"  /* 4 1.0              */
+       "{ faddp                   | faddp                          }\n\t"  /* 3 2^(fract(x * log2(e))) */
+       "{ fstp	%%st(1)           | fstp st(1)                     }\n\t"  /* 2  */
+       "{ fscale                  | fscale                         }\n\t"  /* 2 scale factor is st(1); e^x */
+       "{ fstp	%%st(1)           | fstp st(1)                     }\n\t"  /* 1  */
+       "{ fstp	%%st(1)           | fstp st(1)                     }\n\t"  /* 0  */
        : "=t" (res) : "0" (x), "m" (c0), "m" (c1) : "ax", "dx");
   return res;
 }
diff --git a/mingw-w64-crt/math/x86/pow.def.h b/mingw-w64-crt/math/x86/pow.def.h
index 0cf0739fe..8f6953307 100644
--- a/mingw-w64-crt/math/x86/pow.def.h
+++ b/mingw-w64-crt/math/x86/pow.def.h
@@ -79,28 +79,35 @@ static __FLT_TYPE
 internal_modf (__FLT_TYPE value, __FLT_TYPE *iptr)
 {
   __FLT_TYPE int_part = (__FLT_TYPE) 0.0;
-  /* truncate */ 
   /* truncate */
 #ifdef __x86_64__
-  asm volatile ("pushq %%rax\n\tsubq $8, %%rsp\n"
-    "fnstcw 4(%%rsp)\n"
-    "movzwl 4(%%rsp), %%eax\n"
-    "orb $12, %%ah\n"
-    "movw %%ax, (%%rsp)\n"
-    "fldcw (%%rsp)\n"
-    "frndint\n"
-    "fldcw 4(%%rsp)\n"
-    "addq $8, %%rsp\npopq %%rax" : "=t" (int_part) : "0" (value)); /* round */
+  asm volatile (
+    "{ pushq %%rax             | push rax                       }\n"
+    "{ subq $8, %%rsp          | sub rsp, 8                     }\n"
+    "{ fnstcw 4(%%rsp)         | fnstcw word ptr [rsp + 4]      }\n"
+    "{ movzwl 4(%%rsp), %%eax  | movzx eax, word ptr [rsp + 4]  }\n"
+    "{ orb $12, %%ah           | or eax, 0x0C00                 }\n"
+    "{ movw %%ax, (%%rsp)      | mov word ptr [rsp], ax         }\n"
+    "{ fldcw (%%rsp)           | fldcw word ptr [rsp]           }\n"
+    "{ frndint                 | frndint                        }\n"
+    "{ fldcw 4(%%rsp)          | fldcw word ptr [rsp + 4]       }\n"
+    "{ addq $8, %%rsp          | add rsp, 8                     }\n"
+    "{ popq %%rax              | pop rax                        }\n"
+    : "=t" (int_part) : "0" (value)); /* round */
 #else
-  asm volatile ("push %%eax\n\tsubl $8, %%esp\n"
-    "fnstcw 4(%%esp)\n"
-    "movzwl 4(%%esp), %%eax\n"
-    "orb $12, %%ah\n"
-    "movw %%ax, (%%esp)\n"
-    "fldcw (%%esp)\n"
-    "frndint\n"
-    "fldcw 4(%%esp)\n"
-    "addl $8, %%esp\n\tpop %%eax\n" : "=t" (int_part) : "0" (value)); /* round */
+  asm volatile (
+    "{ push %%eax              | push eax                       }\n"
+    "{ subl $8, %%esp          | sub esp, 8                     }\n"
+    "{ fnstcw 4(%%esp)         | fnstcw word ptr [esp + 4]      }\n"
+    "{ movzwl 4(%%esp), %%eax  | movzx eax, word ptr [esp + 4]  }\n"
+    "{ orb $12, %%ah           | or eax, 0x0C00                 }\n"
+    "{ movw %%ax, (%%esp)      | mov word ptr [esp], ax         }\n"
+    "{ fldcw (%%esp)           | fldcw word ptr [esp]           }\n"
+    "{ frndint                 | frndint                        }\n"
+    "{ fldcw 4(%%esp)          | fldcw word ptr [esp + 4]       }\n"
+    "{ addl $8, %%esp          | add esp, 8                     }\n"
+    "{ pop %%eax               | pop eax                        }\n"
+    : "=t" (int_part) : "0" (value)); /* round */
 #endif
   if (iptr)
     *iptr = int_part;
diff --git a/mingw-w64-crt/misc/feclearexcept.c b/mingw-w64-crt/misc/feclearexcept.c
index 673528b20..32d8627fb 100644
--- a/mingw-w64-crt/misc/feclearexcept.c
+++ b/mingw-w64-crt/misc/feclearexcept.c
@@ -15,10 +15,10 @@ int __mingw_has_sse(void)
 #ifndef _WIN64
   int o_flag, n_flag;
   
-  __asm__ volatile ("pushfl\n\tpopl %0" : "=mr" (o_flag));
+  __asm__ volatile ("pushf{l}\n\tpop{l} %0" : "=mr" (o_flag));
   n_flag = o_flag ^ 0x200000;
-  __asm__ volatile ("pushl %0\n\tpopfl" : : "g" (n_flag));
-  __asm__ volatile ("pushfl\n\tpopl %0" : "=mr" (n_flag));
+  __asm__ volatile ("push{l} %0\n\tpopf{l}" : : "g" (n_flag));
+  __asm__ volatile ("pushf{l}\n\tpop{l} %0" : "=mr" (n_flag));
   if (n_flag == o_flag)
     return 0;
 #endif
diff --git a/mingw-w64-crt/misc/fegetround.c b/mingw-w64-crt/misc/fegetround.c
index d2fdb7725..f17bd0421 100644
--- a/mingw-w64-crt/misc/fegetround.c
+++ b/mingw-w64-crt/misc/fegetround.c
@@ -21,7 +21,7 @@ fegetround (void)
   __asm__ volatile ("mrs %0, fpcr" : "=r" (fpcr));
   return (fpcr & (FE_TONEAREST | FE_DOWNWARD |  FE_UPWARD | FE_TOWARDZERO));
 #else
-  int _control;
+  unsigned short _control;
   __asm__ volatile ("fnstcw %0" : "=m" (*&_control));
   return (_control & (FE_TONEAREST | FE_DOWNWARD |  FE_UPWARD | FE_TOWARDZERO));
 #endif /* defined(_ARM_) || defined(__arm__) || defined(_ARM64_) || defined(__aarch64__) */
diff --git a/mingw-w64-crt/misc/winbs_uint64.c b/mingw-w64-crt/misc/winbs_uint64.c
index c0b316221..c089bb5f3 100644
--- a/mingw-w64-crt/misc/winbs_uint64.c
+++ b/mingw-w64-crt/misc/winbs_uint64.c
@@ -4,7 +4,7 @@ unsigned long long __cdecl _byteswap_uint64(unsigned long long _Int64)
 {
 #if defined(_AMD64_) || defined(__x86_64__)
   unsigned long long retval;
-  __asm__ __volatile__ ("bswapq %[retval]" : [retval] "=rm" (retval) : "[retval]" (_Int64));
+  __asm__ __volatile__ ("bswap{q} %[retval]" : [retval] "=rm" (retval) : "[retval]" (_Int64));
   return retval;
 #elif defined(_X86_) || defined(__i386__)
   union {
@@ -15,8 +15,8 @@ unsigned long long __cdecl _byteswap_uint64(unsigned long long _Int64)
     };
   } retval;
   retval.int64part = _Int64;
-  __asm__ __volatile__ ("bswapl %[lowpart]\n"
-    "bswapl %[hipart]\n"
+  __asm__ __volatile__ ("bswap{l} %[lowpart]\n"
+    "bswap{l} %[hipart]\n"
     : [lowpart] "=rm" (retval.hipart), [hipart] "=rm" (retval.lowpart)  : "[lowpart]" (retval.lowpart), "[hipart]" (retval.hipart));
   return retval.int64part;
 #else
diff --git a/mingw-w64-crt/misc/winbs_ulong.c b/mingw-w64-crt/misc/winbs_ulong.c
index 9cd6b2907..e63f4aebe 100644
--- a/mingw-w64-crt/misc/winbs_ulong.c
+++ b/mingw-w64-crt/misc/winbs_ulong.c
@@ -4,7 +4,7 @@ unsigned long __cdecl _byteswap_ulong (unsigned long _Long)
 {
 #if defined(_AMD64_) || defined(__x86_64__) || defined(_X86_) || defined(__i386__)
   unsigned long retval;
-  __asm__ __volatile__ ("bswapl %[retval]" : [retval] "=rm" (retval) : "[retval]" (_Long));
+  __asm__ __volatile__ ("bswap{l} %[retval]" : [retval] "=rm" (retval) : "[retval]" (_Long));
   return retval;
 #else
   unsigned char *b = (void*)&_Long;
diff --git a/mingw-w64-crt/misc/winbs_ushort.c b/mingw-w64-crt/misc/winbs_ushort.c
index 46b57fda4..a7747c363 100644
--- a/mingw-w64-crt/misc/winbs_ushort.c
+++ b/mingw-w64-crt/misc/winbs_ushort.c
@@ -4,7 +4,7 @@ unsigned short __cdecl _byteswap_ushort(unsigned short _Short)
 {
 #if defined(_AMD64_) || defined(__x86_64__) || defined(_X86_) || defined(__i386__)
   unsigned short retval;
-  __asm__ __volatile__ ("rorw $8, %w[retval]" : [retval] "=rm" (retval) : "[retval]" (_Short));
+  __asm__ __volatile__ ("{ rorw $8, %w[retval] | ror %w[retval], 8 }" : [retval] "=rm" (retval) : "[retval]" (_Short));
   return retval;
 #else
   unsigned char *b = (void*)&_Short;
diff --git a/mingw-w64-crt/stdio/_scprintf.c b/mingw-w64-crt/stdio/_scprintf.c
index bcee08a95..55ed01bd4 100644
--- a/mingw-w64-crt/stdio/_scprintf.c
+++ b/mingw-w64-crt/stdio/_scprintf.c
@@ -31,7 +31,11 @@ asm (
 ".globl\t" ASM_SYM(_scprintf) "\n\t"
 ".def\t" ASM_SYM(_scprintf) ";\t.scl\t2;\t.type\t32;\t.endef\n"
 ASM_SYM(_scprintf) ":\n\t"
+#if 0 __REGISTER_PREFIX__ + 1 == 0  // `0 % + 1 == 0` for at&t
     "jmp\t*" ASM_SYM(__MINGW_IMP_SYMBOL(_scprintf))
+#else
+    "jmp [" ASM_SYM(__MINGW_IMP_SYMBOL(_scprintf)) "]"
+#endif
 );
 
 #else
@@ -63,14 +67,18 @@ static void resolve_scprintf(void)
 asm (
 ".def\t" ASM_SYM(init_scprintf) ";\t.scl\t3;\t.type\t32;\t.endef\n"
 ASM_SYM(init_scprintf) ":\n\t"
-    "pushal\n\t"
+    "pusha\n\t"
     "call\t" ASM_SYM(resolve_scprintf) "\n\t"
-    "popal\n\t"
+    "popa\n\t"
     /* fallthrough */
 ".globl\t" ASM_SYM(_scprintf) "\n\t"
 ".def\t" ASM_SYM(_scprintf) ";\t.scl\t2;\t.type\t32;\t.endef\n"
 ASM_SYM(_scprintf) ":\n\t"
+#if 0 __REGISTER_PREFIX__ + 1 == 0  // `0 % + 1 == 0` for at&t
     "jmp\t*" ASM_SYM(__MINGW_IMP_SYMBOL(_scprintf))
+#else
+    "jmp [" ASM_SYM(__MINGW_IMP_SYMBOL(_scprintf)) "]"
+#endif
 );
 
 #endif
-- 
2.34.1

Attachment: OpenPGP_signature
Description: OpenPGP digital signature

_______________________________________________
Mingw-w64-public mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/mingw-w64-public

Reply via email to