[PATCH] Inline assembly division needs volatile

Ximin Luo Wed, 25 Oct 2017 05:53:26 -0700

Because division instructions can raise processor exceptions, the inline
assembly statements that contain them need __volatile__ annotations. Without
the annotation, the compiler can apply unintended optimisations to these
statements that cause spurious SIGFPEs, e.g.:
https://github.com/fredrik-johansson/arb/issues/194


See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82677 for details.

I've attached a tentative patch for longlong.h, but it needs review and
testing! Since this code has been copied into many FOSS projects, I did not
have time to test the change in each of them. I only confirmed that the fix
works with the "divq" x86-64 instruction, on flint's copy of longlong.h,
against the failing test in the GitHub issue linked above.

However, I hope that the explanation I provided on the GCC bug report makes
sense, and that the fix makes sense to the GMP developers. Note in particular
"Comment 9", which explains why explicit zero-checks in C code are not enough.
I encourage those interested to run the attached test case as well; it is a
standalone C file that needs no external libraries. (It copies the udiv_qrnnd
macro from GMP, as many projects do.)

X

-- 
GPG: ed25519/56034877E1F87C35
GPG: rsa4096/1318EFAC5FBBDBCE
https://github.com/infinity0/pubkeys.git

--- longlong.h.orig	2017-10-25 13:23:27.127693874 +0200
+++ longlong.h	2017-10-25 13:28:51.132015224 +0200
@@ -1021,7 +1021,7 @@
 	   : "=a" (w0), "=d" (w1)					\
 	   : "%0" ((UDItype)(u)), "rm" ((UDItype)(v)))
 #define udiv_qrnnd(q, r, n1, n0, dx) /* d renamed to dx avoiding "=d" */\
-  __asm__ ("divq %4"		     /* stringification in K&R C */	\
+  __asm__ __volatile__ ("divq %4"		     /* stringification in K&R C */	\
 	   : "=a" (q), "=d" (r)						\
 	   : "0" ((UDItype)(n0)), "1" ((UDItype)(n1)), "rm" ((UDItype)(dx)))
 /* bsrq destination must be a 64-bit register, hence UDItype for __cbtmp. */
@@ -1073,7 +1073,7 @@
 	   struct {USItype __l, __h;} __i;				\
 	  } __nn;							\
     __nn.__i.__h = (nh); __nn.__i.__l = (nl);				\
-    __asm__ ("ediv %d,%n,%0"						\
+    __asm__ __volatile__ ("ediv %d,%n,%0"						\
 	   : "=d" (__rq.__ll) : "dI" (__nn.__ll), "dI" (d));		\
     (r) = __rq.__i.__l; (q) = __rq.__i.__h;				\
   } while (0)
@@ -1121,12 +1121,12 @@
 	   : "%0" ((USItype)(u)), "dmi" ((USItype)(v)))
 #define UMUL_TIME 45
 #define udiv_qrnnd(q, r, n1, n0, d) \
-  __asm__ ("divu%.l %4,%1:%0"						\
+  __asm__ __volatile__ ("divu%.l %4,%1:%0"						\
 	   : "=d" (q), "=d" (r)						\
 	   : "0" ((USItype)(n0)), "1" ((USItype)(n1)), "dmi" ((USItype)(d)))
 #define UDIV_TIME 90
 #define sdiv_qrnnd(q, r, n1, n0, d) \
-  __asm__ ("divs%.l %4,%1:%0"						\
+  __asm__ __volatile__ ("divs%.l %4,%1:%0"						\
 	   : "=d" (q), "=d" (r)						\
 	   : "0" ((USItype)(n0)), "1" ((USItype)(n1)), "dmi" ((USItype)(d)))
 #else /* for other 68k family members use 16x16->32 multiplication */
@@ -1209,7 +1209,7 @@
 	   struct {USItype __h, __l;} __i;				\
 	  } __x, __q;							\
   __x.__i.__h = (n1); __x.__i.__l = (n0);				\
-  __asm__ ("divu.d %0,%1,%2"						\
+  __asm__ __volatile__ ("divu.d %0,%1,%2"						\
 	   : "=r" (__q.__ll) : "r" (__x.__ll), "r" (d));		\
   (r) = (n0) - __q.__l * (d); (q) = __q.__l; })
 #define UMUL_TIME 5
@@ -1393,7 +1393,7 @@
   __asm__ ("mul %0,%2,%3" : "=r" (xh), "=q" (xl) : "r" (m0), "r" (m1))
 #define SMUL_TIME 4
 #define sdiv_qrnnd(q, r, nh, nl, d) \
-  __asm__ ("div %0,%2,%4" : "=r" (q), "=q" (r) : "r" (nh), "1" (nl), "r" (d))
+  __asm__ __volatile__ ("div %0,%2,%4" : "=r" (q), "=q" (r) : "r" (nh), "1" (nl), "r" (d))
 #define UDIV_TIME 100
 #endif
 #endif /* 32-bit POWER architecture variants.  */
@@ -1623,7 +1623,7 @@
 #define udiv_qrnnd(q, r, n1, n0, d) \
   do {									\
     USItype __q;							\
-    __asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0"			\
+    __asm__ __volatile__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0"			\
 	     : "=r" (__q) : "r" (n1), "r" (n0), "r" (d));		\
     (r) = (n0) - __q * (d);						\
     (q) = __q;								\
@@ -1648,7 +1648,7 @@
 #define udiv_qrnnd(q, r, n1, n0, d) \
   do {									\
     USItype __q;							\
-    __asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0"			\
+    __asm__ __volatile__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0"			\
 	     : "=r" (__q) : "r" (n1), "r" (n0), "r" (d));		\
     (r) = (n0) - __q * (d);						\
     (q) = __q;								\
@@ -1664,7 +1664,7 @@
   __asm__ ("umul %2,%3,%1;rd %%y,%0" : "=r" (w1), "=r" (w0) : "r" (u), "r" (v))
 #define UMUL_TIME 5
 #define udiv_qrnnd(q, r, n1, n0, d) \
-  __asm__ ("! Inlined udiv_qrnnd\n"					\
+  __asm__ __volatile__ ("! Inlined udiv_qrnnd\n"					\
 "	wr	%%g0,%2,%%y	! Not a delayed write for sparclite\n"	\
 "	tst	%%g0\n"							\
 "	divscc	%3,%4,%%g1\n"						\
@@ -1717,7 +1717,7 @@
 /* Default to sparc v7 versions of umul_ppmm and udiv_qrnnd.  */
 #ifndef umul_ppmm
 #define umul_ppmm(w1, w0, u, v) \
-  __asm__ ("! Inlined umul_ppmm\n"					\
+  __asm__ __volatile__ ("! Inlined umul_ppmm\n"					\
 "	wr	%%g0,%2,%%y	! SPARC has 0-3 delay insn after a wr\n" \
 "	sra	%3,31,%%g2	! Don't move this insn\n"		\
 "	and	%2,%%g2,%%g2	! Don't move this insn\n"		\
@@ -1848,7 +1848,7 @@
 	   struct {SItype __l, __h;} __i;				\
 	  } __x;							\
     __x.__i.__h = n1; __x.__i.__l = n0;					\
-    __asm__ ("ediv %3,%2,%0,%1"						\
+    __asm__ __volatile__ ("ediv %3,%2,%0,%1"						\
 	     : "=g" (q), "=g" (r) : "g" (__x.__ll), "g" (d));		\
   } while (0)
 #if 0

_______________________________________________
gmp-bugs mailing list
gmp-bugs@gmplib.org
https://gmplib.org/mailman/listinfo/gmp-bugs

[PATCH] Inline assembly division needs __volatile__

Reply via email to

[PATCH] Inline assembly division needs volatile