Because division assembly instructions can raise processor exceptions, the
inline asm statements that wrap them need __volatile__ annotations. Otherwise,
the compiler may apply unintended optimisations that cause spurious SIGFPEs, e.g.:
https://github.com/fredrik-johansson/arb/issues/194
See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82677 for details.
I've attached a tentative patch for longlong.h, but it needs review and
testing! Since this code has been copied to many FOSS projects, I did not have
time to test this everywhere. I only confirmed the fix works with the "divq"
x86-64 instruction, on flint's copy of longlong.h, against the failing test in
the github issue linked above.
However, I hope that the explanation I provided on the GCC bug report makes
sense, and that the fix makes sense to the GMP developers. Note in particular
"Comment 9", which explains why explicit zero-checks in C code are not enough. I
encourage those interested to run the attached test case as well, which is a
standalone C file that needs no external libraries. (It copies the udiv_qrnnd
macro from GMP, as many projects do.)
X
--
GPG: ed25519/56034877E1F87C35
GPG: rsa4096/1318EFAC5FBBDBCE
https://github.com/infinity0/pubkeys.git
--- longlong.h.orig 2017-10-25 13:23:27.127693874 +0200
+++ longlong.h 2017-10-25 13:28:51.132015224 +0200
@@ -1021,7 +1021,7 @@
: "=a" (w0), "=d" (w1) \
: "%0" ((UDItype)(u)), "rm" ((UDItype)(v)))
#define udiv_qrnnd(q, r, n1, n0, dx) /* d renamed to dx avoiding "=d" */\
- __asm__ ("divq %4" /* stringification in K&R C */ \
+ __asm__ __volatile__ ("divq %4" /* stringification in K&R C */ \
: "=a" (q), "=d" (r) \
: "0" ((UDItype)(n0)), "1" ((UDItype)(n1)), "rm" ((UDItype)(dx)))
/* bsrq destination must be a 64-bit register, hence UDItype for __cbtmp. */
@@ -1073,7 +1073,7 @@
struct {USItype __l, __h;} __i; \
} __nn; \
__nn.__i.__h = (nh); __nn.__i.__l = (nl); \
- __asm__ ("ediv %d,%n,%0" \
+ __asm__ __volatile__ ("ediv %d,%n,%0" \
: "=d" (__rq.__ll) : "dI" (__nn.__ll), "dI" (d)); \
(r) = __rq.__i.__l; (q) = __rq.__i.__h; \
} while (0)
@@ -1121,12 +1121,12 @@
: "%0" ((USItype)(u)), "dmi" ((USItype)(v)))
#define UMUL_TIME 45
#define udiv_qrnnd(q, r, n1, n0, d) \
- __asm__ ("divu%.l %4,%1:%0" \
+ __asm__ __volatile__ ("divu%.l %4,%1:%0" \
: "=d" (q), "=d" (r) \
: "0" ((USItype)(n0)), "1" ((USItype)(n1)), "dmi" ((USItype)(d)))
#define UDIV_TIME 90
#define sdiv_qrnnd(q, r, n1, n0, d) \
- __asm__ ("divs%.l %4,%1:%0" \
+ __asm__ __volatile__ ("divs%.l %4,%1:%0" \
: "=d" (q), "=d" (r) \
: "0" ((USItype)(n0)), "1" ((USItype)(n1)), "dmi" ((USItype)(d)))
#else /* for other 68k family members use 16x16->32 multiplication */
@@ -1209,7 +1209,7 @@
struct {USItype __h, __l;} __i; \
} __x, __q; \
__x.__i.__h = (n1); __x.__i.__l = (n0); \
- __asm__ ("divu.d %0,%1,%2" \
+ __asm__ __volatile__ ("divu.d %0,%1,%2" \
: "=r" (__q.__ll) : "r" (__x.__ll), "r" (d)); \
(r) = (n0) - __q.__l * (d); (q) = __q.__l; })
#define UMUL_TIME 5
@@ -1393,7 +1393,7 @@
__asm__ ("mul %0,%2,%3" : "=r" (xh), "=q" (xl) : "r" (m0), "r" (m1))
#define SMUL_TIME 4
#define sdiv_qrnnd(q, r, nh, nl, d) \
- __asm__ ("div %0,%2,%4" : "=r" (q), "=q" (r) : "r" (nh), "1" (nl), "r" (d))
+ __asm__ __volatile__ ("div %0,%2,%4" : "=r" (q), "=q" (r) : "r" (nh), "1" (nl), "r" (d))
#define UDIV_TIME 100
#endif
#endif /* 32-bit POWER architecture variants. */
@@ -1623,7 +1623,7 @@
#define udiv_qrnnd(q, r, n1, n0, d) \
do { \
USItype __q; \
- __asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0" \
+ __asm__ __volatile__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0" \
: "=r" (__q) : "r" (n1), "r" (n0), "r" (d)); \
(r) = (n0) - __q * (d); \
(q) = __q; \
@@ -1648,7 +1648,7 @@
#define udiv_qrnnd(q, r, n1, n0, d) \
do { \
USItype __q; \
- __asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0" \
+ __asm__ __volatile__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0" \
: "=r" (__q) : "r" (n1), "r" (n0), "r" (d)); \
(r) = (n0) - __q * (d); \
(q) = __q; \
@@ -1664,7 +1664,7 @@
__asm__ ("umul %2,%3,%1;rd %%y,%0" : "=r" (w1), "=r" (w0) : "r" (u), "r" (v))
#define UMUL_TIME 5
#define udiv_qrnnd(q, r, n1, n0, d) \
- __asm__ ("! Inlined udiv_qrnnd\n" \
+ __asm__ __volatile__ ("! Inlined udiv_qrnnd\n" \
" wr %%g0,%2,%%y ! Not a delayed write for sparclite\n" \
" tst %%g0\n" \
" divscc %3,%4,%%g1\n" \
@@ -1717,7 +1717,7 @@
/* Default to sparc v7 versions of umul_ppmm and udiv_qrnnd. */
#ifndef umul_ppmm
#define umul_ppmm(w1, w0, u, v) \
- __asm__ ("! Inlined umul_ppmm\n" \
+ __asm__ __volatile__ ("! Inlined umul_ppmm\n" \
" wr %%g0,%2,%%y ! SPARC has 0-3 delay insn after a wr\n" \
" sra %3,31,%%g2 ! Don't move this insn\n" \
" and %2,%%g2,%%g2 ! Don't move this insn\n" \
@@ -1848,7 +1848,7 @@
struct {SItype __l, __h;} __i; \
} __x; \
__x.__i.__h = n1; __x.__i.__l = n0; \
- __asm__ ("ediv %3,%2,%0,%1" \
+ __asm__ __volatile__ ("ediv %3,%2,%0,%1" \
: "=g" (q), "=g" (r) : "g" (__x.__ll), "g" (d)); \
} while (0)
#if 0
_______________________________________________
gmp-bugs mailing list
[email protected]
https://gmplib.org/mailman/listinfo/gmp-bugs