Ximin Luo:
> Because division assembly instructions can raise processor exceptions, these
> need __volatile__ annotations. Otherwise, this can result in unintended
> optimisations that cause spurious SIGFPEs, e.g.:
> https://github.com/fredrik-johansson/arb/issues/194
>
> See https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82677 for details.
>
> I've attached a tentative patch for longlong.h, but it needs review and
> testing! Since this code has been copied to many FOSS projects, I did not
> have time to test this everywhere. I only confirmed the fix works with the
> "divq" x86-64 instruction, on flint's copy of longlong.h, against the failing
> test in the github issue linked above.
>
> However I hope that the explanation I provided on the GCC bug report makes
> sense, and that the fix makes sense to the GMP developers. Note in particular
> "Comment 9" which explains why explicit zero-checks in C code are not enough.
> I encourage those interested to run the attached test case as well, which is
> a standalone C file that needs no external libraries. (It copies the
> udiv_qrnnd macro from GMP, like many projects have.)
>
Looks like I missed a few spots; here is an updated patch that should hopefully
cover all the division assembly instructions.
X
--
GPG: ed25519/56034877E1F87C35
GPG: rsa4096/1318EFAC5FBBDBCE
https://github.com/infinity0/pubkeys.git
--- longlong.h.orig 2017-10-25 13:23:27.127693874 +0200
+++ longlong.h 2017-10-25 14:08:15.891718445 +0200
@@ -408,7 +408,7 @@
: "r" (__m0), "r" (__m1)); \
} while (0)
#define udiv_qrnnd(q, r, n1, n0, d) \
- __asm__ ("dividu %0,%3,%4" \
+ __asm__ __volatile__ ("dividu %0,%3,%4" \
: "=r" (q), "=q" (r) \
: "1" (n1), "r" (n0), "r" (d))
#define count_leading_zeros(count, x) \
@@ -623,7 +623,7 @@
: "=g" (ph), "=r" (pl) \
: "%0" ((USItype)(m0)), "g" ((USItype)(m1)))
#define udiv_qrnnd(q, r, nh, nl, d) \
- __asm__ ("divx %4,%0,%1" \
+ __asm__ __volatile__ ("divx %4,%0,%1" \
: "=g" (q), "=r" (r) \
: "1" ((USItype)(nh)), "0" ((USItype)(nl)), "g" ((USItype)(d)))
#define count_leading_zeros(count, x) \
@@ -765,7 +765,7 @@
struct {USItype __h, __l;} __i; \
} __x; \
__x.__i.__h = n1; __x.__i.__l = n0; \
- __asm__ ("dlr\t%0,%2" \
+ __asm__ __volatile__ ("dlr\t%0,%2" \
: "=r" (__x.__ll) \
: "0" (__x.__ll), "r" (d)); \
(q) = __x.__i.__l; (r) = __x.__i.__h; \
@@ -775,7 +775,7 @@
do { \
register USItype __r0 __asm__ ("0") = (n1); \
register USItype __r1 __asm__ ("1") = (n0); \
- __asm__ ("dlr\t%0,%4" \
+ __asm__ __volatile__ ("dlr\t%0,%4" \
: "=r" (__r0), "=r" (__r1) \
: "r" (__r0), "r" (__r1), "r" (d)); \
(q) = __r1; (r) = __r0; \
@@ -800,7 +800,7 @@
struct {USItype __h, __l;} __i; \
} __x; \
__x.__i.__h = n1; __x.__i.__l = n0; \
- __asm__ ("dr\t%0,%2" \
+ __asm__ __volatile__ ("dr\t%0,%2" \
: "=r" (__x.__ll) \
: "0" (__x.__ll), "r" (d)); \
(q) = __x.__i.__l; (r) = __x.__i.__h; \
@@ -843,7 +843,7 @@
struct {UDItype __h, __l;} __i; \
} __x; \
__x.__i.__h = n1; __x.__i.__l = n0; \
- __asm__ ("dlgr\t%0,%2" \
+ __asm__ __volatile__ ("dlgr\t%0,%2" \
: "=r" (__x.__ll) \
: "0" (__x.__ll), "r" ((UDItype)(d))); \
(q) = __x.__i.__l; (r) = __x.__i.__h; \
@@ -880,7 +880,7 @@
: "=a" (w0), "=d" (w1) \
: "%0" ((USItype)(u)), "rm" ((USItype)(v)))
#define udiv_qrnnd(q, r, n1, n0, dx) /* d renamed to dx avoiding "=d" */\
- __asm__ ("divl %4" /* stringification in K&R C */ \
+ __asm__ __volatile__ ("divl %4" /* stringification in K&R C */ \
: "=a" (q), "=d" (r) \
: "0" ((USItype)(n0)), "1" ((USItype)(n1)), "rm" ((USItype)(dx)))
@@ -1021,7 +1021,7 @@
: "=a" (w0), "=d" (w1) \
: "%0" ((UDItype)(u)), "rm" ((UDItype)(v)))
#define udiv_qrnnd(q, r, n1, n0, dx) /* d renamed to dx avoiding "=d" */\
- __asm__ ("divq %4" /* stringification in K&R C */ \
+ __asm__ __volatile__ ("divq %4" /* stringification in K&R C */ \
: "=a" (q), "=d" (r) \
: "0" ((UDItype)(n0)), "1" ((UDItype)(n1)), "rm" ((UDItype)(dx)))
/* bsrq destination must be a 64-bit register, hence UDItype for __cbtmp. */
@@ -1073,7 +1073,7 @@
struct {USItype __l, __h;} __i; \
} __nn; \
__nn.__i.__h = (nh); __nn.__i.__l = (nl); \
- __asm__ ("ediv %d,%n,%0" \
+ __asm__ __volatile__ ("ediv %d,%n,%0" \
: "=d" (__rq.__ll) : "dI" (__nn.__ll), "dI" (d)); \
(r) = __rq.__i.__l; (q) = __rq.__i.__h; \
} while (0)
@@ -1121,12 +1121,12 @@
: "%0" ((USItype)(u)), "dmi" ((USItype)(v)))
#define UMUL_TIME 45
#define udiv_qrnnd(q, r, n1, n0, d) \
- __asm__ ("divu%.l %4,%1:%0" \
+ __asm__ __volatile__ ("divu%.l %4,%1:%0" \
: "=d" (q), "=d" (r) \
: "0" ((USItype)(n0)), "1" ((USItype)(n1)), "dmi" ((USItype)(d)))
#define UDIV_TIME 90
#define sdiv_qrnnd(q, r, n1, n0, d) \
- __asm__ ("divs%.l %4,%1:%0" \
+ __asm__ __volatile__ ("divs%.l %4,%1:%0" \
: "=d" (q), "=d" (r) \
: "0" ((USItype)(n0)), "1" ((USItype)(n1)), "dmi" ((USItype)(d)))
#else /* for other 68k family members use 16x16->32 multiplication */
@@ -1209,7 +1209,7 @@
struct {USItype __h, __l;} __i; \
} __x, __q; \
__x.__i.__h = (n1); __x.__i.__l = (n0); \
- __asm__ ("divu.d %0,%1,%2" \
+ __asm__ __volatile__ ("divu.d %0,%1,%2" \
: "=r" (__q.__ll) : "r" (__x.__ll), "r" (d)); \
(r) = (n0) - __q.__l * (d); (q) = __q.__l; })
#define UMUL_TIME 5
@@ -1293,7 +1293,7 @@
struct {USItype __l, __h;} __i; \
} __x; \
__x.__i.__h = (n1); __x.__i.__l = (n0); \
- __asm__ ("deid %2,%0" \
+ __asm__ __volatile__ ("deid %2,%0" \
: "=g" (__x.__ll) \
: "0" (__x.__ll), "g" ((USItype)(d))); \
(r) = __x.__i.__l; (q) = __x.__i.__h; })
@@ -1393,7 +1393,7 @@
__asm__ ("mul %0,%2,%3" : "=r" (xh), "=q" (xl) : "r" (m0), "r" (m1))
#define SMUL_TIME 4
#define sdiv_qrnnd(q, r, nh, nl, d) \
- __asm__ ("div %0,%2,%4" : "=r" (q), "=q" (r) : "r" (nh), "1" (nl), "r" (d))
+ __asm__ __volatile__ ("div %0,%2,%4" : "=r" (q), "=q" (r) : "r" (nh), "1" (nl), "r" (d))
#define UDIV_TIME 100
#endif
#endif /* 32-bit POWER architecture variants. */
@@ -1623,7 +1623,7 @@
#define udiv_qrnnd(q, r, n1, n0, d) \
do { \
USItype __q; \
- __asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0" \
+ __asm__ __volatile__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0" \
: "=r" (__q) : "r" (n1), "r" (n0), "r" (d)); \
(r) = (n0) - __q * (d); \
(q) = __q; \
@@ -1648,7 +1648,7 @@
#define udiv_qrnnd(q, r, n1, n0, d) \
do { \
USItype __q; \
- __asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0" \
+ __asm__ __volatile__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0" \
: "=r" (__q) : "r" (n1), "r" (n0), "r" (d)); \
(r) = (n0) - __q * (d); \
(q) = __q; \
@@ -1664,7 +1664,7 @@
__asm__ ("umul %2,%3,%1;rd %%y,%0" : "=r" (w1), "=r" (w0) : "r" (u), "r" (v))
#define UMUL_TIME 5
#define udiv_qrnnd(q, r, n1, n0, d) \
- __asm__ ("! Inlined udiv_qrnnd\n" \
+ __asm__ __volatile__ ("! Inlined udiv_qrnnd\n" \
" wr %%g0,%2,%%y ! Not a delayed write for sparclite\n" \
" tst %%g0\n" \
" divscc %3,%4,%%g1\n" \
@@ -1848,7 +1848,7 @@
struct {SItype __l, __h;} __i; \
} __x; \
__x.__i.__h = n1; __x.__i.__l = n0; \
- __asm__ ("ediv %3,%2,%0,%1" \
+ __asm__ __volatile__ ("ediv %3,%2,%0,%1" \
: "=g" (q), "=g" (r) : "g" (__x.__ll), "g" (d)); \
} while (0)
#if 0
_______________________________________________
gmp-bugs mailing list
[email protected]
https://gmplib.org/mailman/listinfo/gmp-bugs