Torbjörn Granlund:
> Ximin Luo <[email protected]> writes:
> 
>   Any chance my patch could be tested and merged?
> 
> I tried applying it, but it fails for the current head.  Manual merging
> is required.  (It applies flawlessly to GMP 6.1, though.)
> 
> If you could modify your patch to apply to the GMP head, then that would
> help.  The repo is here: https://gmplib.org/repo/gmp/
> 

I've rebased, tidied up the whitespace, and added a test case - see attached 
patches. However, I'm not sure the test case will be very useful since it is 
GCC-7.2 specific, is only for x86-64, and is sensitive to being changed in a 
way that might make GCC-7.2 not perform the target optimisation. You can decide 
whether to keep it...

(I assigned copyright to FSF to be consistent with the other files, I assume 
something this small doesn't need a specific CLA/waiver...)

>   I did not ask other projects' opinions on this yet, since they seemed
>   to have copied the code from GMP. Could you confirm that GMP is the
>   original source?
> 
> I originally wrote it for use in GCC, glibc, and GMP.  I only actively
> maintain the GMP version.
> 
> (The versions has diverged, unfortunately.  The GMP version now contains
> some GMPisms which might need cleaning up if we want to make the
> versions converge.)
> 

That would be good yes... would also be a good to put a warning at the top of 
that file along the lines of "the original version is from GMP at address 
<URL>, please submit all change requests there rather than this project, and 
update regularly".

Might also be a good idea to expose it as a public header file, then at least 
projects that already depend on GMP (e.g. flint and other scientific stuff) can 
just #include it instead of copying it.

X

-- 
GPG: ed25519/56034877E1F87C35
GPG: rsa4096/1318EFAC5FBBDBCE
https://github.com/infinity0/pubkeys.git
From cab3a2d0bd1288b50c71c708791d4e2903317134 Mon Sep 17 00:00:00 2001
From: Ximin Luo <[email protected]>
Date: Wed, 8 Nov 2017 12:26:11 +0100
Subject: [PATCH 1/2] Add test for udiv faulty optimisation, fixed next commit

---
 tests/Makefile.am      |  2 +-
 tests/t-longlong-opt.c | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 58 insertions(+), 1 deletion(-)
 create mode 100644 tests/t-longlong-opt.c

diff --git a/tests/Makefile.am b/tests/Makefile.am
index ff42b66a4..968c8237b 100644
--- a/tests/Makefile.am
+++ b/tests/Makefile.am
@@ -35,6 +35,6 @@ libtests_la_SOURCES = tests.h \
 libtests_la_DEPENDENCIES = @CALLING_CONVENTIONS_OBJS@
 libtests_la_LIBADD = $(libtests_la_DEPENDENCIES) $(top_builddir)/libgmp.la
 
-check_PROGRAMS = t-bswap t-constants t-count_zeros t-hightomask \
+check_PROGRAMS = t-bswap t-constants t-count_zeros t-hightomask t-longlong-opt \
   t-modlinv t-popc t-parity t-sub
 TESTS = $(check_PROGRAMS)
diff --git a/tests/t-longlong-opt.c b/tests/t-longlong-opt.c
new file mode 100644
index 000000000..3720bda83
--- /dev/null
+++ b/tests/t-longlong-opt.c
@@ -0,0 +1,57 @@
+/* Test longlong.h, faulty optimisation case.
+
+Copyright 2017 Free Software Foundation, Inc.
+
+This file is part of the GNU MP Library test suite.
+
+The GNU MP Library test suite is free software; you can redistribute it
+and/or modify it under the terms of the GNU General Public License as
+published by the Free Software Foundation; either version 3 of the License,
+or (at your option) any later version.
+
+The GNU MP Library test suite is distributed in the hope that it will be
+useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+the GNU MP Library test suite.  If not, see https://www.gnu.org/licenses/.  */
+
+#include <stdlib.h>
+
+#include "gmp-impl.h"
+#include "longlong.h"
+#include "tests.h"
+
+ulong g, q;
+
+/* Misoptimised under GCC 7.2 due to absent __volatile__
+   https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82677
+
+   The optimisation is sensitive to other things - e.g. "#include <stdio.h>"
+   to the top of this test, cancels the optimisation.  */
+__attribute__((optimize("-O2")))
+int
+udiv_in_conditional_loop ()
+{
+  ulong s = 1;
+  for (ulong i = 0; i < 5; i++)
+    for (ulong j = 0; j < 5; j++)
+      if (g)
+        {
+          ulong r, d = i << s;
+          if (d != 0)
+            udiv_qrnnd(q, r, 0, 0, d);
+        }
+}
+
+int
+main (int argc, char *argv[])
+{
+  tests_start ();
+
+  udiv_in_conditional_loop ();
+
+  tests_end ();
+  exit (0);
+}
-- 
2.14.2

From 6c3cbe3a952727f0c0d74ce7b9650e551e33b047 Mon Sep 17 00:00:00 2001
From: Ximin Luo <[email protected]>
Date: Wed, 8 Nov 2017 12:27:12 +0100
Subject: [PATCH 2/2] Inline assembly division needs __volatile__

See https://gmplib.org/list-archives/gmp-bugs/2017-October/004231.html etc
and https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82677 for details.
---
 longlong.h | 37 +++++++++++++++++++------------------
 1 file changed, 19 insertions(+), 18 deletions(-)

diff --git a/longlong.h b/longlong.h
index 43ec9ad71..f305cc676 100644
--- a/longlong.h
+++ b/longlong.h
@@ -403,7 +403,7 @@ long __MPN(count_leading_zeros) (UDItype);
 	     : "r" (__m0), "r" (__m1));					\
   } while (0)
 #define udiv_qrnnd(q, r, n1, n0, d) \
-  __asm__ ("dividu %0,%3,%4"						\
+  __asm__ __volatile__ ("dividu %0,%3,%4"				\
 	   : "=r" (q), "=q" (r)						\
 	   : "1" (n1), "r" (n0), "r" (d))
 #define count_leading_zeros(count, x) \
@@ -651,7 +651,7 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
 	   : "=g" (ph), "=r" (pl)					\
 	   : "%0" ((USItype)(m0)), "g" ((USItype)(m1)))
 #define udiv_qrnnd(q, r, nh, nl, d) \
-  __asm__ ("divx %4,%0,%1"						\
+  __asm__ __volatile__ ("divx %4,%0,%1"					\
 	   : "=g" (q), "=r" (r)						\
 	   : "1" ((USItype)(nh)), "0" ((USItype)(nl)), "g" ((USItype)(d)))
 #define count_leading_zeros(count, x) \
@@ -788,7 +788,7 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
 	   struct {USItype __h, __l;} __i;				\
 	  } __x;							\
     __x.__i.__h = n1; __x.__i.__l = n0;					\
-    __asm__ ("dlr\t%0,%2"						\
+    __asm__ __volatile__ ("dlr\t%0,%2"					\
 	     : "=r" (__x.__ll)						\
 	     : "0" (__x.__ll), "r" (d));				\
     (q) = __x.__i.__l; (r) = __x.__i.__h;				\
@@ -798,7 +798,7 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
   do {									\
     register USItype __r0 __asm__ ("0") = (n1);				\
     register USItype __r1 __asm__ ("1") = (n0);				\
-    __asm__ ("dlr\t%0,%4"						\
+    __asm__ __volatile__ ("dlr\t%0,%4"					\
 	     : "=r" (__r0), "=r" (__r1)					\
 	     : "r" (__r0), "r" (__r1), "r" (d));			\
     (q) = __r1; (r) = __r0;						\
@@ -823,7 +823,7 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
 	   struct {USItype __h, __l;} __i;				\
 	  } __x;							\
     __x.__i.__h = n1; __x.__i.__l = n0;					\
-    __asm__ ("dr\t%0,%2"						\
+    __asm__ __volatile__ ("dr\t%0,%2"					\
 	     : "=r" (__x.__ll)						\
 	     : "0" (__x.__ll), "r" (d));				\
     (q) = __x.__i.__l; (r) = __x.__i.__h;				\
@@ -866,7 +866,7 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
 	   struct {UDItype __h, __l;} __i;				\
 	  } __x;							\
     __x.__i.__h = n1; __x.__i.__l = n0;					\
-    __asm__ ("dlgr\t%0,%2"						\
+    __asm__ __volatile__ ("dlgr\t%0,%2"					\
 	     : "=r" (__x.__ll)						\
 	     : "0" (__x.__ll), "r" ((UDItype)(d)));			\
     (q) = __x.__i.__l; (r) = __x.__i.__h;				\
@@ -903,7 +903,7 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
 	   : "=a" (w0), "=d" (w1)					\
 	   : "%0" ((USItype)(u)), "rm" ((USItype)(v)))
 #define udiv_qrnnd(q, r, n1, n0, dx) /* d renamed to dx avoiding "=d" */\
-  __asm__ ("divl %4"		     /* stringification in K&R C */	\
+  __asm__ __volatile__ ("divl %4"    /* stringification in K&R C */	\
 	   : "=a" (q), "=d" (r)						\
 	   : "0" ((USItype)(n0)), "1" ((USItype)(n1)), "rm" ((USItype)(dx)))
 
@@ -1045,7 +1045,7 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
 	   : "%0" ((UDItype)(u)), "rm" ((UDItype)(v)))
 #endif
 #define udiv_qrnnd(q, r, n1, n0, dx) /* d renamed to dx avoiding "=d" */\
-  __asm__ ("divq %4"		     /* stringification in K&R C */	\
+  __asm__ __volatile__ ("divq %4"    /* stringification in K&R C */	\
 	   : "=a" (q), "=d" (r)						\
 	   : "0" ((UDItype)(n0)), "1" ((UDItype)(n1)), "rm" ((UDItype)(dx)))
 /* bsrq destination must be a 64-bit register, hence UDItype for __cbtmp. */
@@ -1097,7 +1097,7 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
 	   struct {USItype __l, __h;} __i;				\
 	  } __nn;							\
     __nn.__i.__h = (nh); __nn.__i.__l = (nl);				\
-    __asm__ ("ediv %d,%n,%0"						\
+    __asm__ __volatile__ ("ediv %d,%n,%0"				\
 	   : "=d" (__rq.__ll) : "dI" (__nn.__ll), "dI" (d));		\
     (r) = __rq.__i.__l; (q) = __rq.__i.__h;				\
   } while (0)
@@ -1144,11 +1144,11 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
 	   : "=d" (w0), "=d" (w1)					\
 	   : "%0" ((USItype)(u)), "dmi" ((USItype)(v)))
 #define udiv_qrnnd(q, r, n1, n0, d) \
-  __asm__ ("divu%.l %4,%1:%0"						\
+  __asm__ __volatile__ ("divu%.l %4,%1:%0"				\
 	   : "=d" (q), "=d" (r)						\
 	   : "0" ((USItype)(n0)), "1" ((USItype)(n1)), "dmi" ((USItype)(d)))
 #define sdiv_qrnnd(q, r, n1, n0, d) \
-  __asm__ ("divs%.l %4,%1:%0"						\
+  __asm__ __volatile__ ("divs%.l %4,%1:%0"				\
 	   : "=d" (q), "=d" (r)						\
 	   : "0" ((USItype)(n0)), "1" ((USItype)(n1)), "dmi" ((USItype)(d)))
 #else /* for other 68k family members use 16x16->32 multiplication */
@@ -1229,7 +1229,7 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
 	   struct {USItype __h, __l;} __i;				\
 	  } __x, __q;							\
   __x.__i.__h = (n1); __x.__i.__l = (n0);				\
-  __asm__ ("divu.d %0,%1,%2"						\
+  __asm__ __volatile__ ("divu.d %0,%1,%2"				\
 	   : "=r" (__q.__ll) : "r" (__x.__ll), "r" (d));		\
   (r) = (n0) - __q.__l * (d); (q) = __q.__l; })
 #endif /* __m88110__ */
@@ -1304,7 +1304,7 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
 	   struct {USItype __l, __h;} __i;				\
 	  } __x;							\
   __x.__i.__h = (n1); __x.__i.__l = (n0);				\
-  __asm__ ("deid %2,%0"							\
+  __asm__ __volatile__ ("deid %2,%0"					\
 	   : "=g" (__x.__ll)						\
 	   : "0" (__x.__ll), "g" ((USItype)(d)));			\
   (r) = __x.__i.__l; (q) = __x.__i.__h; })
@@ -1399,7 +1399,8 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
 #define smul_ppmm(xh, xl, m0, m1) \
   __asm__ ("mul %0,%2,%3" : "=r" (xh), "=q" (xl) : "r" (m0), "r" (m1))
 #define sdiv_qrnnd(q, r, nh, nl, d) \
-  __asm__ ("div %0,%2,%4" : "=r" (q), "=q" (r) : "r" (nh), "1" (nl), "r" (d))
+  __asm__ __volatile__ ("div %0,%2,%4" : "=r" (q), "=q" (r) : "r" (nh), "1" (nl), "r" (d))
+#define UDIV_TIME 100
 #endif
 #endif /* 32-bit POWER architecture variants.  */
 
@@ -1631,7 +1632,7 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
 #define udiv_qrnnd(q, r, n1, n0, d) \
   do {									\
     USItype __q;							\
-    __asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0"			\
+    __asm__ __volatile__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0"	\
 	     : "=r" (__q) : "r" (n1), "r" (n0), "r" (d));		\
     (r) = (n0) - __q * (d);						\
     (q) = __q;								\
@@ -1654,7 +1655,7 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
 #define udiv_qrnnd(q, r, n1, n0, d) \
   do {									\
     USItype __q;							\
-    __asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0"			\
+    __asm__ __volatile__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0"	\
 	     : "=r" (__q) : "r" (n1), "r" (n0), "r" (d));		\
     (r) = (n0) - __q * (d);						\
     (q) = __q;								\
@@ -1668,7 +1669,7 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
 #define umul_ppmm(w1, w0, u, v) \
   __asm__ ("umul %2,%3,%1;rd %%y,%0" : "=r" (w1), "=r" (w0) : "r" (u), "r" (v))
 #define udiv_qrnnd(q, r, n1, n0, d) \
-  __asm__ ("! Inlined udiv_qrnnd\n"					\
+  __asm__ __volatile__ ("! Inlined udiv_qrnnd\n"			\
 "	wr	%%g0,%2,%%y	! Not a delayed write for sparclite\n"	\
 "	tst	%%g0\n"							\
 "	divscc	%3,%4,%%g1\n"						\
@@ -1847,7 +1848,7 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype);
 	   struct {SItype __l, __h;} __i;				\
 	  } __x;							\
     __x.__i.__h = n1; __x.__i.__l = n0;					\
-    __asm__ ("ediv %3,%2,%0,%1"						\
+    __asm__ __volatile__ ("ediv %3,%2,%0,%1"				\
 	     : "=g" (q), "=g" (r) : "g" (__x.__ll), "g" (d));		\
   } while (0)
 #if 0
-- 
2.14.2

Attachment: signature.asc
Description: OpenPGP digital signature

_______________________________________________
gmp-bugs mailing list
[email protected]
https://gmplib.org/mailman/listinfo/gmp-bugs

Reply via email to