Torbjörn Granlund: > Ximin Luo <[email protected]> writes: > > Any chance my patch could be tested and merged? > > I tried applying it, but it fails for the current head. Manual merging > is required. (It applies flawlessly to GMP 6.1, though.) > > If you could modify your patch to apply to the GMP head, then that would > help. The repo is here: https://gmplib.org/repo/gmp/ >
I've rebased, tidied up the whitespace, and added a test case - see attached patches. However, I'm not sure the test case will be very useful since it is GCC-7.2 specific, is only for x86-64, and is sensitive to being changed in a way that might make GCC-7.2 not perform the target optimisation. You can decide whether to keep it... (I assigned copyright to FSF to be consistent with the other files, I assume something this small doesn't need a specific CLA/waiver...) > I did not ask other projects' opinions on this yet, since they seemed > to have copied the code from GMP. Could you confirm that GMP is the > original source? > > I originally wrote it for use in GCC, glibc, and GMP. I only actively > maintain the GMP version. > > (The versions has diverged, unfortunately. The GMP version now contains > some GMPisms which might need cleaning up if we want to make the > versions converge.) > That would be good yes... would also be a good to put a warning at the top of that file along the lines of "the original version is from GMP at address <URL>, please submit all change requests there rather than this project, and update regularly". Might also be a good idea to expose it as a public header file, then at least projects that already depend on GMP (e.g. flint and other scientific stuff) can just #include it instead of copying it. X -- GPG: ed25519/56034877E1F87C35 GPG: rsa4096/1318EFAC5FBBDBCE https://github.com/infinity0/pubkeys.git
From cab3a2d0bd1288b50c71c708791d4e2903317134 Mon Sep 17 00:00:00 2001 From: Ximin Luo <[email protected]> Date: Wed, 8 Nov 2017 12:26:11 +0100 Subject: [PATCH 1/2] Add test for udiv faulty optimisation, fixed next commit --- tests/Makefile.am | 2 +- tests/t-longlong-opt.c | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 1 deletion(-) create mode 100644 tests/t-longlong-opt.c diff --git a/tests/Makefile.am b/tests/Makefile.am index ff42b66a4..968c8237b 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -35,6 +35,6 @@ libtests_la_SOURCES = tests.h \ libtests_la_DEPENDENCIES = @CALLING_CONVENTIONS_OBJS@ libtests_la_LIBADD = $(libtests_la_DEPENDENCIES) $(top_builddir)/libgmp.la -check_PROGRAMS = t-bswap t-constants t-count_zeros t-hightomask \ +check_PROGRAMS = t-bswap t-constants t-count_zeros t-hightomask t-longlong-opt \ t-modlinv t-popc t-parity t-sub TESTS = $(check_PROGRAMS) diff --git a/tests/t-longlong-opt.c b/tests/t-longlong-opt.c new file mode 100644 index 000000000..3720bda83 --- /dev/null +++ b/tests/t-longlong-opt.c @@ -0,0 +1,57 @@ +/* Test longlong.h, faulty optimisation case. + +Copyright 2017 Free Software Foundation, Inc. + +This file is part of the GNU MP Library test suite. + +The GNU MP Library test suite is free software; you can redistribute it +and/or modify it under the terms of the GNU General Public License as +published by the Free Software Foundation; either version 3 of the License, +or (at your option) any later version. + +The GNU MP Library test suite is distributed in the hope that it will be +useful, but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +Public License for more details. + +You should have received a copy of the GNU General Public License along with +the GNU MP Library test suite. If not, see https://www.gnu.org/licenses/. */ + +#include <stdlib.h> + +#include "gmp-impl.h" +#include "longlong.h" +#include "tests.h" + +ulong g, q; + +/* Misoptimised under GCC 7.2 due to absent __volatile__ + https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82677 + + The optimisation is sensitive to other things - e.g. "#include <stdio.h>" + to the top of this test, cancels the optimisation. */ +__attribute__((optimize("-O2"))) +int +udiv_in_conditional_loop () +{ + ulong s = 1; + for (ulong i = 0; i < 5; i++) + for (ulong j = 0; j < 5; j++) + if (g) + { + ulong r, d = i << s; + if (d != 0) + udiv_qrnnd(q, r, 0, 0, d); + } +} + +int +main (int argc, char *argv[]) +{ + tests_start (); + + udiv_in_conditional_loop (); + + tests_end (); + exit (0); +} -- 2.14.2
From 6c3cbe3a952727f0c0d74ce7b9650e551e33b047 Mon Sep 17 00:00:00 2001 From: Ximin Luo <[email protected]> Date: Wed, 8 Nov 2017 12:27:12 +0100 Subject: [PATCH 2/2] Inline assembly division needs __volatile__ See https://gmplib.org/list-archives/gmp-bugs/2017-October/004231.html etc and https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82677 for details. --- longlong.h | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/longlong.h b/longlong.h index 43ec9ad71..f305cc676 100644 --- a/longlong.h +++ b/longlong.h @@ -403,7 +403,7 @@ long __MPN(count_leading_zeros) (UDItype); : "r" (__m0), "r" (__m1)); \ } while (0) #define udiv_qrnnd(q, r, n1, n0, d) \ - __asm__ ("dividu %0,%3,%4" \ + __asm__ __volatile__ ("dividu %0,%3,%4" \ : "=r" (q), "=q" (r) \ : "1" (n1), "r" (n0), "r" (d)) #define count_leading_zeros(count, x) \ @@ -651,7 +651,7 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype); : "=g" (ph), "=r" (pl) \ : "%0" ((USItype)(m0)), "g" ((USItype)(m1))) #define udiv_qrnnd(q, r, nh, nl, d) \ - __asm__ ("divx %4,%0,%1" \ + __asm__ __volatile__ ("divx %4,%0,%1" \ : "=g" (q), "=r" (r) \ : "1" ((USItype)(nh)), "0" ((USItype)(nl)), "g" ((USItype)(d))) #define count_leading_zeros(count, x) \ @@ -788,7 +788,7 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype); struct {USItype __h, __l;} __i; \ } __x; \ __x.__i.__h = n1; __x.__i.__l = n0; \ - __asm__ ("dlr\t%0,%2" \ + __asm__ __volatile__ ("dlr\t%0,%2" \ : "=r" (__x.__ll) \ : "0" (__x.__ll), "r" (d)); \ (q) = __x.__i.__l; (r) = __x.__i.__h; \ @@ -798,7 +798,7 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype); do { \ register USItype __r0 __asm__ ("0") = (n1); \ register USItype __r1 __asm__ ("1") = (n0); \ - __asm__ ("dlr\t%0,%4" \ + __asm__ __volatile__ ("dlr\t%0,%4" \ : "=r" (__r0), "=r" (__r1) \ : "r" (__r0), "r" (__r1), "r" (d)); \ (q) = __r1; (r) = __r0; \ @@ -823,7 +823,7 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype); struct {USItype __h, __l;} __i; \ } __x; \ __x.__i.__h = n1; __x.__i.__l = n0; \ - __asm__ ("dr\t%0,%2" \ + __asm__ __volatile__ ("dr\t%0,%2" \ : "=r" (__x.__ll) \ : "0" (__x.__ll), "r" (d)); \ (q) = __x.__i.__l; (r) = __x.__i.__h; \ @@ -866,7 +866,7 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype); struct {UDItype __h, __l;} __i; \ } __x; \ __x.__i.__h = n1; __x.__i.__l = n0; \ - __asm__ ("dlgr\t%0,%2" \ + __asm__ __volatile__ ("dlgr\t%0,%2" \ : "=r" (__x.__ll) \ : "0" (__x.__ll), "r" ((UDItype)(d))); \ (q) = __x.__i.__l; (r) = __x.__i.__h; \ @@ -903,7 +903,7 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype); : "=a" (w0), "=d" (w1) \ : "%0" ((USItype)(u)), "rm" ((USItype)(v))) #define udiv_qrnnd(q, r, n1, n0, dx) /* d renamed to dx avoiding "=d" */\ - __asm__ ("divl %4" /* stringification in K&R C */ \ + __asm__ __volatile__ ("divl %4" /* stringification in K&R C */ \ : "=a" (q), "=d" (r) \ : "0" ((USItype)(n0)), "1" ((USItype)(n1)), "rm" ((USItype)(dx))) @@ -1045,7 +1045,7 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype); : "%0" ((UDItype)(u)), "rm" ((UDItype)(v))) #endif #define udiv_qrnnd(q, r, n1, n0, dx) /* d renamed to dx avoiding "=d" */\ - __asm__ ("divq %4" /* stringification in K&R C */ \ + __asm__ __volatile__ ("divq %4" /* stringification in K&R C */ \ : "=a" (q), "=d" (r) \ : "0" ((UDItype)(n0)), "1" ((UDItype)(n1)), "rm" ((UDItype)(dx))) /* bsrq destination must be a 64-bit register, hence UDItype for __cbtmp. */ @@ -1097,7 +1097,7 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype); struct {USItype __l, __h;} __i; \ } __nn; \ __nn.__i.__h = (nh); __nn.__i.__l = (nl); \ - __asm__ ("ediv %d,%n,%0" \ + __asm__ __volatile__ ("ediv %d,%n,%0" \ : "=d" (__rq.__ll) : "dI" (__nn.__ll), "dI" (d)); \ (r) = __rq.__i.__l; (q) = __rq.__i.__h; \ } while (0) @@ -1144,11 +1144,11 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype); : "=d" (w0), "=d" (w1) \ : "%0" ((USItype)(u)), "dmi" ((USItype)(v))) #define udiv_qrnnd(q, r, n1, n0, d) \ - __asm__ ("divu%.l %4,%1:%0" \ + __asm__ __volatile__ ("divu%.l %4,%1:%0" \ : "=d" (q), "=d" (r) \ : "0" ((USItype)(n0)), "1" ((USItype)(n1)), "dmi" ((USItype)(d))) #define sdiv_qrnnd(q, r, n1, n0, d) \ - __asm__ ("divs%.l %4,%1:%0" \ + __asm__ __volatile__ ("divs%.l %4,%1:%0" \ : "=d" (q), "=d" (r) \ : "0" ((USItype)(n0)), "1" ((USItype)(n1)), "dmi" ((USItype)(d))) #else /* for other 68k family members use 16x16->32 multiplication */ @@ -1229,7 +1229,7 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype); struct {USItype __h, __l;} __i; \ } __x, __q; \ __x.__i.__h = (n1); __x.__i.__l = (n0); \ - __asm__ ("divu.d %0,%1,%2" \ + __asm__ __volatile__ ("divu.d %0,%1,%2" \ : "=r" (__q.__ll) : "r" (__x.__ll), "r" (d)); \ (r) = (n0) - __q.__l * (d); (q) = __q.__l; }) #endif /* __m88110__ */ @@ -1304,7 +1304,7 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype); struct {USItype __l, __h;} __i; \ } __x; \ __x.__i.__h = (n1); __x.__i.__l = (n0); \ - __asm__ ("deid %2,%0" \ + __asm__ __volatile__ ("deid %2,%0" \ : "=g" (__x.__ll) \ : "0" (__x.__ll), "g" ((USItype)(d))); \ (r) = __x.__i.__l; (q) = __x.__i.__h; }) @@ -1399,7 +1399,8 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype); #define smul_ppmm(xh, xl, m0, m1) \ __asm__ ("mul %0,%2,%3" : "=r" (xh), "=q" (xl) : "r" (m0), "r" (m1)) #define sdiv_qrnnd(q, r, nh, nl, d) \ - __asm__ ("div %0,%2,%4" : "=r" (q), "=q" (r) : "r" (nh), "1" (nl), "r" (d)) + __asm__ __volatile__ ("div %0,%2,%4" : "=r" (q), "=q" (r) : "r" (nh), "1" (nl), "r" (d)) +#define UDIV_TIME 100 #endif #endif /* 32-bit POWER architecture variants. */ @@ -1631,7 +1632,7 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype); #define udiv_qrnnd(q, r, n1, n0, d) \ do { \ USItype __q; \ - __asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0" \ + __asm__ __volatile__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0" \ : "=r" (__q) : "r" (n1), "r" (n0), "r" (d)); \ (r) = (n0) - __q * (d); \ (q) = __q; \ @@ -1654,7 +1655,7 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype); #define udiv_qrnnd(q, r, n1, n0, d) \ do { \ USItype __q; \ - __asm__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0" \ + __asm__ __volatile__ ("mov %1,%%y;nop;nop;nop;udiv %2,%3,%0" \ : "=r" (__q) : "r" (n1), "r" (n0), "r" (d)); \ (r) = (n0) - __q * (d); \ (q) = __q; \ @@ -1668,7 +1669,7 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype); #define umul_ppmm(w1, w0, u, v) \ __asm__ ("umul %2,%3,%1;rd %%y,%0" : "=r" (w1), "=r" (w0) : "r" (u), "r" (v)) #define udiv_qrnnd(q, r, n1, n0, d) \ - __asm__ ("! Inlined udiv_qrnnd\n" \ + __asm__ __volatile__ ("! Inlined udiv_qrnnd\n" \ " wr %%g0,%2,%%y ! Not a delayed write for sparclite\n" \ " tst %%g0\n" \ " divscc %3,%4,%%g1\n" \ @@ -1847,7 +1848,7 @@ extern UWtype __MPN(udiv_qrnnd) (UWtype *, UWtype, UWtype, UWtype); struct {SItype __l, __h;} __i; \ } __x; \ __x.__i.__h = n1; __x.__i.__l = n0; \ - __asm__ ("ediv %3,%2,%0,%1" \ + __asm__ __volatile__ ("ediv %3,%2,%0,%1" \ : "=g" (q), "=g" (r) : "g" (__x.__ll), "g" (d)); \ } while (0) #if 0 -- 2.14.2
signature.asc
Description: OpenPGP digital signature
_______________________________________________ gmp-bugs mailing list [email protected] https://gmplib.org/mailman/listinfo/gmp-bugs
