Hello GMP developers. I wasn't quite sure if this message should be sent to gmp-bugs or to gmp-devel.
Since this isn't the first issue due to types of internal variables (e.g. http://gmplib.org/list-archives/gmp-bugs/2009-July/001538.html) I thought it might deserve some discussion. Apologies if that's not the case. I have isolated a problem with internal types in mpn_mul for huge operands on 64 bit machines. This is present in GMP 5.1.2 and also seems to be present in the gmp development version. You will need a machine with several hundred GB of RAM to reproduce this. The essential issue is in mpn_mul_fft_decompose. Variables K and l are passed in is as "int" type. A local variable is then computed as: mp_size_t Kl = K * l; This causes internal allocation to overflow here: tmp = TMP_ALLOC_LIMBS(Kl + 1); leading to an unrecoverable error: GNU MP: Cannot allocate memory (size=18446744056529682440) I have attached a gdb session that illustrates the problem as the file gdb-example. I was a bit conservative in the build of GMP completely disabling assembly code to allow debugging - you might want to tweak that to make it run faster. The test program I used in the debug session is also attached as mpn_mul_test.c. I have also prepared a patch against GMP 5.1.2 that is attached as patch-mul_fft. This attempts to remove uses of int, unsigned int etc replacing with scalable types. I hope you find this useful. Best wishes, Mark Mark Sofroniou Wolfram Research
# configured GMP 5.1.2 as: ./configure --disable-shared --enable-assert \ --enable-alloca=debug --disable-assembly CFLAGS=-g # compile and link mpn_mul_test.c # this passes a.out 4294967296 # this fails a.out 4831838208 # here is a gdb session illustrating the problem in the failed case Starting program: /Developer/marks/GMP/mul/a.out GNU MP: Cannot allocate memory (size=18446744056529682440) Program received signal SIGABRT, Aborted. 0x0000003609e328a5 in raise (sig=6) at ../nptl/sysdeps/unix/sysv/linux/raise.c:64 64 return INLINE_SYSCALL (tgkill, 3, pid, selftid, sig); (gdb) up #1 0x0000003609e34085 in abort () at abort.c:92 92 raise (SIGABRT); (gdb) #2 0x00000000004254e4 in __gmp_default_allocate (size=18446744056529682440) at memory.c:48 48 abort (); (gdb) #3 0x000000000041ff67 in __gmp_tmp_debug_alloc (file=0x429e0f "mul_fft.c", line=642, dummy=1, markp=0x7fffffffce48, decl_name=0x42a0af "__tmp_marker", size=18446744056529682440) at tal-debug.c:100 100 p->block = (*__gmp_allocate_func) (size); (gdb) #4 0x0000000000424037 in mpn_mul_fft_decompose (A=0x7fdff3fda010, Ap=0x630370, K=2048, nprime=2099200, n=0x7ffbf7feb010, nl=2147483648, l=1048576, Mp=65600, T=0x7fe7f5fdf010) at mul_fft.c:642 642 tmp = TMP_ALLOC_LIMBS(Kl + 1); (gdb) p Kl $1 = -2147483648 (gdb) p K $2 = 2048 (gdb) p l $3 = 1048576 (gdb) p K*l $4 = -2147483648 (gdb) p (size_t) (Kl+1) $6 = 18446744071562067969 (gdb) p K*((mp_size_t) l) $5 = 2147483648 <http://www.gnu.org/software/gdb/bugs/>... Reading symbols from /Developer/marks/GMP/mul/a.out...done. (gdb) r Starting program: /Developer/marks/GMP/mul/a.out mul_fft.c:745: GNU MP assertion failed: (pla) >= 0 Program received signal SIGABRT, Aborted. 0x0000003609e328a5 in raise (sig=6) r 8589934592 /* failed */
#include <stdlib.h> #include <stdio.h> #include <string.h> #include <limits.h> #include <sys/time.h> #include <sys/resource.h> #include "gmp.h" #define MAX_LIMB 0xffffffffffffffffUL typedef int mp_bool_t; /* Unix timing function. */ typedef enum {USER, SYSTEM} timing_choice; static double get_timing(timing_choice choice) { struct rusage ru; struct timeval tval; double time_taken; getrusage(RUSAGE_SELF, &ru); if (choice == USER) { tval = ru.ru_utime; } else { tval = ru.ru_stime; } time_taken = (double) tval.tv_sec + 1.e-6 * ((double) tval.tv_usec); return time_taken; } /* Check the remainder is zero and the quotient of division is the same as the multipicand. */ static mp_bool_t mpn_mul_test_limbs(const mp_limb_t *qp, const mp_limb_t *xp, const mp_size_t lx, const mp_limb_t *rp, const mp_size_t lr) { mp_size_t i; for (i = 0; i < lr; i++) { if (rp[i] != 0) return 0; } if (mpn_cmp(qp, xp, lx) != 0) return 0; if (qp[lx] != 0) return 0; return 1; } /* Test the result of multiplication using division with remainder. */ static mp_bool_t mpn_mul_div_test(mp_limb_t *zp, const mp_size_t lz, const mp_limb_t *xp, const mp_size_t lx, const mp_limb_t *yp, const mp_size_t ly) { mp_limb_t *qp, *rp; mp_size_t qextra = 0; size_t bytes; mp_bool_t test; /* Recycle zp for the computation of quotient and remainder to save space */ rp = zp; qp = zp + ly; mpn_tdiv_qr(qp, rp, qextra, zp, lz, yp, ly); test = mpn_mul_test_limbs(qp, xp, lx, rp, ly); return test; } /* Structural test for the result of multiplication of numbers composed of MAX_LIMB limbs. This is specific to the form of the input but is fast and has negligible memory footprint. */ static int mpn_mul_structure_test(const mp_limb_t *zp, const mp_size_t lx, const mp_size_t ly, const mp_size_t lz) { mp_size_t i; if (zp[0] != 1) return 0; for (i = 1; i < ly; i++) { if (zp[i] != 0) return 0; } for (i = ly; i < lx; i++) { if (zp[i] != MAX_LIMB) return 0; } if (zp[lx] != (MAX_LIMB - 1)) return 0; for (i = lx + 1; i < lz; i++) { if (zp[i] != MAX_LIMB) return 0; } return 1; } typedef enum {STRUCTURE, DIVIDE} verification; static mp_bool_t mpn_mul_test(verification form, mp_limb_t *zp, const mp_size_t lz, const mp_limb_t *xp, const mp_size_t lx, const mp_limb_t *yp, const mp_size_t ly) { if (form == STRUCTURE) return mpn_mul_structure_test(zp, lx, ly, lz); else return mpn_mul_div_test(zp, lz, xp, lx, yp, ly); } /* Perform a multiplication of the given size and check the result. */ static void mpn_mul_verify(mp_size_t len) { mp_limb_t *xp, *yp, *zp; mp_limb_t carry; size_t bytes; mp_size_t lx, ly, lz; timing_choice choice = USER; double time1, time2, time3; mp_bool_t test; lx = len; ly = len; lz = lx + ly; printf("Sizes: lx %ld ly %ld lz %ld\n", lx, ly, lz); /* Construct multiplicands made up of MAX_LIMB limbs. */ bytes = lx*sizeof(mp_limb_t); xp = malloc(bytes); memset(xp, UCHAR_MAX, bytes); bytes = ly*sizeof(mp_limb_t); yp = malloc(bytes); memset(yp, UCHAR_MAX, bytes); bytes = (lz + 1)*sizeof(mp_limb_t); zp = malloc(bytes); memset(zp, 0, bytes); time1 = get_timing(choice); carry = mpn_mul(zp, xp, lx, yp, ly); if (carry == 0) lz--; time2 = get_timing(choice); printf("Multiplication test time: %f\n", time2 - time1); test = mpn_mul_test(STRUCTURE, zp, lz, xp, lx, yp, ly); time3 = get_timing(choice); free(xp); free(yp); free(zp); printf("Verification test time: %f\n", time3 - time2); if (test) printf("Test passed\n"); else printf("Test failed\n"); } int main (int argc, char *argv[]) { mp_size_t len; if (argc != 2) { printf("Specify the number of limbs to multiply\n"); return 0; } len = atol(argv[1]); mpn_mul_verify(len); return 0; }
diff -r 266c28da4680 mpn/generic/mul_fft.c --- a/mpn/generic/mul_fft.c Wed Jul 17 17:42:56 2013 +0200 +++ b/mpn/generic/mul_fft.c Wed Sep 18 03:55:03 2013 -0500 @@ -67,8 +67,8 @@ static mp_limb_t mpn_mul_fft_internal (mp_ptr, mp_size_t, int, mp_ptr *, mp_ptr *, mp_ptr, mp_ptr, mp_size_t, mp_size_t, mp_size_t, int **, mp_ptr, int); -static void mpn_mul_fft_decompose (mp_ptr, mp_ptr *, int, int, mp_srcptr, - mp_size_t, int, int, mp_ptr); +static void mpn_mul_fft_decompose (mp_ptr, mp_ptr *, int, mp_size_t, mp_srcptr, + mp_size_t, mp_size_t, mp_size_t, mp_ptr); /* Find the best k to use for a mod 2^(m*GMP_NUMB_BITS)+1 FFT for m >= n. @@ -192,9 +192,9 @@ r and a must have n+1 limbs, and not overlap. */ static void -mpn_fft_mul_2exp_modF (mp_ptr r, mp_srcptr a, unsigned int d, mp_size_t n) +mpn_fft_mul_2exp_modF (mp_ptr r, mp_srcptr a, mp_size_t d, mp_size_t n) { - int sh; + mp_size_t sh; mp_limb_t cc, rd; sh = d % GMP_NUMB_BITS; @@ -283,7 +283,7 @@ Assumes a and b are semi-normalized. */ static inline void -mpn_fft_add_modF (mp_ptr r, mp_srcptr a, mp_srcptr b, int n) +mpn_fft_add_modF (mp_ptr r, mp_srcptr a, mp_srcptr b, mp_size_t n) { mp_limb_t c, x; @@ -314,7 +314,7 @@ Assumes a and b are semi-normalized. */ static inline void -mpn_fft_sub_modF (mp_ptr r, mp_srcptr a, mp_srcptr b, int n) +mpn_fft_sub_modF (mp_ptr r, mp_srcptr a, mp_srcptr b, mp_size_t n) { mp_limb_t c, x; @@ -428,7 +428,8 @@ if (n >= (sqr ? SQR_FFT_MODF_THRESHOLD : MUL_FFT_MODF_THRESHOLD)) { - int k, K2, nprime2, Nprime2, M2, maxLK, l, Mp2; + int k, K2, maxLK; + mp_size_t N, Nprime2, nprime2, M2, Mp2, l; int **fft_l; mp_ptr *Ap, *Bp, A, B, T; @@ -445,10 +446,10 @@ /* we should ensure that nprime2 is a multiple of the next K */ if (nprime2 >= (sqr ? SQR_FFT_MODF_THRESHOLD : MUL_FFT_MODF_THRESHOLD)) { - unsigned long K3; + mp_limb_t K3; for (;;) { - K3 = 1L << mpn_fft_best_k (nprime2, sqr); + K3 = (mp_limb_t) 1 << mpn_fft_best_k (nprime2, sqr); if ((nprime2 & (K3 - 1)) == 0) break; nprime2 = (nprime2 + K3 - 1) & -K3; @@ -492,7 +493,7 @@ { mp_ptr a, b, tp, tpn; mp_limb_t cc; - int n2 = 2 * n; + mp_size_t n2 = 2 * n; tp = TMP_ALLOC_LIMBS (n2); tpn = tp + n; TRACE (printf (" mpn_mul_n %d of %ld limbs\n", K, n)); @@ -570,7 +571,7 @@ static void mpn_fft_div_2exp_modF (mp_ptr r, mp_srcptr a, int k, mp_size_t n) { - int i; + mp_size_t i; ASSERT (r != a); i = 2 * n * GMP_NUMB_BITS - k; @@ -585,13 +586,13 @@ Returns carry out, i.e. 1 iff {ap,an} = -1 mod 2^(n*GMP_NUMB_BITS)+1, then {rp,n}=0. */ -static int +static mp_size_t mpn_fft_norm_modF (mp_ptr rp, mp_size_t n, mp_ptr ap, mp_size_t an) { mp_size_t l; - long int m; + mp_size_t m; mp_limb_t cc; - int rpn; + mp_size_t rpn; ASSERT ((n <= an) && (an <= 3 * n)); m = an - 2 * n; @@ -625,8 +626,8 @@ We must have nl <= 2*K*l. */ static void -mpn_mul_fft_decompose (mp_ptr A, mp_ptr *Ap, int K, int nprime, mp_srcptr n, - mp_size_t nl, int l, int Mp, mp_ptr T) +mpn_mul_fft_decompose (mp_ptr A, mp_ptr *Ap, int K, mp_size_t nprime, mp_srcptr n, + mp_size_t nl, mp_size_t l, mp_size_t Mp, mp_ptr T) { int i, j; mp_ptr tmp; @@ -712,7 +713,8 @@ mp_size_t nprime, mp_size_t l, mp_size_t Mp, int **fft_l, mp_ptr T, int sqr) { - int K, i, pla, lo, sh, j; + int K, i, j; + mp_size_t lo, pla, sh; mp_ptr p; mp_limb_t cc; @@ -792,10 +794,10 @@ } /* return the lcm of a and 2^k */ -static unsigned long int -mpn_mul_fft_lcm (unsigned long int a, unsigned int k) +static mp_limb_t +mpn_mul_fft_lcm (mp_limb_t a, mp_limb_t k) { - unsigned long int l = k; + mp_limb_t l = k; while (a % 2 == 0 && k > 0) { @@ -832,7 +834,7 @@ K = 1 << k; M = N >> k; /* N = 2^k M */ l = 1 + (M - 1) / GMP_NUMB_BITS; - maxLK = mpn_mul_fft_lcm ((unsigned long) GMP_NUMB_BITS, k); /* lcm (GMP_NUMB_BITS, 2^k) */ + maxLK = mpn_mul_fft_lcm ((mp_limb_t) GMP_NUMB_BITS, k); /* lcm (GMP_NUMB_BITS, 2^k) */ Nprime = (1 + (2 * M + k + 2) / maxLK) * maxLK; /* Nprime = ceil((2*M+k+3)/maxLK)*maxLK; */ @@ -842,10 +844,10 @@ /* we should ensure that recursively, nprime is a multiple of the next K */ if (nprime >= (sqr ? SQR_FFT_MODF_THRESHOLD : MUL_FFT_MODF_THRESHOLD)) { - unsigned long K2; + mp_limb_t K2; for (;;) { - K2 = 1L << mpn_fft_best_k (nprime, sqr); + K2 = (mp_limb_t) 1 << mpn_fft_best_k (nprime, sqr); if ((nprime & (K2 - 1)) == 0) break; nprime = (nprime + K2 - 1) & -K2;
_______________________________________________ gmp-devel mailing list gmp-devel@gmplib.org http://gmplib.org/mailman/listinfo/gmp-devel