Attached is a revised patch for mul_fft. I have tested it to confirm that both mpn_mul and mpn_tdiv_qr now work with numbers of size 2^32 limbs.
Regards, Mark
diff -r 266c28da4680 mpn/generic/mul_fft.c --- a/mpn/generic/mul_fft.c Wed Jul 17 17:42:56 2013 +0200 +++ b/mpn/generic/mul_fft.c Mon Sep 23 05:14:02 2013 -0500 @@ -67,8 +67,8 @@ static mp_limb_t mpn_mul_fft_internal (mp_ptr, mp_size_t, int, mp_ptr *, mp_ptr *, mp_ptr, mp_ptr, mp_size_t, mp_size_t, mp_size_t, int **, mp_ptr, int); -static void mpn_mul_fft_decompose (mp_ptr, mp_ptr *, int, int, mp_srcptr, - mp_size_t, int, int, mp_ptr); +static void mpn_mul_fft_decompose (mp_ptr, mp_ptr *, int, mp_size_t, mp_srcptr, + mp_size_t, mp_size_t, mp_size_t, mp_ptr); /* Find the best k to use for a mod 2^(m*GMP_NUMB_BITS)+1 FFT for m >= n. @@ -192,9 +192,9 @@ r and a must have n+1 limbs, and not overlap. */ static void -mpn_fft_mul_2exp_modF (mp_ptr r, mp_srcptr a, unsigned int d, mp_size_t n) +mpn_fft_mul_2exp_modF (mp_ptr r, mp_srcptr a, mp_size_t d, mp_size_t n) { - int sh; + mp_size_t sh; mp_limb_t cc, rd; sh = d % GMP_NUMB_BITS; @@ -283,7 +283,7 @@ Assumes a and b are semi-normalized. */ static inline void -mpn_fft_add_modF (mp_ptr r, mp_srcptr a, mp_srcptr b, int n) +mpn_fft_add_modF (mp_ptr r, mp_srcptr a, mp_srcptr b, mp_size_t n) { mp_limb_t c, x; @@ -314,7 +314,7 @@ Assumes a and b are semi-normalized. */ static inline void -mpn_fft_sub_modF (mp_ptr r, mp_srcptr a, mp_srcptr b, int n) +mpn_fft_sub_modF (mp_ptr r, mp_srcptr a, mp_srcptr b, mp_size_t n) { mp_limb_t c, x; @@ -428,12 +428,14 @@ if (n >= (sqr ? SQR_FFT_MODF_THRESHOLD : MUL_FFT_MODF_THRESHOLD)) { - int k, K2, nprime2, Nprime2, M2, maxLK, l, Mp2; + mp_size_t K2, maxLK; + mp_size_t N, Nprime2, nprime2, M2, Mp2, l; + int k; int **fft_l; mp_ptr *Ap, *Bp, A, B, T; k = mpn_fft_best_k (n, sqr); - K2 = 1 << k; + K2 = (mp_size_t) 1 << k; ASSERT_ALWAYS((n & (K2 - 1)) == 0); maxLK = (K2 > GMP_NUMB_BITS) ? K2 : GMP_NUMB_BITS; M2 = n * GMP_NUMB_BITS >> k; @@ -445,10 +447,10 @@ /* we should ensure that nprime2 is a multiple of the next K */ if (nprime2 >= (sqr ? SQR_FFT_MODF_THRESHOLD : MUL_FFT_MODF_THRESHOLD)) { - unsigned long K3; + size_t K3; for (;;) { - K3 = 1L << mpn_fft_best_k (nprime2, sqr); + K3 = (size_t) 1 << mpn_fft_best_k (nprime2, sqr); if ((nprime2 & (K3 - 1)) == 0) break; nprime2 = (nprime2 + K3 - 1) & -K3; @@ -470,7 +472,7 @@ fft_l[i] = TMP_ALLOC_TYPE (1<<i, int); mpn_fft_initl (fft_l, k); - TRACE (printf ("recurse: %ldx%ld limbs -> %d times %dx%d (%1.2f)\n", n, + TRACE (printf ("recurse: %ldx%ld limbs -> %ld times %ldx%ld (%1.2f)\n", n, n, K2, nprime2, nprime2, 2.0*(double)n/nprime2/K2)); for (i = 0; i < K; i++, ap++, bp++) { @@ -492,7 +494,7 @@ { mp_ptr a, b, tp, tpn; mp_limb_t cc; - int n2 = 2 * n; + mp_size_t n2 = 2 * n; tp = TMP_ALLOC_LIMBS (n2); tpn = tp + n; TRACE (printf (" mpn_mul_n %d of %ld limbs\n", K, n)); @@ -570,7 +572,7 @@ static void mpn_fft_div_2exp_modF (mp_ptr r, mp_srcptr a, int k, mp_size_t n) { - int i; + mp_size_t i; ASSERT (r != a); i = 2 * n * GMP_NUMB_BITS - k; @@ -585,13 +587,11 @@ Returns carry out, i.e. 1 iff {ap,an} = -1 mod 2^(n*GMP_NUMB_BITS)+1, then {rp,n}=0. */ -static int +static mp_size_t mpn_fft_norm_modF (mp_ptr rp, mp_size_t n, mp_ptr ap, mp_size_t an) { - mp_size_t l; - long int m; + mp_size_t l, m, rpn; mp_limb_t cc; - int rpn; ASSERT ((n <= an) && (an <= 3 * n)); m = an - 2 * n; @@ -625,10 +625,11 @@ We must have nl <= 2*K*l. */ static void -mpn_mul_fft_decompose (mp_ptr A, mp_ptr *Ap, int K, int nprime, mp_srcptr n, - mp_size_t nl, int l, int Mp, mp_ptr T) +mpn_mul_fft_decompose (mp_ptr A, mp_ptr *Ap, int K, mp_size_t nprime, mp_srcptr n, + mp_size_t nl, mp_size_t l, mp_size_t Mp, mp_ptr T) { - int i, j; + int i; + mp_size_t j; mp_ptr tmp; mp_size_t Kl = K * l; TMP_DECL; @@ -712,7 +713,8 @@ mp_size_t nprime, mp_size_t l, mp_size_t Mp, int **fft_l, mp_ptr T, int sqr) { - int K, i, pla, lo, sh, j; + int K, i, j; + mp_size_t lo, pla, sh; mp_ptr p; mp_limb_t cc; @@ -792,10 +794,10 @@ } /* return the lcm of a and 2^k */ -static unsigned long int -mpn_mul_fft_lcm (unsigned long int a, unsigned int k) +static mp_size_t +mpn_mul_fft_lcm (mp_size_t a, mp_size_t k) { - unsigned long int l = k; + mp_size_t l = k; while (a % 2 == 0 && k > 0) { @@ -812,7 +814,8 @@ mp_srcptr m, mp_size_t ml, int k) { - int K, maxLK, i; + int K, i; + mp_size_t maxLK; mp_size_t N, Nprime, nprime, M, Mp, l; mp_ptr *Ap, *Bp, A, T, B; int **fft_l; @@ -832,27 +835,27 @@ K = 1 << k; M = N >> k; /* N = 2^k M */ l = 1 + (M - 1) / GMP_NUMB_BITS; - maxLK = mpn_mul_fft_lcm ((unsigned long) GMP_NUMB_BITS, k); /* lcm (GMP_NUMB_BITS, 2^k) */ + maxLK = mpn_mul_fft_lcm ((mp_size_t) GMP_NUMB_BITS, (mp_size_t) k); /* lcm (GMP_NUMB_BITS, 2^k) */ Nprime = (1 + (2 * M + k + 2) / maxLK) * maxLK; /* Nprime = ceil((2*M+k+3)/maxLK)*maxLK; */ nprime = Nprime / GMP_NUMB_BITS; - TRACE (printf ("N=%ld K=%d, M=%ld, l=%ld, maxLK=%d, Np=%ld, np=%ld\n", + TRACE (printf ("N=%ld K=%d, M=%ld, l=%ld, maxLK=%ld, Np=%ld, np=%ld\n", N, K, M, l, maxLK, Nprime, nprime)); /* we should ensure that recursively, nprime is a multiple of the next K */ if (nprime >= (sqr ? SQR_FFT_MODF_THRESHOLD : MUL_FFT_MODF_THRESHOLD)) { - unsigned long K2; + size_t K2; for (;;) { - K2 = 1L << mpn_fft_best_k (nprime, sqr); + K2 = (size_t) 1 << mpn_fft_best_k (nprime, sqr); if ((nprime & (K2 - 1)) == 0) break; nprime = (nprime + K2 - 1) & -K2; Nprime = nprime * GMP_LIMB_BITS; /* warning: since nprime changed, K2 may change too! */ } - TRACE (printf ("new maxLK=%d, Np=%ld, np=%ld\n", maxLK, Nprime, nprime)); + TRACE (printf ("new maxLK=%ld, Np=%ld, np=%ld\n", maxLK, Nprime, nprime)); } ASSERT_ALWAYS (nprime < pl); /* otherwise we'll loop */
_______________________________________________ gmp-devel mailing list gmp-devel@gmplib.org http://gmplib.org/mailman/listinfo/gmp-devel