On Tue, Mar 17, 2020 at 3:04 PM Marco Bodrato <bodr...@mail.dm.unipi.it> wrote:
> Ciao, > > Il 2020-03-16 02:23 Seth Troisi ha scritto: > > On Sun, Mar 15, 2020 at 4:38 PM Marco Bodrato > > <bodr...@mail.dm.unipi.it> wrote: > >> May I write a few more comments? > > > Always, my opinion about being ready is just that it's passed > > the bar of being good enough not that it's perfect. > > I'm tempted to simply commit your proposal and then change it :-) > But if we keep on discussing here, we can improving the code with the > opinions of two persons... > > >> I see now: > >> + if (nbits <= 32) > >> + incr_limit = 336; > > >> Probably, we should change the name of the variable to > >> odd_candidates_in_sieve, then you would probably have written: > >> if (nbits <= 32) > >> odd_numbers_in_sieve = 168; > > > Renamed to odds_in_composite_sieve > > Of course the name is not essential, the values are :-) > And they seem correct to me, now. > I only discovered on experimental verification but the +1 is not needed, it took me a while to understand why but p is incremented to the first odd after the start of the gap by so the gap is -2 > > >> Then I wonder if the cost of the /* Sieve next segment */ step > [...] > >> need to allocate the possibly huge next_mult array? > > > > You're correct, this reduces the memory use 80% > > Moreover the code has now the following structure: > > SIEVE; > for (;;) { > CHECK_AND_POSSIBLY_BREAK; > SIEVE; > } > > There is an unneeded code duplication, it should be: > > for (;;) { > SIEVE; > CHECK_AND_POSSIBLY_BREAK; > } > This is a great improvement :) thanks for the additional pair of eyes :) > >> If you need to store larger gaps, you can also save in > >> the array gap>>1, because they all are even. The prime > >> limit will be able to go beyond 2^37... > > > > Updated comment for how this could be done in the future. > > The first gap larger than 500, if I'm not wrong, is > 304599508537+514=304599509051 > I added "far" so the comment now reads "allow this to go far beyond 436M" > > >> I'd expect a condition like > >> > >> + if (nbits <= numberof (primegap_small) * 2 + 1) > >> + { > >> + primegap = primegap_small; > >> + prime_limit = nbits / 2; > >> + } > > > Done > > I read: > + if (2 * nbits <= NUMBER_OF_PRIMES) > + { > + primegap = primegap_small; > + prime_limit = nbits / 2; > + } > > But ... this means that at most one fourth of the constants in > primegap_small are used, am I wrong? > I was experimenting with using 3 * nbits / 5 and other ratios and miscommitted this. Thanks again for the 2nd pair of eyes. > > Ĝis, > m >
diff -r bcca14c8a090 mpz/nextprime.c --- a/mpz/nextprime.c Thu Mar 05 00:43:26 2020 +0100 +++ b/mpz/nextprime.c Tue Mar 17 22:35:29 2020 -0700 @@ -30,33 +30,108 @@ GNU Lesser General Public License along with the GNU MP Library. If not, see https://www.gnu.org/licenses/. */ +#include <string.h> + #include "gmp-impl.h" #include "longlong.h" -static const unsigned char primegap[] = +/*********************************************************/ +/* Section sieve: sieving functions and tools for primes */ +/*********************************************************/ + +static mp_limb_t +id_to_n (mp_limb_t id) { return id*3+1+(id&1); } + +static mp_limb_t +n_to_bit (mp_limb_t n) { return ((n-5)|1)/3U; } + +static mp_size_t +primesieve_size (mp_limb_t n) { return n_to_bit(n) / GMP_LIMB_BITS + 1; } + + +/************************************/ +/* Section macros: macros for sieve */ +/************************************/ + +#define LOOP_ON_SIEVE_BEGIN(prime,start,end,sieve) \ + do { \ + mp_limb_t __mask, __index, __max_i, __i; \ + __i = (start); \ + __index = __i / GMP_LIMB_BITS; \ + __mask = CNST_LIMB(1) << (__i % GMP_LIMB_BITS); \ + __max_i = (end); \ + do { \ + ++__i; \ + if (((sieve)[__index] & __mask) == 0) \ + { \ + mp_limb_t prime = id_to_n(__i) \ + +#define LOOP_ON_SIEVE_END \ + } \ + __mask = __mask << 1 | __mask >> (GMP_LIMB_BITS-1); \ + __index += __mask & 1; \ + } while (__i <= __max_i); \ + } while (0) + + + +static const unsigned char primegap_small[] = { 2,2,4,2,4,2,4,6,2,6,4,2,4,6,6,2,6,4,2,6,4,6,8,4,2,4,2,4,14,4,6, 2,10,2,6,6,4,6,6,2,10,2,4,2,12,12,4,2,4,6,2,10,6,6,6,2,6,4,2,10,14,4,2, 4,14,6,10,2,4,6,8,6,6,4,6,8,4,8,10,2,10,2,6,4,6,8,4,2,4,12,8,4,8,4,6, - 12,2,18,6,10,6,6,2,6,10,6,6,2,6,6,4,2,12,10,2,4,6,6,2,12,4,6,8,10,8,10,8, - 6,6,4,8,6,4,8,4,14,10,12,2,10,2,4,2,10,14,4,2,4,14,4,2,4,20,4,8,10,8,4,6, - 6,14,4,6,6,8,6,12 + 12,2,18,6,10 }; -#define NUMBER_OF_PRIMES 167 +#define NUMBER_OF_PRIMES 100 + +long calculate_sievelimit(mp_bitcnt_t nbits) { + long sieve_limit; + + // Estimate a good sieve bound. Based on derivative of + // Merten's 3rd theorem * avg gap * cost of mod + // vs + // Cost of PRP test O(N^2.55) + if (nbits < 12800) + { + mpz_t tmp; + // sieve_limit ~= nbits ^ (5/2) / 124 + mpz_init (tmp); + mpz_ui_pow_ui (tmp, nbits, 5); + mpz_sqrt (tmp, tmp); + mpz_tdiv_q_ui(tmp, tmp, 124); + + // TODO: Storing gap/2 would allow this to go far beyond 436M. + sieve_limit = mpz_get_ui(tmp); + mpz_clear (tmp); + } + else + { + /* Larger threshold is faster but takes ~O(n/ln(n)) memory. + * For 33,000 bits limitting to 150M is ~12% slower than using the + * optimal 1.5B sieve_limit. + */ + sieve_limit = 150000000; + } + + ASSERT( 1000 < sieve_limit && sieve_limit <= 150000000 ); + return sieve_limit; +} void mpz_nextprime (mpz_ptr p, mpz_srcptr n) { - unsigned short *moduli; - unsigned long difference; - int i; + char *composite; + const unsigned char *primegap; unsigned prime_limit; - unsigned long prime; + unsigned prime; mp_size_t pn; mp_bitcnt_t nbits; + int i, m; unsigned incr; - TMP_SDECL; + unsigned odds_in_composite_sieve; + unsigned long difference; + TMP_DECL; /* First handle tiny numbers */ if (mpz_cmp_ui (n, 2) < 0) @@ -70,59 +145,91 @@ if (mpz_cmp_ui (p, 7) <= 0) return; + TMP_MARK; pn = SIZ(p); MPN_SIZEINBASE_2EXP(nbits, PTR(p), pn, 1); - if (nbits / 2 >= NUMBER_OF_PRIMES) - prime_limit = NUMBER_OF_PRIMES - 1; + if (nbits / 2 <= NUMBER_OF_PRIMES) + { + primegap = primegap_small; + prime_limit = nbits / 2; + } else - prime_limit = nbits / 2; + { + long sieve_limit; + mp_limb_t *sieve; + unsigned char *primegap_tmp; + int last_prime; + + /* sieve numbers up to sieve_limit and save prime count */ + sieve_limit = calculate_sievelimit(nbits); + sieve = TMP_ALLOC_LIMBS (primesieve_size (sieve_limit)); + prime_limit = gmp_primesieve(sieve, sieve_limit); + + /* Needed to avoid assignment of read-only location */ + primegap_tmp = TMP_ALLOC_TYPE (prime_limit, unsigned char); + primegap = primegap_tmp; - TMP_SMARK; + i = 0; + last_prime = 3; + LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (5), n_to_bit (sieve_limit), sieve); + primegap_tmp[i++] = prime - last_prime; + last_prime = prime; + LOOP_ON_SIEVE_END; + + /* Both primesieve and prime_limit ignore the first two primes. */ + ASSERT(i == prime_limit); + } - /* Compute residues modulo small odd primes */ - moduli = TMP_SALLOC_TYPE (prime_limit, unsigned short); + if (nbits <= 32) + odds_in_composite_sieve = 336 / 2; + else if (nbits <= 64) + odds_in_composite_sieve = 1550 / 2; + else + /* Corresponds to a merit 14 prime_gap, which is rare. */ + odds_in_composite_sieve = 5 * nbits; + /* composite[2*i] stores if p+2*i is a known composite */ + composite = TMP_SALLOC_TYPE (odds_in_composite_sieve, char); + + difference = 0; for (;;) { - /* FIXME: Compute lazily? */ + memset (composite, 0, odds_in_composite_sieve); prime = 3; for (i = 0; i < prime_limit; i++) - { - moduli[i] = mpz_tdiv_ui (p, prime); - prime += primegap[i]; - } + { + m = mpz_cdiv_ui(p, prime); + /* Only care about odd multiplies of prime. */ + if (m & 1) + m += prime; + m >>= 1; -#define INCR_LIMIT 0x10000 /* deep science */ + /* Mark off any composites in sieve */ + for (; m < odds_in_composite_sieve; m += prime) + composite[m] = 1; + prime += primegap[i]; + } - for (difference = incr = 0; incr < INCR_LIMIT; difference += 2) - { - /* First check residues */ - prime = 3; - for (i = 0; i < prime_limit; i++) - { - unsigned r; - /* FIXME: Reduce moduli + incr and store back, to allow for - division-free reductions. Alternatively, table primes[]'s - inverses (mod 2^16). */ - r = (moduli[i] + incr) % prime; - prime += primegap[i]; + for (incr = 0; incr < odds_in_composite_sieve; difference += 2, incr += 1) + { + if (composite[incr]) + continue; - if (r == 0) - goto next; - } - - mpz_add_ui (p, p, difference); - difference = 0; + mpz_add_ui (p, p, difference); + difference = 0; - /* Miller-Rabin test */ - if (mpz_millerrabin (p, 25)) - goto done; - next:; - incr += 2; - } + /* Miller-Rabin test */ + if (mpz_millerrabin (p, 25)) + goto done; + } + + /* Sieve next segment, very rare */ mpz_add_ui (p, p, difference); difference = 0; } done: - TMP_SFREE; + TMP_FREE; } + +#undef LOOP_ON_SIEVE_END +#undef LOOP_ON_SIEVE_BEGIN
_______________________________________________ gmp-devel mailing list gmp-devel@gmplib.org https://gmplib.org/mailman/listinfo/gmp-devel