This is an automated email from the git hooks/post-receive script. ppm-guest pushed a commit to annotated tag v0.36 in repository libmath-prime-util-perl.
commit 5be203368d0f96cf19210e3697df459ce62ace9a Author: Dana Jacobsen <d...@acm.org> Date: Sun Jan 5 00:19:58 2014 -0800 Removed old SQUFOF code. Faster is_perfect_square. Streamline trial division pre-factoring. --- Changes | 3 + XS.xs | 23 +-- examples/bench-factor-extra.pl | 2 - examples/bench-factor-semiprime.pl | 8 +- examples/test-factor-gnufactor.pl | 13 +- factor.c | 375 ++++++++++--------------------------- factor.h | 1 - lib/Math/Prime/Util.pm | 8 +- primality.c | 24 +-- t/50-factoring.t | 3 +- util.h | 20 +- 11 files changed, 144 insertions(+), 336 deletions(-) diff --git a/Changes b/Changes index 4712ef8..050084c 100644 --- a/Changes +++ b/Changes @@ -63,6 +63,9 @@ Revision history for Perl module Math::Prime::Util - While Math::BigInt has the bgcd function, it is slow for native numbers, even with the Pari or GMP back ends. The gcd here is 20-100x faster. + - Removed the old SQUFOF code, so the racing version is the only one. It + was already the only one being used. + 0.35 2013-12-08 diff --git a/XS.xs b/XS.xs index 2cae867..c6f7254 100644 --- a/XS.xs +++ b/XS.xs @@ -361,11 +361,10 @@ trial_factor(IN UV n, ...) fermat_factor = 1 holf_factor = 2 squfof_factor = 3 - rsqufof_factor = 4 - pbrent_factor = 5 - prho_factor = 6 - pplus1_factor = 7 - pminus1_factor = 8 + pbrent_factor = 4 + prho_factor = 5 + pplus1_factor = 6 + pminus1_factor = 7 PPCODE: if (n == 0) XSRETURN_UV(0); while ( (n% 2) == 0 ) { n /= 2; XPUSHs(sv_2mortal(newSVuv( 2 ))); } @@ -386,19 +385,16 @@ trial_factor(IN UV n, ...) case 3: arg1 = (items < 2) ? 4*1024*1024 : SvUV(ST(1)); nfactors = squfof_factor (n, factors, arg1); break; case 4: arg1 = (items < 2) ? 4*1024*1024 : SvUV(ST(1)); - nfactors = racing_squfof_factor(n, factors, arg1); break; - case 5: arg1 = (items < 2) ? 4*1024*1024 : SvUV(ST(1)); arg2 = (items < 3) ? 1 : SvUV(ST(2)); nfactors = pbrent_factor (n, factors, arg1, arg2); break; - case 6: arg1 = (items < 2) ? 4*1024*1024 : SvUV(ST(1)); + case 5: arg1 = (items < 2) ? 4*1024*1024 : SvUV(ST(1)); nfactors = prho_factor (n, factors, arg1); break; - case 7: arg1 = (items < 2) ? 200 : SvUV(ST(1)); + case 6: arg1 = (items < 2) ? 200 : SvUV(ST(1)); nfactors = pplus1_factor (n, factors, arg1); break; - case 8: arg1 = (items < 2) ? 1*1024*1024 : SvUV(ST(1)); - arg2 = (items < 3) ? 0 : SvUV(ST(2)); - if (arg2 == 0) arg2 = 10*arg1; /* default B2 */ + case 7: + default: arg1 = (items < 2) ? 1*1024*1024 : SvUV(ST(1)); + arg2 = (items < 3) ? 10*arg1 : SvUV(ST(2)); nfactors = pminus1_factor(n, factors, arg1, arg2); break; - default: break; } EXTEND(SP, nfactors); for (i = 0; i < nfactors; i++) @@ -606,7 +602,6 @@ factor(IN SV* svn) return; /* skip implicit PUTBACK */ } - void divisor_sum(IN SV* svn, ...) PREINIT: diff --git a/examples/bench-factor-extra.pl b/examples/bench-factor-extra.pl index 674ce19..d5726d8 100755 --- a/examples/bench-factor-extra.pl +++ b/examples/bench-factor-extra.pl @@ -58,7 +58,6 @@ sub test_at_digits { $nfactored{'pminus1'} += $calc_nfacs->(Math::Prime::Util::pminus1_factor($_, $p1smooth)); $nfactored{'pplus1'} += $calc_nfacs->(Math::Prime::Util::pplus1_factor($_, $p1smooth)); $nfactored{'squfof'} += $calc_nfacs->(Math::Prime::Util::squfof_factor($_, $sqrounds)); - $nfactored{'rsqufof'} += $calc_nfacs->(Math::Prime::Util::rsqufof_factor($_, $rsqrounds)); #$nfactored{'trial'} += $calc_nfacs->(Math::Prime::Util::trial_factor($_)); #$nfactored{'fermat'} += $calc_nfacs->(Math::Prime::Util::fermat_factor($_, $rounds)); $nfactored{'holf'} += $calc_nfacs->(Math::Prime::Util::holf_factor($_, $hrounds)); @@ -79,7 +78,6 @@ sub test_at_digits { "fermat" => sub { Math::Prime::Util::fermat_factor($_, $rounds) for @nums}, "holf" => sub { Math::Prime::Util::holf_factor($_, $hrounds) for @nums }, "squfof" => sub { Math::Prime::Util::squfof_factor($_, $sqrounds) for @nums }, - "rsqufof" => sub { Math::Prime::Util::rsqufof_factor($_) for @nums }, "trial" => sub { Math::Prime::Util::trial_factor($_) for @nums }, }; delete $lref->{'fermat'} if $digits >= 9; diff --git a/examples/bench-factor-semiprime.pl b/examples/bench-factor-semiprime.pl index b9973f0..53b73ed 100755 --- a/examples/bench-factor-semiprime.pl +++ b/examples/bench-factor-semiprime.pl @@ -4,7 +4,7 @@ use warnings; $| = 1; # fast pipes srand(377); -use Math::Prime::Util qw/factor -nobigint/; +use Math::Prime::Util qw/factor/; use Math::Factor::XS qw/prime_factors/; use Math::Pari qw/factorint/; use Benchmark qw/:all/; @@ -93,9 +93,9 @@ foreach my $sp (@semiprimes) { print "OK\n"; my %compare = ( - 'MPU' => sub { factor($_) for @semiprimes; }, - 'MFXS' => sub { prime_factors($_) for @semiprimes; }, - 'Pari' => sub { foreach my $n (@semiprimes) { my @factors; my ($pn,$pc) = @{factorint($n)}; push @factors, (int($pn->[$_])) x $pc->[$_] for (0 .. $#{$pn}); } } + 'MPU' => sub { do { my @f = factor($_) } for @semiprimes; }, + 'MFXS' => sub { do { my @f = prime_factors($_) } for @semiprimes; }, + 'Pari' => sub { do { my ($pn,$pc) = @{factorint($_)}; my @f = map { int($pn->[$_]) x $pc->[$_] } 0 .. $#$pn; } for @semiprimes; }, ); delete $compare{'MFXS'} if $skip_mfxs; diff --git a/examples/test-factor-gnufactor.pl b/examples/test-factor-gnufactor.pl index 4546910..2e3b43e 100755 --- a/examples/test-factor-gnufactor.pl +++ b/examples/test-factor-gnufactor.pl @@ -25,15 +25,16 @@ my $num = 1000; # large numbers. You'll probably want to turn it off here as it will be # many thousands of times slower than MPU and Pari. -# A performance note: MPU and Pari get their results by calling a function. -# GNU factor gets its result by multiple shells out to /usr/bin/factor with -# the numbers as command line arguments. This adds a lot of overhead that -# has nothing to do with their implementation. For comparison, try turning -# on the MPU factor.pl script, and weep at Perl's startup cost. +# A benchmarking note: in this script, getting MPU and Pari results are done +# by calling a function, where getting GNU factor results are done via +# multiple shells to /usr/bin/factor with the inputs as command line +# arguments. This adds a lot of overhead that has nothing to do with their +# implementation. For comparison, I've included an option for getting MPU +# factoring via calling the factor.pl script. Weep at the startup cost. my $do_gnu = 1; my $do_pari = 1; -my $use_mpu_factor_script = 0; +my $use_mpu_factor_script = 1; my $rgen = sub { my $range = shift; diff --git a/factor.c b/factor.c index 092a2c9..ed61fea 100644 --- a/factor.c +++ b/factor.c @@ -11,8 +11,12 @@ #include "primality.h" #define FUNC_isqrt 1 #define FUNC_gcd_ui 1 +#define FUNC_is_perfect_square 1 #include "util.h" +/* factor will do trial division through this prime number, must be in table */ +#define TRIAL_TO_PRIME 81 + /* * You need to remember to use UV for unsigned and IV for signed types that * are large enough to hold our data. @@ -24,6 +28,29 @@ * match the native integer type used inside our Perl, so just use those. */ +static const unsigned short primes_small[] = + {0,2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97, + 101,103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181,191, + 193,197,199,211,223,227,229,233,239,241,251,257,263,269,271,277,281,283, + 293,307,311,313,317,331,337,347,349,353,359,367,373,379,383,389,397,401, + 409,419,421,431,433,439,443,449,457,461,463,467,479,487,491,499,503,509, + 521,523,541,547,557,563,569,571,577,587,593,599,601,607,613,617,619,631, + 641,643,647,653,659,661,673,677,683,691,701,709,719,727,733,739,743,751, + 757,761,769,773,787,797,809,811,821,823,827,829,839,853,857,859,863,877, + 881,883,887,907,911,919,929,937,941,947,953,967,971,977,983,991,997,1009, + 1013,1019,1021,1031,1033,1039,1049,1051,1061,1063,1069,1087,1091,1093, + 1097,1103,1109,1117,1123,1129,1151,1153,1163,1171,1181,1187,1193,1201, + 1213,1217,1223,1229,1231,1237,1249,1259,1277,1279,1283,1289,1291,1297, + 1301,1303,1307,1319,1321,1327,1361,1367,1373,1381,1399,1409,1423,1427, + 1429,1433,1439,1447,1451,1453,1459,1471,1481,1483,1487,1489,1493,1499, + 1511,1523,1531,1543,1549,1553,1559,1567,1571,1579,1583,1597,1601,1607, + 1609,1613,1619,1621,1627,1637,1657,1663,1667,1669,1693,1697,1699,1709, + 1721,1723,1733,1741,1747,1753,1759,1777,1783,1787,1789,1801,1811,1823, + 1831,1847,1861,1867,1871,1873,1877,1879,1889,1901,1907,1913,1931,1933, + 1949,1951,1973,1979,1987,1993,1997,1999,2003,2011}; +#define NPRIMES_SMALL (sizeof(primes_small)/sizeof(primes_small[0])) + + /* The main factoring loop */ /* Puts factors in factors[] and returns the number found. */ int factor(UV n, UV *factors) @@ -31,23 +58,41 @@ int factor(UV n, UV *factors) int nfactors = 0; /* Number of factored in factors result */ int const verbose = _XS_get_verbose(); - UV const tlim_lower = 401; /* Trial division through this prime */ - UV const tlim = 409; /* This means we've checked through here */ + UV f; UV tofac_stack[MPU_MAX_FACTORS+1]; UV fac_stack[MPU_MAX_FACTORS+1]; int ntofac = 0; /* Number of items on tofac_stack */ int nfac = 0; /* Number of items on fac_stack */ - if (n < 10000000) - return trial_factor(n, factors, 0); - - /* Trial division for all factors below tlim */ - nfactors = trial_factor(n, factors, tlim_lower); - n = factors[--nfactors]; + if (n < 4) { + factors[0] = n; + return (n == 1) ? 0 : 1; + } + while ( (n & 1) == 0 ) { factors[nfactors++] = 2; n /= 2; } + while ( (n % 3) == 0 ) { factors[nfactors++] = 3; n /= 3; } + while ( (n % 5) == 0 ) { factors[nfactors++] = 5; n /= 5; } + f = 7; + + if (f*f <= n) { + UV sp = 3; + while (++sp < TRIAL_TO_PRIME) { + f = primes_small[sp]; + if (f*f > n) break; + while ( (n%f) == 0 ) { + factors[nfactors++] = f; + n /= f; + } + } + } + if (n < f*f) { + if (n != 1) + factors[nfactors++] = n; + return nfactors; + } /* loop over each remaining factor, until ntofac == 0 */ do { - while ( (n >= (tlim*tlim)) && (!_XS_is_prime(n)) ) { + while ( (n >= f*f) && (!_XS_is_prime(n)) ) { int split_success = 0; /* Adjust the number of rounds based on the number size */ UV const br_rounds = ((n>>29) < 100000) ? 1500 : 1500; @@ -60,8 +105,8 @@ int factor(UV n, UV *factors) } /* SQUFOF with these parameters gets 99.9% of everything left */ if (!split_success && n < (UV_MAX>>2)) { - split_success = racing_squfof_factor(n,tofac_stack+ntofac, sq_rounds)-1; - if (verbose) printf("rsqufof %d\n", split_success); + split_success = squfof_factor(n,tofac_stack+ntofac, sq_rounds)-1; + if (verbose) printf("squfof %d\n", split_success); } /* At this point we should only have 16+ digit semiprimes. */ /* This p-1 gets about 2/3 of what makes it through the above */ @@ -89,8 +134,7 @@ int factor(UV n, UV *factors) n = tofac_stack[ntofac]; /* Set n to the other one */ } else { /* Factor via trial division. Nothing should make it here. */ - UV f = tlim; - UV m = tlim % 30; + UV m = f % 30; UV limit = isqrt(n); if (verbose) printf("doing trial on %"UVuf"\n", n); while (f <= limit) { @@ -132,6 +176,7 @@ int factor(UV n, UV *factors) return nfactors; } + int factor_exp(UV n, UV *factors, UV* exponents) { int i, j, nfactors; @@ -159,32 +204,8 @@ int factor_exp(UV n, UV *factors, UV* exponents) } - -static const unsigned short primes_small[] = - {0,2,3,5,7,11,13,17,19,23,29,31,37,41,43,47,53,59,61,67,71,73,79,83,89,97, - 101,103,107,109,113,127,131,137,139,149,151,157,163,167,173,179,181,191, - 193,197,199,211,223,227,229,233,239,241,251,257,263,269,271,277,281,283, - 293,307,311,313,317,331,337,347,349,353,359,367,373,379,383,389,397,401, - 409,419,421,431,433,439,443,449,457,461,463,467,479,487,491,499,503,509, - 521,523,541,547,557,563,569,571,577,587,593,599,601,607,613,617,619,631, - 641,643,647,653,659,661,673,677,683,691,701,709,719,727,733,739,743,751, - 757,761,769,773,787,797,809,811,821,823,827,829,839,853,857,859,863,877, - 881,883,887,907,911,919,929,937,941,947,953,967,971,977,983,991,997,1009, - 1013,1019,1021,1031,1033,1039,1049,1051,1061,1063,1069,1087,1091,1093, - 1097,1103,1109,1117,1123,1129,1151,1153,1163,1171,1181,1187,1193,1201, - 1213,1217,1223,1229,1231,1237,1249,1259,1277,1279,1283,1289,1291,1297, - 1301,1303,1307,1319,1321,1327,1361,1367,1373,1381,1399,1409,1423,1427, - 1429,1433,1439,1447,1451,1453,1459,1471,1481,1483,1487,1489,1493,1499, - 1511,1523,1531,1543,1549,1553,1559,1567,1571,1579,1583,1597,1601,1607, - 1609,1613,1619,1621,1627,1637,1657,1663,1667,1669,1693,1697,1699,1709, - 1721,1723,1733,1741,1747,1753,1759,1777,1783,1787,1789,1801,1811,1823, - 1831,1847,1861,1867,1871,1873,1877,1879,1889,1901,1907,1913,1931,1933, - 1949,1951,1973,1979,1987,1993,1997,1999,2003,2011}; -#define NPRIMES_SMALL (sizeof(primes_small)/sizeof(primes_small[0])) - int trial_factor(UV n, UV *factors, UV maxtrial) { - UV f, limit, newlimit; int nfactors = 0; if (maxtrial == 0) maxtrial = UV_MAX; @@ -194,56 +215,38 @@ int trial_factor(UV n, UV *factors, UV maxtrial) factors[0] = n; return (n == 1) ? 0 : 1; } - /* Trial division for 2, 3, 5, 7, and see if we're done */ + /* Trial division for 2, 3, 5 immediately */ while ( (n & 1) == 0 ) { factors[nfactors++] = 2; n /= 2; } if (3<=maxtrial) while ( (n % 3) == 0 ) { factors[nfactors++] = 3; n /= 3; } if (5<=maxtrial) while ( (n % 5) == 0 ) { factors[nfactors++] = 5; n /= 5; } - if (7<=maxtrial) while ( (n % 7) == 0 ) { factors[nfactors++] = 7; n /= 7; } - f = 11; - if ( (n < (f*f)) || (maxtrial < f) ) { - if (n != 1) - factors[nfactors++] = n; - return nfactors; - } - - /* Trial division to this number at most. Reduced as we find factors. */ - limit = isqrt(n); - if (limit > maxtrial) - limit = maxtrial; - /* Use the table of small primes to quickly do trial division. */ - { - UV sp = 5; - UV slimit = (limit < 2003) ? limit : 2003; - f = primes_small[sp]; - while (f <= slimit) { - if ( (n%f) == 0 ) { - do { - factors[nfactors++] = f; - n /= f; - } while ( (n%f) == 0 ); - newlimit = isqrt(n); - if (newlimit < slimit) slimit = newlimit; - if (newlimit < limit) limit = newlimit; + if (7*7 <= n) { + UV f, sp = 3; + while (++sp < NPRIMES_SMALL) { + f = primes_small[sp]; + if (f*f > n || f > maxtrial) break; + while ( (n%f) == 0 ) { + factors[nfactors++] = f; + n /= f; } - f = primes_small[++sp]; } - } - - /* Trial division using a mod-30 wheel for larger values */ - if (f <= limit) { - UV m = f % 30; - while (f <= limit) { - if ( (n%f) == 0 ) { - do { - factors[nfactors++] = f; - n /= f; - } while ( (n%f) == 0 ); - newlimit = isqrt(n); - if (newlimit < limit) limit = newlimit; + /* Trial division using a mod-30 wheel for larger values */ + if (f*f <= n && f <= maxtrial) { + UV newlimit, limit = isqrt(n); + if (limit > maxtrial) limit = maxtrial; + UV m = f % 30; + while (f <= limit) { + if ( (n%f) == 0 ) { + do { + factors[nfactors++] = f; + n /= f; + } while ( (n%f) == 0 ); + newlimit = isqrt(n); + if (newlimit < limit) limit = newlimit; + } + f += wheeladvance30[m]; + m = nextwheel30[m]; } - f += wheeladvance30[m]; - m = nextwheel30[m]; } } /* All done! */ @@ -253,74 +256,6 @@ int trial_factor(UV n, UV *factors, UV maxtrial) } -/* Return 0 if n is not a perfect square. Set sqrtn to int(sqrt(n)) if so. - * - * Some simple solutions: - * - * return ( ((n&2)!= 0) || ((n&7)==5) || ((n&11) == 8) ) ? 0 : 1; - * - * or: - * - * m = n & 31; - * if ( m==0 || m==1 || m==4 || m==9 || m==16 || m==17 || m==25 ) - * ...test for perfect square... - * - * or: - * - * if ( ((0x0202021202030213ULL >> (n & 63)) & 1) && - * ((0x0402483012450293ULL >> (n % 63)) & 1) && - * ((0x218a019866014613ULL >> ((n % 65) & 63)) & 1) && - * ((0x23b >> (n % 11) & 1)) ) { - * - * - * The following Bloom filter cascade works very well indeed. Read all - * about it here: http://mersenneforum.org/showpost.php?p=110896 - */ -static int is_perfect_square(UV n, UV* sqrtn) -{ - UV m; - m = n & 127; - if ((m*0x8bc40d7d) & (m*0xa1e2f5d1) & 0x14020a) return 0; - /* 82% of non-squares rejected here */ - -#if 0 - /* The big deal with this technique is that you do two total operations, - * one cheap (the & 127 above), one expensive (the modulo below) on n. - * The rest of the operations are 32-bit operations. This is a huge win - * if n is multiprecision. - * However, in this file we're doing native precision sqrt, so it just - * isn't expensive enough to justify this second filter set. - */ - lm = n % UVCONST(63*25*11*17*19*23*31); - m = lm % 63; - if ((m*0x3d491df7) & (m*0xc824a9f9) & 0x10f14008) return 0; - m = lm % 25; - if ((m*0x1929fc1b) & (m*0x4c9ea3b2) & 0x51001005) return 0; - m = 0xd10d829a*(lm%31); - if (m & (m+0x672a5354) & 0x21025115) return 0; - m = lm % 23; - if ((m*0x7bd28629) & (m*0xe7180889) & 0xf8300) return 0; - m = lm % 19; - if ((m*0x1b8bead3) & (m*0x4d75a124) & 0x4280082b) return 0; - m = lm % 17; - if ((m*0x6736f323) & (m*0x9b1d499) & 0xc0000300) return 0; - m = lm % 11; - if ((m*0xabf1a3a7) & (m*0x2612bf93) & 0x45854000) return 0; - /* 99.92% of non-squares are rejected now */ -#endif -#if 0 - /* This could save time on some platforms, but not on x86 */ - m = n % 63; - if ((m*0x3d491df7) & (m*0xc824a9f9) & 0x10f14008) return 0; -#endif - m = isqrt(n); - if (n != (m*m)) - return 0; - - if (sqrtn != 0) *sqrtn = m; - return 1; -} - static int _divisors_from_factors(UV v, UV npe, UV* fp, UV* fe, UV* res) { UV p, e, i; if (npe == 0) return 0; @@ -401,9 +336,9 @@ UV divisor_sum(UV n, UV k) UV product = 1; if (k > 5 || (k > 0 && n >= sigma_overflow[k-1])) return 0; - if (n == 0) return (k == 0) ? 2 : 1; /* divisors are [0,1] */ - if (n == 1) return 1; /* divisors are [1] */ - nfac = factor(n, factors); + if (n <= 1) /* n=0 divisors are [0,1] */ + return (n == 1) ? 1 : (k == 0) ? 2 : 1; /* n=1 divisors are [1] */ + nfac = factor(n,factors); if (k == 0) { for (i = 0; i < nfac; i++) { UV e = 1, f = factors[i]; @@ -491,7 +426,8 @@ int holf_factor(UV n, UV *factors, UV rounds) * so we won't be able to accurately detect it anyway. */ s++; /* s = ceil(sqrt(n*i)) */ m = sqrmod(s, n); - if (is_perfect_square(m, &f)) { + if (is_perfect_square(m)) { + f = isqrt(m); f = gcd_ui( (s>f) ? s-f : f-s, n); /* This should always succeed, but with overflow concerns.... */ if ((f == 1) || (f == n)) @@ -799,131 +735,7 @@ int pplus1_factor(UV n, UV *factors, UV B1) } -/* My modification of Ben Buhrow's modification of Bob Silverman's SQUFOF code. - */ - -int squfof_factor(UV n, UV *factors, UV rounds) -{ - IV qqueue[100+1]; - IV qpoint; - IV rounds2 = (IV) (rounds/16); - UV temp; - IV iq,ll,l2,p,pnext,q,qlast,r,s,t,i; - IV jter, iter; - int reloop; - - MPUassert( (n >= 3) && ((n%2) != 0) , "bad n in squfof_factor"); - - /* TODO: What value of n leads to overflow? */ - - qlast = 1; - s = isqrt(n); - - p = s; - temp = n - (s*s); /* temp = n - floor(sqrt(n))^2 */ - if (temp == 0) { - factors[0] = s; - factors[1] = s; - return 2; - } - - q = temp; /* q = excess of n over next smaller square */ - ll = 1 + 2*(IV)sqrt((double)(p+p)); - l2 = ll/2; - qpoint = 0; - - /* In the loop below, we need to check if q is a square right before */ - /* the end of the loop. Is there a faster way? The current way is */ - /* EXPENSIVE! (many branches and double prec sqrt) */ - - for (jter=0; (UV)jter < rounds; jter++) { - iq = (s + p)/q; - pnext = iq*q - p; - if (q <= ll) { - if ((q & 1) == 0) { qqueue[qpoint] = q/2; if (++qpoint>=100) jter = -1; } - else if (q <= l2) { qqueue[qpoint] = q; if (++qpoint>=100) jter = -1; } - if (jter < 0) { - factors[0] = n; return 1; - } - } - - t = qlast + iq*(p - pnext); - qlast = q; - q = t; - p = pnext; /* check for square; even iter */ - if (jter & 1) continue; /* jter is odd:omit square test */ - r = isqrt(q); /* r = floor(sqrt(q)) */ - if (q != r*r) continue; - if (qpoint == 0) break; - qqueue[qpoint] = 0; - reloop = 0; - for (i=0; i<qpoint-1; i+=2) { /* treat queue as list for simplicity*/ - if (r == qqueue[i]) { reloop = 1; break; } - if (r == qqueue[i+1]) { reloop = 1; break; } - } - if (reloop || (r == qqueue[qpoint-1])) continue; - break; - } /* end of main loop */ - - if ((UV)jter >= rounds) { - factors[0] = n; return 1; - } - - qlast = r; - p = p + r*((s - p)/r); - q = (n - (p*p)) / qlast; /* q = (n - p*p)/qlast (div is exact)*/ - for (iter=0; iter<rounds2; iter++) { /* unrolled second main loop */ - iq = (s + p)/q; - pnext = iq*q - p; - if (p == pnext) break; - t = qlast + iq*(p - pnext); - qlast = q; - q = t; - p = pnext; - iq = (s + p)/q; - pnext = iq*q - p; - if (p == pnext) break; - t = qlast + iq*(p - pnext); - qlast = q; - q = t; - p = pnext; - iq = (s + p)/q; - pnext = iq*q - p; - if (p == pnext) break; - t = qlast + iq*(p - pnext); - qlast = q; - q = t; - p = pnext; - iq = (s + p)/q; - pnext = iq*q - p; - if (p == pnext) break; - t = qlast + iq*(p - pnext); - qlast = q; - q = t; - p = pnext; - } - - if (iter >= rounds2) { - factors[0] = n; return 1; - } - - if ((q & 1) == 0) q/=2; /* q was factor or 2*factor */ - - if ( (q == 1) || ((UV)q == n) ) { - factors[0] = n; return 1; - } - - p = n/q; - - /* printf(" squfof found %lu = %lu * %lu in %ld/%ld rounds\n", n, p, q, jter, iter); */ - - factors[0] = p; - factors[1] = q; - MPUassert( factors[0] * factors[1] == n , "incorrect factoring"); - return 2; -} - -/* Another version, based on Ben Buhrow's racing SQUFOF. */ +/* SQUFOF, based on Ben Buhrow's racing version. */ typedef struct { @@ -985,15 +797,16 @@ static void squfof_unit(UV n, mult_t* mult_save, UV* f) SQUARE_SEARCH_ITERATION; /* Even iteration. Check for square: Qn = S*S */ - if (is_perfect_square( Qn, &S )) + if (is_perfect_square(Qn)) break; /* Odd iteration. */ SQUARE_SEARCH_ITERATION; } + S = isqrt(Qn); /* printf("found square %lu after %lu iterations with mult %d\n", Qn, i, mult_save->mult); */ - /* Reduce to G0 */ + /* Reduce to G0 */ Ro = P + S*((b0 - P)/S); t1 = Ro; So = (n - t1*t1)/S; @@ -1033,7 +846,7 @@ static const UV squfof_multipliers[] = 3*11, 3, 5*11, 5, 7*11, 7, 11, 1 }; #define NSQUFOF_MULT (sizeof(squfof_multipliers)/sizeof(squfof_multipliers[0])) -int racing_squfof_factor(UV n, UV *factors, UV rounds) +int squfof_factor(UV n, UV *factors, UV rounds) { const UV big2 = UV_MAX >> 2; mult_t mult_save[NSQUFOF_MULT]; @@ -1042,7 +855,7 @@ int racing_squfof_factor(UV n, UV *factors, UV rounds) UV rounds_done = 0; /* Caller should have handled these trivial cases */ - MPUassert( (n >= 3) && ((n%2) != 0) , "bad n in racing_squfof_factor"); + MPUassert( (n >= 3) && ((n%2) != 0) , "bad n in squfof_factor"); /* Too big */ if (n > big2) { diff --git a/factor.h b/factor.h index 7ee466a..7c4682c 100644 --- a/factor.h +++ b/factor.h @@ -18,7 +18,6 @@ extern int prho_factor(UV n, UV *factors, UV maxrounds); extern int pminus1_factor(UV n, UV *factors, UV B1, UV B2); extern int pplus1_factor(UV n, UV *factors, UV B); extern int squfof_factor(UV n, UV *factors, UV rounds); -extern int racing_squfof_factor(UV n, UV *factors, UV rounds); extern UV* _divisor_list(UV n, UV *num_divisors); diff --git a/lib/Math/Prime/Util.pm b/lib/Math/Prime/Util.pm index c8b410c..1018b31 100644 --- a/lib/Math/Prime/Util.pm +++ b/lib/Math/Prime/Util.pm @@ -136,7 +136,6 @@ BEGIN { *fermat_factor = \&Math::Prime::Util::PP::fermat_factor; *holf_factor = \&Math::Prime::Util::PP::holf_factor; *squfof_factor = \&Math::Prime::Util::PP::squfof_factor; - *rsqufof_factor = \&Math::Prime::Util::PP::squfof_factor; *pbrent_factor = \&Math::Prime::Util::PP::pbrent_factor; *prho_factor = \&Math::Prime::Util::PP::prho_factor; *pminus1_factor = \&Math::Prime::Util::PP::pminus1_factor; @@ -3863,10 +3862,7 @@ same advantages and disadvantages as Fermat's method. =head2 squfof_factor -=head2 rsqufof_factor - my @factors = squfof_factor($n); - my @factors = rsqufof_factor($n); # racing multiplier version Produces factors, not necessarily prime, of the positive number input. An optional number of rounds can be given as a second parameter. It is possible @@ -4799,9 +4795,7 @@ The current implementation does not use these ideas. Future versions might. The SQUFOF implementation being used is a slight modification to the public domain racing version written by Ben Buhrow. Enhancements with ideas from Ben's later code as well as Jason Papadopoulos's public domain implementations -are planned for a later version. The old SQUFOF implementation, still included -in the code, is my modifications to Ben Buhrow's modifications to Bob -Silverman's code. +are planned for a later version. The LMO implementation is based on the 2003 preprint from Christian Bau, as well as the 2006 paper from Tomás Oliveira e Silva. I also want to diff --git a/primality.c b/primality.c index 33880a5..5d0fdbf 100644 --- a/primality.c +++ b/primality.c @@ -8,6 +8,7 @@ #include "mulmod.h" #define FUNC_isqrt 1 #define FUNC_gcd_ui 1 +#define FUNC_is_perfect_square #include "util.h" /* Primality related functions, including Montgomery math */ @@ -148,19 +149,6 @@ static int monty_mr64(const uint64_t n, const UV* bases, int cnt) -/* Helper functions */ -static int is_perfect_square(UV n, UV* sqrtn) -{ - UV m; - m = n & 127; - if ((m*0x8bc40d7d) & (m*0xa1e2f5d1) & 0x14020a) return 0; - m = isqrt(n); - if (n != (m*m)) - return 0; - if (sqrtn != 0) - *sqrtn = m; - return 1; -} static int jacobi_iu(IV in, UV m) { int j = 1; UV n = (in < 0) ? -in : in; @@ -286,7 +274,7 @@ int _XS_BPSW(UV const n) return 0; if (jacobi_iu(D, n) == -1) break; - if (P == (3+20) && is_perfect_square(n, 0)) return 0; + if (P == (3+20) && is_perfect_square(n)) return 0; P++; if (P > 65535) croak("lucas_extrastrong_params: P exceeded 65535"); @@ -423,7 +411,7 @@ int _XS_is_lucas_pseudoprime(UV n, int strength) if (gcd_ui(Du, n) > 1 && gcd_ui(Du, n) != n) return 0; if (jacobi_iu(D, n) == -1) break; - if (Du == 21 && is_perfect_square(n, 0)) return 0; + if (Du == 21 && is_perfect_square(n)) return 0; Du += 2; sign = -sign; } @@ -437,7 +425,7 @@ int _XS_is_lucas_pseudoprime(UV n, int strength) if (gcd_ui(D, n) > 1 && gcd_ui(D, n) != n) return 0; if (jacobi_iu(D, n) == -1) break; - if (P == 21 && is_perfect_square(n, 0)) return 0; + if (P == 21 && is_perfect_square(n)) return 0; P++; } } @@ -595,7 +583,7 @@ int _XS_is_almost_extra_strong_lucas_pseudoprime(UV n, UV increment) return 0; if (jacobi_iu(D, n) == -1) break; - if (P == (3+20*increment) && is_perfect_square(n, 0)) return 0; + if (P == (3+20*increment) && is_perfect_square(n)) return 0; P += increment; if (P > 65535) croak("lucas_extrastrong_params: P exceeded 65535"); @@ -680,7 +668,7 @@ int _XS_is_frobenius_underwood_pseudoprime(UV n) if (n < 7) return (n == 2 || n == 3 || n == 5); if ((n % 2) == 0 || n == UV_MAX) return 0; - if (is_perfect_square(n,0)) return 0; + if (is_perfect_square(n)) return 0; x = 0; t = -1; diff --git a/t/50-factoring.t b/t/50-factoring.t index a3af134..49cfbce 100644 --- a/t/50-factoring.t +++ b/t/50-factoring.t @@ -113,7 +113,7 @@ plan tests => (3 * scalar @testn) + 2*scalar(keys %prime_factors) + 4*scalar(keys %all_factors) + 2*scalar(keys %factor_exponents) - + 10*9 + + 10*8 # 10 extra factoring tests * 8 algorithms + 8 + 1; @@ -159,7 +159,6 @@ extra_factor_test("trial_factor", sub {Math::Prime::Util::trial_factor(shift)}) extra_factor_test("fermat_factor", sub {Math::Prime::Util::fermat_factor(shift)}); extra_factor_test("holf_factor", sub {Math::Prime::Util::holf_factor(shift)}); extra_factor_test("squfof_factor", sub {Math::Prime::Util::squfof_factor(shift)}); -extra_factor_test("rsqufof_factor", sub {Math::Prime::Util::rsqufof_factor(shift)}); extra_factor_test("pbrent_factor", sub {Math::Prime::Util::pbrent_factor(shift)}); extra_factor_test("prho_factor", sub {Math::Prime::Util::prho_factor(shift)}); extra_factor_test("pminus1_factor",sub {Math::Prime::Util::pminus1_factor(shift)}); diff --git a/util.h b/util.h index 0c40e59..ff4a99d 100644 --- a/util.h +++ b/util.h @@ -46,7 +46,7 @@ extern UV znorder(UV a, UV n); #define MPU_PROB_PRIME_BEST UVCONST(100000000) #endif -#ifdef FUNC_isqrt +#if defined(FUNC_isqrt) || defined(FUNC_is_perfect_square) static UV isqrt(UV n) { UV root; #if BITS_PER_WORD == 32 @@ -101,4 +101,22 @@ static UV lcm_ui(UV x, UV y) { } #endif +#ifdef FUNC_is_perfect_square +/* See: http://mersenneforum.org/showpost.php?p=110896 */ +static int is_perfect_square(UV n) +{ + UV m; + m = n & 127; + if ((m*0x8bc40d7d) & (m*0xa1e2f5d1) & 0x14020a) return 0; + /* If your sqrt is particularly slow, this cuts out another 80%: + m = n % 63; if ((m*0x3d491df7) & (m*0xc824a9f9) & 0x10f14008) return 0; + and this cuts out some more: + m = n % 25; if ((m*0x1929fc1b) & (m*0x4c9ea3b2) & 0x51001005) return 0; + */ + m = (UV) ( sqrt((double) n) + 0.5 ); + return m*m == n; +} +#endif + + #endif -- Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libmath-prime-util-perl.git _______________________________________________ Pkg-perl-cvs-commits mailing list Pkg-perl-cvs-commits@lists.alioth.debian.org http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/pkg-perl-cvs-commits