This is an automated email from the git hooks/post-receive script. ppm-guest pushed a commit to annotated tag v0.32 in repository libmath-prime-util-perl.
commit a7f63a02366f3005abd8e20d12ed51b873e2a1cd Author: Dana Jacobsen <d...@acm.org> Date: Mon Sep 16 00:16:13 2013 -0700 Speedups for divisor_sum --- Changes | 4 +- aks.c | 1 - factor.c | 68 +++++++++------ lib/Math/Prime/Util.pm | 209 +++++++++++++++++++++++++--------------------- lib/Math/Prime/Util/PP.pm | 10 +-- sieve.c | 2 +- util.c | 10 +-- 7 files changed, 166 insertions(+), 138 deletions(-) diff --git a/Changes b/Changes index d41e4f0..316fda4 100644 --- a/Changes +++ b/Changes @@ -20,7 +20,9 @@ Revision history for Perl module Math::Prime::Util - divisor_sum can take an integer 'k' in the second argument to compute sigma_k. This is much faster than using subs, especially when the - result can be computed in XS using native precision. + result can be computed in XS using native precision. For integer + second arguments, the result will automatically be a bigint if needed. + It is also much faster for larger inputs. - Incorporate Montgomery reduction for primality testing, thanks to Wojciech Izykowski. This is a 1.3 to 1.5x speedup for is_prob_prime, diff --git a/aks.c b/aks.c index 119d55b..bc4a369 100644 --- a/aks.c +++ b/aks.c @@ -27,7 +27,6 @@ #include "ptypes.h" #include "aks.h" #include "util.h" -#include "sieve.h" #include "cache.h" #include "mulmod.h" diff --git a/factor.c b/factor.c index 94e919c..4c08193 100644 --- a/factor.c +++ b/factor.c @@ -185,15 +185,17 @@ int trial_factor(UV n, UV *factors, UV maxtrial) /* Use the table of small primes to quickly do trial division. */ { UV sp = 5; + UV slimit = (limit < 2003) ? limit : 2003; f = primes_small[sp]; - while (f <= limit && f <= 2003) { + while (f <= slimit) { if ( (n%f) == 0 ) { do { factors[nfactors++] = f; n /= f; } while ( (n%f) == 0 ); newlimit = isqrt(n); - if (newlimit < limit) limit = newlimit; + if (newlimit < slimit) slimit = newlimit; + if (newlimit < limit) limit = newlimit; } f = primes_small[++sp]; } @@ -291,43 +293,55 @@ static int is_perfect_square(UV n, UV* sqrtn) } +/* + * The usual method, on OEIS for instance, is: + * (p^(k*(e+1))-1) / (p^k-1) + * but that overflows quicky. Instead we rearrange as: + * 1 + p^k + p^k^2 + ... p^k^e + */ UV _XS_divisor_sum(UV n, UV k) { UV factors[MPU_MAX_FACTORS+1]; - int nfac, i; + int nfac, i, j; UV product = 1; if (n <= 1) return n; nfac = factor(n, factors); - for (i = 0; i < nfac; i++) { - UV f = factors[i]; - UV e = 1; - while (i+1 < nfac && f == factors[i+1]) - { e++; i++; } - if (k == 0) { + if (k == 0) { + for (i = 0; i < nfac; i++) { + UV e = 1, f = factors[i]; + while (i+1 < nfac && f == factors[i+1]) { e++; i++; } product *= (e+1); - } else if (k == 1 && e == 1) { - product *= f+1; - } else if (k == 1) { - UV fmult = f * f; - while (e-- >= 2) - fmult *= f; - product *= (fmult - 1) / (f - 1); - } else { /* Overflow is a concern */ - UV j, pk = f * f; - for (j = 2; j < k; j++) pk *= f; - if (e == 1) { - product *= pk+1; - } else { - /* Less overflow than (pk^(e+1)-1) / (pk-1) */ - UV fmult = 1 + pk + pk*pk; - UV pke = pk * pk; - for (j = 2; j < e; j++) { + } + } else if (k == 1) { + for (i = 0; i < nfac; i++) { + UV e = 1, f = factors[i]; + UV fmult = 1 + f; + while (i+1 < nfac && f == factors[i+1]) { e++; i++; } + if (e > 1) { + UV pke = f; + for (j = 1; j < e; j++) { + pke *= f; + fmult += pke; + } + } + product *= fmult; + } + } else { + for (i = 0; i < nfac; i++) { + UV e = 1, f = factors[i]; + UV fmult, pk = f; + for (j = 1; j < k; j++) pk *= f; + while (i+1 < nfac && f == factors[i+1]) { e++; i++; } + fmult = 1 + pk; + if (e > 1) { + UV pke = pk; + for (j = 1; j < e; j++) { pke *= pk; fmult += pke; } - product *= fmult; } + product *= fmult; } } return product; diff --git a/lib/Math/Prime/Util.pm b/lib/Math/Prime/Util.pm index 533ba76..0411681 100644 --- a/lib/Math/Prime/Util.pm +++ b/lib/Math/Prime/Util.pm @@ -869,7 +869,7 @@ sub primes { my $l = ($_Config{'maxbits'} > 32 && $bits > 79) ? 63 : 31; $l = $bits-2 if $bits-2 < $l; my $arange = (1 << $l) - 1; # 2^$l-1 - my $brange = Math::BigInt->new(2)->bpow($bits-$l-2)->bsub(1); + my $brange = Math::BigInt->new(2)->bpow($bits-$l-2)->bdec(); my $b = 2 * $irandf->($brange) + 1; # Precalculate some modulii so we can do trial division on native int # 9699690 = 2*3*5*7*11*13*17*19, so later operations can be native ints @@ -1009,7 +1009,7 @@ sub primes { # R is a random number between $I+1 and 2*$I my $R = $I + 1 + $irandf->( $I - 1 ); #my $n = 2 * $R * $q + 1; - my $n = Math::BigInt->new(2)->bmul($R)->bmul($q)->badd(1); + my $n = Math::BigInt->new(2)->bmul($R)->bmul($q)->binc(); # We constructed a promising looking $n. Now test it. print "." if $verbose > 2; if ($_HAVE_GMP) { @@ -1082,7 +1082,7 @@ sub primes { my $qpp = random_nbit_prime($lpp); $qp = Math::BigInt->new("$qp") unless ref($qp) eq 'Math::BigInt'; $qpp = Math::BigInt->new("$qpp") unless ref($qpp) eq 'Math::BigInt'; - my ($il, $rem) = Math::BigInt->new(2)->bpow($l-1)->bsub(1)->bdiv(2*$qpp); + my ($il, $rem) = Math::BigInt->new(2)->bpow($l-1)->bdec()->bdiv(2*$qpp); $il++ if $rem > 0; $il = $il->as_int(); my $iu = Math::BigInt->new(2)->bpow($l)->bsub(2)->bdiv(2*$qpp)->as_int(); @@ -1090,11 +1090,11 @@ sub primes { for (my $i = $istart; $i <= $iu; $i++) { # Search for q my $q = 2 * $i * $qpp + 1; next unless is_prob_prime($q); - my $pp = $qp->copy->bmodpow($q-2, $q)->bmul(2)->bmul($qp)->bsub(1); + my $pp = $qp->copy->bmodpow($q-2, $q)->bmul(2)->bmul($qp)->bdec(); my ($jl, $rem) = Math::BigInt->new(2)->bpow($t-1)->bsub($pp)->bdiv(2*$q*$qp); $jl++ if $rem > 0; $jl = $jl->as_int(); - my $ju = Math::BigInt->new(2)->bpow($t)->bsub(1)->bsub($pp)->bdiv(2*$q*$qp)->as_int(); + my $ju = Math::BigInt->new(2)->bpow($t)->bdec()->bsub($pp)->bdiv(2*$q*$qp)->as_int(); my $jstart = $jl + $irandf->($ju - $jl); for (my $j = $jstart; $j <= $ju; $j++) { # Search for p my $p = $pp + 2 * $j * $q * $qp; @@ -1352,28 +1352,21 @@ sub euler_phi { return $n if $n <= 1; my @factors = factor($n); + my $totient = $n - $n + 1; + my $lastf = 0; + if (ref($n) ne 'Math::BigInt') { - my $totient = 1; - my $lastf = 0; foreach my $f (@factors) { if ($f == $lastf) { $totient *= $f; } else { $totient *= $f-1; $lastf = $f; } } - return $totient; - } - - my $totient = $n->copy->bone; - my $lastf = 0; - foreach my $factor (@factors) { - # This screwball line is here to solve some issues with the GMP backend, - # which has a weird bug. Results of the multiply can turn negative (!) - # if we don't do this. Perhaps related to RT 71548? - # perl -le 'use Math::BigInt lib=>'GMP'; my $a = 2931542417; my $n = Math::BigInt->new("49754396241690624"); my $x = $n*$a; print $x;' - # perl -le 'use Math::BigInt lib=>'GMP'; my $a = Math::BigInt->bone; $a *= 2931542417; $a *= 49754396241690624; print $a;' - # TODO: more work reproducing this - my $f = $n->copy->bzero->badd("$factor"); - if ($f == $lastf) { $totient->bmul($f); } - else { $totient->bmul($f->copy->bsub(1)); $lastf = $f; } + } else { + my $zero = $n->copy->bzero; + foreach my $factor (@factors) { + my $f = $zero->copy->badd("$factor"); # Math::BigInt::GMP RT 71548 + if ($f == $lastf) { $totient->bmul($f); } + else { $totient->bmul($f->copy->bdec()); $lastf = $f; } + } } return $totient; } @@ -1404,22 +1397,27 @@ sub jordan_totient { my $zero = $n->copy->bzero; foreach my $factor (@factors) { my $fmult = $zero->copy->badd("$factor")->bpow($k); - $totient->bmul($fmult->copy->bsub(1)); + $totient->bmul($fmult->copy->bdec()); $totient->bmul($fmult) for (2 .. $factor_mult{$factor}); } } return $totient; } -# Mathematica and Pari both have functions like this. +my @_ds_overflow = # We'll use BigInt math if the input is larger than this. + (~0 > 4294967295) + ? (0, 3000000000000000000, 3000000000, 2487240, 64260, 7026) + : (0, 845404560, 52560, 1548, 252, 84); sub divisor_sum { my($n, $k) = @_; return (0,1)[$n] if defined $n && $n <= 1; _validate_num($n) || _validate_positive_integer($n); - $k = 1 unless defined $k; + # With no argument, call the XS routine for k=1 immediately if possible. + return _XS_divisor_sum($n, 1) + if !defined $k && $n <= $_XS_MAXVAL && $n < $_ds_overflow[1]; - if (ref($k) eq 'CODE') { + if (defined $k && ref($k) eq 'CODE') { my $sum = $k->(1); return $sum if $n == 1; foreach my $f (all_factors($n), $n ) { @@ -1429,34 +1427,59 @@ sub divisor_sum { } croak "Second argument must be a code ref or number" - unless _validate_num($k) || _validate_positive_integer($k); - - if ($n <= $_XS_MAXVAL) { - return _XS_divisor_sum($n, $k) if $k <= 1; - # 2 overflows 64-bit at 3,000,000,000 ish - # 3 overflows 64-bit at 2487240 - # 4 overflows 64-bit at 64260 - # If we think we can handle overflow, we could do a few more + unless !defined $k || _validate_num($k) || _validate_positive_integer($k); + $k = 1 if !defined $k; + + my $will_overflow = ($k == 0) ? 0 + : ($k > 5) ? 1 + : $n >= $_ds_overflow[$k]; + + return _XS_divisor_sum($n, $k) if $n <= $_XS_MAXVAL && !$will_overflow; + + if ($will_overflow) { + if (!defined $Math::BigInt::VERSION) { + eval { require Math::BigInt; Math::BigInt->import(try=>'GMP,Pari'); 1; } + or do { croak "Cannot load Math::BigInt"; }; + } } - my $bone = (ref($_[0]) eq 'Math::BigInt') ? $_[0]->copy->bone : 1; - my $product = $bone; + # The standard way is: + # my $pk = $f ** $k; $product *= ($pk ** ($e+1) - 1) / ($pk - 1); + # But we get less overflow using: + # my $pk = $f ** $k; $product *= $pk**E for E in 0 .. e + # Also separate BigInt and do fiddly bits for better performance. + + my $product = 1; my @factors = ($n <= $_XS_MAXVAL) ? _XS_factor($n) : factor($n); - while (@factors) { - my $f = shift @factors; - my $e = 1; - while (@factors && $factors[0] == $f) { - $e++; - shift @factors; + + if (!$will_overflow) { + while (@factors) { + my ($e, $f) = (1, shift @factors); + while (@factors && $factors[0] == $f) { $e++; shift @factors; } + if ($k == 0) { + $product *= ($e+1); + } else { + my $pk = $f ** $k; + my $fmult = $pk + 1; + foreach my $E (2 .. $e) { $fmult += $pk**$E } + $product *= $fmult; + } } - if ($k == 0) { $product *= ($e+1); } - elsif ($k == 1) { - if ($e == 1) { $product *= $f+1; } - else { $product *= ( ($bone*$f)**($e+1) - 1) / ($f - 1); } - } else { - # TODO: do the long way around to get less overflow, see factor.c - my $pk = ($bone * $f) ** $k; - $product *= ($e == 1) ? $pk+1 : ($pk ** ($e+1) - 1) / ($pk - 1); + } else { + $product = Math::BigInt->bone; + my $bik = Math::BigInt->new("$k"); + while (@factors) { + my ($e, $f) = (1, shift @factors); + while (@factors && $factors[0] == $f) { $e++; shift @factors; } + my $pk = Math::BigInt->new("$f")->bpow($bik); + if ($e == 1) { $pk->binc(); $product->bmul($pk); } + elsif ($e == 2) { $pk->badd($pk*$pk)->binc(); $product->bmul($pk); } + else { + my $fmult = $pk; + foreach my $E (2 .. $e) { $fmult += $pk->copy->bpow($E) } + $fmult->binc(); + $product *= $fmult; + } } } return $product; @@ -1468,11 +1491,20 @@ sub _generic_forprimes (&$;$) { ## no critic qw(ProhibitSubroutinePrototypes) if (!defined $end) { $end = $beg; $beg = 2; } _validate_num($beg) || _validate_positive_integer($beg); _validate_num($end) || _validate_positive_integer($end); - my $p = ($beg <= 2) ? 2 : next_prime($beg-1); - while ($p <= $end) { - local *_ = \$p; - $sub->(); - $p = next_prime($p); + # It's possible we're here just because the arguments were bigints < 2^64 + # TODO: make a function to convert native size bigints to UVs, and let the + # XS functions call that, so we don't do these loop-de-loops. + if (!ref($beg) && !ref($end) && $beg <= $_XS_MAXVAL && $end <= $_XS_MAXVAL) { + return forprimes( \&$sub, $beg, $end); + } + $beg = 2 if $beg < 2; + { + my $pp; + local *_ = \$pp; + for (my $p = next_prime($beg-1); $p <= $end; $p = next_prime($p)) { + $pp = $p; + $sub->(); + } } } @@ -2477,9 +2509,10 @@ Version 0.31 say "lamba(49) = ", log(exp_mangoldt(49)); # divisor sum - $sigma = divisor_sum( $n ); - $sigma0 = divisor_sum( $n, 1 ); - $sigma2 = divisor_sum( $n, sub { $_[0]*$_[0] } ); + $sigma = divisor_sum( $n ); # sum of divisors + $sigma0 = divisor_sum( $n, 0 ); # count of divisors + $sigmak = divisor_sum( $n, $k ); + $sigmaf = divisor_sum( $n, sub { log($_[0]) } ); # arbitrary func # primorial n#, primorial p(n)#, and lcm say "The product of primes below 47 is ", primorial(47); @@ -3450,41 +3483,28 @@ yield similar results, albeit slower and using more memory. say "Sum of divisors of $n:", divisor_sum( $n ); -This function takes a positive integer as input and returns the sum of all -the divisors of the input, including 1 and itself. This is known as the -sigma function (see Hardy and Wright section 16.7, or OEIS A000203). - -If a second argument is given that is numeric, it is used as the sigma -parameter. The result will then be the sum of each divisor raised to -this power. Hence C<0> will count the divisors, C<1> will sum them, -C<2> will sum the squares, and so on. - -The more general form takes a code reference as a second parameter, which -is applied to each divisor before the summation. This allows computation -of numerous functions such as OEIS A000005 [d(n), sigma_0(n), tau(n)]: - - divisor_sum( $n, sub { 1 } ); - -OEIS A001157 [sigma_2(n)]: - - divisor_sum( $n, sub { $_[0]*$_[0] } ) - -the general sigma_k (OEIS A00005, A000203, A001157, A001158, etc.): +This function takes a positive integer as input and returns the sum of the +k-th powers of the divisors of the input, including 1 and itself. If the +second argument (C<k>) is omitted it is assumed to be 1. This is known as +the sigma function (see Hardy and Wright section 16.7, or OEIS A000203). +The API is identical to Pari/GP's C<sigma> function. - divisor_sum( $n, sub { $_[0] ** $k } ); +The second argument can be a code reference, which is called for each divisor +and the results are summed. This allows computation of other functions, +but will be less efficient than using the numeric second argument. -the 5th Jordan totient (OEIS A059378): +An example of the 5th Jordan totient (OEIS A059378): divisor_sum( $n, sub { my $d=shift; $d**5 * moebius($n/$d); } ); -though in the last case we have a function L</jordan_totient> to compute -it more efficiently. +though we have a function L</jordan_totient> which is more efficient. This function is useful for calculating things like aliquot sums, abundant numbers, perfect numbers, etc. -The summation is done as a bigint if the input was a bigint object. You may -need to ensure the result of the subroutine does not overflow a native int. +For numeric second arguments (sigma computations), the result will be a bigint +if necessary. For the code reference case, the user must take care to return +bigints if overflow will be a concern. =head2 primorial @@ -4405,11 +4425,10 @@ With it installed, it is about 2x slower than Math::Pari. Similar to MPU's L<moebius>. Comparisons are similar to C<eulerphi>. -=item C<sumdiv> +=item C<sigma> -Similar to MPU's L<divisor_sum>. The standard sum (sigma_1) is -very fast in MPU. Giving it a sub makes it much slower, and for numbers -with very many factors, Pari is I<much> faster. +Similar to MPU's L<divisor_sum>. MPU is ~10x faster for native integers +and about 2x slower for bigints. =item C<eint1> @@ -4425,13 +4444,13 @@ and complex inputs). Overall, L<Math::Pari> supports a huge variety of functionality and has a sophisticated and mature code base behind it (noting that the default version of Pari used is about 10 years old now). -For native integers sometimes -the functions can be slower, but bigints are often superior and it rarely -has any performance surprises. Some of the unique features MPU offers include -super fast prime counts, nth_prime, ECPP primality proofs with certificates, -approximations and limits for both, random primes, fast Mertens calculations, -Chebyshev theta and psi functions, and the logarithmic integral and Riemann R -functions. All with fairly minimal installation requirements. +For native integers often using Math::Pari will be slower, but bigints are +often superior and it rarely has any performance surprises. Some of the +unique features MPU offers include super fast prime counts, nth_prime, +ECPP primality proofs with certificates, approximations and limits for both, +random primes, fast Mertens calculations, Chebyshev theta and psi functions, +and the logarithmic integral and Riemann R functions. All with fairly +minimal installation requirements. =head1 PERFORMANCE diff --git a/lib/Math/Prime/Util/PP.pm b/lib/Math/Prime/Util/PP.pm index 551d5a7..e4e1ba5 100644 --- a/lib/Math/Prime/Util/PP.pm +++ b/lib/Math/Prime/Util/PP.pm @@ -780,7 +780,7 @@ sub is_pseudoprime { my $x = (ref($n) eq 'Math::BigInt') ? $n->copy->bzero->badd($base)->bmodpow($n-1,$n) : _native_powmod($base, $n-1, $n); - return ($x == 1); + return ($x == 1) ? 1 : 0; } sub miller_rabin { @@ -800,7 +800,7 @@ sub miller_rabin { if ( ref($n) eq 'Math::BigInt' ) { my $s = 0; - my $nminus1 = $n->copy->bsub(1); + my $nminus1 = $n->copy->bdec(); my $d = $nminus1->copy; while ($d->is_even) { $s++; @@ -1143,7 +1143,7 @@ sub is_almost_extra_strong_lucas_pseudoprime { my $ZERO = $n->copy->bzero; my $V = $ZERO + $P; # V_{k} my $W = $ZERO + $P*$P-2; # V_{k+1} - my $kstr = substr($n->copy->badd(1)->as_bin, 2); + my $kstr = substr($n->copy->binc()->as_bin, 2); $kstr =~ s/(0*)$//; my $s = length($1); my $bpos = 0; @@ -1330,7 +1330,7 @@ sub is_aks_prime { return 1 if $r >= $n; # Since r is a prime, phi(r) = r-1 - my $rlimit = int( Math::BigFloat->new("$r")->bsub(1) + my $rlimit = int( Math::BigFloat->new("$r")->bdec() ->bsqrt->bmul($log2n)->bfloor->bstr); $_poly_bignum = 1; @@ -1711,7 +1711,7 @@ sub pminus1_factor { my $one = $n->copy->bone; my ($j, $q, $saveq) = (32, 2, 2); my $t = $one->copy; - my $a = $one->copy->badd(1); + my $a = $one->copy->binc(); my $savea = $a->copy; my $f = 1; my($pc_beg, $pc_end, @bprimes); diff --git a/sieve.c b/sieve.c index 5e94d5d..7a8de05 100644 --- a/sieve.c +++ b/sieve.c @@ -372,7 +372,7 @@ int next_segment_primes(void* vctx, UV* base, UV* low, UV* high) ctx->lod += range_d; ctx->low = *high + 2; - + return 1; } diff --git a/util.c b/util.c index b4436af..17d927b 100644 --- a/util.c +++ b/util.c @@ -152,13 +152,7 @@ int _XS_is_prime(UV n) const unsigned char* sieve; int isprime; - if (n <= 10) { - switch (n) { - case 2: case 3: case 5: case 7: return 2; break; - default: break; - } - return 0; - } + if (n <= 10) return (n == 2 || n == 3 || n == 5 || n == 7) ? 2 : 0; d = n/30; m = n - d*30; mtab = masktab30[m]; /* Bitmask in mod30 wheel */ @@ -806,7 +800,7 @@ signed char* _moebius_range(UV lo, UV hi) croak("Could not get memory for %"UVuf" moebius results\n", hi-lo+1); A = (unsigned char*) mu; if (sqrtn*sqrtn != hi) sqrtn++; /* ceil sqrtn */ - + logp = 1; nextlog = 3; /* 2+1 */ START_DO_FOR_EACH_PRIME(2, sqrtn) { UV p2 = p*p; -- Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libmath-prime-util-perl.git _______________________________________________ Pkg-perl-cvs-commits mailing list Pkg-perl-cvs-commits@lists.alioth.debian.org http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/pkg-perl-cvs-commits