This is an automated email from the git hooks/post-receive script. ppm-guest pushed a commit to annotated tag v0.28 in repository libmath-prime-util-perl.
commit ed60b06a4e197d8150e5f2a764f1a50992d1afa4 Author: Dana Jacobsen <d...@acm.org> Date: Tue May 21 17:17:36 2013 -0700 Update primearray and add a simple iterator --- TODO | 5 ++ examples/bench-primearray.pl | 120 +++++++++++++++++++++++++++++++ lib/Math/Prime/Util.pm | 16 ++++- lib/Math/Prime/Util/PrimeArray.pm | 146 ++++++++++++++++++++++++-------------- 4 files changed, 231 insertions(+), 56 deletions(-) diff --git a/TODO b/TODO index f716733..b2bce1a 100644 --- a/TODO +++ b/TODO @@ -45,6 +45,11 @@ - Tests - Examples +- prime_iterator + - Documentation + - Tests + - Examples + - Make forprimes use a segment sieve - Figure out a way to make the internal FOR_EACH_PRIME macros use a segmented diff --git a/examples/bench-primearray.pl b/examples/bench-primearray.pl new file mode 100755 index 0000000..bc2d402 --- /dev/null +++ b/examples/bench-primearray.pl @@ -0,0 +1,120 @@ +#!/usr/bin/env perl +use strict; +use warnings; +use Math::Prime::Util qw/:all/; +use Math::Prime::Util::PrimeArray qw/make_prime_iterator/; +use Math::NumSeq::Primes; +use Math::Prime::TiedArray; +use Benchmark qw/:all/; +use List::Util qw/min max/; +my $count = shift || -2; + +my ($s, $nlimit, $ilimit, $expect); + +if (1) { +print "summation to 100k\n"; +$nlimit = 100000; +$ilimit = prime_count($nlimit)-1; +$expect = 0; forprimes { $expect += $_ } $nlimit; + +cmpthese($count,{ + 'primes' => sub { $s=0; $s += $_ for @{primes($nlimit)}; die unless $s == $expect; }, + 'forprimes' => sub { $s=0; forprimes { $s += $_ } $nlimit; die unless $s == $expect; }, + 'iterator' => sub { $s=0; my $it = prime_iterator(); + $s += $it->() for 0..$ilimit; + die unless $s == $expect; }, + 'pa iter' => sub { $s=0; my $it = make_prime_iterator(); + $s += $it->() for 0..$ilimit; + die unless $s == $expect; }, + 'pa index' => sub { $s=0; tie my @primes, "Math::Prime::Util::PrimeArray"; + $s += $primes[$_] for 0..$ilimit; + die unless $s == $expect; }, + 'pa loop' => sub { $s=0; tie my @primes, "Math::Prime::Util::PrimeArray"; + for (@primes) { last if $_ > $nlimit; $s += $_; } + die $s unless $s == $expect; }, + 'pa slice' => sub { $s=0; tie my @primes, "Math::Prime::Util::PrimeArray"; + $s += $_ for @primes[0..$ilimit]; + die unless $s == $expect; }, + 'pa each' => sub { $s=0; tie my @primes, "Math::Prime::Util::PrimeArray"; + while(my(undef,$v) = each @primes) { last if $v > $nlimit; $s += $v; } + die $s unless $s == $expect; }, + 'pa shift' => sub { $s=0; tie my @primes, "Math::Prime::Util::PrimeArray"; + while ((my $p = shift @primes) <= $nlimit) { $s += $p; } + die unless $s == $expect; }, + 'numseq' => sub { $s=0; my $seq = Math::NumSeq::Primes->new; + while (1) { my($undev,$v) = $seq->next; last if $v > $nlimit; $s += $v; } + die $s unless $s == $expect; }, + # This was slightly faster than slice or shift + 'tiedarray' => sub { $s=0; tie my @primes, "Math::Prime::TiedArray", extend_step => 1000; + $s += $primes[$_] for 0..$ilimit; + die unless $s == $expect; }, +}); +} + +if (0) { +print "summation to 10M\n"; +print "Skipping Math::Prime::TiedArray as it will take too long\n"; +$nlimit = 10_000_000; +$ilimit = prime_count($nlimit)-1; +$expect = 0; forprimes { $expect += $_ } $nlimit; + +cmpthese($count,{ + 'primes' => sub { $s=0; $s += $_ for @{primes($nlimit)}; die unless $s == $expect; }, + 'forprimes' => sub { $s=0; forprimes { $s += $_ } $nlimit; die unless $s == $expect; }, + 'pa index' => sub { $s=0; tie my @primes, "Math::Prime::Util::PrimeArray"; + $s += $primes[$_] for 0..$ilimit; + die unless $s == $expect; }, + 'pa loop' => sub { $s=0; tie my @primes, "Math::Prime::Util::PrimeArray"; + for (@primes) { last if $_ > $nlimit; $s += $_; } + die $s unless $s == $expect; }, + 'pa slice' => sub { $s=0; tie my @primes, "Math::Prime::Util::PrimeArray"; + $s += $_ for @primes[0..$ilimit]; + die unless $s == $expect; }, + 'pa each' => sub { $s=0; tie my @primes, "Math::Prime::Util::PrimeArray"; + while(my(undef,$v) = each @primes) { last if $v > $nlimit; $s += $v; } + die $s unless $s == $expect; }, + 'pa shift' => sub { $s=0; tie my @primes, "Math::Prime::Util::PrimeArray"; + while ((my $p = shift @primes) <= $nlimit) { $s += $p; } + die unless $s == $expect; }, + 'numseq' => sub { $s=0; my $seq = Math::NumSeq::Primes->new; + while (1) { my($undev,$v) = $seq->next; last if $v > $nlimit; $s += $v; } + die $s unless $s == $expect; }, +}); +} + +if (0) { +print "Walk primes backwards from 1M\n"; +$nlimit = 1_000_000; +$ilimit = prime_count($nlimit)-1; +$expect = 0; forprimes { $expect += $_ } $nlimit; + +cmpthese($count,{ + 'rev primes'=> sub { $s=0; $s += $_ for reverse @{primes($nlimit)}; die unless $s == $expect; }, + 'nthprime' => sub { $s=0; $s += nth_prime($_) for reverse 1..$ilimit+1; die unless $s == $expect; }, + 'pa index' => sub { $s=0; tie my @primes, "Math::Prime::Util::PrimeArray"; + $s += $primes[$_] for reverse 0..$ilimit; + die unless $s == $expect; }, + 'tiedarray' => sub { $s=0; tie my @primes, "Math::Prime::TiedArray", extend_step => 1000; + $s += $primes[$_] for reverse 0..$ilimit; + die unless $s == $expect; }, +}); +} + +if (0) { +print "Random walk in 1M\n"; +srand(29); +my @rindex; +do { push @rindex, int(rand(1000000)) } for 1..10000; +$expect = 0; $expect += nth_prime($_+1) for @rindex; + +cmpthese($count,{ + 'nthprime' => sub { $s=0; $s += nth_prime($_+1) for @rindex; }, + 'pa index' => sub { $s=0; tie my @primes, "Math::Prime::Util::PrimeArray"; + $s += $primes[$_] for @rindex; + die unless $s == $expect; }, + # Argh! Is it possible to write a slower sieve than the one MPTA uses? + #'tiedarray' => sub { $s=0; tie my @primes, "Math::Prime::TiedArray", extend_step => 10000; + # $s += $primes[$_] for @rindex; + # die unless $s == $expect; }, +}); +} diff --git a/lib/Math/Prime/Util.pm b/lib/Math/Prime/Util.pm index 0dac169..1347132 100644 --- a/lib/Math/Prime/Util.pm +++ b/lib/Math/Prime/Util.pm @@ -21,7 +21,7 @@ our @EXPORT_OK = is_aks_prime miller_rabin primes - forprimes + forprimes prime_iterator next_prime prev_prime prime_count prime_count_lower prime_count_upper prime_count_approx @@ -1365,6 +1365,20 @@ sub _generic_forprimes (&$;$) { } } +sub prime_iterator { + my($start) = @_; + my $p = (defined $start && $start > 0) ? $start-1 : 0; + if (ref($p) ne 'Math::BigInt' && $p <= $_XS_MAXVAL) { + return sub { $p = _XS_next_prime($p); return $p; }; + } elsif ($_HAVE_GMP) { + return sub { my $next = Math::Prime::Util::GMP::next_prime($p); + $p = $p-$p+$next; + return $p; }; + } else { + return sub { $p = Math::Prime::Util::PP::next_prime($p); return $p; } + } +} + # Omega function A001221. Just an example. sub _omega { my($n) = @_; diff --git a/lib/Math/Prime/Util/PrimeArray.pm b/lib/Math/Prime/Util/PrimeArray.pm index 76d7adc..39fa38a 100644 --- a/lib/Math/Prime/Util/PrimeArray.pm +++ b/lib/Math/Prime/Util/PrimeArray.pm @@ -4,7 +4,7 @@ use warnings; BEGIN { $Math::Prime::Util::PrimeArray::AUTHORITY = 'cpan:DANAJ'; - $Math::Prime::Util::PrimeArray::VERSION = '0.14'; + $Math::Prime::Util::PrimeArray::VERSION = '0.28'; } # parent is cleaner, and in the Perl 5.10.1 / 5.12.0 core, but not earlier. @@ -14,7 +14,7 @@ our @EXPORT_OK = qw( ); our %EXPORT_TAGS = (all => [ @EXPORT_OK ]); -use Math::Prime::Util qw/nth_prime nth_prime_upper primes prime_precalc/; +use Math::Prime::Util qw/nth_prime nth_prime_upper nth_prime_lower primes prime_precalc next_prime prev_prime/; use Tie::Array; use Carp qw/carp croak confess/; @@ -59,27 +59,38 @@ sub FETCH { if ( $index < $begidx || $index > $endidx ) { - if ($index == $endidx+1) { $self->{ACCESS_TYPE}++; } - elsif ($index == $begidx-1) { $self->{ACCESS_TYPE}--; } - else { $self->{ACCESS_TYPE}=0; } - - if ($index == $endidx+1 && $self->{ACCESS_TYPE} > 2) { # Forward iter - - my $end_prime = nth_prime_upper($index + 10000); - $self->{PRIMES} = primes( $self->{PRIMES}->[-1]+1, $end_prime ); - - $begidx = $endidx+1; - - # TODO: backwards iter - - } else { # Looks like random access so far. Get a small range. - - my $start_idx = ($index < 500) ? 0 : $index - 500; - my $start_prime = nth_prime($start_idx+1); # This is exact - my $end_prime = nth_prime_upper($index+500); # This is a bound - $self->{PRIMES} = primes($start_prime, $end_prime); - - $begidx = $start_idx; + if ($index == $endidx+1) { # Forward iteration + + $self->{ACCESS_TYPE}++; + if ($self->{ACCESS_TYPE} > 2) { + my $end_prime = nth_prime_upper($index + 10_000); + $self->{PRIMES} = primes( $self->{PRIMES}->[-1]+1, $end_prime ); + $begidx = $endidx+1; + } else { + $begidx = $index; + $self->{PRIMES} = [next_prime($self->{PRIMES}->[-1])]; + } + + } elsif ($index == $begidx-1) { # Backward iteration + + $self->{ACCESS_TYPE}--; + if ($self->{ACCESS_TYPE} < -2) { + my $num = 10_000; + $begidx = ($index < $num) ? 0 : $index - $num; + my $start_prime = nth_prime($begidx+1); + my $end_prime = nth_prime_upper($index+500); + $self->{PRIMES} = primes($start_prime, $end_prime); + } else { + $begidx = $index; + $self->{PRIMES} = [prev_prime($self->{PRIMES}->[0])]; + } + + } else { # Random access + + $self->{ACCESS_TYPE} = int($self->{ACCESS_TYPE} / 2); + # Alternately we could get a small window + $begidx = $index; + $self->{PRIMES} = [nth_prime($begidx+1)]; } $self->{BEG_INDEX} = $begidx; @@ -126,7 +137,7 @@ Math::Prime::Util::PrimeArray - A tied array for primes =head1 VERSION -Version 0.14 +Version 0.28 =head1 SYNOPSIS @@ -134,7 +145,7 @@ Version 0.14 use Math::Prime::Util::PrimeArray; # Create: - my @primes; tie @primes, 'Math::Prime::Util::PrimeArray'; + tie my @primes, 'Math::Prime::Util::PrimeArray'; # Use in a loop by index: for my $n (1..10) { @@ -169,11 +180,9 @@ An array that acts like the infinite set of primes. This may be more convenient than using L<Math::Prime::Util> directly, and in some cases it can be faster than calling C<next_prime> and C<prev_prime>. -Internally when an index is accessed, an area surrounding the index is sieved -if necessary, then the result returned. This means random access will be a -little slower than using C<nth_prime> directly, but not by very much. -Random access in a small window (1000 or so primes in either direction) will -be very fast, as will sequential access in either direction. +If the access pattern is ascending or decending, then a window is sieved and +results returned from the window as needed. If the access pattern is random, +then C<nth_prime> is used. Shifting acts like the array is losing elements at the front, so after two shifts, C<$primes[0] == 5>. Unshift will move the internal shift index back @@ -189,52 +198,79 @@ Example: say $primes[0]; # 5 unshift @primes, 2; # back up two say $primes[0]; # 2 - + +If you prefer the iterator pattern, I would recommend using +L<Math::Prime::Util/prime_iterator>. It will be faster than using this +tied array, but of course you don't get random access. If you find yourself +using the C<shift> operation, consider the iterator. + =head1 LIMITATIONS The size of the array will always be shown as 2147483647 (IV32 max), even in a 64-bit environment where primes through C<2^64> are available. +There are some people that find the idea of shifting a prime array abhorrent, +as after two shifts, "the second prime is 7?!". If this bothers you, do not +use C<shift> on the tied array. + =head1 PERFORMANCE + MPU forprimes: forprimes { $sum += $_ } nth_prime(100_000); + MPU iterator: my $it = prime_iterator; $sum += $it->() for 1..100000; + MPU array: $sum += $_ for @{primes(nth_prime(100_000))}; + MPUPA: tie my @primes, ...; $sum += $primes[$_] for 0..99999; + MNSP: my $seq = Math::NumSeq::Primes->new; + $sum += ($seq->next)[1] for 1..100000; + MPTA: tie my @primes, ...; $sum += $primes[$_] for 0..99999; + +Memory use is comparing the delta between just loading the module and running +the test. Math::NumSeq v58, Math::Prime::TiedArray v0.04. + Summing the first 0.1M primes via walking the array: - 40ms 3.3 MB $sum += $_ for @{primes(nth_prime(100_000))}; - 300ms 0.6 MB Math::Prime::Util::PrimeArray - 230ms 2.2 MB Math::NumSeq::Primes - 10300ms 36.2 MB Math::Primes::TiedArray (extend 1k) + 9ms 52k Math::Prime::Util forprimes + 150ms 0 Math::Prime::Util prime_iterator + 15ms 4400k Math::Prime::Util sum big array + 220ms 840k Math::Prime::Util::PrimeArray + 130ms 280k Math::NumSeq::Primes sequence iterator + 33000ms 65 MB Math::Prime::TiedArray (extend 1k) Summing the first 1M primes via walking the array: - 0.3s 35.5 MB $sum += $_ for @{primes(nth_prime(1_000_000))}; - 3.9s 1.3 MB Math::Prime::Util::PrimeArray - 9.6s 2.2 MB Math::NumSeq::Primes - 146.7s 444.9 MB Math::Primes::TiedArray (extend 1k) + 0.1s 300k Math::Prime::Util forprimes + 1.9s 0 Math::Prime::Util prime_iterator + 0.2s 40 MB Math::Prime::Util sum big array + 2.2s 1.1MB Math::Prime::Util::PrimeArray + 7.1s 1.2MB Math::NumSeq::Primes sequence iterator + 122.1s 785 MB Math::Prime::TiedArray (extend 1k) Summing the first 10M primes via walking the array: - 4s 365 MB $sum += $_ for @{primes(nth_prime(10_000_000))}; - 2m 2 MB Math::Prime::Util::PrimeArray - 85m 2 MB Math::NumSeq::Primes - >5000 MB Math::Primes::TiedArray (extend 1k) - -Using L<Math::Prime::Util> directly in a naive fashion uses lots of memory, -but is extremely fast. Sieving segments at a time would control the memory -use, which is one thing the C<PrimeArray> tie is trying to do for you (but -adds more inefficiency than is ideal). + 0.8s 5.9MB Math::Prime::Util forprimes + 23.1s 0 Math::Prime::Util prime_iterator + 1.6s 368 MB Math::Prime::Util sum big array + 21.2s 1.2MB Math::Prime::Util::PrimeArray + 3680 s 11.1MB Math::NumSeq::Primes sequence iterator + >5000 MB Math::Primes::TiedArray (extend 1k) + +L<Math::Prime::Util> offers three obvious solutions: a big array, an iterator, +and the C<forprimes> construct. The big array is fast but uses a B<lot> of +memory, forcing the user to start programming segments. Using the iterator +avoids all the memory use, but isn't as fast (this may improve in a later +release, as this is a new feature). The C<forprimes> construct is by far +the fastest, but it isn't quite as flexible as the iterator (most notably +there is no way to exit early, and it doesn't lend itself to wrapping inside +a filter). L<Math::NumSeq::Primes> offers an iterator alternative, and works quite well -for reasonably small numbers. It does not, however, support random access. -There seems to be a 2MB fixed overhead, but after that the memory use is -is well constrained. It is very fast for small values, but clearly is -getting slower as we sum to 1 million, and takes well over an hour to count -to 10 million. +for reasonably small numbers. It does not support random access. It is +very fast for small values, but is very slow with large counts. L<Math::Primes::TiedArray> is remarkably impractical for anything other -than very small numbers. I believe the times and memory use in the above -tables illustrate this. +than very small numbers. The sieve used is incredibly slow, and the memory +use is crazy. =head1 SEE ALSO -- Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libmath-prime-util-perl.git _______________________________________________ Pkg-perl-cvs-commits mailing list Pkg-perl-cvs-commits@lists.alioth.debian.org http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/pkg-perl-cvs-commits