This is an automated email from the git hooks/post-receive script. ppm-guest pushed a commit to annotated tag v0.17 in repository libmath-prime-util-perl.
commit a6339fa0f1efadecbe6652ac0155e08ae4aa9b88 Author: Dana Jacobsen <[email protected]> Date: Tue Dec 18 02:27:22 2012 -0800 Unroll inner loop of sieve for another 20% speedup --- Changes | 10 ++++++++++ lib/Math/Prime/Util.pm | 12 +++++++----- sieve.c | 50 ++++++++++++++++++++++++++++++++++++++++++-------- 3 files changed, 59 insertions(+), 13 deletions(-) diff --git a/Changes b/Changes index 5fb3a8d..bff55b0 100644 --- a/Changes +++ b/Changes @@ -1,5 +1,15 @@ Revision history for Perl extension Math::Prime::Util. +0.17 18 December 2012 + + - Perl 5.8.1 - 5.8.7 miscalculates 12345 ** 4, which I used in a test. + + - Fix (hopefully) for MSC compilation. + + - Unroll sieve loop for another 20% or so speedup. It won't have much + practical application now that we use Lehmer's method for counts, but + there are some cases that can still show speedups. + 0.16 11 December 2012 - randbits >= 32 on some 32-bit systems was messing us up. Restrict our diff --git a/lib/Math/Prime/Util.pm b/lib/Math/Prime/Util.pm index 6ed33e2..a1e889a 100644 --- a/lib/Math/Prime/Util.pm +++ b/lib/Math/Prime/Util.pm @@ -2812,7 +2812,7 @@ seconds using the Lehmer algorithm in version 0.12. Small portable functions suitable for plugging into XS: - 5.3 My segmented SoE used in this module + 4.1 My segmented SoE used in this module (with unrolled inner loop) 15.6 My Sieve of Eratosthenes using a mod-30 wheel 17.2 A slightly modified verion of Terje Mathisen's mod-30 sieve 35.5 Basic Sieve of Eratosthenes on odd numbers @@ -2820,25 +2820,27 @@ seconds using the Lehmer algorithm in version 0.12. 72.8 Sieve of Atkin, 10-minute fixup of basic algorithm 91.6 Sieve of Atkin, Wikipedia-like -Perl modules, counting the primes to C<800_000_000> (800 million), in seconds: +Perl modules, counting the primes to C<800_000_000> (800 million): Time (s) Module Version Notes --------- -------------------------- ------- ----------- 0.03 Math::Prime::Util 0.12 using Lehmer's method - 0.36 Math::Prime::Util 0.09 segmented mod-30 sieve + 0.28 Math::Prime::Util 0.17 segmented mod-30 sieve 0.52 Math::Prime::Util::PP 0.14 Perl (Lehmer's method) 0.9 Math::Prime::Util 0.01 mod-30 sieve 2.9 Math::Prime::FastSieve 0.12 decent odd-number sieve 11.7 Math::Prime::XS 0.29 "" but needs a count API 15.0 Bit::Vector 7.2 57.3 Math::Prime::Util::PP 0.14 Perl (fastest I know of) - 169.5 Python's mpmath primepi 0.17 Python, 25+GB RAM used 170.0 Faster Perl sieve (net) 2012-01 array of odds - 292.2 Python's sympy primepi 0.7.1 Python 548.1 RosettaCode sieve (net) 2012-06 simplistic Perl ~11000 Math::Primality 0.04 Perl + Math::GMPz >20000 Math::Big 1.12 Perl, > 26GB RAM used +Python can do this in 2.8s using an RWH numpy function, 14.3s using an RWH +pure Python function. However the standard modules are far slower. mpmath +v0.17 primepi takes 169.5s and 25+ GB of RAM. sympi 0.7.1 primepi takes +292.2s. C<is_prime>: my impressions for various sized inputs: diff --git a/sieve.c b/sieve.c index eb38b82..bebd2df 100644 --- a/sieve.c +++ b/sieve.c @@ -163,12 +163,20 @@ unsigned char* sieve_erat30(UV end) assert(d < max_buf); assert(prime = (wdinc[0]+wdinc[1]+wdinc[2]+wdinc[3]+wdinc[4]+wdinc[5]+wdinc[6]+wdinc[7])); #endif - i = 0; /* Mark the composites */ - do { - mem[d] |= wmask[i]; - d += wdinc[i]; - i = (i+1) & 7; - } while (d < max_buf); + /* Regular code to mark composites. + * i = 0; + * do { mem[d] |= wmask[i]; d += wdinc[i]; i = (i+1)&7; } while (d < max_buf); + * Unrolled version: */ + while (1) { + mem[d] |= wmask[0]; d += wdinc[0]; if (d >= max_buf) break; + mem[d] |= wmask[1]; d += wdinc[1]; if (d >= max_buf) break; + mem[d] |= wmask[2]; d += wdinc[2]; if (d >= max_buf) break; + mem[d] |= wmask[3]; d += wdinc[3]; if (d >= max_buf) break; + mem[d] |= wmask[4]; d += wdinc[4]; if (d >= max_buf) break; + mem[d] |= wmask[5]; d += wdinc[5]; if (d >= max_buf) break; + mem[d] |= wmask[6]; d += wdinc[6]; if (d >= max_buf) break; + mem[d] |= wmask[7]; d += wdinc[7]; if (d >= max_buf) break; + } } return mem; @@ -226,6 +234,7 @@ int sieve_segment(unsigned char* mem, UV startd, UV endd) UV minc = (2*p) - dinc*30; UV wdinc[8]; unsigned char wmask[8]; + UV offset_endd = endd - startd; int i; /* Find the positions of the next composites we will mark */ @@ -240,12 +249,37 @@ int sieve_segment(unsigned char* mem, UV startd, UV endd) wmask[i%8] = masktab30[m]; } d -= p; + d -= startd; +#if 0 i = 0; /* Mark the composites */ do { - mem[d-startd] |= wmask[i]; + mem[d] |= wmask[i]; d += wdinc[i]; i = (i+1) & 7; - } while (d <= endd); + } while (d <= offset_endd); +#else + /* Unrolled inner loop for marking composites */ + while ( (d+p) <= offset_endd ) { + mem[d] |= wmask[0]; d += wdinc[0]; + mem[d] |= wmask[1]; d += wdinc[1]; + mem[d] |= wmask[2]; d += wdinc[2]; + mem[d] |= wmask[3]; d += wdinc[3]; + mem[d] |= wmask[4]; d += wdinc[4]; + mem[d] |= wmask[5]; d += wdinc[5]; + mem[d] |= wmask[6]; d += wdinc[6]; + mem[d] |= wmask[7]; d += wdinc[7]; + } + while (1) { + mem[d] |= wmask[0]; d += wdinc[0]; if (d > offset_endd) break; + mem[d] |= wmask[1]; d += wdinc[1]; if (d > offset_endd) break; + mem[d] |= wmask[2]; d += wdinc[2]; if (d > offset_endd) break; + mem[d] |= wmask[3]; d += wdinc[3]; if (d > offset_endd) break; + mem[d] |= wmask[4]; d += wdinc[4]; if (d > offset_endd) break; + mem[d] |= wmask[5]; d += wdinc[5]; if (d > offset_endd) break; + mem[d] |= wmask[6]; d += wdinc[6]; if (d > offset_endd) break; + mem[d] |= wmask[7]; d += wdinc[7]; if (d > offset_endd) break; + } +#endif #endif } } -- Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-perl/packages/libmath-prime-util-perl.git _______________________________________________ Pkg-perl-cvs-commits mailing list [email protected] http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/pkg-perl-cvs-commits
