Improved performance for factor.

Ralph Loader Wed, 16 May 2007 05:19:42 -0700

Hi,

factor in coreutils accepts 64 bit numbers, but can be very slow, e.g.,
try


factor 18446743979220271189

Attached is a patch to use the the Pollard rho factorisation algorithm.

On my ancient dog slow PC it does the above factorisation in about 1/4
seconds instead of 2 minutes.

It factors a number n in time approx O(n**(1/4)) so should be perfectly
OK on any 64 bit number.  [If anyone has a 128 bit machine handy feel
free to let me play :-)].

Ralph.

--- orig/coreutils-6.9/src/factor.c	2007-03-19 09:36:43.000000000 +1200
+++ coreutils-6.9/src/factor.c	2007-05-16 20:29:28.000000000 +1200
@@ -16,7 +16,8 @@
    Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.  */
 
 /* Written by Paul Rubin <[EMAIL PROTECTED]>.
-   Adapted for GNU, fixed to factor UINT_MAX by Jim Meyering.  */
+   Adapted for GNU, fixed to factor UINT_MAX by Jim Meyering,
+   Changed to use the Pollard rho algorithm by Ralph Loader.  */
 
 #include <config.h>
 #include <getopt.h>
@@ -42,24 +43,6 @@
 /* Token delimiters when reading from a file.  */
 #define DELIM "\n\t "
 
-/* The maximum number of factors, including -1, for negative numbers.  */
-#define MAX_N_FACTORS (sizeof (uintmax_t) * CHAR_BIT)
-
-/* The trial divisor increment wheel.  Use it to skip over divisors that
-   are composites of 2, 3, 5, 7, or 11.  The part from WHEEL_START up to
-   WHEEL_END is reused periodically, while the "lead in" is used to test
-   for those primes and to jump onto the wheel.  For more information, see
-   http://www.utm.edu/research/primes/glossary/WheelFactorization.html  */
-
-#include "wheel-size.h"  /* For the definition of WHEEL_SIZE.  */
-static const unsigned char wheel_tab[] =
-  {
-#include "wheel.h"
-  };
-
-#define WHEEL_START (wheel_tab + WHEEL_SIZE)
-#define WHEEL_END (wheel_tab + (sizeof wheel_tab / sizeof wheel_tab[0]))
-
 /* The name this program was run with. */
 char *program_name;
 
@@ -92,50 +75,341 @@
   exit (status);
 }
 
-/* FIXME: comment */
 
-static size_t
-factor (uintmax_t n0, size_t max_n_factors, uintmax_t *factors)
+/* Pollard-rho based factoring for 64 bit numbers.  */
+
+/* First 64 primes.  Should be enough to ensure no false negatives whatsoever on
+ * the Fermat primality test, for 64 bit uintmax_t.  */
+#define NPRIMES 64
+const unsigned primes[NPRIMES] = {
+    2,     3,   5,   7,  11,  13,  17,  19,  23,  29,  31,  37,  41,  43,  47,
+    53,   59,  61,  67,  71,  73,  79,  83,  89,  97, 101, 103, 107, 109, 113,
+    127, 131, 137, 139, 149, 151, 157, 163, 167, 173, 179, 181, 191, 193, 197,
+    199, 211, 223, 227, 229, 233, 239, 241, 251, 257, 263, 269, 271, 277, 281,
+    283, 293, 307, 311
+};
+
+
+/* Number of bits in uintmax_t.  */
+#define UINTMAX_BITS (sizeof (uintmax_t) * CHAR_BIT)
+
+/* The uintmax_t value with just the top bit set.  */
+#define TOP_BIT (((uintmax_t) 1) << (UINTMAX_BITS - 1))
+
+/* Maximum number of factors of a uintmax_t value.  Bounded by number of
+ * bits.  */
+#define MAX_N_FACTORS UINTMAX_BITS
+
+/* Just above sqrt of the maximum uintmax_t value.  Below this value, we may
+ * multiply without worrying about overflow.  */
+#define MAX_HALF_INTMAX (((uintmax_t) 1) << (UINTMAX_BITS / 2))
+
+
+#if __GNUC__ >= 4
+/* Recent versions of GCC have this as a builtin.  */
+#define countlz __builtin_clzll
+#else
+/* Return number of leading zero bits, assumes x is not zero.  */
+static unsigned countlz (uintmax_t x)
+{
+    unsigned result = UINTMAX_BITS - 1;
+    assert (UINTMAX_BITS <= 128);
+    /* Anyone want to lend me a 128 bit machine?  */
+    if (UINTMAX_BITS > 64 && (x >> 64) != 0) {
+        /* The % UINTMAX_BITS is to avoid compiler warnings on <= 64 bits.  */
+        x >>= 64 % UINTMAX_BITS;
+        result -= 64;
+    }
+    if (UINTMAX_BITS > 32 && (x >> 32) != 0) {
+        /* The % UINTMAX_BITS is to avoid compiler warnings on <= 32 bits.  */
+        x >>= 32 % UINTMAX_BITS;
+        result -= 32;
+    }
+    if (x >> 16 != 0) {
+        x >>= 16;
+        result -= 16;
+    }
+    if (x >> 8 != 0) {
+        x >>= 8;
+        result -= 8;
+    }
+    if (x >> 4 != 0) {
+        x >>= 4;
+        result -= 4;
+    }
+    if (x >> 2 != 0) {
+        x >>= 2;
+        result -= 2;
+    }
+    if (x >> 1 != 0) {
+        x >>= 1;
+        result -= 1;
+    }
+    return result;
+}
+#endif
+
+
+/* Modular addition.  */
+uintmax_t add (uintmax_t x, uintmax_t y, uintmax_t modulus)
 {
-  uintmax_t n = n0, d, q;
-  size_t n_factors = 0;
-  unsigned char const *w = wheel_tab;
-
-  if (n <= 1)
-    return n_factors;
-
-  /* The exit condition in the following loop is correct because
-     any time it is tested one of these 3 conditions holds:
-      (1) d divides n
-      (2) n is prime
-      (3) n is composite but has no factors less than d.
-     If (1) or (2) obviously the right thing happens.
-     If (3), then since n is composite it is >= d^2. */
+    uintmax_t result = x + y;
+    if (result < x || result >= modulus)
+        result -= modulus;
+    return result;
+}
 
-  d = 2;
-  do
-    {
-      q = n / d;
-      while (n == q * d)
-	{
-	  assert (n_factors < max_n_factors);
-	  factors[n_factors++] = d;
-	  n = q;
-	  q = n / d;
-	}
-      d += *(w++);
-      if (w == WHEEL_END)
-	w = WHEEL_START;
+
+/* Modular multiplication.  */
+uintmax_t mult (uintmax_t x, uintmax_t y, uintmax_t modulus)
+{
+    unsigned lz;
+    unsigned i;
+    uintmax_t result;
+
+    /* If both x and y fit in the bottom half of an uintmax_t, then we can
+     * use '*' without fear of overflow.  */
+    if (x < MAX_HALF_INTMAX && y < MAX_HALF_INTMAX)
+        return x * y % modulus;
+
+    if (x == 0)
+        return 0;
+
+    /* Otherwise we do things bit by bit to avoid overflow.  */
+    lz = countlz (x);
+    x <<= lz;
+    result = y;
+
+    for (i = lz + 1; i != UINTMAX_BITS; ++i) {
+        result = add (result, result, modulus);
+        x += x;
+        if (x & TOP_BIT)
+            result = add (result, y, modulus);
     }
-  while (d <= q);
+    return result;
+}
 
-  if (n != 1 || n0 == 1)
-    {
-      assert (n_factors < max_n_factors);
-      factors[n_factors++] = n;
+
+/* Modular exponentiation.  */
+uintmax_t power (uintmax_t base, uintmax_t exp, uintmax_t modulus)
+{
+    unsigned lz;
+    unsigned i;
+    uintmax_t result;
+
+    if (exp == 0)
+        return 1;
+
+    lz = countlz (exp);
+    exp <<= lz;
+
+    result = base;
+    for (i = lz + 1; i != UINTMAX_BITS; ++i) {
+        result = mult (result, result, modulus);
+        exp += exp;
+        if (exp & TOP_BIT)
+            result = mult (base, result, modulus);
+    }
+    return result;
+}
+
+
+/* Non modular exponentiation.  */
+uintmax_t small_power (uintmax_t base, uintmax_t exp)
+{
+    unsigned lz;
+    unsigned i;
+    uintmax_t result;
+
+    if (exp == 0)
+        return 1;
+
+    lz = countlz (exp);
+    exp <<= lz;
+
+    result = base;
+    for (i = lz + 1; i != UINTMAX_BITS; ++i) {
+        result *= result;
+        exp += exp;
+        if (exp & TOP_BIT)
+            result *= base;
+    }
+    return result;
+}
+
+
+/* factors should be an array big enough to hold the factors; MAX_N_FACTORS will
+ * do.  Returns pointer to just-past the last factor.  */
+static uintmax_t * raw_factorise (uintmax_t n, uintmax_t * factors);
+
+static int uintmax_compare (const void * aa, const void * bb)
+{
+    const uintmax_t * a = aa;
+    const uintmax_t * b = bb;
+    if (*a < *b)
+        return -1;
+    else if (*a > *b)
+        return 1;
+    else
+        return 0;
+}
+
+
+/* Factorises into primes, and sorts the results.  */
+static size_t factorise (uintmax_t n, uintmax_t * factors)
+{
+    uintmax_t * end = factors;
+    unsigned count;
+    
+    /* First deal to factors of 2, 3, 5.  Not strictly necessary, but gets some
+     * common cases out the the way quickly.  This means that the smallest
+     * number that requires Pollard rho factorisation is 49.  */
+    while ((n % 2) == 0) {
+        n /= 2;
+        *end++ = 2;
+    }
+    while ((n % 3) == 0) {
+        n /= 3;
+        *end++ = 3;
+    }
+    while ((n % 5) == 0) {
+        n /= 5;
+        *end++ = 5;
+    }
+
+    end = raw_factorise (n, end);
+    count = end - factors;
+
+    qsort (factors, count, sizeof (uintmax_t), uintmax_compare);
+    return count;
+}
+
+
+/* Fills in an array with each item (n/p) where p is a prime factor of n,
+ * removing duplicates.  */
+static size_t calc_cofactors (uintmax_t n, uintmax_t * cofactors)
+{
+    uintmax_t factors[MAX_N_FACTORS];
+    size_t nfactors = factorise (n, factors);
+    uintmax_t last = 0;
+    unsigned count = 0;
+    unsigned i;
+    for (i = 0; i != nfactors; ++i) {
+        if (factors[i] != last) {
+            last = factors[i];
+            cofactors[count++] = n / factors[i];
+        }
+    }
+    return count;
+}
+
+
+/* Test for primeness, using Fermats little theorem.  This is a probablistic
+ * test, but the chance of a given prime number not being detected is about
+ * (1/2)**NPRIMES.  When UINTMAX_BITS==64, there are probably no uintmax_t
+ * values for which the test fails.  */
+static bool is_prime (uintmax_t n)
+{
+    uintmax_t cofactors[MAX_N_FACTORS];
+    unsigned needed;
+    unsigned num_cofactors;
+    unsigned i;
+    
+    /* First, quick check of 2.  Catches most composites.  */
+    if (power (2, n - 1, n) != 1)
+        return false;
+
+    num_cofactors = calc_cofactors (n - 1, cofactors);
+
+    /* Check that we have enough bits in needed.  We need one bit for each
+     * distinct prime factor of n-1.  The test is a bit loose but is a constant
+     * expression.  */
+    assert (sizeof (needed) * 4 >= sizeof (uintmax_t) &&
+            UINTMAX_BITS >= 64);
+    /* We want to see something of order (a multiple of) (n-1)/p for each
+     * prime factor of n-1.  Keep a bit mask of what is outstanding.  */
+    needed = (1 << num_cofactors) - 1;
+
+    for (i = 0; i != NPRIMES; ++i) {
+        /* Note that it is not essential to use primes for this test...  but if
+         * the test power(x,y,n)!=1 is true, then it is also true replacing x
+         * with some prime factor of x.  So we may as well just do primes.  */
+        unsigned prime = primes[i];
+        unsigned j;
+        if (prime != 2 && power (prime, n - 1, n) != 1)
+            return false;
+        for (j = 0; j != num_cofactors; ++j) {
+            if ((needed & (1 << j)) && power (prime, cofactors[j], n) != 1) {
+                needed &= ~ (1 << j);
+                if (needed == 0)
+                    return true;
+            }
+        }
+    }
+
+    /* We might get here for two reasons (a) it's a Carmichael number (and hence
+     * composite), or (b) we were just really (un)lucky.  We've choosen
+     * NUM_PRIMES big enough so that (b) most likely will never actually
+     * happen.  */
+    return false;
+}
+
+
+/* Calculate the gcd, not to be confused with the function in system.h.  */
+static uintmax_t euclid (uintmax_t a, uintmax_t b)
+{
+    while (a != 0) {
+        uintmax_t next = b % a;
+        b = a;
+        a = next;
+    }
+    return b;
+}
+
+
+/* The Pollard rho function.  We use the usual function x*x+c.  The input should
+ * be composite; the function returns a factor.  */
+static uintmax_t pollard_rho (uintmax_t n)
+{
+    unsigned iterations = 0;
+    for (unsigned c = 3;; ++c) {
+        uintmax_t slow = c;
+        uintmax_t fast = c * (c + 1);
+        uintmax_t g;
+        while ((g = euclid (slow > fast ? slow - fast : fast - slow, n)) == 1) {
+            slow = add (mult (slow, slow, n), c, n);
+            fast = add (mult (fast, fast, n), c, n);
+            fast = add (mult (fast, fast, n), c, n);
+
+            ++iterations;
+            /* For 64 bits, this gives us 2**24 = 16777216 iterations.  If you
+             * ever hit this test, either the code is buggy or your local number
+             * theorist will be amused.  */
+            assert (iterations < (1ul << (3 * UINTMAX_BITS / 8)));
+        }
+        if (g != n)
+            return g;
+    }
+}
+
+
+/* Recusively apply Pollard rho until we arrive at prime numbers.  */
+static uintmax_t * raw_factorise (uintmax_t n, uintmax_t * factors)
+{
+    if (n <= 1)
+        return factors;
+
+    while (!is_prime (n)) {
+        uintmax_t factor = pollard_rho (n);
+        /* It is arbitrary which of factor and (n/factor) we recurse on.  factor
+         * is most likely small (in fact, very often prime), so recursing on
+         * that should limit recursion depth.  */
+        factors = raw_factorise (factor, factors);
+        n /= factor;
     }
 
-  return n_factors;
+    *factors++ = n;
+    return factors;
 }
 
 /* FIXME: comment */
@@ -158,7 +432,7 @@
 	error (0, 0, _("%s is not a valid positive integer"), quote (s));
       return false;
     }
-  n_factors = factor (n, MAX_N_FACTORS, factors);
+  n_factors = factorise (n, factors);
   printf ("%s:", umaxtostr (n, buf));
   for (i = 0; i < n_factors; i++)
     printf (" %s", umaxtostr (factors[i], buf));

_______________________________________________
Bug-coreutils mailing list
[email protected]
http://lists.gnu.org/mailman/listinfo/bug-coreutils

Improved performance for factor.

Reply via email to