[libmath-prime-util-perl] 28/50: Rewrite p-1 factoring, enhance racing SQUFOF, switch to racing SQUFOF in factor

Partha P. Mukherjee Thu, 21 May 2015 11:49:39 -0700

This is an automated email from the git hooks/post-receive script.

ppm-guest pushed a commit to annotated tag v0.13
in repository libmath-prime-util-perl.


commit 7b380c20c7909848e18c91346cfe9c74b7ba8f38
Author: Dana Jacobsen <d...@acm.org>
Date:   Mon Nov 5 01:01:54 2012 -0800

    Rewrite p-1 factoring, enhance racing SQUFOF, switch to racing SQUFOF in 
factor
---
 Changes  |   6 ++
 TODO     |   2 +
 XS.xs    |  81 ++++++++------------
 factor.c | 264 ++++++++++++++++++++++++++++++++++++++++-----------------------
 4 files changed, 210 insertions(+), 143 deletions(-)

diff --git a/Changes b/Changes
index b4a1784..d85d848 100644
--- a/Changes
+++ b/Changes
@@ -1,6 +1,7 @@
 Revision history for Perl extension Math::Prime::Util.
 
 0.12  2 November 2012
+
     - Add bin/primes.pl
 
     - Add functions:
@@ -8,6 +9,7 @@ Revision history for Perl extension Math::Prime::Util.
            pn_primorial            product of first n primes
            prime_set_config        set config options
            RiemannZeta             export and make accurate for small reals
+           is_provable_prime       prove primes after BPSW
 
     - Add 'assume_rh' configuration option (default: false) which can be set
       to allow functions to assume the Riemann Hypothesis.
@@ -22,6 +24,10 @@ Revision history for Perl extension Math::Prime::Util.
 
     - In the PP code, use 2 MR bases for more numbers when possible.
 
+    - Fixup of racing SQUFOF, and switch to use it in factor().
+
+    - Complete rewrite of XS p-1 factor routine, includes second stage.
+
 0.11  23 July 2012
     - Turn off threading tests on Cygwin, as threads on some Cygwin platforms
       give random panics (my Win7 64-bit works fine, XP 32-bit does not).
diff --git a/TODO b/TODO
index 184d884..9b53584 100644
--- a/TODO
+++ b/TODO
@@ -29,3 +29,5 @@
 
 - Test all functions return either native or bigints.  Functions that return
   raw MPU::GMP results will return strings, which isn't right.
+
+- Make proper pminus1 in PP code, like factor.c.
diff --git a/XS.xs b/XS.xs
index 11fa453..c846c6b 100644
--- a/XS.xs
+++ b/XS.xs
@@ -196,48 +196,33 @@ erat_primes(IN UV low, IN UV high)
 void
 _XS_factor(IN UV n)
   PPCODE:
-    if (n < 4) {
-      XPUSHs(sv_2mortal(newSVuv( n ))); /* If n is 0-3, we're done. */
+    UV orign = n;
+    if (n < 4) {                        /* If n is 0-3, we're done. */
+      XPUSHs(sv_2mortal(newSVuv( n )));
+    } else if (n < 2000000) {           /* For small n, just trial division */
+      int i;
+      UV facs[32];  /* maximum number of factors is log2n */
+      UV nfacs = trial_factor(n, facs, 0);
+      for (i = 1; i <= nfacs; i++) {
+        XPUSHs(sv_2mortal(newSVuv( facs[i-1] )));
+      }
     } else {
       int const verbose = 0;
-      UV tlim = 101;  /* Below this we've checked with trial division */
+      UV const tlim_lower = 211;  /* Trial division through this prime */
+      UV const tlim = 223;        /* This means we've checked through here */
       UV tofac_stack[MPU_MAX_FACTORS+1];
       UV factored_stack[MPU_MAX_FACTORS+1];
       int ntofac = 0;
       int nfactored = 0;
-      /* Quick trial divisions.  Crude use of GCD to hopefully go faster. */
-      while ( (n% 2) == 0 ) {  n /=  2;  XPUSHs(sv_2mortal(newSVuv(  2 ))); }
-      if ( (n >= UVCONST(3*3)) && (gcd_ui(n, UVCONST(3234846615) != 1)) ) {
-        while ( (n% 3) == 0 ) {  n /=  3;  XPUSHs(sv_2mortal(newSVuv(  3 ))); }
-        while ( (n% 5) == 0 ) {  n /=  5;  XPUSHs(sv_2mortal(newSVuv(  5 ))); }
-        while ( (n% 7) == 0 ) {  n /=  7;  XPUSHs(sv_2mortal(newSVuv(  7 ))); }
-        while ( (n%11) == 0 ) {  n /= 11;  XPUSHs(sv_2mortal(newSVuv( 11 ))); }
-        while ( (n%13) == 0 ) {  n /= 13;  XPUSHs(sv_2mortal(newSVuv( 13 ))); }
-        while ( (n%17) == 0 ) {  n /= 17;  XPUSHs(sv_2mortal(newSVuv( 17 ))); }
-        while ( (n%19) == 0 ) {  n /= 19;  XPUSHs(sv_2mortal(newSVuv( 19 ))); }
-        while ( (n%23) == 0 ) {  n /= 23;  XPUSHs(sv_2mortal(newSVuv( 23 ))); }
-        while ( (n%29) == 0 ) {  n /= 29;  XPUSHs(sv_2mortal(newSVuv( 29 ))); }
-      }
-      if ( (n >= UVCONST(31*31)) && (gcd_ui(n, UVCONST(95041567) != 1)) ) {
-        while ( (n%31) == 0 ) {  n /= 31;  XPUSHs(sv_2mortal(newSVuv( 31 ))); }
-        while ( (n%37) == 0 ) {  n /= 37;  XPUSHs(sv_2mortal(newSVuv( 37 ))); }
-        while ( (n%41) == 0 ) {  n /= 41;  XPUSHs(sv_2mortal(newSVuv( 41 ))); }
-        while ( (n%43) == 0 ) {  n /= 43;  XPUSHs(sv_2mortal(newSVuv( 43 ))); }
-        while ( (n%47) == 0 ) {  n /= 47;  XPUSHs(sv_2mortal(newSVuv( 47 ))); }
-      }
-      if ( (n >= UVCONST(53*53)) && (gcd_ui(n, UVCONST(907383479) != 1)) ) {
-        while ( (n%53) == 0 ) {  n /= 53;  XPUSHs(sv_2mortal(newSVuv( 53 ))); }
-        while ( (n%59) == 0 ) {  n /= 59;  XPUSHs(sv_2mortal(newSVuv( 59 ))); }
-        while ( (n%61) == 0 ) {  n /= 61;  XPUSHs(sv_2mortal(newSVuv( 61 ))); }
-        while ( (n%67) == 0 ) {  n /= 67;  XPUSHs(sv_2mortal(newSVuv( 67 ))); }
-        while ( (n%71) == 0 ) {  n /= 71;  XPUSHs(sv_2mortal(newSVuv( 71 ))); }
-      }
-      if ( (n >= UVCONST(73*73)) && (gcd_ui(n, UVCONST(4132280413) != 1)) ) {
-        while ( (n%73) == 0 ) {  n /= 73;  XPUSHs(sv_2mortal(newSVuv( 73 ))); }
-        while ( (n%79) == 0 ) {  n /= 79;  XPUSHs(sv_2mortal(newSVuv( 79 ))); }
-        while ( (n%83) == 0 ) {  n /= 83;  XPUSHs(sv_2mortal(newSVuv( 83 ))); }
-        while ( (n%89) == 0 ) {  n /= 89;  XPUSHs(sv_2mortal(newSVuv( 89 ))); }
-        while ( (n%97) == 0 ) {  n /= 97;  XPUSHs(sv_2mortal(newSVuv( 97 ))); }
+
+      { /* Trial division, removes all factors below tlim */
+        int i;
+        UV facs[BITS_PER_WORD+1];
+        UV nfacs = trial_factor(n, facs, tlim_lower);
+        for (i = 1; i < nfacs; i++) {
+          XPUSHs(sv_2mortal(newSVuv( facs[i-1] )));
+        }
+        n = facs[nfacs-1];
       }
 
       do { /* loop over each remaining factor */
@@ -245,23 +230,19 @@ _XS_factor(IN UV n)
          * but in practice it seems slower. */
         while ( (n >= (tlim*tlim)) && (!_XS_is_prime(n)) ) {
           int split_success = 0;
-          /* How many rounds of SQUFOF to try depends on the number size */
-          UV sq_rounds = ((n>>29) ==     0) ? 100000 :
-                         ((n>>29) < 100000) ? 250000 :
-                                              600000;
+          /* Adjust the number of rounds based on the number size */
+          UV br_rounds = ((n>>29) < 100000) ?  1500 :  1500;
+          UV sq_rounds = 80000; /* 20k 91%, 40k 98%, 80k 99.9%, 120k 99.99% */
 
           /* Small factors will be found quite rapidly with this */
           if (!split_success) {
-            split_success = pbrent_factor(n, tofac_stack+ntofac, 1500)-1;
+            split_success = pbrent_factor(n, tofac_stack+ntofac, br_rounds)-1;
             if (verbose) { if (split_success) printf("pbrent 1:  %"UVuf" 
%"UVuf"\n", tofac_stack[ntofac], tofac_stack[ntofac+1]); else printf("pbrent 
0\n"); }
           }
 
-          if (!split_success) {
+          if (!split_success && n < (UV_MAX>>3)) {
             /* SQUFOF does very well with what's left after TD and Rho. */
-            /* Racing SQUFOF is about 40% faster and has better success, but
-             * has input size restrictions and I'm seeing cases where it gets
-             * stuck in stage 2.  For now just stick with the old one.  */
-            split_success = squfof_factor(n, tofac_stack+ntofac, sq_rounds)-1;
+            split_success = racing_squfof_factor(n, tofac_stack+ntofac, 
sq_rounds)-1;
             if (verbose) printf("squfof %d\n", split_success);
           }
 
@@ -276,6 +257,10 @@ _XS_factor(IN UV n)
             split_success = holf_factor(n, tofac_stack+ntofac, 2000)-1;
             if (verbose) printf("holf %d\n", split_success);
           }
+          if (!split_success) {
+            split_success = pminus1_factor(n, tofac_stack+ntofac, 1000)-1;
+            if (verbose) printf("pminus1 %d\n", split_success);
+          }
 
           /* A miniscule fraction of numbers make it here */
           if (!split_success) {
@@ -372,9 +357,9 @@ squfof_factor(IN UV n, IN UV maxrounds = 4*1024*1024)
     SIMPLE_FACTOR(squfof_factor, n, maxrounds);
 
 void
-rsqufof_factor(IN UV n)
+rsqufof_factor(IN UV n, IN UV maxrounds = 4*1024*1024)
   PPCODE:
-    SIMPLE_FACTOR(racing_squfof_factor, n, 0);
+    SIMPLE_FACTOR(racing_squfof_factor, n, maxrounds);
 
 void
 pbrent_factor(IN UV n, IN UV maxrounds = 4*1024*1024)
diff --git a/factor.c b/factor.c
index a75d85e..c20d246 100644
--- a/factor.c
+++ b/factor.c
@@ -31,20 +31,15 @@ int trial_factor(UV n, UV *factors, UV maxtrial)
     factors[0] = n;
     return 1;
   }
+  while ( (n & 1) == 0 ) { factors[nfactors++] = 2; n /= 2; }
 
-  while ( (n & 1) == 0 ) {
-    factors[nfactors++] = 2;
-    n /= 2;
-  }
-
-  for (f = 3; (n > 1) && (f <= 7) && (f <= maxtrial); f += 2) {
-    while ( (n % f) == 0 ) {
-      factors[nfactors++] = f;
-      n /= f;
-    }
-  }
+  if (3  <= maxtrial) while ( (n %  3) == 0 ) { factors[nfactors++] =  3; n /= 
 3; }
+  if (5  <= maxtrial) while ( (n %  5) == 0 ) { factors[nfactors++] =  5; n /= 
 5; }
+  if (7  <= maxtrial) while ( (n %  7) == 0 ) { factors[nfactors++] =  7; n /= 
 7; }
+  f = 11;
+  m = 11;
 
-  if ( (n < (7*7)) || (maxtrial < 11) ) {
+  if ( (n < (f*f)) || (maxtrial < f) ) {
     if (n != 1)
       factors[nfactors++] = n;
     return nfactors;
@@ -55,8 +50,6 @@ int trial_factor(UV n, UV *factors, UV maxtrial)
     limit = maxtrial;
 
   /* wheel 30 */
-  f = 11;
-  m = 11;
   while (f <= limit) {
     if ( (n%f) == 0 ) {
       UV newlimit;
@@ -143,10 +136,9 @@ int trial_factor(UV n, UV *factors, UV maxtrial)
     UV t = 1;
     n %= m;
     while (power) {
-      if (power & 1)
-        t = mulmod(t, n, m);
-      n = sqrmod(n, m);
+      if (power & 1) t = mulmod(t, n, m);
       power >>= 1;
+      if (power)     n = sqrmod(n, m);
     }
     return t;
   }
@@ -156,17 +148,15 @@ int trial_factor(UV n, UV *factors, UV maxtrial)
     n %= m;
     if (m < HALF_WORD) {
       while (power) {
-        if (power & 1)
-          t = (t*n)%m;
-        n = (n*n)%m;
+        if (power & 1) t = (t*n)%m;
         power >>= 1;
+        if (power)     n = (n*n)%m;
       }
     } else {
       while (power) {
-        if (power & 1)
-          t = mulmod(t, n, m);
-        n = sqrmod(n,m);
+        if (power & 1) t = mulmod(t, n, m);
         power >>= 1;
+        if (power)     n = sqrmod(n,m);
       }
     }
     return t;
@@ -181,10 +171,24 @@ int trial_factor(UV n, UV *factors, UV maxtrial)
 
 /* Return 0 if n is not a perfect square.  Set sqrtn to int(sqrt(n)) if so.
  *
- * A simplistic solution (but not unhelpful) is:
+ * Some simple solutions:
  *
  *     return ( ((m&2)!= 0) || ((m&7)==5) || ((m&11) == 8) )  ?  0  :  1;
  *
+ * or:
+ *
+ *     m = n & 31;
+ *     if ( m==0 || m==1 || m==4 || m==9 || m==16 || m==17 || m==25 )
+ *       ...test for perfect square...
+ *
+ * or:
+ *
+ *     if (  ((0x0202021202030213ULL >> (n & 63)) & 1) &&
+ *           ((0x0402483012450293ULL >> (n % 63)) & 1) &&
+ *           ((0x218a019866014613ULL >> ((n % 65) & 63)) & 1) &&
+ *           ((0x23b                 >> (n % 11) & 1)) ) {
+ *
+ *
  * The following Bloom filter cascade works very well indeed.  Read all
  * about it here: http://mersenneforum.org/showpost.php?p=110896
  */
@@ -219,6 +223,9 @@ static int is_perfect_square(UV n, UV* sqrtn)
   m = lm % 11;
   if ((m*0xabf1a3a7) & (m*0x2612bf93) & 0x45854000) return 0;
   /* 99.92% of non-squares are rejected now */
+#else
+  m = n % 63;
+  if ((m*0x3d491df7) & (m*0xc824a9f9) & 0x10f14008) return 0;
 #endif
   m = sqrt(n);
   if (n != (m*m))
@@ -547,23 +554,91 @@ int prho_factor(UV n, UV *factors, UV rounds)
  * Probabilistic.  If you give this a prime number, it will loop
  * until it runs out of rounds.
  */
-int pminus1_factor(UV n, UV *factors, UV rounds)
+int pminus1_factor(UV n, UV *factors, UV B1)
 {
-  UV f, i;
-  UV kf = 13;
-
+  UV f, q, restartq, restarta;
+  UV a = 2;
+  UV sqrtB1 = sqrt(B1);
   MPUassert( (n >= 3) && ((n%2) != 0) , "bad n in pminus1_factor");
-
-  for (i = 1; i <= rounds; i++) {
-    kf = powmod(kf, i, n);
-    if (kf == 0) kf = n;
-    f = gcd_ui(kf-1, n);
-    if ( (f != 1) && (f != n) ) {
-      factors[0] = f;
-      factors[1] = n/f;
-      MPUassert( factors[0] * factors[1] == n , "incorrect factoring");
-      return 2;
+  restartq = 2;
+  restarta = a;
+  for (q = 2; q <= sqrtB1; q = _XS_next_prime(q)) {
+    UV k = q;
+    UV kmin = B1/q;
+    while (k <= kmin)
+      k *= q;
+    a = powmod(a, k, n);
+  }
+  if (a == 0) { factors[0] = n; return 1; }
+  f = gcd_ui(a-1, n);
+  if (f == 1) {
+    restartq = q;
+    restarta = a;
+    for ( ; q <= B1; q = _XS_next_prime(q)) {
+      a = powmod(a, q, n);
     }
+    f = gcd_ui(a-1, n);
+  }
+  /* See if we found more than one factor in stage 1, repeat if so */
+  if (f == n) {
+    a = restarta;
+    for (q = restartq; q <= B1; q = _XS_next_prime(q)) {
+      UV k = q;
+      UV kmin = B1/q;
+      while (k <= kmin)
+        k *= q;
+      a = powmod(a, k, n);
+      f = gcd_ui(a-1, n);
+      if (f != 1 && f != n)
+        break;
+    }
+  }
+  if (f == 1 || f == n) {  /* stage 2 */
+    UV B2 = B1 * 10;
+    UV bm = a;
+    UV b = 1;
+    UV j = 1;
+    UV precomp_bm[111] = {0};    /* Enough for B2 = 189M */
+
+    /* calculate (a^q)^2, (a^q)^4, etc. */
+    precomp_bm[0] = sqrmod(bm, n);
+
+    a = powmod(a, q, n);
+    while (q <= B2) {
+      UV lastq = q;
+      UV qdiff, bmdiff;
+      q = _XS_next_prime(q);
+      /* compute a^q = a^lastq * a^(q-lastq) */
+      qdiff = (q - lastq) / 2 - 1;
+      if (qdiff >= 111) {
+        bmdiff = powmod(bm, q-lastq, n);  /* Too big of gap */
+      } else {
+        bmdiff = precomp_bm[qdiff];
+        if (bmdiff == 0) {
+          if (precomp_bm[qdiff-1] != 0)
+            bmdiff = mulmod(mulmod(precomp_bm[qdiff-1],bm,n),bm,n);
+          else
+            bmdiff = powmod(bm, q-lastq, n);
+          precomp_bm[qdiff] = bmdiff;
+        }
+      }
+      a = mulmod( a, bmdiff, n);
+      if (a <= 1) break;
+      b = mulmod( b, a-1, n);
+      if (b == 0) b = 1;
+      if ( (j++ % 64) == 0 ) {
+        f = gcd_ui(b, n);
+        if (f != 1 && f != n)
+          break;
+      }
+    }
+    f = gcd_ui(b, n);
+  }
+  if ( (f != 1) && (f != n) ) {
+    factors[0] = f;
+    factors[1] = n/f;
+    MPUassert( factors[0] * factors[1] == n , "incorrect factoring");
+    return 2;
   }
   factors[0] = n;
   return 1;
@@ -702,27 +777,24 @@ int squfof_factor(UV n, UV *factors, UV rounds)
 
 /* Another version, based on Ben Buhrow's racing SQUFOF. */
 
-typedef unsigned int uint32;
-typedef UV uint64;
-
 typedef struct
 {
-  uint32 mult;
-  uint32 valid;
-  uint32 P;
-  uint32 bn;
-  uint32 Qn;
-  uint32 Q0;
-  uint32 b0;
-  uint32 it;
-  uint32 imax;
+  UV mult;
+  UV valid;
+  UV P;
+  UV bn;
+  UV Qn;
+  UV Q0;
+  UV b0;
+  UV it;
+  UV imax;
 } mult_t;
 
 // N < 2^63 (or 2^31).  *f == 1 if no factor found
-static void squfof_unit(UV n, mult_t* mult_save, uint64* f)
+static void squfof_unit(UV n, mult_t* mult_save, UV* f)
 {
-  uint32 imax,i,Q0,b0,Qn,bn,P,bbn,Ro,S,So,t1,t2;
-  int j = 0;
+  UV imax,i,Q0,b0,Qn,bn,P,bbn,Ro,S,So,t1,t2;
+  int j;
 
   *f = 0;
 
@@ -765,14 +837,8 @@ static void squfof_unit(UV n, mult_t* mult_save, uint64* f)
       SQUARE_SEARCH_ITERATION;
 
       // Even iteration.  Check for square: Qn = S*S
-      // TODO: DAJ try bloom filter?
-      t2 = Qn & 31;
-      if (t2 ==  0 || t2 ==  1 || t2 ==  4 || t2 ==  9 ||
-          t2 == 16 || t2 == 17 || t2 == 25) {
-        S = (uint32)sqrt(Qn);
-        if (Qn == S * S)
-          break;
-      }
+      if (is_perfect_square( Qn, &S ))
+        break;
 
       // Odd iteration.
       SQUARE_SEARCH_ITERATION;
@@ -780,10 +846,9 @@ static void squfof_unit(UV n, mult_t* mult_save, uint64* f)
     /* printf("found square %lu after %lu iterations with mult %d\n", Qn, i, 
mult_save->mult); */
 
     // Reduce to G0
-    // S = (uint32)sqrt(Qn);
     Ro = P + S*((b0 - P)/S);
     t1 = Ro;
-    So = (uint32)(((uint64)n - (uint64)t1*(uint64)t1)/(uint64)S);
+    So = (n - t1*t1)/S;
     bbn = (b0+Ro)/So;
 
     // Search for symmetry point
@@ -796,11 +861,17 @@ static void squfof_unit(UV n, mult_t* mult_save, uint64* 
f)
       bbn = (b0+Ro)/So; \
       if (Ro == t1) break;
 
+    j = 0;
     while (1) {
       SYMMETRY_POINT_ITERATION;
       SYMMETRY_POINT_ITERATION;
       SYMMETRY_POINT_ITERATION;
       SYMMETRY_POINT_ITERATION;
+      if (j++ > 2000000) {
+         mult_save->valid = 0;
+         *f = 0;
+         return;
+      }
     }
 
     *f = gcd_ui(Ro, n);
@@ -809,19 +880,18 @@ static void squfof_unit(UV n, mult_t* mult_save, uint64* 
f)
   }
 }
 
-#define NSQUFOF_MULT 16
+#define NSQUFOF_MULT (sizeof(multipliers)/sizeof(multipliers[0]))
 
 int racing_squfof_factor(UV n, UV *factors, UV rounds)
 {
-  const int multipliers[NSQUFOF_MULT] = {1, 3, 5, 7, 11,
-                                         3*5, 3*7, 3*11,
-                                         5*7, 5*11, 7*11,
-                                         3*5*7, 3*5*11, 3*7*11,
-                                         5*7*11, 3*5*7*11};
+  const UV multipliers[] = {
+    3*5*7*11, 3*5*7, 3*5*11, 3*5, 3*7*11, 3*7, 5*7*11, 5*7,
+    3*11,     3,     5*11,   5,   7*11,   7,   11,     1   };
   const UV big2 = UV_MAX >> 2;
   mult_t mult_save[NSQUFOF_MULT];
-  int i, j, race_rounds;
-  uint64 nn64, f64;
+  int i, still_racing;
+  UV nn64, mult, f64;
+  UV rounds_done = 0;
 
   /* Caller should have handled these trivial cases */
   MPUassert( (n >= 3) && ((n%2) != 0) , "bad n in racing_squfof_factor");
@@ -831,23 +901,24 @@ int racing_squfof_factor(UV n, UV *factors, UV rounds)
     factors[0] = n;  return 1;
   }
 
-  for (i = NSQUFOF_MULT-1; i >= 0; i--) {
-    nn64 = n * (uint64)multipliers[i];
-    if ((big2 / (UV)multipliers[i]) < n) {
-      /* This multiplier would overflow 64-bit */
-      mult_save[i].mult = multipliers[i];
-      mult_save[i].valid = 0;
+  for (i = 0; i < NSQUFOF_MULT; i++) {
+    mult = multipliers[i];
+    nn64 = n * mult;
+    mult_save[i].mult = mult;
+    if ((big2 / mult) < n) {
+      mult_save[i].valid = 0; /* This multiplier would overflow 64-bit */
       continue;
     }
-    mult_save[i].mult = multipliers[i];
     mult_save[i].valid = 1;
 
-    mult_save[i].b0 = (uint32) sqrt( (double)nn64 );
-    mult_save[i].imax = (uint32) sqrt( (double)mult_save[i].b0 );
+    mult_save[i].b0 = sqrt( (double)nn64 );
+    mult_save[i].imax = sqrt( (double)mult_save[i].b0 ) / 3;
+    if (mult_save[i].imax < 20)     mult_save[i].imax = 20;
+    if (mult_save[i].imax > rounds) mult_save[i].imax = rounds;
 
     mult_save[i].Q0 = 1;
     mult_save[i].P  = mult_save[i].b0;
-    mult_save[i].Qn = (uint32) (nn64 - (uint64)mult_save[i].b0 * 
(uint64)mult_save[i].b0);
+    mult_save[i].Qn = nn64 - (mult_save[i].b0 * mult_save[i].b0);
     if (mult_save[i].Qn == 0) {
       factors[0] = mult_save[i].b0;
       factors[1] = n / mult_save[i].b0;
@@ -858,19 +929,17 @@ int racing_squfof_factor(UV n, UV *factors, UV rounds)
     mult_save[i].it = 0;
   }
 
-  /* Process the multipliers a little at a time. */
-  race_rounds = 6;
-  for (i = 0; i < race_rounds; i++) {
-    for (j = 0; j < NSQUFOF_MULT; j++) {
-      if (!mult_save[j].valid)
+  /* Process the multipliers a little at a time: 0.33*(n*mult)^1/4: 20-20k */
+  do {
+    still_racing = 0;
+    for (i = 0; i < NSQUFOF_MULT; i++) {
+      if (!mult_save[i].valid)
         continue;
-      nn64 = n * (UV)multipliers[j];
-      squfof_unit(nn64, &mult_save[j], &f64);
-      if (f64 == -1) {          /* -1 is an error */
-        mult_save[j].valid = 0;
-      } else if (f64 > 1) {
-        if (f64 != multipliers[j]) {
-          f64 /= gcd_ui(f64, multipliers[j]);
+      nn64 = n * (UV)multipliers[i];
+      squfof_unit(nn64, &mult_save[i], &f64);
+      if (f64 > 1) {
+        if (f64 != multipliers[i]) {
+          f64 /= gcd_ui(f64, multipliers[i]);
           if (f64 != 1) {
             factors[0] = f64;
             factors[1] = n / f64;
@@ -879,10 +948,15 @@ int racing_squfof_factor(UV n, UV *factors, UV rounds)
           }
         }
         /* Found trivial factor.  Quit working with this multiplier. */
-        mult_save[j].valid = 0;
+        mult_save[i].valid = 0;
       }
+      if (mult_save[i].valid == 1)
+        still_racing = 1;
+      rounds_done += mult_save[i].imax;
+      if (rounds_done >= rounds)
+        break;
     }
-  }
+  } while (still_racing && rounds_done < rounds);
 
   /* No factors found */
   factors[0] = n;

-- 
Alioth's /usr/local/bin/git-commit-notice on 
/srv/git.debian.org/git/pkg-perl/packages/libmath-prime-util-perl.git

_______________________________________________
Pkg-perl-cvs-commits mailing list
Pkg-perl-cvs-commits@lists.alioth.debian.org
http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/pkg-perl-cvs-commits

[libmath-prime-util-perl] 28/50: Rewrite p-1 factoring, enhance racing SQUFOF, switch to racing SQUFOF in factor

Reply via email to