Ciao,

Il Gio, 19 Settembre 2019 10:39 pm, Marco Bodrato ha scritto:
>> Il Mer, 4 Settembre 2019 10:44 am, Niels Möller ha scritto:
>>> I think I've seen comments in the literature saying that 2^e mod m is
>>> "obviously" more efficient than general modular exponentiation. But as

> If nobody started or is willing to try... I'll try to write such a special
> function for base=2.

I tried... the attempt is attached.

I wrote the doubling-modulo step with 3 possible operations:
 if it can not overflow, simply lshift;
 otherwise choose between (x*2-m) and ((x-m)*2)

I had to write special code for size=1, because my first try was slower
than the generic code. The attached code is faster than the current (when
base=2)  for all sizes on my laptop.
But on shell it is not!

[bodrato@shell /var/tmp/bodrato/gmp-repo]$ tune/speed -rs1-10000 -f1.6
mpz_powm mpz_powm.2 mpz_powm.3
overhead 0.000000002 secs, precision 10000 units of 2.86e-10 secs, CPU
freq 3500.08 MHz
             mpz_powm    mpz_powm.2    mpz_powm.3
1         0.000004322       #0.8082        0.9884
2         0.000007201        1.0723       #0.9836
3         0.000013274       #0.9465        0.9928
4         0.000020143       #0.9408        0.9822
6         0.000033094       #0.8662        0.9964
9         0.000062521       #0.8396        0.9964
14        0.000137914       #0.8000        0.9996
22        0.000317164       #0.7911        1.0061
35        0.000779100       #0.7662        0.9771
56        0.001738323       #0.7941        1.0011
[...]
3798      0.839609000       #0.7757        0.9969
6076      1.513591000       #0.7794        1.0269
9721      2.472574000       #0.7542        0.9746

So, let's say that, asymptotically, «2^e mod m is "obviously" more
efficient than general modular exponentiation». For small sizes it is not
so easy to write specialised functions :-)

Ĝis,
m

-- 
http://bodrato.it/papers/
diff -r 3e04a9bbba13 mpn/generic/powm.c
--- a/mpn/generic/powm.c	Fri Sep 20 20:36:30 2019 +0200
+++ b/mpn/generic/powm.c	Sat Sep 21 08:01:23 2019 +0200
@@ -173,6 +173,375 @@
   TMP_FREE;
 }
 
+
+/* rp[n-1..0] = 2 ^ ep[en-1..0] mod mp[n-1..0]
+   Requires that mp[n-1..0] is odd and > 1.
+   Requires that ep[en-1..0] is > 1.
+   Uses scratch space at tp of MAX(mpn_binvert_itch(n),2n) limbs.  */
+void
+mpn_2powm (mp_ptr rp, mp_srcptr ep, mp_size_t en,
+	  mp_srcptr mp, mp_size_t n, mp_ptr tp)
+{
+  mp_limb_t ip[2], *mip;
+  int cnt;
+  mp_bitcnt_t ebi, mbi, tbi;
+  mp_size_t tn;
+  mp_limb_t expbits;
+  mp_ptr pp, this_pp;
+  long i;
+  TMP_DECL;
+
+  ASSERT (en > 1 || (en == 1 && ep[0] > 1));
+  ASSERT (n > 1 || (n == 1 && mp[0] > 1));
+  ASSERT ((mp[0] & 1) != 0);
+
+  MPN_SIZEINBASE_2EXP(ebi, ep, en, 1);
+  MPN_SIZEINBASE_2EXP(mbi, mp, n, 1);
+
+  tbi = 1;
+  do {
+    if (--ebi == 0)
+      {
+	MPN_FILL (rp, n, 0);
+	rp[tbi / GMP_LIMB_BITS] = CNST_LIMB (1) << (tbi % GMP_LIMB_BITS);
+	return;
+      }
+    else
+      {
+	mp_bitcnt_t t;
+	t = (tbi << 1) + getbit (ep, ebi);
+	if (t >= mbi)
+	  break;
+	tbi = t;
+      }
+  } while (1);
+
+  TMP_MARK;
+
+#if WANT_REDC_2
+  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+    {
+      mip = ip;
+      binvert_limb (mip[0], mp[0]);
+      mip[0] = -mip[0];
+    }
+  else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
+    {
+      mip = ip;
+      mpn_binvert (mip, mp, 2, tp);
+      mip[0] = -mip[0]; mip[1] = ~mip[1];
+    }
+#else
+  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
+    {
+      mip = ip;
+      binvert_limb (mip[0], mp[0]);
+      mip[0] = -mip[0];
+    }
+#endif
+  else
+    {
+      mip = TMP_ALLOC_LIMBS (n);
+      mpn_binvert (mip, mp, n, tp);
+    }
+
+  tn = tbi / GMP_LIMB_BITS;
+  MPN_ZERO (tp, tn);
+  tp[tn] = CNST_LIMB (1) << (tbi % GMP_LIMB_BITS);
+
+  redcify (rp, tp, tn + 1, mp, n);
+
+#if ! HAVE_NATIVE_mpn_rsblsh1_n_ip2
+#undef mpn_rsblsh1_n_ip2
+#if HAVE_NATIVE_mpn_rsblsh1_n
+#define mpn_rsblsh1_n_ip2(a,b,n)        mpn_rsblsh1_n(a,b,a,n)
+#else
+#define mpn_rsblsh1_n_ip2(a,b,n)					\
+  do									\
+    {									\
+      ASSERT_CARRY (mpn_lshift (a, a, n, 1));				\
+      ASSERT_CARRY (mpn_sub_n (a, a, b, n));				\
+    } while (0)
+#endif
+#endif
+
+#define INNERLOOP2							\
+  do									\
+    {									\
+      MPN_SQR (tp, rp, n);						\
+      MPN_REDUCE (rp, tp, mp, n, mip);					\
+      if (getbit (ep, ebi) != 0)					\
+	{								\
+	  if (getbit (rp, mbi) == 0)					\
+	    ASSERT_NOCARRY (mpn_lshift (rp, rp, n, 1));			\
+	  else if (mpn_cmp (rp, mp, n) >= 0)				\
+	    {								\
+	      ASSERT_NOCARRY (mpn_sub_n (rp, rp, mp, n));		\
+	      ASSERT_NOCARRY (mpn_lshift (rp, rp, n, 1));		\
+	    }								\
+	  else								\
+	    mpn_rsblsh1_n_ip2 (rp, mp, n);				\
+	}								\
+    } while (--ebi != 0)
+
+
+
+  if (n == 1)
+    {
+      mp_limb_t r0, m0, invm;
+      r0 = *rp;
+      m0 = *mp;
+      invm = *mip;
+      do
+	{
+	  mp_limb_t t0, t1, _dummy, p1;
+	  /* MPN_SQR (tp, rp, n);			*/
+	  umul_ppmm (t1, t0, r0, r0);
+	  /* MPN_REDUCE (rp, tp, mp, n, mip);		*/
+	  umul_ppmm (p1, _dummy, m0, (t0 * invm) & GMP_NUMB_MASK);
+	  p1 += (t0 != 0);
+	  r0 = t1 + p1;
+	  if (p1 > r0)
+	    r0 -= m0;
+
+	  if (getbit (ep, ebi) != 0)
+	    {
+	      if (r0 <= (m0 >> 1))
+		r0 <<= 1;
+	      else if (r0 >= m0)
+		r0 = (r0 - m0) << 1;
+	      else
+		r0 = (r0 << 1) - m0;
+	    }
+	} while (--ebi != 0);
+      *rp = r0;
+    }
+  else
+#if WANT_REDC_2
+  if (REDC_1_TO_REDC_2_THRESHOLD < MUL_TOOM22_THRESHOLD)
+    {
+      if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+	{
+	  if (REDC_1_TO_REDC_2_THRESHOLD < SQR_BASECASE_THRESHOLD
+	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
+	    {
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
+	      INNERLOOP2;
+	    }
+	  else
+	    {
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
+	      INNERLOOP2;
+	    }
+	}
+      else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
+	{
+	  if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
+	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
+	    {
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_2 (rp, tp, mp, n, mip)
+	      INNERLOOP2;
+	    }
+	  else
+	    {
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_2 (rp, tp, mp, n, mip)
+	      INNERLOOP2;
+	    }
+	}
+      else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
+	{
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_2 (rp, tp, mp, n, mip)
+	  INNERLOOP2;
+	}
+      else
+	{
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
+	  INNERLOOP2;
+	}
+    }
+  else
+    {
+      if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
+	{
+	  if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
+	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
+	    {
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
+	      INNERLOOP2;
+	    }
+	  else
+	    {
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
+	      INNERLOOP2;
+	    }
+	}
+      else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+	{
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
+	  INNERLOOP2;
+	}
+      else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
+	{
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_2 (rp, tp, mp, n, mip)
+	  INNERLOOP2;
+	}
+      else
+	{
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
+	  INNERLOOP2;
+	}
+    }
+
+#else  /* WANT_REDC_2 */
+
+  if (REDC_1_TO_REDC_N_THRESHOLD < MUL_TOOM22_THRESHOLD)
+    {
+      if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
+	{
+	  if (REDC_1_TO_REDC_N_THRESHOLD < SQR_BASECASE_THRESHOLD
+	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
+	    {
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
+	      INNERLOOP2;
+	    }
+	  else
+	    {
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
+	      INNERLOOP2;
+	    }
+	}
+      else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
+	{
+	  if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
+	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
+	    {
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
+	      INNERLOOP2;
+	    }
+	  else
+	    {
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
+	      INNERLOOP2;
+	    }
+	}
+      else
+	{
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
+	  INNERLOOP2;
+	}
+    }
+  else
+    {
+      if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
+	{
+	  if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
+	      || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
+	    {
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n)			mpn_mul_basecase (r,a,n,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
+	      INNERLOOP2;
+	    }
+	  else
+	    {
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n)			mpn_sqr_basecase (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
+	      INNERLOOP2;
+	    }
+	}
+      else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
+	{
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	MPN_REDC_1 (rp, tp, mp, n, mip[0])
+	  INNERLOOP2;
+	}
+      else
+	{
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n)			mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip)	mpn_redc_n (rp, tp, mp, n, mip)
+	  INNERLOOP2;
+	}
+    }
+#endif  /* WANT_REDC_2 */
+
+ done:
+
+  MPN_COPY (tp, rp, n);
+  MPN_ZERO (tp + n, n);
+
+#if WANT_REDC_2
+  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+    MPN_REDC_1 (rp, tp, mp, n, mip[0]);
+  else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
+    MPN_REDC_2 (rp, tp, mp, n, mip);
+#else
+  if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
+    MPN_REDC_1 (rp, tp, mp, n, mip[0]);
+#endif
+  else
+    mpn_redc_n (rp, tp, mp, n, mip);
+
+  if (mpn_cmp (rp, mp, n) >= 0)
+    mpn_sub_n (rp, rp, mp, n);
+
+  TMP_FREE;
+}
+
 /* rp[n-1..0] = bp[bn-1..0] ^ ep[en-1..0] mod mp[n-1..0]
    Requires that mp[n-1..0] is odd.
    Requires that ep[en-1..0] is > 1.
@@ -194,6 +563,12 @@
   ASSERT (en > 1 || (en == 1 && ep[0] > 1));
   ASSERT (n >= 1 && ((mp[0] & 1) != 0));
 
+  if (1 && bn == 1 && bp[0] == 2)
+    {
+      mpn_2powm (rp, ep, en, mp, n, tp);
+      return;
+    }
+
   TMP_MARK;
 
   MPN_SIZEINBASE_2EXP(ebi, ep, en, 1);
diff -r 3e04a9bbba13 tune/speed.c
--- a/tune/speed.c	Fri Sep 20 20:36:30 2019 +0200
+++ b/tune/speed.c	Sat Sep 21 08:01:23 2019 +0200
@@ -417,7 +417,7 @@
   { "mpz_2fac_ui",       speed_mpz_2fac_ui,  FLAG_NODATA   },
   { "mpz_mfac_uiui",     speed_mpz_mfac_uiui,  FLAG_NODATA | FLAG_R_OPTIONAL },
   { "mpz_primorial_ui",  speed_mpz_primorial_ui, FLAG_NODATA },
-  { "mpz_powm",          speed_mpz_powm             },
+  { "mpz_powm",          speed_mpz_powm,     FLAG_R_OPTIONAL },
   { "mpz_powm_mod",      speed_mpz_powm_mod         },
   { "mpz_powm_redc",     speed_mpz_powm_redc        },
   { "mpz_powm_sec",      speed_mpz_powm_sec        },
diff -r 3e04a9bbba13 tune/speed.h
--- a/tune/speed.h	Fri Sep 20 20:36:30 2019 +0200
+++ b/tune/speed.h	Sat Sep 21 08:01:23 2019 +0200
@@ -2637,7 +2637,10 @@
     SPEED_RESTRICT_COND (s->size >= 1);					\
 									\
     mpz_init (r);							\
-    mpz_init_set_n (b, s->xp, s->size);					\
+    if (s->r < 2)							\
+      mpz_init_set_n (b, s->xp, s->size);				\
+    else								\
+      mpz_init_set_ui (b, s->r);					\
     mpz_init_set_n (m, s->yp, s->size);					\
     mpz_setbit (m, 0);	/* force m to odd */				\
     mpz_init_set_n (e, s->xp_block, 6);					\
_______________________________________________
gmp-devel mailing list
gmp-devel@gmplib.org
https://gmplib.org/mailman/listinfo/gmp-devel

Reply via email to