Ciao,
Il Gio, 19 Settembre 2019 10:39 pm, Marco Bodrato ha scritto:
>> Il Mer, 4 Settembre 2019 10:44 am, Niels Möller ha scritto:
>>> I think I've seen comments in the literature saying that 2^e mod m is
>>> "obviously" more efficient than general modular exponentiation. But as
> If nobody started or is willing to try... I'll try to write such a special
> function for base=2.
I tried... the attempt is attached.
I wrote the doubling-modulo step with 3 possible operations:
if it can not overflow, simply lshift;
otherwise choose between (x*2-m) and ((x-m)*2)
I had to write special code for size=1, because my first try was slower
than the generic code. The attached code is faster than the current (when
base=2) for all sizes on my laptop.
But on shell it is not!
[bodrato@shell /var/tmp/bodrato/gmp-repo]$ tune/speed -rs1-10000 -f1.6
mpz_powm mpz_powm.2 mpz_powm.3
overhead 0.000000002 secs, precision 10000 units of 2.86e-10 secs, CPU
freq 3500.08 MHz
mpz_powm mpz_powm.2 mpz_powm.3
1 0.000004322 #0.8082 0.9884
2 0.000007201 1.0723 #0.9836
3 0.000013274 #0.9465 0.9928
4 0.000020143 #0.9408 0.9822
6 0.000033094 #0.8662 0.9964
9 0.000062521 #0.8396 0.9964
14 0.000137914 #0.8000 0.9996
22 0.000317164 #0.7911 1.0061
35 0.000779100 #0.7662 0.9771
56 0.001738323 #0.7941 1.0011
[...]
3798 0.839609000 #0.7757 0.9969
6076 1.513591000 #0.7794 1.0269
9721 2.472574000 #0.7542 0.9746
So, let's say that, asymptotically, «2^e mod m is "obviously" more
efficient than general modular exponentiation». For small sizes it is not
so easy to write specialised functions :-)
Ĝis,
m
--
http://bodrato.it/papers/
diff -r 3e04a9bbba13 mpn/generic/powm.c
--- a/mpn/generic/powm.c Fri Sep 20 20:36:30 2019 +0200
+++ b/mpn/generic/powm.c Sat Sep 21 08:01:23 2019 +0200
@@ -173,6 +173,375 @@
TMP_FREE;
}
+
+/* rp[n-1..0] = 2 ^ ep[en-1..0] mod mp[n-1..0]
+ Requires that mp[n-1..0] is odd and > 1.
+ Requires that ep[en-1..0] is > 1.
+ Uses scratch space at tp of MAX(mpn_binvert_itch(n),2n) limbs. */
+void
+mpn_2powm (mp_ptr rp, mp_srcptr ep, mp_size_t en,
+ mp_srcptr mp, mp_size_t n, mp_ptr tp)
+{
+ mp_limb_t ip[2], *mip;
+ int cnt;
+ mp_bitcnt_t ebi, mbi, tbi;
+ mp_size_t tn;
+ mp_limb_t expbits;
+ mp_ptr pp, this_pp;
+ long i;
+ TMP_DECL;
+
+ ASSERT (en > 1 || (en == 1 && ep[0] > 1));
+ ASSERT (n > 1 || (n == 1 && mp[0] > 1));
+ ASSERT ((mp[0] & 1) != 0);
+
+ MPN_SIZEINBASE_2EXP(ebi, ep, en, 1);
+ MPN_SIZEINBASE_2EXP(mbi, mp, n, 1);
+
+ tbi = 1;
+ do {
+ if (--ebi == 0)
+ {
+ MPN_FILL (rp, n, 0);
+ rp[tbi / GMP_LIMB_BITS] = CNST_LIMB (1) << (tbi % GMP_LIMB_BITS);
+ return;
+ }
+ else
+ {
+ mp_bitcnt_t t;
+ t = (tbi << 1) + getbit (ep, ebi);
+ if (t >= mbi)
+ break;
+ tbi = t;
+ }
+ } while (1);
+
+ TMP_MARK;
+
+#if WANT_REDC_2
+ if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+ {
+ mip = ip;
+ binvert_limb (mip[0], mp[0]);
+ mip[0] = -mip[0];
+ }
+ else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
+ {
+ mip = ip;
+ mpn_binvert (mip, mp, 2, tp);
+ mip[0] = -mip[0]; mip[1] = ~mip[1];
+ }
+#else
+ if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
+ {
+ mip = ip;
+ binvert_limb (mip[0], mp[0]);
+ mip[0] = -mip[0];
+ }
+#endif
+ else
+ {
+ mip = TMP_ALLOC_LIMBS (n);
+ mpn_binvert (mip, mp, n, tp);
+ }
+
+ tn = tbi / GMP_LIMB_BITS;
+ MPN_ZERO (tp, tn);
+ tp[tn] = CNST_LIMB (1) << (tbi % GMP_LIMB_BITS);
+
+ redcify (rp, tp, tn + 1, mp, n);
+
+#if ! HAVE_NATIVE_mpn_rsblsh1_n_ip2
+#undef mpn_rsblsh1_n_ip2
+#if HAVE_NATIVE_mpn_rsblsh1_n
+#define mpn_rsblsh1_n_ip2(a,b,n) mpn_rsblsh1_n(a,b,a,n)
+#else
+#define mpn_rsblsh1_n_ip2(a,b,n) \
+ do \
+ { \
+ ASSERT_CARRY (mpn_lshift (a, a, n, 1)); \
+ ASSERT_CARRY (mpn_sub_n (a, a, b, n)); \
+ } while (0)
+#endif
+#endif
+
+#define INNERLOOP2 \
+ do \
+ { \
+ MPN_SQR (tp, rp, n); \
+ MPN_REDUCE (rp, tp, mp, n, mip); \
+ if (getbit (ep, ebi) != 0) \
+ { \
+ if (getbit (rp, mbi) == 0) \
+ ASSERT_NOCARRY (mpn_lshift (rp, rp, n, 1)); \
+ else if (mpn_cmp (rp, mp, n) >= 0) \
+ { \
+ ASSERT_NOCARRY (mpn_sub_n (rp, rp, mp, n)); \
+ ASSERT_NOCARRY (mpn_lshift (rp, rp, n, 1)); \
+ } \
+ else \
+ mpn_rsblsh1_n_ip2 (rp, mp, n); \
+ } \
+ } while (--ebi != 0)
+
+
+
+ if (n == 1)
+ {
+ mp_limb_t r0, m0, invm;
+ r0 = *rp;
+ m0 = *mp;
+ invm = *mip;
+ do
+ {
+ mp_limb_t t0, t1, _dummy, p1;
+ /* MPN_SQR (tp, rp, n); */
+ umul_ppmm (t1, t0, r0, r0);
+ /* MPN_REDUCE (rp, tp, mp, n, mip); */
+ umul_ppmm (p1, _dummy, m0, (t0 * invm) & GMP_NUMB_MASK);
+ p1 += (t0 != 0);
+ r0 = t1 + p1;
+ if (p1 > r0)
+ r0 -= m0;
+
+ if (getbit (ep, ebi) != 0)
+ {
+ if (r0 <= (m0 >> 1))
+ r0 <<= 1;
+ else if (r0 >= m0)
+ r0 = (r0 - m0) << 1;
+ else
+ r0 = (r0 << 1) - m0;
+ }
+ } while (--ebi != 0);
+ *rp = r0;
+ }
+ else
+#if WANT_REDC_2
+ if (REDC_1_TO_REDC_2_THRESHOLD < MUL_TOOM22_THRESHOLD)
+ {
+ if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+ {
+ if (REDC_1_TO_REDC_2_THRESHOLD < SQR_BASECASE_THRESHOLD
+ || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
+ {
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
+ INNERLOOP2;
+ }
+ else
+ {
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
+ INNERLOOP2;
+ }
+ }
+ else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
+ {
+ if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
+ || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
+ {
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_2 (rp, tp, mp, n, mip)
+ INNERLOOP2;
+ }
+ else
+ {
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_2 (rp, tp, mp, n, mip)
+ INNERLOOP2;
+ }
+ }
+ else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
+ {
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_2 (rp, tp, mp, n, mip)
+ INNERLOOP2;
+ }
+ else
+ {
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip)
+ INNERLOOP2;
+ }
+ }
+ else
+ {
+ if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
+ {
+ if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
+ || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
+ {
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
+ INNERLOOP2;
+ }
+ else
+ {
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
+ INNERLOOP2;
+ }
+ }
+ else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+ {
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
+ INNERLOOP2;
+ }
+ else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
+ {
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_2 (rp, tp, mp, n, mip)
+ INNERLOOP2;
+ }
+ else
+ {
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip)
+ INNERLOOP2;
+ }
+ }
+
+#else /* WANT_REDC_2 */
+
+ if (REDC_1_TO_REDC_N_THRESHOLD < MUL_TOOM22_THRESHOLD)
+ {
+ if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
+ {
+ if (REDC_1_TO_REDC_N_THRESHOLD < SQR_BASECASE_THRESHOLD
+ || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
+ {
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
+ INNERLOOP2;
+ }
+ else
+ {
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
+ INNERLOOP2;
+ }
+ }
+ else if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
+ {
+ if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
+ || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
+ {
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip)
+ INNERLOOP2;
+ }
+ else
+ {
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip)
+ INNERLOOP2;
+ }
+ }
+ else
+ {
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip)
+ INNERLOOP2;
+ }
+ }
+ else
+ {
+ if (BELOW_THRESHOLD (n, MUL_TOOM22_THRESHOLD))
+ {
+ if (MUL_TOOM22_THRESHOLD < SQR_BASECASE_THRESHOLD
+ || BELOW_THRESHOLD (n, SQR_BASECASE_THRESHOLD))
+ {
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n) mpn_mul_basecase (r,a,n,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
+ INNERLOOP2;
+ }
+ else
+ {
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n) mpn_sqr_basecase (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
+ INNERLOOP2;
+ }
+ }
+ else if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
+ {
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) MPN_REDC_1 (rp, tp, mp, n, mip[0])
+ INNERLOOP2;
+ }
+ else
+ {
+#undef MPN_SQR
+#undef MPN_REDUCE
+#define MPN_SQR(r,a,n) mpn_sqr (r,a,n)
+#define MPN_REDUCE(rp,tp,mp,n,mip) mpn_redc_n (rp, tp, mp, n, mip)
+ INNERLOOP2;
+ }
+ }
+#endif /* WANT_REDC_2 */
+
+ done:
+
+ MPN_COPY (tp, rp, n);
+ MPN_ZERO (tp + n, n);
+
+#if WANT_REDC_2
+ if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_2_THRESHOLD))
+ MPN_REDC_1 (rp, tp, mp, n, mip[0]);
+ else if (BELOW_THRESHOLD (n, REDC_2_TO_REDC_N_THRESHOLD))
+ MPN_REDC_2 (rp, tp, mp, n, mip);
+#else
+ if (BELOW_THRESHOLD (n, REDC_1_TO_REDC_N_THRESHOLD))
+ MPN_REDC_1 (rp, tp, mp, n, mip[0]);
+#endif
+ else
+ mpn_redc_n (rp, tp, mp, n, mip);
+
+ if (mpn_cmp (rp, mp, n) >= 0)
+ mpn_sub_n (rp, rp, mp, n);
+
+ TMP_FREE;
+}
+
/* rp[n-1..0] = bp[bn-1..0] ^ ep[en-1..0] mod mp[n-1..0]
Requires that mp[n-1..0] is odd.
Requires that ep[en-1..0] is > 1.
@@ -194,6 +563,12 @@
ASSERT (en > 1 || (en == 1 && ep[0] > 1));
ASSERT (n >= 1 && ((mp[0] & 1) != 0));
+ if (1 && bn == 1 && bp[0] == 2)
+ {
+ mpn_2powm (rp, ep, en, mp, n, tp);
+ return;
+ }
+
TMP_MARK;
MPN_SIZEINBASE_2EXP(ebi, ep, en, 1);
diff -r 3e04a9bbba13 tune/speed.c
--- a/tune/speed.c Fri Sep 20 20:36:30 2019 +0200
+++ b/tune/speed.c Sat Sep 21 08:01:23 2019 +0200
@@ -417,7 +417,7 @@
{ "mpz_2fac_ui", speed_mpz_2fac_ui, FLAG_NODATA },
{ "mpz_mfac_uiui", speed_mpz_mfac_uiui, FLAG_NODATA | FLAG_R_OPTIONAL },
{ "mpz_primorial_ui", speed_mpz_primorial_ui, FLAG_NODATA },
- { "mpz_powm", speed_mpz_powm },
+ { "mpz_powm", speed_mpz_powm, FLAG_R_OPTIONAL },
{ "mpz_powm_mod", speed_mpz_powm_mod },
{ "mpz_powm_redc", speed_mpz_powm_redc },
{ "mpz_powm_sec", speed_mpz_powm_sec },
diff -r 3e04a9bbba13 tune/speed.h
--- a/tune/speed.h Fri Sep 20 20:36:30 2019 +0200
+++ b/tune/speed.h Sat Sep 21 08:01:23 2019 +0200
@@ -2637,7 +2637,10 @@
SPEED_RESTRICT_COND (s->size >= 1); \
\
mpz_init (r); \
- mpz_init_set_n (b, s->xp, s->size); \
+ if (s->r < 2) \
+ mpz_init_set_n (b, s->xp, s->size); \
+ else \
+ mpz_init_set_ui (b, s->r); \
mpz_init_set_n (m, s->yp, s->size); \
mpz_setbit (m, 0); /* force m to odd */ \
mpz_init_set_n (e, s->xp_block, 6); \
_______________________________________________
gmp-devel mailing list
gmp-devel@gmplib.org
https://gmplib.org/mailman/listinfo/gmp-devel