ni...@lysator.liu.se (Niels Möller) writes: > It turns out the explicit division is easily eliminated, v_div_g = s0 + > s1 at the end of the binary loop (when u == v). And in the euclid loop, > u == 0 implies v_div_g = s0.
Updated version below, without input_v and corresponding divisions. Also one of the binary variants use binv. While debugging, I found that the accumulated shift may well exceed GMP_LIMB_BITS (shouldn't be so surprising), but I think it must be < 2 * GMP_LIMB_BITS. And there are a few corner cases. I guess it's possible to do all cofactor shifts as part of the loop, by shifting only one bit per iteration, and making the subtraction conditional on both nubmers being odd. That would be like a single-limb variant of mpn_sec_invert (but with a stop condition to exit early). Unclear if that can be a winner, with less progress per iteration in the main loop. And it may produce a cofactor that's unnecessarily large in the g > 1 case. BTW, the binv/redc part uses an umulhi operation. Maybe we should add that to longlong.h? Regards, /Niels #include <assert.h> #include <stdio.h> #include <stdlib.h> #include "gmp.h" #include "gmp-impl.h" #include "longlong.h" /* v must be odd. Returns d0 = g = gcd(u,v). If u = 0 (mod v), returns d1 = 0. Otherwise, returns d1 = (u/g)^-1 mod (v/g) */ static mp_double_limb_t modinvert_euclid(mp_limb_t u, mp_limb_t v) { mp_limb_t s0, s1; s0 = 1; s1 = 0; assert (v & 1); u %= v; /* Maintain U = t1 u + t0 v V = s1 u + s0 v = s1 (u - q*v) + s1 q v + s0 v = s1(u - qv) + (s0 + q s1) v where U, V are the inputs and the matrix has determinant 1. Inverting gives u = s0 U - t0 V v = -s1 U + t1 V */ for (;;) { mp_limb_t q; if (u == 0) { mp_double_limb_t r; r.d0 = v; /* s0 == V / v */ if (s1 > 0) s1 = s0 - s1; r.d1 = s1; return r; } q = v / u; v -= q*u; s1 += q*s0; if (v == 0) { mp_double_limb_t r; r.d0 = u; r.d1 = s0; return r; } q = u / v; u -= q*v; s0 += q*s1; } } static mp_double_limb_t modinvert_binary (mp_limb_t u, mp_limb_t v) { mp_limb_t s0, s1, v_div_g; mp_double_limb_t r; int shift; assert (v & 1); if (u == 0) { mp_double_limb_t r; r.d0 = v; r.d1 = 0; return r; } count_trailing_zeros (shift, u); u >>= shift; s1 = 0; s0 = 1; /* Maintain U = t1 u + t0 v V = s1 u + s0 v = [2^k s1 (u-v)/ 2^k + (s0 + s1) v] where U, V are the inputs and the matrix has determinant 2^{shift}. */ while (u != v) { int count; if (u > v) { u -= v; count_trailing_zeros (count, u); u >>= count; s0 += s1; s1 <<= count; shift += count; } else { v -= u; count_trailing_zeros (count, v); v >>= count; s1 += s0; s0 <<= count; shift += count; } } /* 2^{shift} g = s0 U */ v_div_g = s0 + s1; if (v_div_g == 1) /* u == 0 (mod v) */ s0 = 0; else { for (; shift > 0; shift--) { if (s0 & 1) s0 = s0 / 2 + (v_div_g / 2) + 1; else s0 /= 2; } } r.d0 = v; r.d1 = s0; return r; } static mp_double_limb_t modinvert_binary_mask (mp_limb_t u, mp_limb_t v) { mp_limb_t s0, s1, v_div_g, sign; mp_double_limb_t r; int shift; assert (v & 1); if (u == 0) { mp_double_limb_t r; r.d0 = v; r.d1 = 0; return r; } count_trailing_zeros (shift, u); u >>= shift; s1 = 0; s0 = 1; /* Maintain U = t1 u + t0 v V = s1 u + s0 v = [2^k s1 (u-v)/ 2^k + (s0 + s1) v] where U, V are the inputs and the matrix has determinant 2^{shift}. */ u >>= 1; v >>= 1; sign = 0; while (u != v) { int count; mp_limb_t d = u - v; mp_limb_t vgtu = LIMB_HIGHBIT_TO_MASK (d); mp_limb_t sx; /* When v < u (vgtu == 0), the updates are: (u; v) <-- ( (u - v) >> count; v) (det = +(1<<count) for corr. M factor) (s1, s0) <-- (s1 << count, s0 + s1) and when v > 0, the updates are (u; v) <-- ( (v - u) >> count; u) (det = -(1<<count)) (s1, s0) <-- (s0 << count, s0 + s1) */ /* v <-- min (u, v) */ v += (vgtu & d); /* u <-- |u - v| */ u = (d ^ vgtu) - vgtu; count_trailing_zeros (count, d); sign ^= vgtu; sx = vgtu & (s0 - s1); s0 += s1; s1 += sx; count++; u >>= count; s1 <<= count; shift += count; } v = (v<<1) | 1; /* 2^{shift} g = s0 U */ v_div_g = s0 + s1; if (v_div_g == 1) /* u == 0 (mod v) */ s0 = 0; else { mp_limb_t vinv, q, p1, p0; assert (s0 > 0); assert (shift > 0); if (shift >= GMP_LIMB_BITS) { binvert_limb (vinv, v_div_g); q = -s0 * vinv; umul_ppmm (p1, p0, q, v_div_g); s0 = p1 + 1; shift -= GMP_LIMB_BITS; if (shift == 0) goto done; } else { int vinv_bits; vinv = binvert_limb_table[(v_div_g/2) & 0x7f]; for (vinv_bits = 8; vinv_bits < shift; vinv_bits *= 2) vinv = 2*vinv - vinv * vinv * v_div_g; } q = (-s0 * vinv) << (GMP_LIMB_BITS - shift); umul_ppmm (p1, p0, q, v_div_g); assert ((s0 << (GMP_LIMB_BITS - shift)) + p0 == 0); s0 = (s0 >> shift) + p1 + (q > 0); done: s0 = (sign & (v_div_g + 1)) + (sign ^ s0); } r.d0 = v; r.d1 = s0; return r; } static int modinvert_validate (mp_limb_t u, mp_limb_t v, mp_limb_t g, mp_limb_t s) { mp_limb_t p1, p0, q, r; u %= v; if (u == 0) return g == v && s == 0; if (u % g || v %g) return 0; u /= g; v /= g; if (s >= v) return 0; umul_ppmm (p1, p0, s, u); udiv_qrnnd (q, r, p1, p0, v); return r == 1; } #define COUNT 1000000 int main (int argc, char **argv) { gmp_randstate_t rands; int i; gmp_randinit_default (rands); for (i = 0; i < COUNT; i++) { unsigned u_bits = 1 + gmp_urandomm_ui (rands, GMP_NUMB_BITS); unsigned v_bits = 1 + gmp_urandomm_ui (rands, GMP_NUMB_BITS); mp_limb_t u = gmp_urandomb_ui(rands, u_bits); mp_limb_t v = gmp_urandomb_ui(rands, v_bits) | 1; mp_double_limb_t ref, r; ref = modinvert_euclid (u, v); if (!modinvert_validate (u, v, ref.d0, ref.d1)) { gmp_printf ("modinvert_euclid failed: u = %Mx v = %Mx\n" " g = %Mx, s = %Mx\n", u, v, ref.d0, ref.d1); exit(EXIT_FAILURE); } r = modinvert_binary (u,v); if (r.d0 != ref.d0 || r.d1 != ref.d1) { gmp_printf ("modinvert_binary failed: u = %Mx v = %Mx\n" " g = %Mx, s = %Mx\n", u, v, r.d0, r.d1); exit(EXIT_FAILURE); } r = modinvert_binary_mask (u,v); if (r.d0 != ref.d0 || r.d1 != ref.d1) { gmp_printf ("modinvert_binary_mask failed: u = %Mx v = %Mx\n" " g = %Mx, s = %Mx\n", u, v, r.d0, r.d1); exit(EXIT_FAILURE); } } return EXIT_SUCCESS; } -- Niels Möller. PGP-encrypted email is preferred. Keyid 368C6677. Internet email is subject to wholesale government surveillance. _______________________________________________ gmp-devel mailing list gmp-devel@gmplib.org https://gmplib.org/mailman/listinfo/gmp-devel