I got something working. It runs quite well, and seems to beat the performance of mpn_gcd. Here is the code:
#include "gmp-impl.h" #include "longlong.h" #ifndef CMP_SWAP #define CMP_SWAP(ap,bp,n) \ do { \ if (ap[n - 1] < bp[n - 1]) \ { \ mp_limb_t *_tp = ap; ap = bp; bp = _tp; \ } \ } while (0) #endif // We should do CMP_SWAP something like this: // cmp // mov up, tmp // cmov vp, up // cmov tmp, vp #define SMASH(a,b) a##b void mpn_gcd_33 (mp_limb_t *, mp_limb_t *, mp_limb_t *); void mpn_gcd_44 (mp_limb_t *, mp_limb_t *, mp_limb_t *); void mpn_gcd_55 (mp_limb_t *, mp_limb_t *, mp_limb_t *); mp_limb_t mpn_sub_3 (mp_ptr, mp_srcptr, mp_srcptr); mp_limb_t mpn_rshift_3 (mp_ptr, mp_srcptr, int); static inline void mpn_gcd_NN (mp_limb_t *rp, mp_limb_t *up, mp_limb_t *vp, size_t N) { mp_limb_t cy; do { // Conditionally swap U and V. This must be done without jumps. CMP_SWAP(up,vp,N); #if HAVE_NATIVE_mpn_sub_3 cy = mpn_sub_3 (up, up, vp); #else cy = mpn_sub_n (up, up, vp, N); #endif if (UNLIKELY (cy)) mpn_neg (up, up, N); if (UNLIKELY (up[0] == 0)) { // Handle any number of low U zeros including that U = 0 int all_zeros = 1; mp_size_t i; for (i = 1; i < N && all_zeros; i++) all_zeros = (up[i] == 0); if (all_zeros) { mpn_copyi (rp, vp, N); return; } mpn_copyi (up, up + (i - 1), N - (i - 1)); mpn_zero (up + N - (i - 1), i - 1); } int cnt; count_trailing_zeros (cnt, up[0]); #if HAVE_NATIVE_mpn_rshift_3 mpn_rshift_3 (up, up, cnt); #else mpn_rshift (up, up, N, cnt); #endif } while ((up[N - 1] | vp[N - 1]) != 0); switch (N) { case 3: { mp_double_limb_t g = mpn_gcd_22 (up[1], up[0], vp[1], vp[0]); rp[0] = g.d0; rp[1] = g.d1; rp[2] = 0; break; } case 4: mpn_gcd_33 (rp, up, vp); rp[3] = 0; break; case 5: mpn_gcd_44 (rp, up, vp); rp[4] = 0; break; } } void SMASH(mpn_gcd_,33) (mp_limb_t *rp, mp_limb_t *up, mp_limb_t *vp) { mpn_gcd_NN (rp, up, vp, 3); } void SMASH(mpn_gcd_,44) (mp_limb_t *rp, mp_limb_t *up, mp_limb_t *vp) { mpn_gcd_NN (rp, up, vp, 4); } void SMASH(mpn_gcd_,55) (mp_limb_t *rp, mp_limb_t *up, mp_limb_t *vp) { mpn_gcd_NN (rp, up, vp, 5); } -- Torbjörn Please encrypt, key id 0xC8601622 _______________________________________________ gmp-devel mailing list gmp-devel@gmplib.org https://gmplib.org/mailman/listinfo/gmp-devel