I got something working.  It runs quite well, and seems to beat the
performance of mpn_gcd.  Here is the code:

#include "gmp-impl.h"
#include "longlong.h"

#ifndef CMP_SWAP
#define CMP_SWAP(ap,bp,n)                                               \
  do {                                                                  \
    if (ap[n - 1] < bp[n - 1])                                          \
      {                                                                 \
        mp_limb_t *_tp = ap; ap = bp; bp = _tp;                         \
      }                                                                 \
  } while (0)
#endif
// We should do CMP_SWAP something like this:
// cmp
// mov up, tmp
// cmov vp, up
// cmov tmp, vp

#define SMASH(a,b) a##b

void mpn_gcd_33 (mp_limb_t *, mp_limb_t *, mp_limb_t *);
void mpn_gcd_44 (mp_limb_t *, mp_limb_t *, mp_limb_t *);
void mpn_gcd_55 (mp_limb_t *, mp_limb_t *, mp_limb_t *);

mp_limb_t mpn_sub_3 (mp_ptr, mp_srcptr, mp_srcptr);
mp_limb_t mpn_rshift_3 (mp_ptr, mp_srcptr, int);

static inline void
mpn_gcd_NN (mp_limb_t *rp, mp_limb_t *up, mp_limb_t *vp, size_t N)
{
  mp_limb_t cy;

  do
    {
      // Conditionally swap U and V.  This must be done without jumps.
      CMP_SWAP(up,vp,N);

#if HAVE_NATIVE_mpn_sub_3
      cy = mpn_sub_3 (up, up, vp);
#else
      cy = mpn_sub_n (up, up, vp, N);
#endif
      if (UNLIKELY (cy))
        mpn_neg (up, up, N);

      if (UNLIKELY (up[0] == 0))
        {
          // Handle any number of low U zeros including that U = 0
          int all_zeros = 1;
          mp_size_t i;
          for (i = 1; i < N && all_zeros; i++)
            all_zeros = (up[i] == 0);

          if (all_zeros)
            {
              mpn_copyi (rp, vp, N);
              return;
            }
          mpn_copyi (up, up + (i - 1), N - (i - 1));
          mpn_zero (up + N - (i - 1), i - 1);
        }
      int cnt;
      count_trailing_zeros (cnt, up[0]);
#if HAVE_NATIVE_mpn_rshift_3
      mpn_rshift_3 (up, up, cnt);
#else
      mpn_rshift (up, up, N, cnt);
#endif
    }
  while ((up[N - 1] | vp[N - 1]) != 0);

  switch (N)
    {
    case 3:
      {
        mp_double_limb_t g = mpn_gcd_22 (up[1], up[0], vp[1], vp[0]);
        rp[0] = g.d0;
        rp[1] = g.d1;
        rp[2] = 0;
        break;
      }
    case 4:
      mpn_gcd_33 (rp, up, vp);
      rp[3] = 0;
      break;
    case 5:
      mpn_gcd_44 (rp, up, vp);
      rp[4] = 0;
      break;
    }
}

void
SMASH(mpn_gcd_,33) (mp_limb_t *rp, mp_limb_t *up, mp_limb_t *vp)
{
  mpn_gcd_NN (rp, up, vp, 3);
}

void
SMASH(mpn_gcd_,44) (mp_limb_t *rp, mp_limb_t *up, mp_limb_t *vp)
{
  mpn_gcd_NN (rp, up, vp, 4);
}

void
SMASH(mpn_gcd_,55) (mp_limb_t *rp, mp_limb_t *up, mp_limb_t *vp)
{
  mpn_gcd_NN (rp, up, vp, 5);
}

-- 
Torbjörn
Please encrypt, key id 0xC8601622
_______________________________________________
gmp-devel mailing list
gmp-devel@gmplib.org
https://gmplib.org/mailman/listinfo/gmp-devel

Reply via email to