t...@gmplib.org (Torbjörn Granlund) writes:

> We might want to teach tuneup to choose between small quotient division
> primitives, like we do for Jacobi.

If we think there's room for several hgcd2 variants, that's definitely
needed.

The first step is to add support to speed. Does the below look reasonable? I
modelled it a bit on SPEED_ROUTINE_MODLIMB_INVERT, which also measures a
fixed size, but I don't quite understand all of struct speed_params.

Regards,
/Niels

diff -r 228585220bca tune/common.c
--- a/tune/common.c     Sun Sep 01 02:13:52 2019 +0200
+++ b/tune/common.c     Tue Sep 03 22:41:01 2019 +0200
@@ -1634,6 +1634,12 @@
 }
 
 double
+speed_mpn_hgcd2 (struct speed_params *s)
+{
+  SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2);
+}
+
+double
 speed_mpn_hgcd (struct speed_params *s)
 {
   SPEED_ROUTINE_MPN_HGCD_CALL (mpn_hgcd, mpn_hgcd_itch);
diff -r 228585220bca tune/speed.c
--- a/tune/speed.c      Sun Sep 01 02:13:52 2019 +0200
+++ b/tune/speed.c      Tue Sep 03 22:41:01 2019 +0200
@@ -285,6 +285,7 @@
 
   { "mpn_matrix22_mul",  speed_mpn_matrix22_mul     },
 
+  { "mpn_hgcd2",         speed_mpn_hgcd2, FLAG_NODATA },
   { "mpn_hgcd",          speed_mpn_hgcd             },
   { "mpn_hgcd_lehmer",   speed_mpn_hgcd_lehmer      },
   { "mpn_hgcd_appr",     speed_mpn_hgcd_appr        },
diff -r 228585220bca tune/speed.h
--- a/tune/speed.h      Sun Sep 01 02:13:52 2019 +0200
+++ b/tune/speed.h      Tue Sep 03 22:41:01 2019 +0200
@@ -214,6 +214,7 @@
 double speed_mpn_div_qr_2u (struct speed_params *);
 double speed_mpn_fib2_ui (struct speed_params *);
 double speed_mpn_matrix22_mul (struct speed_params *);
+double speed_mpn_hgcd2 (struct speed_params *);
 double speed_mpn_hgcd (struct speed_params *);
 double speed_mpn_hgcd_lehmer (struct speed_params *);
 double speed_mpn_hgcd_appr (struct speed_params *);
@@ -2843,6 +2844,40 @@
      },                                                                \
      function (px[j-1], py[j-1], 0))
 
+#define SPEED_ROUTINE_MPN_HGCD2(function)                              \
+  {                                                                    \
+    unsigned   i, j;                                                   \
+    struct hgcd_matrix1 m = {{{0,0},{0,0}}};                           \
+    double     t;                                                      \
+                                                                       \
+    speed_operand_src (s, s->xp_block, SPEED_BLOCK_SIZE);              \
+    speed_operand_src (s, s->yp_block, SPEED_BLOCK_SIZE);              \
+    speed_cache_fill (s);                                              \
+                                                                       \
+    speed_starttime ();                                                \
+    i = s->reps;                                                       \
+    mp_limb_t chain = 0;                                               \
+    do                                                                 \
+      {                                                                \
+       for (j = 0; j < SPEED_BLOCK_SIZE; j+= 2)                        \
+         {                                                             \
+           /* randomized but successively dependent */                 \
+           function (s->xp_block[j] | GMP_NUMB_HIGHBIT,                \
+                     s->xp_block[j+1] + chain,                         \
+                     s->yp_block[j] | GMP_NUMB_HIGHBIT,                \
+                     s->yp_block[j+1], &m);                            \
+           chain += m.u[0][0];                                         \
+         }                                                             \
+      }                                                                \
+    while (--i != 0);                                                  \
+    t = speed_endtime ();                                              \
+                                                                       \
+    /* make sure the compiler won't optimize away chain */             \
+    noop_1 (chain);                                                    \
+                                                                       \
+    s->time_divisor = SPEED_BLOCK_SIZE / 2;                            \
+    return t;                                                          \
+  }
 
 #define SPEED_ROUTINE_MPN_HGCD_CALL(func, itchfunc)                    \
   {                                                                    \

-- 
Niels Möller. PGP-encrypted email is preferred. Keyid 368C6677.
Internet email is subject to wholesale government surveillance.
_______________________________________________
gmp-devel mailing list
gmp-devel@gmplib.org
https://gmplib.org/mailman/listinfo/gmp-devel

Reply via email to