t...@gmplib.org (Torbjörn Granlund) writes: > We might want to teach tuneup to choose between small quotient division > primitives, like we do for Jacobi.
If we think there's room for several hgcd2 variants, that's definitely needed. First step is to add support to speed. Does the below look reasonable? I modelled it a bit on SPEED_ROUTINE_MODLIMB_INVERT, which also measures a fixed size, but I don't quite understand all of struct speed_params. Regards, /Niels diff -r 228585220bca tune/common.c --- a/tune/common.c Sun Sep 01 02:13:52 2019 +0200 +++ b/tune/common.c Tue Sep 03 22:41:01 2019 +0200 @@ -1634,6 +1634,12 @@ } double +speed_mpn_hgcd2 (struct speed_params *s) +{ + SPEED_ROUTINE_MPN_HGCD2 (mpn_hgcd2); +} + +double speed_mpn_hgcd (struct speed_params *s) { SPEED_ROUTINE_MPN_HGCD_CALL (mpn_hgcd, mpn_hgcd_itch); diff -r 228585220bca tune/speed.c --- a/tune/speed.c Sun Sep 01 02:13:52 2019 +0200 +++ b/tune/speed.c Tue Sep 03 22:41:01 2019 +0200 @@ -285,6 +285,7 @@ { "mpn_matrix22_mul", speed_mpn_matrix22_mul }, + { "mpn_hgcd2", speed_mpn_hgcd2, FLAG_NODATA }, { "mpn_hgcd", speed_mpn_hgcd }, { "mpn_hgcd_lehmer", speed_mpn_hgcd_lehmer }, { "mpn_hgcd_appr", speed_mpn_hgcd_appr }, diff -r 228585220bca tune/speed.h --- a/tune/speed.h Sun Sep 01 02:13:52 2019 +0200 +++ b/tune/speed.h Tue Sep 03 22:41:01 2019 +0200 @@ -214,6 +214,7 @@ double speed_mpn_div_qr_2u (struct speed_params *); double speed_mpn_fib2_ui (struct speed_params *); double speed_mpn_matrix22_mul (struct speed_params *); +double speed_mpn_hgcd2 (struct speed_params *); double speed_mpn_hgcd (struct speed_params *); double speed_mpn_hgcd_lehmer (struct speed_params *); double speed_mpn_hgcd_appr (struct speed_params *); @@ -2843,6 +2844,40 @@ }, \ function (px[j-1], py[j-1], 0)) +#define SPEED_ROUTINE_MPN_HGCD2(function) \ + { \ + unsigned i, j; \ + struct hgcd_matrix1 m = {{{0,0},{0,0}}}; \ + double t; \ + \ + speed_operand_src (s, s->xp_block, SPEED_BLOCK_SIZE); \ + speed_operand_src (s, s->yp_block, SPEED_BLOCK_SIZE); \ + speed_cache_fill (s); \ + \ + speed_starttime (); \ + i = s->reps; \ + mp_limb_t chain = 0; \ + do \ + { \ + for (j = 0; j < 
SPEED_BLOCK_SIZE; j+= 2) \ + { \ + /* randomized but successively dependent */ \ + function (s->xp_block[j] | GMP_NUMB_HIGHBIT, \ + s->xp_block[j+1] + chain, \ + s->yp_block[j] | GMP_NUMB_HIGHBIT, \ + s->yp_block[j+1], &m); \ + chain += m.u[0][0]; \ + } \ + } \ + while (--i != 0); \ + t = speed_endtime (); \ + \ + /* make sure the compiler won't optimize away chain */ \ + noop_1 (chain); \ + \ + s->time_divisor = SPEED_BLOCK_SIZE / 2; \ + return t; \ + } #define SPEED_ROUTINE_MPN_HGCD_CALL(func, itchfunc) \ { \ -- Niels Möller. PGP-encrypted email is preferred. Keyid 368C6677. Internet email is subject to wholesale government surveillance. _______________________________________________ gmp-devel mailing list gmp-devel@gmplib.org https://gmplib.org/mailman/listinfo/gmp-devel