Torbjörn reminded me of the work on more clever div_qr_1 we discussed a few years ago, which seemed promising but never got completed.
At the time, I think one reason it stalled was the somewhat unwieldy divrem_1 primitive. The plan (see https://gmplib.org/devel/) is to replace divrem_1 by a couple of simpler functions with only one loop each. Then it's easier to work on and improve one loop at a time. To get going, I've written C implementations of mpn_div_qr_1n_pi1 and mpn_divf_qr1n_pi1, and made divrem_1 call them. Patch below. We've discussed earlier on the interface design; how to store/return the high quotient limb? In general, my opinion is that the mpn division functions should return and not store the high limb. But the low-level primitives can have any interface convenient to them. For mpn_div_qr_1, I think it makes sense to store the high quotient limb together with the rest of the quotient limbs, and return the remainder. But to make the mpn_div_qr_1n_pi1 fit with the use in divrem_1, settled on a different interface, /* Divides (uh B^n + {up, n}) by d, storing the quotient at {qp, n}. Requires that uh < d. */ mp_limb_t mpn_div_qr_1n_pi1 (mp_ptr qp, mp_srcptr up, mp_size_t n, mp_limb_t uh, mp_limb_t d, mp_limb_t dinv) So it takes the high limb as a separate argument (a bit like the carry in for addmul_1c), and this limb must be already reduced mod d. And it's ok to pass uh == 0, which might be convenient sometimes. Not sure what the mpn_div_qr_1u interface should be (i.e., unnormalized divisor). Comments appreciated. It passes make check for me. For the patch to have any effect, one has to move away any divrem_1.asm so it isn't picked up by configure. Also, for functions using precomputations, I think we should move away from passing d and dinv as arguments, but instead pass a "cps" array like the mod_1_1 functions do, and have an mpn_foo_cps function to fill out the array. Rational: The improved div_qr_1_pi1 I'm thinking of will need some additional precomputed loop-invariants, and the array (with some maximum size nailed by the ABI) provides more freedom for the implementation. Except that I think the cps array should include *all* needed information about the divisor, so d it self need not be passed as an explicit argument. Rational: Some functions need the normalization count, the normalized divisor, and the reciprocal, but not the original unnormalized d. And in that case, it's a waste to pass d as an argument. And except that I still have no idea what the name "cps" stands for... Regards, /Niels diff -Nrc2 gmp.4fa4b9b52bd5/configure.ac gmp/configure.ac *** gmp.4fa4b9b52bd5/configure.ac 2013-10-16 21:15:19.000000000 +0200 --- gmp/configure.ac 2013-10-16 21:15:19.000000000 +0200 *************** *** 2821,2824 **** --- 2821,2825 ---- toom_interpolate_8pts toom_interpolate_12pts toom_interpolate_16pts \ invertappr invert binvert mulmod_bnm1 sqrmod_bnm1 \ + div_qr_1n_pi1 divf_qr_1n_pi1 \ div_qr_2 div_qr_2n_pi1 div_qr_2u_pi1 \ sbpi1_div_q sbpi1_div_qr sbpi1_divappr_q \ diff -Nrc2 gmp.4fa4b9b52bd5/gmp-impl.h gmp/gmp-impl.h *** gmp.4fa4b9b52bd5/gmp-impl.h 2013-10-16 21:15:19.000000000 +0200 --- gmp/gmp-impl.h 2013-10-16 21:15:19.000000000 +0200 *************** *** 1417,1420 **** --- 1417,1426 ---- __GMP_DECLSPEC mp_size_t mpn_fft_next_size (mp_size_t, int) ATTRIBUTE_CONST; + #define mpn_div_qr_1n_pi1 __MPN(div_qr_1n_pi1) + __GMP_DECLSPEC mp_limb_t mpn_div_qr_1n_pi1 (mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, mp_limb_t); + + #define mpn_divf_qr_1n_pi1 __MPN(divf_qr_1n_pi1) + __GMP_DECLSPEC mp_limb_t mpn_divf_qr_1n_pi1 (mp_ptr, mp_size_t, mp_limb_t, mp_limb_t, mp_limb_t); + #define mpn_div_qr_2n_pi1 __MPN(div_qr_2n_pi1) __GMP_DECLSPEC mp_limb_t mpn_div_qr_2n_pi1 (mp_ptr, mp_ptr, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, mp_limb_t); diff -Nrc2 gmp.4fa4b9b52bd5/mpn/generic/divf_qr_1n_pi1.c gmp/mpn/generic/divf_qr_1n_pi1.c *** gmp.4fa4b9b52bd5/mpn/generic/divf_qr_1n_pi1.c 1970-01-01 01:00:00.000000000 +0100 --- gmp/mpn/generic/divf_qr_1n_pi1.c 2013-10-16 21:15:19.000000000 +0200 *************** *** 0 **** --- 1,53 ---- + /* mpn_divf_qr_1n_pi1 + + Contributed to the GNU project by Niels Möller + + THIS FILE CONTAINS INTERNAL FUNCTIONS WITH MUTABLE INTERFACES. IT IS + ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS + ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP + RELEASE. + + + Copyright 2013 Free Software Foundation, Inc. + + This file is part of the GNU MP Library. + + The GNU MP Library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 3 of the License, or (at your + option) any later version. + + The GNU MP Library is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ + + #include "gmp.h" + #include "gmp-impl.h" + #include "longlong.h" + + #if GMP_NAIL_BITS > 0 + #error Nail bits not supported + #endif + + mp_limb_t + mpn_divf_qr_1n_pi1 (mp_ptr qp, mp_size_t n, mp_limb_t u, + mp_limb_t d, mp_limb_t dinv) + { + ASSERT (n > 0); + ASSERT (d & GMP_NUMB_HIGHBIT); + ASSERT (u < d); + + do + { + mp_limb_t q; + udiv_qrnnd_preinv (q, u, u, 0, d, dinv); + qp[--n] = q; + } + while (n > 0); + + return u; + } diff -Nrc2 gmp.4fa4b9b52bd5/mpn/generic/div_qr_1n_pi1.c gmp/mpn/generic/div_qr_1n_pi1.c *** gmp.4fa4b9b52bd5/mpn/generic/div_qr_1n_pi1.c 1970-01-01 01:00:00.000000000 +0100 --- gmp/mpn/generic/div_qr_1n_pi1.c 2013-10-16 21:15:19.000000000 +0200 *************** *** 0 **** --- 1,58 ---- + /* mpn_div_qr_1n_pi1 + + Contributed to the GNU project by Niels Möller + + THIS FILE CONTAINS INTERNAL FUNCTIONS WITH MUTABLE INTERFACES. IT IS + ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS + ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP + RELEASE. + + + Copyright 2013 Free Software Foundation, Inc. + + This file is part of the GNU MP Library. + + The GNU MP Library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 3 of the License, or (at your + option) any later version. + + The GNU MP Library is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ + + #include "gmp.h" + #include "gmp-impl.h" + #include "longlong.h" + + #if GMP_NAIL_BITS > 0 + #error Nail bits not supported + #endif + + /* Divides (uh B^n + {up, n}) by d, storing the quotient at {qp, n}. + Requires that uh < d. */ + mp_limb_t + mpn_div_qr_1n_pi1 (mp_ptr qp, mp_srcptr up, mp_size_t n, mp_limb_t uh, + mp_limb_t d, mp_limb_t dinv) + { + ASSERT (n > 0); + ASSERT (uh < d); + ASSERT (d & GMP_NUMB_HIGHBIT); + ASSERT (MPN_SAME_OR_SEPARATE_P (qp, up, n)); + + do + { + mp_limb_t q, ul; + + ul = up[--n]; + udiv_qrnnd_preinv (q, uh, uh, ul, d, dinv); + qp[n] = q; + } + while (n > 0); + + return uh; + } diff -Nrc2 gmp.4fa4b9b52bd5/mpn/generic/divrem_1.c gmp/mpn/generic/divrem_1.c *** gmp.4fa4b9b52bd5/mpn/generic/divrem_1.c 2013-10-16 21:15:19.000000000 +0200 --- gmp/mpn/generic/divrem_1.c 2013-10-16 21:15:19.000000000 +0200 *************** *** 138,154 **** invert_limb (dinv, d); ! for (i = un - 1; i >= 0; i--) ! { ! n0 = up[i] << GMP_NAIL_BITS; ! udiv_qrnnd_preinv (*qp, r, r, n0, d, dinv); ! r >>= GMP_NAIL_BITS; ! qp--; ! } ! for (i = qxn - 1; i >= 0; i--) ! { ! udiv_qrnnd_preinv (*qp, r, r, CNST_LIMB(0), d, dinv); ! r >>= GMP_NAIL_BITS; ! qp--; ! } return r; } --- 138,149 ---- invert_limb (dinv, d); ! /* Undo the offsetting */ ! qp -= (n-1); ! ! if (un > 0) ! r = mpn_div_qr_1n_pi1 (qp + qxn, up, un, r, d, dinv); ! ! if (qxn > 0) ! r = mpn_divf_qr_1n_pi1 (qp, qxn, r, d, dinv); return r; } -- Niels Möller. PGP-encrypted email is preferred. Keyid C0B98E26. Internet email is subject to wholesale government surveillance. _______________________________________________ gmp-devel mailing list gmp-devel@gmplib.org http://gmplib.org/mailman/listinfo/gmp-devel