diff --git a/gmp-impl.h b/gmp-impl.h index 3cebbff3..d692101d 100644 --- a/gmp-impl.h +++ b/gmp-impl.h @@ -2688,62 +2688,37 @@ mp_limb_t mpn_invert_limb _PROTO ((mp_limb_t)) ATTRIBUTE_CONST; } while (0) #endif -// This macro is only for compatibility with undocumented GMP macros , do not use -#define invert_pi1(dinv, d1, d0) \ - do { \ - mp_limb_t v, p, t1, t0, mask; \ - invert_limb (v, d1); \ - p = d1 * v; \ - p += d0; \ - if (p < d0) \ - { \ - v--; \ - mask = -(p >= d1); \ - p -= d1; \ - v += mask; \ - p -= mask & d1; \ - } \ - umul_ppmm (t1, t0, d0, v); \ - p += t1; \ - if (p < t1) \ - { \ - v--; \ - if (UNLIKELY (p >= d1)) \ - { \ - if (p > d1 || t0 >= d0) \ - v--; \ - } \ - } \ - (dinv).inv32 = v; \ +#define invert_1(dinv, d1, d0) \ + do { \ + mp_limb_t _v, _p, _t1, _t0, _mask; \ + invert_limb (_v, d1); \ + _p = (d1) * _v; \ + _p += (d0); \ + if (_p < (d0)) \ + { \ + _v--; \ + _mask = -(mp_limb_t) (_p >= (d1)); \ + _p -= (d1); \ + _v += _mask; \ + _p -= _mask & (d1); \ + } \ + umul_ppmm (_t1, _t0, d0, _v); \ + _p += _t1; \ + if (_p < _t1) \ + { \ + _v--; \ + if (UNLIKELY (_p >= (d1))) \ + { \ + if (_p > (d1) || _t0 >= (d0)) \ + _v--; \ + } \ + } \ + dinv = _v; \ } while (0) -#define invert_1(dinv, d1, d0) \ - do { \ - mp_limb_t v, p, t1, t0, mask; \ - invert_limb (v, d1); \ - p = d1 * v; \ - p += d0; \ - if (p < d0) \ - { \ - v--; \ - mask = -(p >= d1); \ - p -= d1; \ - v += mask; \ - p -= mask & d1; \ - } \ - umul_ppmm (t1, t0, d0, v); \ - p += t1; \ - if (p < t1) \ - { \ - v--; \ - if (UNLIKELY (p >= d1)) \ - { \ - if (p > d1 || t0 >= d0) \ - v--; \ - } \ - } \ - dinv = v; \ - } while (0) +/* For compatibility with GMP only */ +#define invert_pi1(dinv, d1, d0) \ + invert_1((dinv).inv32, d1, d0) #define mpir_invert_pi2(dinv, d1, d2) \ do { \ @@ -2769,6 +2744,41 @@ do { \ } \ } while (0) +/* Compute the quotient and remainder for n / d. Requires d + >= B^2 / 2 and n < d B. dinv is the inverse + + floor ((B^3 - 1) / (d0 + d1 B)) - B. 
+ + NOTE: Output variables are updated multiple times. Only some inputs + and outputs may overlap. +*/ +#define udiv_qr_3by2(q, r1, r0, n2, n1, n0, d1, d0, dinv) \ + do { \ + mp_limb_t _q0, _t1, _t0; \ + umul_ppmm ((q), _q0, (n2), (dinv)); \ + add_ssaaaa ((q), _q0, (q), _q0, (n2), (n1)); \ + \ + /* Compute the two most significant limbs of n - q'd */ \ + (r1) = (n1) - (d1) * (q); \ + sub_ddmmss ((r1), (r0), (r1), (n0), (d1), (d0)); \ + umul_ppmm (_t1, _t0, (d0), (q)); \ + sub_ddmmss ((r1), (r0), (r1), (r0), _t1, _t0); \ + (q)++; \ + \ + /* Conditionally adjust q and the remainders */ \ + if ((r1) >= _q0) { \ + (q)--; \ + add_ssaaaa ((r1), (r0), (r1), (r0), (d1), (d0)); } \ + if (UNLIKELY ((r1) >= (d1))) \ + { \ + if ((r1) > (d1) || (r0) >= (d0)) \ + { \ + (q)++; \ + sub_ddmmss ((r1), (r0), (r1), (r0), (d1), (d0)); \ + } \ + } \ + } while (0) + #define mpir_divapprox32_preinv2(q, a_hi, a_lo, dinv) \ do { \ mp_limb_t __q2, __q3, __q4; \ @@ -2800,41 +2810,6 @@ do { \ } \ } while (0) -/* Compute quotient the quotient and remainder for n / d. Requires d - >= B^2 / 2 and n < d B. di is the inverse - - floor ((B^3 - 1) / (d0 + d1 B)) - B. - - NOTE: Output variables are updated multiple times. Only some inputs - and outputs may overlap. 
-*/ -#define tdiv_qr_3by2(q, r1, r0, n2, n1, n0, d1, d0, dinv) \ - do { \ - mp_limb_t _q0, _t1, _t0; \ - umul_ppmm ((q), _q0, (n2), (dinv)); \ - add_ssaaaa ((q), _q0, (q), _q0, (n2), (n1)); \ - \ - /* Compute the two most significant limbs of n - q'd */ \ - (r1) = (n1) - (d1) * (q); \ - sub_ddmmss ((r1), (r0), (r1), (n0), (d1), (d0)); \ - umul_ppmm (_t1, _t0, (d0), (q)); \ - sub_ddmmss ((r1), (r0), (r1), (r0), _t1, _t0); \ - (q)++; \ - \ - /* Conditionally adjust q and the remainders */ \ - if ((r1) >= _q0) { \ - (q)--; \ - add_ssaaaa ((r1), (r0), (r1), (r0), (d1), (d0)); } \ - if (UNLIKELY ((r1) >= (d1))) \ - { \ - if ((r1) > (d1) || (r0) >= (d0)) \ - { \ - (q)++; \ - sub_ddmmss ((r1), (r0), (r1), (r0), (d1), (d0)); \ - } \ - } \ - } while (0) - #ifndef udiv_qrnnd_preinv #define udiv_qrnnd_preinv udiv_qrnnd_preinv2 #endif @@ -2912,42 +2887,6 @@ do { \ } while (0) -// This macro is only for compatibility with undocumented GMP macros , do not use -/* Compute quotient the quotient and remainder for n / d. Requires d - >= B^2 / 2 and n < d B. di is the inverse - - floor ((B^3 - 1) / (d0 + d1 B)) - B. - - NOTE: Output variables are updated multiple times. Only some inputs - and outputs may overlap. 
-*/ -#define udiv_qr_3by2(q, r1, r0, n2, n1, n0, d1, d0, dinv) \ - do { \ - mp_limb_t _q0, _t1, _t0; \ - umul_ppmm ((q), _q0, (n2), (dinv)); \ - add_ssaaaa ((q), _q0, (q), _q0, (n2), (n1)); \ - \ - /* Compute the two most significant limbs of n - q'd */ \ - (r1) = (n1) - (d1) * (q); \ - sub_ddmmss ((r1), (r0), (r1), (n0), (d1), (d0)); \ - umul_ppmm (_t1, _t0, (d0), (q)); \ - sub_ddmmss ((r1), (r0), (r1), (r0), _t1, _t0); \ - (q)++; \ - \ - /* Conditionally adjust q and the remainders */ \ - if ((r1) >= _q0) { \ - (q)--; \ - add_ssaaaa ((r1), (r0), (r1), (r0), (d1), (d0)); } \ - if (UNLIKELY ((r1) >= (d1))) \ - { \ - if ((r1) > (d1) || (r0) >= (d0)) \ - { \ - (q)++; \ - sub_ddmmss ((r1), (r0), (r1), (r0), (d1), (d0)); \ - } \ - } \ - } while (0) - #ifndef mpn_preinv_divrem_1 /* if not done with cpuvec in a fat binary */ #define mpn_preinv_divrem_1 __MPN(preinv_divrem_1) __GMP_DECLSPEC mp_limb_t mpn_preinv_divrem_1 _PROTO ((mp_ptr, mp_size_t, mp_srcptr, mp_size_t, mp_limb_t, mp_limb_t, int)); diff --git a/mpn/generic/dc_div_qr.c b/mpn/generic/dc_div_qr.c index 69b652b5..5152a520 100644 --- a/mpn/generic/dc_div_qr.c +++ b/mpn/generic/dc_div_qr.c @@ -93,7 +93,7 @@ mpn_dc_div_qr (mp_ptr qp, } else { - tdiv_qr_3by2 (q, n1, n0, n2, n1, n0, d1, d0, dinv); + udiv_qr_3by2 (q, n1, n0, n2, n1, n0, d1, d0, dinv); if (dn > 2) { diff --git a/mpn/generic/dc_divappr_q.c b/mpn/generic/dc_divappr_q.c index b716ec2a..006c58c1 100644 --- a/mpn/generic/dc_divappr_q.c +++ b/mpn/generic/dc_divappr_q.c @@ -92,7 +92,7 @@ mpn_dc_divappr_q (mp_ptr qp, mp_ptr np, mp_size_t nn, } else { - tdiv_qr_3by2 (q, n1, n0, n2, n1, n0, d1, d0, dinv); + udiv_qr_3by2 (q, n1, n0, n2, n1, n0, d1, d0, dinv); if (dn > 2) { diff --git a/mpn/generic/inv_div_qr.c b/mpn/generic/inv_div_qr.c index 3c71f857..94d8eea8 100644 --- a/mpn/generic/inv_div_qr.c +++ b/mpn/generic/inv_div_qr.c @@ -95,7 +95,7 @@ mpn_inv_div_qr (mp_ptr qp, } else { - tdiv_qr_3by2 (q, n1, n0, n2, n1, n0, d1, d0, dinv2); + udiv_qr_3by2 (q, n1, 
n0, n2, n1, n0, d1, d0, dinv2); if (dn > 2) { diff --git a/mpn/generic/inv_divappr_q.c b/mpn/generic/inv_divappr_q.c index fca9f3ec..7db403ca 100644 --- a/mpn/generic/inv_divappr_q.c +++ b/mpn/generic/inv_divappr_q.c @@ -94,7 +94,7 @@ mpn_inv_divappr_q (mp_ptr qp, mp_ptr np, mp_size_t nn, else { invert_1(dinv2, d1, d0); - tdiv_qr_3by2 (q, n1, n0, n2, n1, n0, d1, d0, dinv2); + udiv_qr_3by2 (q, n1, n0, n2, n1, n0, d1, d0, dinv2); if (dn > 2) { diff --git a/mpn/generic/sb_div_q.c b/mpn/generic/sb_div_q.c index 8a011035..25bbd1b7 100644 --- a/mpn/generic/sb_div_q.c +++ b/mpn/generic/sb_div_q.c @@ -88,7 +88,7 @@ mpn_sb_div_q (mp_ptr qp, } else { - tdiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv); + udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv); cy = mpn_submul_1 (np - dn, dp, dn, q); @@ -134,7 +134,7 @@ mpn_sb_div_q (mp_ptr qp, } else { - tdiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv); + udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv); cy = mpn_submul_1 (np - dn, dp, dn, q); @@ -178,7 +178,7 @@ mpn_sb_div_q (mp_ptr qp, } else { - tdiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv); + udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv); np[0] = n0; np[1] = n1; diff --git a/mpn/generic/sb_divappr_q.c b/mpn/generic/sb_divappr_q.c index 7c95ac8e..4c58bce2 100644 --- a/mpn/generic/sb_divappr_q.c +++ b/mpn/generic/sb_divappr_q.c @@ -85,7 +85,7 @@ mpn_sb_divappr_q (mp_ptr qp, } else { - tdiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv); + udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv); cy = mpn_submul_1 (np - dn, dp, dn, q); @@ -131,7 +131,7 @@ mpn_sb_divappr_q (mp_ptr qp, } else { - tdiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv); + udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv); cy = mpn_submul_1 (np - dn, dp, dn, q); @@ -175,7 +175,7 @@ mpn_sb_divappr_q (mp_ptr qp, } else { - tdiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv); + udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv); 
np[1] = n1; np[0] = n0;