Tried a new kind of basecase division.

This commit is contained in:
William Hart 2013-04-09 03:20:58 +01:00
parent ebe4f759e1
commit 8655d3b7e0
3 changed files with 97 additions and 10 deletions

View File

@ -2745,6 +2745,70 @@ mp_limb_t mpn_invert_limb _PROTO ((mp_limb_t)) ATTRIBUTE_CONST;
dinv = v; \
} while (0)
/* add_sssaaaaaa(sh, sm, sl, ah, am, al, bh, bm, bl)

   Three-limb addition for x86-64 (GCC extended asm):
       (sh, sm, sl) = (ah, am, al) + (bh, bm, bl)
   The carry ripples from the low limb upwards; the carry out of the high
   limb is discarded.

   sl and sm are both written before all of the b operands have been read
   (sl before bm and bh, sm before bh), so both are early-clobber ("=&r").
   Without that the compiler may place bh in the register it chose for sm
   when it knows am and bh hold the same value, and the last adcq would
   then read an already-overwritten operand.

   The definition deliberately ends without a trailing backslash so that it
   cannot swallow whatever line follows it. */
#define add_sssaaaaaa(sh, sm, sl, ah, am, al, bh, bm, bl)  \
  __asm__ ("addq %8,%q2\n\tadcq %6,%q1\n\tadcq %4,%q0"     \
       : "=r" (sh), "=&r" (sm), "=&r" (sl)                 \
       : "0"  ((mp_limb_t)(ah)), "rme" ((mp_limb_t)(bh)),  \
         "1"  ((mp_limb_t)(am)), "rme" ((mp_limb_t)(bm)),  \
         "2"  ((mp_limb_t)(al)), "rme" ((mp_limb_t)(bl)))
/* sub_dddmmmsss(sh, sm, sl, ah, am, al, bh, bm, bl)

   Three-limb subtraction for x86-64 (GCC extended asm):
       (sh, sm, sl) = (ah, am, al) - (bh, bm, bl)
   The borrow ripples from the low limb upwards; the borrow out of the high
   limb is discarded, so the result is taken modulo the three-limb range.

   sl and sm are both written before all of the b operands have been read
   (sl before bm and bh, sm before bh), so both are early-clobber ("=&r")
   to keep the compiler from sharing their registers with a b operand that
   is still to be consumed.

   The definition deliberately ends without a trailing backslash so that it
   cannot swallow whatever line follows it. */
#define sub_dddmmmsss(sh, sm, sl, ah, am, al, bh, bm, bl)  \
  __asm__ ("subq %8,%q2\n\tsbbq %6,%q1\n\tsbbq %4,%q0"     \
       : "=r" (sh), "=&r" (sm), "=&r" (sl)                 \
       : "0"  ((mp_limb_t)(ah)), "rme" ((mp_limb_t)(bh)),  \
         "1"  ((mp_limb_t)(am)), "rme" ((mp_limb_t)(bm)),  \
         "2"  ((mp_limb_t)(al)), "rme" ((mp_limb_t)(bl)))
/* mpir_invert_pi2(dinv, d1, d2)

   Precompute a one-limb inverse of the two-limb divisor d = d1*B + d2, where
   B is the limb base (one more than the largest mp_limb_t value).

   Reading the arithmetic below with the usual longlong.h / mpn semantics
   (udiv_qrnnd yields quotient and remainder of a two-limb by one-limb
   division, umul_ppmm a two-limb product, mpn_add_n / mpn_sub_n return the
   carry / borrow), the value stored is
       dinv = floor (B^3 / (d + 1)) - B.
   NOTE(review): presumably d1 must have its top bit set so that
   ~d1 < d1 + 1, as udiv_qrnnd conventionally requires -- confirm against
   the longlong.h in use.

   dinv  output lvalue; assigned (and incremented) several times.
   d1    high limb of the divisor; evaluated exactly once.
   d2    low limb of the divisor; evaluated exactly once.  */
#define mpir_invert_pi2(dinv, d1, d2)                                     \
  do {                                                                    \
    /* Capture the inputs once, as limbs, so the wrap-around tests       */ \
    /* below are done in mp_limb_t arithmetic whatever the caller passed. */ \
    const mp_limb_t __d1 = (d1), __d2 = (d2);                             \
    mp_limb_t __r[2], __p[2], __cy;                                       \
                                                                          \
    if (__d2 + 1 == 0 && __d1 + 1 == 0)                                   \
      (dinv) = 0;                  /* d + 1 == B^2 */                     \
    else                                                                  \
      {                                                                   \
        /* First estimate: divide (~d1, ~d2) by d1 + 1.  When d1 + 1     */ \
        /* wraps to 0 the divisor is B itself, so the quotient is the    */ \
        /* high limb and the remainder the low limb.                     */ \
        if (__d1 + 1 == 0)                                                \
          {                                                               \
            (dinv) = ~__d1;                                               \
            __r[1] = ~__d2;                                               \
          }                                                               \
        else                                                              \
          udiv_qrnnd ((dinv), __r[1], ~__d1, ~__d2, __d1 + 1);            \
                                                                          \
        /* If d2 + 1 wraps, d + 1 == (d1 + 1)*B and the estimate is      */ \
        /* already final.  Otherwise fold in the low-limb contribution   */ \
        /* and reduce the running remainder modulo d + 1.                */ \
        if (__d2 + 1 != 0)                                                \
          {                                                               \
            __r[0] = 0;                                                   \
            umul_ppmm (__p[1], __p[0], (dinv), ~__d2);                    \
            __cy = mpn_add_n (__r, __r, __p, 2);                          \
                                                                          \
            /* __p = d + 1; no carry into the high limb in this branch. */ \
            __p[0] = __d2 + 1;                                            \
            __p[1] = __d1;                                                \
            while (__cy || mpn_cmp (__r, __p, 2) >= 0)                    \
              {                                                           \
                (dinv)++;                                                 \
                __cy -= mpn_sub_n (__r, __r, __p, 2);                     \
              }                                                           \
          }                                                               \
      }                                                                   \
  } while (0)
/* mpir_div32_preinv2(q, a1, a2, a3, d1, d2, dinv)

   Quotient-only step: estimate the single quotient limb of the three-limb
   value (a1, a2, a3) divided by the two-limb divisor (d1, d2), using a
   precomputed inverse dinv.  No remainder is returned.

   Steps, as written below:
     1. (q, __q2) = a1 * dinv + (a1, a2)      -- candidate quotient q and a
                                                 fractional limb __q2.
     2. (__r2, __r3) = (a2 - q*d1, a3) - q*d2 - (d1, d2), all modulo B^2;
        i.e. the low two limbs of the remainder belonging to q + 1.
     3. If __r2 < __q2 the quotient is bumped to q + 1.

   NOTE(review): preconditions (normalised divisor, (a1, a2) < (d1, d2),
   dinv as produced by mpir_invert_pi2) are not checked here -- confirm
   against the callers.

   q is an output lvalue; every argument may be evaluated more than once.  */
#define mpir_div32_preinv2(q, a1, a2, a3, d1, d2, dinv)                 \
  do {                                                                  \
    mp_limb_t __q2, __r2, __r3, __p1, __p2;                             \
                                                                        \
    umul_ppmm ((q), __q2, (a1), (dinv));                                \
    add_ssaaaa ((q), __q2, (q), __q2, (a1), (a2));                      \
                                                                        \
    umul_ppmm (__p1, __p2, (q), (d2));                                  \
    sub_ddmmss (__r2, __r3, (a2) - (q)*(d1), (a3), __p1, __p2);         \
    sub_ddmmss (__r2, __r3, __r2, __r3, (d1), (d2));                    \
                                                                        \
    if (__r2 < __q2)                                                    \
      (q)++;                                                            \
  } while (0)
/* mpir_divrem32_preinv2(q, r2, r3, a1, a2, a3, d1, d2, dinv)

   Quotient-and-remainder step: estimate the single quotient limb of the
   three-limb value (a1, a2, a3) divided by the two-limb divisor (d1, d2),
   using a precomputed inverse dinv, and leave a two-limb remainder in
   (r2, r3).

   Steps, as written below:
     1. (q, __q2) = a1 * dinv + (a1, a2)      -- candidate quotient q and a
                                                 fractional limb __q2.
     2. (r2, r3) = (a2 - q*d1, a3) - q*d2 - (d1, d2), all modulo B^2, and q
        is incremented: the remainder is computed for q + 1.
     3. If r2 >= __q2 the increment is undone and (d1, d2) is added back.

   Statement order matters: a1 is read for the last time in step 1, before
   r2 is first written, so r2 may name the same variable as a1 (the caller
   in mpn_sb_div_qr does exactly that).  Do not reorder.

   NOTE(review): preconditions (normalised divisor, (a1, a2) < (d1, d2),
   dinv as produced by mpir_invert_pi2) are not checked here -- confirm
   against the callers.

   q, r2 and r3 are output lvalues; every argument may be evaluated more
   than once.  */
#define mpir_divrem32_preinv2(q, r2, r3, a1, a2, a3, d1, d2, dinv)      \
  do {                                                                  \
    mp_limb_t __q2, __p1, __p2;                                         \
                                                                        \
    umul_ppmm ((q), __q2, (a1), (dinv));                                \
    add_ssaaaa ((q), __q2, (q), __q2, (a1), (a2));                      \
                                                                        \
    umul_ppmm (__p1, __p2, (q), (d2));                                  \
    (r3) = (a3);                                                        \
    (r2) = (a2) - (q)*(d1);                                             \
    sub_ddmmss ((r2), (r3), (r2), (r3), __p1, __p2);                    \
    sub_ddmmss ((r2), (r3), (r2), (r3), (d1), (d2));                    \
                                                                        \
    (q)++;                                                              \
    if ((r2) >= __q2)                                                   \
      {                                                                 \
        (q)--;                                                          \
        add_ssaaaa ((r2), (r3), (r2), (r3), (d1), (d2));                \
      }                                                                 \
  } while (0)
/* Compute the quotient and remainder for n / d. Requires d
>= B^2 / 2 and n < d B. di is the inverse

View File

@ -40,8 +40,8 @@ mpn_sb_div_qr (mp_ptr qp,
mp_limb_t qh;
mp_size_t i;
mp_limb_t n1, n0;
mp_limb_t d1, d0;
mp_limb_t cy, cy1;
mp_limb_t d1, d0, d01, d11;
mp_limb_t cy, cy1, cy2;
mp_limb_t q;
ASSERT (dn > 2);
@ -61,6 +61,9 @@ mpn_sb_div_qr (mp_ptr qp,
d1 = dp[dn + 1];
d0 = dp[dn + 0];
d01 = d0 + 1;
d11 = d1 + (d01 == 0);
np -= 2;
n1 = np[1];
@ -76,20 +79,36 @@ mpn_sb_div_qr (mp_ptr qp,
}
else
{
tdiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
mpir_divrem32_preinv2(q, n1, n0, n1, np[1], np[0], d11, d01, dinv);
cy = mpn_submul_1 (np - dn, dp, dn, q);
add_sssaaaaaa(cy, n1, n0, 0, n1, n0, 0, 0, q);
while (UNLIKELY(cy != 0 || n1 >= d1))
{
if (n1 == d1 && n0 < d0 && cy == 0) break;
sub_dddmmmsss(cy, n1, n0, cy, n1, n0, 0, d1, d0);
(q)++;
}
cy2 = mpn_submul_1 (np - dn, dp, dn, q);
sub_dddmmmsss(cy, n1, n0, 0, n1, n0, 0, 0, cy2);
/*cy1 = n0 < cy2;
n0 = (n0 - cy2);
cy = -(n1 < cy1);
n1 = (n1 - cy1);*/
/*add_sssaaaaaa(cy, n1, n0, cy, n1, n0, 0, 0, q);*/
/*cy1 = (n0 + q < n0);
n0 = (n0 + q);
cy += (n1 + cy1 < n1);
n1 = (n1 + cy1);*/
cy1 = n0 < cy;
n0 = (n0 - cy) & GMP_NUMB_MASK;
cy = n1 < cy1;
n1 = (n1 - cy1) & GMP_NUMB_MASK;
np[0] = n0;
if (UNLIKELY (cy != 0))
{
n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
q--;
q--;
}
}

View File

@ -64,7 +64,7 @@ check_sb_div_qr (void)
MPN_COPY(np2, np, nn);
invert_1(dip, dp[dn - 1], dp[dn - 2]);
mpir_invert_pi2(dip, dp[dn - 1], dp[dn - 2]);
qn = nn - dn + 1;
@ -82,12 +82,14 @@ check_sb_div_qr (void)
if (rn > nn)
{
printf("iteration = %ld\n", i);
printf("failed: q*d has too many limbs\n");
abort();
}
if (mpn_cmp(rp, np2, nn) > 0)
{
printf("iteration = %ld\n", i);
printf("failed: remainder negative\n");
abort();
}
@ -104,6 +106,7 @@ check_sb_div_qr (void)
s = (rn < dn) ? -1 : (rn > dn) ? 1 : mpn_cmp(rp, dp, dn);
if (s >= 0)
{
printf("iteration = %ld\n", i);
printf ("failed:\n");
printf ("nn = %lu, dn = %lu, qn = %lu, rn = %lu\n\n", nn, dn, qn, rn);
gmp_printf (" np: %Nx\n\n", np2, nn);
@ -115,6 +118,7 @@ check_sb_div_qr (void)
if (mpn_cmp(rp, np, rn) != 0)
{
printf("iteration = %ld\n", i);
printf("failed: remainder does not match\n");
gmp_printf (" np: %Nx\n\n", np2, nn);
gmp_printf (" dp: %Nx\n\n", dp, dn);