Tried a new kind of basecase division.
This commit is contained in:
parent
ebe4f759e1
commit
8655d3b7e0
64
gmp-impl.h
64
gmp-impl.h
@ -2745,6 +2745,70 @@ mp_limb_t mpn_invert_limb _PROTO ((mp_limb_t)) ATTRIBUTE_CONST;
|
||||
dinv = v; \
|
||||
} while (0)
|
||||
|
||||
#define add_sssaaaaaa(sh, sm, sl, ah, am, al, bh, bm, bl) \
|
||||
__asm__ ("addq %8,%q2\n\tadcq %6,%q1\n\tadcq %4,%q0" \
|
||||
: "=r" (sh), "=r" (sm), "=&r" (sl) \
|
||||
: "0" ((mp_limb_t)(ah)), "rme" ((mp_limb_t)(bh)), \
|
||||
"1" ((mp_limb_t)(am)), "rme" ((mp_limb_t)(bm)), \
|
||||
"2" ((mp_limb_t)(al)), "rme" ((mp_limb_t)(bl))) \
|
||||
|
||||
#define sub_dddmmmsss(sh, sm, sl, ah, am, al, bh, bm, bl) \
|
||||
__asm__ ("subq %8,%q2\n\tsbbq %6,%q1\n\tsbbq %4,%q0" \
|
||||
: "=r" (sh), "=r" (sm), "=&r" (sl) \
|
||||
: "0" ((mp_limb_t)(ah)), "rme" ((mp_limb_t)(bh)), \
|
||||
"1" ((mp_limb_t)(am)), "rme" ((mp_limb_t)(bm)), \
|
||||
"2" ((mp_limb_t)(al)), "rme" ((mp_limb_t)(bl))) \
|
||||
|
||||
#define mpir_invert_pi2(dinv, d1, d2) \
|
||||
do { \
|
||||
mp_limb_t __q, __r[2], __p[2], __cy; \
|
||||
\
|
||||
if ((d2) + 1 == 0 && (d1) + 1 == 0) \
|
||||
(dinv) = 0; \
|
||||
else { \
|
||||
if ((d1) + 1 == 0) \
|
||||
(dinv) = ~(d1), __r[1] = ~(d2); \
|
||||
else \
|
||||
udiv_qrnnd((dinv), __r[1], ~(d1), ~(d2), (d1) + 1); \
|
||||
\
|
||||
if ((d2) + 1 != 0) { \
|
||||
__r[0] = 0; \
|
||||
umul_ppmm(__p[1], __p[0], (dinv), ~(d2)); \
|
||||
__cy = mpn_add_n(__r, __r, __p, 2); \
|
||||
\
|
||||
__p[0] = (d2) + 1, __p[1] = (d1) + ((d2) + 1 == 0); \
|
||||
while (__cy || mpn_cmp(__r, __p, 2) >= 0) \
|
||||
{ (dinv)++; __cy -= mpn_sub_n(__r, __r, __p, 2); } \
|
||||
} \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#define mpir_div32_preinv2(q, a1, a2, a3, d1, d2, dinv) \
|
||||
do { \
|
||||
mp_limb_t __q2, __q3, __q4, __r2, __r3, __p1, __p2; \
|
||||
umul_ppmm((q), __q2, (a1), (dinv)); \
|
||||
add_ssaaaa((q), __q2, (q), __q2, (a1), (a2)); \
|
||||
umul_ppmm(__p1, __p2, (q), (d2)); \
|
||||
sub_ddmmss(__r2, __r3, (a2) - (q)*(d1), (a3), __p1, __p2); \
|
||||
sub_ddmmss(__r2, __r3, __r2, __r3, (d1), (d2)); \
|
||||
if (__r2 < __q2) (q)++; \
|
||||
} while (0)
|
||||
|
||||
#define mpir_divrem32_preinv2(q, r2, r3, a1, a2, a3, d1, d2, dinv) \
|
||||
do { \
|
||||
mp_limb_t __q2, __q3, __q4, __p1, __p2, __cy; \
|
||||
umul_ppmm((q), __q2, (a1), (dinv)); \
|
||||
add_ssaaaa((q), __q2, (q), __q2, (a1), (a2)); \
|
||||
umul_ppmm(__p1, __p2, (q), (d2)); \
|
||||
(r3) = (a3); \
|
||||
(r2) = (a2) - (q)*(d1); \
|
||||
sub_ddmmss((r2), (r3), (r2), (r3), __p1, __p2); \
|
||||
sub_ddmmss((r2), (r3), (r2), (r3), (d1), (d2)); \
|
||||
(q)++; \
|
||||
if ((r2) >= __q2) \
|
||||
{ (q)--; add_ssaaaa((r2), (r3), (r2), (r3), (d1), (d2)); } \
|
||||
} while (0)
|
||||
|
||||
/* Compute quotient the quotient and remainder for n / d. Requires d
|
||||
>= B^2 / 2 and n < d B. di is the inverse
|
||||
|
||||
|
@ -40,8 +40,8 @@ mpn_sb_div_qr (mp_ptr qp,
|
||||
mp_limb_t qh;
|
||||
mp_size_t i;
|
||||
mp_limb_t n1, n0;
|
||||
mp_limb_t d1, d0;
|
||||
mp_limb_t cy, cy1;
|
||||
mp_limb_t d1, d0, d01, d11;
|
||||
mp_limb_t cy, cy1, cy2;
|
||||
mp_limb_t q;
|
||||
|
||||
ASSERT (dn > 2);
|
||||
@ -61,6 +61,9 @@ mpn_sb_div_qr (mp_ptr qp,
|
||||
d1 = dp[dn + 1];
|
||||
d0 = dp[dn + 0];
|
||||
|
||||
d01 = d0 + 1;
|
||||
d11 = d1 + (d01 == 0);
|
||||
|
||||
np -= 2;
|
||||
|
||||
n1 = np[1];
|
||||
@ -76,20 +79,36 @@ mpn_sb_div_qr (mp_ptr qp,
|
||||
}
|
||||
else
|
||||
{
|
||||
tdiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
|
||||
mpir_divrem32_preinv2(q, n1, n0, n1, np[1], np[0], d11, d01, dinv);
|
||||
|
||||
cy = mpn_submul_1 (np - dn, dp, dn, q);
|
||||
add_sssaaaaaa(cy, n1, n0, 0, n1, n0, 0, 0, q);
|
||||
while (UNLIKELY(cy != 0 || n1 >= d1))
|
||||
{
|
||||
if (n1 == d1 && n0 < d0 && cy == 0) break;
|
||||
sub_dddmmmsss(cy, n1, n0, cy, n1, n0, 0, d1, d0);
|
||||
(q)++;
|
||||
}
|
||||
|
||||
cy2 = mpn_submul_1 (np - dn, dp, dn, q);
|
||||
|
||||
sub_dddmmmsss(cy, n1, n0, 0, n1, n0, 0, 0, cy2);
|
||||
/*cy1 = n0 < cy2;
|
||||
n0 = (n0 - cy2);
|
||||
cy = -(n1 < cy1);
|
||||
n1 = (n1 - cy1);*/
|
||||
|
||||
/*add_sssaaaaaa(cy, n1, n0, cy, n1, n0, 0, 0, q);*/
|
||||
/*cy1 = (n0 + q < n0);
|
||||
n0 = (n0 + q);
|
||||
cy += (n1 + cy1 < n1);
|
||||
n1 = (n1 + cy1);*/
|
||||
|
||||
cy1 = n0 < cy;
|
||||
n0 = (n0 - cy) & GMP_NUMB_MASK;
|
||||
cy = n1 < cy1;
|
||||
n1 = (n1 - cy1) & GMP_NUMB_MASK;
|
||||
np[0] = n0;
|
||||
|
||||
if (UNLIKELY (cy != 0))
|
||||
{
|
||||
n1 += d1 + mpn_add_n (np - dn, np - dn, dp, dn + 1);
|
||||
q--;
|
||||
q--;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -64,7 +64,7 @@ check_sb_div_qr (void)
|
||||
|
||||
MPN_COPY(np2, np, nn);
|
||||
|
||||
invert_1(dip, dp[dn - 1], dp[dn - 2]);
|
||||
mpir_invert_pi2(dip, dp[dn - 1], dp[dn - 2]);
|
||||
|
||||
qn = nn - dn + 1;
|
||||
|
||||
@ -82,12 +82,14 @@ check_sb_div_qr (void)
|
||||
|
||||
if (rn > nn)
|
||||
{
|
||||
printf("iteration = %ld\n", i);
|
||||
printf("failed: q*d has too many limbs\n");
|
||||
abort();
|
||||
}
|
||||
|
||||
if (mpn_cmp(rp, np2, nn) > 0)
|
||||
{
|
||||
printf("iteration = %ld\n", i);
|
||||
printf("failed: remainder negative\n");
|
||||
abort();
|
||||
}
|
||||
@ -104,6 +106,7 @@ check_sb_div_qr (void)
|
||||
s = (rn < dn) ? -1 : (rn > dn) ? 1 : mpn_cmp(rp, dp, dn);
|
||||
if (s >= 0)
|
||||
{
|
||||
printf("iteration = %ld\n", i);
|
||||
printf ("failed:\n");
|
||||
printf ("nn = %lu, dn = %lu, qn = %lu, rn = %lu\n\n", nn, dn, qn, rn);
|
||||
gmp_printf (" np: %Nx\n\n", np2, nn);
|
||||
@ -115,6 +118,7 @@ check_sb_div_qr (void)
|
||||
|
||||
if (mpn_cmp(rp, np, rn) != 0)
|
||||
{
|
||||
printf("iteration = %ld\n", i);
|
||||
printf("failed: remainder does not match\n");
|
||||
gmp_printf (" np: %Nx\n\n", np2, nn);
|
||||
gmp_printf (" dp: %Nx\n\n", dp, dn);
|
||||
|
Loading…
Reference in New Issue
Block a user