Fast sb_divappr_q code.

This commit is contained in:
William Hart 2013-04-12 17:49:29 +01:00
parent 019ddbfb99
commit 0b033b07f2
2 changed files with 87 additions and 5 deletions

View File

@ -32,6 +32,20 @@ along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "gmp-impl.h"
#include "longlong.h"
#define SB_DIVAPPR_Q_SMALL_THRESHOLD 30
void __divapprox_helper(mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t qn)
{
mpn_sub_n(np + 1, np + 1, dp, qn + 1);
np[1] += dp[qn];
for (qn--; qn >= 0; qn--)
{
qp[qn] = ~(mp_limb_t) 0;
add_ssaaaa(np[1], np[0], np[1], np[0], 0, dp[qn]);
}
}
mp_limb_t
mpn_sb_divappr_q (mp_ptr qp,
mp_ptr np, mp_size_t nn,
@ -41,7 +55,7 @@ mpn_sb_divappr_q (mp_ptr qp,
mp_limb_t qh;
mp_size_t qn, i;
mp_limb_t n1, n0;
mp_limb_t d1, d0;
mp_limb_t d1, d0, d11, d01;
mp_limb_t cy, cy1;
mp_limb_t q;
mp_limb_t flag;
@ -63,6 +77,70 @@ mpn_sb_divappr_q (mp_ptr qp,
if (qh != 0)
mpn_sub_n (np - dn, np - dn, dp, dn);
if (dn <= SB_DIVAPPR_Q_SMALL_THRESHOLD)
{
/* Reduce until n - 2 >= qn */
for (qn--, np--; qn > dn - 2; qn--)
{
/* fetch next word */
cy = np[0];
np--;
mpir_divapprox32_preinv2(q, cy, np[0], dinv);
/* a -= d*q1 */
cy -= mpn_submul_1(np - dn + 1, dp, dn, q);
/* correct if remainder is too large */
if (UNLIKELY(cy || np[0] >= dp[dn - 1]))
{
if (cy || mpn_cmp(np - dn + 1, dp, dn) >= 0)
{
q++;
cy -= mpn_sub_n(np - dn + 1, np - dn + 1, dp, dn);
}
}
qp[qn] = q;
}
qn++;
dp = dp + dn - qn - 1; /* make d length qn + 1 */
for ( ; qn > 0; qn--)
{
/* fetch next word */
cy = np[0];
np--;
/* rare case where truncation ruins normalisation */
if (cy > dp[qn] || (cy == dp[qn] && mpn_cmp(np - qn + 1, dp, qn) >= 0))
{
__divapprox_helper(qp, np - qn, dp, qn);
return qh;
}
mpir_divapprox32_preinv2(q, cy, np[0], dinv);
/* a -= d*q1 */
cy -= mpn_submul_1(np - qn, dp, qn + 1, q);
/* correct if remainder is too large */
if (UNLIKELY(cy || np[0] >= dp[qn]))
{
if (cy || mpn_cmp(np - qn, dp, qn + 1) >= 0)
{
q++;
cy -= mpn_sub_n(np - qn, np - qn, dp, qn + 1);
}
}
qp[qn - 1] = q;
dp++;
}
}
else
{
qp += qn;
dn -= 2; /* offset dn by 2 for main division loops,
@ -74,6 +152,9 @@ mpn_sb_divappr_q (mp_ptr qp,
n1 = np[1];
d01 = d0 + 1;
d11 = d1 + (d01 < d0);
for (i = qn - (dn + 2); i >= 0; i--)
{
np--;
@ -85,7 +166,7 @@ mpn_sb_divappr_q (mp_ptr qp,
}
else
{
udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
mpir_divrem32_preinv2 (q, n1, n0, n1, np[1], np[0], d11, d01, d1, d0, dinv);
cy = mpn_submul_1 (np - dn, dp, dn, q);
@ -131,7 +212,7 @@ mpn_sb_divappr_q (mp_ptr qp,
}
else
{
udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
mpir_divrem32_preinv2 (q, n1, n0, n1, np[1], np[0], d11, d01, d1, d0, dinv);
cy = mpn_submul_1 (np - dn, dp, dn, q);
@ -175,7 +256,7 @@ mpn_sb_divappr_q (mp_ptr qp,
}
else
{
udiv_qr_3by2 (q, n1, n0, n1, np[1], np[0], d1, d0, dinv);
mpir_divrem32_preinv2 (q, n1, n0, n1, np[1], np[0], d11, d01, d1, d0, dinv);
np[1] = n1;
np[0] = n0;
@ -185,6 +266,7 @@ mpn_sb_divappr_q (mp_ptr qp,
}
ASSERT_ALWAYS (np[1] == n1);
}
return qh;
}

View File

@ -64,7 +64,7 @@ check_sb_divappr_q (void)
MPN_COPY(np2, np, nn);
mpir_invert_pi1(dip, dp[dn - 1], dp[dn - 2]);
mpir_invert_pi2(dip, dp[dn - 1], dp[dn - 2]);
qn = nn - dn + 1;