Fixed the bugs in my speedup of the inv_div_qr_n code.

This commit is contained in:
(no author) 2010-03-07 01:37:51 +00:00
parent c0d907e00d
commit ae48fa4733
2 changed files with 15 additions and 11 deletions

View File

@ -1105,6 +1105,7 @@ __GMP_DECLSPEC extern gmp_randstate_t __gmp_rands;
#define MPN_TOOM4_SQR_N_MINSIZE 32
#define MPN_TOOM7_SQR_N_MINSIZE 56
#define MPN_TOOM8_SQR_N_MINSIZE 40
#define MPN_FFT_MUL_N_MINSIZE 64
#define mpn_sqr_diagonal __MPN(sqr_diagonal)
void mpn_sqr_diagonal _PROTO ((mp_ptr, mp_srcptr, mp_size_t));

View File

@ -33,8 +33,8 @@ mpn_inv_div_qr_n(mp_ptr qp, mp_ptr np,
mp_srcptr dp, mp_size_t dn, mp_srcptr inv)
{
mp_limb_t cy, lo, ret = 0;
mp_size_t m;
mp_ptr tp;
mp_size_t m, i;
mp_ptr tp, tp2;
TMP_DECL;
TMP_MARK;
@ -66,16 +66,18 @@ mpn_inv_div_qr_n(mp_ptr qp, mp_ptr np,
ret = 1;
mpn_sub_1(qp, qp, dn, 1);
}
ret -= mpn_sub_1(qp, qp, dn, 1);
if (UNLIKELY(ret == ~CNST_LIMB(0)))
ret += mpn_add_1(qp, qp, dn, 1);
/* ret is now guaranteed to be 0 */
m = dn + 1;
if (dn <= MPN_FFT_MUL_N_MINSIZE)
{
mpn_mul_n(tp, qp, dp, dn);
} else
{
//mpn_mul_n(tp, qp, dp, dn);
mp_limb_t cy, cy2;
mp_size_t k;
k = mpn_fft_best_k (m, 0);
@ -83,20 +85,21 @@ mpn_inv_div_qr_n(mp_ptr qp, mp_ptr np,
cy = mpn_mul_fft (tp, m, qp, dn, dp, dn, k);
/* cy, {tp, m} = qp * dp mod (B^m+1) */
cy2 = mpn_add_n(tp, tp, np + 2*dn - m, 2*dn - m);
cy += mpn_add_1(tp + 2*dn - m, tp + 2*dn - m, 2*m - 2*dn, cy2);
if (cy)
mpn_sub_1(tp, tp, m, 1);
cy2 = mpn_add_n(tp, tp, np + m, 2*dn - m);
mpn_add_1(tp + 2*dn - m, tp + 2*dn - m, 2*m - 2*dn, cy2);
/* Make correction */
mpn_sub_1(tp, tp, m, tp[0] - dp[0]*qp[0]);
}
mpn_sub_n(np, np, tp, m);
MPN_ZERO(np + m, 2*dn - m);
while (np[dn] || mpn_cmp(np, dp, dn) >= 0)
{
ret += mpn_add_1(qp, qp, dn, 1);
np[dn] -= mpn_sub_n(np, np, dp, dn);
}
/* Not possible for ret == 2 as we have qp*dp <= np */
TMP_FREE;