Fixed the bugs in my speedup of the inv_div_qr_n code.
This commit is contained in:
parent
c0d907e00d
commit
ae48fa4733
@ -1105,6 +1105,7 @@ __GMP_DECLSPEC extern gmp_randstate_t __gmp_rands;
|
||||
#define MPN_TOOM4_SQR_N_MINSIZE 32
|
||||
#define MPN_TOOM7_SQR_N_MINSIZE 56
|
||||
#define MPN_TOOM8_SQR_N_MINSIZE 40
|
||||
#define MPN_FFT_MUL_N_MINSIZE 64
|
||||
|
||||
#define mpn_sqr_diagonal __MPN(sqr_diagonal)
|
||||
void mpn_sqr_diagonal _PROTO ((mp_ptr, mp_srcptr, mp_size_t));
|
||||
|
@ -33,8 +33,8 @@ mpn_inv_div_qr_n(mp_ptr qp, mp_ptr np,
|
||||
mp_srcptr dp, mp_size_t dn, mp_srcptr inv)
|
||||
{
|
||||
mp_limb_t cy, lo, ret = 0;
|
||||
mp_size_t m;
|
||||
mp_ptr tp;
|
||||
mp_size_t m, i;
|
||||
mp_ptr tp, tp2;
|
||||
TMP_DECL;
|
||||
|
||||
TMP_MARK;
|
||||
@ -66,16 +66,18 @@ mpn_inv_div_qr_n(mp_ptr qp, mp_ptr np,
|
||||
ret = 1;
|
||||
mpn_sub_1(qp, qp, dn, 1);
|
||||
}
|
||||
|
||||
|
||||
ret -= mpn_sub_1(qp, qp, dn, 1);
|
||||
if (UNLIKELY(ret == ~CNST_LIMB(0)))
|
||||
ret += mpn_add_1(qp, qp, dn, 1);
|
||||
/* ret is now guaranteed to be 0 */
|
||||
|
||||
|
||||
m = dn + 1;
|
||||
if (dn <= MPN_FFT_MUL_N_MINSIZE)
|
||||
{
|
||||
mpn_mul_n(tp, qp, dp, dn);
|
||||
} else
|
||||
{
|
||||
//mpn_mul_n(tp, qp, dp, dn);
|
||||
mp_limb_t cy, cy2;
|
||||
mp_size_t k;
|
||||
k = mpn_fft_best_k (m, 0);
|
||||
@ -83,20 +85,21 @@ mpn_inv_div_qr_n(mp_ptr qp, mp_ptr np,
|
||||
|
||||
cy = mpn_mul_fft (tp, m, qp, dn, dp, dn, k);
|
||||
/* cy, {tp, m} = qp * dp mod (B^m+1) */
|
||||
cy2 = mpn_add_n(tp, tp, np + 2*dn - m, 2*dn - m);
|
||||
cy += mpn_add_1(tp + 2*dn - m, tp + 2*dn - m, 2*m - 2*dn, cy2);
|
||||
|
||||
if (cy)
|
||||
mpn_sub_1(tp, tp, m, 1);
|
||||
cy2 = mpn_add_n(tp, tp, np + m, 2*dn - m);
|
||||
mpn_add_1(tp + 2*dn - m, tp + 2*dn - m, 2*m - 2*dn, cy2);
|
||||
|
||||
/* Make correction */
|
||||
mpn_sub_1(tp, tp, m, tp[0] - dp[0]*qp[0]);
|
||||
}
|
||||
|
||||
mpn_sub_n(np, np, tp, m);
|
||||
MPN_ZERO(np + m, 2*dn - m);
|
||||
while (np[dn] || mpn_cmp(np, dp, dn) >= 0)
|
||||
{
|
||||
ret += mpn_add_1(qp, qp, dn, 1);
|
||||
np[dn] -= mpn_sub_n(np, np, dp, dn);
|
||||
}
|
||||
|
||||
|
||||
/* Not possible for ret == 2 as we have qp*dp <= np */
|
||||
|
||||
TMP_FREE;
|
||||
|
Loading…
Reference in New Issue
Block a user