diff --git a/gmp-impl.h b/gmp-impl.h index 71fc77fa..91098aaa 100644 --- a/gmp-impl.h +++ b/gmp-impl.h @@ -1105,6 +1105,7 @@ __GMP_DECLSPEC extern gmp_randstate_t __gmp_rands; #define MPN_TOOM4_SQR_N_MINSIZE 32 #define MPN_TOOM7_SQR_N_MINSIZE 56 #define MPN_TOOM8_SQR_N_MINSIZE 40 +#define MPN_FFT_MUL_N_MINSIZE 64 #define mpn_sqr_diagonal __MPN(sqr_diagonal) void mpn_sqr_diagonal _PROTO ((mp_ptr, mp_srcptr, mp_size_t)); diff --git a/mpn/generic/inv_div_qr_n.c b/mpn/generic/inv_div_qr_n.c index a156b51a..518450cb 100644 --- a/mpn/generic/inv_div_qr_n.c +++ b/mpn/generic/inv_div_qr_n.c @@ -33,8 +33,8 @@ mpn_inv_div_qr_n(mp_ptr qp, mp_ptr np, mp_srcptr dp, mp_size_t dn, mp_srcptr inv) { mp_limb_t cy, lo, ret = 0; - mp_size_t m; - mp_ptr tp; + mp_size_t m, i; + mp_ptr tp, tp2; TMP_DECL; TMP_MARK; @@ -66,16 +66,18 @@ mpn_inv_div_qr_n(mp_ptr qp, mp_ptr np, ret = 1; mpn_sub_1(qp, qp, dn, 1); } - ret -= mpn_sub_1(qp, qp, dn, 1); if (UNLIKELY(ret == ~CNST_LIMB(0))) ret += mpn_add_1(qp, qp, dn, 1); /* ret is now guaranteed to be 0 */ - + m = dn + 1; + if (dn <= MPN_FFT_MUL_N_MINSIZE) + { + mpn_mul_n(tp, qp, dp, dn); + } else { - //mpn_mul_n(tp, qp, dp, dn); mp_limb_t cy, cy2; mp_size_t k; k = mpn_fft_best_k (m, 0); @@ -83,20 +85,21 @@ mpn_inv_div_qr_n(mp_ptr qp, mp_ptr np, cy = mpn_mul_fft (tp, m, qp, dn, dp, dn, k); /* cy, {tp, m} = qp * dp mod (B^m+1) */ - cy2 = mpn_add_n(tp, tp, np + 2*dn - m, 2*dn - m); - cy += mpn_add_1(tp + 2*dn - m, tp + 2*dn - m, 2*m - 2*dn, cy2); - - if (cy) - mpn_sub_1(tp, tp, m, 1); + cy2 = mpn_add_n(tp, tp, np + m, 2*dn - m); + mpn_add_1(tp + 2*dn - m, tp + 2*dn - m, 2*m - 2*dn, cy2); + + /* Make correction */ + mpn_sub_1(tp, tp, m, tp[0] - dp[0]*qp[0]); } mpn_sub_n(np, np, tp, m); + MPN_ZERO(np + m, 2*dn - m); while (np[dn] || mpn_cmp(np, dp, dn) >= 0) { ret += mpn_add_1(qp, qp, dn, 1); np[dn] -= mpn_sub_n(np, np, dp, dn); } - + /* Not possible for ret == 2 as we have qp*dp <= np */ TMP_FREE;