Fix merge of new mpz_powm, mpz_powm_ui code.

This commit is contained in:
William Hart 2017-02-13 17:24:01 +00:00
parent 8f7df128db
commit 7794ae01b4
4 changed files with 37 additions and 25 deletions

14
configure vendored
View File

@ -4949,13 +4949,13 @@ echo "include_mpn(\`sparc32/sparc-defs.m4')" >> $gmp_tmpconfigm4i
penryn-*-*)
path_64="x86_64/core2/penryn x86_64/core2 x86_64" ;;
nehalem-*-*)
path_64="x86_64/nehalem x86_64" ;;
path_64="x86_64/nehalem x86_64/core2 x86_64" ;;
westmere-*-*)
path_64="x86_64/nehalem/westmere x86_64/nehalem x86_64" ;;
path_64="x86_64/nehalem/westmere x86_64/nehalem x86_64/core2 x86_64" ;;
sandybridge-*-*)
path_64="x86_64/sandybridge x86_64" ;;
path_64="x86_64/sandybridge x86_64/nehalem x86_64/core2 x86_64" ;;
ivybridge-*-*)
path_64="x86_64/sandybridge/ivybridge x86_64/sandybridge x86_64" ;;
path_64="x86_64/sandybridge/ivybridge x86_64/sandybridge x86_64/nehalem x86_64/core2 x86_64" ;;
haswell-*-*)
path_64="x86_64/haswell/avx x86_64/haswell x86_64/sandybridge x86_64" ;;
skylake-*-*)
@ -23867,11 +23867,13 @@ gmp_mpn_functions="$extra_functions \
mod_1_1 mod_1_2 mod_1_3 tdiv_q mp_bases fib_table \
mulmid_basecase mulmid mulmid_n toom42_mulmid mulmod_bexpp1 mulmod_2expm1 \
mulmod_2expp1_basecase mul_fft \
mul mul_n mul_basecase sqr_basecase random random2 pow_1 \
mul mul_n mul_basecase sqr_basecase random random2 \
pow_1 powlo powm binvert \
urandomb urandomm randomb rrandom invert \
rootrem sizeinbase sqrtrem get_str set_str scan0 scan1 popcount hamdist cmp perfect_square_p \
bdivmod gcd gcd_1 gcdext tdiv_qr jacobi_base jacobi jacobi_2 get_d \
mullow_n mulhigh_n mullow_n_basecase mullow_basecase redc_1 redc_2 \
mullow_n mulhigh_n mullow_n_basecase mullow_basecase \
redc_1 redc_2 redc_n \
sb_divappr_q toom3_mul toom3_mul_n toom4_mul toom4_mul_n \
dc_div_q dc_divappr_q sb_div_q sb_div_qr dc_div_qr dc_div_qr_n inv_divappr_q_n \
inv_divappr_q inv_div_q inv_div_qr inv_div_qr_n rootrem_basecase \

View File

@ -2073,6 +2073,10 @@ __GMP_DECLSPEC mp_limb_t gmp_primesieve (mp_ptr, mp_limb_t);
#define INV_DIV_Q_THRESHOLD (MUL_FFT_THRESHOLD/3)
#endif
#ifndef BINV_NEWTON_THRESHOLD
#define BINV_NEWTON_THRESHOLD 300
#endif
#ifndef DC_DIVAPPR_Q_THRESHOLD
#define DC_DIVAPPR_Q_THRESHOLD (3 * MUL_TOOM3_THRESHOLD)
#endif
@ -4428,6 +4432,10 @@ extern mp_size_t dc_div_q_threshold;
#define INV_DIV_Q_THRESHOLD inv_div_q_threshold
extern mp_size_t inv_div_q_threshold;
/* Replace any earlier BINV_NEWTON_THRESHOLD macro with a reference to the
   variable binv_newton_threshold, which is declared here and defined
   elsewhere. */
#undef BINV_NEWTON_THRESHOLD
#define BINV_NEWTON_THRESHOLD binv_newton_threshold
extern mp_size_t binv_newton_threshold;
#undef DC_DIVAPPR_Q_THRESHOLD
#define DC_DIVAPPR_Q_THRESHOLD dc_divappr_q_threshold
extern mp_size_t dc_divappr_q_threshold;

View File

@ -58,7 +58,7 @@ see https://www.gnu.org/licenses/. */
static void
mod (mp_ptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn, mp_limb_t dinv,
mp_limb_t d1inv, mp_ptr tp)
mp_ptr tp)
{
mp_ptr qp;
TMP_DECL;
@ -77,13 +77,13 @@ mod (mp_ptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn, mp_limb_t dinv,
else if (BELOW_THRESHOLD (dn, DC_DIV_QR_THRESHOLD) ||
BELOW_THRESHOLD (nn - dn, DC_DIV_QR_THRESHOLD))
{
mpn_sb_div_qr (qp, np, nn, dp, dn, dinv, d1inv); /* JPF: no gmp_pi1_t, two limbs pi */
mpn_sb_div_qr (qp, np, nn, dp, dn, dinv); /* JPF: no gmp_pi1_t, two limbs pi */
}
/* Different conditions */
else if (BELOW_THRESHOLD (dn, INV_DIV_QR_THRESHOLD) || /* fast condition */
BELOW_THRESHOLD (nn, 2 * INV_DIV_QR_THRESHOLD)) /* fast condition */
{
mpn_dc_div_qr (qp, np, nn, dp, dn, dinv, d1inv); /* JPF: no gmp_pi1_t */
mpn_dc_div_qr (qp, np, nn, dp, dn, dinv); /* JPF: no gmp_pi1_t */
}
else
{
@ -99,7 +99,7 @@ mod (mp_ptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn, mp_limb_t dinv,
t is defined by (tp,mn). */
static void
reduce (mp_ptr tp, mp_srcptr ap, mp_size_t an, mp_srcptr mp, mp_size_t mn,
mp_limb_t dinv, mp_limb_t d1inv)
mp_limb_t dinv)
{
mp_ptr rp, scratch;
TMP_DECL;
@ -108,7 +108,7 @@ reduce (mp_ptr tp, mp_srcptr ap, mp_size_t an, mp_srcptr mp, mp_size_t mn,
rp = TMP_ALLOC_LIMBS (an);
scratch = TMP_ALLOC_LIMBS (an - mn + 1);
MPN_COPY (rp, ap, an);
mod (rp, an, mp, mn, dinv, d1inv, scratch);
mod (rp, an, mp, mn, dinv, scratch);
MPN_COPY (tp, rp, mn);
TMP_FREE;
@ -124,7 +124,7 @@ mpz_powm_ui (mpz_ptr r, mpz_srcptr b, mpir_ui el, mpz_srcptr m)
int m_zero_cnt;
int c;
mp_limb_t e, m2;
mp_limb_t dinv, d1inv;
mp_limb_t dinv;
TMP_DECL;
mp = PTR(m);
@ -155,7 +155,7 @@ mpz_powm_ui (mpz_ptr r, mpz_srcptr b, mpir_ui el, mpz_srcptr m)
}
m2 = mn == 1 ? 0 : mp[mn - 2];
mpir_invert_pi2 (dinv, d1inv, mp[mn - 1], m2); /* JPF: don't use gmp_pi1_t */
mpir_invert_pi1 (dinv, mp[mn - 1], m2); /* JPF: don't use gmp_pi1_t */
bn = ABSIZ(b);
bp = PTR(b);
@ -164,7 +164,7 @@ mpz_powm_ui (mpz_ptr r, mpz_srcptr b, mpir_ui el, mpz_srcptr m)
/* Reduce possibly huge base. Use a function call to reduce, since we
don't want the quotient allocation to live until function return. */
mp_ptr new_bp = TMP_ALLOC_LIMBS (mn);
reduce (new_bp, bp, bn, mp, mn, dinv, d1inv); /* JPF */
reduce (new_bp, bp, bn, mp, mn, dinv); /* JPF */
bp = new_bp;
bn = mn;
/* Canonicalize the base, since we are potentially going to multiply with
@ -214,7 +214,7 @@ mpz_powm_ui (mpz_ptr r, mpz_srcptr b, mpir_ui el, mpz_srcptr m)
}
else
{
mod (tp, tn, mp, mn, dinv, d1inv, scratch); /* JPF */
mod (tp, tn, mp, mn, dinv, scratch); /* JPF */
MPN_COPY (xp, tp, mn);
xn = mn;
}
@ -230,7 +230,7 @@ mpz_powm_ui (mpz_ptr r, mpz_srcptr b, mpir_ui el, mpz_srcptr m)
}
else
{
mod (tp, tn, mp, mn, dinv, d1inv, scratch); /* JPF */
mod (tp, tn, mp, mn, dinv, scratch); /* JPF */
MPN_COPY (xp, tp, mn);
xn = mn;
}
@ -255,7 +255,7 @@ mpz_powm_ui (mpz_ptr r, mpz_srcptr b, mpir_ui el, mpz_srcptr m)
}
else
{
mod (tp, xn, mp, mn, dinv, d1inv, scratch); /* JPF */
mod (tp, xn, mp, mn, dinv, scratch); /* JPF */
MPN_COPY (xp, tp, mn);
xn = mn;
}

View File

@ -174,14 +174,15 @@ am__objects_1 = divrem_2.$(OBJEXT) gcd.$(OBJEXT) gcdext.$(OBJEXT) \
get_str.$(OBJEXT) set_str.$(OBJEXT) matrix22_mul.$(OBJEXT) \
hgcd.$(OBJEXT) hgcd_appr.$(OBJEXT) hgcd_reduce.$(OBJEXT) \
mul_n.$(OBJEXT) mullow_n.$(OBJEXT) mulhigh_n.$(OBJEXT) \
mul.$(OBJEXT) tdiv_qr.$(OBJEXT) toom4_mul_n.$(OBJEXT) \
toom4_mul.$(OBJEXT) toom3_mul.$(OBJEXT) toom3_mul_n.$(OBJEXT) \
toom8h_mul.$(OBJEXT) toom8_sqr_n.$(OBJEXT) \
mulmod_2expm1.$(OBJEXT) mulmod_2expp1_basecase.$(OBJEXT) \
rootrem.$(OBJEXT) divrem_euclidean_r_1.$(OBJEXT) \
divrem_hensel_qr_1.$(OBJEXT) rsh_divrem_hensel_qr_1.$(OBJEXT) \
sb_divappr_q.$(OBJEXT) sb_div_qr.$(OBJEXT) \
dc_divappr_q.$(OBJEXT) dc_div_qr.$(OBJEXT) \
mul.$(OBJEXT) tdiv_qr.$(OBJEXT) mulmid.$(OBJEXT) \
mulmid_n.$(OBJEXT) toom42_mulmid.$(OBJEXT) \
toom4_mul_n.$(OBJEXT) toom4_mul.$(OBJEXT) toom3_mul.$(OBJEXT) \
toom3_mul_n.$(OBJEXT) toom8h_mul.$(OBJEXT) \
toom8_sqr_n.$(OBJEXT) mulmod_2expm1.$(OBJEXT) \
mulmod_2expp1_basecase.$(OBJEXT) rootrem.$(OBJEXT) \
divrem_euclidean_r_1.$(OBJEXT) divrem_hensel_qr_1.$(OBJEXT) \
rsh_divrem_hensel_qr_1.$(OBJEXT) sb_divappr_q.$(OBJEXT) \
sb_div_qr.$(OBJEXT) dc_divappr_q.$(OBJEXT) dc_div_qr.$(OBJEXT) \
dc_div_qr_n.$(OBJEXT) inv_divappr_q.$(OBJEXT) \
inv_div_qr.$(OBJEXT) tdiv_q.$(OBJEXT) dc_bdiv_qr.$(OBJEXT) \
dc_bdiv_qr_n.$(OBJEXT) dc_bdiv_q.$(OBJEXT)
@ -490,6 +491,7 @@ TUNE_MPN_SRCS = $(TUNE_MPN_SRCS_BASIC) $(TUNE_FFT_SRCS_BASIC) divrem_1.c mod_1.c
TUNE_MPN_SRCS_BASIC = divrem_2.c gcd.c gcdext.c get_str.c set_str.c \
matrix22_mul.c hgcd.c hgcd_appr.c hgcd_reduce.c \
mul_n.c mullow_n.c mulhigh_n.c mul.c tdiv_qr.c \
mulmid.c mulmid_n.c toom42_mulmid.c \
toom4_mul_n.c toom4_mul.c toom3_mul.c toom3_mul_n.c \
toom8h_mul.c toom8_sqr_n.c mulmod_2expm1.c mulmod_2expp1_basecase.c \
rootrem.c divrem_euclidean_r_1.c \