Fix merge of new mpz_powm, mpz_powm_ui code.
This commit is contained in:
parent
8f7df128db
commit
7794ae01b4
14
configure
vendored
14
configure
vendored
@ -4949,13 +4949,13 @@ echo "include_mpn(\`sparc32/sparc-defs.m4')" >> $gmp_tmpconfigm4i
|
||||
penryn-*-*)
|
||||
path_64="x86_64/core2/penryn x86_64/core2 x86_64" ;;
|
||||
nehalem-*-*)
|
||||
path_64="x86_64/nehalem x86_64" ;;
|
||||
path_64="x86_64/nehalem x86_64/core2 x86_64" ;;
|
||||
westmere-*-*)
|
||||
path_64="x86_64/nehalem/westmere x86_64/nehalem x86_64" ;;
|
||||
path_64="x86_64/nehalem/westmere x86_64/nehalem x86_64/core2 x86_64" ;;
|
||||
sandybridge-*-*)
|
||||
path_64="x86_64/sandybridge x86_64" ;;
|
||||
path_64="x86_64/sandybridge x86_64/nehalem x86_64/core2 x86_64" ;;
|
||||
ivybridge-*-*)
|
||||
path_64="x86_64/sandybridge/ivybridge x86_64/sandybridge x86_64" ;;
|
||||
path_64="x86_64/sandybridge/ivybridge x86_64/sandybridge x86_64/nehalem x86_64/core2 x86_64" ;;
|
||||
haswell-*-*)
|
||||
path_64="x86_64/haswell/avx x86_64/haswell x86_64/sandybridge x86_64" ;;
|
||||
skylake-*-*)
|
||||
@ -23867,11 +23867,13 @@ gmp_mpn_functions="$extra_functions \
|
||||
mod_1_1 mod_1_2 mod_1_3 tdiv_q mp_bases fib_table \
|
||||
mulmid_basecase mulmid mulmid_n toom42_mulmid mulmod_bexpp1 mulmod_2expm1 \
|
||||
mulmod_2expp1_basecase mul_fft \
|
||||
mul mul_n mul_basecase sqr_basecase random random2 pow_1 \
|
||||
mul mul_n mul_basecase sqr_basecase random random2 \
|
||||
pow_1 powlo powm binvert \
|
||||
urandomb urandomm randomb rrandom invert \
|
||||
rootrem sizeinbase sqrtrem get_str set_str scan0 scan1 popcount hamdist cmp perfect_square_p \
|
||||
bdivmod gcd gcd_1 gcdext tdiv_qr jacobi_base jacobi jacobi_2 get_d \
|
||||
mullow_n mulhigh_n mullow_n_basecase mullow_basecase redc_1 redc_2 \
|
||||
mullow_n mulhigh_n mullow_n_basecase mullow_basecase \
|
||||
redc_1 redc_2 redc_n \
|
||||
sb_divappr_q toom3_mul toom3_mul_n toom4_mul toom4_mul_n \
|
||||
dc_div_q dc_divappr_q sb_div_q sb_div_qr dc_div_qr dc_div_qr_n inv_divappr_q_n \
|
||||
inv_divappr_q inv_div_q inv_div_qr inv_div_qr_n rootrem_basecase \
|
||||
|
@ -2073,6 +2073,10 @@ __GMP_DECLSPEC mp_limb_t gmp_primesieve (mp_ptr, mp_limb_t);
|
||||
#define INV_DIV_Q_THRESHOLD (MUL_FFT_THRESHOLD/3)
|
||||
#endif
|
||||
|
||||
#ifndef BINV_NEWTON_THRESHOLD
|
||||
#define BINV_NEWTON_THRESHOLD 300
|
||||
#endif
|
||||
|
||||
#ifndef DC_DIVAPPR_Q_THRESHOLD
|
||||
#define DC_DIVAPPR_Q_THRESHOLD (3 * MUL_TOOM3_THRESHOLD)
|
||||
#endif
|
||||
@ -4428,6 +4432,10 @@ extern mp_size_t dc_div_q_threshold;
|
||||
#define INV_DIV_Q_THRESHOLD inv_div_q_threshold
|
||||
extern mp_size_t inv_div_q_threshold;
|
||||
|
||||
#undef BINV_NEWTON_THRESHOLD
|
||||
#define BINV_NEWTON_THRESHOLD binv_newton_threshold
|
||||
extern mp_size_t binv_newton_threshold;  /* variable that BINV_NEWTON_THRESHOLD is redefined to expand to */
|
||||
|
||||
#undef DC_DIVAPPR_Q_THRESHOLD
|
||||
#define DC_DIVAPPR_Q_THRESHOLD dc_divappr_q_threshold
|
||||
extern mp_size_t dc_divappr_q_threshold;
|
||||
|
@ -58,7 +58,7 @@ see https://www.gnu.org/licenses/. */
|
||||
|
||||
static void
|
||||
mod (mp_ptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn, mp_limb_t dinv,
|
||||
mp_limb_t d1inv, mp_ptr tp)
|
||||
mp_ptr tp)
|
||||
{
|
||||
mp_ptr qp;
|
||||
TMP_DECL;
|
||||
@ -77,13 +77,13 @@ mod (mp_ptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn, mp_limb_t dinv,
|
||||
else if (BELOW_THRESHOLD (dn, DC_DIV_QR_THRESHOLD) ||
|
||||
BELOW_THRESHOLD (nn - dn, DC_DIV_QR_THRESHOLD))
|
||||
{
|
||||
mpn_sb_div_qr (qp, np, nn, dp, dn, dinv, d1inv); /* JPF: no gmp_pi1_t, two limbs pi */
|
||||
mpn_sb_div_qr (qp, np, nn, dp, dn, dinv); /* JPF: no gmp_pi1_t, two limbs pi */
|
||||
}
|
||||
/* Different conditions */
|
||||
else if (BELOW_THRESHOLD (dn, INV_DIV_QR_THRESHOLD) || /* fast condition */
|
||||
BELOW_THRESHOLD (nn, 2 * INV_DIV_QR_THRESHOLD)) /* fast condition */
|
||||
{
|
||||
mpn_dc_div_qr (qp, np, nn, dp, dn, dinv, d1inv); /* JPF: no gmp_pi1_t */
|
||||
mpn_dc_div_qr (qp, np, nn, dp, dn, dinv); /* JPF: no gmp_pi1_t */
|
||||
}
|
||||
else
|
||||
{
|
||||
@ -99,7 +99,7 @@ mod (mp_ptr np, mp_size_t nn, mp_srcptr dp, mp_size_t dn, mp_limb_t dinv,
|
||||
t is defined by (tp,mn). */
|
||||
static void
|
||||
reduce (mp_ptr tp, mp_srcptr ap, mp_size_t an, mp_srcptr mp, mp_size_t mn,
|
||||
mp_limb_t dinv, mp_limb_t d1inv)
|
||||
mp_limb_t dinv)
|
||||
{
|
||||
mp_ptr rp, scratch;
|
||||
TMP_DECL;
|
||||
@ -108,7 +108,7 @@ reduce (mp_ptr tp, mp_srcptr ap, mp_size_t an, mp_srcptr mp, mp_size_t mn,
|
||||
rp = TMP_ALLOC_LIMBS (an);
|
||||
scratch = TMP_ALLOC_LIMBS (an - mn + 1);
|
||||
MPN_COPY (rp, ap, an);
|
||||
mod (rp, an, mp, mn, dinv, d1inv, scratch);
|
||||
mod (rp, an, mp, mn, dinv, scratch);
|
||||
MPN_COPY (tp, rp, mn);
|
||||
|
||||
TMP_FREE;
|
||||
@ -124,7 +124,7 @@ mpz_powm_ui (mpz_ptr r, mpz_srcptr b, mpir_ui el, mpz_srcptr m)
|
||||
int m_zero_cnt;
|
||||
int c;
|
||||
mp_limb_t e, m2;
|
||||
mp_limb_t dinv, d1inv;
|
||||
mp_limb_t dinv;
|
||||
TMP_DECL;
|
||||
|
||||
mp = PTR(m);
|
||||
@ -155,7 +155,7 @@ mpz_powm_ui (mpz_ptr r, mpz_srcptr b, mpir_ui el, mpz_srcptr m)
|
||||
}
|
||||
|
||||
m2 = mn == 1 ? 0 : mp[mn - 2];
|
||||
mpir_invert_pi2 (dinv, d1inv, mp[mn - 1], m2); /* JPF: don't use gmp_pi1_t */
|
||||
mpir_invert_pi1 (dinv, mp[mn - 1], m2); /* JPF: don't use gmp_pi1_t */
|
||||
|
||||
bn = ABSIZ(b);
|
||||
bp = PTR(b);
|
||||
@ -164,7 +164,7 @@ mpz_powm_ui (mpz_ptr r, mpz_srcptr b, mpir_ui el, mpz_srcptr m)
|
||||
/* Reduce possibly huge base. Use a function call to reduce, since we
|
||||
don't want the quotient allocation to live until function return. */
|
||||
mp_ptr new_bp = TMP_ALLOC_LIMBS (mn);
|
||||
reduce (new_bp, bp, bn, mp, mn, dinv, d1inv); /* JPF */
|
||||
reduce (new_bp, bp, bn, mp, mn, dinv); /* JPF */
|
||||
bp = new_bp;
|
||||
bn = mn;
|
||||
/* Canonicalize the base, since we are potentially going to multiply with
|
||||
@ -214,7 +214,7 @@ mpz_powm_ui (mpz_ptr r, mpz_srcptr b, mpir_ui el, mpz_srcptr m)
|
||||
}
|
||||
else
|
||||
{
|
||||
mod (tp, tn, mp, mn, dinv, d1inv, scratch); /* JPF */
|
||||
mod (tp, tn, mp, mn, dinv, scratch); /* JPF */
|
||||
MPN_COPY (xp, tp, mn);
|
||||
xn = mn;
|
||||
}
|
||||
@ -230,7 +230,7 @@ mpz_powm_ui (mpz_ptr r, mpz_srcptr b, mpir_ui el, mpz_srcptr m)
|
||||
}
|
||||
else
|
||||
{
|
||||
mod (tp, tn, mp, mn, dinv, d1inv, scratch); /* JPF */
|
||||
mod (tp, tn, mp, mn, dinv, scratch); /* JPF */
|
||||
MPN_COPY (xp, tp, mn);
|
||||
xn = mn;
|
||||
}
|
||||
@ -255,7 +255,7 @@ mpz_powm_ui (mpz_ptr r, mpz_srcptr b, mpir_ui el, mpz_srcptr m)
|
||||
}
|
||||
else
|
||||
{
|
||||
mod (tp, xn, mp, mn, dinv, d1inv, scratch); /* JPF */
|
||||
mod (tp, xn, mp, mn, dinv, scratch); /* JPF */
|
||||
MPN_COPY (xp, tp, mn);
|
||||
xn = mn;
|
||||
}
|
||||
|
@ -174,14 +174,15 @@ am__objects_1 = divrem_2.$(OBJEXT) gcd.$(OBJEXT) gcdext.$(OBJEXT) \
|
||||
get_str.$(OBJEXT) set_str.$(OBJEXT) matrix22_mul.$(OBJEXT) \
|
||||
hgcd.$(OBJEXT) hgcd_appr.$(OBJEXT) hgcd_reduce.$(OBJEXT) \
|
||||
mul_n.$(OBJEXT) mullow_n.$(OBJEXT) mulhigh_n.$(OBJEXT) \
|
||||
mul.$(OBJEXT) tdiv_qr.$(OBJEXT) toom4_mul_n.$(OBJEXT) \
|
||||
toom4_mul.$(OBJEXT) toom3_mul.$(OBJEXT) toom3_mul_n.$(OBJEXT) \
|
||||
toom8h_mul.$(OBJEXT) toom8_sqr_n.$(OBJEXT) \
|
||||
mulmod_2expm1.$(OBJEXT) mulmod_2expp1_basecase.$(OBJEXT) \
|
||||
rootrem.$(OBJEXT) divrem_euclidean_r_1.$(OBJEXT) \
|
||||
divrem_hensel_qr_1.$(OBJEXT) rsh_divrem_hensel_qr_1.$(OBJEXT) \
|
||||
sb_divappr_q.$(OBJEXT) sb_div_qr.$(OBJEXT) \
|
||||
dc_divappr_q.$(OBJEXT) dc_div_qr.$(OBJEXT) \
|
||||
mul.$(OBJEXT) tdiv_qr.$(OBJEXT) mulmid.$(OBJEXT) \
|
||||
mulmid_n.$(OBJEXT) toom42_mulmid.$(OBJEXT) \
|
||||
toom4_mul_n.$(OBJEXT) toom4_mul.$(OBJEXT) toom3_mul.$(OBJEXT) \
|
||||
toom3_mul_n.$(OBJEXT) toom8h_mul.$(OBJEXT) \
|
||||
toom8_sqr_n.$(OBJEXT) mulmod_2expm1.$(OBJEXT) \
|
||||
mulmod_2expp1_basecase.$(OBJEXT) rootrem.$(OBJEXT) \
|
||||
divrem_euclidean_r_1.$(OBJEXT) divrem_hensel_qr_1.$(OBJEXT) \
|
||||
rsh_divrem_hensel_qr_1.$(OBJEXT) sb_divappr_q.$(OBJEXT) \
|
||||
sb_div_qr.$(OBJEXT) dc_divappr_q.$(OBJEXT) dc_div_qr.$(OBJEXT) \
|
||||
dc_div_qr_n.$(OBJEXT) inv_divappr_q.$(OBJEXT) \
|
||||
inv_div_qr.$(OBJEXT) tdiv_q.$(OBJEXT) dc_bdiv_qr.$(OBJEXT) \
|
||||
dc_bdiv_qr_n.$(OBJEXT) dc_bdiv_q.$(OBJEXT)
|
||||
@ -490,6 +491,7 @@ TUNE_MPN_SRCS = $(TUNE_MPN_SRCS_BASIC) $(TUNE_FFT_SRCS_BASIC) divrem_1.c mod_1.c
|
||||
TUNE_MPN_SRCS_BASIC = divrem_2.c gcd.c gcdext.c get_str.c set_str.c \
|
||||
matrix22_mul.c hgcd.c hgcd_appr.c hgcd_reduce.c \
|
||||
mul_n.c mullow_n.c mulhigh_n.c mul.c tdiv_qr.c \
|
||||
mulmid.c mulmid_n.c toom42_mulmid.c \
|
||||
toom4_mul_n.c toom4_mul.c toom3_mul.c toom3_mul_n.c \
|
||||
toom8h_mul.c toom8_sqr_n.c mulmod_2expm1.c mulmod_2expp1_basecase.c \
|
||||
rootrem.c divrem_euclidean_r_1.c \
|
||||
|
Loading…
Reference in New Issue
Block a user