Fixed fft stuff in tuneup.
This commit is contained in:
parent
ea8c9d864a
commit
6f542aa81d
75
gmp-impl.h
75
gmp-impl.h
@ -1828,31 +1828,7 @@ __GMP_DECLSPEC extern const mp_limb_t __gmp_fib_table[];
|
||||
#define SQR_TOOM8_THRESHOLD_LIMIT SQR_TOOM8_THRESHOLD
|
||||
#endif
|
||||
|
||||
/* First k to use for an FFT modF multiply. A modF FFT is an order
|
||||
log(2^k)/log(2^(k-1)) algorithm, so k=3 is merely 1.5 like karatsuba,
|
||||
whereas k=4 is 1.33 which is faster than toom3 at 1.485. */
|
||||
#define FFT_FIRST_K 4
|
||||
|
||||
/* Threshold at which FFT should be used to do a modF NxN -> N multiply. */
|
||||
#ifndef MUL_FFT_MODF_THRESHOLD
|
||||
#define MUL_FFT_MODF_THRESHOLD (MUL_TOOM3_THRESHOLD * 3)
|
||||
#endif
|
||||
#ifndef SQR_FFT_MODF_THRESHOLD
|
||||
#define SQR_FFT_MODF_THRESHOLD (SQR_TOOM3_THRESHOLD * 3)
|
||||
#endif
|
||||
|
||||
/* Threshold at which FFT should be used to do an NxN -> 2N multiply. This
|
||||
will be a size where FFT is using k=7 or k=8, since an FFT-k used for an
|
||||
NxN->2N multiply and not recursing into itself is an order
|
||||
log(2^k)/log(2^(k-2)) algorithm, so it'll be at least k=7 at 1.39 which
|
||||
is the first better than toom3. */
|
||||
#ifndef MUL_FFT_THRESHOLD
|
||||
#define MUL_FFT_THRESHOLD (MUL_FFT_MODF_THRESHOLD * 10)
|
||||
#endif
|
||||
#ifndef SQR_FFT_THRESHOLD
|
||||
#define SQR_FFT_THRESHOLD (SQR_FFT_MODF_THRESHOLD * 10)
|
||||
#endif
|
||||
|
||||
/* points at which fft is used for mul/sqr and mulmod_Bexp resp. */
|
||||
#ifndef MUL_FFT_FULL_THRESHOLD
|
||||
#define MUL_FFT_FULL_THRESHOLD (MUL_TOOM8H_THRESHOLD * 10)
|
||||
#endif
|
||||
@ -1860,36 +1836,13 @@ __GMP_DECLSPEC extern const mp_limb_t __gmp_fib_table[];
|
||||
#define SQR_FFT_FULL_THRESHOLD (SQR_TOOM8_THRESHOLD * 10)
|
||||
#endif
|
||||
|
||||
/* Table of thresholds for successive modF FFT "k"s. The first entry is
|
||||
where FFT_FIRST_K+1 should be used, the second FFT_FIRST_K+2,
|
||||
etc. See mpn_fft_best_k(). */
|
||||
#ifndef MUL_FFT_TABLE
|
||||
#define MUL_FFT_TABLE \
|
||||
{ MUL_TOOM3_THRESHOLD * 4, /* k=5 */ \
|
||||
MUL_TOOM3_THRESHOLD * 8, /* k=6 */ \
|
||||
MUL_TOOM3_THRESHOLD * 16, /* k=7 */ \
|
||||
MUL_TOOM3_THRESHOLD * 32, /* k=8 */ \
|
||||
MUL_TOOM3_THRESHOLD * 96, /* k=9 */ \
|
||||
MUL_TOOM3_THRESHOLD * 288, /* k=10 */ \
|
||||
0 }
|
||||
#ifndef MUL_FFT_THRESHOLD
|
||||
#define MUL_FFT_THRESHOLD (MUL_FFT_FULL_THRESHOLD / 2)
|
||||
#endif
|
||||
#ifndef SQR_FFT_TABLE
|
||||
#define SQR_FFT_TABLE \
|
||||
{ SQR_TOOM3_THRESHOLD * 4, /* k=5 */ \
|
||||
SQR_TOOM3_THRESHOLD * 8, /* k=6 */ \
|
||||
SQR_TOOM3_THRESHOLD * 16, /* k=7 */ \
|
||||
SQR_TOOM3_THRESHOLD * 32, /* k=8 */ \
|
||||
SQR_TOOM3_THRESHOLD * 96, /* k=9 */ \
|
||||
SQR_TOOM3_THRESHOLD * 288, /* k=10 */ \
|
||||
0 }
|
||||
#ifndef SQR_FFT_THRESHOLD
|
||||
#define SQR_FFT_THRESHOLD (SQR_FFT_FULL_THRESHOLD / 2)
|
||||
#endif
|
||||
|
||||
#ifndef FFT_TABLE_ATTRS
|
||||
#define FFT_TABLE_ATTRS static const
|
||||
#endif
|
||||
|
||||
#define MPN_FFT_TABLE_SIZE 16
|
||||
|
||||
#ifndef DC_DIV_QR_THRESHOLD
|
||||
#define DC_DIV_QR_THRESHOLD (3 * MUL_KARATSUBA_THRESHOLD)
|
||||
#endif
|
||||
@ -4067,13 +4020,6 @@ extern mp_size_t mul_fft_threshold;
|
||||
#define MUL_FFT_FULL_THRESHOLD mul_fft_full_threshold
|
||||
extern mp_size_t mul_fft_full_threshold;
|
||||
|
||||
#undef MUL_FFT_MODF_THRESHOLD
|
||||
#define MUL_FFT_MODF_THRESHOLD mul_fft_modf_threshold
|
||||
extern mp_size_t mul_fft_modf_threshold;
|
||||
|
||||
#undef MUL_FFT_TABLE
|
||||
#define MUL_FFT_TABLE { 0 }
|
||||
|
||||
/* A native mpn_sqr_basecase is not tuned and SQR_BASECASE_THRESHOLD should
|
||||
remain as zero (always use it). */
|
||||
#if ! HAVE_NATIVE_mpn_sqr_basecase
|
||||
@ -4111,13 +4057,6 @@ extern mp_size_t sqr_fft_threshold;
|
||||
#define SQR_FFT_FULL_THRESHOLD sqr_fft_full_threshold
|
||||
extern mp_size_t sqr_fft_full_threshold;
|
||||
|
||||
#undef SQR_FFT_MODF_THRESHOLD
|
||||
#define SQR_FFT_MODF_THRESHOLD sqr_fft_modf_threshold
|
||||
extern mp_size_t sqr_fft_modf_threshold;
|
||||
|
||||
#undef SQR_FFT_TABLE
|
||||
#define SQR_FFT_TABLE { 0 }
|
||||
|
||||
#undef MULLOW_BASECASE_THRESHOLD
|
||||
#define MULLOW_BASECASE_THRESHOLD mullow_basecase_threshold
|
||||
extern mp_size_t mullow_basecase_threshold;
|
||||
@ -4279,10 +4218,6 @@ extern mp_size_t set_str_dc_threshold;
|
||||
#define SET_STR_PRECOMPUTE_THRESHOLD set_str_precompute_threshold
|
||||
extern mp_size_t set_str_precompute_threshold;
|
||||
|
||||
#undef FFT_TABLE_ATTRS
|
||||
#define FFT_TABLE_ATTRS
|
||||
extern mp_size_t mpn_fft_table[2][MPN_FFT_TABLE_SIZE];
|
||||
|
||||
/* Sizes the tune program tests up to, used in a couple of recompilations. */
|
||||
#undef MUL_KARATSUBA_THRESHOLD_LIMIT
|
||||
#undef MUL_TOOM3_THRESHOLD_LIMIT
|
||||
|
@ -1150,16 +1150,16 @@ speed_mpn_toom8_sqr_n (struct speed_params *s)
|
||||
}
|
||||
|
||||
double
|
||||
speed_mpn_mul_fft_full (struct speed_params *s)
|
||||
speed_mpn_mul_fft_main (struct speed_params *s)
|
||||
{
|
||||
SPEED_ROUTINE_MPN_MUL_N_CALL
|
||||
(mpn_mul_fft_full (wp, s->xp, s->size, s->yp, s->size));
|
||||
(mpn_mul_fft_main (wp, s->xp, s->size, s->yp, s->size));
|
||||
}
|
||||
double
|
||||
speed_mpn_mul_fft_full_sqr (struct speed_params *s)
|
||||
speed_mpn_sqr_fft_main (struct speed_params *s)
|
||||
{
|
||||
SPEED_ROUTINE_MPN_SQR_CALL
|
||||
(mpn_mul_fft_full (wp, s->xp, s->size, s->xp, s->size));
|
||||
(mpn_mul_fft_main (wp, s->xp, s->size, s->xp, s->size));
|
||||
}
|
||||
|
||||
|
||||
@ -1171,20 +1171,15 @@ speed_mpn_mul_fft_full_sqr (struct speed_params *s)
|
||||
{ \
|
||||
mp_ptr wp; \
|
||||
mp_size_t pl; \
|
||||
int k; \
|
||||
unsigned i; \
|
||||
double t; \
|
||||
TMP_DECL; \
|
||||
\
|
||||
SPEED_RESTRICT_COND (s->size >= 1); \
|
||||
\
|
||||
if (s->r != 0) \
|
||||
k = s->r; \
|
||||
else \
|
||||
k = mpn_fft_best_k (s->size, sqr); \
|
||||
\
|
||||
TMP_MARK; \
|
||||
pl = mpn_fft_next_size (s->size, k); \
|
||||
pl = fft_adjust_limbs (s->size); \
|
||||
SPEED_TMP_ALLOC_LIMBS (wp, pl+1, s->align_wp); \
|
||||
\
|
||||
speed_operand_src (s, s->xp, s->size); \
|
||||
@ -1204,20 +1199,6 @@ speed_mpn_mul_fft_full_sqr (struct speed_params *s)
|
||||
return t; \
|
||||
}
|
||||
|
||||
double
|
||||
speed_mpn_mul_fft (struct speed_params *s)
|
||||
{
|
||||
SPEED_ROUTINE_MPN_MUL_FFT_CALL
|
||||
(mpn_mul_fft (wp, pl, s->xp, s->size, s->yp, s->size, k), 0);
|
||||
}
|
||||
|
||||
double
|
||||
speed_mpn_mul_fft_sqr (struct speed_params *s)
|
||||
{
|
||||
SPEED_ROUTINE_MPN_MUL_FFT_CALL
|
||||
(mpn_mul_fft (wp, pl, s->xp, s->size, s->xp, s->size, k), 1);
|
||||
}
|
||||
|
||||
double
|
||||
speed_mpn_mullow_n (struct speed_params *s)
|
||||
{
|
||||
|
@ -326,11 +326,9 @@ const struct routine_t {
|
||||
{ "mpn_toom8h_mul", speed_mpn_toom8h_mul },
|
||||
{ "mpn_toom3_sqr_n", speed_mpn_toom3_sqr_n },
|
||||
{ "mpn_toom4_sqr_n", speed_mpn_toom4_sqr_n },
|
||||
{ "mpn_mul_fft_full", speed_mpn_mul_fft_full },
|
||||
{ "mpn_mul_fft_full_sqr", speed_mpn_mul_fft_full_sqr },
|
||||
|
||||
{ "mpn_mul_fft", speed_mpn_mul_fft, FLAG_R_OPTIONAL },
|
||||
{ "mpn_mul_fft_sqr", speed_mpn_mul_fft_sqr, FLAG_R_OPTIONAL },
|
||||
|
||||
{ "mpn_mul_fft_main", speed_mpn_mul_fft_main, FLAG_R_OPTIONAL },
|
||||
{ "mpn_sqr_fft_main", speed_mpn_sqr_fft_main, FLAG_R_OPTIONAL },
|
||||
|
||||
{ "mpn_mullow_n", speed_mpn_mullow_n },
|
||||
{ "mpn_mullow_n_basecase", speed_mpn_mullow_n_basecase},
|
||||
|
@ -251,10 +251,8 @@ double speed_mpn_mul_1 _PROTO ((struct speed_params *s));
|
||||
double speed_mpn_mul_1_inplace _PROTO ((struct speed_params *s));
|
||||
double speed_mpn_mul_2 _PROTO ((struct speed_params *s));
|
||||
double speed_mpn_mul_basecase _PROTO ((struct speed_params *s));
|
||||
double speed_mpn_mul_fft _PROTO ((struct speed_params *s));
|
||||
double speed_mpn_mul_fft_sqr _PROTO ((struct speed_params *s));
|
||||
double speed_mpn_mul_fft_full _PROTO ((struct speed_params *s));
|
||||
double speed_mpn_mul_fft_full_sqr _PROTO ((struct speed_params *s));
|
||||
double speed_mpn_mul_fft_main _PROTO ((struct speed_params *s));
|
||||
double speed_mpn_sqr_fft_main _PROTO ((struct speed_params *s));
|
||||
double speed_mpn_mul_n _PROTO ((struct speed_params *s));
|
||||
double speed_mpn_mul_n_sqr _PROTO ((struct speed_params *s));
|
||||
double speed_mpn_mullow_n _PROTO ((struct speed_params *s));
|
||||
|
160
tune/tuneup.c
160
tune/tuneup.c
@ -67,12 +67,6 @@ MA 02110-1301, USA. */
|
||||
limits are recompiled though, to make them accept a bigger range of sizes
|
||||
than normal, eg. mpn_sqr_basecase to compare against mpn_kara_sqr_n.
|
||||
|
||||
Limitations:
|
||||
|
||||
The FFTs aren't subject to the same badness rule as the other thresholds,
|
||||
so each k is probably being brought on a touch early. This isn't likely
|
||||
to make a difference, and the simpler probing means fewer tests.
|
||||
|
||||
Code:
|
||||
- main : checks for various command line options and calls all()
|
||||
- all : prints the tuneup message, date and compiler, then calls
|
||||
@ -182,18 +176,14 @@ mp_size_t mul_karatsuba_threshold = MP_SIZE_T_MAX;
|
||||
mp_size_t mul_toom3_threshold = MUL_TOOM3_THRESHOLD_LIMIT;
|
||||
mp_size_t mul_toom4_threshold = MUL_TOOM4_THRESHOLD_LIMIT;
|
||||
mp_size_t mul_toom8h_threshold = MUL_TOOM8H_THRESHOLD_LIMIT;
|
||||
mp_size_t mul_fft_threshold = MP_SIZE_T_MAX;
|
||||
mp_size_t mul_fft_full_threshold = MP_SIZE_T_MAX;
|
||||
mp_size_t mul_fft_modf_threshold = MP_SIZE_T_MAX;
|
||||
mp_size_t sqr_basecase_threshold = MP_SIZE_T_MAX;
|
||||
mp_size_t sqr_karatsuba_threshold
|
||||
= (TUNE_SQR_KARATSUBA_MAX == 0 ? MP_SIZE_T_MAX : TUNE_SQR_KARATSUBA_MAX);
|
||||
mp_size_t sqr_toom3_threshold = SQR_TOOM3_THRESHOLD_LIMIT;
|
||||
mp_size_t sqr_toom4_threshold = SQR_TOOM4_THRESHOLD_LIMIT;
|
||||
mp_size_t sqr_toom8_threshold = SQR_TOOM8_THRESHOLD_LIMIT;
|
||||
mp_size_t sqr_fft_threshold = MP_SIZE_T_MAX;
|
||||
mp_size_t sqr_fft_full_threshold = MP_SIZE_T_MAX;
|
||||
mp_size_t sqr_fft_modf_threshold = MP_SIZE_T_MAX;
|
||||
mp_size_t mulmod_2expm1_threshold = MP_SIZE_T_MAX;
|
||||
mp_size_t mullow_basecase_threshold = MP_SIZE_T_MAX;
|
||||
mp_size_t mullow_dc_threshold = MP_SIZE_T_MAX;
|
||||
@ -234,9 +224,6 @@ mp_size_t divrem_hensel_qr_1_threshold = MP_SIZE_T_MAX;
|
||||
mp_size_t rsh_divrem_hensel_qr_1_threshold = MP_SIZE_T_MAX;
|
||||
mp_size_t divrem_euclid_hensel_threshold = MP_SIZE_T_MAX;
|
||||
|
||||
mp_size_t fft_modf_sqr_threshold = MP_SIZE_T_MAX;
|
||||
mp_size_t fft_modf_mul_threshold = MP_SIZE_T_MAX;
|
||||
|
||||
struct param_t {
|
||||
const char *name;
|
||||
speed_function_t function;
|
||||
@ -702,11 +689,8 @@ one (mp_size_t *threshold, gmp_randstate_t rands,struct param_t *param)
|
||||
if this step effect remains. */
|
||||
|
||||
struct fft_param_t {
|
||||
const char *table_name;
|
||||
const char *threshold_name;
|
||||
const char *modf_threshold_name;
|
||||
mp_size_t *p_threshold;
|
||||
mp_size_t *p_modf_threshold;
|
||||
mp_size_t first_size;
|
||||
mp_size_t max_size;
|
||||
speed_function_t function;
|
||||
@ -714,103 +698,32 @@ struct fft_param_t {
|
||||
mp_size_t sqr;
|
||||
};
|
||||
|
||||
|
||||
/* mpn_mul_fft requires pl a multiple of 2^k limbs, but with
|
||||
N=pl*BIT_PER_MP_LIMB it internally also pads out so N/2^k is a multiple
|
||||
of 2^(k-1) bits. */
|
||||
|
||||
mp_size_t
|
||||
fft_step_size (int k)
|
||||
fft_step_size (int size)
|
||||
{
|
||||
mp_size_t step;
|
||||
|
||||
step = MAX ((mp_size_t) 1 << (k-1), BITS_PER_MP_LIMB) / BITS_PER_MP_LIMB;
|
||||
step *= (mp_size_t) 1 << k;
|
||||
|
||||
step = fft_adjust_limbs(size + 1) - size;
|
||||
|
||||
if (step <= 0)
|
||||
{
|
||||
printf ("Can't handle k=%d\n", k);
|
||||
printf ("Can't handle size=%d\n", size);
|
||||
abort ();
|
||||
}
|
||||
|
||||
return step;
|
||||
}
|
||||
|
||||
mp_size_t
|
||||
fft_next_size (mp_size_t pl, int k)
|
||||
{
|
||||
mp_size_t m = fft_step_size (k);
|
||||
|
||||
/* printf ("[k=%d %ld] %ld ->", k, m, pl); */
|
||||
|
||||
if (pl == 0 || (pl & (m-1)) != 0)
|
||||
pl = (pl | (m-1)) + 1;
|
||||
|
||||
/* printf (" %ld\n", pl); */
|
||||
return pl;
|
||||
}
|
||||
|
||||
void
|
||||
fft (struct fft_param_t *p,gmp_randstate_t rands)
|
||||
{
|
||||
mp_size_t size;
|
||||
int i, k;
|
||||
|
||||
for (i = 0; i < numberof (mpn_fft_table[p->sqr]); i++)
|
||||
mpn_fft_table[p->sqr][i] = MP_SIZE_T_MAX;
|
||||
|
||||
*p->p_threshold = MP_SIZE_T_MAX;
|
||||
*p->p_modf_threshold = MP_SIZE_T_MAX;
|
||||
|
||||
|
||||
option_trace = MAX (option_trace, option_fft_trace);
|
||||
|
||||
printf ("#define %s {", p->table_name);
|
||||
if (option_trace >= 2)
|
||||
printf ("\n");
|
||||
|
||||
k = FFT_FIRST_K;
|
||||
size = p->first_size;
|
||||
for (;;)
|
||||
{
|
||||
double tk, tk1;
|
||||
|
||||
size = fft_next_size (size+1, k+1);
|
||||
|
||||
if (size >= p->max_size)
|
||||
break;
|
||||
if (k >= FFT_FIRST_K + numberof (mpn_fft_table[p->sqr]))
|
||||
break;
|
||||
|
||||
/* compare k to k+1 in the middle of the current k+1 step */
|
||||
s.size = size + fft_step_size (k+1) / 2;
|
||||
s.r = k;
|
||||
tk = tuneup_measure (p->function, rands, NULL, &s);
|
||||
if (tk == -1.0)
|
||||
abort ();
|
||||
|
||||
s.r = k+1;
|
||||
tk1 = tuneup_measure (p->function, rands, NULL, &s);
|
||||
if (tk1 == -1.0)
|
||||
abort ();
|
||||
|
||||
if (option_trace >= 2)
|
||||
printf ("at %ld size=%ld k=%d %.9f k=%d %.9f\n",
|
||||
(long) size, (long) s.size, k, tk, k+1, tk1);
|
||||
|
||||
/* declare the k+1 threshold as soon as it's faster at its midpoint */
|
||||
if (tk1 < tk)
|
||||
{
|
||||
mpn_fft_table[p->sqr][k-FFT_FIRST_K] = s.size;
|
||||
printf (" %ld,", (long) s.size);
|
||||
if (option_trace >= 2) printf ("\n");
|
||||
k++;
|
||||
}
|
||||
}
|
||||
|
||||
mpn_fft_table[p->sqr][k-FFT_FIRST_K] = 0;
|
||||
printf (" 0 }\n");
|
||||
|
||||
|
||||
size = p->first_size;
|
||||
|
||||
/* Declare an FFT faster than a plain toom3 etc multiplication found as
|
||||
@ -818,62 +731,39 @@ fft (struct fft_param_t *p,gmp_randstate_t rands)
|
||||
middle of the FFT step is tested. */
|
||||
for (;;)
|
||||
{
|
||||
int modf = (*p->p_modf_threshold == MP_SIZE_T_MAX);
|
||||
double tk, tm;
|
||||
|
||||
/* k=7 should be the first FFT which can beat toom3 on a full
|
||||
multiply, so jump to that threshold and save some probing after the
|
||||
modf threshold is found. */
|
||||
if (!modf && size < mpn_fft_table[p->sqr][2])
|
||||
{
|
||||
size = mpn_fft_table[p->sqr][2];
|
||||
if (option_trace >= 2)
|
||||
printf ("jump to size=%ld\n", (long) size);
|
||||
}
|
||||
|
||||
size = fft_next_size (size+1, mpn_fft_best_k (size, p->sqr));
|
||||
k = mpn_fft_best_k (size, p->sqr);
|
||||
|
||||
size = fft_adjust_limbs (size+1);
|
||||
|
||||
if (size >= p->max_size)
|
||||
break;
|
||||
|
||||
s.size = size + fft_step_size (k) / 2;
|
||||
s.r = k;
|
||||
s.size = size + fft_step_size (size) / 2;
|
||||
|
||||
tk = tuneup_measure (p->function, rands, NULL, &s);
|
||||
if (tk == -1.0)
|
||||
abort ();
|
||||
|
||||
if (!modf) s.size;
|
||||
tm = tuneup_measure (p->mul_function, rands, NULL, &s);
|
||||
if (tm == -1.0)
|
||||
abort ();
|
||||
|
||||
if (option_trace >= 2)
|
||||
printf ("at %ld size=%ld k=%d %.9f size=%ld %s mul %.9f\n",
|
||||
printf ("at %ld size=%ld %.9f size=%ld %s mul %.9f\n",
|
||||
(long) size,
|
||||
(long) size + fft_step_size (k) / 2, k, tk,
|
||||
(long) s.size, modf ? "modf" : "full", tm);
|
||||
(long) size + fft_step_size (size) / 2, tk,
|
||||
(long) s.size, "full", tm);
|
||||
|
||||
if (tk < tm)
|
||||
{
|
||||
if (modf)
|
||||
{
|
||||
*p->p_modf_threshold = s.size;
|
||||
print_define (p->modf_threshold_name, *p->p_modf_threshold);
|
||||
}
|
||||
else
|
||||
{
|
||||
*p->p_threshold = s.size;
|
||||
print_define (p->threshold_name, *p->p_threshold);
|
||||
break;
|
||||
}
|
||||
*p->p_threshold = s.size;
|
||||
print_define (p->threshold_name, *p->p_threshold);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
|
||||
|
||||
/* Start karatsuba from 4, since the Cray t90 ieee code is much faster at 2,
|
||||
giving wrong results. */
|
||||
void
|
||||
@ -904,7 +794,7 @@ tune_mul (gmp_randstate_t rands)
|
||||
one (&mul_toom8h_threshold, rands, ¶m);
|
||||
|
||||
/* disabled until tuned */
|
||||
MUL_FFT_THRESHOLD = MP_SIZE_T_MAX;
|
||||
MUL_FFT_FULL_THRESHOLD = MP_SIZE_T_MAX;
|
||||
}
|
||||
|
||||
|
||||
@ -936,7 +826,7 @@ tune_mullow (gmp_randstate_t rands)
|
||||
one (&mullow_mul_threshold, rands, ¶m);
|
||||
|
||||
/* disabled until tuned */
|
||||
MUL_FFT_THRESHOLD = MP_SIZE_T_MAX;
|
||||
MUL_FFT_FULL_THRESHOLD = MP_SIZE_T_MAX;
|
||||
}
|
||||
|
||||
void
|
||||
@ -949,7 +839,7 @@ tune_mulmod_2expm1 (gmp_randstate_t rands)
|
||||
//param.max_size = ?? ;
|
||||
one (&mulmod_2expm1_threshold, rands, ¶m);
|
||||
/* disabled until tuned */
|
||||
MUL_FFT_THRESHOLD = MP_SIZE_T_MAX; // ??????????????
|
||||
MUL_FFT_FULL_THRESHOLD = MP_SIZE_T_MAX; // ??????????????
|
||||
}
|
||||
|
||||
void
|
||||
@ -978,7 +868,7 @@ tune_mulhigh (gmp_randstate_t rands)
|
||||
one (&mulhigh_mul_threshold, rands, ¶m);
|
||||
|
||||
/* disabled until tuned */
|
||||
MUL_FFT_THRESHOLD = MP_SIZE_T_MAX;
|
||||
MUL_FFT_FULL_THRESHOLD = MP_SIZE_T_MAX;
|
||||
}
|
||||
|
||||
void
|
||||
@ -1057,7 +947,7 @@ void
|
||||
tune_sqr (gmp_randstate_t rands)
|
||||
{
|
||||
/* disabled until tuned */
|
||||
SQR_FFT_THRESHOLD = MP_SIZE_T_MAX;
|
||||
SQR_FFT_FULL_THRESHOLD = MP_SIZE_T_MAX;
|
||||
|
||||
if (HAVE_NATIVE_mpn_sqr_basecase)
|
||||
{
|
||||
@ -1909,14 +1799,11 @@ tune_fft_mul (gmp_randstate_t rands)
|
||||
if (option_fft_max_size == 0)
|
||||
return;
|
||||
|
||||
param.table_name = "MUL_FFT_TABLE";
|
||||
param.threshold_name = "MUL_FFT_FULL_THRESHOLD";
|
||||
param.p_threshold = &mul_fft_full_threshold;
|
||||
param.modf_threshold_name = "MUL_FFT_MODF_THRESHOLD";
|
||||
param.p_modf_threshold = &mul_fft_modf_threshold;
|
||||
param.first_size = MUL_TOOM8H_THRESHOLD / 2;
|
||||
param.max_size = option_fft_max_size;
|
||||
param.function = speed_mpn_mul_fft;
|
||||
param.function = speed_mpn_mul_fft_main;
|
||||
param.mul_function = speed_mpn_mul_n;
|
||||
param.sqr = 0;
|
||||
fft (¶m,rands);
|
||||
@ -1931,14 +1818,11 @@ tune_fft_sqr (gmp_randstate_t rands)
|
||||
if (option_fft_max_size == 0)
|
||||
return;
|
||||
|
||||
param.table_name = "SQR_FFT_TABLE";
|
||||
param.threshold_name = "SQR_FFT_FULL_THRESHOLD";
|
||||
param.p_threshold = &sqr_fft_full_threshold;
|
||||
param.modf_threshold_name = "SQR_FFT_MODF_THRESHOLD";
|
||||
param.p_modf_threshold = &sqr_fft_modf_threshold;
|
||||
param.first_size = SQR_TOOM8_THRESHOLD / 2;
|
||||
param.max_size = option_fft_max_size;
|
||||
param.function = speed_mpn_mul_fft_sqr;
|
||||
param.function = speed_mpn_sqr_fft_main;
|
||||
param.mul_function = speed_mpn_sqr;
|
||||
param.sqr = 0;
|
||||
fft (¶m,rands);
|
||||
|
Loading…
Reference in New Issue
Block a user