Finished the tuning code for the dc_bdiv functions and tuned the thresholds for K102.
This commit is contained in:
parent
80f6015f3c
commit
c7bd66e7a4
@ -27,6 +27,8 @@
|
||||
#define DC_DIVAPPR_Q_N_THRESHOLD 483
|
||||
#define INV_DIV_QR_THRESHOLD 400
|
||||
#define INV_DIVAPPR_Q_N_THRESHOLD 483
|
||||
#define DC_BDIV_QR_THRESHOLD 58
|
||||
#define DC_BDIV_Q_THRESHOLD 28
|
||||
#define DC_DIVAPPR_Q_THRESHOLD 221
|
||||
#define INV_DIVAPPR_Q_THRESHOLD 998
|
||||
#define POWM_THRESHOLD 154
|
||||
|
@ -912,7 +912,7 @@ speed_mpn_dc_bdiv_qr_n (struct speed_params *s)
|
||||
double
|
||||
speed_mpn_dc_bdiv_q (struct speed_params *s)
|
||||
{
|
||||
SPEED_ROUTINE_MPN_DC_BDIV_SMALL_Q (mpn_dc_bdiv_q);
|
||||
SPEED_ROUTINE_MPN_DC_BDIV_Q (mpn_dc_bdiv_q);
|
||||
}
|
||||
|
||||
double
|
||||
|
20
tune/speed.h
20
tune/speed.h
@ -1840,7 +1840,7 @@ int speed_routine_count_zeros_setup _PROTO ((struct speed_params *s,
|
||||
return t; \
|
||||
}
|
||||
|
||||
#define SPEED_ROUTINE_MPN_DC_BDIV_SMALL_Q(function) \
|
||||
#define SPEED_ROUTINE_MPN_DC_BDIV_Q(function) \
|
||||
{ \
|
||||
unsigned i; \
|
||||
mp_ptr a, d, q; \
|
||||
@ -1851,22 +1851,18 @@ int speed_routine_count_zeros_setup _PROTO ((struct speed_params *s,
|
||||
SPEED_RESTRICT_COND (s->size >= 2); \
|
||||
\
|
||||
TMP_MARK; \
|
||||
SPEED_TMP_ALLOC_LIMBS (a, 3*s->size, s->align_xp); \
|
||||
SPEED_TMP_ALLOC_LIMBS (d, 2*s->size, s->align_yp); \
|
||||
SPEED_TMP_ALLOC_LIMBS (a, s->size, s->align_xp); \
|
||||
SPEED_TMP_ALLOC_LIMBS (d, s->size, s->align_yp); \
|
||||
SPEED_TMP_ALLOC_LIMBS (q, s->size, s->align_wp); \
|
||||
\
|
||||
MPN_COPY (a, s->xp, s->size); \
|
||||
MPN_COPY (a+s->size, s->xp, s->size); \
|
||||
MPN_COPY (a+2*s->size, s->xp, s->size); \
|
||||
\
|
||||
MPN_COPY (d, s->yp, s->size); \
|
||||
MPN_COPY (d+s->size, s->yp, s->size); \
|
||||
\
|
||||
\
|
||||
/* normalize the data */ \
|
||||
d[0] |= 1; \
|
||||
\
|
||||
speed_operand_src (s, a, 3*s->size); \
|
||||
speed_operand_src (s, d, 2*s->size); \
|
||||
speed_operand_src (s, a, s->size); \
|
||||
speed_operand_src (s, d, s->size); \
|
||||
speed_operand_dst (s, q, s->size); \
|
||||
speed_cache_fill (s); \
|
||||
\
|
||||
@ -1876,9 +1872,7 @@ int speed_routine_count_zeros_setup _PROTO ((struct speed_params *s,
|
||||
i = s->reps; \
|
||||
do { \
|
||||
MPN_COPY (a, s->xp, s->size); \
|
||||
MPN_COPY (a+s->size, s->xp, s->size); \
|
||||
MPN_COPY (a+2*s->size, s->xp, s->size); \
|
||||
function(q, a, 3*s->size, d, 2*s->size, inv); \
|
||||
function(q, a, s->size, d, s->size, inv); \
|
||||
} while (--i != 0); \
|
||||
t = speed_endtime (); \
|
||||
\
|
||||
|
Loading…
Reference in New Issue
Block a user