Finished the tuning code for the dc_bdiv functions and tuned the thresholds for K102.

This commit is contained in:
(no author) 2010-03-03 00:30:14 +00:00
parent 80f6015f3c
commit c7bd66e7a4
3 changed files with 10 additions and 14 deletions

View File

@ -27,6 +27,8 @@
#define DC_DIVAPPR_Q_N_THRESHOLD 483
#define INV_DIV_QR_THRESHOLD 400
#define INV_DIVAPPR_Q_N_THRESHOLD 483
#define DC_BDIV_QR_THRESHOLD 58
#define DC_BDIV_Q_THRESHOLD 28
#define DC_DIVAPPR_Q_THRESHOLD 221
#define INV_DIVAPPR_Q_THRESHOLD 998
#define POWM_THRESHOLD 154

View File

@ -912,7 +912,7 @@ speed_mpn_dc_bdiv_qr_n (struct speed_params *s)
double
speed_mpn_dc_bdiv_q (struct speed_params *s)
{
SPEED_ROUTINE_MPN_DC_BDIV_SMALL_Q (mpn_dc_bdiv_q);
SPEED_ROUTINE_MPN_DC_BDIV_Q (mpn_dc_bdiv_q);
}
double

View File

@ -1840,7 +1840,7 @@ int speed_routine_count_zeros_setup _PROTO ((struct speed_params *s,
return t; \
}
#define SPEED_ROUTINE_MPN_DC_BDIV_SMALL_Q(function) \
#define SPEED_ROUTINE_MPN_DC_BDIV_Q(function) \
{ \
unsigned i; \
mp_ptr a, d, q; \
@ -1851,22 +1851,18 @@ int speed_routine_count_zeros_setup _PROTO ((struct speed_params *s,
SPEED_RESTRICT_COND (s->size >= 2); \
\
TMP_MARK; \
SPEED_TMP_ALLOC_LIMBS (a, 3*s->size, s->align_xp); \
SPEED_TMP_ALLOC_LIMBS (d, 2*s->size, s->align_yp); \
SPEED_TMP_ALLOC_LIMBS (a, s->size, s->align_xp); \
SPEED_TMP_ALLOC_LIMBS (d, s->size, s->align_yp); \
SPEED_TMP_ALLOC_LIMBS (q, s->size, s->align_wp); \
\
MPN_COPY (a, s->xp, s->size); \
MPN_COPY (a+s->size, s->xp, s->size); \
MPN_COPY (a+2*s->size, s->xp, s->size); \
\
MPN_COPY (d, s->yp, s->size); \
MPN_COPY (d+s->size, s->yp, s->size); \
\
\
/* normalize the data */ \
d[0] |= 1; \
\
speed_operand_src (s, a, 3*s->size); \
speed_operand_src (s, d, 2*s->size); \
speed_operand_src (s, a, s->size); \
speed_operand_src (s, d, s->size); \
speed_operand_dst (s, q, s->size); \
speed_cache_fill (s); \
\
@ -1876,9 +1872,7 @@ int speed_routine_count_zeros_setup _PROTO ((struct speed_params *s,
i = s->reps; \
do { \
MPN_COPY (a, s->xp, s->size); \
MPN_COPY (a+s->size, s->xp, s->size); \
MPN_COPY (a+2*s->size, s->xp, s->size); \
function(q, a, 3*s->size, d, 2*s->size, inv); \
function(q, a, s->size, d, s->size, inv); \
} while (--i != 0); \
t = speed_endtime (); \
\