2008-06-25 03:33:36 -04:00
|
|
|
/* An example of extending the speed program to measure routines not in GMP.
|
|
|
|
|
|
|
|
Copyright 1999, 2000, 2002, 2003, 2005 Free Software Foundation, Inc.
|
|
|
|
|
|
|
|
This file is part of the GNU MP Library.
|
|
|
|
|
|
|
|
The GNU MP Library is free software; you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU Lesser General Public License as published by
|
|
|
|
the Free Software Foundation; either version 2.1 of the License, or (at your
|
|
|
|
option) any later version.
|
|
|
|
|
|
|
|
The GNU MP Library is distributed in the hope that it will be useful, but
|
|
|
|
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
|
|
|
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
|
|
|
License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU Lesser General Public License
|
|
|
|
along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
|
|
|
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
|
|
|
MA 02110-1301, USA. */
|
|
|
|
|
|
|
|
|
|
|
|
/* The extension here is three versions of an mpn arithmetic mean. These
|
|
|
|
aren't meant to be particularly useful, just examples.
|
|
|
|
|
|
|
|
You can run something like the following to compare their speeds.
|
|
|
|
|
|
|
|
./speed-ext -s 1-20 -c mean_calls mean_open mean_open2
|
|
|
|
|
|
|
|
On RISC chips, mean_open() might be fastest if the compiler is doing a
|
|
|
|
good job. On the register starved x86s, mean_calls will be fastest.
|
|
|
|
|
|
|
|
|
|
|
|
Notes:
|
|
|
|
|
|
|
|
SPEED_EXTRA_PROTOS and SPEED_EXTRA_ROUTINES are macros that get expanded
|
|
|
|
by speed.c in useful places. SPEED_EXTRA_PROTOS goes after the header
|
|
|
|
files, and SPEED_EXTRA_ROUTINES goes in the array of available routines.
|
|
|
|
|
|
|
|
The advantage of this #include "speed.c" scheme is that there's no
|
|
|
|
editing of a copy of that file, and new features in new versions of it
|
|
|
|
will be immediately available.
|
|
|
|
|
|
|
|
In a real program the routines mean_calls() etc would probably be in
|
|
|
|
separate C or assembler source files, and just the measuring
|
|
|
|
speed_mean_calls() etc would be here. Linking against other libraries
|
|
|
|
for things to measure is perfectly possible too.
|
|
|
|
|
|
|
|
When attempting to compare two versions of the same named routine, say
|
|
|
|
like the generic and assembler versions of mpn_add_n(), creative use of
|
|
|
|
cc -D or #define is suggested, so one or both can be renamed and linked
|
|
|
|
into the same program. It'll be much easier to compare them side by side
|
|
|
|
than with separate programs for each.
|
|
|
|
|
|
|
|
common.c has notes on writing speed measuring routines.
|
|
|
|
|
|
|
|
Remember to link against tune/libspeed.la (or tune/.libs/libspeed.a if
|
|
|
|
not using libtool) to get common.o and other objects needed by speed.c. */
|
|
|
|
|
|
|
|
|
|
|
|
#define SPEED_EXTRA_PROTOS \
|
|
|
|
double speed_mean_calls __GMP_PROTO ((struct speed_params *s)); \
|
|
|
|
double speed_mean_open __GMP_PROTO ((struct speed_params *s)); \
|
|
|
|
double speed_mean_open2 __GMP_PROTO ((struct speed_params *s));
|
|
|
|
|
|
|
|
#define SPEED_EXTRA_ROUTINES \
|
|
|
|
{ "mean_calls", speed_mean_calls }, \
|
|
|
|
{ "mean_open", speed_mean_open }, \
|
|
|
|
{ "mean_open2", speed_mean_open2 },
|
|
|
|
|
|
|
|
#include "speed.c"
|
|
|
|
|
|
|
|
|
|
|
|
/* A straightforward implementation calling mpn subroutines.
|
|
|
|
|
|
|
|
wp,size is set to (xp,size + yp,size) / 2. The return value is the
|
|
|
|
remainder from the division. The other versions are the same. */
|
|
|
|
|
|
|
|
mp_limb_t
|
|
|
|
mean_calls (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size)
|
|
|
|
{
|
|
|
|
mp_limb_t c, ret;
|
|
|
|
|
|
|
|
ASSERT (size >= 1);
|
|
|
|
|
|
|
|
c = mpn_add_n (wp, xp, yp, size);
|
|
|
|
ret = mpn_rshift (wp, wp, size, 1) >> (BITS_PER_MP_LIMB-1);
|
|
|
|
wp[size-1] |= (c << (BITS_PER_MP_LIMB-1));
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* An open-coded version, making one pass over the data. The right shift is
|
|
|
|
done as the added limbs are produced. The addition code follows
|
|
|
|
mpn/generic/add_n.c. */
|
|
|
|
|
|
|
|
mp_limb_t
|
|
|
|
mean_open (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size)
|
|
|
|
{
|
|
|
|
mp_limb_t w, wprev, x, y, c, ret;
|
|
|
|
mp_size_t i;
|
|
|
|
|
|
|
|
ASSERT (size >= 1);
|
|
|
|
|
|
|
|
x = xp[0];
|
|
|
|
y = yp[0];
|
|
|
|
|
|
|
|
wprev = x + y;
|
|
|
|
c = (wprev < x);
|
|
|
|
ret = (wprev & 1);
|
|
|
|
|
|
|
|
#define RSHIFT(hi,lo) (((lo) >> 1) | ((hi) << (BITS_PER_MP_LIMB-1)))
|
|
|
|
|
|
|
|
for (i = 1; i < size; i++)
|
|
|
|
{
|
|
|
|
x = xp[i];
|
|
|
|
y = yp[i];
|
|
|
|
|
|
|
|
w = x + c;
|
|
|
|
c = (w < x);
|
|
|
|
w += y;
|
|
|
|
c += (w < y);
|
|
|
|
|
|
|
|
wp[i-1] = RSHIFT (w, wprev);
|
|
|
|
wprev = w;
|
|
|
|
}
|
|
|
|
|
|
|
|
wp[i-1] = RSHIFT (c, wprev);
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* Another one-pass version, but right shifting the source limbs rather than
|
|
|
|
the result limbs. There's not much chance of this being better than the
|
|
|
|
above, but it's an alternative at least. */
|
|
|
|
|
|
|
|
mp_limb_t
|
|
|
|
mean_open2 (mp_ptr wp, mp_srcptr xp, mp_srcptr yp, mp_size_t size)
|
|
|
|
{
|
|
|
|
mp_limb_t w, x, y, xnext, ynext, c, ret;
|
|
|
|
mp_size_t i;
|
|
|
|
|
|
|
|
ASSERT (size >= 1);
|
|
|
|
|
|
|
|
x = xp[0];
|
|
|
|
y = yp[0];
|
|
|
|
|
|
|
|
/* ret is the low bit of x+y, c is the carry out of that low bit add */
|
|
|
|
ret = (x ^ y) & 1;
|
|
|
|
c = (x & y) & 1;
|
|
|
|
|
|
|
|
for (i = 0; i < size-1; i++)
|
|
|
|
{
|
|
|
|
xnext = xp[i+1];
|
|
|
|
ynext = yp[i+1];
|
|
|
|
x = RSHIFT (xnext, x);
|
|
|
|
y = RSHIFT (ynext, y);
|
|
|
|
|
|
|
|
w = x + c;
|
|
|
|
c = (w < x);
|
|
|
|
w += y;
|
|
|
|
c += (w < y);
|
|
|
|
wp[i] = w;
|
|
|
|
|
|
|
|
x = xnext;
|
|
|
|
y = ynext;
|
|
|
|
}
|
|
|
|
|
|
|
|
wp[i] = (x >> 1) + (y >> 1) + c;
|
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
/* The speed measuring routines are the same apart from which function they
|
|
|
|
run, so a macro is used. Actually this macro is the same as
|
|
|
|
SPEED_ROUTINE_MPN_BINARY_N. */
|
|
|
|
|
|
|
|
#define SPEED_ROUTINE_MEAN(mean_fun) \
|
|
|
|
{ \
|
|
|
|
unsigned i; \
|
|
|
|
mp_ptr wp; \
|
|
|
|
double t; \
|
|
|
|
TMP_DECL; \
|
|
|
|
\
|
|
|
|
SPEED_RESTRICT_COND (s->size >= 1); \
|
|
|
|
\
|
|
|
|
TMP_MARK; \
|
|
|
|
SPEED_TMP_ALLOC_LIMBS (wp, s->size, s->align_wp); \
|
|
|
|
\
|
|
|
|
speed_operand_src (s, s->xp, s->size); \
|
|
|
|
speed_operand_src (s, s->yp, s->size); \
|
|
|
|
speed_operand_dst (s, wp, s->size); \
|
|
|
|
speed_cache_fill (s); \
|
|
|
|
\
|
|
|
|
speed_starttime (); \
|
|
|
|
i = s->reps; \
|
|
|
|
do \
|
|
|
|
mean_fun (wp, s->xp, s->yp, s->size); \
|
|
|
|
while (--i != 0); \
|
|
|
|
t = speed_endtime (); \
|
|
|
|
\
|
|
|
|
TMP_FREE; \
|
|
|
|
return t; \
|
|
|
|
}
|
|
|
|
|
|
|
|
double
|
|
|
|
speed_mean_calls (struct speed_params *s)
|
|
|
|
{
|
|
|
|
SPEED_ROUTINE_MEAN (mean_calls);
|
|
|
|
}
|
|
|
|
|
|
|
|
double
|
|
|
|
speed_mean_open (struct speed_params *s)
|
|
|
|
{
|
|
|
|
SPEED_ROUTINE_MEAN (mean_open);
|
|
|
|
}
|
|
|
|
|
|
|
|
double
|
|
|
|
speed_mean_open2 (struct speed_params *s)
|
|
|
|
{
|
|
|
|
SPEED_ROUTINE_MEAN (mean_open2);
|
|
|
|
}
|