e554550755
for file in $(find -name \*.h ) ; do sed -e "s/#include \"gmp\.h\"/#include \"mpir.h\"/g" $file > temp ; mv temp $file ; done for file in $(find -name \*.cc) ; do sed -e "s/#include \"gmp\.h\"/#include \"mpir.h\"/g" $file > temp ; mv temp $file ; done
226 lines
5.3 KiB
C
226 lines
5.3 KiB
C
/* Schönhage algorithm, from GMP cvs */
|
|
#include <stdio.h> /* for NULL */
|
|
|
|
#include "mpir.h"
|
|
#include "gmp-impl.h"
|
|
#include "longlong.h"
|
|
|
|
#define EVEN_P(x) (((x) & 1) == 0)
|
|
|
|
/* Allows an even v */
|
|
static mp_size_t
|
|
gcd_binary (mp_ptr gp, mp_ptr up, mp_size_t un, mp_ptr vp, mp_size_t vn)
|
|
{
|
|
if ((vp[0] & 1) == 0)
|
|
{
|
|
/* Calling convention says there can be no common factors of two. */
|
|
ASSERT ((up[0] & 1) == 1);
|
|
|
|
while (vp[0] == 0)
|
|
{
|
|
ASSERT (vn > 0);
|
|
vp++;
|
|
vn--;
|
|
}
|
|
|
|
if ((vp[0] & 1) == 0)
|
|
{
|
|
int shift;
|
|
count_trailing_zeros (shift, vp[0]);
|
|
ASSERT_NOCARRY (mpn_rshift (vp, vp, vn, shift));
|
|
vn -= (vp[vn-1] == 0);
|
|
}
|
|
}
|
|
return mpn_gcd (gp, up, un, vp, vn);
|
|
}
|
|
|
|
#define MPN_LEQ_P(ap, asize, bp, bsize) \
|
|
((asize) < (bsize) || ((asize) == (bsize) \
|
|
&& mpn_cmp ((ap), (bp), (asize)) <= 0))
|
|
|
|
/* Sets (a, b, c, d) <-- (c, d, a, b) */
|
|
#define NHGCD_SWAP4_2(row) \
|
|
do { \
|
|
struct hgcd_row __nhgcd_swap4_2_tmp; \
|
|
__nhgcd_swap4_2_tmp = row[0]; \
|
|
row[0] = row[2]; \
|
|
row[2] = __nhgcd_swap4_2_tmp; \
|
|
__nhgcd_swap4_2_tmp = row[1]; \
|
|
row[1] = row[3]; \
|
|
row[3] = __nhgcd_swap4_2_tmp; \
|
|
} while (0)
|
|
|
|
/* Sets (a, b, c) <-- (b, c, a) */
|
|
#define NHGCD_SWAP3_LEFT(row) \
|
|
do { \
|
|
struct hgcd_row __nhgcd_swap4_left_tmp; \
|
|
__nhgcd_swap4_left_tmp = row[0]; \
|
|
row[0] = row[1]; \
|
|
row[1] = row[2]; \
|
|
row[2] = __nhgcd_swap4_left_tmp; \
|
|
} while (0)
|
|
|
|
static mp_size_t
|
|
hgcd_tdiv (mp_ptr qp,
|
|
mp_ptr rp, mp_size_t *rsizep,
|
|
mp_srcptr ap, mp_size_t asize,
|
|
mp_srcptr bp, mp_size_t bsize)
|
|
{
|
|
mp_size_t qsize;
|
|
mp_size_t rsize;
|
|
|
|
mpn_tdiv_qr (qp, rp, 0, ap, asize, bp, bsize);
|
|
|
|
rsize = bsize;
|
|
MPN_NORMALIZE (rp, rsize);
|
|
*rsizep = rsize;
|
|
|
|
qsize = asize - bsize + 1;
|
|
qsize -= (qp[qsize - 1] == 0);
|
|
|
|
if (qsize == 1 && qp[0] == 1)
|
|
return 0;
|
|
|
|
return qsize;
|
|
}
|
|
|
|
static mp_size_t
|
|
gcd_schoenhage_itch (mp_size_t asize)
|
|
{
|
|
/* Size for hgcd calls */
|
|
mp_size_t ralloc = asize + 1;
|
|
mp_size_t hgcd_size = (asize + 1) / 2;
|
|
return (4 * ralloc /* Remainder storage */
|
|
+ mpn_hgcd_init_itch (hgcd_size) /* hgcd storage */
|
|
+ mpn_hgcd_quotients_init_itch (hgcd_size)
|
|
+ mpn_hgcd_itch (hgcd_size) /* nhgcd call */
|
|
+ 1+ 3 * asize / 4); /* hgcd_fix */
|
|
}
|
|
|
|
static mp_size_t
|
|
gcd_schoenhage (mp_ptr gp, mp_srcptr ap, mp_size_t asize,
|
|
mp_srcptr bp, mp_size_t bsize,
|
|
mp_ptr tp, mp_size_t talloc)
|
|
{
|
|
mp_size_t scratch;
|
|
struct hgcd hgcd;
|
|
struct hgcd_row r[4];
|
|
|
|
mp_size_t ralloc = asize + 1;
|
|
|
|
ASSERT (asize >= bsize);
|
|
ASSERT (bsize > 0);
|
|
|
|
ASSERT (MPN_LEQ_P (bp, bsize, ap, asize));
|
|
|
|
ASSERT (4 * ralloc <= talloc);
|
|
tp += ralloc; talloc -= ralloc;
|
|
r[0].rp = tp; tp += ralloc; talloc -= ralloc;
|
|
r[1].rp = tp; tp += ralloc; talloc -= ralloc;
|
|
r[2].rp = tp; tp += ralloc; talloc -= ralloc;
|
|
r[3].rp = tp; tp += ralloc; talloc -= ralloc;
|
|
|
|
MPN_COPY (r[0].rp, ap, asize); r[0].rsize = asize;
|
|
MPN_COPY (r[1].rp, bp, bsize); r[1].rsize = bsize;
|
|
|
|
scratch = mpn_hgcd_init_itch ((asize + 1)/2);
|
|
ASSERT (scratch <= talloc);
|
|
mpn_hgcd_init (&hgcd, (asize + 1)/2, tp);
|
|
tp += scratch; talloc -= scratch;
|
|
|
|
{
|
|
mp_size_t nlimbs = mpn_hgcd_quotients_init_itch ((asize + 1)/2);
|
|
|
|
ASSERT (nlimbs <= talloc);
|
|
|
|
mpn_hgcd_quotients_init (&hgcd, (asize + 1) / 2, tp);
|
|
|
|
tp += nlimbs;
|
|
talloc -= nlimbs;
|
|
}
|
|
|
|
while (ABOVE_THRESHOLD (r[0].rsize, GCD_SCHOENHAGE_THRESHOLD)
|
|
&& r[1].rsize > 0)
|
|
{
|
|
mp_size_t k = r[0].rsize / 2;
|
|
int res;
|
|
|
|
if (r[1].rsize <= k)
|
|
goto euclid;
|
|
|
|
res = mpn_hgcd (&hgcd,
|
|
r[0].rp + k, r[0].rsize - k,
|
|
r[1].rp + k, r[1].rsize - k,
|
|
tp, talloc);
|
|
|
|
if (res == 0 || res == 1)
|
|
{
|
|
euclid:
|
|
ASSERT (r[0].rsize - r[1].rsize + 1 <= talloc);
|
|
hgcd_tdiv (tp, r[2].rp, &r[2].rsize,
|
|
r[0].rp, r[0].rsize,
|
|
r[1].rp, r[1].rsize);
|
|
|
|
NHGCD_SWAP3_LEFT (r);
|
|
}
|
|
else
|
|
{
|
|
const struct hgcd_row *s = hgcd.row + (res - 2);
|
|
int sign = hgcd.sign;
|
|
if (res == 3)
|
|
sign = ~sign;
|
|
|
|
/* s[0] and s[1] are correct */
|
|
r[2].rsize
|
|
= mpn_hgcd_fix (k, r[2].rp, ralloc,
|
|
sign, hgcd.size, s,
|
|
r[0].rp, r[1].rp,
|
|
tp, talloc);
|
|
|
|
r[3].rsize
|
|
= mpn_hgcd_fix (k, r[3].rp, ralloc,
|
|
~sign, hgcd.size, s+1,
|
|
r[0].rp, r[1].rp,
|
|
tp, talloc);
|
|
|
|
NHGCD_SWAP4_2 (r);
|
|
}
|
|
}
|
|
|
|
if (r[1].rsize == 0)
|
|
{
|
|
MPN_COPY (gp, r[0].rp, r[0].rsize);
|
|
return r[0].rsize;
|
|
}
|
|
else
|
|
return gcd_binary (gp,
|
|
r[0].rp, r[0].rsize,
|
|
r[1].rp, r[1].rsize);
|
|
}
|
|
|
|
/* Should we perform an initial division? */
|
|
mp_size_t
|
|
mpn_rgcd (mp_ptr gp, mp_ptr up, mp_size_t usize, mp_ptr vp, mp_size_t vsize)
|
|
{
|
|
if (BELOW_THRESHOLD (usize, GCD_SCHOENHAGE_THRESHOLD))
|
|
return mpn_gcd (gp, up, usize, vp, vsize);
|
|
|
|
/* The algorithms below require U >= V, while mpn_gcd is long documented as
|
|
requiring only that the position of U's msb >= V's msb. */
|
|
if (usize == vsize && mpn_cmp (up, vp, usize) < 0)
|
|
MP_PTR_SWAP (up, vp);
|
|
|
|
{
|
|
mp_size_t scratch;
|
|
mp_ptr tp;
|
|
mp_size_t gsize;
|
|
|
|
scratch = gcd_schoenhage_itch (usize);
|
|
tp = __GMP_ALLOCATE_FUNC_LIMBS (scratch);
|
|
|
|
gsize = gcd_schoenhage (gp, up, usize, vp, vsize, tp, scratch);
|
|
__GMP_FREE_FUNC_LIMBS (tp, scratch);
|
|
return gsize;
|
|
}
|
|
}
|