e554550755
for file in $(find -name \*.h ) ; do sed -e "s/#include \"gmp\.h\"/#include \"mpir.h\"/g" $file > temp ; mv temp $file ; done for file in $(find -name \*.cc) ; do sed -e "s/#include \"gmp\.h\"/#include \"mpir.h\"/g" $file > temp ; mv temp $file ; done
337 lines
7.0 KiB
C
337 lines
7.0 KiB
C
/* Schönhage's 1987 algorithm */
|
|
|
|
#include <stdio.h> /* for NULL */
|
|
|
|
#include "mpir.h"
|
|
#include "gmp-impl.h"
|
|
#include "longlong.h"
|
|
|
|
#define SGCD_MATRIX_INIT_ITCH(n) (5 * ((n)+1))
|
|
|
|
static void
|
|
sgcd_matrix_init (struct ngcd_matrix *M, mp_size_t n, mp_ptr p)
|
|
{
|
|
/* Need one extra limb */
|
|
n++;
|
|
M->alloc = n;
|
|
M->n = 1;
|
|
MPN_ZERO (p, 4 * n);
|
|
M->p[0][0] = p;
|
|
M->p[0][1] = p + n;
|
|
M->p[1][0] = p + 2 * n;
|
|
M->p[1][1] = p + 3 * n;
|
|
M->tp = p + 4*n;
|
|
|
|
M->p[0][0][0] = M->p[1][1][0] = 1;
|
|
}
|
|
|
|
|
|
#define SGCD_BASE_ITCH MPN_NGCD_STEP_ITCH
|
|
|
|
/* Reduces a,b until |a-b| fits in s limbs. Constructs matrix M with
|
|
elements of size at most n-s. Returns new size on success, or 0 if
|
|
a or b are too small. M = NULL is allowed is M is not needed. */
|
|
static mp_size_t
|
|
sgcd_base (mp_size_t n, mp_ptr ap, mp_ptr bp, mp_size_t s,
|
|
struct ngcd_matrix *M, mp_ptr tp)
|
|
{
|
|
int success = 0;
|
|
|
|
ASSERT (s >= 1);
|
|
/* Should these be assertions? */
|
|
ASSERT (ap[n-1] > 0 || bp[n-1] > 0);
|
|
|
|
for (;;)
|
|
{
|
|
mp_size_t nn;
|
|
ASSERT (n > s);
|
|
nn = mpn_ngcd_step (n, ap, bp, s, M, tp);
|
|
if (nn > 0)
|
|
{
|
|
success = 1;
|
|
n = nn;
|
|
}
|
|
else break;
|
|
}
|
|
return success ? n : 0;
|
|
}
|
|
|
|
/* Size analysis for sgcd:
|
|
|
|
For all recursive calls, we have n <= 4 r. Then the storage need is
|
|
determined by the storage for the recursive call computing M1, and
|
|
the multiplication M * M1 (after this, the storage needed for M1
|
|
can be recycled).
|
|
|
|
Let S(r) denote the storage needed for this. For the storage M1 we
|
|
need 5*(r/2 + 1). For the recursive call, we need S(r/2). Then if
|
|
$k$ is the recursive depth, we have
|
|
|
|
S(r) <= 5 (r/2 + r/4 + n/(2^k)) + 5k < 5(r+k)
|
|
|
|
If we allocate 5(r+k), then after allocating M1, the remaining storage is
|
|
|
|
5*(r+k) - 5*(r/2 + 1) = 5 * ceil(r/2) + 5*(k-1) >= 5*r / 2
|
|
|
|
This is sufficient for the call to sgcd_matrix_mul, which needs storage 2*r.
|
|
|
|
Then the other operations need storage depending on n, not only r:
|
|
|
|
* sgcd_base and sgcd_step: n + 1
|
|
|
|
* sgcd_matrix_adjust: 2 * n
|
|
|
|
So the storage need is MAX (5*(r+k), 2*n)
|
|
*/
|
|
|
|
static mp_size_t
|
|
sgcd_itch (mp_size_t n, mp_size_t s)
|
|
{
|
|
mp_size_t n1, n2, r, rr;
|
|
int k;
|
|
|
|
ASSERT (s < n);
|
|
|
|
/* Inefficient way to compute log2, but it doesn'shouldn't matter */
|
|
for (k = 0, rr = r = n - s;
|
|
ABOVE_THRESHOLD (rr, SGCD_BASE_THRESHOLD);
|
|
rr /= 2)
|
|
k++;
|
|
|
|
if (k == 0)
|
|
return SGCD_BASE_ITCH (n);
|
|
|
|
n1 = 5*(r + k);
|
|
n2 = 2*n;
|
|
|
|
return MAX (n1, n2);
|
|
}
|
|
|
|
/* Reduces a,b until |a-b| fits in s limbs. Constructs matrix M with
|
|
elements of size at most n-s. Returns new size on success, or 0 if
|
|
the inputs a or b or |a-b| already fit in s limbs. FIXME: Is the
|
|
failure return value really useful? */
|
|
static mp_size_t
|
|
sgcd (mp_size_t n, mp_ptr ap, mp_ptr bp, mp_size_t s,
|
|
struct ngcd_matrix *M, int need_m, mp_ptr tp)
|
|
{
|
|
struct ngcd_matrix M1;
|
|
mp_ptr trp;
|
|
mp_size_t r, p, s1, h, nn;
|
|
|
|
int success;
|
|
|
|
ASSERT (n > s);
|
|
ASSERT ((ap[n-1] | bp[n-1]) > 0);
|
|
|
|
/* Number of limbs to reduce */
|
|
r = n - s;
|
|
ASSERT (r < M->alloc);
|
|
|
|
/* Needs storage n + 1 (<= SGCD_BASE_THRESHOLD + 1) */
|
|
if (BELOW_THRESHOLD (r, SGCD_BASE_THRESHOLD))
|
|
return sgcd_base (n, ap, bp, s, need_m ? M : NULL, tp);
|
|
success = 0;
|
|
|
|
/* Is the smallest number of size smaller than s+2? */
|
|
if (n <= s + 2)
|
|
goto final;
|
|
|
|
if (ap[n-1] == 0)
|
|
{
|
|
mp_size_t k = n - s;
|
|
MPN_NORMALIZE (ap + s, k);
|
|
if (k == 0)
|
|
return 0;
|
|
|
|
if (k <= 2)
|
|
goto final;
|
|
}
|
|
else if (bp[n-1] == 0)
|
|
{
|
|
mp_size_t k = n - s;
|
|
MPN_NORMALIZE (ap + s, k);
|
|
|
|
if (k == 0)
|
|
return 0;
|
|
if (k <= 2)
|
|
goto final;
|
|
}
|
|
|
|
if ( (ap[n-1] == 0 && ap[n-2] == 0)
|
|
|| (bp[n-1] == 0 && bp[n-2] == 0) )
|
|
goto final;
|
|
|
|
if (s <= r)
|
|
{
|
|
p = 0;
|
|
s1 = s;
|
|
}
|
|
else
|
|
{
|
|
s1 = r;
|
|
p = s - r + 1;
|
|
need_m = 1;
|
|
}
|
|
|
|
h = s1 + r/2;
|
|
|
|
/* Bound for the first recursive call */
|
|
ASSERT (s1 <= r);
|
|
ASSERT (h <= r + r/2);
|
|
ASSERT (n - p <= 2*r);
|
|
|
|
/* Shouldn't need any more storage than ourselves */
|
|
nn = sgcd (n - p, ap + p, bp + p, h, M, need_m, tp);
|
|
if (nn)
|
|
{
|
|
success = 1;
|
|
n = nn + p;
|
|
}
|
|
|
|
for (;;)
|
|
{
|
|
/* Needs (n-p + 1) <= n + 1 storage */
|
|
nn = mpn_ngcd_step (n - p, ap + p, bp + p, s1, need_m ? M : NULL, tp);
|
|
if (nn == 0)
|
|
goto adjust;
|
|
|
|
success = 1;
|
|
n = nn + p;
|
|
if (nn < h)
|
|
break;
|
|
}
|
|
|
|
/* Size bounds for the second recursive call */
|
|
ASSERT (n - p <= h);
|
|
|
|
/* Needs 5 * (r/2 + 1) words of storage. */
|
|
sgcd_matrix_init (&M1, h - s1, tp);
|
|
/* The remaining storage, until we can recycle the storage needed for M1. */
|
|
trp = tp + SGCD_MATRIX_INIT_ITCH (h - s1);
|
|
|
|
/* Needs storage corresponding to r/2 */
|
|
nn = sgcd (n - p, ap + p, bp + p, s1, &M1, need_m, trp);
|
|
if (nn)
|
|
{
|
|
success = 1;
|
|
n = nn + p;
|
|
/* Needs 2*M->n <= 2*r words of storage */
|
|
if (need_m)
|
|
mpn_ngcd_matrix_mul (M, &M1, trp);
|
|
}
|
|
|
|
adjust:
|
|
if (p > 0)
|
|
{
|
|
ASSERT (need_m);
|
|
/* Needs 2*(p + M->n) <= 2*(p + r) = 2*(s+1). */
|
|
n = mpn_ngcd_matrix_adjust (M, n, ap, bp, p, tp);
|
|
}
|
|
final:
|
|
for (;;)
|
|
{
|
|
/* Needs n + 1 limbs of storage */
|
|
nn = mpn_ngcd_step (n, ap, bp, s, need_m ? M : NULL, tp);
|
|
if (nn > 0)
|
|
{
|
|
success = 1;
|
|
n = nn;
|
|
}
|
|
else break;
|
|
}
|
|
return success ? n : 0;
|
|
}
|
|
|
|
#define EVEN_P(x) (((x) & 1) == 0)
|
|
|
|
mp_size_t
|
|
mpn_sgcd (mp_ptr gp, mp_ptr ap, mp_size_t an, mp_ptr bp, mp_size_t bn)
|
|
{
|
|
mp_size_t s;
|
|
mp_size_t sgcd_matrix_scratch;
|
|
mp_size_t sgcd_scratch;
|
|
mp_size_t n;
|
|
mp_ptr tp;
|
|
struct ngcd_matrix M;
|
|
int c;
|
|
|
|
TMP_DECL;
|
|
|
|
if (BELOW_THRESHOLD (bn, SGCD_THRESHOLD))
|
|
return mpn_gcd (gp, ap, an, bp, bn);
|
|
|
|
TMP_MARK;
|
|
|
|
if (an > bn)
|
|
{
|
|
mp_ptr rp = TMP_ALLOC_LIMBS (an + 1);
|
|
mp_ptr qp = rp + bn;
|
|
|
|
mpn_tdiv_qr (qp, rp, 0, ap, an, bp, bn);
|
|
MPN_COPY (ap, rp, bn);
|
|
an = bn;
|
|
MPN_NORMALIZE (ap, an);
|
|
if (an == 0)
|
|
{
|
|
MPN_COPY (gp, bp, bn);
|
|
TMP_FREE;
|
|
return bn;
|
|
}
|
|
}
|
|
|
|
s = SGCD_THRESHOLD / 2;
|
|
sgcd_matrix_scratch = SGCD_MATRIX_INIT_ITCH (bn - s);
|
|
sgcd_scratch = sgcd_itch (bn, s);
|
|
|
|
/* FIXME: Allocates space for M. Even if the full M isn't needed,
|
|
some space is needed later in the recursion. But we allocate more
|
|
than needed here. */
|
|
tp = TMP_ALLOC_LIMBS (sgcd_matrix_scratch + sgcd_scratch);
|
|
sgcd_matrix_init (&M, bn - s, tp);
|
|
|
|
n = sgcd (bn, ap, bp, s, &M, 0, tp + sgcd_matrix_scratch);
|
|
|
|
TMP_FREE;
|
|
|
|
if (n == 0)
|
|
/* sgcd failed */
|
|
n = bn;
|
|
|
|
MPN_CMP (c, ap, bp, n);
|
|
if (c < 0)
|
|
MP_PTR_SWAP (ap, bp);
|
|
else if (c == 0)
|
|
{
|
|
MPN_COPY (gp, ap, n);
|
|
return n;
|
|
}
|
|
|
|
an = n;
|
|
bn = n;
|
|
MPN_NORMALIZE (bp, bn);
|
|
ASSERT (bn > 0);
|
|
|
|
if (EVEN_P (bp[0]))
|
|
{
|
|
/* Then a must be odd (since the calling convention implies that
|
|
there's no common factor of 2) */
|
|
ASSERT (!EVEN_P (ap[0]));
|
|
while (bp[0] == 0)
|
|
{
|
|
bp++;
|
|
bn--;
|
|
}
|
|
|
|
if (EVEN_P(bp[0]))
|
|
{
|
|
int count;
|
|
count_trailing_zeros (count, bp[0]);
|
|
ASSERT_NOCARRY (mpn_rshift (bp, bp, n, count));
|
|
bn -= (bp[bn-1] == 0);
|
|
}
|
|
}
|
|
|
|
return mpn_gcd (gp, ap, an, bp, bn);
|
|
}
|