558 lines
13 KiB
C
558 lines
13 KiB
C
/* hgcd2.c

   THE FUNCTIONS IN THIS FILE ARE INTERNAL WITH MUTABLE INTERFACES. IT IS ONLY
   SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST
   GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.

Copyright 1996, 1998, 2000, 2001, 2002, 2003, 2004 Free Software Foundation,
Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
MA 02111-1307, USA. */
|
|
|
|
#include "gmp.h"
|
|
#include "gmp-impl.h"
|
|
#include "longlong.h"
|
|
|
|
#if GMP_NAIL_BITS == 0
|
|
|
|
/* Copied from mpn/generic/gcdext.c, and modified slightly to return
|
|
the remainder. */
|
|
/* Two-limb division optimized for small quotients. */
|
|
static inline mp_limb_t
|
|
div2 (mp_ptr rp,
|
|
mp_limb_t nh, mp_limb_t nl,
|
|
mp_limb_t dh, mp_limb_t dl)
|
|
{
|
|
mp_limb_t q = 0;
|
|
|
|
if ((mp_limb_signed_t) nh < 0)
|
|
{
|
|
int cnt;
|
|
for (cnt = 1; (mp_limb_signed_t) dh >= 0; cnt++)
|
|
{
|
|
dh = (dh << 1) | (dl >> (GMP_LIMB_BITS - 1));
|
|
dl = dl << 1;
|
|
}
|
|
|
|
while (cnt)
|
|
{
|
|
q <<= 1;
|
|
if (nh > dh || (nh == dh && nl >= dl))
|
|
{
|
|
sub_ddmmss (nh, nl, nh, nl, dh, dl);
|
|
q |= 1;
|
|
}
|
|
dl = (dh << (GMP_LIMB_BITS - 1)) | (dl >> 1);
|
|
dh = dh >> 1;
|
|
cnt--;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
int cnt;
|
|
for (cnt = 0; nh > dh || (nh == dh && nl >= dl); cnt++)
|
|
{
|
|
dh = (dh << 1) | (dl >> (GMP_LIMB_BITS - 1));
|
|
dl = dl << 1;
|
|
}
|
|
|
|
while (cnt)
|
|
{
|
|
dl = (dh << (GMP_LIMB_BITS - 1)) | (dl >> 1);
|
|
dh = dh >> 1;
|
|
q <<= 1;
|
|
if (nh > dh || (nh == dh && nl >= dl))
|
|
{
|
|
sub_ddmmss (nh, nl, nh, nl, dh, dl);
|
|
q |= 1;
|
|
}
|
|
cnt--;
|
|
}
|
|
}
|
|
|
|
rp[0] = nl;
|
|
rp[1] = nh;
|
|
|
|
return q;
|
|
}
|
|
#else /* GMP_NAIL_BITS != 0 */
|
|
/* Two-limb division optimized for small quotients. Input words
|
|
include nails, which must be zero. */
|
|
static inline mp_limb_t
|
|
div2 (mp_ptr rp,
|
|
mp_limb_t nh, mp_limb_t nl,
|
|
mp_limb_t dh, mp_limb_t dl)
|
|
{
|
|
mp_limb_t q = 0;
|
|
int cnt;
|
|
|
|
ASSERT_LIMB (nh);
|
|
ASSERT_LIMB (nl);
|
|
ASSERT_LIMB (dh);
|
|
ASSERT_LIMB (dl);
|
|
|
|
/* FIXME: Always called with nh > 0 and dh >0. Then it should be
|
|
enough to look at the high limbs to select cnt. */
|
|
for (cnt = 0; nh > dh || (nh == dh && nl >= dl); cnt++)
|
|
{
|
|
dh = (dh << 1) | (dl >> (GMP_NUMB_BITS - 1));
|
|
dl = (dl << 1) & GMP_NUMB_MASK;
|
|
}
|
|
|
|
while (cnt)
|
|
{
|
|
dl = (dh << (GMP_NUMB_BITS - 1)) | (dl >> 1);
|
|
dh = dh >> 1;
|
|
dl &= GMP_NUMB_MASK;
|
|
|
|
q <<= 1;
|
|
if (nh > dh || (nh == dh && nl >= dl))
|
|
{
|
|
/* FIXME: We could perhaps optimize this by unrolling the
|
|
loop 2^GMP_NUMB_BITS - 1 times? */
|
|
nl -= dl;
|
|
nh -= dh;
|
|
nh -= (nl >> (GMP_LIMB_BITS - 1));
|
|
nl &= GMP_NUMB_MASK;
|
|
|
|
q |= 1;
|
|
}
|
|
cnt--;
|
|
}
|
|
ASSERT (nh < dh || (nh == dh && nl < dl));
|
|
rp[0] = nl;
|
|
rp[1] = nh;
|
|
|
|
return q;
|
|
}
|
|
#endif /* GMP_NAIL_BITS != 0 */
|
|
|
|
/* SUB_2 (w1,w0, x1,x0, y1,y0): two-limb subtraction
   {w1,w0} = {x1,x0} - {y1,y0}.

   All four inputs are checked with ASSERT_LIMB.  GMP_NAIL_BITS is a
   compile-time constant, so only one of the two arms is ever executed,
   but both must compile.  Without nails the work is delegated to
   sub_ddmmss; with nails the low limbs are subtracted with SUBC_LIMB and
   the resulting borrow is taken from the high limb, which is then masked
   to GMP_NUMB_MASK.  */
#define SUB_2(w1,w0, x1,x0, y1,y0)                              \
  do {                                                          \
    ASSERT_LIMB (x1);                                           \
    ASSERT_LIMB (x0);                                           \
    ASSERT_LIMB (y1);                                           \
    ASSERT_LIMB (y0);                                           \
                                                                \
    if (GMP_NAIL_BITS != 0)                                     \
      {                                                         \
        mp_limb_t __lo, __borrow;                               \
        SUBC_LIMB (__borrow, __lo, x0, y0);                     \
        (w1) = ((x1) - (y1) - __borrow) & GMP_NUMB_MASK;        \
        (w0) = __lo;                                            \
      }                                                         \
    else                                                        \
      sub_ddmmss (w1,w0, x1,x0, y1,y0);                         \
  } while (0)
|
|
|
|
|
|
/* Produce r_k from r_i and r_j, and the corresponding quotient. */
|
|
#if __GMP_HAVE_TOKEN_PASTE
|
|
/* HGCD2_STEP (i, j, k, q): one division step on the scalar rows used by
   mpn_hgcd2.  The digits i, j and k are pasted onto the variable names
   rh/rl (remainder high/low limb) and u/v (cofactors), so the expansion
   site must have rhN, rlN, uN and vN in scope for each of the three
   indices.

   Row k first receives r_i - r_j.  If that is already smaller than r_j
   the quotient is 1; otherwise div2 finishes the division and the
   quotient is 1 plus its result.  In both cases the cofactors become
   u_k = u_i + q * u_j and v_k = v_i + q * v_j.

   q must be a modifiable lvalue of limb type.  It is evaluated more than
   once, so it must be free of side effects; every use is parenthesised
   so that any lvalue expression groups correctly.  */
#define HGCD2_STEP(i, j, k, q) do {                             \
  SUB_2 (rh ## k, rl ## k,                                      \
         rh ## i, rl ## i,                                      \
         rh ## j, rl ## j);                                     \
                                                                \
  /* A zero high limb in row k is not special-cased here;       \
     the comparison below covers that case as well */           \
  if (   rh ## k < rh ## j                                      \
      || (   rh ## k == rh ## j                                 \
          && rl ## k < rl ## j))                                \
    {                                                           \
      /* Unit quotient */                                       \
      u ## k = u ## i + u ## j;                                 \
      v ## k = v ## i + v ## j;                                 \
                                                                \
      (q) = 1;                                                  \
    }                                                           \
  else                                                          \
    {                                                           \
      mp_limb_t r[2];                                           \
      (q) = 1 + div2 (r, rh ## k, rl ## k,                      \
                      rh ## j, rl ## j);                        \
      rl ## k = r[0]; rh ## k = r[1];                           \
      u ## k = u ## i + (q) * u ## j;                           \
      v ## k = v ## i + (q) * v ## j;                           \
    }                                                           \
} while (0)
|
|
#else /* ! __GMP_HAVE_TOKEN_PASTE */
|
|
/* HGCD2_STEP (i, j, k, q) for preprocessors without the ## operator:
   the empty comment between a name and a macro argument is the
   traditional way of gluing the two together.

   The digits i, j and k select the scalar rows rhN/rlN (remainder
   high/low limb) and uN/vN (cofactors) at the expansion site.  Row k
   first receives r_i - r_j.  If that is already smaller than r_j the
   quotient is 1; otherwise div2 finishes the division and the quotient
   is 1 plus its result.  In both cases the cofactors become
   u_k = u_i + q * u_j and v_k = v_i + q * v_j.

   q must be a modifiable lvalue of limb type.  It is evaluated more than
   once, so it must be free of side effects; every use is parenthesised
   so that any lvalue expression groups correctly.  */
#define HGCD2_STEP(i, j, k, q) do {                             \
  SUB_2 (rh/**/k, rl/**/k,                                      \
         rh/**/i, rl/**/i,                                      \
         rh/**/j, rl/**/j);                                     \
                                                                \
  /* A zero high limb in row k is not special-cased here;       \
     the comparison below covers that case as well */           \
  if (   rh/**/k < rh/**/j                                      \
      || (   rh/**/k == rh/**/j                                 \
          && rl/**/k < rl/**/j))                                \
    {                                                           \
      /* Unit quotient */                                       \
      u/**/k = u/**/i + u/**/j;                                 \
      v/**/k = v/**/i + v/**/j;                                 \
                                                                \
      (q) = 1;                                                  \
    }                                                           \
  else                                                          \
    {                                                           \
      mp_limb_t r[2];                                           \
      (q) = 1 + div2 (r, rh/**/k, rl/**/k,                      \
                      rh/**/j, rl/**/j);                        \
      rl/**/k = r[0]; rh/**/k = r[1];                           \
      u/**/k = u/**/i + (q) * u/**/j;                           \
      v/**/k = v/**/i + (q) * v/**/j;                           \
    }                                                           \
} while (0)
|
|
#endif /* ! __GMP_HAVE_TOKEN_PASTE */
|
|
|
|
/* Repeatedly divides A by B, until the remainder is a single limb.
   Stores cofactors and quotients in HGCD. On success, HGCD->row[0, 1,
   2] correspond to remainders that are larger than one limb, while
   HGCD->row[3] corresponds to a remainder that fits in a single limb.

   Return 0 on failure (if B or A mod B fits in a single limb). Return
   1 if r0 and r1 are correct, but we still make no progress because
   r0 = A, r1 = B.

   Otherwise return 2, 3 or 4 depending on how many of the r:s
   satisfy Jebelean's criterion. */
|
|
/* FIXME: There are two more micro optimizations that could be done to
   this code (only one of them is described here):

   The div2 function starts with checking the most significant bit of
   the numerator. When we call div2, that bit is known in advance for
   all but the one or two first calls, so we could split div2 in two
   functions, and call the right one.
*/
|
|
|
|
int
|
|
mpn_hgcd2 (struct hgcd2 *hgcd,
|
|
mp_limb_t ah, mp_limb_t al,
|
|
mp_limb_t bh, mp_limb_t bl)
|
|
{
|
|
/* For all divisions, we special case q = 1, which accounts for
|
|
approximately 41% of the quotients for random numbers (Knuth,
|
|
TAOCP 4.5.3) */
|
|
|
|
/* Use scalar variables */
|
|
mp_limb_t rh1, rl1, u1, v1;
|
|
mp_limb_t rh2, rl2, u2, v2;
|
|
mp_limb_t rh3, rl3, u3, v3;
|
|
|
|
mp_limb_t q0, q1;
|
|
|
|
ASSERT_LIMB (ah);
|
|
ASSERT_LIMB (al);
|
|
ASSERT_LIMB (bh);
|
|
ASSERT_LIMB (bl);
|
|
ASSERT (ah > bh || (ah == bh && al >= bl));
|
|
|
|
if (bh == 0)
|
|
return 0;
|
|
|
|
{
|
|
mp_limb_t rh0, rl0, u0, v0;
|
|
|
|
/* Initialize first two rows */
|
|
rh0 = ah; rl0 = al; u0 = 1; v0 = 0;
|
|
rh1 = bh; rl1 = bl; u1 = 0; v1 = 1;
|
|
|
|
SUB_2 (rh2, rl2, rh0, rl0, rh1, rl1);
|
|
|
|
if (rh2 == 0)
|
|
return 0;
|
|
|
|
if (rh2 < rh1 || (rh2 == rh1 && rl2 < rl1))
|
|
{
|
|
/* Unit quotient */
|
|
q0 = 1;
|
|
}
|
|
else
|
|
{
|
|
mp_limb_t r[2];
|
|
q0 = 1 + div2 (r, rh2, rl2, rh1, rl1);
|
|
|
|
rl2 = r[0]; rh2 = r[1];
|
|
|
|
if (rh2 == 0)
|
|
return 0;
|
|
}
|
|
|
|
u2 = 1;
|
|
v2 = q0;
|
|
|
|
/* The simple version of the loop is as follows:
|
|
|
|
|
| hgcd->sign = 0;
|
|
| for (;;)
|
|
| {
|
|
| (q, rh3, rl3]) = divmod (r1, r2);
|
|
| u[3] = u1 + q * u2;
|
|
| v[3] = v1 + q * v2;
|
|
|
|
|
| if (rh3 == 0)
|
|
| break;
|
|
|
|
|
| HGCD2_SHIFT4_LEFT (hgcd->row);
|
|
| hgcd->sign = ~hgcd->sign;
|
|
| }
|
|
|
|
|
| But then we special case for q = 1, and unroll the loop four times
|
|
| to avoid data movement. */
|
|
|
|
for (;;)
|
|
{
|
|
HGCD2_STEP (1, 2, 3, q1);
|
|
if (rh3 == 0)
|
|
{
|
|
hgcd->row[0].u = u0; hgcd->row[0].v = v0;
|
|
hgcd->sign = 0;
|
|
|
|
break;
|
|
}
|
|
HGCD2_STEP (2, 3, 0, q0);
|
|
if (rh0 == 0)
|
|
{
|
|
hgcd->row[0].u = u1; hgcd->row[0].v = v1;
|
|
|
|
rh1 = rh2; rl1 = rl2; u1 = u2; v1 = v2;
|
|
rh2 = rh3; rl2 = rl3; u2 = u3; v2 = v3;
|
|
rh3 = rh0; rl3 = rl0; u3 = u0; v3 = v0;
|
|
|
|
hgcd->sign = -1;
|
|
break;
|
|
}
|
|
|
|
HGCD2_STEP (3, 0, 1, q1);
|
|
if (rh1 == 0)
|
|
{
|
|
hgcd->row[0].u = u2; hgcd->row[0].v = v2;
|
|
rh2 = rh0; rl2 = rl0; u2 = u0; v2 = v0;
|
|
|
|
MP_LIMB_T_SWAP (rh1, rh3); MP_LIMB_T_SWAP (rl1, rl3);
|
|
MP_LIMB_T_SWAP ( u1, u3); MP_LIMB_T_SWAP ( v1, v3);
|
|
|
|
hgcd->sign = 0;
|
|
break;
|
|
}
|
|
|
|
HGCD2_STEP (0, 1, 2, q0);
|
|
if (rh2 == 0)
|
|
{
|
|
hgcd->row[0].u = u3; hgcd->row[0].v = v3;
|
|
|
|
rh3 = rh2; rl3 = rl2; u3 = u2; v3 = v2;
|
|
rh2 = rh1; rl2 = rl1; u2 = u1; v2 = v1;
|
|
rh1 = rh0; rl1 = rl0; u1 = u0; v1 = v0;
|
|
|
|
hgcd->sign = -1;
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
ASSERT (rh1 != 0);
|
|
ASSERT (rh2 != 0);
|
|
ASSERT (rh3 == 0);
|
|
ASSERT (rh1 > rh2 || (rh1 == rh2 && rl1 > rl2));
|
|
ASSERT (rh2 > rh3 || (rh2 == rh3 && rl2 > rl3));
|
|
|
|
/* Coefficients to be returned */
|
|
hgcd->row[1].u = u1; hgcd->row[1].v = v1;
|
|
hgcd->row[2].u = u2; hgcd->row[2].v = v2;
|
|
hgcd->row[3].u = u3; hgcd->row[3].v = v3;
|
|
|
|
if (hgcd->sign >= 0)
|
|
{
|
|
hgcd->q[0] = q0;
|
|
hgcd->q[1] = q1;
|
|
}
|
|
else
|
|
{
|
|
hgcd->q[0] = q1;
|
|
hgcd->q[1] = q0;
|
|
}
|
|
|
|
/* Rows 1, 2 and 3 are used below, rh0, rl0, u0 and v0 are not. */
|
|
#if GMP_NAIL_BITS == 0
|
|
{
|
|
mp_limb_t sh;
|
|
mp_limb_t sl;
|
|
mp_limb_t th;
|
|
mp_limb_t tl;
|
|
|
|
/* Check r2 */
|
|
/* We always have r2 > u2, v2 */
|
|
|
|
if (hgcd->sign >= 0)
|
|
{
|
|
/* Check if r1 - r2 >= u2 - u1 = |u2| + |u1| */
|
|
sl = u2 + u1;
|
|
sh = (sl < u1);
|
|
}
|
|
else
|
|
{
|
|
/* Check if r1 - r2 >= v2 - v1 = |v2| + |v1| */
|
|
sl = v2 + v1;
|
|
sh = (sl < v1);
|
|
}
|
|
|
|
sub_ddmmss (th, tl, rh1, rl1, rh2, rl2);
|
|
|
|
if (th < sh || (th == sh && tl < sl))
|
|
return 2 - (hgcd->row[0].v == 0);
|
|
|
|
/* Check r3 */
|
|
|
|
if (hgcd->sign >= 0)
|
|
{
|
|
/* Check r3 >= max (-u3, -v3) = |u3| */
|
|
if (rl3 < u3)
|
|
return 3;
|
|
|
|
/* Check r3 - r2 >= v3 - v2 = |v2| + |v1|*/
|
|
sl = v3 + v2;
|
|
sh = (sl < v2);
|
|
}
|
|
else
|
|
{
|
|
/* Check r3 >= max (-u3, -v3) = |v3| */
|
|
if (rl3 < v3)
|
|
return 3;
|
|
|
|
/* Check r3 - r2 >= u3 - u2 = |u2| + |u1| */
|
|
sl = u3 + u2;
|
|
sh = (sl < u2);
|
|
}
|
|
|
|
sub_ddmmss (th, tl, rh2, rl2, 0, rl3);
|
|
|
|
if (th < sh || (th == sh && tl < sl))
|
|
return 3;
|
|
|
|
return 4;
|
|
}
|
|
#else /* GMP_NAIL_BITS > 0 */
|
|
{
|
|
mp_limb_t sl;
|
|
mp_limb_t th;
|
|
mp_limb_t tl;
|
|
|
|
/* Check r2 */
|
|
/* We always have r2 > u2, v2 */
|
|
|
|
if (hgcd->sign >= 0)
|
|
{
|
|
/* Check if r1 - r2 >= u2 - u1 = |u2| + |u1| */
|
|
sl = u2 + u1;
|
|
}
|
|
else
|
|
{
|
|
/* Check if r1 - r2 >= v2 - v1 = |v2| + |v1| */
|
|
sl = v2 + v1;
|
|
}
|
|
|
|
tl = rl1 - rl2;
|
|
th = rh1 - rh2 - (tl >> (GMP_LIMB_BITS - 1));
|
|
ASSERT_LIMB (th);
|
|
|
|
if (th < (CNST_LIMB (1) << GMP_NAIL_BITS)
|
|
&& ((th << GMP_NUMB_BITS) | (tl & GMP_NUMB_MASK)) < sl)
|
|
return 2 - (hgcd->row[0].v == 0);
|
|
|
|
/* Check r3 */
|
|
|
|
if (hgcd->sign >= 0)
|
|
{
|
|
/* Check r3 >= max (-u3, -v3) = |u3| */
|
|
if (rl3 < u3)
|
|
return 3;
|
|
|
|
/* Check r3 - r2 >= v3 - v2 = |v2| + |v1|*/
|
|
sl = v3 + v2;
|
|
}
|
|
else
|
|
{
|
|
/* Check r3 >= max (-u3, -v3) = |v3| */
|
|
if (rl3 < v3)
|
|
return 3;
|
|
|
|
/* Check r3 - r2 >= u3 - u2 = |u2| + |u1| */
|
|
sl = u3 + u2;
|
|
}
|
|
|
|
tl = rl2 - rl3;
|
|
th = rh2 - (tl >> (GMP_LIMB_BITS - 1));
|
|
ASSERT_LIMB (th);
|
|
|
|
if (th < (CNST_LIMB (1) << GMP_NAIL_BITS)
|
|
&& ((th << GMP_NUMB_BITS) | (tl & GMP_NUMB_MASK)) < sl)
|
|
return 3;
|
|
|
|
return 4;
|
|
}
|
|
#endif /* GMP_NAIL_BITS > 0 */
|
|
}
|
|
|
|
mp_size_t
|
|
mpn_hgcd2_fix (mp_ptr rp, mp_size_t ralloc,
|
|
int sign,
|
|
mp_limb_t u, mp_srcptr ap, mp_size_t asize,
|
|
mp_limb_t v, mp_srcptr bp, mp_size_t bsize)
|
|
{
|
|
mp_size_t rsize;
|
|
mp_limb_t cy;
|
|
|
|
ASSERT_LIMB (u);
|
|
ASSERT_LIMB (v);
|
|
|
|
if (sign < 0)
|
|
{
|
|
MP_LIMB_T_SWAP (u,v);
|
|
MPN_SRCPTR_SWAP (ap, asize, bp, bsize);
|
|
}
|
|
|
|
ASSERT (u > 0);
|
|
|
|
ASSERT (asize <= ralloc);
|
|
rsize = asize;
|
|
cy = mpn_mul_1 (rp, ap, asize, u);
|
|
if (cy)
|
|
{
|
|
ASSERT (rsize < ralloc);
|
|
rp[rsize++] = cy;
|
|
}
|
|
|
|
if (v > 0)
|
|
{
|
|
ASSERT (bsize <= rsize);
|
|
cy = mpn_submul_1 (rp, bp, bsize, v);
|
|
if (cy)
|
|
{
|
|
ASSERT (bsize < rsize);
|
|
ASSERT_NOCARRY (mpn_sub_1 (rp + bsize,
|
|
rp + bsize, rsize - bsize, cy));
|
|
}
|
|
|
|
MPN_NORMALIZE (rp, rsize);
|
|
}
|
|
return rsize;
|
|
}
|
|
|
|
#undef HGCD2_STEP
|