/* mpn_gcdext -- Extended Greatest Common Divisor.

Copyright 1996, 1998, 2000, 2001, 2002 Free Software Foundation, Inc.

Copyright 2004, 2005 Niels Möller

Copyright 2010 William Hart

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */

#include "mpir.h"
#include "gmp-impl.h"
#include "longlong.h"

/*---------------------------------------------------------

   Asymptotically fast xgcd based on Niels Möller's ngcd

----------------------------------------------------------*/

/*
 *
 * To make this code work with "make tune" we need to conditionally
 * exclude the Moller code when this file gets included inside of
 * gcdext*.c in ../tune.
 */
#ifndef INSIDE_TUNE_GCDEXT_BIN

/* Size of the low part that is set aside before calling mpn_nhgcd on the
   high part.  The argument is parenthesised so that any expression can be
   passed safely. */
#define P_SIZE(n) ((n)/3)

/* One "subtract then divide" reduction step, used when mpn_nhgcd made no
   progress.

   Needs temporary storage for the division and multiplication

   The division has quotient of size an - bn + 1
   product needs an + un at most
   Thus we need space at most n + un + 1

   If the gcd is found, stores it in gp and *gn, and the associated
   cofactor in {s0p, |*un|} (the sign of *un is the sign of the cofactor)
   and returns zero.  Otherwise, compute the reduced a and b, update u0
   and u1 (and *un), and return the new size. */
mp_size_t
mpn_ngcdext_subdiv_step (mp_ptr gp, mp_size_t *gn, mp_ptr s0p, mp_ptr u0,
                         mp_ptr u1, mp_size_t *un, mp_ptr ap, mp_ptr bp,
                         mp_size_t n, mp_ptr tp)
{
  /* Called when nhgcd or mpn_nhgcd2 has failed. Then either one of a or b
     is very small, or the difference is very small. Perform one
     subtraction followed by one division. */

  mp_size_t an, bn, qn, qn2, u0n;
  mp_limb_t cy;       /* carry out of the cofactor additions */
  int negate = 0;     /* toggled on every (ap, bp)/(u0, u1) swap */
  int c;

  ASSERT (n > 0);
  ASSERT (ap[n-1] > 0 || bp[n-1] > 0);

  /* See to what extent ap and bp are the same */
  for (an = n; an > 0; an--)
    if (ap[an-1] != bp[an-1])
      break;

  if (an == 0)
    {
      /* ap OR bp is the gcd, two possible normalisations
         u1 or -u0, pick the smallest */
      MPN_COPY (gp, ap, n);
      (*gn) = n;

      MPN_CMP (c, u1, u0, *un);
      if (c <= 0) /* u1 is smallest */
        {
          MPN_NORMALIZE (u1, (*un));
          MPN_COPY (s0p, u1, (*un));
        }
      else /* -u0 is smallest */
        {
          MPN_NORMALIZE (u0, (*un));
          MPN_COPY (s0p, u0, (*un));
          (*un) = -(*un);
        }

      return 0;
    }

  if (ap[an-1] < bp[an-1]) /* swap so that ap >= bp */
    {
      MP_PTR_SWAP (ap, bp);
      MP_PTR_SWAP (u0, u1);
      negate = ~negate;
    }

  bn = n;
  MPN_NORMALIZE (bp, bn);
  if (bn == 0)
    {
      /* ap is the gcd */
      MPN_COPY (gp, ap, n);
      MPN_NORMALIZE (u1, (*un));
      MPN_COPY (s0p, u1, (*un));
      if (negate)
        (*un) = -(*un);
      (*gn) = n;
      return 0;
    }

  ASSERT_NOCARRY (mpn_sub_n (ap, ap, bp, an)); /* ap -= bp, u1 += u0 */
  MPN_NORMALIZE (ap, an);

  ASSERT (an > 0);

  cy = mpn_add_n (u1, u1, u0, *un);
  if (cy)
    u1[(*un)++] = cy;

  if (an < bn) /* make an >= bn */
    {
      MPN_PTR_SWAP (ap, an, bp, bn);
      MP_PTR_SWAP (u0, u1);
      negate = ~negate;
    }
  else if (an == bn)
    {
      MPN_CMP (c, ap, bp, an);
      if (c < 0)
        {
          MP_PTR_SWAP (ap, bp);
          MP_PTR_SWAP (u0, u1);
          negate = ~negate;
        }
      else if (c == 0) /* gcd is ap OR bp */
        {
          /* this case seems to never occur
             it should happen only if ap = 2*bp */
          MPN_COPY (gp, ap, an);
          (*gn) = an;

          /* As the gcd is ap OR bp, there are two possible
             cofactors here u1 or -u0, and we want the
             least of the two. */
          MPN_CMP (c, u1, u0, *un);
          if (c < 0) /* u1 is less */
            {
              MPN_NORMALIZE (u1, (*un));
              MPN_COPY (s0p, u1, (*un));
              if (negate)
                (*un) = -(*un);
            }
          else if (c > 0) /* -u0 is less */
            {
              MPN_NORMALIZE (u0, (*un));
              MPN_COPY (s0p, u0, (*un));
              if (!negate)
                (*un) = -(*un);
            }
          else /* same */
            {
              MPN_NORMALIZE (u0, (*un));
              MPN_COPY (s0p, u0, (*un));
            }

          return 0;
        }
    }

  ASSERT (an >= bn);
  qn = an - bn + 1;
  mpn_tdiv_qr (tp, ap, 0, ap, an, bp, bn); /* ap -= q * bp, u1 += q * u0 */

  /* Normalizing seems to be the simplest way to test if the remainder
     is zero. */
  an = bn;
  MPN_NORMALIZE (ap, an);
  if (an == 0)
    {
      /* this case never seems to occur */
      /* gcd = bp */
      MPN_COPY (gp, bp, bn);
      MPN_NORMALIZE (u0, (*un));
      MPN_COPY (s0p, u0, (*un));
      if (!negate)
        (*un) = -(*un);
      (*gn) = bn;
      return 0;
    }

  qn2 = qn;
  u0n = (*un);
  MPN_NORMALIZE (tp, qn2);
  MPN_NORMALIZE (u0, u0n);

  if (u0n > 0)
    {
      /* q * u0 goes just above the quotient; mpn_mul is called with the
         longer operand first */
      if (qn2 > u0n)
        mpn_mul (tp + qn, tp, qn2, u0, u0n);
      else
        mpn_mul (tp + qn, u0, u0n, tp, qn2);

      u0n += qn2;
      MPN_NORMALIZE (tp + qn, u0n);

      /* u1 += q * u0; mpn_add is called with the longer operand first */
      if ((*un) >= u0n)
        {
          cy = mpn_add (u1, u1, (*un), tp + qn, u0n);
          if (cy)
            u1[(*un)++] = cy;
        }
      else
        {
          cy = mpn_add (u1, tp + qn, u0n, u1, (*un));
          (*un) = u0n;
          if (cy)
            u1[(*un)++] = cy;
        }
    }

  return bn;
}

/* Set (u0, u1) = (u0, u1) M

   {u0, *un} and {u1, *un} are replaced by the products and *un is updated
   to the common size of the results (at least one of the two top limbs
   is non-zero afterwards).

   Requires temporary space un + un + M->n = 2*un + M->n */
void
ngcdext_cofactor_adjust (mp_ptr u0, mp_ptr u1, mp_size_t * un,
                         struct ngcd_matrix *M, mp_ptr tp)
{
  /* Let M = (r00, r01)
             (r10, r11)

     We want u0 = u0 * r00 + u1 * r10
             u1 = u0 * r01 + u1 * r11

     We make a copy of u0 at tp and update u0 first */

  mp_limb_t cy, cy2;
  mp_ptr t2p = (tp + (*un)); /* second temporary space */

  ASSERT (tp > M->p[1][1] + M->n);

  MPN_COPY (tp, u0, *un);

  /* mpn_mul is always called with the longer operand first */
  if (M->n >= (*un))
    {
      mpn_mul (t2p, M->p[1][0], M->n, u1, *un); /* t2p = r10 * u1 */
      mpn_mul (u0, M->p[0][0], M->n, tp, *un);  /* u0 = r00 * u0 */
    }
  else
    {
      mpn_mul (t2p, u1, *un, M->p[1][0], M->n);
      mpn_mul (u0, tp, *un, M->p[0][0], M->n);
    }

  cy = mpn_add_n (u0, u0, t2p, M->n + (*un)); /* u0 += t2p */

  if (M->n >= (*un))
    {
      mpn_mul (t2p, M->p[1][1], M->n, u1, *un); /* t2p = r11 * u1 */
      mpn_mul (u1, M->p[0][1], M->n, tp, *un);  /* u1 = r01 * u0 */
    }
  else
    {
      mpn_mul (t2p, u1, *un, M->p[1][1], M->n);
      mpn_mul (u1, tp, *un, M->p[0][1], M->n);
    }

  cy2 = mpn_add_n (u1, u1, t2p, M->n + (*un)); /* u1 += t2p */

  if ((cy) || (cy2)) /* normalise u0, u1 */
    {
      u0[M->n + (*un)] = cy;
      u1[M->n + (*un)] = cy2;
      (*un) += (M->n + 1);
    }
  else
    {
      (*un) += M->n;
      /* both cannot be zero, so this won't overrun */
      while ((u0[*un - 1] == 0) && (u1[*un - 1] == 0))
        (*un)--;
    }
}

/* Computes |t| where t = (gp - s*ap)/bp

   The result is written to {t, *tn}; *tn is set to 0 when t is zero.
   sn is the signed size of s (its sign is the sign of s); the caller in
   this file never passes sn == 0.

   Requires temporary space sn + an */
void
gcdext_get_t (mp_ptr t, mp_size_t * tn, mp_ptr gp, mp_size_t gn, mp_ptr ap,
              mp_size_t an, mp_ptr bp, mp_size_t n, mp_ptr s, mp_size_t sn,
              mp_ptr tp)
{
  mp_size_t ss = ABS (sn);
  mp_limb_t cy;

  /* tp = |s| * ap, longer operand first */
  if (ss >= an)
    mpn_mul (tp, s, ss, ap, an);
  else
    mpn_mul (tp, ap, an, s, ss);

  (*tn) = ss + an;
  (*tn) -= (tp[(*tn) - 1] == 0);

  /* We must have s*ap >= gp and we really want to compute -t */
  if (sn > 0)
    {
      mpn_sub (tp, tp, *tn, gp, gn);
      MPN_NORMALIZE (tp, (*tn));
    }
  else
    {
      cy = mpn_add (tp, tp, *tn, gp, gn);
      if (cy)
        tp[(*tn)++] = cy;
    }

  if ((*tn) == 0)
    {
      return;
    }

  /* the division is expected to be exact: the remainder left in {tp, n}
     is asserted to be zero */
  mpn_tdiv_qr (t, tp, 0, tp, (*tn), bp, n);
  ASSERT_MPN_ZERO_P (tp, n);
  (*tn) -= (n - 1);
  (*tn) -= (t[(*tn) - 1] == 0);
}

/* Single limb extended gcd.

   Returns gcd(x, y) and stores in *a a signed multiplier of x.  The pairs
   (u1, u3) and (u2, v3) are updated with the same quotients, starting from
   (1, x) and (0, y), so only the coefficient of x is tracked.

   NOTE(review): the subtraction steps compute u3 - v3 in unsigned
   arithmetic, so this presumably expects x >= y -- confirm against
   callers.
   NOTE(review): the cofactor updates left-shift the signed value u2, which
   may be negative; ISO C leaves that undefined, so this relies on the usual
   two's complement behaviour of the supported compilers. */
mp_limb_t
mpn_gcdinv_1 (mp_limb_signed_t * a, mp_limb_t x, mp_limb_t y)
{
  mp_limb_signed_t u1 = CNST_LIMB(1);
  mp_limb_signed_t u2 = CNST_LIMB(0);
  mp_limb_signed_t t1;
  mp_limb_t u3, v3;
  mp_limb_t quot, rem;

  u3 = x, v3 = y;

  if ((mp_limb_signed_t) (x & y) < (mp_limb_signed_t) CNST_LIMB(0)) /* x and y both have top bit set */
    {
      quot = u3 - v3;
      t1 = u2; u2 = u1 - u2; u1 = t1; u3 = v3;
      v3 = quot;
    }

  while ((mp_limb_signed_t) (v3<<1) < (mp_limb_signed_t) CNST_LIMB(0)) /* second value has second msb set */
    {
      /* the quotient is 1, 2 or 3 here; find it by comparison */
      quot = u3 - v3;
      if (quot < v3)
        {
          t1 = u2; u2 = u1 - u2; u1 = t1; u3 = v3;
          v3 = quot;
        }
      else if (quot < (v3<<1))
        {
          t1 = u2; u2 = u1 - (u2<<1); u1 = t1; u3 = v3;
          v3 = quot - u3;
        }
      else
        {
          t1 = u2; u2 = u1 - 3*u2; u1 = t1; u3 = v3;
          v3 = quot - (u3<<1);
        }
    }

  while (v3)
    {
      quot = u3 - v3;
      if (u3 < (v3<<2)) /* overflow not possible due to top 2 bits of v3 not being set */
        {
          /* quotient is 1, 2 or 3: avoid the division */
          if (quot < v3)
            {
              t1 = u2; u2 = u1 - u2; u1 = t1; u3 = v3;
              v3 = quot;
            }
          else if (quot < (v3<<1))
            {
              t1 = u2; u2 = u1 - (u2<<1); u1 = t1; u3 = v3;
              v3 = quot - u3;
            }
          else
            {
              t1 = u2; u2 = u1 - 3*u2; u1 = t1; u3 = v3;
              v3 = quot - (u3<<1);
            }
        }
      else
        {
          quot = u3 / v3;
          rem = u3 - v3*quot;
          t1 = u2; u2 = u1 - quot*u2; u1 = t1; u3 = v3;
          v3 = rem;
        }
    }

  /* Quite remarkably, this always has |u1| < x/2 at this point, thus
     comparison with 0 is valid.  The value is deliberately returned signed
     (it is not reduced into [0, y) by adding y); the caller inspects the
     sign itself. */
  (*a) = u1;

  return u3;
}

/* Extended gcd of {ap, an} and {bp, n}.

   Stores the gcd at gp and returns its size in limbs.  A cofactor of
   {ap, an} is stored at s0p and its signed size in *s0size: the magnitude
   is the number of limbs, a negative value means the cofactor is negative,
   and zero means the cofactor is zero.

   Requires an >= n.  Both input operands are overwritten.  Temporary space
   is obtained with TMP_ALLOC_LIMBS and released before every return that
   follows TMP_MARK. */
mp_size_t
mpn_gcdext (mp_ptr gp, mp_ptr s0p, mp_size_t *s0size,
            mp_ptr ap, mp_size_t an, mp_ptr bp, mp_size_t n)
{
  mp_size_t init_scratch, orig_n = n;
  mp_size_t scratch, un, u0n, u1n;
  mp_ptr tp, u0, u1;
  int swapped = 0;
  struct ngcd_matrix M;
  mp_size_t p;
  mp_size_t nn;
  mp_limb_signed_t a;
  int c;
  TMP_DECL;

  ASSERT (an >= n);

  if (an == 1)
    {
      if (!n)
        {
          /* shouldn't ever occur, but we include for completeness */
          gp[0] = ap[0];
          s0p[0] = 1;
          *s0size = 1;

          return 1;
        }

      gp[0] = mpn_gcdinv_1 (&a, ap[0], bp[0]);
      if (a < (mp_limb_signed_t) 0)
        {
          s0p[0] = -a;
          (*s0size) = -1;
        }
      else
        {
          s0p[0] = a;
          (*s0size) = 1 - (s0p[0] == 0);
        }

      return 1;
    }

  init_scratch = MPN_NGCD_MATRIX_INIT_ITCH (n-P_SIZE(n));
  scratch = mpn_nhgcd_itch ((n+1)/2);

  /* Space needed for mpn_ngcd_matrix_adjust */
  if (scratch < 2*n)
    scratch = 2*n;
  if (scratch < an - n + 1) /* the first division can sometimes be selfish!! */
    scratch = an - n + 1;

  /* Space needed for cofactor adjust */
  scratch = MAX (scratch, 2*(n+1) + P_SIZE(n) + 1);

  TMP_MARK;

  if (5*n + 2 + MPN_GCD_LEHMER_N_ITCH(n) > init_scratch + scratch)
    tp = TMP_ALLOC_LIMBS (7*n+4+MPN_GCD_LEHMER_N_ITCH(n)); /* 2n+2 for u0, u1, 5*n+2 + MPN_GCD_LEHMER_N_ITCH(n) for Lehmer
                                                              and copies of ap and bp and s (and finally 3*n+1 for t and get_t) */
  else
    tp = TMP_ALLOC_LIMBS (2*(n+1) + init_scratch + scratch);

  if (an > n)
    {
      /* reduce ap modulo bp first so that both operands have n limbs */
      mp_ptr qp = tp;

      mpn_tdiv_qr (qp, ap, 0, ap, an, bp, n);

      an = n;
      MPN_NORMALIZE (ap, an);
      if (an == 0)
        {
          MPN_COPY (gp, bp, n);
          TMP_FREE;
          (*s0size) = 0;

          return n;
        }
    }

  if (BELOW_THRESHOLD (n, GCDEXT_THRESHOLD))
    {
      n = mpn_ngcdext_lehmer (gp, s0p, s0size, ap, bp, n, tp);
      TMP_FREE;

      return n;
    }

  u0 = tp; /* Cofactor space */
  u1 = tp + n + 1;

  MPN_ZERO (tp, 2*(n+1));

  tp += 2*(n+1);

  /* First iteration, setup u0 and u1 */

  p = P_SIZE(n);

  mpn_ngcd_matrix_init (&M, n - p, tp);
  ASSERT (tp + init_scratch > M.p[1][1] + M.n);
  nn = mpn_nhgcd (ap + p, bp + p, n - p, &M, tp + init_scratch);
  if (nn > 0)
    {
      n = mpn_ngcd_matrix_adjust (&M, p + nn, ap, bp, p, tp + init_scratch);

      /*
         (ap'', bp'')^T = M^-1(ap', bp')^T
         and (ap', bp') = (1*ap + ?*bp, 0*ap + ?*bp)
         We let u0 be minus the factor of ap appearing
         in the expression for bp'' and u1 be the
         factor of ap appearing in the expression for ap''
      */

      MPN_COPY (u0, M.p[1][0], M.n);
      MPN_COPY (u1, M.p[1][1], M.n);

      un = M.n;
      /* normalise u0, u1, both cannot be zero as det = 1 */
      while ((u0[un-1] == 0) && (u1[un-1] == 0))
        un--;
    }
  else
    {
      mp_size_t gn;

      un = 1;
      u0[0] = 0; /* bp = 0*ap + ?*bp, thus u0 = -0 */
      u1[0] = 1; /* ap = 1*ap + ?*bp, thus u1 = 1 */

      n = mpn_ngcdext_subdiv_step (gp, &gn, s0p, u0, u1, &un, ap, bp, n, tp);
      if (n == 0)
        {
          /* never observed to occur */
          (*s0size) = un;
          /* un is a signed size and may be zero or negative here, so the
             top limb must be located via ABS, as in the main loop below */
          ASSERT (((*s0size) == 0) || (s0p[ABS(*s0size) - 1] != 0));
          TMP_FREE;

          return gn;
        }
    }

  while (ABOVE_THRESHOLD (n, GCDEXT_THRESHOLD))
    {
      p = P_SIZE(n);

      mpn_ngcd_matrix_init (&M, n - p, tp);
      nn = mpn_nhgcd (ap + p, bp + p, n - p, &M, tp + init_scratch);
      if (nn > 0)
        {
          n = mpn_ngcd_matrix_adjust (&M, p + nn, ap, bp, p, tp + init_scratch);

          ngcdext_cofactor_adjust (u0, u1, &un, &M, tp + init_scratch);

          /*
             (ap'', bp'')^T = M^-1(ap', bp')^T
             and (ap', bp') = (u1*ap + ?*bp, -u0*ap + ?*bp)
             So we need u0' = -(-c*u1 + a*-u0) = a*u0 + c*u1
             and we need u1' = (d*u1 -b*-u0) = b*u0 + d*u1
          */

          ASSERT (un <= orig_n + 1);
        }
      else
        {
          mp_size_t gn;

          n = mpn_ngcdext_subdiv_step (gp, &gn, s0p, u0, u1, &un, ap, bp, n, tp);
          ASSERT (un <= orig_n + 1);
          if (n == 0)
            {
              (*s0size) = un;
              ASSERT (((*s0size) == 0) || (s0p[ABS(*s0size) - 1] != 0));
              TMP_FREE;

              return gn;
            }
        }
    }

  ASSERT (ap[n-1] > 0 || bp[n-1] > 0);
  ASSERT (u0[un-1] > 0 || u1[un-1] > 0);

  if (ap[n-1] < bp[n-1])
    {
      MP_PTR_SWAP (ap, bp);
      MP_PTR_SWAP (u0, u1);
      swapped = 1;
    }

  an = n; /* {ap, an} and {bp, bn} are normalised, {ap, an} >= {bp, bn} */
  MPN_NORMALIZE (bp, n);

  if (n == 0)
    {
      /* If bp == 0 then gp = ap
         with cofactor u1
         If we swapped then cofactor is -u1
         This case never seems to happen */
      MPN_COPY (gp, ap, an);
      MPN_NORMALIZE (u1, un);
      MPN_COPY (s0p, u1, un);
      (*s0size) = un;
      if (swapped)
        (*s0size) = -(*s0size);
      TMP_FREE;

      return an;
    }

  /*
     If at this point we have s*ap' + t*bp' = gp where gp is the gcd
     and (ap', bp') = (u1*ap + ?*bp, -u0*ap + ?*bp)
     then gp = s*u1*ap - t*u0*ap + ?*bp
     and the cofactor we want is (s*u1-t*u0).

     First there is the special case u0 = 0, u1 = 1 in which case we do not need
     to compute t...
  */

  ASSERT (u1 + un <= tp);
  u0n = un;
  MPN_NORMALIZE (u0, u0n); /* {u0, u0n} is now normalised */

  if (u0n == 0) /* u1 = 1 case is rare */
    {
      mp_size_t gn;

      gn = mpn_ngcdext_lehmer (gp, s0p, s0size, ap, bp, n, tp);
      if (swapped)
        (*s0size) = -(*s0size);
      TMP_FREE;

      return gn;
    }
  else
    {
      /* Compute final gcd. */

      mp_size_t gn, sn, tn;
      mp_ptr s, t;
      mp_limb_t cy;
      int negate = 0;

      /* Save an, bn first as gcdext destroys inputs */
      s = tp;
      tp += an;

      MPN_COPY (tp, ap, an);
      MPN_COPY (tp + an, bp, an);

      if (mpn_cmp (tp, tp + an, an) == 0)
        {
          /* gcd is tp or tp + an
             return smallest cofactor, either -u0 or u1 */
          gn = an;
          MPN_NORMALIZE (tp, gn);
          MPN_COPY (gp, tp, gn);

          MPN_CMP (c, u0, u1, un);
          if (c < (mp_limb_signed_t) 0)
            {
              MPN_COPY (s0p, u0, u0n);
              (*s0size) = -u0n;
            }
          else
            {
              MPN_NORMALIZE (u1, un);
              MPN_COPY (s0p, u1, un);
              (*s0size) = un;
            }
          TMP_FREE;

          return gn;
        }

      gn = mpn_ngcdext_lehmer (gp, s, &sn, tp, tp + an, an, tp + 2*an);

      /* Special case, s == 0, t == 1, cofactor = -u0 case is rare */
      if (sn == 0)
        {
          MPN_COPY (s0p, u0, u0n);
          (*s0size) = -u0n;
          if (swapped)
            (*s0size) = -(*s0size);
          TMP_FREE;

          return gn;
        }

      /* We'll need the other cofactor t = (gp - s*ap)/bp */
      t = tp;
      tp += (an + 1);

      gcdext_get_t (t, &tn, gp, gn, ap, an, bp, n, s, sn, tp);

      ASSERT ((tn == 0) || (t[tn - 1] > 0)); /* {t, tn} is normalised */
      ASSERT (tn <= an + 1);

      /* We want to compute s*u1 - t*u0, so if s is negative
         t will be positive, so we'd be dealing with negative
         numbers. We fix that here. */
      if (sn < 0)
        {
          sn = -sn;
          negate = 1;
        }

      /* Now we can deal with the special case u1 = 0 */
      u1n = un;
      MPN_NORMALIZE (u1, u1n); /* {u1, u1n} is now normalised */

      if (u1n == 0) /* case is rare */
        {
          MPN_COPY (s0p, t, tn);
          (*s0size) = -tn;
          if (swapped ^ negate)
            (*s0size) = -(*s0size);
          TMP_FREE;

          return gn;
        }

      /* t may be zero, but we need to compute s*u1 anyway */
      if (sn >= u1n)
        mpn_mul (s0p, s, sn, u1, u1n);
      else
        mpn_mul (s0p, u1, u1n, s, sn);

      (*s0size) = sn + u1n;
      (*s0size) -= (s0p[sn + u1n - 1] == 0);

      ASSERT (s0p[*s0size - 1] > 0); /* {s0p, *s0size} is normalised now */

      if (tn == 0) /* case is rare */
        {
          if (swapped ^ negate)
            (*s0size) = -(*s0size);
          TMP_FREE;

          return gn;
        }

      /* Now compute the rest of the cofactor, t*u0
         and subtract it
         We're done with u1 and s which happen to be
         consecutive, so use that space */
      ASSERT (u1 + tn + u0n <= t);

      if (tn > u0n)
        mpn_mul (u1, t, tn, u0, u0n);
      else
        mpn_mul (u1, u0, u0n, t, tn);

      u1n = tn + u0n;
      u1n -= (u1[tn + u0n - 1] == 0);
      ASSERT (u1[u1n - 1] > 0);

      /* Recall t is now negated so s*u1 - t*u0
         involves an *addition* */
      if ((*s0size) >= u1n)
        {
          cy = mpn_add (s0p, s0p, *s0size, u1, u1n);
          if (cy)
            s0p[(*s0size)++] = cy;
        }
      else
        {
          cy = mpn_add (s0p, u1, u1n, s0p, *s0size);
          (*s0size) = u1n;
          if (cy)
            s0p[(*s0size)++] = cy;
        }

      if (swapped ^ negate)
        (*s0size) = -(*s0size);
      TMP_FREE;

      return gn;
    }
}

#endif