From 006f33dc4935324ab523f3fec971aff0d61918ec Mon Sep 17 00:00:00 2001 From: William Hart Date: Fri, 28 Feb 2014 13:59:52 +0000 Subject: [PATCH] Updated mpz/jacobi.c from GMP 5.1.3. --- mpz/jacobi.c | 345 +++++++++++++++++---------------------------------- 1 file changed, 116 insertions(+), 229 deletions(-) diff --git a/mpz/jacobi.c b/mpz/jacobi.c index a169b26c..9090eaa1 100644 --- a/mpz/jacobi.c +++ b/mpz/jacobi.c @@ -1,12 +1,13 @@ /* mpz_jacobi, mpz_legendre, mpz_kronecker -- mpz/mpz Jacobi symbols. -Copyright 2000, 2001, 2002, 2005 Free Software Foundation, Inc. +Copyright 2000, 2001, 2002, 2005, 2010, 2011, 2012 Free Software Foundation, +Inc. This file is part of the GNU MP Library. The GNU MP Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the -Free Software Foundation; either version 2.1 of the License, or (at your +Free Software Foundation; either version 3 of the License, or (at your option) any later version. The GNU MP Library is distributed in the hope that it will be useful, but @@ -15,9 +16,7 @@ FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along -with the GNU MP Library; see the file COPYING.LIB. If not, write to the Free -Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, -USA. */ +with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ #include #include "mpir.h" @@ -25,23 +24,10 @@ USA. */ #include "longlong.h" -/* Change this to "#define TRACE(x) x" for some traces. */ -#define TRACE(x) - - -#define MPN_RSHIFT_OR_COPY(dst,src,size,shift) \ - do { \ - if ((shift) != 0) \ - { \ - ASSERT_NOCARRY (mpn_rshift (dst, src, size, shift)); \ - (size) -= ((dst)[(size)-1] == 0); \ - } \ - else \ - MPN_COPY (dst, src, size); \ - } while (0) - - -/* This code does triple duty as mpz_jacobi, mpz_legendre and mpz_kronecker. +/* This code does triple duty as mpz_jacobi, mpz_legendre and + mpz_kronecker. For ABI compatibility, the link symbol is + __gmpz_jacobi, not __gmpz_kronecker, even though the latter would + be more logical. mpz_jacobi could assume b is odd, but the improvements from that seem small compared to other operations, and anything significant should be @@ -53,262 +39,163 @@ USA. */ multiple of b), but the checking for that takes little time compared to other operations. - The main loop is just a simple binary GCD with the jacobi symbol result - tracked during the reduction. - - The special cases for a or b fitting in one limb let mod_1 or modexact_1 - get used, without any copying, and end up just as efficient as the mixed - precision mpz_kronecker_ui etc. - - When tdiv_qr is called it's not necessary to make "a" odd or make a - working copy of it, but tdiv_qr is going to be pretty slow so it's not - worth bothering trying to save anything for that case. - Enhancements: - mpn_bdivmod could be used instead of mpn_tdiv_qr, like in mpn_gcd. - Currently tdiv_qr is preferred since it's sub-quadratic on big sizes, - although bdivmod might be a touch quicker on small sizes. This can be - revised when bdivmod becomes sub-quadratic too. - - Some sort of multi-step algorithm should be used. The current subtract - and shift for every bit is very inefficient. Lehmer (per current gcdext) - would need some low bits included in its calculation to apply the sign - change for reciprocity. Binary Lehmer keeps low bits to strip twos - anyway, so might be better suited. Maybe the accelerated GCD style k-ary - reduction would work, if sign changes due to the extra factors it - introduces can be accounted for (or maybe they can be ignored). */ + mpn_bdiv_qr should be used instead of mpn_tdiv_qr. +*/ int mpz_jacobi (mpz_srcptr a, mpz_srcptr b) { mp_srcptr asrcp, bsrcp; mp_size_t asize, bsize; + mp_limb_t alow, blow; mp_ptr ap, bp; - mp_limb_t alow, blow, ahigh, bhigh, asecond, bsecond; - unsigned atwos, btwos; + unsigned btwos; int result_bit1; + int res; TMP_DECL; - TRACE (printf ("start asize=%d bsize=%d\n", SIZ(a), SIZ(b)); - mpz_trace (" a", a); - mpz_trace (" b", b)); - asize = SIZ(a); asrcp = PTR(a); alow = asrcp[0]; bsize = SIZ(b); - if (bsize == 0) - return JACOBI_LS0 (alow, asize); /* (a/0) */ - bsrcp = PTR(b); blow = bsrcp[0]; - if (asize == 0) - return JACOBI_0LS (blow, bsize); /* (0/b) */ + /* The MPN jacobi functions require positive a and b, and b odd. So + we must to handle the cases of a or b zero, then signs, and then + the case of even b. + */ - /* (even/even)=0 */ - if (((alow | blow) & 1) == 0) + if (bsize == 0) + /* (a/0) = [ a = 1 or a = -1 ] */ + return JACOBI_LS0 (alow, asize); + + if (asize == 0) + /* (0/b) = [ b = 1 or b = - 1 ] */ + return JACOBI_0LS (blow, bsize); + + if ( (((alow | blow) & 1) == 0)) + /* Common factor of 2 ==> (a/b) = 0 */ return 0; - /* account for effect of sign of b, then ignore it */ - result_bit1 = JACOBI_BSGN_SS_BIT1 (asize, bsize); - bsize = ABS (bsize); + if (bsize < 0) + { + /* (a/-1) = -1 if a < 0, +1 if a >= 0 */ + result_bit1 = (asize < 0) << 1; + bsize = -bsize; + } + else + result_bit1 = 0; - /* low zero limbs on b can be discarded */ JACOBI_STRIP_LOW_ZEROS (result_bit1, alow, bsrcp, bsize, blow); count_trailing_zeros (btwos, blow); - TRACE (printf ("b twos %u\n", btwos)); - - /* establish shifted blow */ blow >>= btwos; - if (bsize > 1) + + if (bsize > 1 && btwos > 0) { - bsecond = bsrcp[1]; - if (btwos != 0) - blow |= (bsecond << (GMP_NUMB_BITS - btwos)) & GMP_NUMB_MASK; + mp_limb_t b1 = bsrcp[1]; + blow |= b1 << (GMP_NUMB_BITS - btwos); + if (bsize == 2 && (b1 >> btwos) == 0) + bsize = 1; } - /* account for effect of sign of a, then ignore it */ - result_bit1 ^= JACOBI_ASGN_SU_BIT1 (asize, blow); - asize = ABS (asize); - - if (bsize == 1 || (bsize == 2 && (bsecond >> btwos) == 0)) + if (asize < 0) { - /* special case one limb b, use modexact and no copying */ - - /* (a/2)=(2/a) with a odd, and if b is even then a is odd here */ - result_bit1 ^= JACOBI_TWOS_U_BIT1 (btwos, alow); - - if (blow == 1) /* (a/1)=1 always */ - return JACOBI_BIT1_TO_PN (result_bit1); - - JACOBI_MOD_OR_MODEXACT_1_ODD (result_bit1, alow, asrcp, asize, blow); - TRACE (printf ("base (%lu/%lu) with %d\n", - alow, blow, JACOBI_BIT1_TO_PN (result_bit1))); - return mpn_jacobi_base (alow, blow, result_bit1); + /* (-1/b) = -1 iff b = 3 (mod 4) */ + result_bit1 ^= JACOBI_N1B_BIT1(blow); + asize = -asize; } - /* Discard low zero limbs of a. Usually there won't be anything to - strip, hence not bothering with it for the bsize==1 case. */ JACOBI_STRIP_LOW_ZEROS (result_bit1, blow, asrcp, asize, alow); - count_trailing_zeros (atwos, alow); - TRACE (printf ("a twos %u\n", atwos)); - result_bit1 ^= JACOBI_TWOS_U_BIT1 (atwos, blow); + /* Ensure asize >= bsize. Take advantage of the generalized + reciprocity law (a/b*2^n) = (b*2^n / a) * RECIP(a,b) */ - /* establish shifted alow */ - alow >>= atwos; - if (asize > 1) - { - asecond = asrcp[1]; - if (atwos != 0) - alow |= (asecond << (GMP_NUMB_BITS - atwos)) & GMP_NUMB_MASK; - } - - /* (a/2)=(2/a) with a odd */ - result_bit1 ^= JACOBI_TWOS_U_BIT1 (btwos, alow); - - if (asize == 1 || (asize == 2 && (asecond >> atwos) == 0)) - { - /* another special case with modexact and no copying */ - - if (alow == 1) /* (1/b)=1 always */ - return JACOBI_BIT1_TO_PN (result_bit1); - - /* b still has its twos, so cancel out their effect */ - result_bit1 ^= JACOBI_TWOS_U_BIT1 (btwos, alow); - - result_bit1 ^= JACOBI_RECIP_UU_BIT1 (alow, blow); /* now (b/a) */ - JACOBI_MOD_OR_MODEXACT_1_ODD (result_bit1, blow, bsrcp, bsize, alow); - TRACE (printf ("base (%lu/%lu) with %d\n", - blow, alow, JACOBI_BIT1_TO_PN (result_bit1))); - return mpn_jacobi_base (blow, alow, result_bit1); - } - - - TMP_MARK; - TMP_ALLOC_LIMBS_2 (ap, asize, bp, bsize); - - MPN_RSHIFT_OR_COPY (ap, asrcp, asize, atwos); - ASSERT (alow == ap[0]); - TRACE (mpn_trace ("stripped a", ap, asize)); - - MPN_RSHIFT_OR_COPY (bp, bsrcp, bsize, btwos); - ASSERT (blow == bp[0]); - TRACE (mpn_trace ("stripped b", bp, bsize)); - - /* swap if necessary to make a longer than b */ if (asize < bsize) { - TRACE (printf ("swap\n")); - MPN_PTR_SWAP (ap,asize, bp,bsize); + MPN_SRCPTR_SWAP (asrcp, asize, bsrcp, bsize); MP_LIMB_T_SWAP (alow, blow); + + /* NOTE: The value of alow (old blow) is a bit subtle. For this code + path, we get alow as the low, always odd, limb of shifted A. Which is + what we need for the reciprocity update below. + + However, all other uses of alow assumes that it is *not* + shifted. Luckily, alow matters only when either + + + btwos > 0, in which case A is always odd + + + asize == bsize == 1, in which case this code path is never + taken. */ + + count_trailing_zeros (btwos, blow); + blow >>= btwos; + + if (bsize > 1 && btwos > 0) + { + mp_limb_t b1 = bsrcp[1]; + blow |= b1 << (GMP_NUMB_BITS - btwos); + if (bsize == 2 && (b1 >> btwos) == 0) + bsize = 1; + } + result_bit1 ^= JACOBI_RECIP_UU_BIT1 (alow, blow); } - /* If a is bigger than b then reduce to a mod b. - Division is much faster than chipping away at "a" bit-by-bit. */ + if (bsize == 1) + { + result_bit1 ^= JACOBI_TWOS_U_BIT1(btwos, alow); + + if (blow == 1) + return JACOBI_BIT1_TO_PN (result_bit1); + + if (asize > 1) + JACOBI_MOD_OR_MODEXACT_1_ODD (result_bit1, alow, asrcp, asize, blow); + + return mpn_jacobi_base (alow, blow, result_bit1); + } + + /* Allocation strategy: For A, we allocate a working copy only for A % B, but + when A is much larger than B, we have to allocate space for the large + quotient. We use the same area, pointed to by bp, for both the quotient + A/B and the working copy of B. */ + + TMP_MARK; + + if (asize >= 2*bsize) + TMP_ALLOC_LIMBS_2 (ap, bsize, bp, asize - bsize + 1); + else + TMP_ALLOC_LIMBS_2 (ap, bsize, bp, bsize); + + /* In the case of even B, we conceptually shift out the powers of two first, + and then divide A mod B. Hence, when taking those powers of two into + account, we must use alow *before* the division. Doing the actual division + first is ok, because the point is to remove multiples of B from A, and + multiples of 2^k B are good enough. */ if (asize > bsize) + mpn_tdiv_qr (bp, ap, 0, asrcp, asize, bsrcp, bsize); + else + MPN_COPY (ap, asrcp, bsize); + + if (btwos > 0) { - mp_ptr rp, qp; + result_bit1 ^= JACOBI_TWOS_U_BIT1(btwos, alow); - TRACE (printf ("tdiv_qr asize=%ld bsize=%ld\n", asize, bsize)); - - TMP_ALLOC_LIMBS_2 (rp, bsize, qp, asize-bsize+1); - mpn_tdiv_qr (qp, rp, (mp_size_t) 0, ap, asize, bp, bsize); - ap = rp; - asize = bsize; - MPN_NORMALIZE (ap, asize); - - TRACE (printf ("tdiv_qr asize=%ld bsize=%ld\n", asize, bsize); - mpn_trace (" a", ap, asize); - mpn_trace (" b", bp, bsize)); - - if (asize == 0) /* (0/b)=0 for b!=1 */ - goto zero; - - alow = ap[0]; - goto strip_a; + ASSERT_NOCARRY (mpn_rshift (bp, bsrcp, bsize, btwos)); + bsize -= (ap[bsize-1] | bp[bsize-1]) == 0; } + else + MPN_COPY (bp, bsrcp, bsize); - for (;;) - { - ASSERT (asize >= 1); /* a,b non-empty */ - ASSERT (bsize >= 1); - ASSERT (ap[asize-1] != 0); /* a,b normalized (and hence non-zero) */ - ASSERT (bp[bsize-1] != 0); - ASSERT (alow == ap[0]); /* low limb copies should be correct */ - ASSERT (blow == bp[0]); - ASSERT (alow & 1); /* a,b odd */ - ASSERT (blow & 1); + ASSERT (blow == bp[0]); + res = mpn_jacobi_n (ap, bp, bsize, + mpn_jacobi_init (ap[0], blow, (result_bit1>>1) & 1)); - TRACE (printf ("top asize=%ld bsize=%ld\n", asize, bsize); - mpn_trace (" a", ap, asize); - mpn_trace (" b", bp, bsize)); - - /* swap if necessary to make a>=b, applying reciprocity - high limbs are almost always enough to tell which is bigger */ - if (asize < bsize - || (asize == bsize - && ((ahigh=ap[asize-1]) < (bhigh=bp[asize-1]) - || (ahigh == bhigh - && mpn_cmp (ap, bp, asize-1) < 0)))) - { - TRACE (printf ("swap\n")); - MPN_PTR_SWAP (ap,asize, bp,bsize); - MP_LIMB_T_SWAP (alow, blow); - result_bit1 ^= JACOBI_RECIP_UU_BIT1 (alow, blow); - } - - if (asize == 1) - break; - - /* a = a-b */ - ASSERT (asize >= bsize); - ASSERT_NOCARRY (mpn_sub (ap, ap, asize, bp, bsize)); - MPN_NORMALIZE (ap, asize); - alow = ap[0]; - - /* (0/b)=0 for b!=1. b!=1 when a==0 because otherwise would have had - a==1 which is asize==1 and would have exited above. */ - if (asize == 0) - goto zero; - - strip_a: - /* low zero limbs on a can be discarded */ - JACOBI_STRIP_LOW_ZEROS (result_bit1, blow, ap, asize, alow); - - if ((alow & 1) == 0) - { - /* factors of 2 from a */ - unsigned twos; - count_trailing_zeros (twos, alow); - TRACE (printf ("twos %u\n", twos)); - result_bit1 ^= JACOBI_TWOS_U_BIT1 (twos, blow); - ASSERT_NOCARRY (mpn_rshift (ap, ap, asize, twos)); - asize -= (ap[asize-1] == 0); - alow = ap[0]; - } - } - - ASSERT (asize == 1 && bsize == 1); /* just alow and blow left */ TMP_FREE; - - /* (1/b)=1 always (in this case have b==1 because a>=b) */ - if (alow == 1) - return JACOBI_BIT1_TO_PN (result_bit1); - - /* swap with reciprocity and do (b/a) */ - result_bit1 ^= JACOBI_RECIP_UU_BIT1 (alow, blow); - TRACE (printf ("base (%lu/%lu) with %d\n", - blow, alow, JACOBI_BIT1_TO_PN (result_bit1))); - return mpn_jacobi_base (blow, alow, result_bit1); - - zero: - TMP_FREE; - return 0; + return res; }