Updated mpz/jacobi.c from GMP 5.1.3.

This commit is contained in:
William Hart 2014-02-28 13:59:52 +00:00
parent 7e44cf98e7
commit 006f33dc49

View File

@ -1,12 +1,13 @@
/* mpz_jacobi, mpz_legendre, mpz_kronecker -- mpz/mpz Jacobi symbols.
Copyright 2000, 2001, 2002, 2005 Free Software Foundation, Inc.
Copyright 2000, 2001, 2002, 2005, 2010, 2011, 2012 Free Software Foundation,
Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify it
under the terms of the GNU Lesser General Public License as published by the
Free Software Foundation; either version 2.1 of the License, or (at your
Free Software Foundation; either version 3 of the License, or (at your
option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
@ -15,9 +16,7 @@ FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
for more details.
You should have received a copy of the GNU Lesser General Public License along
with the GNU MP Library; see the file COPYING.LIB. If not, write to the Free
Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301,
USA. */
with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include <stdio.h>
#include "mpir.h"
@ -25,23 +24,10 @@ USA. */
#include "longlong.h"
/* Change this to "#define TRACE(x) x" for some traces. */
#define TRACE(x)
#define MPN_RSHIFT_OR_COPY(dst,src,size,shift) \
do { \
if ((shift) != 0) \
{ \
ASSERT_NOCARRY (mpn_rshift (dst, src, size, shift)); \
(size) -= ((dst)[(size)-1] == 0); \
} \
else \
MPN_COPY (dst, src, size); \
} while (0)
/* This code does triple duty as mpz_jacobi, mpz_legendre and mpz_kronecker.
/* This code does triple duty as mpz_jacobi, mpz_legendre and
mpz_kronecker. For ABI compatibility, the link symbol is
__gmpz_jacobi, not __gmpz_kronecker, even though the latter would
be more logical.
mpz_jacobi could assume b is odd, but the improvements from that seem
small compared to other operations, and anything significant should be
@ -53,262 +39,163 @@ USA. */
multiple of b), but the checking for that takes little time compared to
other operations.
The main loop is just a simple binary GCD with the jacobi symbol result
tracked during the reduction.
The special cases for a or b fitting in one limb let mod_1 or modexact_1
get used, without any copying, and end up just as efficient as the mixed
precision mpz_kronecker_ui etc.
When tdiv_qr is called it's not necessary to make "a" odd or make a
working copy of it, but tdiv_qr is going to be pretty slow so it's not
worth bothering trying to save anything for that case.
Enhancements:
mpn_bdivmod could be used instead of mpn_tdiv_qr, like in mpn_gcd.
Currently tdiv_qr is preferred since it's sub-quadratic on big sizes,
although bdivmod might be a touch quicker on small sizes. This can be
revised when bdivmod becomes sub-quadratic too.
Some sort of multi-step algorithm should be used. The current subtract
and shift for every bit is very inefficient. Lehmer (per current gcdext)
would need some low bits included in its calculation to apply the sign
change for reciprocity. Binary Lehmer keeps low bits to strip twos
anyway, so might be better suited. Maybe the accelerated GCD style k-ary
reduction would work, if sign changes due to the extra factors it
introduces can be accounted for (or maybe they can be ignored). */
mpn_bdiv_qr should be used instead of mpn_tdiv_qr.
*/
int
mpz_jacobi (mpz_srcptr a, mpz_srcptr b)
{
mp_srcptr asrcp, bsrcp;
mp_size_t asize, bsize;
mp_limb_t alow, blow;
mp_ptr ap, bp;
mp_limb_t alow, blow, ahigh, bhigh, asecond, bsecond;
unsigned atwos, btwos;
unsigned btwos;
int result_bit1;
int res;
TMP_DECL;
TRACE (printf ("start asize=%d bsize=%d\n", SIZ(a), SIZ(b));
mpz_trace (" a", a);
mpz_trace (" b", b));
asize = SIZ(a);
asrcp = PTR(a);
alow = asrcp[0];
bsize = SIZ(b);
if (bsize == 0)
return JACOBI_LS0 (alow, asize); /* (a/0) */
bsrcp = PTR(b);
blow = bsrcp[0];
if (asize == 0)
return JACOBI_0LS (blow, bsize); /* (0/b) */
/* The MPN jacobi functions require positive a and b, and b odd. So
we must to handle the cases of a or b zero, then signs, and then
the case of even b.
*/
/* (even/even)=0 */
if (((alow | blow) & 1) == 0)
if (bsize == 0)
/* (a/0) = [ a = 1 or a = -1 ] */
return JACOBI_LS0 (alow, asize);
if (asize == 0)
/* (0/b) = [ b = 1 or b = - 1 ] */
return JACOBI_0LS (blow, bsize);
if ( (((alow | blow) & 1) == 0))
/* Common factor of 2 ==> (a/b) = 0 */
return 0;
/* account for effect of sign of b, then ignore it */
result_bit1 = JACOBI_BSGN_SS_BIT1 (asize, bsize);
bsize = ABS (bsize);
if (bsize < 0)
{
/* (a/-1) = -1 if a < 0, +1 if a >= 0 */
result_bit1 = (asize < 0) << 1;
bsize = -bsize;
}
else
result_bit1 = 0;
/* low zero limbs on b can be discarded */
JACOBI_STRIP_LOW_ZEROS (result_bit1, alow, bsrcp, bsize, blow);
count_trailing_zeros (btwos, blow);
TRACE (printf ("b twos %u\n", btwos));
/* establish shifted blow */
blow >>= btwos;
if (bsize > 1)
if (bsize > 1 && btwos > 0)
{
bsecond = bsrcp[1];
if (btwos != 0)
blow |= (bsecond << (GMP_NUMB_BITS - btwos)) & GMP_NUMB_MASK;
mp_limb_t b1 = bsrcp[1];
blow |= b1 << (GMP_NUMB_BITS - btwos);
if (bsize == 2 && (b1 >> btwos) == 0)
bsize = 1;
}
/* account for effect of sign of a, then ignore it */
result_bit1 ^= JACOBI_ASGN_SU_BIT1 (asize, blow);
asize = ABS (asize);
if (bsize == 1 || (bsize == 2 && (bsecond >> btwos) == 0))
if (asize < 0)
{
/* special case one limb b, use modexact and no copying */
/* (-1/b) = -1 iff b = 3 (mod 4) */
result_bit1 ^= JACOBI_N1B_BIT1(blow);
asize = -asize;
}
/* (a/2)=(2/a) with a odd, and if b is even then a is odd here */
result_bit1 ^= JACOBI_TWOS_U_BIT1 (btwos, alow);
JACOBI_STRIP_LOW_ZEROS (result_bit1, blow, asrcp, asize, alow);
if (blow == 1) /* (a/1)=1 always */
/* Ensure asize >= bsize. Take advantage of the generalized
reciprocity law (a/b*2^n) = (b*2^n / a) * RECIP(a,b) */
if (asize < bsize)
{
MPN_SRCPTR_SWAP (asrcp, asize, bsrcp, bsize);
MP_LIMB_T_SWAP (alow, blow);
/* NOTE: The value of alow (old blow) is a bit subtle. For this code
path, we get alow as the low, always odd, limb of shifted A. Which is
what we need for the reciprocity update below.
However, all other uses of alow assumes that it is *not*
shifted. Luckily, alow matters only when either
+ btwos > 0, in which case A is always odd
+ asize == bsize == 1, in which case this code path is never
taken. */
count_trailing_zeros (btwos, blow);
blow >>= btwos;
if (bsize > 1 && btwos > 0)
{
mp_limb_t b1 = bsrcp[1];
blow |= b1 << (GMP_NUMB_BITS - btwos);
if (bsize == 2 && (b1 >> btwos) == 0)
bsize = 1;
}
result_bit1 ^= JACOBI_RECIP_UU_BIT1 (alow, blow);
}
if (bsize == 1)
{
result_bit1 ^= JACOBI_TWOS_U_BIT1(btwos, alow);
if (blow == 1)
return JACOBI_BIT1_TO_PN (result_bit1);
if (asize > 1)
JACOBI_MOD_OR_MODEXACT_1_ODD (result_bit1, alow, asrcp, asize, blow);
TRACE (printf ("base (%lu/%lu) with %d\n",
alow, blow, JACOBI_BIT1_TO_PN (result_bit1)));
return mpn_jacobi_base (alow, blow, result_bit1);
}
/* Discard low zero limbs of a. Usually there won't be anything to
strip, hence not bothering with it for the bsize==1 case. */
JACOBI_STRIP_LOW_ZEROS (result_bit1, blow, asrcp, asize, alow);
count_trailing_zeros (atwos, alow);
TRACE (printf ("a twos %u\n", atwos));
result_bit1 ^= JACOBI_TWOS_U_BIT1 (atwos, blow);
/* establish shifted alow */
alow >>= atwos;
if (asize > 1)
{
asecond = asrcp[1];
if (atwos != 0)
alow |= (asecond << (GMP_NUMB_BITS - atwos)) & GMP_NUMB_MASK;
}
/* (a/2)=(2/a) with a odd */
result_bit1 ^= JACOBI_TWOS_U_BIT1 (btwos, alow);
if (asize == 1 || (asize == 2 && (asecond >> atwos) == 0))
{
/* another special case with modexact and no copying */
if (alow == 1) /* (1/b)=1 always */
return JACOBI_BIT1_TO_PN (result_bit1);
/* b still has its twos, so cancel out their effect */
result_bit1 ^= JACOBI_TWOS_U_BIT1 (btwos, alow);
result_bit1 ^= JACOBI_RECIP_UU_BIT1 (alow, blow); /* now (b/a) */
JACOBI_MOD_OR_MODEXACT_1_ODD (result_bit1, blow, bsrcp, bsize, alow);
TRACE (printf ("base (%lu/%lu) with %d\n",
blow, alow, JACOBI_BIT1_TO_PN (result_bit1)));
return mpn_jacobi_base (blow, alow, result_bit1);
}
/* Allocation strategy: For A, we allocate a working copy only for A % B, but
when A is much larger than B, we have to allocate space for the large
quotient. We use the same area, pointed to by bp, for both the quotient
A/B and the working copy of B. */
TMP_MARK;
TMP_ALLOC_LIMBS_2 (ap, asize, bp, bsize);
MPN_RSHIFT_OR_COPY (ap, asrcp, asize, atwos);
ASSERT (alow == ap[0]);
TRACE (mpn_trace ("stripped a", ap, asize));
if (asize >= 2*bsize)
TMP_ALLOC_LIMBS_2 (ap, bsize, bp, asize - bsize + 1);
else
TMP_ALLOC_LIMBS_2 (ap, bsize, bp, bsize);
MPN_RSHIFT_OR_COPY (bp, bsrcp, bsize, btwos);
ASSERT (blow == bp[0]);
TRACE (mpn_trace ("stripped b", bp, bsize));
/* swap if necessary to make a longer than b */
if (asize < bsize)
{
TRACE (printf ("swap\n"));
MPN_PTR_SWAP (ap,asize, bp,bsize);
MP_LIMB_T_SWAP (alow, blow);
result_bit1 ^= JACOBI_RECIP_UU_BIT1 (alow, blow);
}
/* If a is bigger than b then reduce to a mod b.
Division is much faster than chipping away at "a" bit-by-bit. */
/* In the case of even B, we conceptually shift out the powers of two first,
and then divide A mod B. Hence, when taking those powers of two into
account, we must use alow *before* the division. Doing the actual division
first is ok, because the point is to remove multiples of B from A, and
multiples of 2^k B are good enough. */
if (asize > bsize)
mpn_tdiv_qr (bp, ap, 0, asrcp, asize, bsrcp, bsize);
else
MPN_COPY (ap, asrcp, bsize);
if (btwos > 0)
{
mp_ptr rp, qp;
result_bit1 ^= JACOBI_TWOS_U_BIT1(btwos, alow);
TRACE (printf ("tdiv_qr asize=%ld bsize=%ld\n", asize, bsize));
TMP_ALLOC_LIMBS_2 (rp, bsize, qp, asize-bsize+1);
mpn_tdiv_qr (qp, rp, (mp_size_t) 0, ap, asize, bp, bsize);
ap = rp;
asize = bsize;
MPN_NORMALIZE (ap, asize);
TRACE (printf ("tdiv_qr asize=%ld bsize=%ld\n", asize, bsize);
mpn_trace (" a", ap, asize);
mpn_trace (" b", bp, bsize));
if (asize == 0) /* (0/b)=0 for b!=1 */
goto zero;
alow = ap[0];
goto strip_a;
ASSERT_NOCARRY (mpn_rshift (bp, bsrcp, bsize, btwos));
bsize -= (ap[bsize-1] | bp[bsize-1]) == 0;
}
else
MPN_COPY (bp, bsrcp, bsize);
for (;;)
{
ASSERT (asize >= 1); /* a,b non-empty */
ASSERT (bsize >= 1);
ASSERT (ap[asize-1] != 0); /* a,b normalized (and hence non-zero) */
ASSERT (bp[bsize-1] != 0);
ASSERT (alow == ap[0]); /* low limb copies should be correct */
ASSERT (blow == bp[0]);
ASSERT (alow & 1); /* a,b odd */
ASSERT (blow & 1);
res = mpn_jacobi_n (ap, bp, bsize,
mpn_jacobi_init (ap[0], blow, (result_bit1>>1) & 1));
TRACE (printf ("top asize=%ld bsize=%ld\n", asize, bsize);
mpn_trace (" a", ap, asize);
mpn_trace (" b", bp, bsize));
/* swap if necessary to make a>=b, applying reciprocity
high limbs are almost always enough to tell which is bigger */
if (asize < bsize
|| (asize == bsize
&& ((ahigh=ap[asize-1]) < (bhigh=bp[asize-1])
|| (ahigh == bhigh
&& mpn_cmp (ap, bp, asize-1) < 0))))
{
TRACE (printf ("swap\n"));
MPN_PTR_SWAP (ap,asize, bp,bsize);
MP_LIMB_T_SWAP (alow, blow);
result_bit1 ^= JACOBI_RECIP_UU_BIT1 (alow, blow);
}
if (asize == 1)
break;
/* a = a-b */
ASSERT (asize >= bsize);
ASSERT_NOCARRY (mpn_sub (ap, ap, asize, bp, bsize));
MPN_NORMALIZE (ap, asize);
alow = ap[0];
/* (0/b)=0 for b!=1. b!=1 when a==0 because otherwise would have had
a==1 which is asize==1 and would have exited above. */
if (asize == 0)
goto zero;
strip_a:
/* low zero limbs on a can be discarded */
JACOBI_STRIP_LOW_ZEROS (result_bit1, blow, ap, asize, alow);
if ((alow & 1) == 0)
{
/* factors of 2 from a */
unsigned twos;
count_trailing_zeros (twos, alow);
TRACE (printf ("twos %u\n", twos));
result_bit1 ^= JACOBI_TWOS_U_BIT1 (twos, blow);
ASSERT_NOCARRY (mpn_rshift (ap, ap, asize, twos));
asize -= (ap[asize-1] == 0);
alow = ap[0];
}
}
ASSERT (asize == 1 && bsize == 1); /* just alow and blow left */
TMP_FREE;
/* (1/b)=1 always (in this case have b==1 because a>=b) */
if (alow == 1)
return JACOBI_BIT1_TO_PN (result_bit1);
/* swap with reciprocity and do (b/a) */
result_bit1 ^= JACOBI_RECIP_UU_BIT1 (alow, blow);
TRACE (printf ("base (%lu/%lu) with %d\n",
blow, alow, JACOBI_BIT1_TO_PN (result_bit1)));
return mpn_jacobi_base (blow, alow, result_bit1);
zero:
TMP_FREE;
return 0;
return res;
}