Updated mpz/jacobi.c from GMP 5.1.3.
This commit is contained in:
parent
7e44cf98e7
commit
006f33dc49
341
mpz/jacobi.c
341
mpz/jacobi.c
@ -1,12 +1,13 @@
|
||||
/* mpz_jacobi, mpz_legendre, mpz_kronecker -- mpz/mpz Jacobi symbols.
|
||||
|
||||
Copyright 2000, 2001, 2002, 2005 Free Software Foundation, Inc.
|
||||
Copyright 2000, 2001, 2002, 2005, 2010, 2011, 2012 Free Software Foundation,
|
||||
Inc.
|
||||
|
||||
This file is part of the GNU MP Library.
|
||||
|
||||
The GNU MP Library is free software; you can redistribute it and/or modify it
|
||||
under the terms of the GNU Lesser General Public License as published by the
|
||||
Free Software Foundation; either version 2.1 of the License, or (at your
|
||||
Free Software Foundation; either version 3 of the License, or (at your
|
||||
option) any later version.
|
||||
|
||||
The GNU MP Library is distributed in the hope that it will be useful, but
|
||||
@ -15,9 +16,7 @@ FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License
|
||||
for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License along
|
||||
with the GNU MP Library; see the file COPYING.LIB. If not, write to the Free
|
||||
Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301,
|
||||
USA. */
|
||||
with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
|
||||
|
||||
#include <stdio.h>
|
||||
#include "mpir.h"
|
||||
@ -25,23 +24,10 @@ USA. */
|
||||
#include "longlong.h"
|
||||
|
||||
|
||||
/* Change this to "#define TRACE(x) x" for some traces. */
|
||||
#define TRACE(x)
|
||||
|
||||
|
||||
#define MPN_RSHIFT_OR_COPY(dst,src,size,shift) \
|
||||
do { \
|
||||
if ((shift) != 0) \
|
||||
{ \
|
||||
ASSERT_NOCARRY (mpn_rshift (dst, src, size, shift)); \
|
||||
(size) -= ((dst)[(size)-1] == 0); \
|
||||
} \
|
||||
else \
|
||||
MPN_COPY (dst, src, size); \
|
||||
} while (0)
|
||||
|
||||
|
||||
/* This code does triple duty as mpz_jacobi, mpz_legendre and mpz_kronecker.
|
||||
/* This code does triple duty as mpz_jacobi, mpz_legendre and
|
||||
mpz_kronecker. For ABI compatibility, the link symbol is
|
||||
__gmpz_jacobi, not __gmpz_kronecker, even though the latter would
|
||||
be more logical.
|
||||
|
||||
mpz_jacobi could assume b is odd, but the improvements from that seem
|
||||
small compared to other operations, and anything significant should be
|
||||
@ -53,262 +39,163 @@ USA. */
|
||||
multiple of b), but the checking for that takes little time compared to
|
||||
other operations.
|
||||
|
||||
The main loop is just a simple binary GCD with the jacobi symbol result
|
||||
tracked during the reduction.
|
||||
|
||||
The special cases for a or b fitting in one limb let mod_1 or modexact_1
|
||||
get used, without any copying, and end up just as efficient as the mixed
|
||||
precision mpz_kronecker_ui etc.
|
||||
|
||||
When tdiv_qr is called it's not necessary to make "a" odd or make a
|
||||
working copy of it, but tdiv_qr is going to be pretty slow so it's not
|
||||
worth bothering trying to save anything for that case.
|
||||
|
||||
Enhancements:
|
||||
|
||||
mpn_bdivmod could be used instead of mpn_tdiv_qr, like in mpn_gcd.
|
||||
Currently tdiv_qr is preferred since it's sub-quadratic on big sizes,
|
||||
although bdivmod might be a touch quicker on small sizes. This can be
|
||||
revised when bdivmod becomes sub-quadratic too.
|
||||
|
||||
Some sort of multi-step algorithm should be used. The current subtract
|
||||
and shift for every bit is very inefficient. Lehmer (per current gcdext)
|
||||
would need some low bits included in its calculation to apply the sign
|
||||
change for reciprocity. Binary Lehmer keeps low bits to strip twos
|
||||
anyway, so might be better suited. Maybe the accelerated GCD style k-ary
|
||||
reduction would work, if sign changes due to the extra factors it
|
||||
introduces can be accounted for (or maybe they can be ignored). */
|
||||
mpn_bdiv_qr should be used instead of mpn_tdiv_qr.
|
||||
|
||||
*/
|
||||
|
||||
int
|
||||
mpz_jacobi (mpz_srcptr a, mpz_srcptr b)
|
||||
{
|
||||
mp_srcptr asrcp, bsrcp;
|
||||
mp_size_t asize, bsize;
|
||||
mp_limb_t alow, blow;
|
||||
mp_ptr ap, bp;
|
||||
mp_limb_t alow, blow, ahigh, bhigh, asecond, bsecond;
|
||||
unsigned atwos, btwos;
|
||||
unsigned btwos;
|
||||
int result_bit1;
|
||||
int res;
|
||||
TMP_DECL;
|
||||
|
||||
TRACE (printf ("start asize=%d bsize=%d\n", SIZ(a), SIZ(b));
|
||||
mpz_trace (" a", a);
|
||||
mpz_trace (" b", b));
|
||||
|
||||
asize = SIZ(a);
|
||||
asrcp = PTR(a);
|
||||
alow = asrcp[0];
|
||||
|
||||
bsize = SIZ(b);
|
||||
if (bsize == 0)
|
||||
return JACOBI_LS0 (alow, asize); /* (a/0) */
|
||||
|
||||
bsrcp = PTR(b);
|
||||
blow = bsrcp[0];
|
||||
|
||||
if (asize == 0)
|
||||
return JACOBI_0LS (blow, bsize); /* (0/b) */
|
||||
/* The MPN jacobi functions require positive a and b, and b odd. So
|
||||
we must to handle the cases of a or b zero, then signs, and then
|
||||
the case of even b.
|
||||
*/
|
||||
|
||||
/* (even/even)=0 */
|
||||
if (((alow | blow) & 1) == 0)
|
||||
if (bsize == 0)
|
||||
/* (a/0) = [ a = 1 or a = -1 ] */
|
||||
return JACOBI_LS0 (alow, asize);
|
||||
|
||||
if (asize == 0)
|
||||
/* (0/b) = [ b = 1 or b = - 1 ] */
|
||||
return JACOBI_0LS (blow, bsize);
|
||||
|
||||
if ( (((alow | blow) & 1) == 0))
|
||||
/* Common factor of 2 ==> (a/b) = 0 */
|
||||
return 0;
|
||||
|
||||
/* account for effect of sign of b, then ignore it */
|
||||
result_bit1 = JACOBI_BSGN_SS_BIT1 (asize, bsize);
|
||||
bsize = ABS (bsize);
|
||||
if (bsize < 0)
|
||||
{
|
||||
/* (a/-1) = -1 if a < 0, +1 if a >= 0 */
|
||||
result_bit1 = (asize < 0) << 1;
|
||||
bsize = -bsize;
|
||||
}
|
||||
else
|
||||
result_bit1 = 0;
|
||||
|
||||
/* low zero limbs on b can be discarded */
|
||||
JACOBI_STRIP_LOW_ZEROS (result_bit1, alow, bsrcp, bsize, blow);
|
||||
|
||||
count_trailing_zeros (btwos, blow);
|
||||
TRACE (printf ("b twos %u\n", btwos));
|
||||
|
||||
/* establish shifted blow */
|
||||
blow >>= btwos;
|
||||
if (bsize > 1)
|
||||
|
||||
if (bsize > 1 && btwos > 0)
|
||||
{
|
||||
bsecond = bsrcp[1];
|
||||
if (btwos != 0)
|
||||
blow |= (bsecond << (GMP_NUMB_BITS - btwos)) & GMP_NUMB_MASK;
|
||||
mp_limb_t b1 = bsrcp[1];
|
||||
blow |= b1 << (GMP_NUMB_BITS - btwos);
|
||||
if (bsize == 2 && (b1 >> btwos) == 0)
|
||||
bsize = 1;
|
||||
}
|
||||
|
||||
/* account for effect of sign of a, then ignore it */
|
||||
result_bit1 ^= JACOBI_ASGN_SU_BIT1 (asize, blow);
|
||||
asize = ABS (asize);
|
||||
|
||||
if (bsize == 1 || (bsize == 2 && (bsecond >> btwos) == 0))
|
||||
if (asize < 0)
|
||||
{
|
||||
/* special case one limb b, use modexact and no copying */
|
||||
/* (-1/b) = -1 iff b = 3 (mod 4) */
|
||||
result_bit1 ^= JACOBI_N1B_BIT1(blow);
|
||||
asize = -asize;
|
||||
}
|
||||
|
||||
/* (a/2)=(2/a) with a odd, and if b is even then a is odd here */
|
||||
result_bit1 ^= JACOBI_TWOS_U_BIT1 (btwos, alow);
|
||||
JACOBI_STRIP_LOW_ZEROS (result_bit1, blow, asrcp, asize, alow);
|
||||
|
||||
if (blow == 1) /* (a/1)=1 always */
|
||||
/* Ensure asize >= bsize. Take advantage of the generalized
|
||||
reciprocity law (a/b*2^n) = (b*2^n / a) * RECIP(a,b) */
|
||||
|
||||
if (asize < bsize)
|
||||
{
|
||||
MPN_SRCPTR_SWAP (asrcp, asize, bsrcp, bsize);
|
||||
MP_LIMB_T_SWAP (alow, blow);
|
||||
|
||||
/* NOTE: The value of alow (old blow) is a bit subtle. For this code
|
||||
path, we get alow as the low, always odd, limb of shifted A. Which is
|
||||
what we need for the reciprocity update below.
|
||||
|
||||
However, all other uses of alow assumes that it is *not*
|
||||
shifted. Luckily, alow matters only when either
|
||||
|
||||
+ btwos > 0, in which case A is always odd
|
||||
|
||||
+ asize == bsize == 1, in which case this code path is never
|
||||
taken. */
|
||||
|
||||
count_trailing_zeros (btwos, blow);
|
||||
blow >>= btwos;
|
||||
|
||||
if (bsize > 1 && btwos > 0)
|
||||
{
|
||||
mp_limb_t b1 = bsrcp[1];
|
||||
blow |= b1 << (GMP_NUMB_BITS - btwos);
|
||||
if (bsize == 2 && (b1 >> btwos) == 0)
|
||||
bsize = 1;
|
||||
}
|
||||
|
||||
result_bit1 ^= JACOBI_RECIP_UU_BIT1 (alow, blow);
|
||||
}
|
||||
|
||||
if (bsize == 1)
|
||||
{
|
||||
result_bit1 ^= JACOBI_TWOS_U_BIT1(btwos, alow);
|
||||
|
||||
if (blow == 1)
|
||||
return JACOBI_BIT1_TO_PN (result_bit1);
|
||||
|
||||
if (asize > 1)
|
||||
JACOBI_MOD_OR_MODEXACT_1_ODD (result_bit1, alow, asrcp, asize, blow);
|
||||
TRACE (printf ("base (%lu/%lu) with %d\n",
|
||||
alow, blow, JACOBI_BIT1_TO_PN (result_bit1)));
|
||||
|
||||
return mpn_jacobi_base (alow, blow, result_bit1);
|
||||
}
|
||||
|
||||
/* Discard low zero limbs of a. Usually there won't be anything to
|
||||
strip, hence not bothering with it for the bsize==1 case. */
|
||||
JACOBI_STRIP_LOW_ZEROS (result_bit1, blow, asrcp, asize, alow);
|
||||
|
||||
count_trailing_zeros (atwos, alow);
|
||||
TRACE (printf ("a twos %u\n", atwos));
|
||||
result_bit1 ^= JACOBI_TWOS_U_BIT1 (atwos, blow);
|
||||
|
||||
/* establish shifted alow */
|
||||
alow >>= atwos;
|
||||
if (asize > 1)
|
||||
{
|
||||
asecond = asrcp[1];
|
||||
if (atwos != 0)
|
||||
alow |= (asecond << (GMP_NUMB_BITS - atwos)) & GMP_NUMB_MASK;
|
||||
}
|
||||
|
||||
/* (a/2)=(2/a) with a odd */
|
||||
result_bit1 ^= JACOBI_TWOS_U_BIT1 (btwos, alow);
|
||||
|
||||
if (asize == 1 || (asize == 2 && (asecond >> atwos) == 0))
|
||||
{
|
||||
/* another special case with modexact and no copying */
|
||||
|
||||
if (alow == 1) /* (1/b)=1 always */
|
||||
return JACOBI_BIT1_TO_PN (result_bit1);
|
||||
|
||||
/* b still has its twos, so cancel out their effect */
|
||||
result_bit1 ^= JACOBI_TWOS_U_BIT1 (btwos, alow);
|
||||
|
||||
result_bit1 ^= JACOBI_RECIP_UU_BIT1 (alow, blow); /* now (b/a) */
|
||||
JACOBI_MOD_OR_MODEXACT_1_ODD (result_bit1, blow, bsrcp, bsize, alow);
|
||||
TRACE (printf ("base (%lu/%lu) with %d\n",
|
||||
blow, alow, JACOBI_BIT1_TO_PN (result_bit1)));
|
||||
return mpn_jacobi_base (blow, alow, result_bit1);
|
||||
}
|
||||
|
||||
/* Allocation strategy: For A, we allocate a working copy only for A % B, but
|
||||
when A is much larger than B, we have to allocate space for the large
|
||||
quotient. We use the same area, pointed to by bp, for both the quotient
|
||||
A/B and the working copy of B. */
|
||||
|
||||
TMP_MARK;
|
||||
TMP_ALLOC_LIMBS_2 (ap, asize, bp, bsize);
|
||||
|
||||
MPN_RSHIFT_OR_COPY (ap, asrcp, asize, atwos);
|
||||
ASSERT (alow == ap[0]);
|
||||
TRACE (mpn_trace ("stripped a", ap, asize));
|
||||
if (asize >= 2*bsize)
|
||||
TMP_ALLOC_LIMBS_2 (ap, bsize, bp, asize - bsize + 1);
|
||||
else
|
||||
TMP_ALLOC_LIMBS_2 (ap, bsize, bp, bsize);
|
||||
|
||||
MPN_RSHIFT_OR_COPY (bp, bsrcp, bsize, btwos);
|
||||
ASSERT (blow == bp[0]);
|
||||
TRACE (mpn_trace ("stripped b", bp, bsize));
|
||||
|
||||
/* swap if necessary to make a longer than b */
|
||||
if (asize < bsize)
|
||||
{
|
||||
TRACE (printf ("swap\n"));
|
||||
MPN_PTR_SWAP (ap,asize, bp,bsize);
|
||||
MP_LIMB_T_SWAP (alow, blow);
|
||||
result_bit1 ^= JACOBI_RECIP_UU_BIT1 (alow, blow);
|
||||
}
|
||||
|
||||
/* If a is bigger than b then reduce to a mod b.
|
||||
Division is much faster than chipping away at "a" bit-by-bit. */
|
||||
/* In the case of even B, we conceptually shift out the powers of two first,
|
||||
and then divide A mod B. Hence, when taking those powers of two into
|
||||
account, we must use alow *before* the division. Doing the actual division
|
||||
first is ok, because the point is to remove multiples of B from A, and
|
||||
multiples of 2^k B are good enough. */
|
||||
if (asize > bsize)
|
||||
mpn_tdiv_qr (bp, ap, 0, asrcp, asize, bsrcp, bsize);
|
||||
else
|
||||
MPN_COPY (ap, asrcp, bsize);
|
||||
|
||||
if (btwos > 0)
|
||||
{
|
||||
mp_ptr rp, qp;
|
||||
result_bit1 ^= JACOBI_TWOS_U_BIT1(btwos, alow);
|
||||
|
||||
TRACE (printf ("tdiv_qr asize=%ld bsize=%ld\n", asize, bsize));
|
||||
|
||||
TMP_ALLOC_LIMBS_2 (rp, bsize, qp, asize-bsize+1);
|
||||
mpn_tdiv_qr (qp, rp, (mp_size_t) 0, ap, asize, bp, bsize);
|
||||
ap = rp;
|
||||
asize = bsize;
|
||||
MPN_NORMALIZE (ap, asize);
|
||||
|
||||
TRACE (printf ("tdiv_qr asize=%ld bsize=%ld\n", asize, bsize);
|
||||
mpn_trace (" a", ap, asize);
|
||||
mpn_trace (" b", bp, bsize));
|
||||
|
||||
if (asize == 0) /* (0/b)=0 for b!=1 */
|
||||
goto zero;
|
||||
|
||||
alow = ap[0];
|
||||
goto strip_a;
|
||||
ASSERT_NOCARRY (mpn_rshift (bp, bsrcp, bsize, btwos));
|
||||
bsize -= (ap[bsize-1] | bp[bsize-1]) == 0;
|
||||
}
|
||||
else
|
||||
MPN_COPY (bp, bsrcp, bsize);
|
||||
|
||||
for (;;)
|
||||
{
|
||||
ASSERT (asize >= 1); /* a,b non-empty */
|
||||
ASSERT (bsize >= 1);
|
||||
ASSERT (ap[asize-1] != 0); /* a,b normalized (and hence non-zero) */
|
||||
ASSERT (bp[bsize-1] != 0);
|
||||
ASSERT (alow == ap[0]); /* low limb copies should be correct */
|
||||
ASSERT (blow == bp[0]);
|
||||
ASSERT (alow & 1); /* a,b odd */
|
||||
ASSERT (blow & 1);
|
||||
res = mpn_jacobi_n (ap, bp, bsize,
|
||||
mpn_jacobi_init (ap[0], blow, (result_bit1>>1) & 1));
|
||||
|
||||
TRACE (printf ("top asize=%ld bsize=%ld\n", asize, bsize);
|
||||
mpn_trace (" a", ap, asize);
|
||||
mpn_trace (" b", bp, bsize));
|
||||
|
||||
/* swap if necessary to make a>=b, applying reciprocity
|
||||
high limbs are almost always enough to tell which is bigger */
|
||||
if (asize < bsize
|
||||
|| (asize == bsize
|
||||
&& ((ahigh=ap[asize-1]) < (bhigh=bp[asize-1])
|
||||
|| (ahigh == bhigh
|
||||
&& mpn_cmp (ap, bp, asize-1) < 0))))
|
||||
{
|
||||
TRACE (printf ("swap\n"));
|
||||
MPN_PTR_SWAP (ap,asize, bp,bsize);
|
||||
MP_LIMB_T_SWAP (alow, blow);
|
||||
result_bit1 ^= JACOBI_RECIP_UU_BIT1 (alow, blow);
|
||||
}
|
||||
|
||||
if (asize == 1)
|
||||
break;
|
||||
|
||||
/* a = a-b */
|
||||
ASSERT (asize >= bsize);
|
||||
ASSERT_NOCARRY (mpn_sub (ap, ap, asize, bp, bsize));
|
||||
MPN_NORMALIZE (ap, asize);
|
||||
alow = ap[0];
|
||||
|
||||
/* (0/b)=0 for b!=1. b!=1 when a==0 because otherwise would have had
|
||||
a==1 which is asize==1 and would have exited above. */
|
||||
if (asize == 0)
|
||||
goto zero;
|
||||
|
||||
strip_a:
|
||||
/* low zero limbs on a can be discarded */
|
||||
JACOBI_STRIP_LOW_ZEROS (result_bit1, blow, ap, asize, alow);
|
||||
|
||||
if ((alow & 1) == 0)
|
||||
{
|
||||
/* factors of 2 from a */
|
||||
unsigned twos;
|
||||
count_trailing_zeros (twos, alow);
|
||||
TRACE (printf ("twos %u\n", twos));
|
||||
result_bit1 ^= JACOBI_TWOS_U_BIT1 (twos, blow);
|
||||
ASSERT_NOCARRY (mpn_rshift (ap, ap, asize, twos));
|
||||
asize -= (ap[asize-1] == 0);
|
||||
alow = ap[0];
|
||||
}
|
||||
}
|
||||
|
||||
ASSERT (asize == 1 && bsize == 1); /* just alow and blow left */
|
||||
TMP_FREE;
|
||||
|
||||
/* (1/b)=1 always (in this case have b==1 because a>=b) */
|
||||
if (alow == 1)
|
||||
return JACOBI_BIT1_TO_PN (result_bit1);
|
||||
|
||||
/* swap with reciprocity and do (b/a) */
|
||||
result_bit1 ^= JACOBI_RECIP_UU_BIT1 (alow, blow);
|
||||
TRACE (printf ("base (%lu/%lu) with %d\n",
|
||||
blow, alow, JACOBI_BIT1_TO_PN (result_bit1)));
|
||||
return mpn_jacobi_base (blow, alow, result_bit1);
|
||||
|
||||
zero:
|
||||
TMP_FREE;
|
||||
return 0;
|
||||
return res;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user