/* mpn_jacobi_base -- limb/limb Jacobi symbol with restricted arguments. THIS INTERFACE IS PRELIMINARY AND MIGHT DISAPPEAR OR BE SUBJECT TO INCOMPATIBLE CHANGES IN A FUTURE RELEASE OF GMP. Copyright 1999, 2000, 2001, 2002, 2010 Free Software Foundation, Inc. This file is part of the GNU MP Library. The GNU MP Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. The GNU MP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ #include "mpir.h" #include "gmp-impl.h" #include "longlong.h" /* Use the simple loop by default. The generic count_trailing_zeros is not very fast, and the extra trickery of method 3 has proven to be less use than might have been though. */ #ifndef JACOBI_BASE_METHOD #define JACOBI_BASE_METHOD 2 #endif /* Use count_trailing_zeros. */ #if JACOBI_BASE_METHOD == 1 #define PROCESS_TWOS_ANY \ { \ mp_limb_t twos; \ count_trailing_zeros (twos, a); \ result_bit1 ^= JACOBI_TWOS_U_BIT1 (twos, b); \ a >>= twos; \ } #define PROCESS_TWOS_EVEN PROCESS_TWOS_ANY #endif /* Use a simple loop. A disadvantage of this is that there's a branch on a 50/50 chance of a 0 or 1 low bit. */ #if JACOBI_BASE_METHOD == 2 #define PROCESS_TWOS_EVEN \ { \ int two; \ two = JACOBI_TWO_U_BIT1 (b); \ do \ { \ a >>= 1; \ result_bit1 ^= two; \ ASSERT (a != 0); \ } \ while ((a & 1) == 0); \ } #define PROCESS_TWOS_ANY \ if ((a & 1) == 0) \ PROCESS_TWOS_EVEN; #endif /* Process one bit arithmetically, then a simple loop. This cuts the loop condition down to a 25/75 chance, which should branch predict better. The CPU will need a reasonable variable left shift. */ #if JACOBI_BASE_METHOD == 3 #define PROCESS_TWOS_EVEN \ { \ int two, mask, shift; \ \ two = JACOBI_TWO_U_BIT1 (b); \ mask = (~a & 2); \ a >>= 1; \ \ shift = (~a & 1); \ a >>= shift; \ result_bit1 ^= two ^ (two & mask); \ \ while ((a & 1) == 0) \ { \ a >>= 1; \ result_bit1 ^= two; \ ASSERT (a != 0); \ } \ } #define PROCESS_TWOS_ANY \ { \ int two, mask, shift; \ \ two = JACOBI_TWO_U_BIT1 (b); \ shift = (~a & 1); \ a >>= shift; \ \ mask = shift << 1; \ result_bit1 ^= (two & mask); \ \ while ((a & 1) == 0) \ { \ a >>= 1; \ result_bit1 ^= two; \ ASSERT (a != 0); \ } \ } #endif #if JACOBI_BASE_METHOD < 4 /* Calculate the value of the Jacobi symbol (a/b) of two mp_limb_t's, but with a restricted range of inputs accepted, namely b>1, b odd. The initial result_bit1 is taken as a parameter for the convenience of mpz_kronecker_ui() et al. The sign changes both here and in those routines accumulate nicely in bit 1, see the JACOBI macros. The return value here is the normal +1, 0, or -1. Note that +1 and -1 have bit 1 in the "BIT1" sense, which could be useful if the caller is accumulating it into some extended calculation. Duplicating the loop body to avoid the MP_LIMB_T_SWAP(a,b) would be possible, but a couple of tests suggest it's not a significant speedup, and may even be a slowdown, so what's here is good enough for now. */ int mpn_jacobi_base (mp_limb_t a, mp_limb_t b, int result_bit1) { ASSERT (b & 1); /* b odd */ ASSERT (b != 1); if (a == 0) return 0; PROCESS_TWOS_ANY; if (a == 1) goto done; if (a >= b) goto a_gt_b; for (;;) { result_bit1 ^= JACOBI_RECIP_UU_BIT1 (a, b); MP_LIMB_T_SWAP (a, b); a_gt_b: do { /* working on (a/b), a,b odd, a>=b */ ASSERT (a & 1); ASSERT (b & 1); ASSERT (a >= b); if ((a -= b) == 0) return 0; PROCESS_TWOS_EVEN; if (a == 1) goto done; } while (a >= b); } done: return JACOBI_BIT1_TO_PN (result_bit1); } #endif #if JACOBI_BASE_METHOD == 4 /* Computes (a/b) for odd b > 1 and any a. The initial bit is taken as a * parameter. We have no need for the convention that the sign is in * bit 1, internally we use bit 0. */ /* FIXME: Could try table-based count_trailing_zeros. */ int mpn_jacobi_base (mp_limb_t a, mp_limb_t b, int bit) { int c; ASSERT (b & 1); ASSERT (b > 1); if (a == 0) /* This is the only line which depends on b > 1 */ return 0; bit >>= 1; /* Below, we represent a and b shifted right so that the least significant one bit is implicit. */ b >>= 1; count_trailing_zeros (c, a); bit ^= c & (b ^ (b >> 1)); /* We may have c==GMP_LIMB_BITS-1, so we can't use a>>c+1. */ a >>= c; a >>= 1; do { mp_limb_t t = a - b; mp_limb_t bgta = LIMB_HIGHBIT_TO_MASK (t); if (t == 0) return 0; /* If b > a, invoke reciprocity */ bit ^= (bgta & a & b); /* b <-- min (a, b) */ b += (bgta & t); /* a <-- |a - b| */ a = (t ^ bgta) - bgta; /* Number of trailing zeros is the same no matter if we look at * t or a, but using t gives more parallelism. */ count_trailing_zeros (c, t); c ++; /* (2/b) = -1 if b = 3 or 5 mod 8 */ bit ^= c & (b ^ (b >> 1)); a >>= c; } while (b > 0); return 1-2*(bit & 1); } #endif /* JACOBI_BASE_METHOD == 4 */