/* mpz_oddfac_1(RESULT, N) -- Set RESULT to the odd factor of N!. Contributed to the GNU project by Marco Bodrato. THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. Copyright 2010, 2011, 2012 Free Software Foundation, Inc. This file is part of the GNU MP Library. The GNU MP Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 3 of the License, or (at your option) any later version. The GNU MP Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ #include "mpir.h" #include "gmp-impl.h" #include "longlong.h" #if GMP_LIMB_BITS == 64 #define ODD_FACTORIAL_TABLE_MAX CNST_LIMB(0x335281867ec241ef) #define ODD_FACTORIAL_TABLE_LIMIT (25) #define ODD_FACTORIAL_EXTTABLE_LIMIT (67) #define ODD_DOUBLEFACTORIAL_TABLE_MAX CNST_LIMB(0x57e22099c030d941) #define ODD_DOUBLEFACTORIAL_TABLE_LIMIT (33) #else #define ODD_FACTORIAL_TABLE_MAX CNST_LIMB(0x260eeeeb) #define ODD_FACTORIAL_TABLE_LIMIT (16) #define ODD_FACTORIAL_EXTTABLE_LIMIT (34) #define ODD_DOUBLEFACTORIAL_TABLE_MAX CNST_LIMB(0x27065f73) #define ODD_DOUBLEFACTORIAL_TABLE_LIMIT (19) #endif /* TODO: - split this file in smaller parts with functions that can be recycled for different computations. */ /**************************************************************/ /* Section macros: common macros, for mswing/fac/bin (&sieve) */ /**************************************************************/ #define FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I) \ if ((PR) > (MAX_PR)) { \ (VEC)[(I)++] = (PR); \ (PR) = 1; \ } #define FACTOR_LIST_STORE(P, PR, MAX_PR, VEC, I) \ do { \ if ((PR) > (MAX_PR)) { \ (VEC)[(I)++] = (PR); \ (PR) = (P); \ } else \ (PR) *= (P); \ } while (0) #define LOOP_ON_SIEVE_CONTINUE(prime,end,sieve) \ __max_i = (end); \ \ do { \ ++__i; \ if (((sieve)[__index] & __mask) == 0) \ { \ (prime) = id_to_n(__i) #define LOOP_ON_SIEVE_BEGIN(prime,start,end,off,sieve) \ do { \ mp_limb_t __mask, __index, __max_i, __i; \ \ __i = (start)-(off); \ __index = __i / GMP_LIMB_BITS; \ __mask = CNST_LIMB(1) << (__i % GMP_LIMB_BITS); \ __i += (off); \ \ LOOP_ON_SIEVE_CONTINUE(prime,end,sieve) #define LOOP_ON_SIEVE_STOP \ } \ __mask = __mask << 1 | __mask >> (GMP_LIMB_BITS-1); \ __index += __mask & 1; \ } while (__i <= __max_i) \ #define LOOP_ON_SIEVE_END \ LOOP_ON_SIEVE_STOP; \ } while (0) /*********************************************************/ /* Section sieve: sieving functions and tools for primes */ /*********************************************************/ #if WANT_ASSERT static mp_limb_t bit_to_n (mp_limb_t bit) { return (bit*3+4)|1; } #endif /* id_to_n (x) = bit_to_n (x-1) = (id*3+1)|1*/ static mp_limb_t id_to_n (mp_limb_t id) { return id*3+1+(id&1); } /* n_to_bit (n) = ((n-1)&(-CNST_LIMB(2)))/3U-1 */ static mp_limb_t n_to_bit (mp_limb_t n) { return ((n-5)|1)/3U; } #if WANT_ASSERT static mp_size_t primesieve_size (mp_limb_t n) { return n_to_bit(n) / GMP_LIMB_BITS + 1; } #endif /*********************************************************/ /* Section mswing: 2-multiswing factorial */ /*********************************************************/ /* Returns an approximation of the sqare root of x. * * It gives: x <= limb_apprsqrt (x) ^ 2 < x * 9/4 */ static mp_limb_t limb_apprsqrt (mp_limb_t x) { int s; ASSERT (x > 2); count_leading_zeros (s, x - 1); s = GMP_LIMB_BITS - 1 - s; return (CNST_LIMB(1) << (s >> 1)) + (CNST_LIMB(1) << ((s - 1) >> 1)); } #if 0 /* A count-then-exponentiate variant for SWING_A_PRIME */ #define SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I) \ do { \ mp_limb_t __q, __prime; \ int __exp; \ __prime = (P); \ __exp = 0; \ __q = (N); \ do { \ __q /= __prime; \ __exp += __q & 1; \ } while (__q >= __prime); \ if (__exp) { /* Store $prime^{exp}$ */ \ for (__q = __prime; --__exp; __q *= __prime); \ FACTOR_LIST_STORE(__q, PR, MAX_PR, VEC, I); \ }; \ } while (0) #else #define SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I) \ do { \ mp_limb_t __q, __prime; \ __prime = (P); \ FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I); \ __q = (N); \ do { \ __q /= __prime; \ if ((__q & 1) != 0) (PR) *= __prime; \ } while (__q >= __prime); \ } while (0) #endif #define SH_SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I) \ do { \ mp_limb_t __prime; \ __prime = (P); \ if ((((N) / __prime) & 1) != 0) \ FACTOR_LIST_STORE(__prime, PR, MAX_PR, VEC, I); \ } while (0) /* mpz_2multiswing_1 computes the odd part of the 2-multiswing factorial of the parameter n. The result x is an odd positive integer so that multiswing(n,2) = x 2^a. Uses the algorithm described by Peter Luschny in "Divide, Swing and Conquer the Factorial!". The pointer sieve points to primesieve_size(n) limbs containing a bit-array where primes are marked as 0. Enough (FIXME: explain :-) limbs must be pointed by factors. */ static void mpz_2multiswing_1 (mpz_ptr x, mp_limb_t n, mp_ptr sieve, mp_ptr factors) { mp_limb_t prod, max_prod; mp_size_t j; ASSERT (n >= 26); j = 0; prod = -(n & 1); n &= ~ CNST_LIMB(1); /* n-1, if n is odd */ prod = (prod & n) + 1; /* the original n, if it was odd, 1 otherwise */ max_prod = GMP_NUMB_MAX / (n-1); /* Handle prime = 3 separately. */ SWING_A_PRIME (3, n, prod, max_prod, factors, j); /* Swing primes from 5 to n/3 */ { mp_limb_t s; { mp_limb_t prime; s = limb_apprsqrt(n); ASSERT (s >= 5); s = n_to_bit (s); LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (5), s, 0,sieve); SWING_A_PRIME (prime, n, prod, max_prod, factors, j); LOOP_ON_SIEVE_END; s++; } ASSERT (max_prod <= GMP_NUMB_MAX / 3); ASSERT (bit_to_n (s) * bit_to_n (s) > n); ASSERT (s <= n_to_bit (n / 3)); { mp_limb_t prime; mp_limb_t l_max_prod = max_prod * 3; LOOP_ON_SIEVE_BEGIN (prime, s, n_to_bit (n/3), 0, sieve); SH_SWING_A_PRIME (prime, n, prod, l_max_prod, factors, j); LOOP_ON_SIEVE_END; } } /* Store primes from (n+1)/2 to n */ { mp_limb_t prime; LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (n >> 1) + 1, n_to_bit (n), 0,sieve); FACTOR_LIST_STORE (prime, prod, max_prod, factors, j); LOOP_ON_SIEVE_END; } if (LIKELY (j != 0)) { factors[j++] = prod; mpz_prodlimbs (x, factors, j); } else { PTR (x)[0] = prod; SIZ (x) = 1; } } #undef SWING_A_PRIME #undef SH_SWING_A_PRIME #undef LOOP_ON_SIEVE_END #undef LOOP_ON_SIEVE_STOP #undef LOOP_ON_SIEVE_BEGIN #undef LOOP_ON_SIEVE_CONTINUE #undef FACTOR_LIST_APPEND /*********************************************************/ /* Section oddfac: odd factorial, needed also by binomial*/ /*********************************************************/ #if TUNE_PROGRAM_BUILD #define FACTORS_PER_LIMB (GMP_NUMB_BITS / (LOG2C(FAC_DSC_THRESHOLD_LIMIT-1)+1)) #else #define FACTORS_PER_LIMB (GMP_NUMB_BITS / (LOG2C(FAC_DSC_THRESHOLD-1)+1)) #endif /* mpz_oddfac_1 computes the odd part of the factorial of the parameter n. I.e. n! = x 2^a, where x is the returned value: an odd positive integer. If flag != 0 a square is skipped in the DSC part, e.g. if n is odd, n > FAC_DSC_THRESHOLD and flag = 1, x is set to n!!. If n is too small, flag is ignored, and an ASSERT can be triggered. TODO: FAC_DSC_THRESHOLD is used here with two different roles: - to decide when prime factorisation is needed, - to stop the recursion, once sieving is done. Maybe two thresholds can do a better job. */ void mpz_oddfac_1 (mpz_ptr x, mp_limb_t n, unsigned flag) { ASSERT (n <= GMP_NUMB_MAX); ASSERT (flag == 0 || (flag == 1 && n > ODD_FACTORIAL_TABLE_LIMIT && ABOVE_THRESHOLD (n, FAC_DSC_THRESHOLD))); if (n <= ODD_FACTORIAL_TABLE_LIMIT) { PTR (x)[0] = __gmp_oddfac_table[n]; SIZ (x) = 1; } else if (n <= ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1) { mp_ptr px; px = MPZ_NEWALLOC (x, 2); umul_ppmm (px[1], px[0], __gmp_odd2fac_table[(n - 1) >> 1], __gmp_oddfac_table[n >> 1]); SIZ (x) = 2; } else { unsigned s; mp_ptr factors; s = 0; { mp_limb_t tn; mp_limb_t prod, max_prod, i; mp_size_t j; TMP_SDECL; #if TUNE_PROGRAM_BUILD ASSERT (FAC_DSC_THRESHOLD_LIMIT >= FAC_DSC_THRESHOLD); ASSERT (FAC_DSC_THRESHOLD >= 2 * (ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 2)); #endif /* Compute the number of recursive steps for the DSC algorithm. */ for (tn = n; ABOVE_THRESHOLD (tn, FAC_DSC_THRESHOLD); s++) tn >>= 1; j = 0; TMP_SMARK; factors = TMP_SALLOC_LIMBS (1 + tn / FACTORS_PER_LIMB); ASSERT (tn >= FACTORS_PER_LIMB); prod = 1; #if TUNE_PROGRAM_BUILD max_prod = GMP_NUMB_MAX / FAC_DSC_THRESHOLD_LIMIT; #else max_prod = GMP_NUMB_MAX / FAC_DSC_THRESHOLD; #endif ASSERT (tn > ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1); do { i = ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 2; factors[j++] = ODD_DOUBLEFACTORIAL_TABLE_MAX; do { FACTOR_LIST_STORE (i, prod, max_prod, factors, j); i += 2; } while (i <= tn); max_prod <<= 1; tn >>= 1; } while (tn > ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1); factors[j++] = prod; factors[j++] = __gmp_odd2fac_table[(tn - 1) >> 1]; factors[j++] = __gmp_oddfac_table[tn >> 1]; mpz_prodlimbs (x, factors, j); TMP_SFREE; } if (s != 0) /* Use the algorithm described by Peter Luschny in "Divide, Swing and Conquer the Factorial!". Improvement: there are two temporary buffers, factors and square, that are never used together; with a good estimate of the maximal needed size, they could share a single allocation. */ { mpz_t mswing; mp_ptr sieve; mp_size_t size; TMP_DECL; TMP_MARK; flag--; size = n / GMP_NUMB_BITS + 4; ASSERT (primesieve_size (n - 1) <= size - (size / 2 + 1)); /* 2-multiswing(n) < 2^(n-1)*sqrt(n/pi) < 2^(n+GMP_NUMB_BITS); one more can be overwritten by mul, another for the sieve */ MPZ_TMP_INIT (mswing, size); /* Initialize size, so that ASSERT can check it correctly. */ ASSERT_CODE (SIZ (mswing) = 0); /* Put the sieve on the second half, it will be overwritten by the last mswing. */ sieve = PTR (mswing) + size / 2 + 1; size = (gmp_primesieve (sieve, n - 1) + 1) / log_n_max (n) + 1; factors = TMP_ALLOC_LIMBS (size); do { mp_ptr square, px; mp_size_t nx, ns; mp_limb_t cy; TMP_DECL; s--; ASSERT (ABSIZ (mswing) < ALLOC (mswing) / 2); /* Check: sieve has not been overwritten */ mpz_2multiswing_1 (mswing, n >> s, sieve, factors); TMP_MARK; nx = SIZ (x); if (s == flag) { size = nx; square = TMP_ALLOC_LIMBS (size); MPN_COPY (square, PTR (x), nx); } else { size = nx << 1; square = TMP_ALLOC_LIMBS (size); mpn_sqr (square, PTR (x), nx); size -= (square[size - 1] == 0); } ns = SIZ (mswing); nx = size + ns; px = MPZ_NEWALLOC (x, nx); ASSERT (ns <= size); cy = mpn_mul (px, square, size, PTR(mswing), ns); /* n!= n$ * floor(n/2)!^2 */ TMP_FREE; SIZ(x) = nx - (cy == 0); } while (s != 0); TMP_FREE; } } } #undef FACTORS_PER_LIMB #undef FACTOR_LIST_STORE