mpir/mpz/oddfac_1.c
2014-02-27 21:35:04 +00:00

439 lines
12 KiB
C

/* mpz_oddfac_1(RESULT, N) -- Set RESULT to the odd factor of N!.
Contributed to the GNU project by Marco Bodrato.
THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.
IT IS ONLY SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.
IN FACT, IT IS ALMOST GUARANTEED THAT IT WILL CHANGE OR
DISAPPEAR IN A FUTURE GNU MP RELEASE.
Copyright 2010, 2011, 2012 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 3 of the License, or (at your
option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */
#include "mpir.h"
#include "gmp-impl.h"
#include "longlong.h"
#if GMP_LIMB_BITS == 64
#define ODD_FACTORIAL_TABLE_MAX CNST_LIMB(0x335281867ec241ef)
#define ODD_FACTORIAL_TABLE_LIMIT (25)
#define ODD_FACTORIAL_EXTTABLE_LIMIT (67)
#define ODD_DOUBLEFACTORIAL_TABLE_MAX CNST_LIMB(0x57e22099c030d941)
#define ODD_DOUBLEFACTORIAL_TABLE_LIMIT (33)
#else
#define ODD_FACTORIAL_TABLE_MAX CNST_LIMB(0x260eeeeb)
#define ODD_FACTORIAL_TABLE_LIMIT (16)
#define ODD_FACTORIAL_EXTTABLE_LIMIT (34)
#define ODD_DOUBLEFACTORIAL_TABLE_MAX CNST_LIMB(0x27065f73)
#define ODD_DOUBLEFACTORIAL_TABLE_LIMIT (19)
#endif
/* TODO:
- split this file in smaller parts with functions that can be recycled for different computations.
*/
/**************************************************************/
/* Section macros: common macros, for mswing/fac/bin (&sieve) */
/**************************************************************/
#define FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I) \
if ((PR) > (MAX_PR)) { \
(VEC)[(I)++] = (PR); \
(PR) = 1; \
}
#define FACTOR_LIST_STORE(P, PR, MAX_PR, VEC, I) \
do { \
if ((PR) > (MAX_PR)) { \
(VEC)[(I)++] = (PR); \
(PR) = (P); \
} else \
(PR) *= (P); \
} while (0)
#define LOOP_ON_SIEVE_CONTINUE(prime,end,sieve) \
__max_i = (end); \
\
do { \
++__i; \
if (((sieve)[__index] & __mask) == 0) \
{ \
(prime) = id_to_n(__i)
#define LOOP_ON_SIEVE_BEGIN(prime,start,end,off,sieve) \
do { \
mp_limb_t __mask, __index, __max_i, __i; \
\
__i = (start)-(off); \
__index = __i / GMP_LIMB_BITS; \
__mask = CNST_LIMB(1) << (__i % GMP_LIMB_BITS); \
__i += (off); \
\
LOOP_ON_SIEVE_CONTINUE(prime,end,sieve)
#define LOOP_ON_SIEVE_STOP \
} \
__mask = __mask << 1 | __mask >> (GMP_LIMB_BITS-1); \
__index += __mask & 1; \
} while (__i <= __max_i) \
#define LOOP_ON_SIEVE_END \
LOOP_ON_SIEVE_STOP; \
} while (0)
/*********************************************************/
/* Section sieve: sieving functions and tools for primes */
/*********************************************************/
#if WANT_ASSERT
static mp_limb_t
bit_to_n (mp_limb_t bit) { return (bit*3+4)|1; }
#endif
/* id_to_n (x) = bit_to_n (x-1) = (id*3+1)|1*/
static mp_limb_t
id_to_n (mp_limb_t id) { return id*3+1+(id&1); }
/* n_to_bit (n) = ((n-1)&(-CNST_LIMB(2)))/3U-1 */
static mp_limb_t
n_to_bit (mp_limb_t n) { return ((n-5)|1)/3U; }
#if WANT_ASSERT
static mp_size_t
primesieve_size (mp_limb_t n) { return n_to_bit(n) / GMP_LIMB_BITS + 1; }
#endif
/*********************************************************/
/* Section mswing: 2-multiswing factorial */
/*********************************************************/
/* Returns an approximation of the sqare root of x. *
* It gives: x <= limb_apprsqrt (x) ^ 2 < x * 9/4 */
static mp_limb_t
limb_apprsqrt (mp_limb_t x)
{
int s;
ASSERT (x > 2);
count_leading_zeros (s, x - 1);
s = GMP_LIMB_BITS - 1 - s;
return (CNST_LIMB(1) << (s >> 1)) + (CNST_LIMB(1) << ((s - 1) >> 1));
}
#if 0
/* A count-then-exponentiate variant for SWING_A_PRIME */
#define SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I) \
do { \
mp_limb_t __q, __prime; \
int __exp; \
__prime = (P); \
__exp = 0; \
__q = (N); \
do { \
__q /= __prime; \
__exp += __q & 1; \
} while (__q >= __prime); \
if (__exp) { /* Store $prime^{exp}$ */ \
for (__q = __prime; --__exp; __q *= __prime); \
FACTOR_LIST_STORE(__q, PR, MAX_PR, VEC, I); \
}; \
} while (0)
#else
#define SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I) \
do { \
mp_limb_t __q, __prime; \
__prime = (P); \
FACTOR_LIST_APPEND(PR, MAX_PR, VEC, I); \
__q = (N); \
do { \
__q /= __prime; \
if ((__q & 1) != 0) (PR) *= __prime; \
} while (__q >= __prime); \
} while (0)
#endif
#define SH_SWING_A_PRIME(P, N, PR, MAX_PR, VEC, I) \
do { \
mp_limb_t __prime; \
__prime = (P); \
if ((((N) / __prime) & 1) != 0) \
FACTOR_LIST_STORE(__prime, PR, MAX_PR, VEC, I); \
} while (0)
/* mpz_2multiswing_1 computes the odd part of the 2-multiswing
factorial of the parameter n. The result x is an odd positive
integer so that multiswing(n,2) = x 2^a.
Uses the algorithm described by Peter Luschny in "Divide, Swing and
Conquer the Factorial!".
The pointer sieve points to primesieve_size(n) limbs containing a
bit-array where primes are marked as 0.
Enough (FIXME: explain :-) limbs must be pointed by factors.
*/
static void
mpz_2multiswing_1 (mpz_ptr x, mp_limb_t n, mp_ptr sieve, mp_ptr factors)
{
mp_limb_t prod, max_prod;
mp_size_t j;
ASSERT (n >= 26);
j = 0;
prod = -(n & 1);
n &= ~ CNST_LIMB(1); /* n-1, if n is odd */
prod = (prod & n) + 1; /* the original n, if it was odd, 1 otherwise */
max_prod = GMP_NUMB_MAX / (n-1);
/* Handle prime = 3 separately. */
SWING_A_PRIME (3, n, prod, max_prod, factors, j);
/* Swing primes from 5 to n/3 */
{
mp_limb_t s;
{
mp_limb_t prime;
s = limb_apprsqrt(n);
ASSERT (s >= 5);
s = n_to_bit (s);
LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (5), s, 0,sieve);
SWING_A_PRIME (prime, n, prod, max_prod, factors, j);
LOOP_ON_SIEVE_END;
s++;
}
ASSERT (max_prod <= GMP_NUMB_MAX / 3);
ASSERT (bit_to_n (s) * bit_to_n (s) > n);
ASSERT (s <= n_to_bit (n / 3));
{
mp_limb_t prime;
mp_limb_t l_max_prod = max_prod * 3;
LOOP_ON_SIEVE_BEGIN (prime, s, n_to_bit (n/3), 0, sieve);
SH_SWING_A_PRIME (prime, n, prod, l_max_prod, factors, j);
LOOP_ON_SIEVE_END;
}
}
/* Store primes from (n+1)/2 to n */
{
mp_limb_t prime;
LOOP_ON_SIEVE_BEGIN (prime, n_to_bit (n >> 1) + 1, n_to_bit (n), 0,sieve);
FACTOR_LIST_STORE (prime, prod, max_prod, factors, j);
LOOP_ON_SIEVE_END;
}
if (LIKELY (j != 0))
{
factors[j++] = prod;
mpz_prodlimbs (x, factors, j);
}
else
{
PTR (x)[0] = prod;
SIZ (x) = 1;
}
}
#undef SWING_A_PRIME
#undef SH_SWING_A_PRIME
#undef LOOP_ON_SIEVE_END
#undef LOOP_ON_SIEVE_STOP
#undef LOOP_ON_SIEVE_BEGIN
#undef LOOP_ON_SIEVE_CONTINUE
#undef FACTOR_LIST_APPEND
/*********************************************************/
/* Section oddfac: odd factorial, needed also by binomial*/
/*********************************************************/
#if TUNE_PROGRAM_BUILD
#define FACTORS_PER_LIMB (GMP_NUMB_BITS / (LOG2C(FAC_DSC_THRESHOLD_LIMIT-1)+1))
#else
#define FACTORS_PER_LIMB (GMP_NUMB_BITS / (LOG2C(FAC_DSC_THRESHOLD-1)+1))
#endif
/* mpz_oddfac_1 computes the odd part of the factorial of the
parameter n. I.e. n! = x 2^a, where x is the returned value: an
odd positive integer.
If flag != 0 a square is skipped in the DSC part, e.g.
if n is odd, n > FAC_DSC_THRESHOLD and flag = 1, x is set to n!!.
If n is too small, flag is ignored, and an ASSERT can be triggered.
TODO: FAC_DSC_THRESHOLD is used here with two different roles:
- to decide when prime factorisation is needed,
- to stop the recursion, once sieving is done.
Maybe two thresholds can do a better job.
*/
void
mpz_oddfac_1 (mpz_ptr x, mp_limb_t n, unsigned flag)
{
ASSERT (n <= GMP_NUMB_MAX);
ASSERT (flag == 0 || (flag == 1 && n > ODD_FACTORIAL_TABLE_LIMIT && ABOVE_THRESHOLD (n, FAC_DSC_THRESHOLD)));
if (n <= ODD_FACTORIAL_TABLE_LIMIT)
{
PTR (x)[0] = __gmp_oddfac_table[n];
SIZ (x) = 1;
}
else if (n <= ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1)
{
mp_ptr px;
px = MPZ_NEWALLOC (x, 2);
umul_ppmm (px[1], px[0], __gmp_odd2fac_table[(n - 1) >> 1], __gmp_oddfac_table[n >> 1]);
SIZ (x) = 2;
}
else
{
unsigned s;
mp_ptr factors;
s = 0;
{
mp_limb_t tn;
mp_limb_t prod, max_prod, i;
mp_size_t j;
TMP_SDECL;
#if TUNE_PROGRAM_BUILD
ASSERT (FAC_DSC_THRESHOLD_LIMIT >= FAC_DSC_THRESHOLD);
ASSERT (FAC_DSC_THRESHOLD >= 2 * (ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 2));
#endif
/* Compute the number of recursive steps for the DSC algorithm. */
for (tn = n; ABOVE_THRESHOLD (tn, FAC_DSC_THRESHOLD); s++)
tn >>= 1;
j = 0;
TMP_SMARK;
factors = TMP_SALLOC_LIMBS (1 + tn / FACTORS_PER_LIMB);
ASSERT (tn >= FACTORS_PER_LIMB);
prod = 1;
#if TUNE_PROGRAM_BUILD
max_prod = GMP_NUMB_MAX / FAC_DSC_THRESHOLD_LIMIT;
#else
max_prod = GMP_NUMB_MAX / FAC_DSC_THRESHOLD;
#endif
ASSERT (tn > ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1);
do {
i = ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 2;
factors[j++] = ODD_DOUBLEFACTORIAL_TABLE_MAX;
do {
FACTOR_LIST_STORE (i, prod, max_prod, factors, j);
i += 2;
} while (i <= tn);
max_prod <<= 1;
tn >>= 1;
} while (tn > ODD_DOUBLEFACTORIAL_TABLE_LIMIT + 1);
factors[j++] = prod;
factors[j++] = __gmp_odd2fac_table[(tn - 1) >> 1];
factors[j++] = __gmp_oddfac_table[tn >> 1];
mpz_prodlimbs (x, factors, j);
TMP_SFREE;
}
if (s != 0)
/* Use the algorithm described by Peter Luschny in "Divide,
Swing and Conquer the Factorial!".
Improvement: there are two temporary buffers, factors and
square, that are never used together; with a good estimate
of the maximal needed size, they could share a single
allocation.
*/
{
mpz_t mswing;
mp_ptr sieve;
mp_size_t size;
TMP_DECL;
TMP_MARK;
flag--;
size = n / GMP_NUMB_BITS + 4;
ASSERT (primesieve_size (n - 1) <= size - (size / 2 + 1));
/* 2-multiswing(n) < 2^(n-1)*sqrt(n/pi) < 2^(n+GMP_NUMB_BITS);
one more can be overwritten by mul, another for the sieve */
MPZ_TMP_INIT (mswing, size);
/* Initialize size, so that ASSERT can check it correctly. */
ASSERT_CODE (SIZ (mswing) = 0);
/* Put the sieve on the second half, it will be overwritten by the last mswing. */
sieve = PTR (mswing) + size / 2 + 1;
size = (gmp_primesieve (sieve, n - 1) + 1) / log_n_max (n) + 1;
factors = TMP_ALLOC_LIMBS (size);
do {
mp_ptr square, px;
mp_size_t nx, ns;
mp_limb_t cy;
TMP_DECL;
s--;
ASSERT (ABSIZ (mswing) < ALLOC (mswing) / 2); /* Check: sieve has not been overwritten */
mpz_2multiswing_1 (mswing, n >> s, sieve, factors);
TMP_MARK;
nx = SIZ (x);
if (s == flag) {
size = nx;
square = TMP_ALLOC_LIMBS (size);
MPN_COPY (square, PTR (x), nx);
} else {
size = nx << 1;
square = TMP_ALLOC_LIMBS (size);
mpn_sqr (square, PTR (x), nx);
size -= (square[size - 1] == 0);
}
ns = SIZ (mswing);
nx = size + ns;
px = MPZ_NEWALLOC (x, nx);
ASSERT (ns <= size);
cy = mpn_mul (px, square, size, PTR(mswing), ns); /* n!= n$ * floor(n/2)!^2 */
TMP_FREE;
SIZ(x) = nx - (cy == 0);
} while (s != 0);
TMP_FREE;
}
}
}
#undef FACTORS_PER_LIMB
#undef FACTOR_LIST_STORE