/* mpn_rootrem

Copyright 2009 Jason Moxham

This file is part of the MPIR Library.

The MPIR Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published
by the Free Software Foundation; either version 2.1 of the License, or (at
your option) any later version.

The MPIR Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the MPIR Library; see the file COPYING.LIB.  If not, write
to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.
*/

// HERE IS A COPY OF THE OLD GMP ROOTREM WHICH WE RENAMED MPN_ROOTREM_BASECASE
// WE USE THIS FOR SMALL SIZES
// AND OF COURSE THE OLD GMP BOILERPLATE

/* mpn_rootrem(rootp,remp,ap,an,nth) -- Compute the nth root of {ap,an}, and
   store the truncated integer part at rootp and the remainder at remp.

   THE FUNCTIONS IN THIS FILE ARE INTERNAL FUNCTIONS WITH MUTABLE
   INTERFACES.  IT IS ONLY SAFE TO REACH THEM THROUGH DOCUMENTED INTERFACES.
   IN FACT, IT IS ALMOST GUARANTEED THAT THEY'LL CHANGE OR DISAPPEAR IN A
   FUTURE GNU MP RELEASE.

Copyright 2002, 2005 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB.  If not, write
to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA. */

/*
  We use Newton's method to compute the nth root of a:

    f(x)  := x^n - a

    f'(x) := n * x^(n-1)

    new x = x - f(x)/f'(x)
          = x - (x - a/x^(n-1))/n
          = x + (a/x^(n-1) - x)/n
          = (a/x^(n-1) + (n-1)*x)/n
*/
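
/* A small worked example of the iteration above (real arithmetic, not the
   exact limb-level code below): take a = 1000 and n = 3.  Starting from the
   overestimate x = 12,

     new x = (1000/12^2 + 2*12)/3 = (6.94... + 24)/3 = 10.3...

   and starting again from x = 10,

     new x = (1000/10^2 + 2*10)/3 = 30/3 = 10,

   so the iteration has converged on the cube root 10.  */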

#include "mpir.h"
#include "gmp-impl.h"
#include "longlong.h"

mp_size_t
mpn_rootrem_basecase (mp_ptr rootp, mp_ptr remp, mp_srcptr up, mp_size_t un, mp_limb_t nth)
{
  mp_ptr pp, qp, xp;
  mp_size_t pn, xn, qn;
  unsigned long int unb, xnb, bit;
  unsigned int cnt;
  mp_size_t i;
  unsigned long int n_valid_bits, adj;
  TMP_DECL;

  TMP_MARK;

  /* The extra factor 1.585 = log(3)/log(2) here is for the worst case
     overestimate of the root, i.e., when the code rounds a root that is
     2+epsilon to 3, and then powers this to a potentially huge power.  We
     could generalize the code for detecting root=1 a few lines below to deal
     with xnb <= k, for some small k.  For example, when xnb <= 2, meaning
     the root should be 1, 2, or 3, we could replace this factor by the much
     smaller log(5)/log(4).  */

#define PP_ALLOC (2 + (mp_size_t) (un*1.585))
  pp = TMP_ALLOC_LIMBS (PP_ALLOC);

  count_leading_zeros (cnt, up[un - 1]);
  unb = un * GMP_NUMB_BITS - cnt + GMP_NAIL_BITS;

  xnb = (unb - 1) / nth + 1;
  if (xnb == 1)
    {
      if (remp == 0)
        remp = pp;
      mpn_sub_1 (remp, up, un, (mp_limb_t) 1);
      MPN_NORMALIZE (remp, un);
      rootp[0] = 1;
      TMP_FREE;
      return un;
    }

  xn = (xnb + GMP_NUMB_BITS - 1) / GMP_NUMB_BITS;

  qp = TMP_ALLOC_LIMBS (PP_ALLOC);
  xp = TMP_ALLOC_LIMBS (xn + 1);

  /* Set initial root to only ones.  This is an overestimate of the actual root
     by less than a factor of 2.  */
  for (i = 0; i < xn; i++)
    xp[i] = GMP_NUMB_MAX;
  xp[xnb / GMP_NUMB_BITS] = ((mp_limb_t) 1 << (xnb % GMP_NUMB_BITS)) - 1;

  /* Improve the initial approximation, one bit at a time.  Keep the
     approximations >= root(U,nth).  */
  bit = xnb - 2;
  n_valid_bits = 0;
  for (i = 0; (nth >> i) != 0; i++)
    {
      mp_limb_t xl = xp[bit / GMP_NUMB_BITS];
      xp[bit / GMP_NUMB_BITS] = xl ^ (mp_limb_t) 1 << bit % GMP_NUMB_BITS;
      pn = mpn_pow_1 (pp, xp, xn, nth, qp);
      ASSERT_ALWAYS (pn < PP_ALLOC);
      /* If the new root approximation is too small, restore old value.  */
      if (! (un < pn || (un == pn && mpn_cmp (up, pp, pn) < 0)))
        xp[bit / GMP_NUMB_BITS] = xl;   /* restore old value */
      n_valid_bits += 1;
      if (bit == 0)
        goto done;
      bit--;
    }

  adj = n_valid_bits - 1;

  /* Newton loop.  Converges downwards towards root(U,nth).  Currently we use
     full precision from iteration 1.  Clearly, we should use just n_valid_bits
     of precision in each step, and thus save most of the computations.  */
  while (n_valid_bits <= xnb)
    {
      mp_limb_t cy;

      pn = mpn_pow_1 (pp, xp, xn, nth - 1, qp);
      ASSERT_ALWAYS (pn < PP_ALLOC);
      qp[xn - 1] = 0;           /* pad quotient to make it always xn limbs */
      mpn_tdiv_qr (qp, pp, (mp_size_t) 0, up, un, pp, pn); /* junk remainder */
      cy = mpn_addmul_1 (qp, xp, xn, nth - 1);
      if (un - pn == xn)
        {
          cy += qp[xn];
          if (cy == nth)
            {
              for (i = xn - 1; i >= 0; i--)
                qp[i] = GMP_NUMB_MAX;
              cy = nth - 1;
            }
        }

      qp[xn] = cy;
      qn = xn + (cy != 0);

      mpn_divrem_1 (xp, 0, qp, qn, nth);
      n_valid_bits = n_valid_bits * 2 - adj;
    }

  /* The computed result might be one unit too large.  Adjust as necessary.  */
done:
  pn = mpn_pow_1 (pp, xp, xn, nth, qp);
  ASSERT_ALWAYS (pn < PP_ALLOC);
  if (un < pn || (un == pn && mpn_cmp (up, pp, pn) < 0))
    {
      mpn_decr_u (xp, 1);
      pn = mpn_pow_1 (pp, xp, xn, nth, qp);
      ASSERT_ALWAYS (pn < PP_ALLOC);

      ASSERT_ALWAYS (! (un < pn || (un == pn && mpn_cmp (up, pp, pn) < 0)));
    }

  if (remp == 0)
    remp = pp;
  mpn_sub (remp, up, un, pp, pn);
  MPN_NORMALIZE (remp, un);
  MPN_COPY (rootp, xp, xn);
  TMP_FREE;
  return un;
}
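
/* Illustrative only -- a minimal sketch (not compiled) of how the basecase
   could be called directly at the mpn level to take the cube root of a
   normalized operand {up,un}.  The buffer sizes are assumptions based on the
   bounds above (root has about un/nth limbs, remainder at most un limbs),
   not part of any documented interface.  */
#if 0
static void
example_basecase_cbrt (mp_srcptr up, mp_size_t un)
{
  mp_ptr rootp = __GMP_ALLOCATE_FUNC_LIMBS ((un + 2) / 3 + 1);
  mp_ptr remp = __GMP_ALLOCATE_FUNC_LIMBS (un);
  mp_size_t rn;

  rn = mpn_rootrem_basecase (rootp, remp, up, un, 3); /* rn = limbs in remainder */
  /* ... use the root in rootp and the remainder {remp,rn} ... */

  __GMP_FREE_FUNC_LIMBS (remp, un);
  __GMP_FREE_FUNC_LIMBS (rootp, (un + 2) / 3 + 1);
}
#endif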

#if 0 /* this code needs more work to be faster than that in rootrem.c */

// HERE IS THE NEW CODE

/*
TODO

For large k we can calculate x^k faster as a float ie exp(k*ln(x)) or x^(1/k)=exp(ln(x)/k)

rather than doing it bitwise , round up all the truncation to the next limb , this should save
quite a lot of shifts , don't know how much this will save (if any) in practice

The powering is now a base2 left to right binary expansion , we could use the usual sliding base 2^k
expansion , although the most common roots are small so this is not likely to give us much in the common case

As most roots are for small k , we can do the powering via an optimized addition chain , ie some sort of
table lookup

Merge this reciprocal with our reciprocal used in our Barrett (and/or Newton) division

Currently we calc x^(1/k) as (x^(-1/k))^(-1/1)
or (x^(-1/1))^(-1/k)
could also try x(x^(-1/k)^(k-1)) (*)
or (x^(-1/a))^(-1/b) where k=ab
this last one is SLOWER as high k is fast , so make our computation as small as possible as fast as possible
So (*) is the only alternative , which I guess is only faster for small k ???

Rewrite in terms of mpf (or similar) like it was when I started , but I lost it , which will make the code
below much clearer and smaller.

multrunc can use high half mul

if k<496 (32 bit cpus) then nroot_vsmall can be further reduced to a nroot_vvsmall

change signed long etc to mp_size_t ? mainly for MSVC

At the moment we have just one threshold , need a separate one for each k , and some sort of rule for large k
*/

/* Algorithms from "Detecting Perfect Powers in Essentially Linear Time" ,
   Daniel J Bernstein http://cr.yp.to/papers.html */

// define this to 1 to test the nroot_small code
#define TESTSMALL 0

// if k<=floor((2^(GMP_LIMB_BITS-1)-33)/66) && k<=2^(GMP_LIMB_BITS-4) then can call vsmall
// ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ is always the smaller of the two bounds
#define NROOT_VSMALL_MIN (((((mp_limb_t)1)<<(GMP_LIMB_BITS-1))-33)/66)
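
/* The helper macros below manipulate values of the form {x,xn}*2^xp , ie a
   limb vector with a separate bit exponent.  As I read them (summary added
   for orientation , not part of the original comments):
     shiftright / shiftleft     shift {x,xn} in place by c bits , updating xn
     shiftrights / shiftlefts   as above but writing the shifted {y,yn} into x
     mul_ui , tdiv_q_ui         multiply / divide {x,xn} in place by one limb
     bigmultrunc                multiply two such values , keep only the top B
                                bits of the product , adding the discarded bit
                                count to the exponent
     bigsqrtrunc                the same but squaring a single value
     subtract                   x = y - z with y > z , normalizing the size  */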

// shiftright requires an extra gmp_numb_bits
#define shiftright(x,xn,c) \
  do{(xn)=(xn)-(c)/GMP_NUMB_BITS; \
     if((c)%GMP_NUMB_BITS!=0) \
       {mpn_rshift((x),(x)+(c)/GMP_NUMB_BITS,(xn),(c)%GMP_NUMB_BITS); \
        if((x)[(xn)-1]==0)(xn)--;} \
     else{if((c)/GMP_NUMB_BITS!=0)MPN_COPY_INCR((x),(x)+(c)/GMP_NUMB_BITS,(xn));} \
    }while(0)

// shiftrights requires an extra gmp_numb_bits
#define shiftrights(x,xn,y,yn,c) \
  do{(xn)=(yn)-(c)/GMP_NUMB_BITS; \
     if((c)%GMP_NUMB_BITS!=0) \
       {mpn_rshift((x),(y)+(c)/GMP_NUMB_BITS,(xn),(c)%GMP_NUMB_BITS); \
        if((x)[(xn)-1]==0)(xn)--;} \
     else{MPN_COPY_INCR((x),(y)+(c)/GMP_NUMB_BITS,(xn));} \
    }while(0)

#define shiftleft(x,xn,c) \
  do{mp_limb_t __t; \
     if((c)%GMP_NUMB_BITS!=0) \
       {__t=mpn_lshift((x)+(c)/GMP_NUMB_BITS,(x),(xn),(c)%GMP_NUMB_BITS); \
        (xn)=(xn)+(c)/GMP_NUMB_BITS; \
        if(__t!=0){(x)[(xn)]=__t;(xn)++;}} \
     else \
       {if((c)/GMP_NUMB_BITS!=0) \
          {MPN_COPY_DECR((x)+(c)/GMP_NUMB_BITS,(x),(xn)); \
           (xn)=(xn)+(c)/GMP_NUMB_BITS;}} \
     if((c)/GMP_NUMB_BITS!=0)MPN_ZERO((x),(c)/GMP_NUMB_BITS); \
    }while(0)

#define shiftlefts(x,xn,y,yn,c) \
  do{mp_limb_t __t; \
     if((c)%GMP_NUMB_BITS!=0) \
       {__t=mpn_lshift((x)+(c)/GMP_NUMB_BITS,(y),(yn),(c)%GMP_NUMB_BITS); \
        (xn)=(yn)+(c)/GMP_NUMB_BITS; \
        if(__t!=0){(x)[(xn)]=__t;(xn)++;}} \
     else \
       {MPN_COPY_DECR((x)+(c)/GMP_NUMB_BITS,(y),(yn)); \
        (xn)=(yn)+(c)/GMP_NUMB_BITS;} \
     if((c)/GMP_NUMB_BITS!=0)MPN_ZERO((x),(c)/GMP_NUMB_BITS); \
    }while(0)

#define mul_ui(x,xn,k) do{mp_limb_t __t;__t=mpn_mul_1((x),(x),(xn),(k));if(__t!=0){(x)[(xn)]=__t;(xn)++;}}while(0)

// tdiv_q_ui requires an extra gmp_numb_bits
#define tdiv_q_ui(x,xn,k) do{mpn_divrem_1((x),0,(x),(xn),(k));if((x)[(xn)-1]==0)(xn)--;}while(0)

// bigmultrunc requires an extra gmp_numb_bits
#define bigmultrunc(xv,xn,xp,yv,yn,yp,B) \
  do{signed long __f;mp_limb_t __t; \
     (xp)+=(yp); \
     if((xn)>=(yn)){__t=mpn_mul(t1,(xv),(xn),(yv),(yn));} \
     else{__t=mpn_mul(t1,(yv),(yn),(xv),(xn));} \
     t1n=(xn)+(yn);if(__t==0)t1n--; \
     __f=sizetwo(t1,t1n);__f=__f-(B); \
     if(__f>0) \
       {shiftrights((xv),(xn),t1,t1n,__f);(xp)+=__f;} \
     else{MPN_COPY_INCR((xv),t1,t1n);(xn)=t1n;} \
    }while(0)

// bigsqrtrunc requires an extra gmp_numb_bits
#define bigsqrtrunc(xv,xn,xp,B) \
  do{signed long __f; \
     (xp)+=(xp); \
     mpn_sqr(t1,(xv),(xn)); \
     t1n=(xn)*2;if(t1[t1n-1]==0)t1n--; \
     __f=sizetwo(t1,t1n);__f=__f-(B); \
     if(__f>0) \
       {shiftrights((xv),(xn),t1,t1n,__f);(xp)+=__f;} \
     else{MPN_COPY_INCR((xv),t1,t1n);(xn)=t1n;} \
    }while(0)

// must have y>z value wise
#define subtract(x,xn,y,yn,z,zn) \
  do{mpn_sub((x),(y),(yn),(z),(zn));/* no carry */ \
     (xn)=(yn);while((x)[(xn)-1]==0)(xn)--; \
    }while(0)

// returns ceil(lg(x)) where x!=0
signed long
clg (unsigned long x)
{
  mp_limb_t t;

  ASSERT (x != 0);
#if BITS_PER_ULONG<=GMP_LIMB_BITS
  if (x == 1)
    return 0;
  count_leading_zeros (t, (mp_limb_t) (x - 1));
  return GMP_LIMB_BITS - t;
#endif
#if BITS_PER_ULONG>GMP_LIMB_BITS
#error FIXME
#endif
}
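
/* For example , clg(1)=0 , clg(2)=1 , clg(3)=clg(4)=2 , clg(5)=3 , ie the
   smallest count of bits such that x <= 2^clg(x).  */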

// returns sizeinbase(x,2) x!=0
static inline signed long
sizetwo (mp_srcptr x, mp_size_t xn)
{
  signed long r;
  count_leading_zeros (r, x[xn - 1]);
  return xn * GMP_NUMB_BITS + GMP_NAIL_BITS - r;
}

// returns sizeinbase(x-1,2) and returns 0 if x=1
static inline signed long
sizetwom1 (mp_srcptr x, mp_size_t xn)
{
  signed long r, i;
  mp_limb_t v;

  ASSERT (xn > 1 || (xn == 1 && x[0] != 0));
  if (xn == 1 && x[0] == 1)
    return 0;
  r = sizetwo (x, xn);
  i = xn - 1;
  v = x[i];
  if ((v & (v - 1)) != 0)
    return r;
  for (i--; i >= 0; i--)
    if (x[i] != 0)
      return r;
  return r - 1;
}

/* Algorithm B

   Calculates Z such that Z*(1-2^(-b)) < Y^(-1/k) < Z*(1+2^(-b))
   ie a b bit approximation to the reciprocal of the kth root of Y
   where Z,Y>0 are real , 1<=b<=3+ceil(lg(k)) is an int , k>=1 is an int

   Z={z,zn}*2^zp where zp is the return value , and zn is modified
   Y={y,yn}*2^yp where {y,yn}>=2 and the leading limb of {y,yn} is not zero
   {z,zn} requires space for GMP_LIMB_BITS+4 bits
   {z,zn} and {y,yn} must be completely distinct
*/
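
/* For instance (illustration only) , with Y = 2 (y[0]=2 , yn=1 , yp=0) ,
   k = 2 and b = 4 , one acceptable result is z[0]=11 , zn=1 with return
   value zp=-4 : Z = 11*2^-4 = 0.6875 , which lies within a factor
   (1 +/- 2^-4) of 2^(-1/2) ~= 0.7071.  */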
static signed long
nroot_small (mp_ptr z, mp_size_t * zn, mp_srcptr y, mp_size_t yn,
             signed long yp, mp_limb_t b, mp_limb_t k)
{
  signed long zp, f, j, g, B, t2p, t3p, t4p;
  int ret;
  mp_limb_t mask;
  mp_size_t t1n, t2n, t3n, t4n;
  mp_limb_t t1[BITS_TO_LIMBS (2 * (GMP_LIMB_BITS + 8) + GMP_NUMB_BITS)];
  mp_limb_t t2[BITS_TO_LIMBS (GMP_LIMB_BITS + 8 + GMP_NUMB_BITS)];
  mp_limb_t t3[BITS_TO_LIMBS (GMP_LIMB_BITS + 8 + 2 + GMP_NUMB_BITS)];
  mp_limb_t t4[BITS_TO_LIMBS (GMP_LIMB_BITS + 8 + GMP_NUMB_BITS)];

  ASSERT (k != 0);
  ASSERT (b >= 1);
  ASSERT (b <= (mp_limb_t) (clg (k) + 3));      // bit counts are maximums , ie can have less
  ASSERT (yn > 1 || (yn == 1 && y[0] >= 2));
  ASSERT (y[yn - 1] != 0);
  g = sizetwom1 (y, yn);
  g = g + yp;
  g = -g;
  if (g >= 0)
    {
      g = g / k;
    }
  else
    {
      g = -((k - 1 - g) / k);
    }
  B = 66 * (2 * k + 1);
  B = clg (B);
  ASSERT (B <= GMP_LIMB_BITS + 8);
  ASSERT (b + 1 <= GMP_LIMB_BITS + 4);
  f = sizetwo (y, yn);
  if (f > B)
    {
      shiftrights (t4, t4n, y, yn, f - B);
      t4p = yp + f - B;
    }
  else
    {
      MPN_COPY_INCR (t4, y, yn);
      t4n = yn;
      t4p = yp;
    }                           // t4 has B bits+numb space
  *zn = 1;
  z[0] = 3;
  zp = g - 1;                   // z has 2 bits
  for (j = 1; (unsigned long) j < b; j++)
    {
      f = sizetwo (z, *zn);
      if (f > B)
        {
          shiftrights (t2, t2n, z, *zn, f - B);
          t2p = zp + f - B;
        }
      else
        {
          MPN_COPY_INCR (t2, z, *zn);
          t2n = *zn;
          t2p = zp;
        }                       // t2 has B bits+numb space
      if (k != 1)
        {
          MPN_COPY_INCR (t3, t2, t2n);
          t3n = t2n;
          t3p = t2p;            // t3 has B bits
          mask = (((mp_limb_t) 1) << (GMP_LIMB_BITS - 1));
          while ((mask & k) == 0)
            mask >>= 1;
          mask >>= 1;
          for (; mask != 0; mask >>= 1)
            {
              bigsqrtrunc (t2, t2n, t2p, B);    // t2 has B bits+numb space , t1 has 2*B bits+numb space
              if ((k & mask) != 0)
                {
                  bigmultrunc (t2, t2n, t2p, t3, t3n, t3p, B);
                }
            }
        }                       // t2 has B bits+numb space , t1 has 2*B bits+numb space
      bigmultrunc (t2, t2n, t2p, t4, t4n, t4p, B);      // t2 has B bits+numb space , t1 has 2*B bits+numb space
      ret = 0;
      f = sizetwo (t2, t2n);
      if (f - 1 <= 8 - (t2p + 10))
        ret = 1;
      if (f - 1 >= 10 - (t2p + 10))
        ret = 0;
      if (f - 1 == 9 - (t2p + 10))
        {                       // so 512 <= t2.2^(t2p+10) < 1024
          if (t2p + 10 >= 0)
            {
              shiftlefts (t3, t3n, t2, t2n, t2p + 10);
            }                   // t3 has 10 bits
          else
            {
              shiftrights (t3, t3n, t2, t2n, -t2p - 10);
            }                   // t3 has 10 bits+numb space
          if (t3n == 1 && t3[0] <= 993)
            ret = 1;
        }
      if (ret != 0)
        {
          shiftleft (z, *zn, zp - (g - j - 1)); // z has j+2 bits
          {
            mp_limb_t __t;
            __t = mpn_add_1 (z, z, *zn, 1);
            if (__t != 0)
              {
                z[*zn] = 1;
                (*zn)++;
              }
          }
          zp = g - j - 1;
          continue;
        }
      f = sizetwom1 (t2, t2n);
      if (f + t2p >= 1)
        {
          shiftleft (z, *zn, zp - (g - j - 1));
          mpn_sub_1 (z, z, *zn, 1);
          zp = g - j - 1;
        }
    }                           // z has j+2 bits
  return zp;
}                               // z has b+1 bits

/* Algorithm B for k<=NROOT_VSMALL_MIN=(((((mp_limb_t)1)<<(GMP_LIMB_BITS-1))-33)/66)

   Calculates Z such that Z*(1-2^(-b)) < Y^(-1/k) < Z*(1+2^(-b))
   ie a b bit approximation to the reciprocal of the kth root of Y
   where Z,Y>0 are real , 1<=b<=3+ceil(lg(k)) is an int , k>=1 is an int

   Z=z[0]*2^zp where zp is the return value
   Y={y,yn}*2^yp where {y,yn}>=2 and the leading limb of {y,yn} is not zero
   {z,1} and {y,yn} must be completely distinct

   Note : the restriction on k allows calculations to be less than limb sized
   assumes GMP_LIMB_BITS>=10
*/
static signed long
nroot_vsmall (mp_ptr z, mp_srcptr y, mp_size_t yn, signed long yp,
              mp_limb_t b, mp_limb_t k)
{
  signed long f1, zp, f, j, g, B, t1p, t2p, t3p;
  int ret;
  mp_limb_t t1, t2, t3, qh, ql, mask;

  ASSERT (k != 0);
  ASSERT (b >= 1);
  ASSERT (b <= (mp_limb_t) (clg (k) + 3));
  ASSERT (yn > 1 || (yn == 1 && y[0] >= 2));
  ASSERT (y[yn - 1] != 0);
  ASSERT (GMP_LIMB_BITS >= 10);
  ASSERT (k <= NROOT_VSMALL_MIN);
  g = sizetwom1 (y, yn);
  B = 66 * (2 * k + 1);
  B = clg (B);
  ASSERT (B <= GMP_LIMB_BITS);
  ASSERT (b <= GMP_LIMB_BITS - 1);
#if GMP_NAIL_BITS==0
  t3p = yp;
  t3 = y[yn - 1];
  count_leading_zeros (f1, t3);
  f = yn * GMP_NUMB_BITS + GMP_NAIL_BITS - f1;  // related to g (internally)
  f1 = GMP_LIMB_BITS - f1;
  if (f1 >= B)
    {
      t3 >>= f1 - B;
      t3p += f - B;
    }
  else
    {
      if (yn != 1)
        {
          t3 = (t3 << (B - f1)) | ((y[yn - 2]) >> (GMP_LIMB_BITS - B + f1));
          t3p += f - B;
        }
    }
#endif
#if GMP_NAIL_BITS!=0
#if GMP_NUMB_BITS*2 < GMP_LIMB_BITS
#error not supported
#endif
  f = sizetwo (y, yn);
  if (f > B)
    {
      mp_limb_t t3t[2];
      mp_size_t t3n;
      t3p = yp + f - B;
      shiftrights (t3t, t3n, y, yn, f - B);
      t3 = t3t[0];
    }
  else
    {
      t3p = yp;
      if (f <= GMP_NUMB_BITS)
        {
          t3 = y[0];
        }
      else
        {
          t3 = (y[0] | (y[1] << (GMP_NUMB_BITS)));
        }
    }
#endif
  g = g + yp;
  g = -g;
  if (g >= 0)
    {
      g = g / k;
    }
  else
    {
      g = -((k - 1 - g) / k);
    }
  z[0] = 3;
  zp = g - 1;
  for (j = 1; (unsigned long) j < b; j++)
    {
      count_leading_zeros (f, z[0]);
      f = GMP_LIMB_BITS - f;
      if (f > B)
        {
          t1 = (z[0] >> (f - B));
          t1p = zp + f - B;
        }
      else
        {
          t1 = z[0];
          t1p = zp;
        }
      if (k != 1)
        {
          t2 = t1;
          t2p = t1p;
          mask = (((mp_limb_t) 1) << (GMP_LIMB_BITS - 1));
          while ((mask & k) == 0)
            mask >>= 1;
          mask >>= 1;
          for (; mask != 0; mask >>= 1)
            {
              umul_ppmm (qh, ql, t1, t1);
              t1p += t1p;
              if (qh == 0)
                {               //count_leading_zeros(f,ql);f=GMP_LIMB_BITS-f;f=f-B;if(f>0){t1=(ql>>f);t1p+=f;}else{t1=ql;}
                  t1 = ql;      // be lazy
                }
              else
                {
                  count_leading_zeros (f, qh);
                  f = 2 * GMP_LIMB_BITS - f;
                  f = f - B;
                  t1p += f;     // only need these cases when B>=16
                  if (f < GMP_LIMB_BITS)
                    {
                      t1 = (ql >> f);
                      t1 |= (qh << (GMP_LIMB_BITS - f));
                    }
                  else
                    {
                      t1 = (qh >> (f - GMP_LIMB_BITS));
                    }
                }
              if ((k & mask) != 0)
                {
                  umul_ppmm (qh, ql, t1, t2);
                  t1p += t2p;
                  if (qh == 0)
                    {           //count_leading_zeros(f,ql);f=GMP_LIMB_BITS-f;f=f-B;if(f>0){t1=(ql>>f);t1p+=f;}else{t1=ql;}
                      t1 = ql;  // be lazy
                    }
                  else
                    {
                      count_leading_zeros (f, qh);
                      f = 2 * GMP_LIMB_BITS - f;
                      f = f - B;
                      t1p += f;
                      if (f < GMP_LIMB_BITS)
                        {
                          t1 = (ql >> f);
                          t1 |= (qh << (GMP_LIMB_BITS - f));
                        }
                      else
                        {
                          t1 = (qh >> (f - GMP_LIMB_BITS));
                        }
                    }
                }
            }
        }
      umul_ppmm (qh, ql, t1, t3);
      t1p += t3p;
      if (qh == 0)
        {
          count_leading_zeros (f, ql);
          f = GMP_LIMB_BITS - f;
          f = f - B;
          if (f > 0)
            {
              t1 = (ql >> f);
              t1p += f;
            }
          else
            {
              t1 = ql;
            }
          // don't be lazy here as it could screw up the comparison below
        }
      else
        {
          count_leading_zeros (f, qh);
          f = 2 * GMP_LIMB_BITS - f;
          f = f - B;
          t1p += f;
          if (f < GMP_LIMB_BITS)
            {
              t1 = (ql >> f);
              t1 |= (qh << (GMP_LIMB_BITS - f));
            }
          else
            {
              t1 = (qh >> (f - GMP_LIMB_BITS));
            }
        }
      ret = 0;
      ASSERT (t1 != 0);
      count_leading_zeros (f, t1);
      f = GMP_LIMB_BITS - f;
      if (f - 1 <= 8 - (t1p + 10))
        ret = 1;
      if (f - 1 >= 10 - (t1p + 10))
        ret = 0;
      if (f - 1 == 9 - (t1p + 10))
        {                       // so 512 <= t1.2^(t1p+10) < 1024
          if (t1p + 10 >= 0)
            {
              t2 = (t1 << (t1p + 10));
            }
          else
            {
              t2 = (t1 >> (-t1p - 10));
            }
          if (t2 <= 993)
            ret = 1;
        }
      if (ret != 0)
        {
          z[0] = (z[0] << (zp - (g - j - 1)));
          z[0]++;
          zp = g - j - 1;
          continue;
        }
      if (t1 == 1)
        {
          f = 0;
        }
      else
        {
          count_leading_zeros (f, t1 - 1);
          f = GMP_LIMB_BITS - f;
        }
      if (f + t1p >= 1)
        {
          z[0] = (z[0] << (zp - (g - j - 1)));
          z[0]--;
          zp = g - j - 1;
        }
    }
  return zp;
}

/* Algorithm N

   Calculates Z such that Z*(1-2^(-b)) < Y^(-1/k) < Z*(1+2^(-b))
   ie a b bit approximation to the reciprocal of the kth root of Y
   where Z,Y>0 are real , b>=1 is an int , k>=1 is an int

   Z={z,zn}*2^zp where zp is the return value , and zn is modified
   Y={y,yn}*2^yp where {y,yn}>=2 and the leading limb of {y,yn} is not zero

   z satisfies 1 <= z < 2^(b+7)
   zp satisfies -lg(Y)/k-b-7-lg(3/2) < zp < -lg(Y)/k+1

   {z,zn} and {y,yn} and temps t1,t2,t3 must be completely distinct
   z  requires b+6+GMP_NUMB_BITS+max(1,clgk) bits
   t1 requires max( 2*b+12+GMP_NUMB_BITS , b+6+clg(k+1) ) bits
   t2 requires b+6+GMP_NUMB_BITS bits
   t3 requires b+6+GMP_NUMB_BITS bits
*/
static signed long
nroot (mp_ptr z, mp_size_t * zn, mp_srcptr y, mp_size_t yn, signed long yp,
       mp_limb_t b, mp_limb_t k, signed long clgk, mp_ptr t1, mp_ptr t2,
       mp_ptr t3)
{
  mp_size_t t1n, t2n, t3n;
  mp_limb_t mask, kpow2, k1pow2;
  signed long t1p, zp, t2p, t3p, f, bd, bs[GMP_LIMB_BITS * 2], c;       // FIXME how many

  ASSERT (k != 0);
  ASSERT (yn > 1 || (yn == 1 && y[0] >= 2));
  ASSERT (y[yn - 1] != 0);
  bs[0] = b;                    // bit counts are maximums , ie can have less
  for (c = 0;; c++)
    {
      if (bs[c] <= 3 + clgk)
        break;
      bs[c + 1] = 1 + (bs[c] + clgk) / 2;
    }                           // so bs[c]<=3+clgk
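  /* Example of the precision schedule above (added for orientation) : with
     b = 40 and clgk = 2 it builds bs[] = 40,22,13,8,6,5 and stops at c = 5 ,
     so the initial approximation is found to bs[5] = 5 bits and each pass of
     the loop below roughly doubles the number of correct bits back up to b.  */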
#if GMP_LIMB_BITS>=10 && TESTSMALL==0
  if (k <= NROOT_VSMALL_MIN)
    { zp = nroot_vsmall (z, y, yn, yp, bs[c], k); *zn = 1; }
  else
    { zp = nroot_small (z, (mp_limb_t *) zn, y, yn, yp, bs[c], k); }
#endif
#if GMP_LIMB_BITS<10 || TESTSMALL==1
  zp = nroot_small (z, zn, y, yn, yp, bs[c], k);
#endif // bs[1]=1+floor((b+clgk)/2) max bd=b+6 // z has bs[c]+1 bits
  kpow2 = 0; k1pow2 = 0;        // shortcut for div,mul to a shift instead
  if (POW2_P (k)) {count_leading_zeros (kpow2, k); kpow2 = GMP_LIMB_BITS - kpow2;}      // k=2^(kpow2-1)
  if (POW2_P (k + 1)) {count_leading_zeros (k1pow2, k + 1); k1pow2 = GMP_LIMB_BITS - k1pow2;}   // k+1=2^(k1pow2-1)
  for (; c != 0; c--)
    { bd = 2 * bs[c] + 4 - clgk;
      f = sizetwo (z, *zn);     // is this trunc ever going to do something real?
      if (f > bd) { shiftright (z, *zn, f - bd); zp = zp + f - bd; }    // z has bd bits + numb space
      MPN_COPY_INCR (t3, z, *zn); t3n = *zn; t3p = zp;
      mask = (((mp_limb_t) 1) << (GMP_LIMB_BITS - 1));  // t3 has bd bits
      while ((mask & (k + 1)) == 0) mask >>= 1;
      for (mask >>= 1; mask != 0; mask >>= 1)
        { bigsqrtrunc (t3, t3n, t3p, bd);       // t3 has bd bits + numb space t1 has 2*bd bits + numb space
          if (((k + 1) & mask) != 0) {bigmultrunc (t3, t3n, t3p, z, *zn, zp, bd);} }    // t3 has bd bits + numb space t1 has 2*bd bits + numb space
      if (k1pow2) {shiftleft (z, *zn, k1pow2 - 1);} else {mul_ui (z, *zn, k + 1);}      // z has bd+clg(k+1) bits
      f = sizetwo (y, yn);
      if (f > bd) { shiftrights (t2, t2n, y, yn, f - bd); t2p = yp + f - bd; }  // t2 has bd bits + numb space
      else { MPN_COPY_INCR (t2, y, yn); t2n = yn; t2p = yp; }   // this case may not happen if this is only called by mpn_root
      bigmultrunc (t3, t3n, t3p, t2, t2n, t2p, bd);     // t3 has bd bits + numb space t1 has 2*bd bits + numb space
      if (zp <= t3p)    // which branch depends on yp ????? and only want the top bd+clgk bits exactly
        { shiftlefts (t1, t1n, t3, t3n, t3p - zp);      // t1 has bd+clg(k+1) bits
          subtract (t1, t1n, z, *zn, t1, t1n);
          t1p = zp; }   // t1 has bd+clg(k+1) bits
      else
        { ASSERT (zp - t3p + sizetwo (z, *zn) <= 2 * b + 12 + GMP_NUMB_BITS);   // not allocated enough mem
          shiftlefts (t1, t1n, z, *zn, zp - t3p);       // t1 has 2*b+12+numb
          subtract (t1, t1n, t1, t1n, t3, t3n);
          t1p = t3p; }  // t1 has 2*b+12+numb
      f = sizetwo (t1, t1n);
      if (f >= bd + clgk) {shiftrights (z, *zn, t1, t1n, f - bd - clgk);}
      else {shiftlefts (z, *zn, t1, t1n, bd + clgk - f);}       // z has bd+clgk bits + numb space
      zp = t1p + f - bd - clgk;
      if (kpow2) {shiftright (z, *zn, kpow2 - 1);} else {tdiv_q_ui (z, *zn, k);}
    }                           // z has bd+1 bits + numb space (maybe prove just bd bits ?)
  return zp;
}                               // z has b+7 bits

/* same as Algorithm N but for k=1

   Calculates Z such that Z*(1-2^(-b)) < Y^(-1/k) < Z*(1+2^(-b))
   ie a b bit approximation to the reciprocal of the kth root of Y
   where Z,Y>0 are real , b>=1 is an int , k>=1 is an int

   Z={z,zn}*2^zp where zp is the return value , and zn is modified
   Y={y,yn}*2^yp where {y,yn}>=2 and the leading limb of {y,yn} is not zero

   and z satisfies 2^b <= z <= 2^(b+1)
   and zp satisfies zp=-sizetwo(y,yn)-b-yp

   {z,zn} and {y,yn} and temps t1,t2 must be completely distinct
   z  requires 2+floor((sizetwo(y,yn)+b+1)/GMP_NUMB_BITS)-yn limbs
   t1 requires 1+floor((sizetwo(y,yn)+b+1)/GMP_NUMB_BITS) limbs
   t2 requires yn limbs
*/
static signed long
finv_fast (mp_ptr z, int *zn, mp_srcptr y, mp_size_t yn, signed long yp,
           unsigned long b, mp_ptr t1, mp_ptr t2)
{
  signed long c;
  signed long zp;
  mp_size_t t1n;

  c = sizetwo (y, yn) + b;
  MPN_COPY_INCR (t1, y, yn);
  t1n = yn;                     // t1 has yn limbs
  MPN_ZERO (t1 + t1n, (c + 1) / GMP_NUMB_BITS + 1 - t1n);       // t1 has 1+floor((c+1)/numb) limbs
  t1[(c + 1) / GMP_NUMB_BITS] = (((mp_limb_t) 1) << ((c + 1) % GMP_NUMB_BITS)); // t1 has 1+floor((c+1)/numb) limbs
  t1n = (c + 1) / GMP_NUMB_BITS + 1;
  ASSERT (y[yn - 1] != 0);
  mpn_tdiv_qr (z, t2, 0, t1, t1n, y, yn);       // bdivmod could be faster // z has 2+floor((c+1)/numb)-yn limbs , t2 has yn limbs
  *zn = t1n - yn + 1;
  while (*zn != 0 && z[*zn - 1] == 0)
    (*zn)--;
  shiftright (z, *zn, 1);
  zp = -c - yp;
  return zp;
}

/* calculates X and R such that X^k<=Y and (X+1)^k>Y
   where X={x,xn} Y={y,yn} R={r,rn} , only calculates R if r!=NULL

   R satisfies R < (X+1)^k-X^k
   X satisfies X^k <= Y

   X needs ceil(yn/k) limb space
   R needs yn limb space if r!=0
   returns the size of the remainder if r!=0
*/
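
/* For example , Y = 1000 with k = 3 gives X = 10 , R = 0 , and Y = 1001
   with k = 3 gives X = 10 , R = 1 (since 10^3 = 1000 <= 1001 < 11^3 = 1331).  */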
mp_size_t
mpn_rootrem (mp_ptr xp, mp_ptr r, mp_srcptr y, mp_size_t yn, mp_limb_t k)
{
  unsigned long b, clgk;
  signed long d, tp, zp;
  mpz_t t4, t3;
  mp_ptr x, t1, t2;
  mp_size_t t2n, xn, rn;
  mp_limb_t val;
  mp_size_t pos, bit;

  if (BELOW_THRESHOLD (yn, ROOTREM_THRESHOLD))
    return mpn_rootrem_basecase (xp, r, y, yn, k);

  d = 8;                        // any d>=1 will do , for testing to its limits use d=1 TUNEME
  b = sizetwo (y, yn);
  b = (b + k - 1) / k + 2 + d;
  clgk = clg (k);

  x = __GMP_ALLOCATE_FUNC_LIMBS (BITS_TO_LIMBS (b + 7 + GMP_NUMB_BITS));
  t1 = __GMP_ALLOCATE_FUNC_LIMBS (BITS_TO_LIMBS (2 * b + 12 + GMP_NUMB_BITS));
  t2 = __GMP_ALLOCATE_FUNC_LIMBS (BITS_TO_LIMBS (b + 6 + clgk + 1 + GMP_NUMB_BITS));
  mpz_init2 (t3, b + 6 + GMP_NUMB_BITS * 2);
  mpz_init2 (t4, b + 6 + GMP_NUMB_BITS);
  zp = nroot (t2, &t2n, y, yn, 0, b, k, clgk, t1, PTR (t3), PTR (t4));
  /* 1 <= t2 < 2^(b+7)   -lg(Y)/k-b-7-lg(3/2) < zp < -lg(Y)/k+1   where Y={y,yn} */
  tp = finv_fast (PTR (t3), &SIZ (t3), t2, t2n, zp, b, t1, PTR (t4));   // t3 is our approx root
  /* 2^b <= t3 <= 2^(b+1)   tp=-sizetwo(t2,t2n)-b-zp */
  ASSERT (tp <= -d - 1);
  pos = (-tp - d - 1 + 1) / GMP_NUMB_BITS;
  bit = (-tp - d - 1 + 1) % GMP_NUMB_BITS;
  val = (((mp_limb_t) 1) << bit);
  mpn_sub_1 (PTR (t3) + pos, PTR (t3) + pos, SIZ (t3) - pos, val);
  if (PTR (t3)[SIZ (t3) - 1] == 0)
    SIZ (t3)--;
  shiftrights (PTR (t4), SIZ (t4), PTR (t3), SIZ (t3), -tp);
  if (mpn_add_1 (PTR (t3) + pos, PTR (t3) + pos, SIZ (t3) - pos, val))
    {
      PTR (t3)[SIZ (t3)] = 1;
      SIZ (t3)++;
    }
  pos = (-tp - d - 1) / GMP_NUMB_BITS;
  bit = (-tp - d - 1) % GMP_NUMB_BITS;
  val = (((mp_limb_t) 1) << bit);
  if (mpn_add_1 (PTR (t3) + pos, PTR (t3) + pos, SIZ (t3) - pos, val))
    {
      PTR (t3)[SIZ (t3)] = 1;
      SIZ (t3)++;
    }
  shiftright (PTR (t3), SIZ (t3), -tp);
  if (mpz_cmp (t4, t3) == 0)
    {
      xn = SIZ (t3);
      MPN_COPY_INCR (x, PTR (t3), xn);
      if (r != 0)
        {
          mpz_pow_ui (t4, t3, k);
          mpn_sub (r, y, yn, PTR (t4), SIZ (t4));       /* no carry */
          rn = yn;
          while (rn != 0 && r[rn - 1] == 0)
            rn--;
        }
      mpz_clear (t4);
      mpz_clear (t3);
      MPN_COPY (xp, x, (yn + k - 1) / k);
      __GMP_FREE_FUNC_LIMBS (x, BITS_TO_LIMBS (b + 7 + GMP_NUMB_BITS));
      __GMP_FREE_FUNC_LIMBS (t1, BITS_TO_LIMBS (2 * b + 12 + GMP_NUMB_BITS));
      __GMP_FREE_FUNC_LIMBS (t2, BITS_TO_LIMBS (b + 6 + clgk + 1 + GMP_NUMB_BITS));
      return rn;
    }
  mpz_pow_ui (t4, t3, k);
  if (SIZ (t4) > yn || (SIZ (t4) == yn && mpn_cmp (PTR (t4), y, yn) > 0))
    {
      mpz_sub_ui (t3, t3, 1);
      xn = SIZ (t3);
      MPN_COPY_INCR (x, PTR (t3), xn);
      if (r != 0)
        {
          mpz_pow_ui (t4, t3, k);
          mpn_sub (r, y, yn, PTR (t4), SIZ (t4));       /* no carry */
          rn = yn;
          while (rn != 0 && r[rn - 1] == 0)
            rn--;
        }
      mpz_clear (t4);
      mpz_clear (t3);
      MPN_COPY (xp, x, (yn + k - 1) / k);
      __GMP_FREE_FUNC_LIMBS (x, BITS_TO_LIMBS (b + 7 + GMP_NUMB_BITS));
      __GMP_FREE_FUNC_LIMBS (t1, BITS_TO_LIMBS (2 * b + 12 + GMP_NUMB_BITS));
      __GMP_FREE_FUNC_LIMBS (t2, BITS_TO_LIMBS (b + 6 + clgk + 1 + GMP_NUMB_BITS));
      return rn;
    }
  xn = SIZ (t3);
  MPN_COPY_INCR (x, PTR (t3), xn);
  if (r != 0)
    {
      mpn_sub (r, y, yn, PTR (t4), SIZ (t4));   /* no carry */
      rn = yn;
      while (rn != 0 && r[rn - 1] == 0)
        rn--;
    }
  mpz_clear (t4);
  mpz_clear (t3);
  MPN_COPY (xp, x, (yn + k - 1) / k);
  __GMP_FREE_FUNC_LIMBS (x, BITS_TO_LIMBS (b + 7 + GMP_NUMB_BITS));
  __GMP_FREE_FUNC_LIMBS (t1, BITS_TO_LIMBS (2 * b + 12 + GMP_NUMB_BITS));
  __GMP_FREE_FUNC_LIMBS (t2, BITS_TO_LIMBS (b + 6 + clgk + 1 + GMP_NUMB_BITS));
  return rn;
}
#endif