243 lines
6.4 KiB
C
243 lines
6.4 KiB
C
/* floating-point Newton, with inversion in 3M(n) */
|
|
|
|
/* mpn_invert
|
|
|
|
Copyright 2009, 2015 Paul Zimmermann
|
|
Copyright 2009, 2015 William Hart
|
|
|
|
This file is part of the MPIR Library.
|
|
|
|
The MPIR Library is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU Lesser General Public License as published by
|
|
the Free Software Foundation; either version 2.1 of the License, or (at your
|
|
option) any later version.
|
|
|
|
The MPIR Library is distributed in the hope that it will be useful, but
|
|
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
|
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
|
License for more details.
|
|
|
|
You should have received a copy of the GNU Lesser General Public License
|
|
along with the MPIR Library; see the file COPYING.LIB. If not, write to
|
|
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
|
MA 02110-1301, USA. */
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <assert.h>
|
|
#include "mpir.h"
|
|
#include "gmp-impl.h"
|
|
#include "longlong.h"
|
|
|
|
/* Limb-typed constants.  The replacement lists are fully parenthesized so
   that the cast cannot be split from its operand by a neighbouring
   operator (e.g. `sizeof ZERO'). */
#define ZERO ((mp_limb_t) 0)
#define ONE ((mp_limb_t) 1)

/* Operand size (in limbs) up to which mpn_invert uses a plain product;
   above it the wrap-around (mod B^m+1) FFT product is used instead. */
#define WRAP_AROUND_BOUND 1500
|
|
|
|
int
|
|
mpn_is_invert (mp_srcptr xp, mp_srcptr ap, mp_size_t n)
|
|
{
|
|
int res = 1;
|
|
mp_size_t i;
|
|
mp_ptr tp, up;
|
|
mp_limb_t cy;
|
|
TMP_DECL;
|
|
|
|
TMP_MARK;
|
|
tp = TMP_ALLOC_LIMBS (2 * n);
|
|
up = TMP_ALLOC_LIMBS (2 * n);
|
|
|
|
/* first check X*A < B^(2*n) */
|
|
mpn_mul_n (tp, xp, ap, n);
|
|
cy = mpn_add_n (tp + n, tp + n, ap, n); /* A * msb(X) */
|
|
if (cy != 0)
|
|
return 0;
|
|
|
|
/* now check B^(2n) - X*A <= A */
|
|
mpn_not (tp, 2 * n);
|
|
mpn_add_1 (tp, tp, 2 * n, 1); /* B^(2n) - X*A */
|
|
MPN_ZERO (up, 2 * n);
|
|
MPN_COPY (up, ap, n);
|
|
res = mpn_cmp (tp, up, 2 * n) <= 0;
|
|
TMP_FREE;
|
|
return res;
|
|
}
|
|
|
|
/* Input: A = {ap, n} with most significant bit set.
|
|
Output: X = B^n + {xp, n} where B = 2^GMP_NUMB_BITS.
|
|
|
|
X is a lower approximation of B^(2n)/A with implicit msb.
|
|
More precisely, one has:
|
|
|
|
A*X < B^(2n) <= A*(X+1)
|
|
|
|
or X = ceil(B^(2n)/A) - 1.
|
|
*/
|
|
void
|
|
mpn_invert (mp_ptr xp, mp_srcptr ap, mp_size_t n)
|
|
{
|
|
if (n == 1)
|
|
{
|
|
/* invert_limb returns min(B-1, floor(B^2/ap[0])-B),
|
|
which is B-1 when ap[0]=B/2, and 1 when ap[0]=B-1.
|
|
For X=B+xp[0], we have A*X < B^2 <= A*(X+1) where
|
|
the equality holds only when A=B/2.
|
|
|
|
We thus have A*X < B^2 <= A*(X+1).
|
|
*/
|
|
invert_limb (xp[0], ap[0]);
|
|
}
|
|
else if (n == 2)
|
|
{
|
|
mp_limb_t tp[4], up[2], sp[2], cy;
|
|
|
|
tp[0] = ZERO;
|
|
invert_limb (xp[1], ap[1]);
|
|
tp[3] = mpn_mul_1 (tp + 1, ap, 2, xp[1]);
|
|
cy = mpn_add_n (tp + 2, tp + 2, ap, 2);
|
|
while (cy) /* Xh is too large */
|
|
{
|
|
xp[1] --;
|
|
cy -= mpn_sub (tp + 1, tp + 1, 3, ap, 2);
|
|
}
|
|
/* tp[3] should be 111...111 */
|
|
|
|
mpn_com_n (sp, tp + 1, 2);
|
|
cy = mpn_add_1 (sp, sp, 2, ONE);
|
|
/* cy should be 0 */
|
|
|
|
up[1] = mpn_mul_1 (up, sp + 1, 1, xp[1]);
|
|
cy = mpn_add_1 (up + 1, up + 1, 1, sp[1]);
|
|
/* cy should be 0 */
|
|
xp[0] = up[1];
|
|
|
|
/* update tp */
|
|
cy = mpn_addmul_1 (tp, ap, 2, xp[0]);
|
|
cy = mpn_add_1 (tp + 2, tp + 2, 2, cy);
|
|
do
|
|
{
|
|
cy = mpn_add (tp, tp, 4, ap, 2);
|
|
if (cy == ZERO)
|
|
mpn_add_1 (xp, xp, 2, ONE);
|
|
}
|
|
while (cy == ZERO);
|
|
|
|
/* now A*X < B^4 <= A*(X+1) */
|
|
}
|
|
else
|
|
{
|
|
mp_size_t l, h;
|
|
mp_ptr tp, up;
|
|
mp_limb_t cy, th;
|
|
TMP_DECL;
|
|
|
|
l = (n - 1) / 2;
|
|
h = n - l;
|
|
|
|
mpn_invert (xp + l, ap + l, h);
|
|
|
|
TMP_MARK;
|
|
tp = TMP_ALLOC_LIMBS (n + h);
|
|
up = TMP_ALLOC_LIMBS (2 * h);
|
|
|
|
if (n <= WRAP_AROUND_BOUND)
|
|
{
|
|
mpn_mul (tp, ap, n, xp + l, h);
|
|
cy = mpn_add_n (tp + h, tp + h, ap, n);
|
|
}
|
|
else
|
|
{
|
|
mp_size_t m = n + 1;
|
|
mpir_ui k;
|
|
int cc;
|
|
|
|
if (m >= FFT_MULMOD_2EXPP1_CUTOFF)
|
|
m = mpir_fft_adjust_limbs (m);
|
|
/* we have m >= n + 1 by construction, thus m > h */
|
|
ASSERT(m < n + h);
|
|
cy = mpn_mulmod_Bexpp1_fft (tp, m, ap, n, xp + l, h);
|
|
/* cy, {tp, m} = A * {xp + l, h} mod (B^m+1) */
|
|
cy += mpn_add_n (tp + h, tp + h, ap, m - h);
|
|
cc = mpn_sub_n (tp, tp, ap + m - h, n + h - m);
|
|
cc = mpn_sub_1 (tp + n + h - m, tp + n + h - m, 2 * m - n - h, cc);
|
|
if (cc > cy) /* can only occur if cc=1 and cy=0 */
|
|
cy = mpn_add_1 (tp, tp, m, ONE);
|
|
else
|
|
cy -= cc;
|
|
/* cy, {tp, m} = A * Xh */
|
|
|
|
/* add B^(n+h) + B^(n+h-m) */
|
|
MPN_ZERO (tp + m, n + h - m);
|
|
tp[m] = cy;
|
|
/* note: since tp[n+h-1] is either 0, or cy<=1 if m=n+h-1,
|
|
the mpn_incr_u() below cannot produce a carry */
|
|
mpn_incr_u (tp + n + h - m, ONE);
|
|
cy = 1;
|
|
do /* check if T >= B^(n+h) + 2*B^n */
|
|
{
|
|
mp_size_t i;
|
|
|
|
if (cy == ZERO)
|
|
break; /* surely T < B^(n+h) */
|
|
if (cy == ONE)
|
|
{
|
|
for (i = n + h - 1; tp[i] == ZERO && i > n; i--);
|
|
if (i == n && tp[i] < (mp_limb_t) 2)
|
|
break;
|
|
}
|
|
/* subtract B^m+1 */
|
|
cy -= mpn_sub_1 (tp, tp, n + h, ONE);
|
|
cy -= mpn_sub_1 (tp + m, tp + m, n + h - m, ONE);
|
|
}
|
|
while (1);
|
|
}
|
|
|
|
while (cy)
|
|
{
|
|
mpn_sub_1 (xp + l, xp + l, h, ONE);
|
|
cy -= mpn_sub (tp, tp, n + h, ap, n);
|
|
}
|
|
|
|
/*
|
|
Note that we work with the inequality AX < B^2n < A(X+1)
|
|
as per the revised version of the paper found here:
|
|
http://www.loria.fr/~zimmerma/papers/invert.pdf
|
|
*/
|
|
mpn_not (tp, n);
|
|
mpn_add_1 (tp, tp, n, ONE);
|
|
mpn_mul_n (up, tp + l, xp + l, h);
|
|
cy = mpn_add_n (up + h, up + h, tp + l, h - l);
|
|
mpn_add_n (xp, up + 2*h - l, tp + h, l);
|
|
mpn_add_1 (xp, xp, l, cy);
|
|
if (up[2*h-l-1] + 3 <= CNST_LIMB(2) && !mpn_is_invert(xp, ap, n))
|
|
mpn_add_1 (xp, xp, n, 1);
|
|
TMP_FREE;
|
|
}
|
|
}
|
|
|
|
/* Derive an m-limb inverse from an n-limb one.

   {x_new, m} is initialised with the top m limbs of {xp, n} and A is
   replaced by its top m limbs A' = {ap + n - m, m}.  With the implicit
   msb, X' = B^m + {x_new, m}.  The remainder R = B^(2m) - X'*A' is formed
   and X' is incremented (subtracting A' from R each time) until R <= A'.

   Presumably {xp, n} is the inverse of {ap, n} as produced by mpn_invert
   and m <= n; neither is checked here. */
void mpn_invert_trunc(mp_ptr x_new, mp_size_t m, mp_srcptr xp, mp_size_t n, mp_srcptr ap)
{
  mp_srcptr a_hi = ap + (n - m); /* top m limbs of A */
  mp_ptr rem;
  TMP_DECL;

  TMP_MARK;
  rem = TMP_ALLOC_LIMBS (2 * m);

  /* start from the truncated inverse */
  MPN_COPY (x_new, xp + n - m, m);

  /* rem = X' * A': explicit limbs first, then A' once more for msb(X') */
  mpn_mul_n (rem, x_new, a_hi, m);
  mpn_add_n (rem + m, rem + m, a_hi, m);

  /* rem = B^(2m) - X'*A' (two's complement over 2m limbs) */
  mpn_not (rem, 2 * m);
  mpn_add_1 (rem, rem, 2 * m, 1);

  /* adjust until rem <= A'; only limb m and the low m limbs are examined */
  for (;;)
    {
      if (!rem[m] && mpn_cmp (rem, a_hi, m) <= 0)
        break;

      mpn_add_1 (x_new, x_new, m, 1);
      rem[m] -= mpn_sub_n (rem, rem, a_hi, m);
    }

  TMP_FREE;
}
|