Got invett and sb_divappr_q working (modulo some linker issue).
This commit is contained in:
parent
4783c9dc4e
commit
5308d1e8fd
7
gmp-h.in
7
gmp-h.in
@ -1534,6 +1534,13 @@ __GMP_DECLSPEC mp_limb_t mpn_divrem_1 __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr
|
||||
#define mpn_divrem_2 __MPN(divrem_2)
|
||||
__GMP_DECLSPEC mp_limb_t mpn_divrem_2 __GMP_PROTO ((mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr));
|
||||
|
||||
#define mpn_invert __MPN(invert)
|
||||
__GMP_DECLSPEC void mpn_invert __GMP_PROTO ((mp_ptr xp, mp_srcptr ap, mp_size_t n));
|
||||
|
||||
#define mpn_sb_divappr_q __MPN(sb_divappr_q)
|
||||
__GMP_DECLSPEC mp_limb_t mpn_sb_divappr_q __GMP_PROTO ((mp_ptr qp, mp_ptr np, mp_size_t nn,
|
||||
mp_srcptr dp, mp_size_t dn, mp_srcptr dip));
|
||||
|
||||
#define mpn_gcd __MPN(gcd)
|
||||
__GMP_DECLSPEC mp_size_t mpn_gcd __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_size_t));
|
||||
|
||||
|
342
mpn/generic/invert.c
Normal file
342
mpn/generic/invert.c
Normal file
@ -0,0 +1,342 @@
|
||||
/* floating-point Newton, with inversion in 3M(n) */
|
||||
|
||||
/* mpn_invert
|
||||
|
||||
Copyright 2009 Paul Zimmermann
|
||||
|
||||
This file is part of the MPIR Library.
|
||||
|
||||
The MPIR Library is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation; either version 2.1 of the License, or (at your
|
||||
option) any later version.
|
||||
|
||||
The MPIR Library is distributed in the hope that it will be useful, but
|
||||
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
||||
License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with the MPIR Library; see the file COPYING.LIB. If not, write to
|
||||
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
MA 02110-1301, USA. */
|
||||
|
||||
/* #define WRAP_AROUND */
|
||||
/* #define WANT_ASSERT 1 */
|
||||
|
||||
#ifdef WRAP_AROUND
|
||||
#define INVERT_VERSION 3
|
||||
#define WRAP_AROUND_BOUND 1500
|
||||
#else
|
||||
#define INVERT_VERSION 2
|
||||
#define WRAP_AROUND_BOUND ~0UL
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
#include "gmp.h"
|
||||
#include "gmp-impl.h"
|
||||
#include "longlong.h"
|
||||
|
||||
#define ZERO (mp_limb_t) 0
|
||||
#define ONE (mp_limb_t) 1
|
||||
|
||||
void
|
||||
mpn_print (mp_ptr A, mp_size_t n)
|
||||
{
|
||||
int j;
|
||||
|
||||
for (j=0; j<n; j++)
|
||||
{
|
||||
printf ("+%lu*B^%u", A[j], j);
|
||||
if (j % 4 == 3 && j != n-1)
|
||||
printf ("\n");
|
||||
}
|
||||
printf (":\n");
|
||||
}
|
||||
|
||||
/* Input: A = {ap, n} with most significant bit set.
|
||||
Output: X = B^n + {xp, n} where B = 2^GMP_NUMB_BITS.
|
||||
|
||||
X is a lower approximation of B^(2n)/A with implicit msb.
|
||||
More precisely, one has:
|
||||
|
||||
A*X < B^(2n) <= A*(X+1)
|
||||
|
||||
or X = ceil(B^(2n)/A) - 1.
|
||||
*/
|
||||
void
|
||||
mpn_invert (mp_ptr xp, mp_srcptr ap, mp_size_t n)
|
||||
{
|
||||
if (n == 1)
|
||||
{
|
||||
/* invert_limb returns min(B-1, floor(B^2/ap[0])-B),
|
||||
which is B-1 when ap[0]=B/2, and 1 when ap[0]=B-1.
|
||||
For X=B+xp[0], we have A*X < B^2 <= A*(X+1) where
|
||||
the equality holds only when A=B/2.
|
||||
|
||||
We thus have A*X < B^2 <= A*(X+1).
|
||||
*/
|
||||
invert_limb (xp[0], ap[0]);
|
||||
}
|
||||
else if (n == 2)
|
||||
{
|
||||
mp_limb_t tp[4], up[2], sp[2], cy;
|
||||
|
||||
tp[0] = ZERO;
|
||||
invert_limb (xp[1], ap[1]);
|
||||
tp[3] = mpn_mul_1 (tp + 1, ap, 2, xp[1]);
|
||||
cy = mpn_add_n (tp + 2, tp + 2, ap, 2);
|
||||
while (cy) /* Xh is too large */
|
||||
{
|
||||
xp[1] --;
|
||||
cy -= mpn_sub (tp + 1, tp + 1, 3, ap, 2);
|
||||
}
|
||||
/* tp[3] should be 111...111 */
|
||||
|
||||
mpn_com_n (sp, tp + 1, 2);
|
||||
cy = mpn_add_1 (sp, sp, 2, ONE);
|
||||
/* cy should be 0 */
|
||||
|
||||
up[1] = mpn_mul_1 (up, sp + 1, 1, xp[1]);
|
||||
cy = mpn_add_1 (up + 1, up + 1, 1, sp[1]);
|
||||
/* cy should be 0 */
|
||||
xp[0] = up[1];
|
||||
|
||||
/* update tp */
|
||||
cy = mpn_addmul_1 (tp, ap, 2, xp[0]);
|
||||
cy = mpn_add_1 (tp + 2, tp + 2, 2, cy);
|
||||
do
|
||||
{
|
||||
cy = mpn_add (tp, tp, 4, ap, 2);
|
||||
if (cy == ZERO)
|
||||
mpn_add_1 (xp, xp, 2, ONE);
|
||||
}
|
||||
while (cy == ZERO);
|
||||
|
||||
/* now A*X < B^4 <= A*(X+1) */
|
||||
}
|
||||
else
|
||||
{
|
||||
mp_size_t l, h;
|
||||
mp_ptr tp, up;
|
||||
mp_limb_t cy, th;
|
||||
TMP_DECL;
|
||||
|
||||
l = (n - 1) / 2;
|
||||
h = n - l;
|
||||
|
||||
mpn_invert (xp + l, ap + l, h);
|
||||
|
||||
TMP_MARK;
|
||||
tp = TMP_ALLOC_LIMBS (n + h);
|
||||
up = TMP_ALLOC_LIMBS (2 * h);
|
||||
|
||||
if (n <= WRAP_AROUND_BOUND)
|
||||
{
|
||||
mpn_mul (tp, ap, n, xp + l, h);
|
||||
cy = mpn_add_n (tp + h, tp + h, ap, n);
|
||||
}
|
||||
else
|
||||
{
|
||||
mp_size_t m = n + 1;
|
||||
unsigned long k;
|
||||
int cc;
|
||||
#ifdef CHECK
|
||||
mp_ptr tp2;
|
||||
mp_limb_t cy2;
|
||||
|
||||
tp2 = TMP_ALLOC_LIMBS (n + h);
|
||||
mpn_mul (tp2, ap, n, xp + l, h);
|
||||
cy2 = mpn_add_n (tp2 + h, tp2 + h, ap, n);
|
||||
#endif
|
||||
|
||||
k = mpn_fft_best_k (m, 0);
|
||||
m = mpn_fft_next_size (m, k);
|
||||
/* we have m >= n + 1 by construction, thus m > h */
|
||||
ASSERT(m < n + h);
|
||||
cy = mpn_mul_fft (tp, m, ap, n, xp + l, h, k);
|
||||
/* cy, {tp, m} = A * {xp + l, h} mod (B^m+1) */
|
||||
cy += mpn_add_n (tp + h, tp + h, ap, m - h);
|
||||
cc = mpn_sub_n (tp, tp, ap + m - h, n + h - m);
|
||||
cc = mpn_sub_1 (tp + n + h - m, tp + n + h - m, 2 * m - n - h, cc);
|
||||
if (cc > cy) /* can only occur if cc=1 and cy=0 */
|
||||
cy = mpn_add_1 (tp, tp, m, ONE);
|
||||
else
|
||||
cy -= cc;
|
||||
/* cy, {tp, m} = A * Xh */
|
||||
|
||||
/* add B^(n+h) + B^(n+h-m) */
|
||||
MPN_ZERO (tp + m, n + h - m);
|
||||
tp[m] = cy;
|
||||
/* note: since tp[n+h-1] is either 0, or cy<=1 if m=n+h-1,
|
||||
the mpn_incr_u() below cannot produce a carry */
|
||||
mpn_incr_u (tp + n + h - m, ONE);
|
||||
cy = 1;
|
||||
do /* check if T >= B^(n+h) + 2*B^n */
|
||||
{
|
||||
mp_size_t i;
|
||||
|
||||
if (cy == ZERO)
|
||||
break; /* surely T < B^(n+h) */
|
||||
if (cy == ONE)
|
||||
{
|
||||
for (i = n + h - 1; tp[i] == ZERO && i > n; i--);
|
||||
if (i == n && tp[i] < (mp_limb_t) 2)
|
||||
break;
|
||||
}
|
||||
/* subtract B^m+1 */
|
||||
cy -= mpn_sub_1 (tp, tp, n + h, ONE);
|
||||
cy -= mpn_sub_1 (tp + m, tp + m, n + h - m, ONE);
|
||||
}
|
||||
while (1);
|
||||
|
||||
#ifdef CHECK
|
||||
if ((cy != cy2) || mpn_cmp (tp, tp2, n + h) != 0)
|
||||
{
|
||||
fprintf (stderr, "wrong wrap around reconstruction\n");
|
||||
exit (1);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
while (cy)
|
||||
{
|
||||
mpn_sub_1 (xp + l, xp + l, h, ONE);
|
||||
cy -= mpn_sub (tp, tp, n + h, ap, n);
|
||||
}
|
||||
|
||||
mpn_com_n (tp, tp, n);
|
||||
th = ~tp[n] + mpn_add_1 (tp, tp, n, ONE);
|
||||
mpn_mul_n (up, tp + l, xp + l, h);
|
||||
cy = mpn_add_n (up + h, up + h, tp + l, h);
|
||||
if (th != ZERO)
|
||||
cy += ONE + mpn_add_n (up + h, up + h, xp + l, h);
|
||||
MPN_COPY (xp, up + 2 * h - l, l);
|
||||
mpn_add_1 (xp + l, xp + l, h, cy);
|
||||
TMP_FREE;
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
test_invert (mp_ptr xp, mp_srcptr ap, mp_size_t n)
|
||||
{
|
||||
int res = 1;
|
||||
mp_size_t i;
|
||||
mp_ptr tp, up;
|
||||
mp_limb_t cy;
|
||||
TMP_DECL;
|
||||
|
||||
TMP_MARK;
|
||||
tp = TMP_ALLOC_LIMBS (2 * n);
|
||||
up = TMP_ALLOC_LIMBS (2 * n);
|
||||
|
||||
/* first check X*A < B^(2*n) */
|
||||
mpn_mul_n (tp, xp, ap, n);
|
||||
cy = mpn_add_n (tp + n, tp + n, ap, n); /* A * msb(X) */
|
||||
if (cy != 0)
|
||||
res = 0;
|
||||
|
||||
/* now check B^(2n) - X*A <= A */
|
||||
mpn_com_n (tp, tp, 2 * n);
|
||||
mpn_add_1 (tp, tp, 2 * n, 1); /* B^(2n) - X*A */
|
||||
MPN_ZERO (up, 2 * n);
|
||||
MPN_COPY (up, ap, n);
|
||||
res = mpn_cmp (tp, up, 2 * n) <= 0;
|
||||
TMP_FREE;
|
||||
return res;
|
||||
}
|
||||
|
||||
#ifdef MAIN
|
||||
#include <sys/types.h>
|
||||
#include <sys/resource.h>
|
||||
|
||||
int
|
||||
cputime ()
|
||||
{
|
||||
struct rusage rus;
|
||||
|
||||
getrusage (0, &rus);
|
||||
return rus.ru_utime.tv_sec * 1000 + rus.ru_utime.tv_usec / 1000;
|
||||
}
|
||||
|
||||
int
|
||||
main (int argc, char *argv[])
|
||||
{
|
||||
mp_size_t n = atoi (argv[1]), i, j, k;
|
||||
mp_ptr qp, rp, dp, tp, qp2, rp2;
|
||||
mp_limb_t cy;
|
||||
pid_t pid;
|
||||
int st;
|
||||
|
||||
k = (argc <= 2) ? 1 : atoi(argv[2]);
|
||||
|
||||
qp = malloc (n * sizeof (mp_limb_t));
|
||||
qp2 = malloc (n * sizeof (mp_limb_t));
|
||||
rp = malloc (n * sizeof (mp_limb_t));
|
||||
rp2 = malloc (2 * n * sizeof (mp_limb_t));
|
||||
dp = malloc (n * sizeof (mp_limb_t));
|
||||
tp = malloc (2 * n * sizeof (mp_limb_t));
|
||||
|
||||
pid = getpid ();
|
||||
printf ("Seed=%lu\n", pid);
|
||||
srand48 (pid);
|
||||
for (i = 0; i < n; i++)
|
||||
dp[i] = lrand48 ();
|
||||
dp[n - 1] |= GMP_NUMB_HIGHBIT;
|
||||
|
||||
mpn_random (rp, n);
|
||||
st = cputime ();
|
||||
for (i = 0; i < k; i++)
|
||||
mpn_mul_n (tp, dp, rp, n);
|
||||
printf ("mpn_mul_n took %dms\n", cputime () - st);
|
||||
|
||||
st = cputime ();
|
||||
for (i = 0; i < k; i++)
|
||||
{
|
||||
#ifdef CHECK
|
||||
// printf ("Test %lu\n", i);
|
||||
for (j = 0; j < n; j++)
|
||||
dp[j] = lrand48 ();
|
||||
dp[n - 1] |= GMP_NUMB_HIGHBIT;
|
||||
#endif
|
||||
mpn_invert (qp, dp, n);
|
||||
#ifdef CHECK
|
||||
if (test_invert (qp, dp, n) == 0)
|
||||
{
|
||||
fprintf (stderr, "test_invert failed at i=%lu\n", i);
|
||||
printf ("A:="); mpn_print (dp, n);
|
||||
printf ("X:=B^%lu", n); mpn_print (qp, n);
|
||||
exit (1);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
printf ("mpn_invert%d took %dms", INVERT_VERSION, cputime () - st);
|
||||
#ifdef WRAP_AROUND
|
||||
printf (" (with wrap-around trick, WRAP_AROUND_BOUND=%lu)",
|
||||
WRAP_AROUND_BOUND);
|
||||
#endif
|
||||
printf ("\n");
|
||||
|
||||
// printf ("xp="); mpn_print (qp, n);
|
||||
|
||||
MPN_ZERO (rp2, 2 * n);
|
||||
rp2[2 * n - 1] = GMP_LIMB_HIGHBIT;
|
||||
st = cputime ();
|
||||
for (i = 0; i < k; i++)
|
||||
{
|
||||
MPN_ZERO (rp2, 2 * n);
|
||||
rp2[2 * n - 1] = GMP_LIMB_HIGHBIT;
|
||||
mpn_divrem (qp2, 0, rp2, 2 * n, dp, n);
|
||||
}
|
||||
printf ("mpn_divrem took %dms\n", cputime () - st);
|
||||
|
||||
free (qp);
|
||||
free (rp);
|
||||
free (dp);
|
||||
free (tp);
|
||||
|
||||
return 0;
|
||||
}
|
||||
#endif
|
@ -110,13 +110,13 @@ mpn_sb_divappr_q (mp_ptr qp, mp_ptr np, mp_size_t nn,
|
||||
if (mpn_cmp(np + nn - dn, dp, dn) >= 0)
|
||||
{
|
||||
ret = CNST_LIMB(1);
|
||||
mpn_sub_n(np + nn - dn, np + nn - dn, dp, nn);
|
||||
mpn_sub_n(np + nn - dn, np + nn - dn, dp, dn);
|
||||
} else
|
||||
ret = CNST_LIMB(0);
|
||||
|
||||
di1 = dip[1];
|
||||
di0 = dip[0];
|
||||
for (i = qn - 2; i >= 0; i--)
|
||||
for (i = qn - 2; i >= 0L; i--)
|
||||
{
|
||||
/*
|
||||
Compute n2 + top two limbs of n2*di, but
|
||||
@ -125,7 +125,7 @@ mpn_sb_divappr_q (mp_ptr qp, mp_ptr np, mp_size_t nn,
|
||||
*/
|
||||
n21 = np[nn - 1];
|
||||
n20 = np[nn - 2];
|
||||
umul_ppmm(p2, p1, di0, n21);
|
||||
umul_ppmm(p2, p1, di0, n21);
|
||||
umul_ppmm(p4, p3, di1, n20);
|
||||
add_ssaaaa(q, q0, n21, p2, CNST_LIMB(0), p4);
|
||||
umul_ppmm(p1, p2, di1, n21);
|
||||
@ -143,8 +143,14 @@ mpn_sb_divappr_q (mp_ptr qp, mp_ptr np, mp_size_t nn,
|
||||
}
|
||||
|
||||
qp[i] = q;
|
||||
dn--;
|
||||
nn--;
|
||||
|
||||
if (dn > i + 1)
|
||||
{
|
||||
dp++;
|
||||
dn--;
|
||||
}
|
||||
|
||||
nn--;
|
||||
}
|
||||
|
||||
return ret;
|
||||
|
120
tests/mpn/t-sb_divappr_q.c
Normal file
120
tests/mpn/t-sb_divappr_q.c
Normal file
@ -0,0 +1,120 @@
|
||||
/* Test mpn_sb_divappr_q.
|
||||
|
||||
Copyright 2002 Free Software Foundation, Inc.
|
||||
Copyright 2009 William Hart
|
||||
|
||||
This file is part of the MPIR Library.
|
||||
|
||||
The MPIR Library is free software; you can redistribute it and/or modify
|
||||
it under the terms of the GNU Lesser General Public License as published by
|
||||
the Free Software Foundation; either version 2.1 of the License, or (at your
|
||||
option) any later version.
|
||||
|
||||
The MPIR Library is distributed in the hope that it will be useful, but
|
||||
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
||||
License for more details.
|
||||
|
||||
You should have received a copy of the GNU Lesser General Public License
|
||||
along with the MPIR Library; see the file COPYING.LIB. If not, write to
|
||||
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
||||
MA 02110-1301, USA. */
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
#include "mpir.h"
|
||||
#include "gmp-impl.h"
|
||||
#include "tests.h"
|
||||
|
||||
#define MAX_LIMBS 40
|
||||
#define ITERS 1000
|
||||
|
||||
/* Check schoolboy division routine. */
|
||||
void
|
||||
check_sb_divappr_q (void)
|
||||
{
|
||||
mp_limb_t np[2*MAX_LIMBS];
|
||||
mp_limb_t np2[2*MAX_LIMBS];
|
||||
mp_limb_t rp[2*MAX_LIMBS];
|
||||
mp_limb_t dp[MAX_LIMBS];
|
||||
mp_limb_t qp[MAX_LIMBS];
|
||||
mp_limb_t dip[2];
|
||||
|
||||
mp_size_t nn, rn, dn, qn;
|
||||
|
||||
gmp_randstate_t rands;
|
||||
|
||||
int i, j, s;
|
||||
gmp_randinit_default(rands);
|
||||
|
||||
for (i = 0; i < ITERS; i++)
|
||||
{
|
||||
dn = (random() % MAX_LIMBS) + 1;
|
||||
nn = (random() % MAX_LIMBS) + dn;
|
||||
|
||||
mpn_rrandom (np, rands, nn);
|
||||
mpn_rrandom (dp, rands, dn);
|
||||
dp[dn-1] |= GMP_LIMB_HIGHBIT;
|
||||
|
||||
MPN_COPY(np2, np, nn);
|
||||
|
||||
mpn_invert(dip, dp + dn - 2, 2);
|
||||
|
||||
qn = nn - dn + 1;
|
||||
|
||||
qp[qn - 1] = mpn_sb_divappr_q(qp, np, nn, dp, dn, dip);
|
||||
|
||||
MPN_NORMALIZE(qp, qn);
|
||||
|
||||
if (qn)
|
||||
{
|
||||
if (qn >= dn) mpn_mul(rp, qp, qn, dp, dn);
|
||||
else mpn_mul(rp, dp, dn, qp, qn);
|
||||
|
||||
rn = dn + qn;
|
||||
MPN_NORMALIZE(rp, rn);
|
||||
|
||||
s = (rn < nn) ? -1 : (rn > nn) ? 1 : mpn_cmp(rp, np2, nn);
|
||||
if (s <= 0)
|
||||
{
|
||||
mpn_sub(rp, np2, nn, rp, rn);
|
||||
rn = nn;
|
||||
MPN_NORMALIZE(rp, rn);
|
||||
} else
|
||||
{
|
||||
mpn_sub(rp, rp, rn, np2, nn);
|
||||
MPN_NORMALIZE(rp, rn);
|
||||
}
|
||||
} else
|
||||
{
|
||||
rn = nn;
|
||||
MPN_COPY(rp, np, nn);
|
||||
}
|
||||
|
||||
s = (rn < dn) ? -1 : (rn > dn) ? 1 : mpn_cmp(rp, dp, dn);
|
||||
if (s >= 0)
|
||||
{
|
||||
printf ("failed:\n");
|
||||
printf ("nn = %lu, dn = %lu, qn = %lu, rn = %lu\n\n", nn, dn, qn, rn);
|
||||
gmp_printf (" np: %Nx\n\n", np2, nn);
|
||||
gmp_printf (" dp: %Nx\n\n", dp, dn);
|
||||
gmp_printf (" qp: %Nx\n\n", qp, qn);
|
||||
gmp_printf (" rp: %Nx\n\n", rp, rn);
|
||||
abort ();
|
||||
}
|
||||
}
|
||||
|
||||
gmp_randclear(rands);
|
||||
}
|
||||
|
||||
int
|
||||
main (void)
|
||||
{
|
||||
tests_start ();
|
||||
|
||||
check_sb_divappr_q ();
|
||||
|
||||
tests_end ();
|
||||
exit (0);
|
||||
}
|
Loading…
Reference in New Issue
Block a user