Got invert and sb_divappr_q working (modulo some linker issue).

This commit is contained in:
wbhart 2009-09-29 22:55:10 +00:00
parent 4783c9dc4e
commit 5308d1e8fd
4 changed files with 480 additions and 5 deletions

View File

@ -1534,6 +1534,13 @@ __GMP_DECLSPEC mp_limb_t mpn_divrem_1 __GMP_PROTO ((mp_ptr, mp_size_t, mp_srcptr
#define mpn_divrem_2 __MPN(divrem_2)
__GMP_DECLSPEC mp_limb_t mpn_divrem_2 __GMP_PROTO ((mp_ptr, mp_size_t, mp_ptr, mp_size_t, mp_srcptr));
#define mpn_invert __MPN(invert)
__GMP_DECLSPEC void mpn_invert __GMP_PROTO ((mp_ptr xp, mp_srcptr ap, mp_size_t n));
#define mpn_sb_divappr_q __MPN(sb_divappr_q)
__GMP_DECLSPEC mp_limb_t mpn_sb_divappr_q __GMP_PROTO ((mp_ptr qp, mp_ptr np, mp_size_t nn,
mp_srcptr dp, mp_size_t dn, mp_srcptr dip));
#define mpn_gcd __MPN(gcd)
__GMP_DECLSPEC mp_size_t mpn_gcd __GMP_PROTO ((mp_ptr, mp_ptr, mp_size_t, mp_ptr, mp_size_t));

342
mpn/generic/invert.c Normal file
View File

@ -0,0 +1,342 @@
/* floating-point Newton, with inversion in 3M(n) */
/* mpn_invert
Copyright 2009 Paul Zimmermann
This file is part of the MPIR Library.
The MPIR Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.
The MPIR Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General Public License
along with the MPIR Library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
/* #define WRAP_AROUND */
/* #define WANT_ASSERT 1 */
#ifdef WRAP_AROUND
#define INVERT_VERSION 3
#define WRAP_AROUND_BOUND 1500
#else
#define INVERT_VERSION 2
#define WRAP_AROUND_BOUND ~0UL
#endif
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include "gmp.h"
#include "gmp-impl.h"
#include "longlong.h"
#define ZERO (mp_limb_t) 0
#define ONE (mp_limb_t) 1
/* Debugging aid: print the n limbs of {A, n} to stdout as a sum of terms
   "+limb*B^index", least significant limb first, four terms per line,
   terminated by ":" and a newline.  */
void
mpn_print (mp_ptr A, mp_size_t n)
{
  mp_size_t j;

  for (j = 0; j < n; j++)
    {
      /* Cast explicitly so that each argument has exactly the type its
         conversion specifier expects.
         NOTE(review): if mp_limb_t is wider than unsigned long on some
         target, the limb is truncated here -- confirm for such builds.  */
      printf ("+%lu*B^%lu", (unsigned long) A[j], (unsigned long) j);
      /* Break the line after every fourth term, except after the last.  */
      if (j % 4 == 3 && j != n - 1)
        printf ("\n");
    }
  printf (":\n");
}
/* Compute an approximate inverse of A = {ap, n} into {xp, n}.

Input: A = {ap, n} with most significant bit set.
Output: X = B^n + {xp, n} where B = 2^GMP_NUMB_BITS, i.e. {xp, n} holds X
without its implicit most significant bit.
X is a lower approximation of B^(2n)/A with implicit msb.
More precisely, one has:
A*X < B^(2n) <= A*(X+1)
or X = ceil(B^(2n)/A) - 1.

Three cases are handled: n == 1 (a single invert_limb), n == 2 (fixed-size
stack temporaries) and n > 2 (recursion on the high h = n - (n-1)/2 limbs,
followed by one step that produces the remaining l = (n-1)/2 low limbs).
NOTE(review): n >= 1 is assumed; nothing here checks for n <= 0.
NOTE(review): unlike the n == 2 case, the general case has no final
correction loop, so confirm that the exact bound above holds for n > 2.
*/
void
mpn_invert (mp_ptr xp, mp_srcptr ap, mp_size_t n)
{
if (n == 1)
{
/* invert_limb returns min(B-1, floor(B^2/ap[0])-B),
which is B-1 when ap[0]=B/2, and 1 when ap[0]=B-1.
For X=B+xp[0], we have A*X < B^2 <= A*(X+1) where
the equality holds only when A=B/2.
We thus have A*X < B^2 <= A*(X+1).
*/
invert_limb (xp[0], ap[0]);
}
else if (n == 2)
{
mp_limb_t tp[4], up[2], sp[2], cy;
tp[0] = ZERO;
/* High limb of X from the high limb of A. */
invert_limb (xp[1], ap[1]);
/* {tp + 1, 3} = A * xp[1], then add A one limb higher for the implicit
msb of X; cy is the carry out of tp[3]. */
tp[3] = mpn_mul_1 (tp + 1, ap, 2, xp[1]);
cy = mpn_add_n (tp + 2, tp + 2, ap, 2);
while (cy) /* Xh is too large */
{
/* Decrement the high limb of X and subtract A until the carry is gone. */
xp[1] --;
cy -= mpn_sub (tp + 1, tp + 1, 3, ap, 2);
}
/* tp[3] should be 111...111 */
/* sp = two's complement of {tp + 1, 2} (complement, then add one). */
mpn_com_n (sp, tp + 1, 2);
cy = mpn_add_1 (sp, sp, 2, ONE);
/* cy should be 0 */
/* up = sp[1] * xp[1], then sp[1] is added to the high limb (implicit msb
of X); the resulting high limb becomes the low limb of X. */
up[1] = mpn_mul_1 (up, sp + 1, 1, xp[1]);
cy = mpn_add_1 (up + 1, up + 1, 1, sp[1]);
/* cy should be 0 */
xp[0] = up[1];
/* update tp */
/* Add A * xp[0] at the bottom of tp and propagate the carry upwards. */
cy = mpn_addmul_1 (tp, ap, 2, xp[0]);
cy = mpn_add_1 (tp + 2, tp + 2, 2, cy);
/* Keep adding A to the 4-limb value in tp; every addition that does not
carry out means X can be incremented once more. */
do
{
cy = mpn_add (tp, tp, 4, ap, 2);
if (cy == ZERO)
mpn_add_1 (xp, xp, 2, ONE);
}
while (cy == ZERO);
/* now A*X < B^4 <= A*(X+1) */
}
else
{
mp_size_t l, h;
mp_ptr tp, up;
mp_limb_t cy, th;
TMP_DECL;
/* Split the n limbs into l low limbs and h high limbs; h - l is 1 or 2. */
l = (n - 1) / 2;
h = n - l;
/* Recursively invert the high h limbs of A into the high h limbs of X
(called Xh below, with its implicit msb). */
mpn_invert (xp + l, ap + l, h);
TMP_MARK;
tp = TMP_ALLOC_LIMBS (n + h);
up = TMP_ALLOC_LIMBS (2 * h);
/* Compute cy, {tp, n + h} = A * Xh.
NOTE(review): when WRAP_AROUND is not defined, WRAP_AROUND_BOUND is ~0UL,
so this test is presumably always true and the FFT path below is unused. */
if (n <= WRAP_AROUND_BOUND)
{
/* Plain product with the stored limbs of Xh, then add A shifted by h
limbs for the implicit msb. */
mpn_mul (tp, ap, n, xp + l, h);
cy = mpn_add_n (tp + h, tp + h, ap, n);
}
else
{
/* Wrap-around variant: multiply modulo B^m+1 with m < n + h and
reconstruct the full product from the known shape of the result. */
mp_size_t m = n + 1;
unsigned long k;
int cc;
#ifdef CHECK
/* Reference result computed with the plain product, compared at the end. */
mp_ptr tp2;
mp_limb_t cy2;
tp2 = TMP_ALLOC_LIMBS (n + h);
mpn_mul (tp2, ap, n, xp + l, h);
cy2 = mpn_add_n (tp2 + h, tp2 + h, ap, n);
#endif
k = mpn_fft_best_k (m, 0);
m = mpn_fft_next_size (m, k);
/* we have m >= n + 1 by construction, thus m > h */
ASSERT(m < n + h);
cy = mpn_mul_fft (tp, m, ap, n, xp + l, h, k);
/* cy, {tp, m} = A * {xp + l, h} mod (B^m+1) */
/* Add A * B^h modulo B^m+1: the low m - h limbs of A are added at offset
h, the remaining n + h - m limbs wrap around and are subtracted at the
bottom. */
cy += mpn_add_n (tp + h, tp + h, ap, m - h);
cc = mpn_sub_n (tp, tp, ap + m - h, n + h - m);
cc = mpn_sub_1 (tp + n + h - m, tp + n + h - m, 2 * m - n - h, cc);
if (cc > cy) /* can only occur if cc=1 and cy=0 */
cy = mpn_add_1 (tp, tp, m, ONE);
else
cy -= cc;
/* cy, {tp, m} = A * Xh */
/* add B^(n+h) + B^(n+h-m) */
MPN_ZERO (tp + m, n + h - m);
tp[m] = cy;
/* note: since tp[n+h-1] is either 0, or cy<=1 if m=n+h-1,
the mpn_incr_u() below cannot produce a carry */
mpn_incr_u (tp + n + h - m, ONE);
cy = 1;
do /* check if T >= B^(n+h) + 2*B^n */
{
mp_size_t i;
if (cy == ZERO)
break; /* surely T < B^(n+h) */
if (cy == ONE)
{
/* Scan down from the top limb; stop at limb n or at the first non-zero. */
for (i = n + h - 1; tp[i] == ZERO && i > n; i--);
if (i == n && tp[i] < (mp_limb_t) 2)
break;
}
/* subtract B^m+1 */
cy -= mpn_sub_1 (tp, tp, n + h, ONE);
cy -= mpn_sub_1 (tp + m, tp + m, n + h - m, ONE);
}
while (1);
#ifdef CHECK
if ((cy != cy2) || mpn_cmp (tp, tp2, n + h) != 0)
{
fprintf (stderr, "wrong wrap around reconstruction\n");
exit (1);
}
#endif
}
/* A non-zero carry means A * Xh overflowed n + h limbs: decrement Xh and
subtract A until the carry is cancelled. */
while (cy)
{
mpn_sub_1 (xp + l, xp + l, h, ONE);
cy -= mpn_sub (tp, tp, n + h, ap, n);
}
/* Two's complement of the low n + 1 limbs of tp: the low n limbs stay in
{tp, n}, the top limb goes to th. */
mpn_com_n (tp, tp, n);
th = ~tp[n] + mpn_add_1 (tp, tp, n, ONE);
/* up = (high h limbs of {tp, n}) * {xp + l, h}; adding {tp + l, h} at
offset h accounts for the implicit msb of Xh. */
mpn_mul_n (up, tp + l, xp + l, h);
cy = mpn_add_n (up + h, up + h, tp + l, h);
/* A non-zero th contributes one more copy of Xh: its stored limbs are
added at offset h and ONE is added to the carry for its implicit msb. */
if (th != ZERO)
cy += ONE + mpn_add_n (up + h, up + h, xp + l, h);
/* The top l limbs of up become the low l limbs of X; the accumulated
carry is added into the high h limbs. */
MPN_COPY (xp, up + 2 * h - l, l);
mpn_add_1 (xp + l, xp + l, h, cy);
TMP_FREE;
}
}
/* Verify that X = B^n + {xp, n} is the approximate inverse of A = {ap, n}
   in the sense A*X < B^(2n) <= A*(X+1).

   Returns 1 when both inequalities hold and 0 otherwise.  The operands are
   not modified; temporaries come from TMP_ALLOC_LIMBS and are released
   before returning.  */
int
test_invert (mp_ptr xp, mp_srcptr ap, mp_size_t n)
{
  int upper_ok, lower_ok;
  mp_ptr tp, up;
  mp_limb_t cy;
  TMP_DECL;

  TMP_MARK;
  tp = TMP_ALLOC_LIMBS (2 * n);
  up = TMP_ALLOC_LIMBS (2 * n);

  /* first check X*A < B^(2*n): the product with the stored limbs of X plus
     A shifted by n limbs (implicit msb of X) must not carry out.  */
  mpn_mul_n (tp, xp, ap, n);
  cy = mpn_add_n (tp + n, tp + n, ap, n); /* A * msb(X) */
  upper_ok = (cy == 0);

  /* now check B^(2n) - X*A <= A */
  mpn_com_n (tp, tp, 2 * n);
  mpn_add_1 (tp, tp, 2 * n, 1); /* B^(2n) - X*A */
  MPN_ZERO (up, 2 * n);
  MPN_COPY (up, ap, n);
  lower_ok = (mpn_cmp (tp, up, 2 * n) <= 0);

  TMP_FREE;

  /* Both conditions must hold; the first result must not be discarded by
     the second one.  */
  return upper_ok && lower_ok;
}
#ifdef MAIN
#include <sys/types.h>
#include <sys/resource.h>
#include <unistd.h>
/* Return the user CPU time reported by getrusage, converted to
   milliseconds.  */
int
cputime ()
{
  struct rusage usage;
  const struct timeval *user;

  /* 0 is passed as the `who' argument (presumably RUSAGE_SELF, i.e. the
     calling process -- TODO confirm for the target platform).  */
  getrusage (0, &usage);
  user = &usage.ru_utime;

  /* seconds -> ms, microseconds -> ms (truncating).  */
  return user->tv_sec * 1000 + user->tv_usec / 1000;
}
/* Benchmark driver (built only when MAIN is defined).

   Usage: prog n [k]
     n  operand size in limbs (at least 1)
     k  number of repetitions of each timed loop (default 1)

   Times k runs of mpn_mul_n, mpn_invert and mpn_divrem on n-limb operands
   and prints the elapsed user CPU time of each.  When CHECK is defined a
   fresh random divisor is generated for every mpn_invert call and the
   result is verified with test_invert.  */
int
main (int argc, char *argv[])
{
  mp_size_t n, i, j, k;
  mp_ptr qp, rp, dp, tp, qp2, rp2;
  pid_t pid;
  int st;

  if (argc < 2)
    {
      fprintf (stderr, "Usage: %s n [k]\n", argc > 0 ? argv[0] : "invert");
      exit (1);
    }
  n = atoi (argv[1]);
  k = (argc <= 2) ? 1 : atoi (argv[2]);
  if (n < 1)
    {
      /* dp[n - 1] and rp2[2 * n - 1] are written below.  */
      fprintf (stderr, "n must be at least 1\n");
      exit (1);
    }

  qp = malloc (n * sizeof (mp_limb_t));
  qp2 = malloc (n * sizeof (mp_limb_t));
  rp = malloc (n * sizeof (mp_limb_t));
  rp2 = malloc (2 * n * sizeof (mp_limb_t));
  dp = malloc (n * sizeof (mp_limb_t));
  tp = malloc (2 * n * sizeof (mp_limb_t));
  if (qp == NULL || qp2 == NULL || rp == NULL || rp2 == NULL
      || dp == NULL || tp == NULL)
    {
      fprintf (stderr, "out of memory\n");
      exit (1);
    }

  /* Seed the generator with the process id and print it so that a failing
     run can be reproduced.  */
  pid = getpid ();
  printf ("Seed=%lu\n", (unsigned long) pid);
  srand48 (pid);

  /* Random divisor with the most significant bit set, as mpn_invert
     requires.  */
  for (i = 0; i < n; i++)
    dp[i] = lrand48 ();
  dp[n - 1] |= GMP_NUMB_HIGHBIT;
  mpn_random (rp, n);

  st = cputime ();
  for (i = 0; i < k; i++)
    mpn_mul_n (tp, dp, rp, n);
  printf ("mpn_mul_n took %dms\n", cputime () - st);

  st = cputime ();
  for (i = 0; i < k; i++)
    {
#ifdef CHECK
      /* New random divisor for every checked iteration.  */
      for (j = 0; j < n; j++)
        dp[j] = lrand48 ();
      dp[n - 1] |= GMP_NUMB_HIGHBIT;
#endif
      mpn_invert (qp, dp, n);
#ifdef CHECK
      if (test_invert (qp, dp, n) == 0)
        {
          fprintf (stderr, "test_invert failed at i=%lu\n",
                   (unsigned long) i);
          printf ("A:="); mpn_print (dp, n);
          printf ("X:=B^%lu", (unsigned long) n); mpn_print (qp, n);
          exit (1);
        }
#endif
    }
  printf ("mpn_invert%d took %dms", INVERT_VERSION, cputime () - st);
#ifdef WRAP_AROUND
  printf (" (with wrap-around trick, WRAP_AROUND_BOUND=%lu)",
          (unsigned long) WRAP_AROUND_BOUND);
#endif
  printf ("\n");

  /* Reference timing: divide B^(2n)/2 by the same divisor with mpn_divrem.
     The dividend is rebuilt in every iteration.  */
  st = cputime ();
  for (i = 0; i < k; i++)
    {
      MPN_ZERO (rp2, 2 * n);
      rp2[2 * n - 1] = GMP_LIMB_HIGHBIT;
      mpn_divrem (qp2, 0, rp2, 2 * n, dp, n);
    }
  printf ("mpn_divrem took %dms\n", cputime () - st);

  free (qp);
  free (qp2);
  free (rp);
  free (rp2);
  free (dp);
  free (tp);
  return 0;
}
#endif

View File

@ -110,13 +110,13 @@ mpn_sb_divappr_q (mp_ptr qp, mp_ptr np, mp_size_t nn,
if (mpn_cmp(np + nn - dn, dp, dn) >= 0)
{
ret = CNST_LIMB(1);
mpn_sub_n(np + nn - dn, np + nn - dn, dp, nn);
mpn_sub_n(np + nn - dn, np + nn - dn, dp, dn);
} else
ret = CNST_LIMB(0);
di1 = dip[1];
di0 = dip[0];
for (i = qn - 2; i >= 0; i--)
for (i = qn - 2; i >= 0L; i--)
{
/*
Compute n2 + top two limbs of n2*di, but
@ -125,7 +125,7 @@ mpn_sb_divappr_q (mp_ptr qp, mp_ptr np, mp_size_t nn,
*/
n21 = np[nn - 1];
n20 = np[nn - 2];
umul_ppmm(p2, p1, di0, n21);
umul_ppmm(p2, p1, di0, n21);
umul_ppmm(p4, p3, di1, n20);
add_ssaaaa(q, q0, n21, p2, CNST_LIMB(0), p4);
umul_ppmm(p1, p2, di1, n21);
@ -143,8 +143,14 @@ mpn_sb_divappr_q (mp_ptr qp, mp_ptr np, mp_size_t nn,
}
qp[i] = q;
dn--;
nn--;
if (dn > i + 1)
{
dp++;
dn--;
}
nn--;
}
return ret;

120
tests/mpn/t-sb_divappr_q.c Normal file
View File

@ -0,0 +1,120 @@
/* Test mpn_sb_divappr_q.
Copyright 2002 Free Software Foundation, Inc.
Copyright 2009 William Hart
This file is part of the MPIR Library.
The MPIR Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.
The MPIR Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General Public License
along with the MPIR Library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
#include <stdio.h>
#include <stdlib.h>
#include "mpir.h"
#include "gmp-impl.h"
#include "tests.h"
#define MAX_LIMBS 40
#define ITERS 1000
/* Check the schoolboy approximate-quotient division routine.

   For ITERS random operand pairs N = {np, nn}, D = {dp, dn} (D normalised,
   2 <= dn <= MAX_LIMBS, dn <= nn < dn + MAX_LIMBS) the quotient Q returned
   by mpn_sb_divappr_q is accepted when |N - Q*D| < D.  On failure the
   operands are printed and the program aborts.  */
void
check_sb_divappr_q (void)
{
  mp_limb_t np[2*MAX_LIMBS];
  mp_limb_t np2[2*MAX_LIMBS];
  mp_limb_t rp[2*MAX_LIMBS];
  mp_limb_t dp[MAX_LIMBS];
  mp_limb_t qp[MAX_LIMBS];
  mp_limb_t dip[2];
  mp_size_t nn, rn, dn, qn;
  gmp_randstate_t rands;
  int i, s;

  gmp_randinit_default (rands);

  for (i = 0; i < ITERS; i++)
    {
      /* The two-limb inverse below is taken from the top two limbs of D,
         so D needs at least two limbs; dn == 1 would read dp[-1].  */
      dn = (random () % (MAX_LIMBS - 1)) + 2;
      nn = (random () % MAX_LIMBS) + dn;

      mpn_rrandom (np, rands, nn);
      mpn_rrandom (dp, rands, dn);
      dp[dn-1] |= GMP_LIMB_HIGHBIT;

      /* The division works on np in place; keep a pristine copy of N.  */
      MPN_COPY (np2, np, nn);

      mpn_invert (dip, dp + dn - 2, 2);

      qn = nn - dn + 1;
      qp[qn - 1] = mpn_sb_divappr_q (qp, np, nn, dp, dn, dip);

      MPN_NORMALIZE (qp, qn);

      if (qn)
        {
          /* rp = Q * D; mpn_mul wants the longer operand first.  */
          if (qn >= dn) mpn_mul (rp, qp, qn, dp, dn);
          else mpn_mul (rp, dp, dn, qp, qn);
          rn = dn + qn;

          MPN_NORMALIZE (rp, rn);

          /* rp = |N - Q*D|, subtracting the smaller from the larger.  */
          s = (rn < nn) ? -1 : (rn > nn) ? 1 : mpn_cmp (rp, np2, nn);
          if (s <= 0)
            {
              mpn_sub (rp, np2, nn, rp, rn);
              rn = nn;
              MPN_NORMALIZE (rp, rn);
            }
          else
            {
              mpn_sub (rp, rp, rn, np2, nn);
              MPN_NORMALIZE (rp, rn);
            }
        }
      else
        {
          /* Q == 0: the difference is N itself.  Use the saved copy, since
             np may have been modified by the division, and normalise so
             that the size comparison below is meaningful.  */
          rn = nn;
          MPN_COPY (rp, np2, nn);
          MPN_NORMALIZE (rp, rn);
        }

      /* Fail unless |N - Q*D| < D.  */
      s = (rn < dn) ? -1 : (rn > dn) ? 1 : mpn_cmp (rp, dp, dn);
      if (s >= 0)
        {
          printf ("failed:\n");
          printf ("nn = %ld, dn = %ld, qn = %ld, rn = %ld\n\n",
                  (long) nn, (long) dn, (long) qn, (long) rn);
          gmp_printf (" np: %Nx\n\n", np2, nn);
          gmp_printf (" dp: %Nx\n\n", dp, dn);
          gmp_printf (" qp: %Nx\n\n", qp, qn);
          gmp_printf (" rp: %Nx\n\n", rp, rn);
          abort ();
        }
    }

  gmp_randclear (rands);
}
/* Test driver: run the mpn_sb_divappr_q check between the test harness
   start-up and shut-down calls, then exit with status 0.  A failing check
   aborts inside check_sb_divappr_q and never returns here.  */
int
main (void)
{
  tests_start ();

  check_sb_divappr_q ();

  tests_end ();
  exit (0);
}