/* floating-point Newton, with inversion in 3M(n) */ /* mpn_invert Copyright 2009 Paul Zimmermann This file is part of the MPIR Library. The MPIR Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. The MPIR Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the MPIR Library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ /* #define WRAP_AROUND */ /* #define WANT_ASSERT 1 */ #ifdef WRAP_AROUND #define INVERT_VERSION 3 #define WRAP_AROUND_BOUND 1500 #else #define INVERT_VERSION 2 #define WRAP_AROUND_BOUND ~0UL #endif #include #include #include #include "mpir.h" #include "gmp-impl.h" #include "longlong.h" #define ZERO (mp_limb_t) 0 #define ONE (mp_limb_t) 1 void mpn_print (mp_ptr A, mp_size_t n) { int j; for (j=0; j= n + 1 by construction, thus m > h */ ASSERT(m < n + h); cy = mpn_mul_fft (tp, m, ap, n, xp + l, h, k); /* cy, {tp, m} = A * {xp + l, h} mod (B^m+1) */ cy += mpn_add_n (tp + h, tp + h, ap, m - h); cc = mpn_sub_n (tp, tp, ap + m - h, n + h - m); cc = mpn_sub_1 (tp + n + h - m, tp + n + h - m, 2 * m - n - h, cc); if (cc > cy) /* can only occur if cc=1 and cy=0 */ cy = mpn_add_1 (tp, tp, m, ONE); else cy -= cc; /* cy, {tp, m} = A * Xh */ /* add B^(n+h) + B^(n+h-m) */ MPN_ZERO (tp + m, n + h - m); tp[m] = cy; /* note: since tp[n+h-1] is either 0, or cy<=1 if m=n+h-1, the mpn_incr_u() below cannot produce a carry */ mpn_incr_u (tp + n + h - m, ONE); cy = 1; do /* check if T >= B^(n+h) + 2*B^n */ { mp_size_t i; if (cy == ZERO) break; /* surely T < B^(n+h) */ if (cy == ONE) { for (i = n + h - 1; tp[i] == ZERO && i > n; i--); if (i == n && tp[i] < (mp_limb_t) 2) break; } /* subtract B^m+1 */ cy -= mpn_sub_1 (tp, tp, n + h, ONE); cy -= mpn_sub_1 (tp + m, tp + m, n + h - m, ONE); } while (1); #ifdef CHECK if ((cy != cy2) || mpn_cmp (tp, tp2, n + h) != 0) { fprintf (stderr, "wrong wrap around reconstruction\n"); exit (1); } #endif } while (cy) { mpn_sub_1 (xp + l, xp + l, h, ONE); cy -= mpn_sub (tp, tp, n + h, ap, n); } mpn_com_n (tp, tp, n); th = ~tp[n] + mpn_add_1 (tp, tp, n, ONE); mpn_mul_n (up, tp + l, xp + l, h); cy = mpn_add_n (up + h, up + h, tp + l, h); if (th != ZERO) cy += ONE + mpn_add_n (up + h, up + h, xp + l, h); MPN_COPY (xp, up + 2 * h - l, l); mpn_add_1 (xp + l, xp + l, h, cy); TMP_FREE; } } int test_invert (mp_ptr xp, mp_srcptr ap, mp_size_t n) { int res = 1; mp_size_t i; mp_ptr tp, up; mp_limb_t cy; TMP_DECL; TMP_MARK; tp = TMP_ALLOC_LIMBS (2 * n); up = TMP_ALLOC_LIMBS (2 * n); /* first check X*A < B^(2*n) */ mpn_mul_n (tp, xp, ap, n); cy = mpn_add_n (tp + n, tp + n, ap, n); /* A * msb(X) */ if (cy != 0) res = 0; /* now check B^(2n) - X*A <= A */ mpn_com_n (tp, tp, 2 * n); mpn_add_1 (tp, tp, 2 * n, 1); /* B^(2n) - X*A */ MPN_ZERO (up, 2 * n); MPN_COPY (up, ap, n); res = mpn_cmp (tp, up, 2 * n) <= 0; TMP_FREE; return res; } #ifdef MAIN #include #include int cputime () { struct rusage rus; getrusage (0, &rus); return rus.ru_utime.tv_sec * 1000 + rus.ru_utime.tv_usec / 1000; } int main (int argc, char *argv[]) { mp_size_t n = atoi (argv[1]), i, j, k; mp_ptr qp, rp, dp, tp, qp2, rp2; mp_limb_t cy; pid_t pid; int st; k = (argc <= 2) ? 1 : atoi(argv[2]); qp = malloc (n * sizeof (mp_limb_t)); qp2 = malloc (n * sizeof (mp_limb_t)); rp = malloc (n * sizeof (mp_limb_t)); rp2 = malloc (2 * n * sizeof (mp_limb_t)); dp = malloc (n * sizeof (mp_limb_t)); tp = malloc (2 * n * sizeof (mp_limb_t)); pid = getpid (); printf ("Seed=%lu\n", pid); srand48 (pid); for (i = 0; i < n; i++) dp[i] = lrand48 (); dp[n - 1] |= GMP_NUMB_HIGHBIT; mpn_random (rp, n); st = cputime (); for (i = 0; i < k; i++) mpn_mul_n (tp, dp, rp, n); printf ("mpn_mul_n took %dms\n", cputime () - st); st = cputime (); for (i = 0; i < k; i++) { #ifdef CHECK // printf ("Test %lu\n", i); for (j = 0; j < n; j++) dp[j] = lrand48 (); dp[n - 1] |= GMP_NUMB_HIGHBIT; #endif mpn_invert (qp, dp, n); #ifdef CHECK if (test_invert (qp, dp, n) == 0) { fprintf (stderr, "test_invert failed at i=%lu\n", i); printf ("A:="); mpn_print (dp, n); printf ("X:=B^%lu", n); mpn_print (qp, n); exit (1); } #endif } printf ("mpn_invert%d took %dms", INVERT_VERSION, cputime () - st); #ifdef WRAP_AROUND printf (" (with wrap-around trick, WRAP_AROUND_BOUND=%lu)", WRAP_AROUND_BOUND); #endif printf ("\n"); // printf ("xp="); mpn_print (qp, n); MPN_ZERO (rp2, 2 * n); rp2[2 * n - 1] = GMP_LIMB_HIGHBIT; st = cputime (); for (i = 0; i < k; i++) { MPN_ZERO (rp2, 2 * n); rp2[2 * n - 1] = GMP_LIMB_HIGHBIT; mpn_divrem (qp2, 0, rp2, 2 * n, dp, n); } printf ("mpn_divrem took %dms\n", cputime () - st); free (qp); free (rp); free (dp); free (tp); return 0; } #endif