From 087b583f612f6a4668aa6785fba854f3afccac86 Mon Sep 17 00:00:00 2001 From: jasonmoxham Date: Fri, 24 Jul 2009 17:16:50 +0000 Subject: [PATCH] New generic mullow and mulhigh --- mpn/generic/mulhigh_n.c | 196 ++++++++++++++++++++++++++++++++ mpn/generic/mullow_basecase.c | 53 +++++---- mpn/generic/mullow_n.c | 146 +++++++++--------------- mpn/generic/mullow_n_basecase.c | 43 +++++++ 4 files changed, 322 insertions(+), 116 deletions(-) create mode 100644 mpn/generic/mulhigh_n.c create mode 100644 mpn/generic/mullow_n_basecase.c diff --git a/mpn/generic/mulhigh_n.c b/mpn/generic/mulhigh_n.c new file mode 100644 index 00000000..a098e2f5 --- /dev/null +++ b/mpn/generic/mulhigh_n.c @@ -0,0 +1,196 @@ +/* mpn_mulhigh_n + +Copyright 2009 Jason Moxham + +This file is part of the MPIR Library. + +The MPIR Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published +by the Free Software Foundation; either version 2.1 of the License, or (at +your option) any later version. + +The MPIR Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the MPIR Library; see the file COPYING.LIB. If not, write +to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +Boston, MA 02110-1301, USA. +*/ + +#include "mpir.h" +#include "gmp-impl.h" +#include "longlong.h" + +/* +Define the usual multiplication as + +let X=sum over 0<=i=k x[i]y[j]B^(i+j) + +and approx short product as a superset of shortproduct and subset of usual product + +Now consider the usual product XY + +XY = sum over {0<=i=n-m,j>=n-m} {i=n-m} {i>=n-m,j=n-m,j>=n-m} {i=n-m,j>=n-m}{i=n-m,j>=n-m}{i=n-m,j>=n-m}{i=n-m,j>=n-m}{i=n-m,j>=n-m} i+j>=2(n-m) so keep most + {i=n-m,j>=n-m} {i=k=n-2 , let u=i,v=j-m so we have {0<=u=n-m-2 +which is the same short product + +Summary +----------- +Given n digit xp and yp , +define mulshort_n(xp,yp,n) to be sum {i+j>=n-2,and perhaps some i+j= 3); + ASSERT_MPN (xp, n); + ASSERT_MPN (yp, n); + ASSERT (!MPN_OVERLAP_P (rp, 2 * n, xp, n)); + ASSERT (!MPN_OVERLAP_P (rp, 2 * n, yp, n)); + k = n - 2; // so want short product sum_(i+j>=k) x[i]y[j]B^(i+j) +#if GMP_NAIL_BITS!=0 + rp[n] = mpn_mul_1 (rp + k, xp + k, 2, yp[0]); +#else + mp_limb_t t1, t2, t3; + umul_ppmm (t1, rp[k], xp[k], yp[0]); + umul_ppmm (t3, t2, xp[k + 1], yp[0]); + add_ssaaaa (rp[n], rp[k + 1], t3, t2, 0, t1); +#endif + for (i = 1; i <= n - 2; i++) + rp[n + i] = mpn_addmul_1 (rp + k, xp + k - i, 2 + i, yp[i]); + rp[n + n - 1] = mpn_addmul_1 (rp + n - 1, xp, n, yp[n - 1]); + return; +} + +// (rp,2n)=(xp,n)*(yp,n) +static void +mpn_mulshort_n (mp_ptr rp, mp_srcptr xp, mp_srcptr yp, mp_size_t n) +{ + mp_size_t m; + mp_limb_t t; + mp_ptr rpn2; + + ASSERT (n >= 1); + ASSERT_MPN (xp, n); + ASSERT_MPN (yp, n); + ASSERT (!MPN_OVERLAP_P (rp, 2 * n, xp, n)); + ASSERT (!MPN_OVERLAP_P (rp, 2 * n, yp, n)); + if (BELOW_THRESHOLD (n, MULHIGH_BASECASE_THRESHOLD)) + { + mpn_mul_basecase (rp, xp, n, yp, n); + return; + } + if (BELOW_THRESHOLD (n, MULHIGH_DC_THRESHOLD)) + { + mpn_mulshort_n_basecase (rp, xp, yp, n); + return; + } +// choose optimal m st n+2<=2m m= 4); + m = 87 * n / 128; + if (2 * m < n + 2) + m = (n + 1) / 2 + 1; + if (m >= n) + m = n - 1; + ASSERT (n + 2 <= 2 * m); + ASSERT (m < n); + rpn2 = rp + n - 2; + mpn_mul_n (rp + n - m + n - m, xp + n - m, yp + n - m, m); + mpn_mulshort_n (rp, xp, yp + m, n - m); + ASSERT_NOCARRY (mpn_add (rpn2, rpn2, n + 2, rpn2 - m, n - m + 2)); + mpn_mulshort_n (rp, xp + m, yp, n - m); + ASSERT_NOCARRY (mpn_add (rpn2, rpn2, n + 2, rpn2 - m, n - m + 2)); + umul_ppmm (rp[1], t, xp[m - 1], yp[n - m - 1] << GMP_NAIL_BITS); + rp[0] = t >> GMP_NAIL_BITS; + ASSERT_NOCARRY (mpn_add (rpn2, rpn2, n + 2, rp, 2)); + umul_ppmm (rp[1], t, xp[n - m - 1], yp[m - 1] << GMP_NAIL_BITS); + rp[0] = t >> GMP_NAIL_BITS; + ASSERT_NOCARRY (mpn_add (rpn2, rpn2, n + 2, rp, 2)); + return; +} + +// (rp,2n)=(xp,n)*(yp,n) +void +mpn_mulhigh_n (mp_ptr rp, mp_srcptr xp, mp_srcptr yp, mp_size_t n) +{ + mp_limb_t t; + + ASSERT (n > 0); + ASSERT_MPN (xp, n); + ASSERT_MPN (yp, n); + ASSERT (!MPN_OVERLAP_P (rp, 2 * n, xp, n)); + ASSERT (!MPN_OVERLAP_P (rp, 2 * n, yp, n)); + if (BELOW_THRESHOLD (n, MULHIGH_BASECASE_THRESHOLD)) + { + mpn_mul_basecase (rp, xp, n, yp, n); + return; + } + if (ABOVE_THRESHOLD (n, MULHIGH_MUL_THRESHOLD)) + { + mpn_mul_n (rp, xp, yp, n); + return; + } + mpn_mulshort_n (rp, xp, yp, n); + t = rp[n - 1] + n - 2; + if (UNLIKELY (t < n - 2)) + mpn_mul_n (rp, xp, yp, n); + return; +} diff --git a/mpn/generic/mullow_basecase.c b/mpn/generic/mullow_basecase.c index 3a686412..bf195afa 100644 --- a/mpn/generic/mullow_basecase.c +++ b/mpn/generic/mullow_basecase.c @@ -1,28 +1,23 @@ -/* mpn_mullow_basecase -- Internal routine to multiply two natural - numbers of length m and n and return the low part. +/* +Copyright 2009 Jason Moxham - THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS ONLY - SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES. +This file is part of the MPIR Library. +The MPIR Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published +by the Free Software Foundation; either version 2.1 of the License, or (at +your option) any later version. -Copyright (C) 2000, 2002, 2004 Free Software Foundation, Inc. - -This file is part of the GNU MP Library. - -The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of the GNU Lesser General Public License as published by -the Free Software Foundation; either version 2.1 of the License, or (at your -option) any later version. - -The GNU MP Library is distributed in the hope that it will be useful, but +The MPIR Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License -along with the GNU MP Library; see the file COPYING.LIB. If not, write to -the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, -MA 02110-1301, USA. */ +along with the MPIR Library; see the file COPYING.LIB. If not, write +to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +Boston, MA 02110-1301, USA. +*/ #include "mpir.h" #include "gmp-impl.h" @@ -31,13 +26,27 @@ MA 02110-1301, USA. */ FIXME: Should use mpn_addmul_2 (and higher). */ +// (rp,xn+yn)=(xp,xn)*(yp,yn) mod B^n void -mpn_mullow_basecase (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n) +mpn_mullow_basecase (mp_ptr rp, mp_srcptr xp, mp_size_t xn, mp_srcptr yp, + mp_size_t yn, mp_size_t n) { + mp_limb_t c; mp_size_t i; - mpn_mul_1 (rp, up, n, vp[0]); - - for (i = 1; i < n; i++) - mpn_addmul_1 (rp + i, up, n - i, vp[i]); +// want some sort of threshold to call mpn_mul() etc like for the mpn_mullow_n cases + ASSERT (0 < yn); + ASSERT (yn <= xn); + ASSERT (xn <= n); + ASSERT (n <= xn + yn); + ASSERT_MPN (xp, xn); + ASSERT_MPN (yp, yn); + ASSERT (!MPN_OVERLAP_P (rp, xn + yn, xp, xn)); + ASSERT (!MPN_OVERLAP_P (rp, xn + yn, yp, yn)); + rp[xn] = mpn_mul_1 (rp, xp, xn, yp[0]); + for (i = 1; i <= n - xn && i < yn; i++) + rp[xn + i] = mpn_addmul_1 (rp + i, xp, xn, yp[i]); + for (i = i; i > n - xn && i < yn; i++) + mpn_addmul_1 (rp + i, xp, n - i, yp[i]); + return; } diff --git a/mpn/generic/mullow_n.c b/mpn/generic/mullow_n.c index 39bfca9c..45d29567 100644 --- a/mpn/generic/mullow_n.c +++ b/mpn/generic/mullow_n.c @@ -1,113 +1,71 @@ /* mpn_mullow_n -- multiply two n-limb nunbers and return the low n limbs of their products. + + Copyright 2004, 2005 Free Software Foundation, Inc. + Copyright 2009 Jason Moxham + + This file is part of the GNU MP Library. + + The GNU MP Library is free software; you can redistribute it and/or modify + it under the terms of the GNU Lesser General Public License as published by + the Free Software Foundation; either version 2.1 of the License, or (at your + option) any later version. + + The GNU MP Library is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY + or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public + License for more details. + + You should have received a copy of the GNU Lesser General Public License + along with the GNU MP Library; see the file COPYING.LIB. If not, write to + the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, + MA 02110-1301, USA. */ - THIS IS (FOR NOW) AN INTERNAL FUNCTION. IT IS ONLY SAFE TO REACH THIS - FUNCTION THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST GUARANTEED - THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. + /* Copyright 2008 Complete rewrite Only the name is the same + Note: sets 2n limbs -Copyright 2004, 2005 Free Software Foundation, Inc. - -This file is part of the GNU MP Library. - -The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of the GNU Lesser General Public License as published by -the Free Software Foundation; either version 2.1 of the License, or (at your -option) any later version. - -The GNU MP Library is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -License for more details. - -You should have received a copy of the GNU Lesser General Public License -along with the GNU MP Library; see the file COPYING.LIB. If not, write to -the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, -MA 02110-1301, USA. */ + */ #include "mpir.h" #include "gmp-impl.h" - -#ifndef MULLOW_BASECASE_THRESHOLD -#define MULLOW_BASECASE_THRESHOLD 0 /* never use mpn_mul_basecase */ -#endif - -#ifndef MULLOW_DC_THRESHOLD -#define MULLOW_DC_THRESHOLD 3*MUL_KARATSUBA_THRESHOLD -#endif - -#ifndef MULLOW_MUL_N_THRESHOLD -#define MULLOW_MUL_N_THRESHOLD 10*MULLOW_DC_THRESHOLD -#endif - -/* Avoid zero allocations when MULLOW_BASECASE_THRESHOLD is 0. */ -#define MUL_BASECASE_ALLOC \ - (MULLOW_BASECASE_THRESHOLD_LIMIT == 0 ? 1 : 2*MULLOW_BASECASE_THRESHOLD_LIMIT) - -/* - FIXME: This function should accept a temporary area. - FIXME: Perhaps call mpn_kara_mul_n instead of mpn_mul_n? - THINK: If mpn_mul_basecase is always faster than mpn_mullow_basecase - (typically thanks to mpn_addmul_2) should we unconditionally use - mpn_mul_n? - FIXME: The recursive calls to mpn_mullow_n use sizes n/2 (one uses floor(n/2) - and the other ceil(n/2)). Depending on the values of the various - _THRESHOLDs, this may never trigger MULLOW_BASECASE_THRESHOLD. - Should we worry about this overhead? -*/ - void mpn_mullow_n (mp_ptr rp, mp_srcptr xp, mp_srcptr yp, mp_size_t n) { + mp_size_t m; + + ASSERT (n > 0); + ASSERT_MPN (xp, n); + ASSERT_MPN (yp, n); + ASSERT (!MPN_OVERLAP_P (rp, 2 * n, xp, n)); + ASSERT (!MPN_OVERLAP_P (rp, 2 * n, yp, n)); if (BELOW_THRESHOLD (n, MULLOW_BASECASE_THRESHOLD)) { - /* Allocate workspace of fixed size on stack: fast! */ - mp_limb_t ws[MUL_BASECASE_ALLOC]; - mpn_mul_basecase (ws, xp, n, yp, n); - MPN_COPY (rp, ws, n); + mpn_mul_basecase (rp, xp, n, yp, n); + return; } - else if (BELOW_THRESHOLD (n, MULLOW_DC_THRESHOLD)) + if (BELOW_THRESHOLD (n, MULLOW_DC_THRESHOLD)) { - mpn_mullow_basecase (rp, xp, yp, n); + mpn_mullow_n_basecase (rp, xp, yp, n); + return; } - else if (BELOW_THRESHOLD (n, MULLOW_MUL_N_THRESHOLD)) + if (ABOVE_THRESHOLD (n, MULLOW_MUL_N_THRESHOLD)) { - /* Divide-and-conquer */ - mp_size_t n2 = n >> 1; /* floor(n/2) */ - mp_size_t n1 = n - n2; /* ceil(n/2) */ - mp_ptr tp; - TMP_SDECL; - TMP_SMARK; - tp = TMP_SALLOC_LIMBS (n1); - - /* Split as x = x1 2^(n1 GMP_NUMB_BITS) + x0, - y = y1 2^(n2 GMP_NUMB_BITS) + y0 */ - - /* x0 * y0 */ - mpn_mul_n (rp, xp, yp, n2); - if (n1 != n2) - rp[2 * n2] = mpn_addmul_1 (rp + n2, yp, n2, xp[n2]); - - /* x1 * y0 * 2^(n1 GMP_NUMB_BITS) */ - mpn_mullow_n (tp, xp + n1, yp, n2); - mpn_add_n (rp + n1, rp + n1, tp, n2); - - /* x0 * y1 * 2^(n2 GMP_NUMB_BITS) */ - mpn_mullow_n (tp, yp + n2, xp, n1); - mpn_add_n (rp + n2, rp + n2, tp, n1); - TMP_SFREE; - } - else - { - /* For really large operands, use plain mpn_mul_n but throw away upper n - limbs of result. */ - mp_ptr tp; - TMP_DECL; - TMP_MARK; - tp = TMP_ALLOC_LIMBS (2 * n); - - mpn_mul_n (tp, xp, yp, n); - MPN_COPY (rp, tp, n); - TMP_FREE; + mpn_mul_n (rp, xp, yp, n); + return; } +//choose optimal m st n/2 <= m <= n , choosing m==n is same as above + m = n * 87 / 128; + if (2 * m < n) + m = n - n / 2; + if (m > n) + m = n; + ASSERT (n / 2 <= m); + ASSERT (m <= n); + mpn_mul_n (rp, xp, yp, m); + mpn_mullow_n (rp + 2 * m, xp, yp + m, n - m); + mpn_add_n (rp + m, rp + m, rp + 2 * m, n - m); + mpn_mullow_n (rp + 2 * m, xp + m, yp, n - m); + mpn_add_n (rp + m, rp + m, rp + 2 * m, n - m); + return; } diff --git a/mpn/generic/mullow_n_basecase.c b/mpn/generic/mullow_n_basecase.c new file mode 100644 index 00000000..5bb4bdd4 --- /dev/null +++ b/mpn/generic/mullow_n_basecase.c @@ -0,0 +1,43 @@ +/* mpn_mullow_n_basecase -- Internal routine to multiply two natural + numbers of length m and n and return the low part. + + THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS ONLY + SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES. + + +Copyright (C) 2000, 2002, 2004 Free Software Foundation, Inc. + +This file is part of the GNU MP Library. + +The GNU MP Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +The GNU MP Library is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with the GNU MP Library; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, +MA 02110-1301, USA. */ + +#include "mpir.h" +#include "gmp-impl.h" + +/* + FIXME: Should use mpn_addmul_2 (and higher). +*/ + +void +mpn_mullow_n_basecase (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n) +{ + mp_size_t i; + + mpn_mul_1 (rp, up, n, vp[0]); + + for (i = 1; i < n; i++) + mpn_addmul_1 (rp + i, up, n - i, vp[i]); +}