New generic mullow and mulhigh

This commit is contained in:
jasonmoxham 2009-07-24 17:16:50 +00:00
parent 766f024ef4
commit 087b583f61
4 changed files with 322 additions and 116 deletions

196
mpn/generic/mulhigh_n.c Normal file
View File

@ -0,0 +1,196 @@
/* mpn_mulhigh_n
Copyright 2009 Jason Moxham
This file is part of the MPIR Library.
The MPIR Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published
by the Free Software Foundation; either version 2.1 of the License, or (at
your option) any later version.
The MPIR Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General Public License
along with the MPIR Library; see the file COPYING.LIB. If not, write
to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.
*/
#include "mpir.h"
#include "gmp-impl.h"
#include "longlong.h"
/*
Define the usual multiplication as
let X=sum over 0<=i<n of x[i]B^i
let Y=sum over 0<=i<n of y[i]B^i
XY = sum over 0<=i<n 0<=j<n x[i]y[j]B^(i+j)
Define short product as
XY_k = sum over i+j>=k x[i]y[j]B^(i+j)
and approx short product as a superset of shortproduct and subset of usual product
Now consider the usual product XY
XY = sum over {0<=i<n,0<=j<n} x[i]y[j]B^(i+j) from now we just show the sum bounds with these implicit limits on i and j
={0<=i<n,0<=j<n}
split into four pieces (requires 0<=m<=n)
={i<n-m,j<n-m}{i>=n-m,j>=n-m} {i<n-m,j>=n-m} {i>=n-m,j<n-m}
split last two pieces again (requires n-m<=m-1)
={i<n-m,j<n-m}{i>=n-m,j>=n-m} {i<n-m,n-m<=j<m} {i<n-m,m<=j} {n-m<=i<m,j<n-m} {m<=i,j<n-m}
rearrange
={i<n-m,j<n-m}{i>=n-m,j>=n-m}{i<n-m,m<=j}{m<=i,j<n-m} {i<n-m,n-m<=j<m} {n-m<=i<m,j<n-m}
split last two again (requires n-m<=m-2)
={i<n-m,j<n-m}{i>=n-m,j>=n-m}{i<n-m,m<=j}{m<=i,j<n-m} {i<n-m,n-m<=j<=m-2} {i<n-m,m-2<j<m} {n-m<=i<=m-2,j<n-m} {m-2<i<m,j<n-m}
rearrange
={i<n-m,j<n-m}{i>=n-m,j>=n-m}{i<n-m,m<=j}{m<=i,j<n-m}{i<n-m,n-m<=j<=m-2}{n-m<=i<=m-2,j<n-m} {i<n-m,j=m-1} {i=m-1,j<n-m}
split last two again
={i<n-m,j<n-m}{i>=n-m,j>=n-m}{i<n-m,m<=j}{m<=i,j<n-m}{i<n-m,n-m<=j<=m-2}{n-m<=i<=m-2,j<n-m} {i<n-m-1,j=m-1}{i=n-m-1,j=m-1} {i=m-1,j<n-m-1}{i=m-1,j=n-m-1}
Now choose any m such that n+2<=2m m<=n
so n-m<=m-2 so our requirements above are satisfied
Now consider the short product with k=n-2 , so we discard those with i+j<k=n-2
={i<n-m,j<n-m} i+j<=2(n-m)-2 as n+2<=2m so n<2m so 2n<2m+n so 2n-2m<n so i+j<n-2=k so empty
{i>=n-m,j>=n-m} i+j>=2(n-m) so keep most
{i<n-m,m<=j} keep some
{m<=i,j<n-m} keep some
{i<n-m,n-m<=j<=m-2} i+j<=n-m-1+m-2=n-3<n-2=k so empty
{n-m<=i<=m-2,j<n-m} i+j<=n-m-1+m-2=n-3<n-2=k so empty
{i<n-m-1,j=m-1} i+j<=n-m-2+m-1=n-3<n-2=k so empty
{i=n-m-1,j=m-1} i+j=n-m-1+m-1=n-2=k so keep all
{i=m-1,j<n-m-1} i+j<=m-1+n-m-2=n-3<n-2=k so empty
{i=m-1,j=n-m-1} i+j=m-1+n-m-1=n-2=k so keep all
so the approx short product XY_k is {i>=n-m,j>=n-m} {i<n-m,m<=j} {m<=i,j<n-m} {i=n-m-1,j=m-1} {i=m-1,j=n-m-1}
Now for {i<n-m,m<=j} with i+j>=k=n-2 , let u=i,v=j-m so we have {0<=u<n-m, 0<=v<n-m} with u+v>=n-m-2
which is the same short product
Summary
-----------
Given n digit xp and yp ,
define mulshort_n(xp,yp,n) to be sum {i+j>=n-2,and perhaps some i+j<n-2} xp[i]yp[i]B^(i+j)
choose m such that n+2<=2m and m<n then from above ,
mulshort_n(xp,yp,n)=mul(xp+n-m,yp+n-m,m)B^(2n-2m)
+mulshort_n(xp+m,yp,n-m)B^m
+mulshort_n(xp,yp+m,n-m)B^m
+xp[n-m-1]yp[m-1]B^(n-2)
+xp[m-1]yp[n-m-1]B^(n-2)
and clearly when summing the above we can ignore any products from i+j<n-2
Theorem
Let (zp,2n)=mulshort_n(xp,yp,n)
if zp[n-1]+n-2<B then mulhigh_n(xp,yp,n)=(zp,2n)
*/
// (rp,2n)=(xp,n)*(yp,n) / B^n
inline static void
mpn_mulshort_n_basecase (mp_ptr rp, mp_srcptr xp, mp_srcptr yp, mp_size_t n)
{
mp_size_t i, k;
ASSERT (n >= 3);
ASSERT_MPN (xp, n);
ASSERT_MPN (yp, n);
ASSERT (!MPN_OVERLAP_P (rp, 2 * n, xp, n));
ASSERT (!MPN_OVERLAP_P (rp, 2 * n, yp, n));
k = n - 2; // so want short product sum_(i+j>=k) x[i]y[j]B^(i+j)
#if GMP_NAIL_BITS!=0
rp[n] = mpn_mul_1 (rp + k, xp + k, 2, yp[0]);
#else
mp_limb_t t1, t2, t3;
umul_ppmm (t1, rp[k], xp[k], yp[0]);
umul_ppmm (t3, t2, xp[k + 1], yp[0]);
add_ssaaaa (rp[n], rp[k + 1], t3, t2, 0, t1);
#endif
for (i = 1; i <= n - 2; i++)
rp[n + i] = mpn_addmul_1 (rp + k, xp + k - i, 2 + i, yp[i]);
rp[n + n - 1] = mpn_addmul_1 (rp + n - 1, xp, n, yp[n - 1]);
return;
}
// (rp,2n)=(xp,n)*(yp,n)
static void
mpn_mulshort_n (mp_ptr rp, mp_srcptr xp, mp_srcptr yp, mp_size_t n)
{
mp_size_t m;
mp_limb_t t;
mp_ptr rpn2;
ASSERT (n >= 1);
ASSERT_MPN (xp, n);
ASSERT_MPN (yp, n);
ASSERT (!MPN_OVERLAP_P (rp, 2 * n, xp, n));
ASSERT (!MPN_OVERLAP_P (rp, 2 * n, yp, n));
if (BELOW_THRESHOLD (n, MULHIGH_BASECASE_THRESHOLD))
{
mpn_mul_basecase (rp, xp, n, yp, n);
return;
}
if (BELOW_THRESHOLD (n, MULHIGH_DC_THRESHOLD))
{
mpn_mulshort_n_basecase (rp, xp, yp, n);
return;
}
// choose optimal m st n+2<=2m m<n
ASSERT (n >= 4);
m = 87 * n / 128;
if (2 * m < n + 2)
m = (n + 1) / 2 + 1;
if (m >= n)
m = n - 1;
ASSERT (n + 2 <= 2 * m);
ASSERT (m < n);
rpn2 = rp + n - 2;
mpn_mul_n (rp + n - m + n - m, xp + n - m, yp + n - m, m);
mpn_mulshort_n (rp, xp, yp + m, n - m);
ASSERT_NOCARRY (mpn_add (rpn2, rpn2, n + 2, rpn2 - m, n - m + 2));
mpn_mulshort_n (rp, xp + m, yp, n - m);
ASSERT_NOCARRY (mpn_add (rpn2, rpn2, n + 2, rpn2 - m, n - m + 2));
umul_ppmm (rp[1], t, xp[m - 1], yp[n - m - 1] << GMP_NAIL_BITS);
rp[0] = t >> GMP_NAIL_BITS;
ASSERT_NOCARRY (mpn_add (rpn2, rpn2, n + 2, rp, 2));
umul_ppmm (rp[1], t, xp[n - m - 1], yp[m - 1] << GMP_NAIL_BITS);
rp[0] = t >> GMP_NAIL_BITS;
ASSERT_NOCARRY (mpn_add (rpn2, rpn2, n + 2, rp, 2));
return;
}
// (rp,2n)=(xp,n)*(yp,n)
void
mpn_mulhigh_n (mp_ptr rp, mp_srcptr xp, mp_srcptr yp, mp_size_t n)
{
mp_limb_t t;
ASSERT (n > 0);
ASSERT_MPN (xp, n);
ASSERT_MPN (yp, n);
ASSERT (!MPN_OVERLAP_P (rp, 2 * n, xp, n));
ASSERT (!MPN_OVERLAP_P (rp, 2 * n, yp, n));
if (BELOW_THRESHOLD (n, MULHIGH_BASECASE_THRESHOLD))
{
mpn_mul_basecase (rp, xp, n, yp, n);
return;
}
if (ABOVE_THRESHOLD (n, MULHIGH_MUL_THRESHOLD))
{
mpn_mul_n (rp, xp, yp, n);
return;
}
mpn_mulshort_n (rp, xp, yp, n);
t = rp[n - 1] + n - 2;
if (UNLIKELY (t < n - 2))
mpn_mul_n (rp, xp, yp, n);
return;
}

View File

@ -1,28 +1,23 @@
/* mpn_mullow_basecase -- Internal routine to multiply two natural
numbers of length m and n and return the low part.
/*
Copyright 2009 Jason Moxham
THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS ONLY
SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
This file is part of the MPIR Library.
The MPIR Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published
by the Free Software Foundation; either version 2.1 of the License, or (at
your option) any later version.
Copyright (C) 2000, 2002, 2004 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
The MPIR Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
along with the MPIR Library; see the file COPYING.LIB. If not, write
to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.
*/
#include "mpir.h"
#include "gmp-impl.h"
@ -31,13 +26,27 @@ MA 02110-1301, USA. */
FIXME: Should use mpn_addmul_2 (and higher).
*/
// (rp,xn+yn)=(xp,xn)*(yp,yn) mod B^n
void
mpn_mullow_basecase (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
mpn_mullow_basecase (mp_ptr rp, mp_srcptr xp, mp_size_t xn, mp_srcptr yp,
mp_size_t yn, mp_size_t n)
{
mp_limb_t c;
mp_size_t i;
mpn_mul_1 (rp, up, n, vp[0]);
for (i = 1; i < n; i++)
mpn_addmul_1 (rp + i, up, n - i, vp[i]);
// want some sort of threshold to call mpn_mul() etc like for the mpn_mullow_n cases
ASSERT (0 < yn);
ASSERT (yn <= xn);
ASSERT (xn <= n);
ASSERT (n <= xn + yn);
ASSERT_MPN (xp, xn);
ASSERT_MPN (yp, yn);
ASSERT (!MPN_OVERLAP_P (rp, xn + yn, xp, xn));
ASSERT (!MPN_OVERLAP_P (rp, xn + yn, yp, yn));
rp[xn] = mpn_mul_1 (rp, xp, xn, yp[0]);
for (i = 1; i <= n - xn && i < yn; i++)
rp[xn + i] = mpn_addmul_1 (rp + i, xp, xn, yp[i]);
for (i = i; i > n - xn && i < yn; i++)
mpn_addmul_1 (rp + i, xp, n - i, yp[i]);
return;
}

View File

@ -1,113 +1,71 @@
/* mpn_mullow_n -- multiply two n-limb nunbers and return the low n limbs
of their products.
Copyright 2004, 2005 Free Software Foundation, Inc.
Copyright 2009 Jason Moxham
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
THIS IS (FOR NOW) AN INTERNAL FUNCTION. IT IS ONLY SAFE TO REACH THIS
FUNCTION THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST GUARANTEED
THAT IT'LL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
/* Copyright 2008 Complete rewrite Only the name is the same
Note: sets 2n limbs
Copyright 2004, 2005 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
*/
#include "mpir.h"
#include "gmp-impl.h"
#ifndef MULLOW_BASECASE_THRESHOLD
#define MULLOW_BASECASE_THRESHOLD 0 /* never use mpn_mul_basecase */
#endif
#ifndef MULLOW_DC_THRESHOLD
#define MULLOW_DC_THRESHOLD 3*MUL_KARATSUBA_THRESHOLD
#endif
#ifndef MULLOW_MUL_N_THRESHOLD
#define MULLOW_MUL_N_THRESHOLD 10*MULLOW_DC_THRESHOLD
#endif
/* Avoid zero allocations when MULLOW_BASECASE_THRESHOLD is 0. */
#define MUL_BASECASE_ALLOC \
(MULLOW_BASECASE_THRESHOLD_LIMIT == 0 ? 1 : 2*MULLOW_BASECASE_THRESHOLD_LIMIT)
/*
FIXME: This function should accept a temporary area.
FIXME: Perhaps call mpn_kara_mul_n instead of mpn_mul_n?
THINK: If mpn_mul_basecase is always faster than mpn_mullow_basecase
(typically thanks to mpn_addmul_2) should we unconditionally use
mpn_mul_n?
FIXME: The recursive calls to mpn_mullow_n use sizes n/2 (one uses floor(n/2)
and the other ceil(n/2)). Depending on the values of the various
_THRESHOLDs, this may never trigger MULLOW_BASECASE_THRESHOLD.
Should we worry about this overhead?
*/
void
mpn_mullow_n (mp_ptr rp, mp_srcptr xp, mp_srcptr yp, mp_size_t n)
{
mp_size_t m;
ASSERT (n > 0);
ASSERT_MPN (xp, n);
ASSERT_MPN (yp, n);
ASSERT (!MPN_OVERLAP_P (rp, 2 * n, xp, n));
ASSERT (!MPN_OVERLAP_P (rp, 2 * n, yp, n));
if (BELOW_THRESHOLD (n, MULLOW_BASECASE_THRESHOLD))
{
/* Allocate workspace of fixed size on stack: fast! */
mp_limb_t ws[MUL_BASECASE_ALLOC];
mpn_mul_basecase (ws, xp, n, yp, n);
MPN_COPY (rp, ws, n);
mpn_mul_basecase (rp, xp, n, yp, n);
return;
}
else if (BELOW_THRESHOLD (n, MULLOW_DC_THRESHOLD))
if (BELOW_THRESHOLD (n, MULLOW_DC_THRESHOLD))
{
mpn_mullow_basecase (rp, xp, yp, n);
mpn_mullow_n_basecase (rp, xp, yp, n);
return;
}
else if (BELOW_THRESHOLD (n, MULLOW_MUL_N_THRESHOLD))
if (ABOVE_THRESHOLD (n, MULLOW_MUL_N_THRESHOLD))
{
/* Divide-and-conquer */
mp_size_t n2 = n >> 1; /* floor(n/2) */
mp_size_t n1 = n - n2; /* ceil(n/2) */
mp_ptr tp;
TMP_SDECL;
TMP_SMARK;
tp = TMP_SALLOC_LIMBS (n1);
/* Split as x = x1 2^(n1 GMP_NUMB_BITS) + x0,
y = y1 2^(n2 GMP_NUMB_BITS) + y0 */
/* x0 * y0 */
mpn_mul_n (rp, xp, yp, n2);
if (n1 != n2)
rp[2 * n2] = mpn_addmul_1 (rp + n2, yp, n2, xp[n2]);
/* x1 * y0 * 2^(n1 GMP_NUMB_BITS) */
mpn_mullow_n (tp, xp + n1, yp, n2);
mpn_add_n (rp + n1, rp + n1, tp, n2);
/* x0 * y1 * 2^(n2 GMP_NUMB_BITS) */
mpn_mullow_n (tp, yp + n2, xp, n1);
mpn_add_n (rp + n2, rp + n2, tp, n1);
TMP_SFREE;
}
else
{
/* For really large operands, use plain mpn_mul_n but throw away upper n
limbs of result. */
mp_ptr tp;
TMP_DECL;
TMP_MARK;
tp = TMP_ALLOC_LIMBS (2 * n);
mpn_mul_n (tp, xp, yp, n);
MPN_COPY (rp, tp, n);
TMP_FREE;
mpn_mul_n (rp, xp, yp, n);
return;
}
//choose optimal m st n/2 <= m <= n , choosing m==n is same as above
m = n * 87 / 128;
if (2 * m < n)
m = n - n / 2;
if (m > n)
m = n;
ASSERT (n / 2 <= m);
ASSERT (m <= n);
mpn_mul_n (rp, xp, yp, m);
mpn_mullow_n (rp + 2 * m, xp, yp + m, n - m);
mpn_add_n (rp + m, rp + m, rp + 2 * m, n - m);
mpn_mullow_n (rp + 2 * m, xp + m, yp, n - m);
mpn_add_n (rp + m, rp + m, rp + 2 * m, n - m);
return;
}

View File

@ -0,0 +1,43 @@
/* mpn_mullow_n_basecase -- Internal routine to multiply two natural
numbers of length m and n and return the low part.
THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS ONLY
SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES.
Copyright (C) 2000, 2002, 2004 Free Software Foundation, Inc.
This file is part of the GNU MP Library.
The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.
The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
#include "mpir.h"
#include "gmp-impl.h"
/*
FIXME: Should use mpn_addmul_2 (and higher).
*/
void
mpn_mullow_n_basecase (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
{
mp_size_t i;
mpn_mul_1 (rp, up, n, vp[0]);
for (i = 1; i < n; i++)
mpn_addmul_1 (rp + i, up, n - i, vp[i]);
}