mpir/mpn/generic/mulhigh_n.c

199 lines
6.6 KiB
C
Raw Normal View History

2009-07-24 13:16:50 -04:00
/* mpn_mulhigh_n
Copyright 2009 Jason Moxham
This file is part of the MPIR Library.
The MPIR Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published
by the Free Software Foundation; either version 2.1 of the License, or (at
your option) any later version.
The MPIR Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General Public License
along with the MPIR Library; see the file COPYING.LIB. If not, write
to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.
*/
#include "mpir.h"
#include "gmp-impl.h"
#include "longlong.h"
/*
Define the usual multiplication as
let X=sum over 0<=i<n of x[i]B^i
let Y=sum over 0<=i<n of y[i]B^i
XY = sum over 0<=i<n 0<=j<n x[i]y[j]B^(i+j)
Define short product as
XY_k = sum over i+j>=k x[i]y[j]B^(i+j)
and approx short product as a superset of shortproduct and subset of usual product
Now consider the usual product XY
XY = sum over {0<=i<n,0<=j<n} x[i]y[j]B^(i+j) from now we just show the sum bounds with these implicit limits on i and j
={0<=i<n,0<=j<n}
split into four pieces (requires 0<=m<=n)
={i<n-m,j<n-m}{i>=n-m,j>=n-m} {i<n-m,j>=n-m} {i>=n-m,j<n-m}
split last two pieces again (requires n-m<=m-1)
={i<n-m,j<n-m}{i>=n-m,j>=n-m} {i<n-m,n-m<=j<m} {i<n-m,m<=j} {n-m<=i<m,j<n-m} {m<=i,j<n-m}
rearrange
={i<n-m,j<n-m}{i>=n-m,j>=n-m}{i<n-m,m<=j}{m<=i,j<n-m} {i<n-m,n-m<=j<m} {n-m<=i<m,j<n-m}
split last two again (requires n-m<=m-2)
={i<n-m,j<n-m}{i>=n-m,j>=n-m}{i<n-m,m<=j}{m<=i,j<n-m} {i<n-m,n-m<=j<=m-2} {i<n-m,m-2<j<m} {n-m<=i<=m-2,j<n-m} {m-2<i<m,j<n-m}
rearrange
={i<n-m,j<n-m}{i>=n-m,j>=n-m}{i<n-m,m<=j}{m<=i,j<n-m}{i<n-m,n-m<=j<=m-2}{n-m<=i<=m-2,j<n-m} {i<n-m,j=m-1} {i=m-1,j<n-m}
split last two again
={i<n-m,j<n-m}{i>=n-m,j>=n-m}{i<n-m,m<=j}{m<=i,j<n-m}{i<n-m,n-m<=j<=m-2}{n-m<=i<=m-2,j<n-m} {i<n-m-1,j=m-1}{i=n-m-1,j=m-1} {i=m-1,j<n-m-1}{i=m-1,j=n-m-1}
Now choose any m such that n+2<=2m m<=n
so n-m<=m-2 so our requirements above are satisfied
Now consider the short product with k=n-2 , so we discard those with i+j<k=n-2
={i<n-m,j<n-m} i+j<=2(n-m)-2 as n+2<=2m so n<2m so 2n<2m+n so 2n-2m<n so i+j<n-2=k so empty
{i>=n-m,j>=n-m} i+j>=2(n-m) so keep most
{i<n-m,m<=j} keep some
{m<=i,j<n-m} keep some
{i<n-m,n-m<=j<=m-2} i+j<=n-m-1+m-2=n-3<n-2=k so empty
{n-m<=i<=m-2,j<n-m} i+j<=n-m-1+m-2=n-3<n-2=k so empty
{i<n-m-1,j=m-1} i+j<=n-m-2+m-1=n-3<n-2=k so empty
{i=n-m-1,j=m-1} i+j=n-m-1+m-1=n-2=k so keep all
{i=m-1,j<n-m-1} i+j<=m-1+n-m-2=n-3<n-2=k so empty
{i=m-1,j=n-m-1} i+j=m-1+n-m-1=n-2=k so keep all
so the approx short product XY_k is {i>=n-m,j>=n-m} {i<n-m,m<=j} {m<=i,j<n-m} {i=n-m-1,j=m-1} {i=m-1,j=n-m-1}
Now for {i<n-m,m<=j} with i+j>=k=n-2 , let u=i,v=j-m so we have {0<=u<n-m, 0<=v<n-m} with u+v>=n-m-2
which is the same short product
Summary
-----------
Given n digit xp and yp ,
define mulshort_n(xp,yp,n) to be sum {i+j>=n-2,and perhaps some i+j<n-2} xp[i]yp[i]B^(i+j)
choose m such that n+2<=2m and m<n then from above ,
mulshort_n(xp,yp,n)=mul(xp+n-m,yp+n-m,m)B^(2n-2m)
+mulshort_n(xp+m,yp,n-m)B^m
+mulshort_n(xp,yp+m,n-m)B^m
+xp[n-m-1]yp[m-1]B^(n-2)
+xp[m-1]yp[n-m-1]B^(n-2)
and clearly when summing the above we can ignore any products from i+j<n-2
Theorem
Let (zp,2n)=mulshort_n(xp,yp,n)
if zp[n-1]+n-2<B then mulhigh_n(xp,yp,n)=(zp,2n)
*/
// (rp,2n)=(xp,n)*(yp,n) / B^n
inline static void
mpn_mulshort_n_basecase (mp_ptr rp, mp_srcptr xp, mp_srcptr yp, mp_size_t n)
{
mp_size_t i, k;
2009-07-26 12:42:36 -04:00
#if GMP_NAIL_BITS==0
mp_limb_t t1, t2, t3;
#endif
2009-07-24 13:16:50 -04:00
ASSERT (n >= 3);
ASSERT_MPN (xp, n);
ASSERT_MPN (yp, n);
ASSERT (!MPN_OVERLAP_P (rp, 2 * n, xp, n));
ASSERT (!MPN_OVERLAP_P (rp, 2 * n, yp, n));
k = n - 2; // so want short product sum_(i+j>=k) x[i]y[j]B^(i+j)
#if GMP_NAIL_BITS!=0
rp[n] = mpn_mul_1 (rp + k, xp + k, 2, yp[0]);
#else
umul_ppmm (t1, rp[k], xp[k], yp[0]);
umul_ppmm (t3, t2, xp[k + 1], yp[0]);
add_ssaaaa (rp[n], rp[k + 1], t3, t2, 0, t1);
#endif
for (i = 1; i <= n - 2; i++)
rp[n + i] = mpn_addmul_1 (rp + k, xp + k - i, 2 + i, yp[i]);
rp[n + n - 1] = mpn_addmul_1 (rp + n - 1, xp, n, yp[n - 1]);
return;
}
// (rp,2n)=(xp,n)*(yp,n)
static void
mpn_mulshort_n (mp_ptr rp, mp_srcptr xp, mp_srcptr yp, mp_size_t n)
{
mp_size_t m;
mp_limb_t t;
mp_ptr rpn2;
ASSERT (n >= 1);
ASSERT_MPN (xp, n);
ASSERT_MPN (yp, n);
ASSERT (!MPN_OVERLAP_P (rp, 2 * n, xp, n));
ASSERT (!MPN_OVERLAP_P (rp, 2 * n, yp, n));
if (BELOW_THRESHOLD (n, MULHIGH_BASECASE_THRESHOLD))
{
mpn_mul_basecase (rp, xp, n, yp, n);
return;
}
if (BELOW_THRESHOLD (n, MULHIGH_DC_THRESHOLD))
{
mpn_mulshort_n_basecase (rp, xp, yp, n);
return;
}
// choose optimal m st n+2<=2m m<n
ASSERT (n >= 4);
m = 87 * n / 128;
if (2 * m < n + 2)
m = (n + 1) / 2 + 1;
if (m >= n)
m = n - 1;
ASSERT (n + 2 <= 2 * m);
ASSERT (m < n);
rpn2 = rp + n - 2;
mpn_mul_n (rp + n - m + n - m, xp + n - m, yp + n - m, m);
mpn_mulshort_n (rp, xp, yp + m, n - m);
ASSERT_NOCARRY (mpn_add (rpn2, rpn2, n + 2, rpn2 - m, n - m + 2));
mpn_mulshort_n (rp, xp + m, yp, n - m);
ASSERT_NOCARRY (mpn_add (rpn2, rpn2, n + 2, rpn2 - m, n - m + 2));
umul_ppmm (rp[1], t, xp[m - 1], yp[n - m - 1] << GMP_NAIL_BITS);
rp[0] = t >> GMP_NAIL_BITS;
ASSERT_NOCARRY (mpn_add (rpn2, rpn2, n + 2, rp, 2));
umul_ppmm (rp[1], t, xp[n - m - 1], yp[m - 1] << GMP_NAIL_BITS);
rp[0] = t >> GMP_NAIL_BITS;
ASSERT_NOCARRY (mpn_add (rpn2, rpn2, n + 2, rp, 2));
return;
}
// (rp,2n)=(xp,n)*(yp,n)
void
mpn_mulhigh_n (mp_ptr rp, mp_srcptr xp, mp_srcptr yp, mp_size_t n)
{
mp_limb_t t;
ASSERT (n > 0);
ASSERT_MPN (xp, n);
ASSERT_MPN (yp, n);
ASSERT (!MPN_OVERLAP_P (rp, 2 * n, xp, n));
ASSERT (!MPN_OVERLAP_P (rp, 2 * n, yp, n));
if (BELOW_THRESHOLD (n, MULHIGH_BASECASE_THRESHOLD))
{
mpn_mul_basecase (rp, xp, n, yp, n);
return;
}
if (ABOVE_THRESHOLD (n, MULHIGH_MUL_THRESHOLD))
{
mpn_mul_n (rp, xp, yp, n);
return;
}
mpn_mulshort_n (rp, xp, yp, n);
t = rp[n - 1] + n - 2;
if (UNLIKELY (t < n - 2))
mpn_mul_n (rp, xp, yp, n);
return;
}