341 lines
9.4 KiB
C
341 lines
9.4 KiB
C
/* mpn_mulhigh_n
|
|
|
|
Copyright 2009 Jason Moxham
|
|
|
|
This file is part of the MPIR Library.
|
|
|
|
The MPIR Library is free software; you can redistribute it and/or modify
|
|
it under the terms of the GNU Lesser General Public License as published
|
|
by the Free Software Foundation; either version 2.1 of the License, or (at
|
|
your option) any later version.
|
|
|
|
The MPIR Library is distributed in the hope that it will be useful, but
|
|
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
|
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
|
License for more details.
|
|
|
|
You should have received a copy of the GNU Lesser General Public License
|
|
along with the MPIR Library; see the file COPYING.LIB. If not, write
|
|
to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
|
|
Boston, MA 02110-1301, USA.
|
|
*/
|
|
|
|
#include "mpir.h"
|
|
#include "gmp-impl.h"
|
|
#include "longlong.h"
|
|
|
|
/*
|
|
Let X = sum over 0 <= i < n of x[i]B^i
|
|
Let Y = sum over 0 <= i < n of y[i]B^i
|
|
|
|
Define the usual multiplication as
|
|
|
|
XY = sum over 0 <= i < n, 0 <= j < n, x[i]y[j]B^(i + j)
|
|
|
|
Define short product as
|
|
|
|
XY_k = sum over i + j >= k, x[i]y[j]B^(i + j)
|
|
|
|
and an approximate short product as any sum of terms that is a superset of the short product and a subset of the usual product
|
|
|
|
Now consider the usual product XY
|
|
|
|
XY = sum over {0 <= i < n, 0 <= j < n} x[i]y[j]B^(i+j)
|
|
|
|
from now we just show the sum bounds with these implicit limits on i and j
|
|
|
|
= {0 <= i < n, 0 <= j < n}
|
|
|
|
split into four pieces (requires 0 <= m <= n)
|
|
|
|
= {i < n - m, j < n - m} {i >= n-m, j >= n - m}
|
|
{i < n - m, j >= n - m} {i >= n - m, j < n - m}
|
|
|
|
split last two pieces again (requires n - m <= m - 1)
|
|
|
|
= {i < n - m, j < n - m} {i >= n - m, j >= n - m} {i < n - m, n - m <= j < m}
|
|
{i < n - m, m <= j} {n - m <= i < m, j < n - m} {m <= i, j < n - m}
|
|
|
|
rearrange
|
|
|
|
= {i < n - m, j < n - m} {i >= n - m, j >= n - m} {i < n - m, m <= j}
|
|
{m <= i, j < n - m} {i < n - m, n - m <= j < m} { n - m <= i < m, j < n - m}
|
|
|
|
split last two again (requires n - m <= m - 2)
|
|
|
|
= {i < n - m, j < n - m} {i >= n - m, j >= n - m} {i < n - m, m <= j}
|
|
{m <= i, j < n - m} {i < n - m, n - m <= j <= m - 2} {i < n - m, m - 2 < j < m}
|
|
{n - m <= i <= m-2, j < n - m} {m - 2 < i < m, j < n - m}
|
|
|
|
rearrange
|
|
|
|
= {i < n - m, j < n - m} {i >= n - m, j >= n - m} {i < n - m, m <= j}
|
|
{m <= i, j < n - m} {i < n - m, n - m <= j <= m - 2} {n - m <= i <= m - 2, j < n - m} {i<n-m,j=m-1} {i=m-1,j<n-m}
|
|
|
|
split last two again
|
|
|
|
= {i < n - m, j < n - m} {i >= n - m, j >= n - m}
|
|
{i < n - m, m <= j} {m <= i, j < n - m} {i < n - m, n - m <= j <= m - 2}
|
|
{n - m <= i <= m - 2, j < n - m} {i < n - m - 1, j = m - 1}
|
|
{i = n - m - 1, j = m - 1} {i = m - 1, j < n - m - 1} {i = m - 1, j = n - m - 1}
|
|
|
|
Now choose any m such that n + 2 <= 2m, m < n
|
|
so n - m <= m - 2 so our requirements above are satisfied
|
|
|
|
Now consider the short product with k = n - 2, so we discard those
|
|
with i + j < k = n - 2
|
|
|
|
= {i < n - m, j < n - m}, i + j <= 2(n - m) - 2
|
|
|
|
as n + 2 <= 2m, so n < 2m so 2n < 2m + n so 2n - 2m < n so i + j < n - 2 = k
|
|
so empty
|
|
|
|
{i >= n - m, j >= n - m}, i + j >= 2(n - m) keep most
|
|
{i < n - m, m <= j}, keep some
|
|
{m <= i, j < n - m}, keep some
|
|
{i < n - m, n - m <= j <= m - 2}, i + j <= n - m - 1 + m - 2 = n - 3 < n - 2 = k, empty
|
|
{n - m <= i <= m - 2, j < n - m}, i + j <= n - m - 1 + m - 2 = n - 3 < n - 2 = k, empty
|
|
{i < n - m - 1, j = m - 1}, i + j <= n - m - 2 + m - 1 = n - 3 < n - 2 = k, empty
|
|
{i = n - m - 1, j = m - 1}, i + j = n - m - 1 + m - 1 = n - 2 = k, keep all
|
|
{i = m - 1, j < n - m - 1}, i + j <= m - 1 + n - m - 2 = n - 3 < n - 2 = k, empty
|
|
{i = m - 1, j = n - m - 1}, i + j = m - 1 + n - m - 1 = n - 2 = k, keep all
|
|
|
|
so the approx short product XY_k is
|
|
|
|
{i >= n - m, j >= n - m} {i < n - m, m <= j}
|
|
{m <= i, j < n - m} {i = n - m - 1, j = m - 1} {i = m - 1, j = n - m - 1}
|
|
|
|
Now for {i < n - m, m <= j} with i + j >= k = n - 2, let u = i, v = j - m
|
|
so we have {0 <= u < n - m, 0 <= v < n - m} with u + v >= n - m - 2
|
|
which is the same short product
|
|
|
|
Summary
|
|
-----------
|
|
|
|
Given n digit xp and yp,
|
|
define mulshort_n(xp,yp,n) to be sum
|
|
|
|
{i + j >= n - 2, and perhaps some i + j < n - 2} xp[i]yp[j]B^(i+j)
|
|
choose m such that n+2 <= 2m and m < n then from above
|
|
|
|
mulshort_n(xp, yp, n) = mul(xp + n - m, yp + n - m, m)B^(2n - 2m)
|
|
+ mulshort_n(xp + m,yp, n - m)B^m
|
|
+ mulshort_n(xp, yp + m, n - m)B^m
|
|
+ xp[n - m - 1]yp[m - 1]B^(n - 2)
|
|
+ xp[m - 1]yp[n - m - 1]B^(n - 2)
|
|
|
|
and clearly when summing the above we can ignore any products from i + j < n - 2
|
|
|
|
Theorem
|
|
|
|
Let (zp, 2n) = mulshort_n(xp, yp, n)
|
|
if zp[n - 1] + n - 2 < B then mulhigh_n(xp, yp, n) = (zp, 2n)
|
|
*/
|
|
|
|
/* (rp, 2n) = (xp, n)*(yp, n) / B^n */
|
|
inline static void
|
|
mpn_mulshort_n_basecase(mp_ptr rp, mp_srcptr xp, mp_srcptr yp, mp_size_t n)
|
|
{
|
|
mp_size_t i, k;
|
|
|
|
ASSERT(n >= 3); /* this restriction doesn't make a lot of sense in general */
|
|
ASSERT_MPN(xp, n);
|
|
ASSERT_MPN(yp, n);
|
|
ASSERT(!MPN_OVERLAP_P (rp, 2 * n, xp, n));
|
|
ASSERT(!MPN_OVERLAP_P (rp, 2 * n, yp, n));
|
|
|
|
k = n - 2; /* so want short product sum_(i + j >= k) x[i]y[j]B^(i + j) */
|
|
i = 0;
|
|
|
|
/* Multiply w limbs from y + i to (2 + i + w - 1) limbs from x + (n - 2 - i - w + 1)
|
|
and put it into r + (n - 2 - w + 1), "overflow" (i.e. last) limb into
|
|
r + (n + w - 1) for i between 0 and n - 2.
|
|
i == n - w needs special treatment. */
|
|
|
|
/* We first multiply by the low order limb (or depending on optional function
|
|
availability, limbs). This result can be stored, not added, to rp. We
|
|
also avoid a loop for zeroing this way. */
|
|
|
|
#if HAVE_NATIVE_mpn_mul_2
|
|
rp[n + 1] = mpn_mul_2 (rp + k - 1, xp + k - 1, 2 + 1, yp);
|
|
i += 2;
|
|
#else
|
|
rp[n] = mpn_mul_1 (rp + k, xp + k, 2, yp[0]);
|
|
i += 1;
|
|
#endif
|
|
|
|
#if HAVE_NATIVE_mpn_addmul_6
|
|
while (i < n - 6)
|
|
{
|
|
rp[n + i + 6 - 1] = mpn_addmul_6 (rp + k - 6 + 1, xp + k - i - 6 + 1, 2 + i + 6 - 1, yp + i);
|
|
i += 6;
|
|
}
|
|
if (i == n - 6)
|
|
{
|
|
rp[n + n - 1] = mpn_addmul_6 (rp + i, xp, n, yp + i);
|
|
return;
|
|
}
|
|
#endif
|
|
|
|
#if HAVE_NATIVE_mpn_addmul_5
|
|
while (i < n - 5)
|
|
{
|
|
rp[n + i + 5 - 1] = mpn_addmul_5 (rp + k - 5 + 1, xp + k - i - 5 + 1, 2 + i + 5 - 1, yp + i)
|
|
i += 5;
|
|
}
|
|
if (i == n - 5)
|
|
{
|
|
rp[n + n - 1] = mpn_addmul_5 (rp + i, xp, n, yp + i);
|
|
return;
|
|
}
|
|
#endif
|
|
|
|
#if HAVE_NATIVE_mpn_addmul_4
|
|
while (i < n - 4)
|
|
{
|
|
rp[n + i + 4 - 1] = mpn_addmul_4 (rp + k - 4 + 1, xp + k - i - 4 + 1, 2 + i + 4 - 1, yp + i);
|
|
i += 4;
|
|
}
|
|
if (i == n - 4)
|
|
{
|
|
rp[n + n - 1] = mpn_addmul_4 (rp + i, xp, n, yp + i);
|
|
return;
|
|
}
|
|
#endif
|
|
|
|
#if HAVE_NATIVE_mpn_addmul_3
|
|
while (i < n - 3)
|
|
{
|
|
rp[n + i + 3 - 1] = mpn_addmul_3 (rp + k - 3 + 1, xp + k - i - 3 + 1, 2 + i + 3 - 1, yp + i);
|
|
i += 3;
|
|
}
|
|
if (i == n - 3)
|
|
{
|
|
rp[n + n - 1] = mpn_addmul_3 (rp + i, xp, n, yp + i);
|
|
return;
|
|
}
|
|
#endif
|
|
|
|
#if HAVE_NATIVE_mpn_addmul_2
|
|
while (i < n - 2)
|
|
{
|
|
rp[n + i + 2 - 1] = mpn_addmul_2 (rp + k - 2 + 1, xp + k - i - 2 + 1, 2 + i + 2 - 1, yp + i);
|
|
i += 2;
|
|
}
|
|
if (i == n - 2)
|
|
{
|
|
rp[n + n - 1] = mpn_addmul_2 (rp + i, xp, n, yp + i);
|
|
return;
|
|
}
|
|
#endif
|
|
|
|
while (i < n - 1)
|
|
{
|
|
rp[n + i] = mpn_addmul_1 (rp + k, xp + k - i, 2 + i, yp[i]);
|
|
i += 1;
|
|
}
|
|
rp[n + n - 1] = mpn_addmul_1 (rp + i, xp, n, yp[i]);
|
|
return;
|
|
}
|
|
|
|
/* (rp, 2n) = (xp, n)*(yp, n) */
|
|
static void
|
|
mpn_mulshort_n(mp_ptr rp, mp_srcptr xp, mp_srcptr yp, mp_size_t n)
|
|
{
|
|
mp_size_t m;
|
|
mp_limb_t t;
|
|
mp_ptr rpn2;
|
|
|
|
ASSERT(n >= 1);
|
|
ASSERT_MPN(xp, n);
|
|
ASSERT_MPN(yp, n);
|
|
ASSERT(!MPN_OVERLAP_P (rp, 2 * n, xp, n));
|
|
ASSERT(!MPN_OVERLAP_P (rp, 2 * n, yp, n));
|
|
|
|
if (BELOW_THRESHOLD(n, MULHIGH_BASECASE_THRESHOLD))
|
|
{
|
|
mpn_mul_basecase(rp, xp, n, yp, n);
|
|
|
|
return;
|
|
}
|
|
|
|
if (BELOW_THRESHOLD (n, MULHIGH_DC_THRESHOLD))
|
|
{
|
|
mpn_mulshort_n_basecase(rp, xp, yp, n);
|
|
|
|
return;
|
|
}
|
|
|
|
/* choose optimal m s.t. n + 2 <= 2m, m < n */
|
|
ASSERT (n >= 4);
|
|
|
|
m = 87 * n / 128;
|
|
|
|
if (2 * m < n + 2)
|
|
m = (n + 1) / 2 + 1;
|
|
|
|
if (m >= n)
|
|
m = n - 1;
|
|
|
|
ASSERT (n + 2 <= 2 * m);
|
|
ASSERT (m < n);
|
|
|
|
rpn2 = rp + n - 2;
|
|
|
|
mpn_mul_n (rp + n - m + n - m, xp + n - m, yp + n - m, m);
|
|
mpn_mulshort_n (rp, xp, yp + m, n - m);
|
|
|
|
ASSERT_NOCARRY (mpn_add (rpn2, rpn2, n + 2, rpn2 - m, n - m + 2));
|
|
|
|
mpn_mulshort_n (rp, xp + m, yp, n - m);
|
|
|
|
ASSERT_NOCARRY (mpn_add (rpn2, rpn2, n + 2, rpn2 - m, n - m + 2));
|
|
|
|
umul_ppmm (rp[1], t, xp[m - 1], yp[n - m - 1] << GMP_NAIL_BITS);
|
|
rp[0] = t >> GMP_NAIL_BITS;
|
|
|
|
ASSERT_NOCARRY (mpn_add (rpn2, rpn2, n + 2, rp, 2));
|
|
|
|
umul_ppmm (rp[1], t, xp[n - m - 1], yp[m - 1] << GMP_NAIL_BITS);
|
|
rp[0] = t >> GMP_NAIL_BITS;
|
|
|
|
ASSERT_NOCARRY (mpn_add (rpn2, rpn2, n + 2, rp, 2));
|
|
|
|
return;
|
|
}
|
|
|
|
/* (rp, 2n) = (xp, n)*(yp, n) */
|
|
void
|
|
mpn_mulhigh_n (mp_ptr rp, mp_srcptr xp, mp_srcptr yp, mp_size_t n)
|
|
{
|
|
mp_limb_t t;
|
|
|
|
ASSERT(n > 0);
|
|
ASSERT_MPN(xp, n);
|
|
ASSERT_MPN(yp, n);
|
|
ASSERT(!MPN_OVERLAP_P(rp, 2 * n, xp, n));
|
|
ASSERT(!MPN_OVERLAP_P(rp, 2 * n, yp, n));
|
|
|
|
if (BELOW_THRESHOLD(n, MULHIGH_BASECASE_THRESHOLD))
|
|
{
|
|
mpn_mul_basecase(rp, xp, n, yp, n);
|
|
|
|
return;
|
|
}
|
|
|
|
if (ABOVE_THRESHOLD (n, MULHIGH_MUL_THRESHOLD))
|
|
{
|
|
mpn_mul_n(rp, xp, yp, n);
|
|
|
|
return;
|
|
}
|
|
|
|
mpn_mulshort_n(rp, xp, yp, n);
|
|
t = rp[n - 1] + n - 2;
|
|
|
|
if (UNLIKELY(t < n - 2))
|
|
mpn_mul_n(rp, xp, yp, n);
|
|
|
|
return;
|
|
}
|