/* mpn_mulhigh_n Copyright 2009 Jason Moxham This file is part of the MPIR Library. The MPIR Library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as published by the Free Software Foundation; either version 2.1 of the License, or (at your option) any later version. The MPIR Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License along with the MPIR Library; see the file COPYING.LIB. If not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ #include "mpir.h" #include "gmp-impl.h" #include "longlong.h" /* Define the usual multiplication as let X=sum over 0<=i=k x[i]y[j]B^(i+j) and approx short product as a superset of shortproduct and subset of usual product Now consider the usual product XY XY = sum over {0<=i=n-m,j>=n-m} {i=n-m} {i>=n-m,j=n-m,j>=n-m} {i=n-m,j>=n-m}{i=n-m,j>=n-m}{i=n-m,j>=n-m}{i=n-m,j>=n-m}{i=n-m,j>=n-m} i+j>=2(n-m) so keep most {i=n-m,j>=n-m} {i=k=n-2 , let u=i,v=j-m so we have {0<=u=n-m-2 which is the same short product Summary ----------- Given n digit xp and yp , define mulshort_n(xp,yp,n) to be sum {i+j>=n-2,and perhaps some i+j= 3); // this restriction doesn't make a lot of sense in general ASSERT_MPN (xp, n); ASSERT_MPN (yp, n); ASSERT (!MPN_OVERLAP_P (rp, 2 * n, xp, n)); ASSERT (!MPN_OVERLAP_P (rp, 2 * n, yp, n)); k = n - 2; // so want short product sum_(i+j>=k) x[i]y[j]B^(i+j) #if GMP_NAIL_BITS!=0 rp[n] = mpn_mul_1 (rp + k, xp + k, 2, yp[0]); #else umul_ppmm (t1, rp[k], xp[k], yp[0]); umul_ppmm (t3, t2, xp[k + 1], yp[0]); add_ssaaaa (rp[n], rp[k + 1], t3, t2, 0, t1); #endif for (i = 1; i <= n - 2; i++) rp[n + i] = mpn_addmul_1 (rp + k, xp + k - i, 2 + i, yp[i]); rp[n + n - 1] = mpn_addmul_1 (rp + n - 1, xp, n, yp[n - 1]); return; } // (rp,2n)=(xp,n)*(yp,n) static void mpn_mulshort_n (mp_ptr rp, mp_srcptr xp, mp_srcptr yp, mp_size_t n) { mp_size_t m; mp_limb_t t; mp_ptr rpn2; ASSERT (n >= 1); ASSERT_MPN (xp, n); ASSERT_MPN (yp, n); ASSERT (!MPN_OVERLAP_P (rp, 2 * n, xp, n)); ASSERT (!MPN_OVERLAP_P (rp, 2 * n, yp, n)); if (BELOW_THRESHOLD (n, MULHIGH_BASECASE_THRESHOLD)) { mpn_mul_basecase (rp, xp, n, yp, n); return; } if (BELOW_THRESHOLD (n, MULHIGH_DC_THRESHOLD)) { mpn_mulshort_n_basecase (rp, xp, yp, n); return; } // choose optimal m st n+2<=2m m= 4); m = 87 * n / 128; if (2 * m < n + 2) m = (n + 1) / 2 + 1; if (m >= n) m = n - 1; ASSERT (n + 2 <= 2 * m); ASSERT (m < n); rpn2 = rp + n - 2; mpn_mul_n (rp + n - m + n - m, xp + n - m, yp + n - m, m); mpn_mulshort_n (rp, xp, yp + m, n - m); ASSERT_NOCARRY (mpn_add (rpn2, rpn2, n + 2, rpn2 - m, n - m + 2)); mpn_mulshort_n (rp, xp + m, yp, n - m); ASSERT_NOCARRY (mpn_add (rpn2, rpn2, n + 2, rpn2 - m, n - m + 2)); umul_ppmm (rp[1], t, xp[m - 1], yp[n - m - 1] << GMP_NAIL_BITS); rp[0] = t >> GMP_NAIL_BITS; ASSERT_NOCARRY (mpn_add (rpn2, rpn2, n + 2, rp, 2)); umul_ppmm (rp[1], t, xp[n - m - 1], yp[m - 1] << GMP_NAIL_BITS); rp[0] = t >> GMP_NAIL_BITS; ASSERT_NOCARRY (mpn_add (rpn2, rpn2, n + 2, rp, 2)); return; } // (rp,2n)=(xp,n)*(yp,n) void mpn_mulhigh_n (mp_ptr rp, mp_srcptr xp, mp_srcptr yp, mp_size_t n) { mp_limb_t t; ASSERT (n > 0); ASSERT_MPN (xp, n); ASSERT_MPN (yp, n); ASSERT (!MPN_OVERLAP_P (rp, 2 * n, xp, n)); ASSERT (!MPN_OVERLAP_P (rp, 2 * n, yp, n)); if (BELOW_THRESHOLD (n, MULHIGH_BASECASE_THRESHOLD)) { mpn_mul_basecase (rp, xp, n, yp, n); return; } if (ABOVE_THRESHOLD (n, MULHIGH_MUL_THRESHOLD)) { mpn_mul_n (rp, xp, yp, n); return; } mpn_mulshort_n (rp, xp, yp, n); t = rp[n - 1] + n - 2; if (UNLIKELY (t < n - 2)) mpn_mul_n (rp, xp, yp, n); return; }