From 1f133530f1c08798efb1566ef1989be21be1d79d Mon Sep 17 00:00:00 2001 From: jasonmoxham Date: Sun, 24 Jul 2011 19:09:36 +0000 Subject: [PATCH] write new toom eval for +-1 using addadd and sumdiff --- mpn/generic/toom_eval_pm1.c | 110 +++++++++++++++--------------------- 1 file changed, 46 insertions(+), 64 deletions(-) diff --git a/mpn/generic/toom_eval_pm1.c b/mpn/generic/toom_eval_pm1.c index f1d6f17c..43e34b13 100644 --- a/mpn/generic/toom_eval_pm1.c +++ b/mpn/generic/toom_eval_pm1.c @@ -1,79 +1,61 @@ -/* mpn_toom_eval_pm1 -- Evaluate a polynomial in +1 and -1 +/* toom_eval_pm1 - Contributed to the GNU project by Niels Möller +Copyright 2011 The Code Cavern - THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE. IT IS ONLY - SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES. IN FACT, IT IS ALMOST - GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE. +This file is part of the MPIR Library. -Copyright 2009 Free Software Foundation, Inc. +The MPIR Library is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published +by the Free Software Foundation; either version 2.1 of the License, or (at +your option) any later version. -This file is part of the GNU MP Library. - -The GNU MP Library is free software; you can redistribute it and/or modify -it under the terms of the GNU Lesser General Public License as published by -the Free Software Foundation; either version 3 of the License, or (at your -option) any later version. - -The GNU MP Library is distributed in the hope that it will be useful, but +The MPIR Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more details. You should have received a copy of the GNU Lesser General Public License -along with the GNU MP Library. If not, see http://www.gnu.org/licenses/. */ - +along with the MPIR Library; see the file COPYING.LIB. If not, write +to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +Boston, MA 02110-1301, USA. +*/ #include "mpir.h" #include "gmp-impl.h" -/* Evaluates a polynomial of degree k > 3, in the points +1 and -1. */ -int -mpn_toom_eval_pm1 (mp_ptr xp1, mp_ptr xm1, unsigned k, - mp_srcptr xp, mp_size_t n, mp_size_t hn, mp_ptr tp) -{ - unsigned i; - int neg; +// k degree poly so have k+1 coeffs and first k are size n +// k>3 so we can do the first add unconditionally +int mpn_toom_eval_pm1(mp_ptr pp,mp_ptr mp,unsigned int k,mp_srcptr xp,mp_size_t n,mp_size_t m,mp_ptr tp) +{int isneg=0;unsigned int i; - ASSERT (k >= 4); - - ASSERT (hn > 0); - ASSERT (hn <= n); - - /* The degree k is also the number of full-size coefficients, so - * that last coefficient, of size hn, starts at xp + k*n. */ - - xp1[n] = mpn_add_n (xp1, xp, xp + 2*n, n); - for (i = 4; i < k; i += 2) - ASSERT_NOCARRY (mpn_add (xp1, xp1, n+1, xp+i*n, n)); - - tp[n] = mpn_add_n (tp, xp + n, xp + 3*n, n); - for (i = 5; i < k; i += 2) - ASSERT_NOCARRY (mpn_add (tp, tp, n+1, xp+i*n, n)); - - if (k & 1) - ASSERT_NOCARRY (mpn_add (tp, tp, n+1, xp+k*n, hn)); - else - ASSERT_NOCARRY (mpn_add (xp1, xp1, n+1, xp+k*n, hn)); - - neg = (mpn_cmp (xp1, tp, n + 1) < 0) ? ~0 : 0; - -#if HAVE_NATIVE_mpn_add_n_sub_n - if (neg) - mpn_add_n_sub_n (xp1, xm1, tp, xp1, n + 1); - else - mpn_add_n_sub_n (xp1, xm1, xp1, tp, n + 1); -#else - if (neg) - mpn_sub_n (xm1, tp, xp1, n + 1); - else - mpn_sub_n (xm1, xp1, tp, n + 1); - - mpn_add_n (xp1, xp1, tp, n + 1); +ASSERT(k>3);ASSERT(n>=m);ASSERT(m>0);ASSERT_MPN(xp,n*k+m); +//ASSERT_SPACE(pp,n+1);ASSERT_SPACE(mp,n+1);ASSERT_SPACE(tp,n+1); +ASSERT(!MPN_OVERLAP_P(pp,n+1,mp,n+1));ASSERT(!MPN_OVERLAP_P(pp,n+1,xp,n*k+m));ASSERT(!MPN_OVERLAP_P(pp,n+1,tp,n+1)); +ASSERT(!MPN_OVERLAP_P(mp,n+1,xp,n*k+m));ASSERT(!MPN_OVERLAP_P(xp,n*k+m,tp,n+1)); +#if ! HAVE_NATIVE_mpn_sumdiff_n +ASSERT(!MPN_OVERLAP_P(mp,n+1,tp,n+1)); #endif - - ASSERT (xp1[n] <= k); - ASSERT (xm1[n] <= k/2 + 1); - - return neg; -} +#if HAVE_NATIVE_mpn_addadd_n +if(k==4){pp[n]=mpn_add_n(pp,xp,xp+2*n,n);tp[n]=mpn_add_n(tp,xp+n,xp+3*n,n);}else +if(k==5){pp[n]=mpn_addadd_n(pp,xp,xp+2*n,xp+4*n,n);tp[n]=mpn_add_n(tp,xp+n,xp+3*n,n);}else + {pp[n]=mpn_addadd_n(pp,xp,xp+2*n,xp+4*n,n);tp[n]=mpn_addadd_n(tp,xp+n,xp+3*n,xp+5*n,n); + for(i=7;i0)isneg=-1; +#if HAVE_NATIVE_mpn_sumdiff_n +if(isneg==0){mpn_sumdiff_n(pp,mp,pp,tp,n+1);}else{mpn_sumdiff_n(pp,mp,tp,pp,n+1);} +#else +if(isneg==0){mpn_sub_n(mp,pp,tp,n+1);}else{mpn_sub_n(mp,tp,pp,n+1);} +mpn_add_n(pp,pp,tp,n+1); +#endif +return isneg;}