write new toom eval for +-1 using addadd and sumdiff

2011-07-24 19:09:36 +00:00 · 2011-07-24 19:09:36 +00:00 · 1f133530f1
commit 1f133530f1
parent bb009d186b
1 changed file with 46 additions and 64 deletions
--- a/mpn/generic/toom_eval_pm1.c
+++ b/mpn/generic/toom_eval_pm1.c
@@ -1,79 +1,61 @@
-/* mpn_toom_eval_pm1 -- Evaluate a polynomial in +1 and -1
+/* toom_eval_pm1
-   Contributed to the GNU project by Niels Möller
+Copyright 2011 The Code Cavern
-   THE FUNCTION IN THIS FILE IS INTERNAL WITH A MUTABLE INTERFACE.  IT IS ONLY
+This file is part of the MPIR Library.
   SAFE TO REACH IT THROUGH DOCUMENTED INTERFACES.  IN FACT, IT IS ALMOST
   GUARANTEED THAT IT WILL CHANGE OR DISAPPEAR IN A FUTURE GNU MP RELEASE.
-Copyright 2009 Free Software Foundation, Inc.
+The MPIR Library is free software; you can redistribute it and/or modify
 it under the terms of the GNU Lesser General Public License as published
 by the Free Software Foundation; either version 2.1 of the License, or (at
 your option) any later version.
-This file is part of the GNU MP Library.
+The MPIR Library is distributed in the hope that it will be useful, but
 The GNU MP Library is free software; you can redistribute it and/or modify
 it under the terms of the GNU Lesser General Public License as published by
 the Free Software Foundation; either version 3 of the License, or (at your
 option) any later version.
 The GNU MP Library is distributed in the hope that it will be useful, but
 WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
 License for more details.
 You should have received a copy of the GNU Lesser General Public License
-along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.  */
+along with the MPIR Library; see the file COPYING.LIB.  If not, write
-
+to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 Boston, MA 02110-1301, USA.
 */
 #include "mpir.h"
 #include "gmp-impl.h"
-/* Evaluates a polynomial of degree k > 3, in the points +1 and -1. */
+// k degree poly so have k+1 coeffs and first k are size n
-int
+// k>3 so we can do the first add unconditionally 
-mpn_toom_eval_pm1 (mp_ptr xp1, mp_ptr xm1, unsigned k,
+int	mpn_toom_eval_pm1(mp_ptr pp,mp_ptr mp,unsigned int k,mp_srcptr xp,mp_size_t n,mp_size_t m,mp_ptr tp)
-		   mp_srcptr xp, mp_size_t n, mp_size_t hn, mp_ptr tp)
+{int isneg=0;unsigned int i;
 {
  unsigned i;
  int neg;
-  ASSERT (k >= 4);
+ASSERT(k>3);ASSERT(n>=m);ASSERT(m>0);ASSERT_MPN(xp,n*k+m);
-
+//ASSERT_SPACE(pp,n+1);ASSERT_SPACE(mp,n+1);ASSERT_SPACE(tp,n+1);
-  ASSERT (hn > 0);
+ASSERT(!MPN_OVERLAP_P(pp,n+1,mp,n+1));ASSERT(!MPN_OVERLAP_P(pp,n+1,xp,n*k+m));ASSERT(!MPN_OVERLAP_P(pp,n+1,tp,n+1));
-  ASSERT (hn <= n);
+ASSERT(!MPN_OVERLAP_P(mp,n+1,xp,n*k+m));ASSERT(!MPN_OVERLAP_P(xp,n*k+m,tp,n+1));
-
+#if ! HAVE_NATIVE_mpn_sumdiff_n
-  /* The degree k is also the number of full-size coefficients, so
+ASSERT(!MPN_OVERLAP_P(mp,n+1,tp,n+1));
   * that last coefficient, of size hn, starts at xp + k*n. */
  xp1[n] = mpn_add_n (xp1, xp, xp + 2*n, n);
  for (i = 4; i < k; i += 2)
    ASSERT_NOCARRY (mpn_add (xp1, xp1, n+1, xp+i*n, n));
  tp[n] = mpn_add_n (tp, xp + n, xp + 3*n, n);
  for (i = 5; i < k; i += 2)
    ASSERT_NOCARRY (mpn_add (tp, tp, n+1, xp+i*n, n));
  if (k & 1)
    ASSERT_NOCARRY (mpn_add (tp, tp, n+1, xp+k*n, hn));
  else
    ASSERT_NOCARRY (mpn_add (xp1, xp1, n+1, xp+k*n, hn));
  neg = (mpn_cmp (xp1, tp, n + 1) < 0) ? ~0 : 0;
 #if HAVE_NATIVE_mpn_add_n_sub_n
  if (neg)
    mpn_add_n_sub_n (xp1, xm1, tp, xp1, n + 1);
  else
    mpn_add_n_sub_n (xp1, xm1, xp1, tp, n + 1);
 #else
  if (neg)
    mpn_sub_n (xm1, tp, xp1, n + 1);
  else
    mpn_sub_n (xm1, xp1, tp, n + 1);
  mpn_add_n (xp1, xp1, tp, n + 1);
 #endif
-
+#if HAVE_NATIVE_mpn_addadd_n
-  ASSERT (xp1[n] <= k);
+if(k==4){pp[n]=mpn_add_n(pp,xp,xp+2*n,n);tp[n]=mpn_add_n(tp,xp+n,xp+3*n,n);}else
-  ASSERT (xm1[n] <= k/2 + 1);
+if(k==5){pp[n]=mpn_addadd_n(pp,xp,xp+2*n,xp+4*n,n);tp[n]=mpn_add_n(tp,xp+n,xp+3*n,n);}else
-
+  {pp[n]=mpn_addadd_n(pp,xp,xp+2*n,xp+4*n,n);tp[n]=mpn_addadd_n(tp,xp+n,xp+3*n,xp+5*n,n);
-  return neg;
+   for(i=7;i<k-2;i+=4){pp[n]+=mpn_addadd_n(pp,pp,xp+(i-1)*n,xp+(i+1)*n,n);tp[n]+=mpn_addadd_n(tp,tp,xp+i*n,xp+(i+2)*n,n);}
-}
+   if(k%4==3){pp[n]+=mpn_add_n(pp,pp,xp+(k-1)*n,n);}
   if(k%4==0){pp[n]+=mpn_add_n(pp,pp,xp+(k-2)*n,n);tp[n]+=mpn_add_n(tp,tp,xp+(k-1)*n,n);}
   if(k%4==1){pp[n]+=mpn_addadd_n(pp,pp,xp+(k-3)*n,xp+(k-1)*n,n);tp[n]+=mpn_add_n(tp,tp,xp+(k-2)*n,n);}}
 if(k%2==0){pp[n]+=mpn_add(pp,pp,n,xp+k*n,m);}else{tp[n]+=mpn_add(tp,tp,n,xp+k*n,m);}
 #else
 // pp is xp+0 xp+2n xp+4n xp+6n ... xp+jn where j<=k-1
 // tp is xp+n xp+3n xp+5n xp+7n ... xp+jn where j<=k-1
 pp[n]=mpn_add_n(pp,xp,xp+2*n,n);tp[n]=mpn_add_n(tp,xp+n,xp+3*n,n);
 for(i=5;i<k;i+=2){pp[n]+=mpn_add_n(pp,pp,xp+(i-1)*n,n);tp[n]+=mpn_add_n(tp,tp,xp+i*n,n);}
 if(k%2==1){pp[n]+=mpn_add_n(pp,pp,xp+(k-1)*n,n);tp[n]+=mpn_add(tp,tp,n,xp+k*n,m);}else{pp[n]+=mpn_add(pp,pp,n,xp+k*n,m);}
 #endif
 if(mpn_cmp(tp,pp,n+1)>0)isneg=-1;
 #if HAVE_NATIVE_mpn_sumdiff_n
 if(isneg==0){mpn_sumdiff_n(pp,mp,pp,tp,n+1);}else{mpn_sumdiff_n(pp,mp,tp,pp,n+1);}
 #else
 if(isneg==0){mpn_sub_n(mp,pp,tp,n+1);}else{mpn_sub_n(mp,tp,pp,n+1);}
 mpn_add_n(pp,pp,tp,n+1);  
 #endif
 return isneg;}