From 36ca8eb0a97e6d06c0ca956550d8559b2ffa5439 Mon Sep 17 00:00:00 2001 From: "(no author)" <(no author)> Date: Sun, 14 Mar 2010 14:16:35 +0000 Subject: [PATCH] Removed toom7 code. --- configure | 2 +- configure.in | 2 +- doc/devel/mpn_functions.txt | 5 - doc/mpir.info | 173 +++ doc/mpir.info-1 | 1 - doc/mpir.texi | 1 - gmp-impl.h | 35 - mpn/generic/toom7_mul_n.c | 2012 ------------------------------- mpn/x86_64w/core2/gmp-mparam.h | 2 - mpn/x86_64w/k8/gmp-mparam.h | 2 - mpn/x86_64w/k8/k10/gmp-mparam.h | 152 ++- tests/devel/try.c | 12 - tune/common.c | 10 - tune/speed.c | 2 - tune/speed.h | 15 - tune/tuneup.c | 2 - 16 files changed, 250 insertions(+), 2178 deletions(-) delete mode 100644 mpn/generic/toom7_mul_n.c diff --git a/configure b/configure index c626375d..fa3ed872 100755 --- a/configure +++ b/configure @@ -29319,7 +29319,7 @@ gmp_mpn_functions="$extra_functions \ rootrem sqrtrem get_str set_str scan0 scan1 popcount hamdist cmp perfsqr \ bdivmod gcd gcd_1 gcdext tdiv_qr dc_divrem_n sb_divrem_mn jacbase get_d \ mullow_n mulhigh_n mullow_n_basecase mullow_basecase redc_basecase \ - sb_divappr_q toom3_mul toom3_mul_n toom4_mul toom4_mul_n toom7_mul_n \ + sb_divappr_q toom3_mul toom3_mul_n toom4_mul toom4_mul_n \ dc_div_q dc_divappr_q sb_div_q sb_div_qr dc_div_qr inv_divappr_q_n \ inv_divappr_q inv_div_q inv_div_qr inv_div_qr_n rootrem_basecase \ toom_eval_dgr3_pm1 toom_eval_dgr3_pm2 toom_eval_pm1 toom_eval_pm2 \ diff --git a/configure.in b/configure.in index 4743708d..f829d6bb 100644 --- a/configure.in +++ b/configure.in @@ -2517,7 +2517,7 @@ gmp_mpn_functions="$extra_functions \ rootrem sqrtrem get_str set_str scan0 scan1 popcount hamdist cmp perfsqr \ bdivmod gcd gcd_1 gcdext tdiv_qr dc_divrem_n sb_divrem_mn jacbase get_d \ mullow_n mulhigh_n mullow_n_basecase mullow_basecase redc_basecase \ - sb_divappr_q toom3_mul toom3_mul_n toom4_mul toom4_mul_n toom7_mul_n \ + sb_divappr_q toom3_mul toom3_mul_n toom4_mul toom4_mul_n \ dc_div_q dc_divappr_q sb_div_q sb_div_qr dc_div_qr inv_divappr_q_n \ inv_divappr_q inv_div_q inv_div_qr inv_div_qr_n rootrem_basecase \ toom_eval_dgr3_pm1 toom_eval_dgr3_pm2 toom_eval_pm1 toom_eval_pm2 \ diff --git a/doc/devel/mpn_functions.txt b/doc/devel/mpn_functions.txt index f0b5390d..371ee91a 100644 --- a/doc/devel/mpn_functions.txt +++ b/doc/devel/mpn_functions.txt @@ -121,8 +121,3 @@ New C level mpn functions since MPIR 0.9 void mpn_toom4_mul_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n) -/* - Multiply {up, n} by {vp, n} and store the result at {rp, 2n} using Toom 7 algorithm -*/ -mpn_toom7_mul_nvoid - mpn_toom4_mul_n (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n) diff --git a/doc/mpir.info b/doc/mpir.info index e69de29b..5f670852 100644 --- a/doc/mpir.info +++ b/doc/mpir.info @@ -0,0 +1,173 @@ +This is mpir.info, produced by makeinfo version 4.11 from mpir.texi. + +This manual describes how to install and use MPIR, the Multiple +Precision Integers and Rationals library, version 1.3.1. + + Copyright 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, +2001, 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc. + + Copyright 2008 William Hart + + Permission is granted to copy, distribute and/or modify this +document under the terms of the GNU Free Documentation License, Version +1.2 or any later version published by the Free Software Foundation; +with no Invariant Sections, with the Front-Cover Texts being "A GNU +Manual", and with the Back-Cover Texts being "You have freedom to copy +and modify this GNU Manual, like GNU software". A copy of the license +is included in *note GNU Free Documentation License::. + +INFO-DIR-SECTION GNU libraries +START-INFO-DIR-ENTRY +* mpir: (mpir). MPIR Multiple Precision Integers and Rationals Library. +END-INFO-DIR-ENTRY + + +Indirect: +mpir.info-1: 990 +mpir.info-2: 300621 + +Tag Table: +(Indirect) +Node: Top990 +Node: Copying3173 +Node: Introduction to MPIR5016 +Node: Installing MPIR7437 +Node: Build Options8573 +Node: ABI and ISA24881 +Node: Notes for Package Builds34308 +Node: Notes for Particular Systems37426 +Node: Known Build Problems44354 +Node: Performance optimization47904 +Node: MPIR Basics49039 +Node: Headers and Libraries49693 +Node: Nomenclature and Types51148 +Node: Function Classes52859 +Node: Variable Conventions54397 +Node: Parameter Conventions55926 +Node: Memory Management57990 +Node: Reentrancy59122 +Node: Useful Macros and Constants60559 +Node: Compatibility with older versions61905 +Node: Demonstration Programs62748 +Node: Efficiency64619 +Node: Debugging72249 +Node: Profiling78818 +Node: Autoconf82852 +Node: Emacs84726 +Node: Reporting Bugs85335 +Node: Integer Functions87902 +Node: Initializing Integers88681 +Node: Assigning Integers90353 +Node: Simultaneous Integer Init & Assign91941 +Node: Converting Integers93568 +Node: Integer Arithmetic96221 +Node: Integer Division97824 +Node: Integer Exponentiation104253 +Node: Integer Roots105115 +Node: Number Theoretic Functions106942 +Node: Integer Comparisons115093 +Node: Integer Logic and Bit Fiddling116472 +Node: I/O of Integers119086 +Node: Integer Random Numbers121701 +Node: Integer Import and Export123477 +Node: Miscellaneous Integer Functions127482 +Node: Integer Special Functions129343 +Node: Rational Number Functions132514 +Node: Initializing Rationals133709 +Node: Rational Conversions135955 +Node: Rational Arithmetic137687 +Node: Comparing Rationals139024 +Node: Applying Integer Functions140392 +Node: I/O of Rationals141876 +Node: Floating-point Functions143739 +Node: Initializing Floats146627 +Node: Assigning Floats150325 +Node: Simultaneous Float Init & Assign152893 +Node: Converting Floats154423 +Node: Float Arithmetic157384 +Node: Float Comparison159430 +Node: I/O of Floats160782 +Node: Miscellaneous Float Functions163108 +Node: Low-level Functions165747 +Node: Random Number Functions188658 +Node: Random State Initialization189728 +Node: Random State Seeding191791 +Node: Random State Miscellaneous193181 +Node: Formatted Output193823 +Node: Formatted Output Strings194069 +Node: Formatted Output Functions199309 +Node: C++ Formatted Output203386 +Node: Formatted Input206073 +Node: Formatted Input Strings206310 +Node: Formatted Input Functions210972 +Node: C++ Formatted Input213945 +Node: C++ Class Interface215853 +Node: C++ Interface General216851 +Node: C++ Interface Integers219934 +Node: C++ Interface Rationals223368 +Node: C++ Interface Floats227047 +Node: C++ Interface Random Numbers232342 +Node: C++ Interface Limitations234472 +Node: Custom Allocation237318 +Node: Language Bindings241964 +Node: Algorithms245980 +Node: Multiplication Algorithms246736 +Node: Basecase Multiplication247710 +Node: Karatsuba Multiplication249622 +Node: Toom 3-Way Multiplication253252 +Node: FFT Multiplication259661 +Node: Other Multiplication264999 +Node: Division Algorithms267392 +Node: Single Limb Division267740 +Node: Basecase Division270660 +Node: Divide and Conquer Division271864 +Node: Exact Division274102 +Node: Exact Remainder277270 +Node: Small Quotient Division279563 +Node: Greatest Common Divisor Algorithms281162 +Node: Binary GCD281436 +Node: Accelerated GCD284115 +Node: Extended GCD285914 +Node: Jacobi Symbol287585 +Node: Powering Algorithms288502 +Node: Normal Powering Algorithm288766 +Node: Modular Powering Algorithm289295 +Node: Root Extraction Algorithms290359 +Node: Square Root Algorithm290675 +Node: Nth Root Algorithm292818 +Node: Perfect Square Algorithm293604 +Node: Perfect Power Algorithm295691 +Node: Radix Conversion Algorithms296313 +Node: Binary to Radix296690 +Node: Radix to Binary300621 +Node: Other Algorithms302553 +Node: Prime Testing Algorithm302907 +Node: Factorial Algorithm304092 +Node: Binomial Coefficients Algorithm305496 +Node: Fibonacci Numbers Algorithm306391 +Node: Lucas Numbers Algorithm308867 +Node: Random Number Algorithms309589 +Node: Assembler Coding311713 +Node: Assembler Code Organisation312690 +Node: Assembler Basics313663 +Node: Assembler Carry Propagation314822 +Node: Assembler Cache Handling316660 +Node: Assembler Functional Units318828 +Node: Assembler Floating Point320449 +Node: Assembler SIMD Instructions324232 +Node: Assembler Software Pipelining325221 +Node: Assembler Loop Unrolling326289 +Node: Assembler Writing Guide328509 +Node: Internals331279 +Node: Integer Internals331793 +Node: Rational Internals334050 +Node: Float Internals335290 +Node: Raw Output Internals342618 +Node: C++ Interface Internals343813 +Node: Contributors347112 +Node: References353691 +Node: GNU Free Documentation License360764 +Node: Concept Index383211 +Node: Function Index429073 + +End Tag Table diff --git a/doc/mpir.info-1 b/doc/mpir.info-1 index 751c93cf..802dc3b8 100644 --- a/doc/mpir.info-1 +++ b/doc/mpir.info-1 @@ -5927,7 +5927,6 @@ algorithms, as the size N increases. Karatsuba `MUL_KARATSUBA_THRESHOLD' Toom-3 `MUL_TOOM3_THRESHOLD' Toom-4 `MUL_TOOM4_THRESHOLD' - Toom-7 `MUL_TOOM7_THRESHOLD' FFT `MUL_FFT_THRESHOLD' Similarly for squaring, with the `SQR' thresholds. diff --git a/doc/mpir.texi b/doc/mpir.texi index 3511d1d8..52fe601e 100644 --- a/doc/mpir.texi +++ b/doc/mpir.texi @@ -7106,7 +7106,6 @@ algorithms, as the size N increases. @item Karatsuba @tab @code{MUL_KARATSUBA_THRESHOLD} @item Toom-3 @tab @code{MUL_TOOM3_THRESHOLD} @item Toom-4 @tab @code{MUL_TOOM4_THRESHOLD} -@item Toom-7 @tab @code{MUL_TOOM7_THRESHOLD} @item FFT @tab @code{MUL_FFT_THRESHOLD} @end multitable @end quotation diff --git a/gmp-impl.h b/gmp-impl.h index 05d1ad94..3f57cff1 100644 --- a/gmp-impl.h +++ b/gmp-impl.h @@ -1099,11 +1099,9 @@ __GMP_DECLSPEC extern gmp_randstate_t __gmp_rands; /* Need l>=1, ls>=1, and 2*ls > l (the latter for the tD MPN_INCR_U) */ #define MPN_TOOM3_MUL_N_MINSIZE 17 #define MPN_TOOM4_MUL_N_MINSIZE 32 -#define MPN_TOOM7_MUL_N_MINSIZE 56 #define MPN_TOOM8H_MUL_MINSIZE 86 #define MPN_TOOM3_SQR_N_MINSIZE 17 #define MPN_TOOM4_SQR_N_MINSIZE 32 -#define MPN_TOOM7_SQR_N_MINSIZE 56 #define MPN_TOOM8_SQR_N_MINSIZE 40 #define MPN_FFT_MUL_N_MINSIZE 64 @@ -1152,9 +1150,6 @@ void mpn_toom4_interpolate _PROTO ((mp_ptr rp, mp_size_t * rpn, mp_size_t sn, mp_ptr tp, mp_size_t s4, mp_size_t n4, mp_size_t n6, mp_limb_t r30)); -#define mpn_toom7_mul_n __MPN(toom7_mul_n) -void mpn_toom7_mul_n _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)); - #define mpn_toom_eval_dgr3_pm1 __MPN(toom_eval_dgr3_pm1) int mpn_toom_eval_dgr3_pm1 _PROTO ((mp_ptr xp1, mp_ptr xm1, mp_srcptr xp, mp_size_t n, mp_size_t x3n, mp_ptr tp)); @@ -1206,9 +1201,6 @@ void mpn_toom8_sqr_n _PROTO((mp_ptr, mp_srcptr, mp_size_t)); #define mpn_toom42_mulmid __MPN(toom42_mulmid) void mpn_toom42_mulmid __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr)); -#define mpn_toom7_sqr_n __MPN(toom7_sqr_n) -void mpn_toom7_sqr_n _PROTO((mp_ptr, mp_srcptr, mp_size_t)); - #define mpn_fft_best_k __MPN(fft_best_k) int mpn_fft_best_k _PROTO ((mp_size_t n, int sqr)) ATTRIBUTE_CONST; @@ -1551,10 +1543,6 @@ __GMP_DECLSPEC extern const mp_limb_t __gmp_fib_table[]; #define MULMID_TOOM42_THRESHOLD 36 #endif -#ifndef MUL_TOOM7_THRESHOLD -#define MUL_TOOM7_THRESHOLD 700 -#endif - #ifndef MUL_TOOM8H_THRESHOLD #define MUL_TOOM8H_THRESHOLD 401 #endif @@ -1633,9 +1621,6 @@ __GMP_DECLSPEC extern const mp_limb_t __gmp_fib_table[]; #ifndef MUL_TOOM4_THRESHOLD_LIMIT #define MUL_TOOM4_THRESHOLD_LIMIT MUL_TOOM4_THRESHOLD #endif -#ifndef MUL_TOOM7_THRESHOLD_LIMIT -#define MUL_TOOM7_THRESHOLD_LIMIT MUL_TOOM7_THRESHOLD -#endif #ifndef MUL_TOOM8H_THRESHOLD_LIMIT #define MUL_TOOM8H_THRESHOLD_LIMIT MUL_TOOM8H_THRESHOLD #endif @@ -1670,10 +1655,6 @@ __GMP_DECLSPEC extern const mp_limb_t __gmp_fib_table[]; #define SQR_TOOM4_THRESHOLD 300 #endif -#ifndef SQR_TOOM7_THRESHOLD -#define SQR_TOOM7_THRESHOLD 700 -#endif - #ifndef SQR_TOOM8_THRESHOLD #define SQR_TOOM8_THRESHOLD 400 #endif @@ -1687,10 +1668,6 @@ __GMP_DECLSPEC extern const mp_limb_t __gmp_fib_table[]; #define SQR_TOOM4_THRESHOLD_LIMIT SQR_TOOM4_THRESHOLD #endif -#ifndef SQR_TOOM7_THRESHOLD_LIMIT -#define SQR_TOOM7_THRESHOLD_LIMIT SQR_TOOM7_THRESHOLD -#endif - #ifndef SQR_TOOM8_THRESHOLD_LIMIT #define SQR_TOOM8_THRESHOLD_LIMIT SQR_TOOM8_THRESHOLD #endif @@ -4110,10 +4087,6 @@ extern mp_size_t mul_toom3_threshold; #define MUL_TOOM4_THRESHOLD mul_toom4_threshold extern mp_size_t mul_toom4_threshold; -#undef MUL_TOOM7_THRESHOLD -#define MUL_TOOM7_THRESHOLD mul_toom7_threshold -extern mp_size_t mul_toom7_threshold; - #undef MUL_TOOM8H_THRESHOLD #define MUL_TOOM8H_THRESHOLD mul_toom8h_threshold extern mp_size_t mul_toom8h_threshold; @@ -4154,10 +4127,6 @@ extern mp_size_t sqr_toom3_threshold; #define SQR_TOOM4_THRESHOLD sqr_toom4_threshold extern mp_size_t sqr_toom4_threshold; -#undef SQR_TOOM7_THRESHOLD -#define SQR_TOOM7_THRESHOLD sqr_toom7_threshold -extern mp_size_t sqr_toom7_threshold; - #undef SQR_TOOM8_THRESHOLD #define SQR_TOOM8_THRESHOLD sqr_toom8_threshold extern mp_size_t sqr_toom8_threshold; @@ -4338,23 +4307,19 @@ extern mp_size_t mpn_fft_table[2][MPN_FFT_TABLE_SIZE]; #undef MUL_KARATSUBA_THRESHOLD_LIMIT #undef MUL_TOOM3_THRESHOLD_LIMIT #undef MUL_TOOM4_THRESHOLD_LIMIT -#undef MUL_TOOM7_THRESHOLD_LIMIT #undef MUL_TOOM8H_THRESHOLD_LIMIT #undef MULLOW_BASECASE_THRESHOLD_LIMIT #undef SQR_TOOM3_THRESHOLD_LIMIT #undef SQR_TOOM4_THRESHOLD_LIMIT -#undef SQR_TOOM7_THRESHOLD_LIMIT #undef SQR_TOOM8_THRESHOLD_LIMIT #define SQR_KARATSUBA_MAX_GENERIC 200 #define MUL_KARATSUBA_THRESHOLD_LIMIT 700 #define MUL_TOOM3_THRESHOLD_LIMIT 700 #define MUL_TOOM4_THRESHOLD_LIMIT 1000 -#define MUL_TOOM7_THRESHOLD_LIMIT 2000 #define MUL_TOOM8H_THRESHOLD_LIMIT 2000 #define MULLOW_BASECASE_THRESHOLD_LIMIT 200 #define SQR_TOOM3_THRESHOLD_LIMIT 400 #define SQR_TOOM4_THRESHOLD_LIMIT 1000 -#define SQR_TOOM7_THRESHOLD_LIMIT 2000 #define SQR_TOOM8_THRESHOLD_LIMIT 2000 #define GET_STR_THRESHOLD_LIMIT 150 diff --git a/mpn/generic/toom7_mul_n.c b/mpn/generic/toom7_mul_n.c deleted file mode 100644 index fd36199e..00000000 --- a/mpn/generic/toom7_mul_n.c +++ /dev/null @@ -1,2012 +0,0 @@ -/* mpn_toom7_mul_n -- Internal routine to multiply two natural numbers - of length n. - - THIS IS AN INTERNAL FUNCTION WITH A MUTABLE INTERFACE. IT IS ONLY - SAFE TO REACH THIS FUNCTION THROUGH DOCUMENTED INTERFACES. -*/ - -/* Implementation of the Bodrato-Zanoni algorithm for Toom-Cook 7-way. - -Copyright 2001, 2002, 2004, 2005, 2006 Free Software Foundation, Inc. -Copyright Marco Bodrato, November 2006 -Copyright 2009 William Hart - -This file is part of the MPIR Library. - -The MPIR Library is free software; you can redistribute it and/or modify -it under the terms of the GNU Lesser General Public License as published by -the Free Software Foundation; either version 2.1 of the License, or (at your -option) any later version. - -The MPIR Library is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -License for more details. - -You should have received a copy of the GNU Lesser General Public License -along with the MPIR Library; see the file COPYING.LIB. If not, write to -the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, -MA 02110-1301, USA. */ - -/* - This implementation is based on that of Paul Zimmmermann, which is available - for Toom 4 for mpz_t's at http://www.loria.fr/~zimmerma/software/toom7.c - and uses the Toom 7 sequence as generated by Bodrato and Zanoni at - http://bodrato.it/software/tc3-7.27nov2006.tar.bz2 - - Please see the papers of Bodrato (and Zanoni) at http://www.bodrato.it/ - In particular see the paper by these authors: - - Integer and Polynomial Multiplication: Towards - Optimal Toom-Cook Matrices in Proceedings of the ISSAC 2007 conference, - Ontario, Canada, July 29-August 1, 2007, ACM press. -*/ - -#include "mpir.h" -#include "gmp-impl.h" -#include "longlong.h" - -#define TC7_LIB 1 // library build only - -#if !TC7_LIB -#include -#include -#define TC7_TEST 0 // test code -#define TC7_TIME 1 // timing code -#else -#define TC7_TEST 0 -#define TC7_TIME 0 -#endif - -void -mpn_toom7_mul_n (mp_ptr rp, mp_srcptr up, - mp_srcptr vp, mp_size_t n); - -void toom7_interpolate(mp_ptr rp, mp_size_t * rpn, mp_size_t sn, - mp_ptr tp, mp_size_t s7, mp_size_t n2, mp_size_t n6, - mp_size_t n8, mp_size_t n9, mp_size_t n11); - -void _tc7_add(mp_ptr rp, mp_size_t * rn, mp_srcptr r1, - mp_size_t r1n, mp_srcptr r2, mp_size_t r2n) -{ - mp_limb_t cy; - mp_size_t s1 = ABS(r1n); - mp_size_t s2 = ABS(r2n); - - if (!s1) - { - *rn = 0; - } else if (!s2) - { - if (rp != r1) MPN_COPY(rp, r1, s1); - *rn = r1n; - } else if ((r1n ^ r2n) >= 0) - { - *rn = r1n; - cy = mpn_add(rp, r1, s1, r2, s2); - if (cy) - { - rp[s1] = cy; - if ((*rn) < 0) (*rn)--; - else (*rn)++; - } - } else - { - mp_size_t ct; - if (s1 != s2) ct = 1; - else MPN_CMP(ct, r1, r2, s1); - - if (!ct) *rn = 0; - else if (ct > 0) - { - mpn_sub(rp, r1, s1, r2, s2); - *rn = s1; - MPN_NORMALIZE(rp, (*rn)); - if (r1n < 0) *rn = -(*rn); - } - else - { - mpn_sub_n(rp, r2, r1, s1); - *rn = s1; - MPN_NORMALIZE(rp, (*rn)); - if (r1n > 0) *rn = -(*rn); - } - } -} - -void tc7_add(mp_ptr rp, mp_size_t * rn, mp_srcptr r1, mp_size_t r1n, mp_srcptr r2, mp_size_t r2n) -{ - mp_size_t s1 = ABS(r1n); - mp_size_t s2 = ABS(r2n); - - if (s1 < s2) _tc7_add(rp, rn, r2, r2n, r1, r1n); - else _tc7_add(rp, rn, r1, r1n, r2, r2n); -} - -#if HAVE_NATIVE_mpn_sumdiff_n -void tc7_sumdiff(mp_ptr rp, mp_size_t * rn, mp_ptr sp, mp_size_t * sn, mp_ptr r1, mp_size_t r1n, mp_ptr r2, mp_size_t r2n) -{ - mp_limb_t cy, cy2; - mp_size_t s1 = ABS(r1n); - mp_size_t s2 = ABS(r2n); - int swapped = 0; - - if (s1 < s2) - { - MPN_PTR_SWAP(r1, r1n, r2, r2n); - MP_SIZE_T_SWAP(s1, s2); - swapped = 1; - } - - if (!s1) - { - *rn = 0; - *sn = 0; - } else if (!s2) - { - if (rp != r1) MPN_COPY(rp, r1, s1); - if (sp != r1) MPN_COPY(sp, r1, s1); - *rn = r1n; - *sn = (swapped ? -r1n : r1n); - } else - { - mp_size_t ct; - if (s1 != s2) ct = 1; - else MPN_CMP(ct, r1, r2, s1); - - if (!ct) - { - if ((r1n ^ r2n) >= 0) - { - *sn = 0; - *rn = r1n; - cy = mpn_lshift1(rp, r1, s1); - if (cy) - { - rp[s1] = cy; - if ((*rn) < 0) (*rn)--; - else (*rn)++; - } - } else - { - *rn = 0; - *sn = (swapped ? -r1n : r1n); - cy = mpn_lshift1(sp, r1, s1); - if (cy) - { - sp[s1] = cy; - if ((*sn) < 0) (*sn)--; - else (*sn)++; - } - } - } else if (ct > 0) // r1 is bigger than r2 - { - if ((r1n ^ r2n) >= 0) // both inputs are same sign - { - cy = mpn_sumdiff_n(rp, sp, r1, r2, s2); - if (s1 > s2) // r1 has more limbs than r2 - { - *rn = r1n; - cy2 = mpn_add_1(rp + s2, r1 + s2, s1 - s2, cy>>1); - if (cy2) - { - rp[s1] = cy2; - if ((*rn) < 0) (*rn)--; - else (*rn)++; - } - mpn_sub_1(sp + s2, r1 + s2, s1 - s2, cy&1); - } else // both inputs have same number of limbs - { - *rn = r1n; - if (cy>>1) - { - rp[s1] = 1; - if ((*rn) < 0) (*rn)--; - else (*rn)++; - } - } - *sn = s1; - MPN_NORMALIZE(sp, (*sn)); - if (r1n ^ (~swapped) < 0) *sn = -(*sn); - } else // inputs are different sign - { - cy = mpn_sumdiff_n(sp, rp, r1, r2, s2); - if (s1 > s2) // r1 has more limbs than r2 - { - *sn = r1n; - cy2 = mpn_add_1(sp + s2, r1 + s2, s1 - s2, cy>>1); - if (cy2) - { - sp[s1] = cy2; - if ((*sn) < 0) (*sn)--; - else (*sn)++; - } - mpn_sub_1(rp + s2, r1 + s2, s1 - s2, cy&1); - } else // both inputs have same number of limbs - { - *sn = r1n; - if (cy>>1) - { - sp[s1] = 1; - if ((*sn) < 0) (*sn)--; - else (*sn)++; - } - } - *rn = s1; - MPN_NORMALIZE(rp, (*rn)); - if (r1n ^ (~swapped) < 0) *rn = -(*rn); - } - } else // r2 is bigger than r1 (but same number of limbs) - { - if ((r1n ^ r2n) >= 0) // both inputs are same sign - { - cy = mpn_sumdiff_n(rp, sp, r2, r1, s1); - *rn = r1n; - if (cy>>1) - { - rp[s1] = 1; - if ((*rn) < 0) (*rn)--; - else (*rn)++; - } - *sn = s1; - MPN_NORMALIZE(sp, (*sn)); - if (r1n ^ (~swapped) > 0) *sn = -(*sn); - } else // inputs are different sign - { - cy = mpn_sumdiff_n(sp, rp, r2, r1, s1); - *sn = r1n; - if (cy>>1) - { - sp[s1] = 1; - if ((*sn) < 0) (*sn)--; - else (*sn)++; - } - *rn = s1; - MPN_NORMALIZE(rp, (*rn)); - if (r1n ^ (~swapped) > 0) *rn = -(*rn); - } - } - } -} - -void tc7_sumdiff_unsigned(mp_ptr rp, mp_size_t * rn, mp_ptr sp, mp_size_t * sn, mp_ptr r1, mp_size_t r1n, mp_ptr r2, mp_size_t r2n) -{ - mp_limb_t cy, cy2; - mp_size_t s1 = ABS(r1n); - mp_size_t s2 = ABS(r2n); - int swapped = 0; - - if (s1 < s2) - { - MPN_PTR_SWAP(r1, r1n, r2, r2n); - MP_SIZE_T_SWAP(s1, s2); - swapped = 1; - } - - if (!s1) - { - *rn = 0; - *sn = 0; - } else if (!s2) - { - if (rp != r1) MPN_COPY(rp, r1, s1); - if (sp != r1) MPN_COPY(sp, r1, s1); - *rn = r1n; - *sn = (swapped ? -r1n : r1n); - } else - { - mp_size_t ct; - if (s1 != s2) ct = 1; - else MPN_CMP(ct, r1, r2, s1); - - if (!ct) - { - *sn = 0; - *rn = r1n; - cy = mpn_lshift1(rp, r1, s1); - if (cy) - { - rp[s1] = cy; - (*rn)++; - } - } else if (ct > 0) // r1 is bigger than r2 - { - cy = mpn_sumdiff_n(rp, sp, r1, r2, s2); - if (s1 > s2) // r1 has more limbs than r2 - { - *rn = r1n; - cy2 = mpn_add_1(rp + s2, r1 + s2, s1 - s2, cy>>1); - if (cy2) - { - rp[s1] = cy2; - (*rn)++; - } - mpn_sub_1(sp + s2, r1 + s2, s1 - s2, cy&1); - } else // both inputs have same number of limbs - { - *rn = r1n; - if (cy>>1) - { - rp[s1] = 1; - (*rn)++; - } - } - *sn = s1; - MPN_NORMALIZE(sp, (*sn)); - if (swapped) *sn = -(*sn); - } else // r2 is bigger than r1 (but same number of limbs) - { - cy = mpn_sumdiff_n(rp, sp, r2, r1, s1); - *rn = r1n; - if (cy>>1) - { - rp[s1] = 1; - (*rn)++; - } - *sn = s1; - MPN_NORMALIZE(sp, (*sn)); - if (!swapped) *sn = -(*sn); - } - } -} -#endif - -/* ~~~~~~~~~~~~~~~~~~~~UNTESTED CODE~~~~~~~~~~~~~~~~~~~~~~~~~~ - -#define MPN_ADDSUB_CMP(cyxx, r1xx, r2xx, r3xx, snxx) \ - do { \ - mp_limb_t t[2]; \ - add_ssaaaa(t[1], t[0], CNST_LIMB(0), r1xx[snxx - 1], CNST_LIMB(0), r2xx[snxx - 1]); \ - if (t[1]) cyxx = 1; \ - else if (t[0] > r3xx[snxx - 1]) cyxx = 1; \ - else if (t[0] < r3xx[snxx - 1] - CNST_LIMB(1)) cyxx = -1; \ - else cyxx = 0; \ - } while (0) - -void tc7_addadd(mp_ptr rp, mp_size_t * rn, mp_ptr r1, mp_size_t r1n, mp_ptr r2, mp_size_t r2n, mp_ptr r3, n3, mp_size_t r3n) -{ - mp_size_t s1 = ABS(r1n); - mp_size_t s2 = ABS(r2n); - mp_size_t s3 = ABS(r3n); - - if ((s1 != s2) || (s1 != s3)) - { - tc7_add(rp, rn, r1, r1n, r2, r2n); - tc7_add(rp, rn, rp, *rn, r3, r3n); - } else - { - mp_limb_t cy; - mp_size_t cy2; - if (((r1n ^ r2n) >= 0) && ((r1n ^ r3n) >= 0)) // all same sign addadd - { - cy = mpn_addadd_n(rp, r1, r2, r3, s1); - *rn = r1n; - if (cy) - { - rp[s1] = cy; - if ((*rn) < 0) (*rn)--; - else (*rn)++; - } - } else if (((r1n ^ r2n) >= 0) && ((r1n ^ r3n) < 0)) // addsub - { - MPN_ADDSUB_CMP(cy2, r1, r2, r3, s1); - - if (cy2 > 0) - { - cy = mpn_addsub_n(rp, r1, r2, r3, s1); - *rn = r1n; - if (cy) - { - rp[s1] = cy; - if ((*rn) < 0) (*rn)--; - else (*rn)++; - } - } else if (cy2 < 0) - { - cy = mpn_subadd_n(rp, r3, r1, r2, s1); - if (cy) abort(); - *rn = s1; - MPN_NORMALIZE(rp, (*rn)); - if (r1n < 0) *rn = -(*rn); - } else - { - tc7_add(rp, rn, r1, r1n, r2, r2n); - tc7_add(rp, rn, rp, *rn, r3, r3n); - } - } else if (((r1n ^ r2n) < 0) && ((r1n ^ r3n) >= 0)) // subadd - { - MPN_ADDSUB_CMP(cy2, r1, r3, r2, s1); - - if (cy2 > 0) - { - cy = mpn_addsub_n(rp, r1, r3, r2, s1); - *rn = r1n; - if (cy) - { - rp[s1] = cy; - if ((*rn) < 0) (*rn)--; - else (*rn)++; - } - } else if (cy2 < 0) - { - mpn_subadd_n(rp, r2, r1, r3, s1); - *rn = s1; - MPN_NORMALIZE(rp, (*rn)); - if (r1n > 0) *rn = -(*rn); - } else - { - tc7_add(rp, rn, r1, r1n, r2, r2n); - tc7_add(rp, rn, rp, *rn, r3, r3n); - } - } else // add final two and subtract first - { - MPN_ADDSUB_CMP(cy2, r2, r3, r1, s1); - - if (cy2 > 0) - { - cy = mpn_addsub_n(rp, r2, r3, r1, s1); - *rn = r1n; - if (cy) - { - rp[s1] = cy; - if ((*rn) < 0) (*rn)--; - else (*rn)++; - } - } else if (cy2 < 0) - { - mpn_subadd_n(rp, r1, r2, r3, s1); - *rn = s1; - MPN_NORMALIZE(rp, (*rn)); - if (r1n < 0) *rn = -(*rn); - } else - { - tc7_add(rp, rn, r1, r1n, r2, r2n); - tc7_add(rp, rn, rp, *rn, r3, r3n); - } - } - } -} - -void tc7_addsub(mp_ptr rp, mp_size_t * rn, mp_ptr r1, mp_size_t r1n, mp_ptr r2, mp_size_t r2n, mp_ptr r3, mp_size_t r3n) -{ - tc7_addadd(rp, rn, r1, r1n, r2, r2n, r3, -r3n); -} - -void tc7_subsub(mp_ptr rp, mp_size_t * rn, mp_ptr r1, mp_size_t r1n, mp_ptr r2, mp_size_t r2n, mp_ptr r3, mp_size_t r3n) -{ - tc7_addadd(rp, rn, r1, r1n, r2, -r2n, r3, -r3n); -} - -*/ - -void _tc7_add_unsigned(mp_ptr rp, mp_size_t * rn, mp_srcptr r1, mp_size_t r1n, mp_srcptr r2, mp_size_t r2n) -{ - mp_limb_t cy; - mp_size_t s1 = r1n; - mp_size_t s2 = r2n; - - if (!s2) - { - if (!s1) *rn = 0; - else - { - if (rp != r1) MPN_COPY(rp, r1, s1); - *rn = r1n; - } - } else - { - *rn = r1n; - cy = mpn_add(rp, r1, s1, r2, s2); - if (cy) - { - rp[s1] = cy; - if ((*rn) < 0) (*rn)--; - else (*rn)++; - } - } -} - -void tc7_add_unsigned(mp_ptr rp, mp_size_t * rn, mp_srcptr r1, mp_size_t r1n, mp_srcptr r2, mp_size_t r2n) -{ - if (r1n < r2n) _tc7_add_unsigned(rp, rn, r2, r2n, r1, r1n); - else _tc7_add_unsigned(rp, rn, r1, r1n, r2, r2n); -} - -void tc7_sub(mp_ptr rp, mp_size_t * rn, mp_ptr r1, mp_size_t r1n, mp_ptr r2, mp_size_t r2n) -{ - tc7_add(rp, rn, r1, r1n, r2, -r2n); -} - -void tc7_lshift(mp_ptr rp, mp_size_t * rn, mp_srcptr xp, mp_size_t xn, mp_size_t bits) -{ - if (xn == 0) *rn = 0; - else - { - mp_size_t xu = ABS(xn); - mp_limb_t msl = mpn_lshift(rp, xp, xu, bits); - if (msl) - { - rp[xu] = msl; - *rn = (xn >= 0 ? xn + 1 : xn - 1); - } else - *rn = xn; - } -} - -void tc7_rshift_inplace(mp_ptr rp, mp_size_t * rn, mp_size_t bits) -{ - if (*rn) - { - if ((*rn) > 0) - { - mpn_rshift(rp, rp, *rn, bits); - if (rp[(*rn) - 1] == CNST_LIMB(0)) (*rn)--; - } else - { - mpn_rshift(rp, rp, -(*rn), bits); - if (rp[-(*rn) - 1] == CNST_LIMB(0)) (*rn)++; - } - } -} - -#if HAVE_NATIVE_mpn_addlsh1_n -void tc7_addlsh1_unsigned(mp_ptr rp, mp_size_t * rn, mp_srcptr xp, mp_size_t xn) -{ - if (xn) - { - if (xn >= *rn) - { - mp_limb_t cy; - if (xn > *rn) MPN_ZERO(rp + *rn, xn - *rn); - cy = mpn_addlsh1_n(rp, rp, xp, xn); - if (cy) - { - rp[xn] = cy; - *rn = xn + 1; - } else *rn = xn; - } else - { - mp_limb_t cy = mpn_addlsh1_n(rp, rp, xp, xn); - if (cy) cy = mpn_add_1(rp + xn, rp + xn, *rn - xn, cy); - if (cy) - { - rp[*rn] = cy; - (*rn)++; - } - } - } -} -#endif -/* -void tc7_divexact_ui(mp_ptr rp, mp_size_t * rn, mp_srcptr x, mp_size_t xn, mp_limb_t c) -{ - if (xn) - { - mp_size_t xu = ABS(xn); - mp_limb_t cy = mpn_divmod_1(rp, x, xu, c); - if (xn > 0) - { - if (rp[xu - 1] == 0) *rn = xn - 1; - else *rn = xn; - } else - { - if (rp[xu - 1] == 0) *rn = xn + 1; - else *rn = xn; - } - } else *rn = 0; -} -*/ - -void tc7_mul_1(mp_ptr rp, mp_size_t * rn, mp_srcptr xp, mp_size_t xn, mp_limb_t c) -{ - if (xn == 0) *rn = 0; // c won't be zero in tc7 - else - { - mp_size_t xu = ABS(xn); - mp_limb_t msl = mpn_mul_1(rp, xp, xu, c); - if (msl) - { - rp[xu] = msl; - *rn = (xn >= 0 ? xn + 1 : xn - 1); - } else - *rn = xn; - } -} - -void tc7_divexact_1(mp_ptr rp, mp_size_t * rn, mp_ptr x, mp_size_t xn, mp_limb_t c) -{ -mp_size_t abs_size; - if (xn == 0) - { - *rn = 0; - return; - } - abs_size = ABS (xn); - - MPN_DIVREM_OR_DIVEXACT_1 (rp, x, abs_size, c); - abs_size -= (rp[abs_size-1] == 0); - *rn = (xn >= 0 ? abs_size : -abs_size); -} - -void tc7_divexact_by3(mp_ptr rp, mp_size_t * rn, mp_ptr x, mp_size_t xn) -{ - if (xn) - { - mp_size_t xu = ABS(xn); - mpn_divexact_by3(rp, x, xu); - if (xn > 0) - { - if (rp[xu - 1] == CNST_LIMB(0)) *rn = xn - 1; - else *rn = xn; - } else - { - if (rp[xu - 1] == CNST_LIMB(0)) *rn = xn + 1; - else *rn = xn; - } - } else *rn = 0; -} - -#if HAVE_NATIVE_mpn_divexact_byBm1of -void tc7_divexact_by15(mp_ptr rp, mp_size_t * rn, mp_ptr x, mp_size_t xn) -{ - if (xn) - { - mp_size_t xu = ABS(xn); - mpn_divexact_byBm1of(rp, x, xu, CNST_LIMB(15), CNST_LIMB((~0)/15)); // works for 32 and 64 bits - if (xn > 0) - { - if (rp[xu - 1] == CNST_LIMB(0)) *rn = xn - 1; - else *rn = xn; - } else - { - if (rp[xu - 1] == CNST_LIMB(0)) *rn = xn + 1; - else *rn = xn; - } - } else *rn = 0; -} -#endif - -#if HAVE_NATIVE_mpn_mul_1c -#define MPN_MUL_1C(cout, dst, src, size, n, cin) \ - do { \ - (cout) = mpn_mul_1c (dst, src, size, n, cin); \ - } while (0) -#else -#define MPN_MUL_1C(cout, dst, src, size, n, cin) \ - do { \ - mp_limb_t __cy; \ - __cy = mpn_mul_1 (dst, src, size, n); \ - (cout) = __cy + mpn_add_1 (dst, dst, size, cin); \ - } while (0) -#endif - -void tc7_addmul_1(mp_ptr wp, mp_size_t * wn, mp_srcptr xp, mp_size_t xn, mp_limb_t y) -{ - mp_size_t sign, wu, xu, ws, new_wn, min_size, dsize; - mp_limb_t cy; - - /* w unaffected if x==0 or y==0 */ - if (xn == 0 || y == 0) - return; - - sign = xn; - xu = ABS (xn); - - ws = *wn; - if (*wn == 0) - { - /* nothing to add to, just set x*y, "sign" gives the sign */ - cy = mpn_mul_1 (wp, xp, xu, y); - if (cy) - { - wp[xu] = cy; - xu = xu + 1; - } - *wn = (sign >= 0 ? xu : -xu); - return; - } - - sign ^= *wn; - wu = ABS (*wn); - - new_wn = MAX (wu, xu); - min_size = MIN (wu, xu); - - if (sign >= 0) - { - /* addmul of absolute values */ - - cy = mpn_addmul_1 (wp, xp, min_size, y); - - dsize = xu - wu; -#if HAVE_NATIVE_mpn_mul_1c - if (dsize > 0) - cy = mpn_mul_1c (wp + min_size, xp + min_size, dsize, y, cy); - else if (dsize < 0) - { - dsize = -dsize; - cy = mpn_add_1 (wp + min_size, wp + min_size, dsize, cy); - } -#else - if (dsize != 0) - { - mp_limb_t cy2; - if (dsize > 0) - cy2 = mpn_mul_1 (wp + min_size, xp + min_size, dsize, y); - else - { - dsize = -dsize; - cy2 = 0; - } - cy = cy2 + mpn_add_1 (wp + min_size, wp + min_size, dsize, cy); - } -#endif - - if (cy) - { - wp[dsize + min_size] = cy; - new_wn ++; - } - } else - { - /* submul of absolute values */ - - cy = mpn_submul_1 (wp, xp, min_size, y); - if (wu >= xu) - { - /* if w bigger than x, then propagate borrow through it */ - if (wu != xu) - cy = mpn_sub_1 (wp + xu, wp + xu, wu - xu, cy); - - if (cy != 0) - { - /* Borrow out of w, take twos complement negative to get - absolute value, flip sign of w. */ - wp[new_wn] = ~-cy; /* extra limb is 0-cy */ - mpn_com_n (wp, wp, new_wn); - new_wn++; - MPN_INCR_U (wp, new_wn, CNST_LIMB(1)); - ws = -*wn; - } - } else /* wu < xu */ - { - /* x bigger than w, so want x*y-w. Submul has given w-x*y, so - take twos complement and use an mpn_mul_1 for the rest. */ - - mp_limb_t cy2; - - /* -(-cy*b^n + w-x*y) = (cy-1)*b^n + ~(w-x*y) + 1 */ - mpn_com_n (wp, wp, wu); - cy += mpn_add_1 (wp, wp, wu, CNST_LIMB(1)); - cy -= 1; - - /* If cy-1 == -1 then hold that -1 for latter. mpn_submul_1 never - returns cy==MP_LIMB_T_MAX so that value always indicates a -1. */ - cy2 = (cy == MP_LIMB_T_MAX); - cy += cy2; - MPN_MUL_1C (cy, wp + wu, xp + wu, xu - wu, y, cy); - wp[new_wn] = cy; - new_wn += (cy != 0); - - /* Apply any -1 from above. The value at wp+wsize is non-zero - because y!=0 and the high limb of x will be non-zero. */ - if (cy2) - MPN_DECR_U (wp+wu, new_wn - wu, CNST_LIMB(1)); - - ws = -*wn; - } - - /* submul can produce high zero limbs due to cancellation, both when w - has more limbs or x has more */ - MPN_NORMALIZE (wp, new_wn); - } - - *wn = (ws >= 0 ? new_wn : -new_wn); - - ASSERT (new_wn == 0 || wp[new_wn - 1] != 0); -} - -void tc7_submul_1(mp_ptr wp, mp_size_t * wn, mp_srcptr x, mp_size_t xn, mp_limb_t y) -{ - tc7_addmul_1(wp, wn, x, -xn, y); -} - -void tc7_copy (mp_ptr yp, mp_size_t * yn, mp_size_t offset, mp_srcptr xp, mp_size_t xn) -{ - mp_size_t yu = ABS(*yn); - mp_size_t xu = ABS(xn); - mp_limb_t cy = 0; - - if (xn == 0) - return; - - if (offset < yu) /* low part of x overlaps with y */ - { - if (offset + xu <= yu) /* x entirely inside y */ - { - cy = mpn_add_n (yp + offset, yp + offset, xp, xu); - if (offset + xu < yu) - cy = mpn_add_1 (yp + offset + xu, yp + offset + xu, - yu - (offset + xu), cy); - } else - cy = mpn_add_n (yp + offset, yp + offset, xp, yu - offset); - /* now cy is the carry at yp + yu */ - if (xu + offset > yu) /* high part of x exceeds y */ - { - MPN_COPY (yp + yu, xp + yu - offset, xu + offset - yu); - cy = mpn_add_1 (yp + yu, yp + yu, xu + offset - yu, cy); - yu = xu + offset; - } - /* now cy is the carry at yp + yn */ - if (cy) - yp[yu++] = cy; - MPN_NORMALIZE(yp, yu); - *yn = yu; - } else /* x does not overlap */ - { - if (offset > yu) - MPN_ZERO (yp + yu, offset - yu); - MPN_COPY (yp + offset, xp, xu); - *yn = offset + xu; - } -} - -#define MUL_TC7_UNSIGNED(r3xx, n3xx, r1xx, n1xx, r2xx, n2xx) \ - do \ - { \ - if ((n1xx != 0) && (n2xx != 0)) \ - { mp_size_t len; \ - if (n1xx == n2xx) \ - { \ - if (n1xx > MUL_TOOM7_THRESHOLD) mpn_toom7_mul_n(r3xx, r1xx, r2xx, n1xx); \ - else mpn_mul_n(r3xx, r1xx, r2xx, n1xx); \ - } else if (n1xx > n2xx) \ - mpn_mul(r3xx, r1xx, n1xx, r2xx, n2xx); \ - else \ - mpn_mul(r3xx, r2xx, n2xx, r1xx, n1xx); \ - len = n1xx + n2xx; \ - MPN_NORMALIZE(r3xx, len); \ - n3xx = len; \ - } else \ - n3xx = 0; \ - } while (0) - -#define MUL_TC7(r3xx, n3xx, r1xx, n1xx, r2xx, n2xx) \ - do \ - { \ - mp_size_t sign = n1xx ^ n2xx; \ - mp_size_t un1 = ABS(n1xx); \ - mp_size_t un2 = ABS(n2xx); \ - MUL_TC7_UNSIGNED(r3xx, n3xx, r1xx, un1, r2xx, un2); \ - if (sign < 0) n3xx = -n3xx; \ - } while (0) - -#define SQR_TC7_UNSIGNED(r3xx, n3xx, r1xx, n1xx) \ - do \ - { \ - if (n1xx != 0) \ - { mp_size_t len; \ - if (n1xx > SQR_TOOM7_THRESHOLD) mpn_toom7_sqr_n(r3xx, r1xx, n1xx); \ - else mpn_sqr_n(r3xx, r1xx, n1xx); \ - len = 2*n1xx; \ - MPN_NORMALIZE(r3xx, len); \ - n3xx = len; \ - } else \ - n3xx = 0; \ - } while (0) - -#define SQR_TC7(r3xx, n3xx, r1xx, n1xx) \ - do \ - { \ - mp_size_t un1 = ABS(n1xx); \ - SQR_TC7_UNSIGNED(r3xx, n3xx, r1xx, un1); \ - } while (0) - -#define TC7_NORM(rxx, nxx, sxx) \ - do \ - { \ - nxx = sxx; \ - MPN_NORMALIZE(rxx, nxx); \ - } while(0) - -#if TC7_TEST || TC7_TIME -#define p2(axx, anxx, bxx, bnxx) \ - do \ - { \ - printf("s1 = "); \ - if (anxx < 0) printf("-"); \ - if (anxx == 0) printf("0, "); \ - else printf("%ld, ", axx[0]); \ - printf("s2 = "); \ - if (bnxx < 0) printf("-"); \ - if (bnxx == 0) printf("0"); \ - else printf("%ld\n", bxx[0]); \ - } while (0) - -#define p(axx, anxx) \ - do \ - { \ - printf("r = "); \ - if (anxx < 0) printf("-"); \ - if (anxx == 0) printf("0\n"); \ - else printf("%ld\n", axx[0]); \ - } while (0) -#endif - -/* Zero out limbs past end of integer */ -#define TC7_DENORM(rxx, nxx, sxx) \ - do { \ - MPN_ZERO(rxx + ABS(nxx), sxx - ABS(nxx)); \ - } while (0) - -/* Two's complement divexact by power of 2 */ -#define TC7_DIVEXACT_2EXP(rxx, nxx, sxx) \ - do { \ - mp_limb_t sign = (LIMB_HIGHBIT_TO_MASK(rxx[nxx-1]) << (GMP_LIMB_BITS - sxx)); \ - mpn_rshift(rxx, rxx, nxx, sxx); \ - rxx[nxx-1] |= sign; \ - } while (0) - -#if HAVE_NATIVE_mpn_rshift1 -#define TC7_RSHIFT1(rxx, nxx) \ - do { \ - mp_limb_t sign = (LIMB_HIGHBIT_TO_MASK(rxx[nxx-1]) << (GMP_LIMB_BITS - 1)); \ - mpn_rshift1(rxx, rxx, nxx); \ - rxx[nxx-1] |= sign; \ - } while (0) -#else -#define TC7_RSHIFT1(rxx, nxx) \ - do { \ - mp_limb_t sign = (LIMB_HIGHBIT_TO_MASK(rxx[nxx-1]) << (GMP_LIMB_BITS - 1)); \ - mpn_rshift(rxx, rxx, nxx, 1); \ - rxx[nxx-1] |= sign; \ - } while (0) -#endif - -#define r1 (tp) -#define r2 (tp + t7) -#define r3 (tp + 2*t7) -#define r4 (tp + 3*t7) -#define r5 (tp + 4*t7) -#define r6 (tp + 5*t7) -#define r7 (tp + 6*t7) -#define r8 (tp + 7*t7) -#define r9 (tp + 8*t7) -#define r10 (tp + 9*t7) -#define r11 (tp + 10*t7) -#define r12 (tp + 11*t7) -#define r13 (tp + 12*t7) - -/* Multiply {up, n} by {vp, n} and write the result to - {prodp, 2n}. - - Note that prodp gets 2n limbs stored, even if the actual result - only needs 2n - 1. -*/ - -void -mpn_toom7_mul_n (mp_ptr rp, mp_srcptr up, - mp_srcptr vp, mp_size_t n) -{ - mp_size_t len1, len2; - mp_limb_t cy; - mp_ptr tp; - mp_size_t a0n, a1n, a2n, a3n, a4n, a5n, a6n; - mp_size_t b0n, b1n, b2n, b3n, b4n, b5n, b6n; - mp_size_t sn, n1, n2, n3, n4, n5, n6, n7, n8, n9, n10, n11, n12, n13, rpn, t7; - - len1 = n; - len2 = n; - ASSERT (n >= 1); - - MPN_NORMALIZE(up, len1); - MPN_NORMALIZE(vp, len2); - - sn = (n - 1) / 7 + 1; - -#define a0 (up) -#define a1 (up + sn) -#define a2 (up + 2*sn) -#define a3 (up + 3*sn) -#define a4 (up + 4*sn) -#define a5 (up + 5*sn) -#define a6 (up + 6*sn) -#define b0 (vp) -#define b1 (vp + sn) -#define b2 (vp + 2*sn) -#define b3 (vp + 3*sn) -#define b4 (vp + 4*sn) -#define b5 (vp + 5*sn) -#define b6 (vp + 6*sn) - - TC7_NORM(a0, a0n, sn); - TC7_NORM(a1, a1n, sn); - TC7_NORM(a2, a2n, sn); - TC7_NORM(a3, a3n, sn); - TC7_NORM(a4, a4n, sn); - TC7_NORM(a5, a5n, sn); - TC7_NORM(a6, a6n, n - 6*sn); - TC7_NORM(b0, b0n, sn); - TC7_NORM(b1, b1n, sn); - TC7_NORM(b2, b2n, sn); - TC7_NORM(b3, b3n, sn); - TC7_NORM(b4, b4n, sn); - TC7_NORM(b5, b5n, sn); - TC7_NORM(b6, b6n, n - 6*sn); - - t7 = 2*sn+2; // allows mult of 2 integers of sn + 1 limbs - - tp = __GMP_ALLOCATE_FUNC_LIMBS(13*t7 + 11*(sn+1)); - -#define u2 (tp + 13*t7) -#define u3 (tp + 13*t7 + (sn+1)) -#define u4 (tp + 13*t7 + 2*(sn+1)) -#define u5 (tp + 13*t7 + 3*(sn+1)) -#define u6 (tp + 13*t7 + 4*(sn+1)) -#define u7 (tp + 13*t7 + 5*(sn+1)) -#define u8 (tp + 13*t7 + 6*(sn+1)) -#define u9 (tp + 13*t7 + 7*(sn+1)) -#define u10 (tp + 13*t7 + 8*(sn+1)) -#define u11 (tp + 13*t7 + 9*(sn+1)) -#define u12 (tp + 13*t7 + 10*(sn+1)) - - tc7_lshift (r6, &n6, a2, a2n, 4); - tc7_lshift (r7, &n7, b2, b2n, 4); - - tc7_lshift (u10, &n10, a4, a4n, 4); - tc7_lshift (u11, &n11, b4, b4n, 4); - - tc7_lshift (u5, &n5, a3, a3n, 3); - tc7_lshift (u2, &n2, b3, b3n, 3); - - tc7_lshift (r1, &n1, a1, a1n, 5); - tc7_add (r1, &n1, r1, n1, u5, n5); - tc7_addmul_1 (r1, &n1, a5, a5n, 2); - tc7_add (u3, &n3, r6, n6, a6, a6n); - tc7_addmul_1 (u3, &n3, a0, a0n, 64); - tc7_addmul_1 (u3, &n3, a4, a4n, 4); - - tc7_sub (r13, &n13, r1, n1, u3, n3); - tc7_add (u3, &n3, u3, n3, r1, n1); - - tc7_lshift (r1, &n1, b1, b1n, 5); - tc7_add (r1, &n1, r1, n1, u2, n2); - tc7_addmul_1 (r1, &n1, b5, b5n, 2); - tc7_add (u8, &n8, r7, n7, b6, b6n); - tc7_addmul_1 (u8, &n8, b0, b0n, 64); - tc7_addmul_1 (u8, &n8, b4, b4n, 4); - - tc7_add (r12, &n12, r1, n1, u8, n8); - tc7_sub (u8, &n8, r1, n1, u8, n8); - - MUL_TC7 (r3, n3, u3, n3, r12, n12); - MUL_TC7 (r8, n8, u8, n8, r13, n13); - - tc7_add (r1, &n1, u10, n10, a0, a0n); - tc7_addmul_1 (r1, &n1, a6, a6n,64); - tc7_addmul_1 (r1, &n1, a2, a2n, 4); - tc7_addmul_1 (u5, &n5, a1, a1n, 2); - tc7_addmul_1 (u5, &n5, a5, a5n, 32); - - tc7_sub (r13, &n13, r1, n1, u5, n5); - tc7_add (u5, &n5, u5, n5, r1, n1); - - tc7_add (r1, &n1, u11, n11, b0, b0n); - tc7_addmul_1 (r1, &n1, b6, b6n,64); - tc7_addmul_1 (r1, &n1, b2, b2n, 4); - tc7_addmul_1 (u2, &n2, b1, b1n, 2); - tc7_addmul_1 (u2, &n2, b5, b5n, 32); - - tc7_add (r12, &n12, u2, n2, r1, n1); - tc7_sub (u2, &n2, r1, n1, u2, n2); - - MUL_TC7 (r5, n5, u5, n5, r12, n12); - MUL_TC7 (r2, n2, u2, n2, r13, n13); - - tc7_add (r1, &n1, r6, n6, a0, a0n); - tc7_addmul_1 (r1, &n1, u10, n10, 16); - tc7_addmul_1 (r1, &n1, a6, a6n, 4096); - tc7_add (u10, &n10, u10, n10, a6, a6n); - tc7_addmul_1 (u10, &n10, r6, n6, 16); - tc7_addmul_1 (u10, &n10, a0, a0n, 4096); - tc7_lshift (u6, &n6, a3, a3n, 4); - tc7_add (u4, &n4, u6, n6, a1, a1n); - tc7_addmul_1 (u4, &n4, a5, a5n, 256); - tc7_lshift (u4, &n4, u4, n4, 2); - tc7_add (u6, &n6, u6, n6, a5, a5n); - tc7_addmul_1 (u6, &n6, a1, a1n, 256); - tc7_lshift (u6, &n6, u6, n6, 2); - - tc7_sub (u9, &n9, u4, n4, r1, n1); - tc7_add (u4, &n4, u4, n4, r1, n1); - - tc7_add (r1, &n1, r7, n7, b0, b0n); - tc7_addmul_1 (r1, &n1, u11, n11, 16); - tc7_addmul_1 (r1, &n1, b6, b6n, 4096); - tc7_add (u11, &n11, u11, n11, b6, b6n); - tc7_addmul_1 (u11, &n11, r7, n7, 16); - tc7_addmul_1 (u11, &n11, b0, b0n, 4096); - tc7_lshift (r7, &n7, b3, b3n, 4); - tc7_add (r13, &n13, r7, n7, b1, b1n); - tc7_addmul_1 (r13, &n13, b5, b5n, 256); - tc7_lshift (r13, &n13, r13, n13, 2); - tc7_add (r7, &n7, r7, n7, b5, b5n); - tc7_addmul_1 (r7, &n7, b1, b1n, 256); - tc7_lshift (r7, &n7, r7, n7, 2); - - tc7_sub (r12, &n12, r13, n13, r1, n1); - tc7_add (r13, &n13, r13, n13, r1, n1); - - MUL_TC7 (r9, n9, u9, n9, r12, n12); - MUL_TC7 (r4, n4, u4, n4, r13, n13); - - tc7_sub (r12, &n12, u10, n10, u6, n6); - tc7_add (u10, &n10, u10, n10, u6, n6); - - tc7_add (r13, &n13, u11, n11, r7, n7); - tc7_sub (u11, &n11, u11, n11, r7, n7); - - MUL_TC7 (r10, n10, u10, n10, r13, n13); - MUL_TC7 (r11, n11, u11, n11, r12, n12); - - tc7_add (u7, &n7, a3, a3n, a1, a1n); - tc7_add (u6, &n6, a2, a2n, a0, a0n); - tc7_add (u6, &n6, u6, n6, a4, a4n); - tc7_add (u6, &n6, u6, n6, a6, a6n); - tc7_add (u7, &n7, u7, n7, a5, a5n); - tc7_add (r1, &n1, u7, n7, u6, n6); - tc7_sub (u6, &n6, u6, n6, u7, n7); - tc7_add (u7, &n7, b3, b3n, b1, b1n); - tc7_add (r13, &n13, b2, b2n, b0, b0n); - tc7_add (r13, &n13, r13, n13, b4, b4n); - tc7_add (r13, &n13, r13, n13, b6, b6n); - tc7_add (u7, &n7, u7, n7, b5, b5n); - tc7_sub (r12, &n12, r13, n13, u7, n7); - tc7_add (u7, &n7, u7, n7, r13, n13); - - MUL_TC7 (r7, n7, u7, n7, r1, n1); - MUL_TC7 (r6, n6, u6, n6, r12, n12); - - tc7_mul_1 (u12, &n12, b6, b6n, 729); - tc7_addmul_1 (u12, &n12, b5, b5n, 243); - tc7_addmul_1 (u12, &n12, b4, b4n, 81); - tc7_addmul_1 (u12, &n12, b3, b3n, 27); - tc7_addmul_1 (u12, &n12, b2, b2n, 9); - tc7_addmul_1 (u12, &n12, b1, b1n, 3); - tc7_add (u12, &n12, u12, n12, b0, b0n); - tc7_mul_1 (r13, &n13, a6, a6n, 729); - tc7_addmul_1 (r13, &n13, a5, a5n, 243); - tc7_addmul_1 (r13, &n13, a4, a4n, 81); - tc7_addmul_1 (r13, &n13, a3, a3n, 27); - tc7_addmul_1 (r13, &n13, a2, a2n, 9); - tc7_addmul_1 (r13, &n13, a1, a1n, 3); - tc7_add (r13, &n13, r13, n13, a0, a0n); - - MUL_TC7 (r12, n12, u12, n12, r13, n13); - - MUL_TC7 (r1, n1, a6, a6n, b6, b6n); - - MUL_TC7 (r13, n13, a0, a0n, b0, b0n); - - TC7_DENORM(r1, n1, t7); - TC7_DENORM(r2, n2, t7); - TC7_DENORM(r3, n3, t7); - TC7_DENORM(r4, n4, t7); - TC7_DENORM(r5, n5, t7); - TC7_DENORM(r6, n6, t7); - TC7_DENORM(r7, n7, t7); - TC7_DENORM(r8, n8, t7); - TC7_DENORM(r9, n9, t7); - TC7_DENORM(r10, n10, t7); - TC7_DENORM(r11, n11, t7); - TC7_DENORM(r12, n12, t7); - TC7_DENORM(r13, n13, t7); - - toom7_interpolate(rp, &rpn, sn, tp, t7 - 1, n2, n6, n8, n9, n11); - - if (rpn != 2*n) - { - MPN_ZERO((rp + rpn), 2*n - rpn); - } - - __GMP_FREE_FUNC_LIMBS (tp, 13*t7 + 11*(sn+1)); -} - -/* Square {up, n} and write the result to {prodp, 2n}. - - Note that prodp gets 2n limbs stored, even if the actual result - only needs 2n - 1. -*/ - -void -mpn_toom7_sqr_n (mp_ptr rp, mp_srcptr up, mp_size_t n) -{ - mp_size_t len1, len2; - mp_limb_t cy; - mp_ptr tp; - mp_size_t a0n, a1n, a2n, a3n, a4n, a5n, a6n; - mp_size_t sn, n1, n2, n3, n4, n5, n6, n7, n8, n9, n10, n11, n12, n13, n14, n15, rpn, t7; - - len1 = n; - ASSERT (n >= 1); - - MPN_NORMALIZE(up, len1); - - sn = (n - 1) / 7 + 1; - - TC7_NORM(a0, a0n, sn); - TC7_NORM(a1, a1n, sn); - TC7_NORM(a2, a2n, sn); - TC7_NORM(a3, a3n, sn); - TC7_NORM(a4, a4n, sn); - TC7_NORM(a5, a5n, sn); - TC7_NORM(a6, a6n, n - 6*sn); - - t7 = 2*sn+2; // allows mult of 2 integers of sn + 1 limbs - - tp = __GMP_ALLOCATE_FUNC_LIMBS(13*t7 + 7*(sn+1)); - - tc7_lshift (r6, &n6, a2, a2n, 4); - - tc7_lshift (u8, &n15, a4, a4n, 4); - - tc7_lshift (u5, &n5, a3, a3n, 3); - - tc7_lshift (r1, &n1, a1, a1n, 5); - tc7_add (r1, &n1, r1, n1, u5, n5); - tc7_addmul_1 (r1, &n1, a5, a5n, 2); - tc7_add (u3, &n3, r6, n6, a6, a6n); - tc7_addmul_1 (u3, &n3, a0, a0n, 64); - tc7_addmul_1 (u3, &n3, a4, a4n, 4); - - tc7_sub (r13, &n13, r1, n1, u3, n3); - tc7_add (u3, &n3, u3, n3, r1, n1); - - SQR_TC7 (r3, n3, u3, n3); - SQR_TC7 (r8, n8, r13, n13); - - tc7_add (r1, &n1, u8, n15, a0, a0n); - tc7_addmul_1 (r1, &n1, a6, a6n,64); - tc7_addmul_1 (r1, &n1, a2, a2n, 4); - tc7_addmul_1 (u5, &n5, a1, a1n, 2); - tc7_addmul_1 (u5, &n5, a5, a5n, 32); - - tc7_sub (r13, &n13, r1, n1, u5, n5); - tc7_add (u5, &n5, u5, n5, r1, n1); - - SQR_TC7 (r5, n5, u5, n5); - SQR_TC7 (r2, n2, r13, n13); - - tc7_add (r1, &n1, r6, n6, a0, a0n); - tc7_addmul_1 (r1, &n1, u8, n15, 16); - tc7_addmul_1 (r1, &n1, a6, a6n, 4096); - tc7_add (u8, &n15, u8, n15, a6, a6n); - tc7_addmul_1 (u8, &n15, r6, n6, 16); - tc7_addmul_1 (u8, &n15, a0, a0n, 4096); - tc7_lshift (u6, &n6, a3, a3n, 4); - tc7_add (u4, &n4, u6, n6, a1, a1n); - tc7_addmul_1 (u4, &n4, a5, a5n, 256); - tc7_lshift (u4, &n4, u4, n4, 2); - tc7_add (u6, &n6, u6, n6, a5, a5n); - tc7_addmul_1 (u6, &n6, a1, a1n, 256); - tc7_lshift (u6, &n6, u6, n6, 2); - - tc7_sub (u2, &n14, u4, n4, r1, n1); - tc7_add (u4, &n4, u4, n4, r1, n1); - - SQR_TC7 (r9, n9, u2, n14); - SQR_TC7 (r4, n4, u4, n4); - - tc7_sub (r12, &n12, u8, n15, u6, n6); - tc7_add (u8, &n15, u8, n15, u6, n6); - - SQR_TC7 (r10, n10, u8, n15); - SQR_TC7 (r11, n11, r12, n12); - - tc7_add (u7, &n7, a3, a3n, a1, a1n); - tc7_add (u6, &n6, a2, a2n, a0, a0n); - tc7_add (u6, &n6, u6, n6, a4, a4n); - tc7_add (u6, &n6, u6, n6, a6, a6n); - tc7_add (u7, &n7, u7, n7, a5, a5n); - tc7_add (r1, &n1, u7, n7, u6, n6); - tc7_sub (u6, &n6, u6, n6, u7, n7); - - SQR_TC7 (r7, n7, r1, n1); - SQR_TC7 (r6, n6, u6, n6); - - tc7_mul_1 (r13, &n13, a6, a6n, 729); - tc7_addmul_1 (r13, &n13, a5, a5n, 243); - tc7_addmul_1 (r13, &n13, a4, a4n, 81); - tc7_addmul_1 (r13, &n13, a3, a3n, 27); - tc7_addmul_1 (r13, &n13, a2, a2n, 9); - tc7_addmul_1 (r13, &n13, a1, a1n, 3); - tc7_add (r13, &n13, r13, n13, a0, a0n); - - SQR_TC7 (r12, n12, r13, n13); - - SQR_TC7 (r1, n1, a6, a6n); - - SQR_TC7 (r13, n13, a0, a0n); - - TC7_DENORM(r1, n1, t7); - TC7_DENORM(r2, n2, t7); - TC7_DENORM(r3, n3, t7); - TC7_DENORM(r4, n4, t7); - TC7_DENORM(r5, n5, t7); - TC7_DENORM(r6, n6, t7); - TC7_DENORM(r7, n7, t7); - TC7_DENORM(r8, n8, t7); - TC7_DENORM(r9, n9, t7); - TC7_DENORM(r10, n10, t7); - TC7_DENORM(r11, n11, t7); - TC7_DENORM(r12, n12, t7); - TC7_DENORM(r13, n13, t7); - - toom7_interpolate(rp, &rpn, sn, tp, t7 - 1, n2, n6, n8, n9, n11); - - if (rpn != 2*n) - { - MPN_ZERO((rp + rpn), 2*n - rpn); - } - - __GMP_FREE_FUNC_LIMBS (tp, 13*t7 + 7*(sn+1)); -} - -/* - Toom 7 interpolation. Interpolates the value at 2^(sn*B) of a - polynomial p(x) with 13 coefficients given the values - p(oo), p(-2), 2^12*p(1/2), p(4), p(2), p(-1), p(1), - 2^12*p(-1/2), p(-4), 4^12*p(1/4), 4^12*p(-1/4), p(3), p(0) - The values are assumed to be stored in tp, each separated by - s7 + 1 limbs, each of no more than s7 limbs. - The output is placed in rp and the final number of limbs of the - output is given in rpn. - The 2nd, 6th, 8th, 9th and 11th values may be negative, and if so, - n2, n6, n8, n9 and n11 should be set to a negative value respectively. - -*/ - -void toom7_interpolate(mp_ptr rp, mp_size_t * rpn, mp_size_t sn, - mp_ptr tp, mp_size_t s7, mp_size_t n2, mp_size_t n6, - mp_size_t n8, mp_size_t n9, mp_size_t n11) -{ - - mp_size_t n1, n3, n4, n5, n7, n10, n12, n13, t7; - - t7 = s7 + 1; - - // Marco Bodrato's auto generated sequence - - if (n9 < 0) - mpn_add_n(r9, r4, r9, s7); - else - mpn_sub_n(r9, r4, r9, s7); - /* r9 is now in 2s complement form */ - - TC7_RSHIFT1(r9, s7); - - mpn_submul_1(r4, r1, s7, 16777216); - - mpn_sub_n(r12, r12, r5, s7); - -#if HAVE_NATIVE_mpn_subadd_n - mpn_subadd_n(r4, r4, r9, r13, s7); -#else - mpn_sub_n(r4, r4, r9, s7); - mpn_sub_n(r4, r4, r13, s7); -#endif - - if (n2 < 0) - mpn_add_n(r2, r5, r2, s7); - else - mpn_sub_n(r2, r5, r2, s7); - /* r2 is now in 2s complement form */ - - TC7_RSHIFT1(r2, s7); - -#if HAVE_NATIVE_mpn_subadd_n - mpn_subadd_n(r5, r5, r13, r2, s7); -#else - mpn_sub_n(r5, r5, r13, s7); - mpn_sub_n(r5, r5, r2, s7); -#endif - - mpn_submul_1(r5, r1, s7, 4096); - - if (n6 < 0) - mpn_add_n(r6, r7, r6, s7); - else - mpn_sub_n(r6, r7, r6, s7); - /* r6 is now in 2s complement form */ - - TC7_RSHIFT1(r6, s7); - - mpn_sub_n(r7, r7, r13, s7); - - mpn_sub_n(r12, r12, r7, s7); - -#if HAVE_NATIVE_mpn_subadd_n - mpn_subadd_n(r7, r7, r6, r1, s7); -#else - mpn_sub_n(r7, r7, r6, s7); - mpn_sub_n(r7, r7, r1, s7); -#endif - - if (n8 < 0) - mpn_add_n(r8, r3, r8, s7); - else - mpn_sub_n(r8, r3, r8, s7); - /* r8 is now in 2s complement form */ - - TC7_RSHIFT1(r8, s7); - -#if HAVE_NATIVE_mpn_subadd_n - mpn_subadd_n(r3, r3, r8, r1, s7); -#else - mpn_sub_n(r3, r3, r8, s7); - mpn_sub_n(r3, r3, r1, s7); -#endif - - mpn_add_n(r3, r5, r3, s7); - - mpn_submul_1(r3, r13, s7, 4096); - - mpn_submul_1(r3, r7, s7, 128); - - if (n11 < 0) - mpn_add_n(r11, r10, r11, s7); - else - mpn_sub_n(r11, r10, r11, s7); - /* r11 is now in 2s complement form */ - - TC7_RSHIFT1(r11, s7); - -#if HAVE_NATIVE_mpn_subadd_n - mpn_subadd_n(r10, r10, r11, r1, s7); -#else - mpn_sub_n(r10, r10, r11, s7); - mpn_sub_n(r10, r10, r1, s7); -#endif - - mpn_add_n(r10, r4, r10, s7); - - mpn_submul_1(r10, r13, s7, 16777216); - - mpn_submul_1(r10, r7, s7, 8192); - - mpn_submul_1(r10, r3, s7, 400); - - mpn_divexact_1(r10, r10, s7, 680400); - - mpn_submul_1(r3, r10, s7, 900); - - mpn_divexact_1(r3, r3, s7, 144); - - mpn_submul_1(r5, r3, s7, 16); - - mpn_sub_n(r12, r12, r5, s7); - - mpn_submul_1(r12, r3, s7, 64); - -#if HAVE_NATIVE_mpn_subadd_n - mpn_subadd_n(r7, r7, r3, r10, s7); -#else - mpn_sub_n(r7, r7, r3, s7); - mpn_sub_n(r7, r7, r10, s7); -#endif - - mpn_submul_1(r5, r7, s7, 64); - - mpn_submul_1(r4, r7, s7, 4096); - - mpn_submul_1(r4, r5, s7, 4); - - mpn_submul_1(r4, r3, s7, 256); - - mpn_submul_1(r5, r10, s7, 4); - - mpn_submul_1(r4, r5, s7, 268); - - mpn_divexact_1(r4, r4, s7, 771120); - - mpn_submul_1(r12, r5, s7, 25); - - mpn_submul_1(r12, r7, s7, 600); - - mpn_submul_1(r12, r4, s7, 31500); - - mpn_submul_1(r12, r1, s7, 527344); - - mpn_submul_1(r5, r4, s7, 1020); - - mpn_divexact_byBm1of(r5, r5, s7, CNST_LIMB(15), CNST_LIMB(~0/15)); - - TC7_DIVEXACT_2EXP(r5, s7, 4); - - mpn_sub_n(r10, r10, r4, s7); - - mpn_sub_n(r3, r3, r5, s7); - - mpn_add_n(r11, r9, r11, s7); - - mpn_add_n(r8, r2, r8, s7); - - mpn_submul_1(r11, r6, s7, 17408); - - mpn_submul_1(r8, r6, s7, 160); - - mpn_submul_1(r11, r8, s7, 680); - - mpn_divexact_1(r11, r11, s7, 2891700); - - mpn_submul_1(r8, r11, s7, 1890); - - mpn_divexact_1(r8, r8, s7, 360); - -#if HAVE_NATIVE_mpn_subadd_n - mpn_subadd_n(r6, r6, r11, r8, s7); -#else - mpn_sub_n(r6, r6, r11, s7); - mpn_sub_n(r6, r6, r8, s7); -#endif - - mpn_submul_1(r12, r6, s7, 210); - - mpn_submul_1(r12, r8, s7, 18); - - mpn_submul_1(r9, r6, s7, 1024); - - mpn_submul_1(r9, r8, s7, 64); - - mpn_submul_1(r9, r11, s7, 4); - - mpn_submul_1(r2, r6, s7, 32); - - mpn_submul_1(r2, r8, s7, 8); - - mpn_submul_1(r2, r11, s7, 2); - - mpn_submul_1(r9, r2, s7, 160); - - mpn_divexact_1(r9, r9, s7, 11340); - - mpn_divexact_by3(r2, r2, s7); - - TC7_RSHIFT1(r2, s7); - - mpn_sub_n(r2, r2, r9, s7); - - mpn_lshift(r12, r12, s7, 3); - - mpn_submul_1(r12, r9, s7, 5649); - - mpn_com_n(r12, r12, s7); //r12 = -r12; - - mpn_add_1(r12, r12, s7, 1); - - mpn_addmul_1(r12, r2, s7, 924); - - mpn_divexact_1(r12, r12, s7, 525525); - - mpn_submul_1(r9, r12, s7, 341); - - mpn_sub_n(r11, r11, r12, s7); - - TC7_DIVEXACT_2EXP(r9, s7, 4); - - mpn_submul_1(r2, r9, s7, 68); - - mpn_sub_n(r8, r8, r9, s7); - - TC7_DIVEXACT_2EXP(r2, s7, 4); - - mpn_sub_n(r6, r6, r2, s7); - - TC7_NORM(r1, n1, s7); - TC7_NORM(r2, n2, s7); - TC7_NORM(r3, n3, s7); - TC7_NORM(r4, n4, s7); - TC7_NORM(r5, n5, s7); - TC7_NORM(r6, n6, s7); - TC7_NORM(r7, n7, s7); - TC7_NORM(r8, n8, s7); - TC7_NORM(r9, n9, s7); - TC7_NORM(r10, n10, s7); - TC7_NORM(r11, n11, s7); - TC7_NORM(r12, n12, s7); - TC7_NORM(r13, n13, s7); - - *rpn = 0; - tc7_copy(rp, rpn, 0, r13, n13); - tc7_copy(rp, rpn, sn, r11, n11); - tc7_copy(rp, rpn, 2*sn, r10, n10); - tc7_copy(rp, rpn, 3*sn, r8, n8); - tc7_copy(rp, rpn, 4*sn, r3, n3); - tc7_copy(rp, rpn, 5*sn, r6, n6); - tc7_copy(rp, rpn, 6*sn, r7, n7); - tc7_copy(rp, rpn, 7*sn, r2, n2); - tc7_copy(rp, rpn, 8*sn, r5, n5); - tc7_copy(rp, rpn, 9*sn, r9, n9); - tc7_copy(rp, rpn, 10*sn, r4, n4); - tc7_copy(rp, rpn, 11*sn, r12, n12); - tc7_copy(rp, rpn, 12*sn, r1, n1); -} - -#if TC7_TEST -int tc7_test(mp_ptr up, mp_ptr vp, mp_size_t n) -{ - mp_limb_t * rp1 = malloc(2*n*sizeof(mp_limb_t)); - mp_limb_t * rp2 = malloc(2*n*sizeof(mp_limb_t)); - - mpn_mul_n(rp1, up, vp, n); - mpn_toom7_mul_n(rp2, up, vp, n); - - mp_size_t i; - for (i = 0; i < 2*n; i++) - { - if (rp1[i] != rp2[i]) - { - printf("First error in limb %d\n", i); - free(rp1); - free(rp2); - return 0; - } - } - - free(rp1); - free(rp2); - return 1; -} - -mp_size_t randsize(mp_size_t limit) -{ - static uint64_t randval = 4035456057U; - randval = ((uint64_t)randval*(uint64_t)1025416097U+(uint64_t)286824430U)%(uint64_t)4294967311U; - - if (limit == 0L) return (mp_size_t) randval; - - return (mp_size_t) randval%limit; -} - -int main(void) -{ - mp_limb_t * up = malloc(20000*sizeof(mp_limb_t)); - mp_limb_t * vp = malloc(20000*sizeof(mp_limb_t)); - gmp_randstate_t rands; - gmp_randinit_default(rands); - - mp_size_t i, n; - for (i = 0; i < 20000; i++) - { - n = randsize(15000) + 500; - printf("n = %d\n", n); - mpn_rrandom(up, rands, n); - mpn_rrandom(vp, rands, n); - if (!tc7_test(up, vp, n)) break; - } - - free(up); - free(vp); - - return 0; -} -#endif - -#if TC7_TIME -int main(void) -{ - mp_limb_t * up = malloc(40096*sizeof(mp_limb_t)); - mp_limb_t * vp = malloc(40096*sizeof(mp_limb_t)); - mp_limb_t * rp = malloc(80192*sizeof(mp_limb_t)); - - mp_size_t i, n; - gmp_randstate_t rands; - gmp_randinit_default(rands); - n = 2048; - mpn_randomb(up, rands, n); - mpn_randomb(vp, rands, n); - for (i = 0; i < 50000; i++) - { - if ((i & 31) == 0) - { - mpn_randomb(up, rands, n); - mpn_randomb(vp, rands, n); - } - //mpn_mul_n(rp, up, vp, n); - mpn_toom7_mul_n(rp, up, vp, n); - } - - free(up); - free(vp); - free(rp); - - return 0; -} -#endif - -/* Bill Hart's hand derived interpolation sequence (slightly slower than Bodrato's) - - The following code verifies this sequence in Sage: - -A=Matrix(ZZ,13) -A[0]=[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] -A[1]=[16777216, 4194304, 1048576, 262144, 65536, 16384, 4096, 1024, 256, 64, 16, 4, 1] -A[2]=[16777216, -4194304, 1048576, -262144, 65536, -16384, 4096, -1024, 256, -64, 16, -4, 1] -A[3]=[531441, 177147, 59049, 19683, 6561, 2187, 729, 243, 81, 27, 9, 3, 1] -A[4]=[4096, 2048, 1024, 512, 256, 128, 64, 32, 16, 8, 4, 2, 1] -A[5]=[4096, -2048, 1024, -512, 256, -128, 64, -32, 16, -8, 4, -2, 1] -A[6]=[1, 4, 16, 64, 256, 1024, 4096, 16384, 65536, 262144, 1048576, 4194304, 16777216] -A[7]=[1, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024, 2048, 4096] -A[8]=[1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1] -A[9]=[1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1, -1, 1] -A[10]=[1, -2, 4, -8, 16, -32, 64, -128, 256, -512, 1024, -2048, 4096] -A[11]=[1, -4, 16, -64, 256, -1024, 4096, -16384, 65536, -262144, 1048576, -4194304, 16777216] -A[12]=[0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1] - -A[1]=A[1]-A[2] -A[1]=A[1]/8 -A[2]=A[2]-16777216*A[0] -A[4]=A[4]-A[5] -A[5]=A[5]-4096*A[0] - -A[3]=A[3]-531441*A[0] -A[6]=A[6]-A[11] -A[11]=A[11]-16777216*A[12] -A[10]=A[10]-A[7] - -A[7]=A[7]-4096*A[12] -A[8]=A[8]-A[9] -A[8]=A[8]/2 -A[10]=A[10]/4 -A[6]=A[6]+16384*A[10] -A[2]=A[2]+4*A[1] - -A[6]=A[6]/8 -A[11]=A[11]-4*A[6] -A[6]=A[6]-1024*A[10] -A[7]=A[7]+2*A[10] -A[11]=A[11]-1024*A[7] -A[9]=A[9]+A[8] - -A[4]=A[4]+2*A[5] -A[4]=A[4]/2 -A[5]=A[5]-A[4] -A[1]=A[1]+512*A[5] -A[5]=A[5]+2048*A[8] -A[10]=A[10]+A[8] -A[5]=A[5]+2*A[10] -A[5]=A[5]/90 -A[11]=A[11]+1023*A[0] - -A[7]=A[7]-A[0] -A[1]=A[1]-A[10] -A[1]=A[1]+11565*A[5] - -A[9]=A[9]-A[0] -A[9]=A[9]-A[12] -A[4]=A[4]-A[12] -A[2]=A[2]-1024*A[4] -A[2]=A[2]-A[12] - -A[10]=A[10]+1023*A[8] -A[11]=A[11]-8*A[10] -A[6]=A[6]+A[10] -A[6]=A[6]+11520*A[5] -A[6]=A[6]/11340 -A[5]=A[5]-A[6] -A[11]=A[11]-92160*A[5] -A[1]=A[1]-11340*A[5] -A[1]=A[1]/170100 -A[7]=A[7]-4*A[9] - -A[5]=A[5]-A[1] -A[8]=A[8]-A[1] -A[10]=A[10]+15*A[5] -A[10]=A[10]-1023*A[8] -A[10]=A[10]+3*A[6] -A[10]=A[10]-1068*A[1] - -A[2]=A[2]+4*A[7] -A[2]=A[2]/720 - -A[4]=A[4]-4*A[9] -A[11]=A[11]+4*A[4] -A[11]=A[11]-9360*A[2] -A[11]=A[11]-1440*A[6] - -A[7]=A[7]-1020*A[9] -A[7]=A[7]+A[4] -A[7]=A[7]-36*A[2] -A[11]=A[11]-280*A[7] -A[11]=A[11]/129600 - -A[7]=A[7]/432 -A[4]=A[4]-12*A[7] -A[2]=A[2]+13*A[7] -A[2]=A[2]+20*A[11] -A[2]=A[2]/21 - A[2]=-A[2] - -A[7]=A[7]-5*A[11] -A[7]=A[7]/21 - -A[9]=A[9]-A[11] -A[9]=A[9]-A[7] -A[6]=A[6]-16*A[1] - -A[5]=A[5]-17*A[8] -A[5]=A[5]+A[6] -A[5]=A[5]-4*A[1] - -A[4]=A[4]-12*A[2] -A[4]=A[4]/1020 - -A[9]=A[9]-A[2] -A[5]=A[5]/17 - A[5]=-A[5] -A[8]=A[8]-A[5] -A[9]=A[9]-A[4] - -A[3]=A[3]-A[12] -A[3]=A[3]-3*A[5] -A[3]=A[3]-9*A[9] -A[3]=A[3]-27*A[8] -A[3]=A[3]-81*A[2] -A[3]=A[3]-243*A[1] -A[3]=A[3]-729*A[11] -A[3]=A[3]-486*A[6] -A[3]=A[3]-6561*A[7] -A[10]=A[10]+1023*A[5] -A[3]=A[3]-59049*A[4] -A[3]=A[3]*31-A[10]*5368 -A[3]=A[3]/95550 -A[6]=A[6]-17*A[3] -A[10]=A[10]-48*A[3] -A[10]=A[10]/1023 -A[6]=A[6]/4 -A[1]=A[1]-A[6] -A[8]=A[8]-A[3] -A[5]=A[5]-A[10] - - tc7_sub(r4, &n4, r4, n4, r9, n9); - tc7_rshift_inplace(r4, &n4, 3); - tc7_submul_1(r9, &n9, r1, n1, 16777216); - tc7_sub(r5, &n5, r5, n5, r2, n2); - tc7_submul_1(r2, &n2, r1, n1, 4096); - - tc7_submul_1(r12, &n12, r1, n1, 531441); - tc7_sub(r10, &n10, r10, n10, r11, n11); - tc7_submul_1(r11, &n11, r13, n13, 16777216); - tc7_sub(r8, &n8, r8, n8, r3, n3); - - tc7_submul_1(r3, &n3, r13, n13, 4096); - tc7_sub(r7, &n7, r7, n7, r6, n6); - tc7_rshift_inplace(r7, &n7, 1); - tc7_rshift_inplace(r8, &n8, 2); - tc7_addmul_1(r10, &n10, r8, n8, 16384); - tc7_addmul_1(r9, &n9, r4, n4, 4); - - tc7_rshift_inplace(r10, &n10, 3); - tc7_submul_1(r11, &n11, r10, n10, 4); - tc7_submul_1(r10, &n10, r8, n8, 1024); - tc7_addmul_1(r3, &n3, r8, n8, 2); - tc7_submul_1(r11, &n11, r3, n3, 1024); - tc7_add(r6, &n6, r6, n6, r7, n7); - - tc7_addmul_1(r5, &n5, r2, n2, 2); - tc7_rshift_inplace(r5, &n5, 1); - tc7_sub(r2, &n2, r2, n2, r5, n5); - tc7_addmul_1(r4, &n4, r2, n2, 512); - tc7_addmul_1(r2, &n2, r7, n7, 2048); - tc7_add(r8, &n8, r8, n8, r7, n7); - tc7_addmul_1(r2, &n2, r8, n8, 2); - tc7_divexact_1(r2, &n2, r2, n2, 90); - tc7_addmul_1(r11, &n11, r1, n1, 1023); - - tc7_sub(r3, &n3, r3, n3, r1, n1); - tc7_sub(r4, &n4, r4, n4, r8, n8); - tc7_addmul_1(r4, &n4, r2, n2, 11565); - - tc7_sub(r6, &n6, r6, n6, r1, n1); - tc7_sub(r6, &n6, r6, n6, r13, n13); - tc7_sub(r5, &n5, r5, n5, r13, n13); - tc7_submul_1(r9, &n9, r5, n5, 1024); - tc7_sub(r9, &n9, r9, n9, r13, n13); - - tc7_addmul_1(r8, &n8, r7, n7, 1023); - tc7_submul_1(r11, &n11, r8, n8, 8); - tc7_add(r10, &n10, r10, n10, r8, n8); - tc7_addmul_1(r10, &n10, r2, n2, 11520); - tc7_divexact_1(r10, &n10, r10, n10, 11340); - tc7_sub(r2, &n2, r2, n2, r10, n10); - tc7_submul_1(r11, &n11, r2, n2, 92160); - tc7_submul_1(r4, &n4, r2, n2, 11340); - tc7_divexact_1(r4, &n4, r4, n4, 170100); - tc7_submul_1(r3, &n3, r6, n6, 4); - - tc7_sub(r2, &n2, r2, n2, r4, n4); - tc7_sub(r7, &n7, r7, n7, r4, n4); - tc7_addmul_1(r8, &n8, r2, n2, 15); - tc7_submul_1(r8, &n8, r7, n7, 1023); - tc7_addmul_1(r8, &n8, r10, n10, 3); - tc7_submul_1(r8, &n8, r4, n4, 1068); - - tc7_addmul_1(r9, &n9, r3, n3, 4); - tc7_divexact_1(r9, &n9, r9, n9, 720); - - tc7_submul_1(r5, &n5, r6, n6, 4); - tc7_addmul_1(r11, &n11, r5, n5, 4); - tc7_submul_1(r11, &n11, r9, n9, 9360); - tc7_submul_1(r11, &n11, r10, n10, 1440); - - tc7_submul_1(r3, &n3, r6, n6, 1020); - tc7_add(r3, &n3, r3, n3, r5, n5); - tc7_submul_1(r3, &n3, r9, n9, 36); - tc7_submul_1(r11, &n11, r3, n3, 280); - tc7_divexact_1(r11, &n11, r11, n11, 129600); - - tc7_divexact_1(r3, &n3, r3, n3, 432); - tc7_submul_1(r5, &n5, r3, n3, 12); - tc7_addmul_1(r9, &n9, r3, n3, 13); - tc7_addmul_1(r9, &n9, r11, n11, 20); - tc7_divexact_1(r9, &n9, r9, n9, 21); - n9 = -n9; - - tc7_submul_1(r3, &n3, r11, n11, 5); - tc7_divexact_1(r3, &n3, r3, n3, 21); - - tc7_sub(r6, &n6, r6, n6, r11, n11); - tc7_sub(r6, &n6, r6, n6, r3, n3); - tc7_submul_1(r10, &n10, r4, n4, 16); - - tc7_submul_1(r2, &n2, r7, n7, 17); - tc7_add(r2, &n2, r2, n2, r10, n10); - tc7_submul_1(r2, &n2, r4, n4, 4); - - tc7_submul_1(r5, &n5, r9, n9, 12); - tc7_divexact_1(r5, &n5, r5, n5, 1020); - - tc7_sub(r6, &n6, r6, n6, r9, n9); - tc7_divexact_1(r2, &n2, r2, n2, 17); - n2 = -n2; - tc7_sub(r7, &n7, r7, n7, r2, n2); - tc7_sub(r6, &n6, r6, n6, r5, n5); - - tc7_sub(r12, &n12, r12, n12, r13, n13); - tc7_submul_1(r12, &n12, r2, n2, 3); - tc7_submul_1(r12, &n12, r6, n6, 9); - tc7_submul_1(r12, &n12, r7, n7, 27); - tc7_submul_1(r12, &n12, r9, n9, 81); - tc7_submul_1(r12, &n12, r4, n4, 243); - tc7_submul_1(r12, &n12, r11, n11, 729); - tc7_submul_1(r12, &n12, r10, n10, 486); - tc7_submul_1(r12, &n12, r3, n3, 6561); - tc7_addmul_1(r8, &n8, r2, n2, 1023); - tc7_submul_1(r12, &n12, r5, n5, 59049); - tc7_mul_1(r12, &n12, r12, n12, 31); - tc7_submul_1(r12, &n12, r8, n8, 5368); - tc7_divexact_1(r12, &n12, r12, n12, 95550); - tc7_submul_1(r10, &n10, r12, n12, 17); - tc7_submul_1(r8, &n8, r12, n12, 48); - tc7_divexact_1(r8, &n8, r8, n8, 1023); - tc7_rshift_inplace(r10, &n10, 2); - tc7_sub(r4, &n4, r4, n4, r10, n10); - tc7_sub(r7, &n7, r7, n7, r12, n12); - tc7_sub(r2, &n2, r2, n2, r8, n8); - - rpn = 0; - tc7_copy(rp, &rpn, 0, r13, n13); - tc7_copy(rp, &rpn, sn, r2, n2); - tc7_copy(rp, &rpn, 2*sn, r6, n6); - tc7_copy(rp, &rpn, 3*sn, r7, n7); - tc7_copy(rp, &rpn, 4*sn, r9, n9); - tc7_copy(rp, &rpn, 5*sn, r4, n4); - tc7_copy(rp, &rpn, 6*sn, r11, n11); - tc7_copy(rp, &rpn, 7*sn, r10, n10); - tc7_copy(rp, &rpn, 8*sn, r3, n3); - tc7_copy(rp, &rpn, 9*sn, r12, n12); - tc7_copy(rp, &rpn, 10*sn, r5, n5); - tc7_copy(rp, &rpn, 11*sn, r8, n8); - tc7_copy(rp, &rpn, 12*sn, r1, n1); - -*/ diff --git a/mpn/x86_64w/core2/gmp-mparam.h b/mpn/x86_64w/core2/gmp-mparam.h index e6a3a89d..76f20920 100644 --- a/mpn/x86_64w/core2/gmp-mparam.h +++ b/mpn/x86_64w/core2/gmp-mparam.h @@ -3,14 +3,12 @@ #define MUL_KARATSUBA_THRESHOLD 22 #define MUL_TOOM3_THRESHOLD 129 #define MUL_TOOM4_THRESHOLD 214 -#define MUL_TOOM7_THRESHOLD 381 #define MUL_TOOM8H_THRESHOLD 400 #define SQR_BASECASE_THRESHOLD 0 /* always (native) */ #define SQR_KARATSUBA_THRESHOLD 28 #define SQR_TOOM3_THRESHOLD 121 #define SQR_TOOM4_THRESHOLD 121 -#define SQR_TOOM7_THRESHOLD 399 #define SQR_TOOM8_THRESHOLD 650 #define MULLOW_BASECASE_THRESHOLD 8 diff --git a/mpn/x86_64w/k8/gmp-mparam.h b/mpn/x86_64w/k8/gmp-mparam.h index 7681ea2c..1b756634 100644 --- a/mpn/x86_64w/k8/gmp-mparam.h +++ b/mpn/x86_64w/k8/gmp-mparam.h @@ -4,14 +4,12 @@ #define MUL_KARATSUBA_THRESHOLD 30 #define MUL_TOOM3_THRESHOLD 113 #define MUL_TOOM4_THRESHOLD 398 -#define MUL_TOOM7_THRESHOLD 399 #define MUL_TOOM8H_THRESHOLD 400 #define SQR_BASECASE_THRESHOLD 0 /* always (native) */ #define SQR_KARATSUBA_THRESHOLD 36 #define SQR_TOOM3_THRESHOLD 129 #define SQR_TOOM4_THRESHOLD 134 -#define SQR_TOOM7_THRESHOLD 399 #define SQR_TOOM8_THRESHOLD 650 #define MULLOW_BASECASE_THRESHOLD 29 diff --git a/mpn/x86_64w/k8/k10/gmp-mparam.h b/mpn/x86_64w/k8/k10/gmp-mparam.h index a900fe93..c6fdafc9 100644 --- a/mpn/x86_64w/k8/k10/gmp-mparam.h +++ b/mpn/x86_64w/k8/k10/gmp-mparam.h @@ -1,77 +1,75 @@ - -/* Generated by tuneup.c, 2009-10-20, system compiler */ - -#define MUL_KARATSUBA_THRESHOLD 30 -#define MUL_TOOM3_THRESHOLD 113 -#define MUL_TOOM4_THRESHOLD 858 -#define MUL_TOOM7_THRESHOLD 858 - -#define SQR_BASECASE_THRESHOLD 0 /* always (native) */ -#define SQR_KARATSUBA_THRESHOLD 36 -#define SQR_TOOM3_THRESHOLD 129 -#define SQR_TOOM4_THRESHOLD 134 -#define SQR_TOOM7_THRESHOLD 144 - -#define MULLOW_BASECASE_THRESHOLD 29 -#define MULLOW_DC_THRESHOLD 29 -#define MULLOW_MUL_THRESHOLD 9970 - -#define MULHIGH_BASECASE_THRESHOLD 0 /* always */ -#define MULHIGH_DC_THRESHOLD 4 -#define MULHIGH_MUL_THRESHOLD 4 - -#define MULMOD_2EXPM1_THRESHOLD 2 - -#define DIV_SB_PREINV_THRESHOLD MP_SIZE_T_MAX /* never */ -#define DIV_DC_THRESHOLD 64 -#define POWM_THRESHOLD 391 -#define FAC_UI_THRESHOLD 32756 - -#define GCD_ACCEL_THRESHOLD 866 -#define GCDEXT_THRESHOLD 298 -#define JACOBI_BASE_METHOD 2 - -#define DIVREM_1_NORM_THRESHOLD MP_SIZE_T_MAX /* never */ -#define DIVREM_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */ -#define MOD_1_NORM_THRESHOLD MP_SIZE_T_MAX /* never */ -#define MOD_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */ -#define USE_PREINV_DIVREM_1 0 -#define USE_PREINV_MOD_1 1 -#define DIVREM_2_THRESHOLD MP_SIZE_T_MAX /* never */ -#define DIVEXACT_1_THRESHOLD 0 /* always (native) */ -#define MODEXACT_1_ODD_THRESHOLD 0 /* always (native) */ -#define MOD_1_1_THRESHOLD 3 -#define MOD_1_2_THRESHOLD 4 -#define MOD_1_3_THRESHOLD 5 -#define DIVREM_HENSEL_QR_1_THRESHOLD 39 -#define RSH_DIVREM_HENSEL_QR_1_THRESHOLD 3 -#define DIVREM_EUCLID_HENSEL_THRESHOLD 8 - -#define ROOTREM_THRESHOLD 1 - -#define GET_STR_DC_THRESHOLD 20 -#define GET_STR_PRECOMPUTE_THRESHOLD 22 -#define SET_STR_THRESHOLD 7764 - -#define MUL_FFT_TABLE { 656, 800, 4032, 4864, 11264, 36864, 0 } -#define MUL_FFT_MODF_THRESHOLD 656 -#define MUL_FFT_THRESHOLD 7680 - -#define SQR_FFT_TABLE { 624, 1568, 3904, 4352, 11264, 36864, 0 } -#define SQR_FFT_MODF_THRESHOLD 624 -#define SQR_FFT_THRESHOLD 5888 - -/* Tuneup completed successfully, took 14 seconds */ - -#define MUL_FFT_TABLE2 {{1, 3}, {205, 4}, {210, 3}, {220, 4}, {260, 3}, {266, 4}, {272, 3}, {278, 4}, {285, 3}, {292, 4}, {413, 5}, {433, 4}, {443, 5}, {463, 6}, {474, 5}, {485, 4}, {507, 5}, {850, 4}, {869, 5}, {889, 3}, {909, 4}, {929, 2}, {950, 3}, {971, 4}, {993, 5}, {1015, 6}, {1038, 5}, {1061, 6}, {1109, 7}, {1134, 6}, {1159, 5}, {1185, 6}, {1612, 5}, {1648, 6}, {2144, 4}, {2191, 5}, {2239, 6}, {2289, 7}, {2340, 6}, {2445, 7}, {2499, 6}, {2668, 7}, {2727, 6}, {2849, 5}, {2912, 4}, {2976, 5}, {3042, 6}, {3320, 5}, {3393, 6}, {3468, 7}, {3544, 5}, {3622, 6}, {3784, 5}, {3867, 6}, {3952, 5}, {4039, 6}, {4219, 7}, {4312, 8}, {4914, 7}, {5022, 8}, {5245, 6}, {5360, 7}, {5598, 8}, {6241, 6}, {6378, 7}, {6518, 8}, {6807, 9}, {6957, 8}, {7110, 9}, {7266, 8}, {7426, 9}, {7756, 8}, {8100, 7}, {8278, 8}, {8460, 9}, {8646, 7}, {8836, 8}, {9638, 9}, {9850, 8}, {10744, 9}, {10980, 8}, {11221, 9}, {12239, 8}, {12507, 9}, {13347, 8}, {14245, 9}, {15202, 10}, {15535, 9}, {16224, 8}, {16580, 9}, {16944, 8}, {17316, 9}, {18084, 8}, {18480, 9}, {19299, 10}, {19722, 9}, {25581, 10}, {26715, 9}, {29772, 10}, {31772, 9}, {33906, 10}, {36184, 9}, {37787, 10}, {40326, 9}, {41210, 10}, {58292, 11}, {62207, 12}, {63570, 10}, {70845, 11}, {72397, 10}, {73983, 11}, {80681, 10}, {89912, 11}, {98051, 10}, {102394, 11}, {114110, 12}, {127165, 11}, {129950, 10}, {132796, 11}, {164920, 12}, {168532, 11}, {172223, 12}, {196129, 11}, {228243, 12}, {233241, 13}, {254354, 12}, {259924, 11}, {283451, 12}, {289658, 11}, {296001, 12}, {329862, 11}, {344468, 12}, {456509, 13}, {519871, 12}, {659749, 13}, {784582, 12}, {933036, 13}, {953467, 14}, {MP_SIZE_T_MAX,0}} - -#define MUL_FFTM_TABLE2 {{1, 3}, {205, 4}, {215, 3}, {220, 4}, {377, 5}, {386, 4}, {404, 5}, {423, 4}, {433, 5}, {507, 4}, {519, 5}, {555, 4}, {568, 5}, {813, 6}, {831, 4}, {850, 5}, {869, 6}, {889, 5}, {909, 4}, {929, 5}, {950, 6}, {971, 5}, {993, 6}, {1109, 5}, {1134, 6}, {1238, 5}, {1266, 6}, {2144, 7}, {2191, 6}, {2289, 7}, {2340, 6}, {2392, 7}, {2445, 6}, {2499, 7}, {3109, 5}, {3178, 6}, {3248, 7}, {3544, 5}, {3622, 6}, {3702, 4}, {3784, 5}, {3867, 6}, {3952, 7}, {4039, 5}, {4128, 6}, {4219, 7}, {4312, 5}, {4407, 6}, {4504, 7}, {4914, 8}, {5022, 6}, {5132, 7}, {6107, 8}, {6241, 9}, {6378, 8}, {6518, 7}, {6661, 8}, {6807, 7}, {7110, 8}, {7266, 9}, {7426, 8}, {7589, 9}, {7756, 8}, {8278, 9}, {8460, 8}, {9030, 9}, {9228, 10}, {9431, 9}, {9638, 8}, {10066, 7}, {10287, 8}, {10980, 9}, {11467, 7}, {11719, 8}, {12239, 7}, {12507, 8}, {12781, 9}, {13939, 10}, {14245, 8}, {15535, 9}, {15876, 10}, {16224, 8}, {16580, 9}, {18885, 10}, {19299, 8}, {19722, 9}, {20154, 10}, {20596, 8}, {21047, 9}, {22461, 10}, {22953, 9}, {25581, 10}, {26142, 9}, {26715, 10}, {27898, 9}, {29134, 10}, {31091, 9}, {31772, 10}, {34649, 9}, {35408, 10}, {36184, 11}, {36977, 9}, {37787, 10}, {40326, 9}, {41210, 10}, {42113, 9}, {43036, 10}, {46934, 11}, {47962, 10}, {52305, 9}, {53451, 10}, {57042, 11}, {58292, 10}, {59569, 11}, {62207, 12}, {63570, 10}, {69326, 11}, {72397, 10}, {73983, 11}, {80681, 10}, {84254, 11}, {91881, 12}, {93893, 11}, {98051, 10}, {102394, 11}, {116609, 12}, {127165, 11}, {147989, 12}, {161386, 11}, {164920, 12}, {168532, 11}, {175995, 12}, {196129, 11}, {204813, 12}, {228243, 13}, {243569, 12}, {248903, 13}, {254354, 12}, {259924, 11}, {265616, 12}, {359720, 13}, {392279, 12}, {456509, 13}, {466506, 14}, {508731, 13}, {519871, 12}, {566925, 13}, {579340, 12}, {592026, 13}, {674196, 12}, {688959, 13}, {913042, 14}, {MP_SIZE_T_MAX,0}} - -#define SQR_FFT_TABLE2 {{1, 3}, {205, 4}, {210, 3}, {220, 4}, {225, 3}, {230, 4}, {272, 3}, {278, 4}, {404, 5}, {423, 4}, {443, 5}, {665, 4}, {680, 5}, {929, 6}, {1294, 5}, {1323, 6}, {1476, 5}, {1509, 6}, {1922, 4}, {1965, 2}, {2009, 3}, {2098, 4}, {2144, 5}, {2191, 6}, {2340, 4}, {2392, 2}, {2849, 3}, {2912, 4}, {2976, 5}, {3042, 6}, {3109, 4}, {3178, 5}, {3248, 6}, {3320, 7}, {3784, 8}, {3867, 7}, {4128, 8}, {4219, 9}, {4312, 8}, {4407, 6}, {4504, 7}, {4603, 8}, {4704, 9}, {4808, 8}, {4914, 7}, {5132, 8}, {5478, 7}, {5598, 8}, {5976, 7}, {6107, 8}, {6518, 7}, {6661, 5}, {6807, 6}, {6957, 5}, {7110, 4}, {7266, 5}, {7426, 6}, {7589, 4}, {7756, 2}, {7926, 3}, {8460, 4}, {8646, 5}, {8836, 4}, {9228, 5}, {9431, 6}, {9638, 7}, {9850, 8}, {10744, 9}, {10980, 8}, {11221, 9}, {11719, 10}, {11976, 8}, {12239, 6}, {12781, 4}, {13061, 5}, {13347, 4}, {13640, 5}, {13939, 6}, {14245, 5}, {14557, 6}, {14876, 4}, {15202, 5}, {15876, 6}, {16224, 7}, {17316, 8}, {18084, 7}, {18885, 8}, {19299, 9}, {19722, 8}, {20154, 9}, {21047, 7}, {22953, 8}, {23456, 9}, {25581, 10}, {26142, 9}, {26715, 10}, {27898, 8}, {28509, 9}, {29772, 10}, {32468, 8}, {33179, 9}, {34649, 10}, {37787, 9}, {38615, 10}, {39461, 9}, {41210, 10}, {45928, 11}, {47962, 10}, {52305, 9}, {53451, 10}, {57042, 11}, {59569, 10}, {60874, 11}, {62207, 12}, {63570, 10}, {66386, 11}, {69326, 10}, {70845, 9}, {72397, 10}, {73983, 11}, {75604, 10}, {77260, 11}, {78952, 10}, {84254, 11}, {86099, 10}, {87985, 11}, {91881, 10}, {93893, 11}, {104637, 10}, {106929, 11}, {111664, 10}, {114110, 11}, {116609, 12}, {119163, 10}, {121773, 11}, {124440, 12}, {127165, 11}, {157927, 12}, {161386, 11}, {168532, 12}, {172223, 11}, {175995, 12}, {196129, 11}, {228243, 12}, {243569, 13}, {248903, 12}, {259924, 11}, {283451, 12}, {289658, 11}, {296001, 12}, {337086, 11}, {344468, 12}, {392279, 11}, {400869, 12}, {456509, 13}, {519871, 12}, {659749, 13}, {784582, 12}, {855595, 11}, {874331, 12}, {893477, 13}, {913042, 14}, {MP_SIZE_T_MAX,0}} - -#define SQR_FFTM_TABLE2 {{1, 3}, {215, 4}, {360, 2}, {368, 3}, {377, 4}, {404, 5}, {496, 4}, {507, 5}, {695, 6}, {711, 5}, {831, 6}, {850, 5}, {869, 6}, {1109, 5}, {1134, 6}, {1612, 4}, {1648, 5}, {1685, 6}, {1799, 7}, {1839, 6}, {1880, 7}, {1922, 6}, {1965, 7}, {2009, 8}, {2053, 6}, {2144, 7}, {2239, 6}, {2340, 7}, {2392, 6}, {2445, 7}, {2727, 6}, {2787, 7}, {2849, 6}, {2912, 7}, {3178, 6}, {3248, 7}, {3393, 6}, {3468, 4}, {3544, 5}, {3622, 4}, {3702, 5}, {3784, 4}, {3867, 5}, {3952, 6}, {4039, 4}, {4219, 5}, {4312, 6}, {4504, 4}, {4603, 3}, {4808, 4}, {4914, 3}, {5022, 4}, {5132, 5}, {5245, 6}, {5360, 4}, {5478, 3}, {5598, 2}, {5721, 3}, {5847, 4}, {5976, 5}, {6241, 6}, {6378, 5}, {6518, 6}, {6661, 7}, {6807, 5}, {6957, 6}, {7110, 7}, {7266, 6}, {7426, 7}, {7589, 8}, {7756, 7}, {7926, 8}, {8278, 7}, {8460, 6}, {8646, 7}, {8836, 6}, {9030, 5}, {9431, 4}, {9638, 5}, {9850, 6}, {10066, 5}, {10513, 6}, {10744, 7}, {10980, 8}, {11221, 9}, {12239, 7}, {12507, 6}, {13640, 7}, {14245, 8}, {14557, 9}, {15202, 7}, {16224, 8}, {16580, 9}, {16944, 8}, {17696, 9}, {18084, 7}, {18885, 8}, {19299, 9}, {19722, 8}, {21508, 9}, {22461, 10}, {24495, 8}, {25032, 9}, {25581, 10}, {26715, 9}, {27300, 10}, {27898, 9}, {29772, 10}, {30424, 8}, {32468, 9}, {33179, 10}, {33906, 8}, {34649, 9}, {35408, 10}, {36977, 11}, {37787, 10}, {40326, 9}, {42113, 10}, {43036, 9}, {43979, 10}, {44943, 11}, {45928, 9}, {46934, 10}, {50087, 11}, {52305, 10}, {53451, 11}, {55819, 10}, {57042, 11}, {59569, 9}, {60874, 10}, {62207, 11}, {63570, 9}, {64963, 10}, {66386, 11}, {67840, 10}, {70845, 11}, {73983, 9}, {75604, 10}, {77260, 11}, {78952, 12}, {80681, 10}, {82448, 11}, {89912, 10}, {93893, 11}, {104637, 10}, {109271, 11}, {114110, 12}, {127165, 11}, {129950, 10}, {132796, 11}, {135704, 12}, {138676, 11}, {151230, 12}, {164920, 11}, {172223, 12}, {196129, 11}, {209298, 12}, {228243, 13}, {233241, 12}, {238349, 13}, {243569, 12}, {248903, 13}, {254354, 12}, {337086, 13}, {344468, 12}, {352011, 13}, {392279, 12}, {456509, 13}, {466506, 12}, {476722, 13}, {497829, 14}, {508731, 13}, {519871, 12}, {566925, 13}, {579340, 12}, {592026, 13}, {659749, 12}, {688959, 13}, {801763, 12}, {819320, 13}, {913042, 14}, {MP_SIZE_T_MAX,0}} - -#define MUL_FFT_FULL_TABLE2 {{16, 1}, {1045, 2}, {1068, 1}, {1092, 2}, {1116, 4}, {1141, 1}, {1166, 4}, {1192, 2}, {1219, 1}, {1246, 3}, {1274, 1}, {1361, 2}, {1391, 1}, {1623, 3}, {1659, 1}, {1772, 2}, {1811, 1}, {2159, 4}, {2256, 1}, {2306, 2}, {2357, 1}, {2869, 4}, {3063, 1}, {3131, 2}, {3200, 4}, {3343, 2}, {3417, 1}, {3492, 2}, {3648, 4}, {3728, 5}, {3810, 6}, {3894, 4}, {3980, 3}, {4158, 2}, {4250, 4}, {4344, 2}, {4440, 4}, {4538, 5}, {4740, 4}, {4844, 6}, {4951, 4}, {5060, 3}, {5171, 2}, {5285, 3}, {5401, 6}, {5520, 3}, {5641, 1}, {5892, 2}, {6022, 4}, {6154, 3}, {6289, 2}, {6427, 4}, {6568, 3}, {6712, 2}, {6859, 4}, {7010, 2}, {7164, 4}, {7482, 1}, {7986, 2}, {8161, 4}, {8340, 2}, {8710, 5}, {9296, 6}, {9500, 3}, {9709, 1}, {10140, 3}, {10363, 1}, {11059, 4}, {11302, 1}, {11550, 4}, {11803, 2}, {12062, 4}, {12327, 3}, {12873, 1}, {14040, 4}, {14348, 2}, {14663, 1}, {15314, 3}, {15650, 1}, {15993, 2}, {16702, 1}, {17068, 2}, {17442, 1}, {19439, 3}, {19865, 1}, {20300, 2}, {20745, 1}, {26334, 3}, {26911, 1}, {32007, 2}, {33425, 1}, {34905, 2}, {35670, 1}, {36452, 3}, {37251, 4}, {38901, 1}, {40624, 3}, {41514, 1}, {42424, 3}, {43353, 1}, {46266, 2}, {47280, 4}, {50456, 1}, {52691, 3}, {53845, 1}, {57462, 4}, {61321, 1}, {62664, 2}, {64037, 1}, {65440, 2}, {66873, 5}, {68338, 8}, {72928, 5}, {74525, 6}, {76157, 4}, {77825, 1}, {81272, 3}, {83052, 2}, {84871, 3}, {86730, 1}, {88630, 2}, {90571, 1}, {92555, 2}, {94582, 1}, {96654, 2}, {98771, 4}, {103145, 1}, {105404, 2}, {107713, 1}, {114947, 4}, {122667, 1}, {130905, 2}, {145881, 5}, {149076, 6}, {152341, 4}, {155677, 1}, {159086, 4}, {162570, 3}, {166130, 2}, {169768, 3}, {173486, 1}, {193333, 2}, {197567, 4}, {206315, 3}, {215450, 1}, {229917, 2}, {240097, 4}, {245355, 1}, {261830, 2}, {267564, 5}, {273423, 8}, {279411, 9}, {285530, 7}, {298173, 6}, {304703, 4}, {325163, 3}, {346997, 2}, {354596, 1}, {362361, 2}, {370296, 1}, {378405, 2}, {395159, 4}, {412655, 7}, {421691, 10}, {430925, 8}, {440362, 6}, {450005, 5}, {469929, 4}, {490736, 1}, {523686, 2}, {583592, 5}, {596372, 6}, {609431, 4}, {650350, 3}, {694016, 1}, {724743, 2}, {740613, 1}, {756831, 2}, {790340, 1}, {MP_SIZE_T_MAX,0}} - -#define SQR_FFT_FULL_TABLE2 {{16, 2}, {1022, 1}, {1092, 2}, {1116, 4}, {1141, 2}, {1166, 1}, {1219, 2}, {1274, 1}, {1331, 4}, {1361, 1}, {1422, 2}, {1454, 1}, {1486, 3}, {1519, 1}, {1588, 2}, {1659, 1}, {1934, 4}, {1977, 6}, {2066, 2}, {2112, 4}, {2207, 1}, {2357, 4}, {2409, 3}, {2462, 4}, {2516, 3}, {2572, 2}, {2746, 3}, {2807, 2}, {2869, 5}, {2932, 2}, {2997, 3}, {3063, 1}, {3131, 4}, {3200, 5}, {3271, 4}, {3417, 2}, {3492, 3}, {3648, 2}, {3728, 5}, {3894, 4}, {4158, 2}, {4250, 4}, {4344, 2}, {4440, 3}, {4538, 2}, {4740, 3}, {4844, 1}, {4951, 2}, {5060, 3}, {5171, 2}, {5285, 1}, {5641, 3}, {5765, 6}, {5892, 5}, {6022, 6}, {6289, 4}, {6427, 6}, {6568, 3}, {6712, 4}, {6859, 3}, {7010, 4}, {7321, 2}, {7482, 1}, {7646, 4}, {7814, 3}, {8161, 5}, {8340, 6}, {8523, 5}, {8710, 2}, {8901, 4}, {9096, 2}, {9296, 1}, {9500, 4}, {9922, 1}, {10140, 4}, {10363, 6}, {10822, 7}, {11059, 10}, {11302, 6}, {11550, 8}, {11803, 9}, {12062, 5}, {12327, 8}, {12597, 7}, {12873, 3}, {13155, 2}, {13444, 3}, {14040, 6}, {14348, 8}, {14985, 6}, {15314, 5}, {15650, 6}, {15993, 3}, {16344, 1}, {16702, 3}, {17068, 4}, {17442, 6}, {17824, 8}, {18215, 5}, {18614, 2}, {19022, 4}, {19439, 1}, {20300, 2}, {21200, 5}, {21665, 1}, {22140, 2}, {23121, 1}, {23628, 2}, {24146, 1}, {24675, 4}, {25216, 2}, {25769, 1}, {26334, 4}, {26911, 5}, {27501, 1}, {28104, 4}, {28720, 1}, {29349, 2}, {29992, 1}, {30649, 4}, {31321, 2}, {32007, 4}, {32708, 2}, {35670, 1}, {36452, 4}, {37251, 1}, {38901, 3}, {39753, 1}, {40624, 2}, {42424, 1}, {47280, 4}, {49374, 1}, {50456, 4}, {51561, 3}, {53845, 1}, {57462, 3}, {58721, 4}, {60007, 3}, {61321, 1}, {62664, 2}, {66873, 5}, {68338, 2}, {69835, 1}, {71365, 2}, {72928, 4}, {74525, 6}, {76157, 4}, {77825, 1}, {79530, 4}, {81272, 2}, {83052, 5}, {84871, 3}, {86730, 4}, {88630, 3}, {90571, 6}, {92555, 8}, {100934, 4}, {103145, 6}, {105404, 8}, {107713, 5}, {110072, 1}, {114947, 2}, {117465, 4}, {122667, 3}, {125354, 1}, {130905, 2}, {142755, 5}, {149076, 6}, {152341, 4}, {162570, 3}, {173486, 2}, {189190, 1}, {193333, 2}, {197567, 3}, {201894, 4}, {206315, 3}, {215450, 6}, {224990, 2}, {229917, 5}, {234952, 4}, {245355, 1}, {261830, 4}, {267564, 7}, {273423, 9}, {279411, 12}, {285530, 8}, {291783, 7}, {298173, 6}, {304703, 4}, {311376, 1}, {325163, 3}, {346997, 2}, {354596, 1}, {362361, 2}, {370296, 1}, {378405, 2}, {395159, 4}, {403812, 7}, {421691, 8}, {440362, 6}, {450005, 5}, {469929, 4}, {490736, 1}, {523686, 2}, {583592, 4}, {596372, 6}, {609431, 4}, {650350, 3}, {694016, 2}, {709213, 1}, {724743, 2}, {740613, 1}, {756831, 2}, {790340, 1}, {MP_SIZE_T_MAX,0}} - + +/* Generated by tuneup.c, 2009-10-20, system compiler */ + +#define MUL_KARATSUBA_THRESHOLD 30 +#define MUL_TOOM3_THRESHOLD 113 +#define MUL_TOOM4_THRESHOLD 858 + +#define SQR_BASECASE_THRESHOLD 0 /* always (native) */ +#define SQR_KARATSUBA_THRESHOLD 36 +#define SQR_TOOM3_THRESHOLD 129 +#define SQR_TOOM4_THRESHOLD 134 + +#define MULLOW_BASECASE_THRESHOLD 29 +#define MULLOW_DC_THRESHOLD 29 +#define MULLOW_MUL_THRESHOLD 9970 + +#define MULHIGH_BASECASE_THRESHOLD 0 /* always */ +#define MULHIGH_DC_THRESHOLD 4 +#define MULHIGH_MUL_THRESHOLD 4 + +#define MULMOD_2EXPM1_THRESHOLD 2 + +#define DIV_SB_PREINV_THRESHOLD MP_SIZE_T_MAX /* never */ +#define DIV_DC_THRESHOLD 64 +#define POWM_THRESHOLD 391 +#define FAC_UI_THRESHOLD 32756 + +#define GCD_ACCEL_THRESHOLD 866 +#define GCDEXT_THRESHOLD 298 +#define JACOBI_BASE_METHOD 2 + +#define DIVREM_1_NORM_THRESHOLD MP_SIZE_T_MAX /* never */ +#define DIVREM_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */ +#define MOD_1_NORM_THRESHOLD MP_SIZE_T_MAX /* never */ +#define MOD_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */ +#define USE_PREINV_DIVREM_1 0 +#define USE_PREINV_MOD_1 1 +#define DIVREM_2_THRESHOLD MP_SIZE_T_MAX /* never */ +#define DIVEXACT_1_THRESHOLD 0 /* always (native) */ +#define MODEXACT_1_ODD_THRESHOLD 0 /* always (native) */ +#define MOD_1_1_THRESHOLD 3 +#define MOD_1_2_THRESHOLD 4 +#define MOD_1_3_THRESHOLD 5 +#define DIVREM_HENSEL_QR_1_THRESHOLD 39 +#define RSH_DIVREM_HENSEL_QR_1_THRESHOLD 3 +#define DIVREM_EUCLID_HENSEL_THRESHOLD 8 + +#define ROOTREM_THRESHOLD 1 + +#define GET_STR_DC_THRESHOLD 20 +#define GET_STR_PRECOMPUTE_THRESHOLD 22 +#define SET_STR_THRESHOLD 7764 + +#define MUL_FFT_TABLE { 656, 800, 4032, 4864, 11264, 36864, 0 } +#define MUL_FFT_MODF_THRESHOLD 656 +#define MUL_FFT_THRESHOLD 7680 + +#define SQR_FFT_TABLE { 624, 1568, 3904, 4352, 11264, 36864, 0 } +#define SQR_FFT_MODF_THRESHOLD 624 +#define SQR_FFT_THRESHOLD 5888 + +/* Tuneup completed successfully, took 14 seconds */ + +#define MUL_FFT_TABLE2 {{1, 3}, {205, 4}, {210, 3}, {220, 4}, {260, 3}, {266, 4}, {272, 3}, {278, 4}, {285, 3}, {292, 4}, {413, 5}, {433, 4}, {443, 5}, {463, 6}, {474, 5}, {485, 4}, {507, 5}, {850, 4}, {869, 5}, {889, 3}, {909, 4}, {929, 2}, {950, 3}, {971, 4}, {993, 5}, {1015, 6}, {1038, 5}, {1061, 6}, {1109, 7}, {1134, 6}, {1159, 5}, {1185, 6}, {1612, 5}, {1648, 6}, {2144, 4}, {2191, 5}, {2239, 6}, {2289, 7}, {2340, 6}, {2445, 7}, {2499, 6}, {2668, 7}, {2727, 6}, {2849, 5}, {2912, 4}, {2976, 5}, {3042, 6}, {3320, 5}, {3393, 6}, {3468, 7}, {3544, 5}, {3622, 6}, {3784, 5}, {3867, 6}, {3952, 5}, {4039, 6}, {4219, 7}, {4312, 8}, {4914, 7}, {5022, 8}, {5245, 6}, {5360, 7}, {5598, 8}, {6241, 6}, {6378, 7}, {6518, 8}, {6807, 9}, {6957, 8}, {7110, 9}, {7266, 8}, {7426, 9}, {7756, 8}, {8100, 7}, {8278, 8}, {8460, 9}, {8646, 7}, {8836, 8}, {9638, 9}, {9850, 8}, {10744, 9}, {10980, 8}, {11221, 9}, {12239, 8}, {12507, 9}, {13347, 8}, {14245, 9}, {15202, 10}, {15535, 9}, {16224, 8}, {16580, 9}, {16944, 8}, {17316, 9}, {18084, 8}, {18480, 9}, {19299, 10}, {19722, 9}, {25581, 10}, {26715, 9}, {29772, 10}, {31772, 9}, {33906, 10}, {36184, 9}, {37787, 10}, {40326, 9}, {41210, 10}, {58292, 11}, {62207, 12}, {63570, 10}, {70845, 11}, {72397, 10}, {73983, 11}, {80681, 10}, {89912, 11}, {98051, 10}, {102394, 11}, {114110, 12}, {127165, 11}, {129950, 10}, {132796, 11}, {164920, 12}, {168532, 11}, {172223, 12}, {196129, 11}, {228243, 12}, {233241, 13}, {254354, 12}, {259924, 11}, {283451, 12}, {289658, 11}, {296001, 12}, {329862, 11}, {344468, 12}, {456509, 13}, {519871, 12}, {659749, 13}, {784582, 12}, {933036, 13}, {953467, 14}, {MP_SIZE_T_MAX,0}} + +#define MUL_FFTM_TABLE2 {{1, 3}, {205, 4}, {215, 3}, {220, 4}, {377, 5}, {386, 4}, {404, 5}, {423, 4}, {433, 5}, {507, 4}, {519, 5}, {555, 4}, {568, 5}, {813, 6}, {831, 4}, {850, 5}, {869, 6}, {889, 5}, {909, 4}, {929, 5}, {950, 6}, {971, 5}, {993, 6}, {1109, 5}, {1134, 6}, {1238, 5}, {1266, 6}, {2144, 7}, {2191, 6}, {2289, 7}, {2340, 6}, {2392, 7}, {2445, 6}, {2499, 7}, {3109, 5}, {3178, 6}, {3248, 7}, {3544, 5}, {3622, 6}, {3702, 4}, {3784, 5}, {3867, 6}, {3952, 7}, {4039, 5}, {4128, 6}, {4219, 7}, {4312, 5}, {4407, 6}, {4504, 7}, {4914, 8}, {5022, 6}, {5132, 7}, {6107, 8}, {6241, 9}, {6378, 8}, {6518, 7}, {6661, 8}, {6807, 7}, {7110, 8}, {7266, 9}, {7426, 8}, {7589, 9}, {7756, 8}, {8278, 9}, {8460, 8}, {9030, 9}, {9228, 10}, {9431, 9}, {9638, 8}, {10066, 7}, {10287, 8}, {10980, 9}, {11467, 7}, {11719, 8}, {12239, 7}, {12507, 8}, {12781, 9}, {13939, 10}, {14245, 8}, {15535, 9}, {15876, 10}, {16224, 8}, {16580, 9}, {18885, 10}, {19299, 8}, {19722, 9}, {20154, 10}, {20596, 8}, {21047, 9}, {22461, 10}, {22953, 9}, {25581, 10}, {26142, 9}, {26715, 10}, {27898, 9}, {29134, 10}, {31091, 9}, {31772, 10}, {34649, 9}, {35408, 10}, {36184, 11}, {36977, 9}, {37787, 10}, {40326, 9}, {41210, 10}, {42113, 9}, {43036, 10}, {46934, 11}, {47962, 10}, {52305, 9}, {53451, 10}, {57042, 11}, {58292, 10}, {59569, 11}, {62207, 12}, {63570, 10}, {69326, 11}, {72397, 10}, {73983, 11}, {80681, 10}, {84254, 11}, {91881, 12}, {93893, 11}, {98051, 10}, {102394, 11}, {116609, 12}, {127165, 11}, {147989, 12}, {161386, 11}, {164920, 12}, {168532, 11}, {175995, 12}, {196129, 11}, {204813, 12}, {228243, 13}, {243569, 12}, {248903, 13}, {254354, 12}, {259924, 11}, {265616, 12}, {359720, 13}, {392279, 12}, {456509, 13}, {466506, 14}, {508731, 13}, {519871, 12}, {566925, 13}, {579340, 12}, {592026, 13}, {674196, 12}, {688959, 13}, {913042, 14}, {MP_SIZE_T_MAX,0}} + +#define SQR_FFT_TABLE2 {{1, 3}, {205, 4}, {210, 3}, {220, 4}, {225, 3}, {230, 4}, {272, 3}, {278, 4}, {404, 5}, {423, 4}, {443, 5}, {665, 4}, {680, 5}, {929, 6}, {1294, 5}, {1323, 6}, {1476, 5}, {1509, 6}, {1922, 4}, {1965, 2}, {2009, 3}, {2098, 4}, {2144, 5}, {2191, 6}, {2340, 4}, {2392, 2}, {2849, 3}, {2912, 4}, {2976, 5}, {3042, 6}, {3109, 4}, {3178, 5}, {3248, 6}, {3320, 7}, {3784, 8}, {3867, 7}, {4128, 8}, {4219, 9}, {4312, 8}, {4407, 6}, {4504, 7}, {4603, 8}, {4704, 9}, {4808, 8}, {4914, 7}, {5132, 8}, {5478, 7}, {5598, 8}, {5976, 7}, {6107, 8}, {6518, 7}, {6661, 5}, {6807, 6}, {6957, 5}, {7110, 4}, {7266, 5}, {7426, 6}, {7589, 4}, {7756, 2}, {7926, 3}, {8460, 4}, {8646, 5}, {8836, 4}, {9228, 5}, {9431, 6}, {9638, 7}, {9850, 8}, {10744, 9}, {10980, 8}, {11221, 9}, {11719, 10}, {11976, 8}, {12239, 6}, {12781, 4}, {13061, 5}, {13347, 4}, {13640, 5}, {13939, 6}, {14245, 5}, {14557, 6}, {14876, 4}, {15202, 5}, {15876, 6}, {16224, 7}, {17316, 8}, {18084, 7}, {18885, 8}, {19299, 9}, {19722, 8}, {20154, 9}, {21047, 7}, {22953, 8}, {23456, 9}, {25581, 10}, {26142, 9}, {26715, 10}, {27898, 8}, {28509, 9}, {29772, 10}, {32468, 8}, {33179, 9}, {34649, 10}, {37787, 9}, {38615, 10}, {39461, 9}, {41210, 10}, {45928, 11}, {47962, 10}, {52305, 9}, {53451, 10}, {57042, 11}, {59569, 10}, {60874, 11}, {62207, 12}, {63570, 10}, {66386, 11}, {69326, 10}, {70845, 9}, {72397, 10}, {73983, 11}, {75604, 10}, {77260, 11}, {78952, 10}, {84254, 11}, {86099, 10}, {87985, 11}, {91881, 10}, {93893, 11}, {104637, 10}, {106929, 11}, {111664, 10}, {114110, 11}, {116609, 12}, {119163, 10}, {121773, 11}, {124440, 12}, {127165, 11}, {157927, 12}, {161386, 11}, {168532, 12}, {172223, 11}, {175995, 12}, {196129, 11}, {228243, 12}, {243569, 13}, {248903, 12}, {259924, 11}, {283451, 12}, {289658, 11}, {296001, 12}, {337086, 11}, {344468, 12}, {392279, 11}, {400869, 12}, {456509, 13}, {519871, 12}, {659749, 13}, {784582, 12}, {855595, 11}, {874331, 12}, {893477, 13}, {913042, 14}, {MP_SIZE_T_MAX,0}} + +#define SQR_FFTM_TABLE2 {{1, 3}, {215, 4}, {360, 2}, {368, 3}, {377, 4}, {404, 5}, {496, 4}, {507, 5}, {695, 6}, {711, 5}, {831, 6}, {850, 5}, {869, 6}, {1109, 5}, {1134, 6}, {1612, 4}, {1648, 5}, {1685, 6}, {1799, 7}, {1839, 6}, {1880, 7}, {1922, 6}, {1965, 7}, {2009, 8}, {2053, 6}, {2144, 7}, {2239, 6}, {2340, 7}, {2392, 6}, {2445, 7}, {2727, 6}, {2787, 7}, {2849, 6}, {2912, 7}, {3178, 6}, {3248, 7}, {3393, 6}, {3468, 4}, {3544, 5}, {3622, 4}, {3702, 5}, {3784, 4}, {3867, 5}, {3952, 6}, {4039, 4}, {4219, 5}, {4312, 6}, {4504, 4}, {4603, 3}, {4808, 4}, {4914, 3}, {5022, 4}, {5132, 5}, {5245, 6}, {5360, 4}, {5478, 3}, {5598, 2}, {5721, 3}, {5847, 4}, {5976, 5}, {6241, 6}, {6378, 5}, {6518, 6}, {6661, 7}, {6807, 5}, {6957, 6}, {7110, 7}, {7266, 6}, {7426, 7}, {7589, 8}, {7756, 7}, {7926, 8}, {8278, 7}, {8460, 6}, {8646, 7}, {8836, 6}, {9030, 5}, {9431, 4}, {9638, 5}, {9850, 6}, {10066, 5}, {10513, 6}, {10744, 7}, {10980, 8}, {11221, 9}, {12239, 7}, {12507, 6}, {13640, 7}, {14245, 8}, {14557, 9}, {15202, 7}, {16224, 8}, {16580, 9}, {16944, 8}, {17696, 9}, {18084, 7}, {18885, 8}, {19299, 9}, {19722, 8}, {21508, 9}, {22461, 10}, {24495, 8}, {25032, 9}, {25581, 10}, {26715, 9}, {27300, 10}, {27898, 9}, {29772, 10}, {30424, 8}, {32468, 9}, {33179, 10}, {33906, 8}, {34649, 9}, {35408, 10}, {36977, 11}, {37787, 10}, {40326, 9}, {42113, 10}, {43036, 9}, {43979, 10}, {44943, 11}, {45928, 9}, {46934, 10}, {50087, 11}, {52305, 10}, {53451, 11}, {55819, 10}, {57042, 11}, {59569, 9}, {60874, 10}, {62207, 11}, {63570, 9}, {64963, 10}, {66386, 11}, {67840, 10}, {70845, 11}, {73983, 9}, {75604, 10}, {77260, 11}, {78952, 12}, {80681, 10}, {82448, 11}, {89912, 10}, {93893, 11}, {104637, 10}, {109271, 11}, {114110, 12}, {127165, 11}, {129950, 10}, {132796, 11}, {135704, 12}, {138676, 11}, {151230, 12}, {164920, 11}, {172223, 12}, {196129, 11}, {209298, 12}, {228243, 13}, {233241, 12}, {238349, 13}, {243569, 12}, {248903, 13}, {254354, 12}, {337086, 13}, {344468, 12}, {352011, 13}, {392279, 12}, {456509, 13}, {466506, 12}, {476722, 13}, {497829, 14}, {508731, 13}, {519871, 12}, {566925, 13}, {579340, 12}, {592026, 13}, {659749, 12}, {688959, 13}, {801763, 12}, {819320, 13}, {913042, 14}, {MP_SIZE_T_MAX,0}} + +#define MUL_FFT_FULL_TABLE2 {{16, 1}, {1045, 2}, {1068, 1}, {1092, 2}, {1116, 4}, {1141, 1}, {1166, 4}, {1192, 2}, {1219, 1}, {1246, 3}, {1274, 1}, {1361, 2}, {1391, 1}, {1623, 3}, {1659, 1}, {1772, 2}, {1811, 1}, {2159, 4}, {2256, 1}, {2306, 2}, {2357, 1}, {2869, 4}, {3063, 1}, {3131, 2}, {3200, 4}, {3343, 2}, {3417, 1}, {3492, 2}, {3648, 4}, {3728, 5}, {3810, 6}, {3894, 4}, {3980, 3}, {4158, 2}, {4250, 4}, {4344, 2}, {4440, 4}, {4538, 5}, {4740, 4}, {4844, 6}, {4951, 4}, {5060, 3}, {5171, 2}, {5285, 3}, {5401, 6}, {5520, 3}, {5641, 1}, {5892, 2}, {6022, 4}, {6154, 3}, {6289, 2}, {6427, 4}, {6568, 3}, {6712, 2}, {6859, 4}, {7010, 2}, {7164, 4}, {7482, 1}, {7986, 2}, {8161, 4}, {8340, 2}, {8710, 5}, {9296, 6}, {9500, 3}, {9709, 1}, {10140, 3}, {10363, 1}, {11059, 4}, {11302, 1}, {11550, 4}, {11803, 2}, {12062, 4}, {12327, 3}, {12873, 1}, {14040, 4}, {14348, 2}, {14663, 1}, {15314, 3}, {15650, 1}, {15993, 2}, {16702, 1}, {17068, 2}, {17442, 1}, {19439, 3}, {19865, 1}, {20300, 2}, {20745, 1}, {26334, 3}, {26911, 1}, {32007, 2}, {33425, 1}, {34905, 2}, {35670, 1}, {36452, 3}, {37251, 4}, {38901, 1}, {40624, 3}, {41514, 1}, {42424, 3}, {43353, 1}, {46266, 2}, {47280, 4}, {50456, 1}, {52691, 3}, {53845, 1}, {57462, 4}, {61321, 1}, {62664, 2}, {64037, 1}, {65440, 2}, {66873, 5}, {68338, 8}, {72928, 5}, {74525, 6}, {76157, 4}, {77825, 1}, {81272, 3}, {83052, 2}, {84871, 3}, {86730, 1}, {88630, 2}, {90571, 1}, {92555, 2}, {94582, 1}, {96654, 2}, {98771, 4}, {103145, 1}, {105404, 2}, {107713, 1}, {114947, 4}, {122667, 1}, {130905, 2}, {145881, 5}, {149076, 6}, {152341, 4}, {155677, 1}, {159086, 4}, {162570, 3}, {166130, 2}, {169768, 3}, {173486, 1}, {193333, 2}, {197567, 4}, {206315, 3}, {215450, 1}, {229917, 2}, {240097, 4}, {245355, 1}, {261830, 2}, {267564, 5}, {273423, 8}, {279411, 9}, {285530, 7}, {298173, 6}, {304703, 4}, {325163, 3}, {346997, 2}, {354596, 1}, {362361, 2}, {370296, 1}, {378405, 2}, {395159, 4}, {412655, 7}, {421691, 10}, {430925, 8}, {440362, 6}, {450005, 5}, {469929, 4}, {490736, 1}, {523686, 2}, {583592, 5}, {596372, 6}, {609431, 4}, {650350, 3}, {694016, 1}, {724743, 2}, {740613, 1}, {756831, 2}, {790340, 1}, {MP_SIZE_T_MAX,0}} + +#define SQR_FFT_FULL_TABLE2 {{16, 2}, {1022, 1}, {1092, 2}, {1116, 4}, {1141, 2}, {1166, 1}, {1219, 2}, {1274, 1}, {1331, 4}, {1361, 1}, {1422, 2}, {1454, 1}, {1486, 3}, {1519, 1}, {1588, 2}, {1659, 1}, {1934, 4}, {1977, 6}, {2066, 2}, {2112, 4}, {2207, 1}, {2357, 4}, {2409, 3}, {2462, 4}, {2516, 3}, {2572, 2}, {2746, 3}, {2807, 2}, {2869, 5}, {2932, 2}, {2997, 3}, {3063, 1}, {3131, 4}, {3200, 5}, {3271, 4}, {3417, 2}, {3492, 3}, {3648, 2}, {3728, 5}, {3894, 4}, {4158, 2}, {4250, 4}, {4344, 2}, {4440, 3}, {4538, 2}, {4740, 3}, {4844, 1}, {4951, 2}, {5060, 3}, {5171, 2}, {5285, 1}, {5641, 3}, {5765, 6}, {5892, 5}, {6022, 6}, {6289, 4}, {6427, 6}, {6568, 3}, {6712, 4}, {6859, 3}, {7010, 4}, {7321, 2}, {7482, 1}, {7646, 4}, {7814, 3}, {8161, 5}, {8340, 6}, {8523, 5}, {8710, 2}, {8901, 4}, {9096, 2}, {9296, 1}, {9500, 4}, {9922, 1}, {10140, 4}, {10363, 6}, {10822, 7}, {11059, 10}, {11302, 6}, {11550, 8}, {11803, 9}, {12062, 5}, {12327, 8}, {12597, 7}, {12873, 3}, {13155, 2}, {13444, 3}, {14040, 6}, {14348, 8}, {14985, 6}, {15314, 5}, {15650, 6}, {15993, 3}, {16344, 1}, {16702, 3}, {17068, 4}, {17442, 6}, {17824, 8}, {18215, 5}, {18614, 2}, {19022, 4}, {19439, 1}, {20300, 2}, {21200, 5}, {21665, 1}, {22140, 2}, {23121, 1}, {23628, 2}, {24146, 1}, {24675, 4}, {25216, 2}, {25769, 1}, {26334, 4}, {26911, 5}, {27501, 1}, {28104, 4}, {28720, 1}, {29349, 2}, {29992, 1}, {30649, 4}, {31321, 2}, {32007, 4}, {32708, 2}, {35670, 1}, {36452, 4}, {37251, 1}, {38901, 3}, {39753, 1}, {40624, 2}, {42424, 1}, {47280, 4}, {49374, 1}, {50456, 4}, {51561, 3}, {53845, 1}, {57462, 3}, {58721, 4}, {60007, 3}, {61321, 1}, {62664, 2}, {66873, 5}, {68338, 2}, {69835, 1}, {71365, 2}, {72928, 4}, {74525, 6}, {76157, 4}, {77825, 1}, {79530, 4}, {81272, 2}, {83052, 5}, {84871, 3}, {86730, 4}, {88630, 3}, {90571, 6}, {92555, 8}, {100934, 4}, {103145, 6}, {105404, 8}, {107713, 5}, {110072, 1}, {114947, 2}, {117465, 4}, {122667, 3}, {125354, 1}, {130905, 2}, {142755, 5}, {149076, 6}, {152341, 4}, {162570, 3}, {173486, 2}, {189190, 1}, {193333, 2}, {197567, 3}, {201894, 4}, {206315, 3}, {215450, 6}, {224990, 2}, {229917, 5}, {234952, 4}, {245355, 1}, {261830, 4}, {267564, 7}, {273423, 9}, {279411, 12}, {285530, 8}, {291783, 7}, {298173, 6}, {304703, 4}, {311376, 1}, {325163, 3}, {346997, 2}, {354596, 1}, {362361, 2}, {370296, 1}, {378405, 2}, {395159, 4}, {403812, 7}, {421691, 8}, {440362, 6}, {450005, 5}, {469929, 4}, {490736, 1}, {523686, 2}, {583592, 4}, {596372, 6}, {609431, 4}, {650350, 3}, {694016, 2}, {709213, 1}, {724743, 2}, {740613, 1}, {756831, 2}, {790340, 1}, {MP_SIZE_T_MAX,0}} + diff --git a/tests/devel/try.c b/tests/devel/try.c index 13b16c50..008694a1 100644 --- a/tests/devel/try.c +++ b/tests/devel/try.c @@ -1670,16 +1670,6 @@ mpn_toom4_sqr_n_fun (mp_ptr dst, mp_srcptr src1, mp_size_t size) mpn_toom4_sqr_n (dst, src1, size); } void -mpn_toom7_mul_n_fun (mp_ptr dst, mp_srcptr src1, mp_srcptr src2, mp_size_t size) -{ - mpn_toom7_mul_n (dst, src1, src2, size); -} -void -mpn_toom7_sqr_n_fun (mp_ptr dst, mp_srcptr src1, mp_size_t size) -{ - mpn_toom7_sqr_n (dst, src1, size); -} -void mpn_toom8h_mul_fun (mp_ptr dst, mp_srcptr src1, mp_size_t size1, mp_srcptr src2, mp_size_t size2) { mpn_toom8h_mul (dst, src1, size1, src2, size2); @@ -1942,11 +1932,9 @@ const struct choice_t choice_array[] = { { TRY_FUNFUN(mpn_kara_sqr_n), TYPE_SQR, MPN_KARA_SQR_N_MINSIZE }, { TRY_FUNFUN(mpn_toom3_mul_n), TYPE_MUL_N, MPN_TOOM3_MUL_N_MINSIZE }, { TRY_FUNFUN(mpn_toom4_mul_n), TYPE_MUL_N, MPN_TOOM4_MUL_N_MINSIZE }, - { TRY_FUNFUN(mpn_toom7_mul_n), TYPE_MUL_N, MPN_TOOM7_MUL_N_MINSIZE }, { TRY_FUNFUN(mpn_toom8h_mul), TYPE_MUL_BASECASE, MPN_TOOM8H_MUL_MINSIZE }, { TRY_FUNFUN(mpn_toom3_sqr_n), TYPE_SQR, MPN_TOOM3_SQR_N_MINSIZE }, { TRY_FUNFUN(mpn_toom4_sqr_n), TYPE_SQR, MPN_TOOM4_SQR_N_MINSIZE }, - { TRY_FUNFUN(mpn_toom7_sqr_n), TYPE_SQR, MPN_TOOM7_SQR_N_MINSIZE }, { TRY_FUNFUN(mpn_toom8_sqr_n), TYPE_SQR, MPN_TOOM8_SQR_N_MINSIZE }, { TRY(mpn_gcd_1), TYPE_GCD_1 }, diff --git a/tune/common.c b/tune/common.c index 309682ea..0f2430ee 100644 --- a/tune/common.c +++ b/tune/common.c @@ -1141,11 +1141,6 @@ speed_mpn_toom4_mul_n (struct speed_params *s) SPEED_ROUTINE_MPN_TOOM4_MUL_N (mpn_toom4_mul_n); } double -speed_mpn_toom7_mul_n (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_TOOM7_MUL_N (mpn_toom7_mul_n); -} -double speed_mpn_toom8h_mul (struct speed_params *s) { SPEED_ROUTINE_MPN_TOOM8H_MUL (mpn_toom8h_mul); @@ -1161,11 +1156,6 @@ speed_mpn_toom4_sqr_n (struct speed_params *s) SPEED_ROUTINE_MPN_TOOM4_SQR_N (mpn_toom4_sqr_n); } double -speed_mpn_toom7_sqr_n (struct speed_params *s) -{ - SPEED_ROUTINE_MPN_TOOM7_SQR_N (mpn_toom7_sqr_n); -} -double speed_mpn_toom8_sqr_n (struct speed_params *s) { SPEED_ROUTINE_MPN_TOOM8_SQR_N (mpn_toom8_sqr_n); diff --git a/tune/speed.c b/tune/speed.c index f224c3be..fe3a5b6d 100644 --- a/tune/speed.c +++ b/tune/speed.c @@ -333,11 +333,9 @@ const struct routine_t { { "mpn_kara_sqr_n", speed_mpn_kara_sqr_n }, { "mpn_toom3_mul_n", speed_mpn_toom3_mul_n }, { "mpn_toom4_mul_n", speed_mpn_toom4_mul_n }, - { "mpn_toom7_mul_n", speed_mpn_toom7_mul_n }, { "mpn_toom8h_mul", speed_mpn_toom8h_mul }, { "mpn_toom3_sqr_n", speed_mpn_toom3_sqr_n }, { "mpn_toom4_sqr_n", speed_mpn_toom4_sqr_n }, - { "mpn_toom7_sqr_n", speed_mpn_toom7_sqr_n }, { "mpn_mul_fft_full", speed_mpn_mul_fft_full }, { "mpn_mul_fft_full_sqr", speed_mpn_mul_fft_full_sqr }, diff --git a/tune/speed.h b/tune/speed.h index e2bb2a47..d61366a6 100644 --- a/tune/speed.h +++ b/tune/speed.h @@ -297,11 +297,9 @@ double speed_mpn_sublsh1_n _PROTO ((struct speed_params *s)); double speed_mpn_submul_1 _PROTO ((struct speed_params *s)); double speed_mpn_toom3_mul_n _PROTO ((struct speed_params *s)); double speed_mpn_toom4_mul_n _PROTO ((struct speed_params *s)); -double speed_mpn_toom7_mul_n _PROTO ((struct speed_params *s)); double speed_mpn_toom8h_mul _PROTO ((struct speed_params *s)); double speed_mpn_toom3_sqr_n _PROTO ((struct speed_params *s)); double speed_mpn_toom4_sqr_n _PROTO ((struct speed_params *s)); -double speed_mpn_toom7_sqr_n _PROTO ((struct speed_params *s)); double speed_mpn_toom8_sqr_n _PROTO ((struct speed_params *s)); double speed_mpn_udiv_qrnnd _PROTO ((struct speed_params *s)); double speed_mpn_udiv_qrnnd_r _PROTO ((struct speed_params *s)); @@ -476,23 +474,19 @@ mp_size_t mpn_set_str_subquad _PROTO ((mp_ptr, const unsigned char *, size_t, in void mpn_toom3_mul_n_open _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr)); void mpn_toom4_mul_n_open _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)); -void mpn_toom7_mul_n_open _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)); void mpn_toom8h_mul_open _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t)); void mpn_toom3_sqr_n_open _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr)); void mpn_toom4_sqr_n_open _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr)); -void mpn_toom7_sqr_n_open _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr)); void mpn_toom8_sqr_n_open _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_ptr)); void mpn_toom3_mul_n_mpn _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t, mp_ptr)); void mpn_toom4_mul_n_mpn _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)); -void mpn_toom7_mul_n_mpn _PROTO ((mp_ptr, mp_srcptr, mp_srcptr, mp_size_t)); void mpn_toom8h_mul_mpn _PROTO ((mp_ptr, mp_srcptr, mp_size_t, mp_srcptr, mp_size_t)); void mpn_toom3_sqr_n_mpn _PROTO((mp_ptr, mp_srcptr, mp_size_t, mp_ptr)); void mpn_toom4_sqr_n_mpn _PROTO((mp_ptr, mp_srcptr, mp_size_t, mp_ptr)); -void mpn_toom7_sqr_n_mpn _PROTO((mp_ptr, mp_srcptr, mp_size_t, mp_ptr)); void mpn_toom8_sqr_n_mpn _PROTO((mp_ptr, mp_srcptr, mp_size_t, mp_ptr)); void mpz_powm_mod _PROTO ((mpz_ptr res, mpz_srcptr base, mpz_srcptr e, @@ -1268,11 +1262,6 @@ int speed_routine_count_zeros_setup _PROTO ((struct speed_params *s, (function (wp, s->xp, s->yp, s->size), \ MPN_TOOM4_MUL_N_MINSIZE) -#define SPEED_ROUTINE_MPN_TOOM7_MUL_N(function) \ - SPEED_ROUTINE_MPN_MUL_N_SIZE \ - (function (wp, s->xp, s->yp, s->size), \ - MPN_TOOM7_MUL_N_MINSIZE) - #define SPEED_ROUTINE_MPN_TOOM8H_MUL(function) \ SPEED_ROUTINE_MPN_MUL_CALL \ (function (wp, s->xp, size1, s->yp, s->size), \ @@ -1382,10 +1371,6 @@ int speed_routine_count_zeros_setup _PROTO ((struct speed_params *s, SPEED_ROUTINE_MPN_SQR_N_SIZE (function (wp, s->xp, s->size), \ MPN_TOOM4_SQR_N_MINSIZE) -#define SPEED_ROUTINE_MPN_TOOM7_SQR_N(function) \ - SPEED_ROUTINE_MPN_SQR_N_SIZE (function (wp, s->xp, s->size), \ - MPN_TOOM7_SQR_N_MINSIZE) - #define SPEED_ROUTINE_MPN_TOOM8_SQR_N(function) \ SPEED_ROUTINE_MPN_SQR_N_SIZE (function (wp, s->xp, s->size), \ MPN_TOOM8_SQR_N_MINSIZE) diff --git a/tune/tuneup.c b/tune/tuneup.c index 5d89c3a9..84a5e72a 100644 --- a/tune/tuneup.c +++ b/tune/tuneup.c @@ -181,7 +181,6 @@ int allocdat = 0; mp_size_t mul_karatsuba_threshold = MP_SIZE_T_MAX; mp_size_t mul_toom3_threshold = MUL_TOOM3_THRESHOLD_LIMIT; mp_size_t mul_toom4_threshold = MUL_TOOM4_THRESHOLD_LIMIT; -mp_size_t mul_toom7_threshold = MUL_TOOM7_THRESHOLD_LIMIT; mp_size_t mul_toom8h_threshold = MUL_TOOM8H_THRESHOLD_LIMIT; mp_size_t mul_fft_threshold = MP_SIZE_T_MAX; mp_size_t mul_fft_modf_threshold = MP_SIZE_T_MAX; @@ -190,7 +189,6 @@ mp_size_t sqr_karatsuba_threshold = (TUNE_SQR_KARATSUBA_MAX == 0 ? MP_SIZE_T_MAX : TUNE_SQR_KARATSUBA_MAX); mp_size_t sqr_toom3_threshold = SQR_TOOM3_THRESHOLD_LIMIT; mp_size_t sqr_toom4_threshold = SQR_TOOM4_THRESHOLD_LIMIT; -mp_size_t sqr_toom7_threshold = SQR_TOOM7_THRESHOLD_LIMIT; mp_size_t sqr_toom8_threshold = SQR_TOOM8_THRESHOLD_LIMIT; mp_size_t sqr_fft_threshold = MP_SIZE_T_MAX; mp_size_t sqr_fft_modf_threshold = MP_SIZE_T_MAX;