From b005717671f3e06ed1c3682121b5f05e0df90a4e Mon Sep 17 00:00:00 2001 From: Brian Gladman Date: Tue, 14 Feb 2017 17:47:19 +0000 Subject: [PATCH] add latest tuning data --- mpn/x86_64w/nehalem/gmp-mparam.h | 65 +++++++------- .../sandybridge/ivybridge/gmp-mparam.h | 89 ++++++++++--------- mpn/x86_64w/skylake/avx/gmp-mparam.h | 76 ++++++++-------- 3 files changed, 120 insertions(+), 110 deletions(-) diff --git a/mpn/x86_64w/nehalem/gmp-mparam.h b/mpn/x86_64w/nehalem/gmp-mparam.h index c05e4961..5bcfa9ce 100644 --- a/mpn/x86_64w/nehalem/gmp-mparam.h +++ b/mpn/x86_64w/nehalem/gmp-mparam.h @@ -1,4 +1,4 @@ -/* Generated by tuneup.c, 2014-03-21, gcc 4.7 */ +/* Generated by tuneup.c, 2017-02-14, gcc 5.4 */ #define MUL_KARATSUBA_THRESHOLD 16 #define MUL_TOOM3_THRESHOLD 89 @@ -6,12 +6,10 @@ #define MUL_TOOM8H_THRESHOLD 286 #define SQR_BASECASE_THRESHOLD 0 /* always (native) */ -#define SQR_KARATSUBA_THRESHOLD 26 +#define SQR_KARATSUBA_THRESHOLD 24 #define SQR_TOOM3_THRESHOLD 93 -#define SQR_TOOM4_THRESHOLD 262 -#define SQR_TOOM8_THRESHOLD 369 - -#define POWM_THRESHOLD 984 +#define SQR_TOOM4_THRESHOLD 278 +#define SQR_TOOM8_THRESHOLD 351 #define DIVREM_1_NORM_THRESHOLD MP_SIZE_T_MAX /* never */ #define DIVREM_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */ @@ -23,65 +21,70 @@ #define MODEXACT_1_ODD_THRESHOLD 0 /* always (native) */ #define MOD_1_1_THRESHOLD 5 #define MOD_1_2_THRESHOLD 8 -#define MOD_1_3_THRESHOLD 19 +#define MOD_1_3_THRESHOLD 16 #define DIVREM_HENSEL_QR_1_THRESHOLD 10 #define RSH_DIVREM_HENSEL_QR_1_THRESHOLD 7 -#define DIVREM_EUCLID_HENSEL_THRESHOLD 18 +#define DIVREM_EUCLID_HENSEL_THRESHOLD 119 #define MUL_FFT_FULL_THRESHOLD 3008 #define SQR_FFT_FULL_THRESHOLD 2368 -#define MULLOW_BASECASE_THRESHOLD 7 -#define MULLOW_DC_THRESHOLD 11 -#define MULLOW_MUL_THRESHOLD 3336 +#define MULLOW_BASECASE_THRESHOLD 0 /* always */ +#define MULLOW_DC_THRESHOLD 41 +#define MULLOW_MUL_THRESHOLD 3402 -#define MULHIGH_BASECASE_THRESHOLD 16 -#define MULHIGH_DC_THRESHOLD 16 -#define MULHIGH_MUL_THRESHOLD 2743 +#define MULMID_TOOM42_THRESHOLD 24 + +#define MULHIGH_BASECASE_THRESHOLD 8 +#define MULHIGH_DC_THRESHOLD 35 +#define MULHIGH_MUL_THRESHOLD 2716 #define MULMOD_2EXPM1_THRESHOLD 14 -#define DC_DIV_QR_THRESHOLD 36 +#define DC_DIV_QR_THRESHOLD 32 #define INV_DIV_QR_THRESHOLD 1895 -#define INV_DIVAPPR_Q_N_THRESHOLD 36 +#define INV_DIVAPPR_Q_N_THRESHOLD 32 #define DC_DIV_Q_THRESHOLD 44 -#define INV_DIV_Q_THRESHOLD 1078 -#define DC_DIVAPPR_Q_THRESHOLD 39 -#define INV_DIVAPPR_Q_THRESHOLD 15352 +#define INV_DIV_Q_THRESHOLD 1142 +#define DC_DIVAPPR_Q_THRESHOLD 38 +#define INV_DIVAPPR_Q_THRESHOLD 16039 #define DC_BDIV_QR_THRESHOLD 36 #define DC_BDIV_Q_THRESHOLD 11 +#define BINV_NEWTON_THRESHOLD 22 +#define REDC_1_TO_REDC_2_THRESHOLD 2 +#define REDC_2_TO_REDC_N_THRESHOLD 44 + #define ROOTREM_THRESHOLD 6 #define MATRIX22_STRASSEN_THRESHOLD 13 -#define HGCD_THRESHOLD 95 +#define HGCD_THRESHOLD 86 #define HGCD_APPR_THRESHOLD 50 #define HGCD_REDUCE_THRESHOLD 6852 -#define GCD_DC_THRESHOLD 379 +#define GCD_DC_THRESHOLD 386 #define GCDEXT_DC_THRESHOLD 278 #define JACOBI_BASE_METHOD 1 -#define GET_STR_DC_THRESHOLD 15 -#define GET_STR_PRECOMPUTE_THRESHOLD 21 -#define SET_STR_DC_THRESHOLD 418 -#define SET_STR_PRECOMPUTE_THRESHOLD 1659 +#define GET_STR_DC_THRESHOLD 13 +#define GET_STR_PRECOMPUTE_THRESHOLD 18 +#define SET_STR_DC_THRESHOLD 172 +#define SET_STR_PRECOMPUTE_THRESHOLD 1566 -#define FAC_DSC_THRESHOLD 270 -#define FAC_ODD_THRESHOLD 0 /* always */ +#define FAC_DSC_THRESHOLD 276 +#define FAC_ODD_THRESHOLD 43 /* fft_tuning -- autogenerated by tune-fft */ #define FFT_TAB \ - { { 4, 3 }, { 3, 2 }, { 3, 2 }, { 2, 1 }, { 1, 0 } } + { { 4, 3 }, { 3, 3 }, { 3, 2 }, { 2, 1 }, { 1, 0 } } #define MULMOD_TAB \ - { 4, 3, 4, 4, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1 } + { 4, 3, 3, 4, 4, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 1, 1 } #define FFT_N_NUM 19 #define FFT_MULMOD_2EXPP1_CUTOFF 128 -/* Tuneup completed successfully, took 140 seconds */ - +/* Tuneup completed successfully, took 107 seconds */ diff --git a/mpn/x86_64w/sandybridge/ivybridge/gmp-mparam.h b/mpn/x86_64w/sandybridge/ivybridge/gmp-mparam.h index 853bc6d8..5a01028c 100644 --- a/mpn/x86_64w/sandybridge/ivybridge/gmp-mparam.h +++ b/mpn/x86_64w/sandybridge/ivybridge/gmp-mparam.h @@ -1,17 +1,15 @@ -/* Generated by tuneup.c, 2014-03-28, gcc 4.8 */ +/* Generated by tuneup.c, 2017-02-14, gcc 5.4 */ #define MUL_KARATSUBA_THRESHOLD 16 -#define MUL_TOOM3_THRESHOLD 109 +#define MUL_TOOM3_THRESHOLD 101 #define MUL_TOOM4_THRESHOLD 244 -#define MUL_TOOM8H_THRESHOLD 327 +#define MUL_TOOM8H_THRESHOLD 303 #define SQR_BASECASE_THRESHOLD 0 /* always (native) */ -#define SQR_KARATSUBA_THRESHOLD 29 -#define SQR_TOOM3_THRESHOLD 169 -#define SQR_TOOM4_THRESHOLD 244 -#define SQR_TOOM8_THRESHOLD 375 - -#define POWM_THRESHOLD 956 +#define SQR_KARATSUBA_THRESHOLD 24 +#define SQR_TOOM3_THRESHOLD 137 +#define SQR_TOOM4_THRESHOLD 250 +#define SQR_TOOM8_THRESHOLD 333 #define DIVREM_1_NORM_THRESHOLD MP_SIZE_T_MAX /* never */ #define DIVREM_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */ @@ -21,67 +19,72 @@ #define USE_PREINV_MOD_1 1 #define DIVEXACT_1_THRESHOLD 0 /* always */ #define MODEXACT_1_ODD_THRESHOLD 0 /* always (native) */ -#define MOD_1_1_THRESHOLD 7 -#define MOD_1_2_THRESHOLD 12 -#define MOD_1_3_THRESHOLD 13 -#define DIVREM_HENSEL_QR_1_THRESHOLD 7 +#define MOD_1_1_THRESHOLD 6 +#define MOD_1_2_THRESHOLD 8 +#define MOD_1_3_THRESHOLD 17 +#define DIVREM_HENSEL_QR_1_THRESHOLD 10 #define RSH_DIVREM_HENSEL_QR_1_THRESHOLD 3 -#define DIVREM_EUCLID_HENSEL_THRESHOLD 10 +#define DIVREM_EUCLID_HENSEL_THRESHOLD 202 -#define MUL_FFT_FULL_THRESHOLD 3904 +#define MUL_FFT_FULL_THRESHOLD 3776 -#define SQR_FFT_FULL_THRESHOLD 2880 +#define SQR_FFT_FULL_THRESHOLD 2496 -#define MULLOW_BASECASE_THRESHOLD 6 -#define MULLOW_DC_THRESHOLD 12 -#define MULLOW_MUL_THRESHOLD 369 +#define MULLOW_BASECASE_THRESHOLD 0 /* always */ +#define MULLOW_DC_THRESHOLD 75 +#define MULLOW_MUL_THRESHOLD 4994 -#define MULHIGH_BASECASE_THRESHOLD 9 -#define MULHIGH_DC_THRESHOLD 27 +#define MULMID_TOOM42_THRESHOLD 22 + +#define MULHIGH_BASECASE_THRESHOLD 8 +#define MULHIGH_DC_THRESHOLD 35 #define MULHIGH_MUL_THRESHOLD 3436 -#define MULMOD_2EXPM1_THRESHOLD 20 +#define MULMOD_2EXPM1_THRESHOLD 14 -#define DC_DIV_QR_THRESHOLD 20 +#define DC_DIV_QR_THRESHOLD 28 #define INV_DIV_QR_THRESHOLD 2089 -#define INV_DIVAPPR_Q_N_THRESHOLD 20 -#define DC_DIV_Q_THRESHOLD 40 +#define INV_DIVAPPR_Q_N_THRESHOLD 28 +#define DC_DIV_Q_THRESHOLD 44 #define INV_DIV_Q_THRESHOLD 1470 -#define DC_DIVAPPR_Q_THRESHOLD 46 -#define INV_DIVAPPR_Q_THRESHOLD 15352 -#define DC_BDIV_QR_THRESHOLD 26 -#define DC_BDIV_Q_THRESHOLD 40 +#define DC_DIVAPPR_Q_THRESHOLD 47 +#define INV_DIVAPPR_Q_THRESHOLD 17327 +#define DC_BDIV_QR_THRESHOLD 25 +#define DC_BDIV_Q_THRESHOLD 48 + +#define BINV_NEWTON_THRESHOLD 48 +#define REDC_1_TO_REDC_2_THRESHOLD 18 +#define REDC_2_TO_REDC_N_THRESHOLD 36 #define ROOTREM_THRESHOLD 6 #define MATRIX22_STRASSEN_THRESHOLD 19 -#define HGCD_THRESHOLD 77 -#define HGCD_APPR_THRESHOLD 50 +#define HGCD_THRESHOLD 88 +#define HGCD_APPR_THRESHOLD 52 #define HGCD_REDUCE_THRESHOLD 6852 -#define GCD_DC_THRESHOLD 465 -#define GCDEXT_DC_THRESHOLD 345 -#define JACOBI_BASE_METHOD 1 +#define GCD_DC_THRESHOLD 483 +#define GCDEXT_DC_THRESHOLD 321 +#define JACOBI_BASE_METHOD 2 #define GET_STR_DC_THRESHOLD 12 -#define GET_STR_PRECOMPUTE_THRESHOLD 21 -#define SET_STR_DC_THRESHOLD 890 -#define SET_STR_PRECOMPUTE_THRESHOLD 2093 +#define GET_STR_PRECOMPUTE_THRESHOLD 23 +#define SET_STR_DC_THRESHOLD 1182 +#define SET_STR_PRECOMPUTE_THRESHOLD 2342 -#define FAC_DSC_THRESHOLD 537 -#define FAC_ODD_THRESHOLD 25 +#define FAC_DSC_THRESHOLD 542 +#define FAC_ODD_THRESHOLD 26 /* fft_tuning -- autogenerated by tune-fft */ #define FFT_TAB \ - { { 4, 3 }, { 4, 3 }, { 3, 2 }, { 2, 1 }, { 1, 0 } } + { { 4, 3 }, { 3, 3 }, { 3, 2 }, { 2, 1 }, { 1, 0 } } #define MULMOD_TAB \ - { 4, 4, 4, 4, 4, 3, 3, 3, 3, 2, 2, 3, 3, 2, 2, 2, 2, 1, 1 } + { 4, 3, 3, 4, 4, 2, 3, 3, 3, 2, 2, 3, 3, 2, 2, 3, 2, 1, 1 } #define FFT_N_NUM 19 #define FFT_MULMOD_2EXPP1_CUTOFF 128 -/* Tuneup completed successfully, took 75 seconds */ - +/* Tuneup completed successfully, took 129 seconds */ diff --git a/mpn/x86_64w/skylake/avx/gmp-mparam.h b/mpn/x86_64w/skylake/avx/gmp-mparam.h index b4a841bf..be5f601b 100644 --- a/mpn/x86_64w/skylake/avx/gmp-mparam.h +++ b/mpn/x86_64w/skylake/avx/gmp-mparam.h @@ -1,17 +1,15 @@ -/* Generated by tuneup.c, 2017-01-30, gcc 4.8 */ +/* Generated by tuneup.c, 2017-02-14, gcc 5.4 */ -#define MUL_KARATSUBA_THRESHOLD 21 +#define MUL_KARATSUBA_THRESHOLD 19 #define MUL_TOOM3_THRESHOLD 137 -#define MUL_TOOM4_THRESHOLD 195 -#define MUL_TOOM8H_THRESHOLD 399 +#define MUL_TOOM4_THRESHOLD 193 +#define MUL_TOOM8H_THRESHOLD 381 #define SQR_BASECASE_THRESHOLD 0 /* always (native) */ #define SQR_KARATSUBA_THRESHOLD 32 -#define SQR_TOOM3_THRESHOLD 97 -#define SQR_TOOM4_THRESHOLD 266 -#define SQR_TOOM8_THRESHOLD 512 - -#define POWM_THRESHOLD 32 +#define SQR_TOOM3_THRESHOLD 89 +#define SQR_TOOM4_THRESHOLD 258 +#define SQR_TOOM8_THRESHOLD 494 #define DIVREM_1_NORM_THRESHOLD MP_SIZE_T_MAX /* never */ #define DIVREM_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */ @@ -22,53 +20,59 @@ #define DIVEXACT_1_THRESHOLD 0 /* always */ #define MODEXACT_1_ODD_THRESHOLD 0 /* always (native) */ #define MOD_1_1_THRESHOLD 6 -#define MOD_1_2_THRESHOLD 8 -#define MOD_1_3_THRESHOLD 13 -#define DIVREM_HENSEL_QR_1_THRESHOLD 29 +#define MOD_1_2_THRESHOLD 10 +#define MOD_1_3_THRESHOLD 17 +#define DIVREM_HENSEL_QR_1_THRESHOLD 31 #define RSH_DIVREM_HENSEL_QR_1_THRESHOLD 6 -#define DIVREM_EUCLID_HENSEL_THRESHOLD 14 +#define DIVREM_EUCLID_HENSEL_THRESHOLD 35 -#define MUL_FFT_FULL_THRESHOLD 7104 +#define MUL_FFT_FULL_THRESHOLD 5056 -#define SQR_FFT_FULL_THRESHOLD 3904 +#define SQR_FFT_FULL_THRESHOLD 3008 -#define MULLOW_BASECASE_THRESHOLD 7 -#define MULLOW_DC_THRESHOLD 62 -#define MULLOW_MUL_THRESHOLD 3504 +#define MULLOW_BASECASE_THRESHOLD 0 /* always */ +#define MULLOW_DC_THRESHOLD 21 +#define MULLOW_MUL_THRESHOLD 3369 -#define MULHIGH_BASECASE_THRESHOLD 9 -#define MULHIGH_DC_THRESHOLD 62 -#define MULHIGH_MUL_THRESHOLD 3504 +#define MULMID_TOOM42_THRESHOLD 24 + +#define MULHIGH_BASECASE_THRESHOLD 10 +#define MULHIGH_DC_THRESHOLD 22 +#define MULHIGH_MUL_THRESHOLD 3336 #define MULMOD_2EXPM1_THRESHOLD 18 -#define DC_DIV_QR_THRESHOLD 35 +#define DC_DIV_QR_THRESHOLD 29 #define INV_DIV_QR_THRESHOLD 2444 -#define INV_DIVAPPR_Q_N_THRESHOLD 35 +#define INV_DIVAPPR_Q_N_THRESHOLD 29 #define DC_DIV_Q_THRESHOLD 45 -#define INV_DIV_Q_THRESHOLD 1308 -#define DC_DIVAPPR_Q_THRESHOLD 142 -#define INV_DIVAPPR_Q_THRESHOLD 13280 -#define DC_BDIV_QR_THRESHOLD 64 -#define DC_BDIV_Q_THRESHOLD 144 +#define INV_DIV_Q_THRESHOLD 1258 +#define DC_DIVAPPR_Q_THRESHOLD 104 +#define INV_DIVAPPR_Q_THRESHOLD 14091 +#define DC_BDIV_QR_THRESHOLD 62 +#define DC_BDIV_Q_THRESHOLD 96 + +#define BINV_NEWTON_THRESHOLD 8 +#define REDC_1_TO_REDC_2_THRESHOLD 22 +#define REDC_2_TO_REDC_N_THRESHOLD 36 #define ROOTREM_THRESHOLD 6 #define MATRIX22_STRASSEN_THRESHOLD 15 -#define HGCD_THRESHOLD 106 -#define HGCD_APPR_THRESHOLD 91 +#define HGCD_THRESHOLD 103 +#define HGCD_APPR_THRESHOLD 98 #define HGCD_REDUCE_THRESHOLD 6852 -#define GCD_DC_THRESHOLD 706 -#define GCDEXT_DC_THRESHOLD 492 +#define GCD_DC_THRESHOLD 752 +#define GCDEXT_DC_THRESHOLD 496 #define JACOBI_BASE_METHOD 2 #define GET_STR_DC_THRESHOLD 14 #define GET_STR_PRECOMPUTE_THRESHOLD 20 -#define SET_STR_DC_THRESHOLD 2393 -#define SET_STR_PRECOMPUTE_THRESHOLD 4064 +#define SET_STR_DC_THRESHOLD 2324 +#define SET_STR_PRECOMPUTE_THRESHOLD 3246 #define FAC_DSC_THRESHOLD 858 -#define FAC_ODD_THRESHOLD 45 +#define FAC_ODD_THRESHOLD 44 /* fft_tuning -- autogenerated by tune-fft */ @@ -83,4 +87,4 @@ #define FFT_MULMOD_2EXPP1_CUTOFF 256 -/* Tuneup completed successfully, took 64 seconds */ +/* Tuneup completed successfully, took 63 seconds */ \ No newline at end of file