From cf2286caa24b9fb4205e5cdebb3ceb7e9113755d Mon Sep 17 00:00:00 2001 From: Brian Gladman Date: Fri, 31 Mar 2017 17:04:11 +0100 Subject: [PATCH] move *nix tuning values into Windows --- mpn/x86_64w/bulldozer/gmp-mparam.h | 66 ++++++++++---------- mpn/x86_64w/haswell/avx/gmp-mparam.h | 76 ++++++++++++----------- mpn/x86_64w/skylake/gmp-mparam.h | 90 ++++++++++++++++++++++++++++ 3 files changed, 165 insertions(+), 67 deletions(-) create mode 100644 mpn/x86_64w/skylake/gmp-mparam.h diff --git a/mpn/x86_64w/bulldozer/gmp-mparam.h b/mpn/x86_64w/bulldozer/gmp-mparam.h index be420ad8..a9537d52 100644 --- a/mpn/x86_64w/bulldozer/gmp-mparam.h +++ b/mpn/x86_64w/bulldozer/gmp-mparam.h @@ -1,17 +1,15 @@ -/* Generated by tuneup.c, 2017-01-31, gcc 5.4 */ +/* Generated by tuneup.c, 2017-02-14, gcc 5.4 */ #define MUL_KARATSUBA_THRESHOLD 16 -#define MUL_TOOM3_THRESHOLD 102 +#define MUL_TOOM3_THRESHOLD 65 #define MUL_TOOM4_THRESHOLD 107 -#define MUL_TOOM8H_THRESHOLD 351 +#define MUL_TOOM8H_THRESHOLD 274 #define SQR_BASECASE_THRESHOLD 0 /* always (native) */ -#define SQR_KARATSUBA_THRESHOLD 22 +#define SQR_KARATSUBA_THRESHOLD 20 #define SQR_TOOM3_THRESHOLD 121 -#define SQR_TOOM4_THRESHOLD 942 -#define SQR_TOOM8_THRESHOLD 942 - -#define POWM_THRESHOLD 162 +#define SQR_TOOM4_THRESHOLD 933 +#define SQR_TOOM8_THRESHOLD 933 #define DIVREM_1_NORM_THRESHOLD MP_SIZE_T_MAX /* never */ #define DIVREM_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */ @@ -23,51 +21,57 @@ #define MODEXACT_1_ODD_THRESHOLD 0 /* always (native) */ #define MOD_1_1_THRESHOLD 8 #define MOD_1_2_THRESHOLD 8 -#define MOD_1_3_THRESHOLD 13 +#define MOD_1_3_THRESHOLD 9 #define DIVREM_HENSEL_QR_1_THRESHOLD 13 #define RSH_DIVREM_HENSEL_QR_1_THRESHOLD 13 -#define DIVREM_EUCLID_HENSEL_THRESHOLD 120 +#define DIVREM_EUCLID_HENSEL_THRESHOLD 73 #define MUL_FFT_FULL_THRESHOLD 3904 -#define SQR_FFT_FULL_THRESHOLD 3392 +#define SQR_FFT_FULL_THRESHOLD 3008 #define MULLOW_BASECASE_THRESHOLD 0 /* always */ #define MULLOW_DC_THRESHOLD 30 -#define MULLOW_MUL_THRESHOLD 5565 +#define MULLOW_MUL_THRESHOLD 5194 + +#define MULMID_TOOM42_THRESHOLD 24 #define MULHIGH_BASECASE_THRESHOLD 5 -#define MULHIGH_DC_THRESHOLD 29 +#define MULHIGH_DC_THRESHOLD 33 #define MULHIGH_MUL_THRESHOLD 2966 #define MULMOD_2EXPM1_THRESHOLD 14 -#define DC_DIV_QR_THRESHOLD 47 -#define INV_DIV_QR_THRESHOLD 1718 -#define INV_DIVAPPR_Q_N_THRESHOLD 47 -#define DC_DIV_Q_THRESHOLD 60 +#define DC_DIV_QR_THRESHOLD 52 +#define INV_DIV_QR_THRESHOLD 1970 +#define INV_DIVAPPR_Q_N_THRESHOLD 52 +#define DC_DIV_Q_THRESHOLD 56 #define INV_DIV_Q_THRESHOLD 1470 -#define DC_DIVAPPR_Q_THRESHOLD 34 -#define INV_DIVAPPR_Q_THRESHOLD 15758 +#define DC_DIVAPPR_Q_THRESHOLD 37 +#define INV_DIVAPPR_Q_THRESHOLD 17333 #define DC_BDIV_QR_THRESHOLD 52 -#define DC_BDIV_Q_THRESHOLD 32 +#define DC_BDIV_Q_THRESHOLD 34 + +#define BINV_NEWTON_THRESHOLD 46 +#define REDC_1_TO_REDC_2_THRESHOLD 48 +#define REDC_2_TO_REDC_N_THRESHOLD 0 /* always */ #define ROOTREM_THRESHOLD 6 -#define MATRIX22_STRASSEN_THRESHOLD 19 -#define HGCD_THRESHOLD 96 +#define MATRIX22_STRASSEN_THRESHOLD 14 +#define HGCD_THRESHOLD 102 #define HGCD_APPR_THRESHOLD 50 #define HGCD_REDUCE_THRESHOLD 6852 -#define GCD_DC_THRESHOLD 365 -#define GCDEXT_DC_THRESHOLD 241 +#define GCD_DC_THRESHOLD 358 +#define GCDEXT_DC_THRESHOLD 253 #define JACOBI_BASE_METHOD 1 -#define GET_STR_DC_THRESHOLD 12 -#define GET_STR_PRECOMPUTE_THRESHOLD 20 -#define SET_STR_DC_THRESHOLD 426 -#define SET_STR_PRECOMPUTE_THRESHOLD 1627 +#define GET_STR_DC_THRESHOLD 13 +#define GET_STR_PRECOMPUTE_THRESHOLD 19 +#define SET_STR_DC_THRESHOLD 418 +#define SET_STR_PRECOMPUTE_THRESHOLD 1505 -#define FAC_DSC_THRESHOLD 244 +#define FAC_DSC_THRESHOLD 178 #define FAC_ODD_THRESHOLD 27 /* fft_tuning -- autogenerated by tune-fft */ @@ -80,7 +84,7 @@ #define FFT_N_NUM 19 -#define FFT_MULMOD_2EXPP1_CUTOFF 256 +#define FFT_MULMOD_2EXPP1_CUTOFF 128 -/* Tuneup completed successfully, took 129 seconds */ +/* Tuneup completed successfully, took 130 seconds */ diff --git a/mpn/x86_64w/haswell/avx/gmp-mparam.h b/mpn/x86_64w/haswell/avx/gmp-mparam.h index 3ad2609a..b98660ec 100644 --- a/mpn/x86_64w/haswell/avx/gmp-mparam.h +++ b/mpn/x86_64w/haswell/avx/gmp-mparam.h @@ -1,17 +1,15 @@ -/* Generated by tuneup.c, 2017-01-30, gcc 4.9 */ +/* Generated by tuneup.c, 2017-02-14, gcc 4.9 */ -#define MUL_KARATSUBA_THRESHOLD 21 -#define MUL_TOOM3_THRESHOLD 132 -#define MUL_TOOM4_THRESHOLD 196 -#define MUL_TOOM8H_THRESHOLD 342 +#define MUL_KARATSUBA_THRESHOLD 20 +#define MUL_TOOM3_THRESHOLD 122 +#define MUL_TOOM4_THRESHOLD 202 +#define MUL_TOOM8H_THRESHOLD 351 #define SQR_BASECASE_THRESHOLD 0 /* always (native) */ -#define SQR_KARATSUBA_THRESHOLD 32 -#define SQR_TOOM3_THRESHOLD 204 -#define SQR_TOOM4_THRESHOLD 336 -#define SQR_TOOM8_THRESHOLD 438 - -#define POWM_THRESHOLD 22 +#define SQR_KARATSUBA_THRESHOLD 38 +#define SQR_TOOM3_THRESHOLD 129 +#define SQR_TOOM4_THRESHOLD 348 +#define SQR_TOOM8_THRESHOLD 414 #define DIVREM_1_NORM_THRESHOLD MP_SIZE_T_MAX /* never */ #define DIVREM_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */ @@ -23,51 +21,57 @@ #define MODEXACT_1_ODD_THRESHOLD 0 /* always (native) */ #define MOD_1_1_THRESHOLD 6 #define MOD_1_2_THRESHOLD 8 -#define MOD_1_3_THRESHOLD 19 +#define MOD_1_3_THRESHOLD 15 #define DIVREM_HENSEL_QR_1_THRESHOLD 9 #define RSH_DIVREM_HENSEL_QR_1_THRESHOLD 3 -#define DIVREM_EUCLID_HENSEL_THRESHOLD 28 +#define DIVREM_EUCLID_HENSEL_THRESHOLD 72 #define MUL_FFT_FULL_THRESHOLD 6976 #define SQR_FFT_FULL_THRESHOLD 3520 -#define MULLOW_BASECASE_THRESHOLD 5 -#define MULLOW_DC_THRESHOLD 53 +#define MULLOW_BASECASE_THRESHOLD 0 /* always */ +#define MULLOW_DC_THRESHOLD 45 #define MULLOW_MUL_THRESHOLD 3436 -#define MULHIGH_BASECASE_THRESHOLD 10 -#define MULHIGH_DC_THRESHOLD 45 +#define MULMID_TOOM42_THRESHOLD 21 + +#define MULHIGH_BASECASE_THRESHOLD 13 +#define MULHIGH_DC_THRESHOLD 14 #define MULHIGH_MUL_THRESHOLD 3336 #define MULMOD_2EXPM1_THRESHOLD 18 -#define DC_DIV_QR_THRESHOLD 47 -#define INV_DIV_QR_THRESHOLD 2350 -#define INV_DIVAPPR_Q_N_THRESHOLD 47 -#define DC_DIV_Q_THRESHOLD 120 -#define INV_DIV_Q_THRESHOLD 998 -#define DC_DIVAPPR_Q_THRESHOLD 288 -#define INV_DIVAPPR_Q_THRESHOLD 7956 +#define DC_DIV_QR_THRESHOLD 48 +#define INV_DIV_QR_THRESHOLD 2130 +#define INV_DIVAPPR_Q_N_THRESHOLD 48 +#define DC_DIV_Q_THRESHOLD 97 +#define INV_DIV_Q_THRESHOLD 1142 +#define DC_DIVAPPR_Q_THRESHOLD 221 +#define INV_DIVAPPR_Q_THRESHOLD 10764 #define DC_BDIV_QR_THRESHOLD 60 -#define DC_BDIV_Q_THRESHOLD 288 +#define DC_BDIV_Q_THRESHOLD 136 + +#define BINV_NEWTON_THRESHOLD 57 +#define REDC_1_TO_REDC_2_THRESHOLD 28 +#define REDC_2_TO_REDC_N_THRESHOLD 0 /* always */ #define ROOTREM_THRESHOLD 6 #define MATRIX22_STRASSEN_THRESHOLD 15 -#define HGCD_THRESHOLD 81 -#define HGCD_APPR_THRESHOLD 69 +#define HGCD_THRESHOLD 95 +#define HGCD_APPR_THRESHOLD 97 #define HGCD_REDUCE_THRESHOLD 6852 -#define GCD_DC_THRESHOLD 807 -#define GCDEXT_DC_THRESHOLD 501 -#define JACOBI_BASE_METHOD 1 +#define GCD_DC_THRESHOLD 706 +#define GCDEXT_DC_THRESHOLD 492 +#define JACOBI_BASE_METHOD 2 -#define GET_STR_DC_THRESHOLD 13 -#define GET_STR_PRECOMPUTE_THRESHOLD 21 -#define SET_STR_DC_THRESHOLD 1418 -#define SET_STR_PRECOMPUTE_THRESHOLD 3106 +#define GET_STR_DC_THRESHOLD 14 +#define GET_STR_PRECOMPUTE_THRESHOLD 20 +#define SET_STR_DC_THRESHOLD 1562 +#define SET_STR_PRECOMPUTE_THRESHOLD 3100 -#define FAC_DSC_THRESHOLD 739 +#define FAC_DSC_THRESHOLD 656 #define FAC_ODD_THRESHOLD 0 /* always */ /* fft_tuning -- autogenerated by tune-fft */ @@ -83,4 +87,4 @@ #define FFT_MULMOD_2EXPP1_CUTOFF 256 -/* Tuneup completed successfully, took 74 seconds */ +/* Tuneup completed successfully, took 75 seconds */ diff --git a/mpn/x86_64w/skylake/gmp-mparam.h b/mpn/x86_64w/skylake/gmp-mparam.h new file mode 100644 index 00000000..be5f601b --- /dev/null +++ b/mpn/x86_64w/skylake/gmp-mparam.h @@ -0,0 +1,90 @@ +/* Generated by tuneup.c, 2017-02-14, gcc 5.4 */ + +#define MUL_KARATSUBA_THRESHOLD 19 +#define MUL_TOOM3_THRESHOLD 137 +#define MUL_TOOM4_THRESHOLD 193 +#define MUL_TOOM8H_THRESHOLD 381 + +#define SQR_BASECASE_THRESHOLD 0 /* always (native) */ +#define SQR_KARATSUBA_THRESHOLD 32 +#define SQR_TOOM3_THRESHOLD 89 +#define SQR_TOOM4_THRESHOLD 258 +#define SQR_TOOM8_THRESHOLD 494 + +#define DIVREM_1_NORM_THRESHOLD MP_SIZE_T_MAX /* never */ +#define DIVREM_1_UNNORM_THRESHOLD MP_SIZE_T_MAX /* never */ +#define MOD_1_NORM_THRESHOLD 0 /* always */ +#define MOD_1_UNNORM_THRESHOLD 0 /* always */ +#define USE_PREINV_DIVREM_1 1 /* native */ +#define USE_PREINV_MOD_1 1 +#define DIVEXACT_1_THRESHOLD 0 /* always */ +#define MODEXACT_1_ODD_THRESHOLD 0 /* always (native) */ +#define MOD_1_1_THRESHOLD 6 +#define MOD_1_2_THRESHOLD 10 +#define MOD_1_3_THRESHOLD 17 +#define DIVREM_HENSEL_QR_1_THRESHOLD 31 +#define RSH_DIVREM_HENSEL_QR_1_THRESHOLD 6 +#define DIVREM_EUCLID_HENSEL_THRESHOLD 35 + +#define MUL_FFT_FULL_THRESHOLD 5056 + +#define SQR_FFT_FULL_THRESHOLD 3008 + +#define MULLOW_BASECASE_THRESHOLD 0 /* always */ +#define MULLOW_DC_THRESHOLD 21 +#define MULLOW_MUL_THRESHOLD 3369 + +#define MULMID_TOOM42_THRESHOLD 24 + +#define MULHIGH_BASECASE_THRESHOLD 10 +#define MULHIGH_DC_THRESHOLD 22 +#define MULHIGH_MUL_THRESHOLD 3336 + +#define MULMOD_2EXPM1_THRESHOLD 18 + +#define DC_DIV_QR_THRESHOLD 29 +#define INV_DIV_QR_THRESHOLD 2444 +#define INV_DIVAPPR_Q_N_THRESHOLD 29 +#define DC_DIV_Q_THRESHOLD 45 +#define INV_DIV_Q_THRESHOLD 1258 +#define DC_DIVAPPR_Q_THRESHOLD 104 +#define INV_DIVAPPR_Q_THRESHOLD 14091 +#define DC_BDIV_QR_THRESHOLD 62 +#define DC_BDIV_Q_THRESHOLD 96 + +#define BINV_NEWTON_THRESHOLD 8 +#define REDC_1_TO_REDC_2_THRESHOLD 22 +#define REDC_2_TO_REDC_N_THRESHOLD 36 + +#define ROOTREM_THRESHOLD 6 + +#define MATRIX22_STRASSEN_THRESHOLD 15 +#define HGCD_THRESHOLD 103 +#define HGCD_APPR_THRESHOLD 98 +#define HGCD_REDUCE_THRESHOLD 6852 +#define GCD_DC_THRESHOLD 752 +#define GCDEXT_DC_THRESHOLD 496 +#define JACOBI_BASE_METHOD 2 + +#define GET_STR_DC_THRESHOLD 14 +#define GET_STR_PRECOMPUTE_THRESHOLD 20 +#define SET_STR_DC_THRESHOLD 2324 +#define SET_STR_PRECOMPUTE_THRESHOLD 3246 + +#define FAC_DSC_THRESHOLD 858 +#define FAC_ODD_THRESHOLD 44 + +/* fft_tuning -- autogenerated by tune-fft */ + +#define FFT_TAB \ + { { 4, 4 }, { 4, 3 }, { 3, 2 }, { 3, 2 }, { 2, 1 } } + +#define MULMOD_TAB \ + { 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 1, 1 } + +#define FFT_N_NUM 19 + +#define FFT_MULMOD_2EXPP1_CUTOFF 256 + + +/* Tuneup completed successfully, took 63 seconds */ \ No newline at end of file