From a047b009a7e5979d74af934c00173e5d372d0fd1 Mon Sep 17 00:00:00 2001 From: jasonmoxham Date: Tue, 22 Nov 2011 10:57:58 +0000 Subject: [PATCH] k8 tuning --- mpn/x86_64/k8/gmp-mparam.h | 54 +++++++++++++++++++------------------- 1 file changed, 27 insertions(+), 27 deletions(-) diff --git a/mpn/x86_64/k8/gmp-mparam.h b/mpn/x86_64/k8/gmp-mparam.h index 85be0b9d..7cea735b 100644 --- a/mpn/x86_64/k8/gmp-mparam.h +++ b/mpn/x86_64/k8/gmp-mparam.h @@ -1,20 +1,21 @@ -/* Generated by tuneup.c, 2011-07-11, gcc 4.5 */ +/* Generated by tuneup.c, 2011-11-15, gcc 4.5 */ #define MUL_KARATSUBA_THRESHOLD 22 #define MUL_TOOM3_THRESHOLD 134 -#define MUL_TOOM4_THRESHOLD 387 -#define MUL_TOOM8H_THRESHOLD 446 +#define MUL_TOOM4_THRESHOLD 242 +#define MUL_TOOM8H_THRESHOLD 414 #define SQR_BASECASE_THRESHOLD 0 /* always (native) */ #define SQR_KARATSUBA_THRESHOLD 43 -#define SQR_TOOM3_THRESHOLD 125 +#define SQR_TOOM3_THRESHOLD 124 #define SQR_TOOM4_THRESHOLD 512 #define SQR_TOOM8_THRESHOLD 674 -#define POWM_THRESHOLD 464 +#define POWM_THRESHOLD 391 -#define GCD_THRESHOLD 446 -#define GCDEXT_THRESHOLD 969 +#define HGCD_THRESHOLD 30 +#define GCD_DC_THRESHOLD 2048 +#define GCDEXT_DC_THRESHOLD 1252 #define JACOBI_BASE_METHOD 1 #define DIVREM_1_NORM_THRESHOLD MP_SIZE_T_MAX /* never */ @@ -27,49 +28,48 @@ #define MODEXACT_1_ODD_THRESHOLD 0 /* always (native) */ #define MOD_1_1_THRESHOLD 4 #define MOD_1_2_THRESHOLD 8 -#define MOD_1_3_THRESHOLD 24 +#define MOD_1_3_THRESHOLD 11 #define DIVREM_HENSEL_QR_1_THRESHOLD 8 #define RSH_DIVREM_HENSEL_QR_1_THRESHOLD 7 -#define DIVREM_EUCLID_HENSEL_THRESHOLD 30 +#define DIVREM_EUCLID_HENSEL_THRESHOLD 21 -#define ROOTREM_THRESHOLD 11 +#define ROOTREM_THRESHOLD 8 -#define GET_STR_DC_THRESHOLD 15 -#define GET_STR_PRECOMPUTE_THRESHOLD 26 -#define SET_STR_DC_THRESHOLD 27410 -#define SET_STR_PRECOMPUTE_THRESHOLD 52671 +#define GET_STR_DC_THRESHOLD 16 +#define GET_STR_PRECOMPUTE_THRESHOLD 23 +#define SET_STR_DC_THRESHOLD 8262 +#define SET_STR_PRECOMPUTE_THRESHOLD 55160 -#define MUL_FFT_TABLE { 432, 1056, 3136, 3840, 7168, 45056, 114688, 589824, 0 } -#define MUL_FFT_MODF_THRESHOLD 528 -#define MUL_FFT_FULL_THRESHOLD 3264 +#define MUL_FFT_TABLE { 432, 1056, 2880, 3328, 7168, 36864, 114688, 589824, 2359296, 7340032, 0 } +#define MUL_FFT_MODF_THRESHOLD 496 +#define MUL_FFT_FULL_THRESHOLD 3008 -#define SQR_FFT_TABLE { 464, 928, 2368, 2816, 13312, 45056, 114688, 458752, 0 } -#define SQR_FFT_MODF_THRESHOLD 464 +#define SQR_FFT_TABLE { 432, 992, 2368, 2816, 13312, 45056, 114688, 458752, 2359296, 7340032, 0 } +#define SQR_FFT_MODF_THRESHOLD 432 #define SQR_FFT_FULL_THRESHOLD 2496 #define MULLOW_BASECASE_THRESHOLD 9 -#define MULLOW_DC_THRESHOLD 13 +#define MULLOW_DC_THRESHOLD 17 #define MULLOW_MUL_THRESHOLD 9970 -#define MULHIGH_BASECASE_THRESHOLD 21 -#define MULHIGH_DC_THRESHOLD 21 +#define MULHIGH_BASECASE_THRESHOLD 25 +#define MULHIGH_DC_THRESHOLD 25 #define MULHIGH_MUL_THRESHOLD 9970 -#define MULMOD_2EXPM1_THRESHOLD 20 +#define MULMOD_2EXPM1_THRESHOLD 22 #define FAC_UI_THRESHOLD 32756 #define DC_DIV_QR_THRESHOLD 46 #define DC_DIVAPPR_Q_N_THRESHOLD 156 #define INV_DIV_QR_THRESHOLD 9894 #define INV_DIVAPPR_Q_N_THRESHOLD 156 -#define DC_DIV_Q_THRESHOLD 195 +#define DC_DIV_Q_THRESHOLD 192 #define INV_DIV_Q_THRESHOLD 9894 #define DC_DIVAPPR_Q_THRESHOLD 171 #define INV_DIVAPPR_Q_THRESHOLD 19441 #define DC_BDIV_QR_THRESHOLD 42 -#define DC_BDIV_Q_THRESHOLD 24 -/* Tuneup completed successfully, took 131 seconds */ - +#define DC_BDIV_Q_THRESHOLD 20 +/* Tuneup completed successfully, took 817 seconds */ #define MUL_FFT_TABLE2 {{1, 3}, {205, 4}, {377, 5}, {386, 4}, {404, 5}, {813, 6}, {850, 5}, {869, 6}, {971, 5}, {993, 6}, {2392, 7}, {2445, 6}, {2668, 7}, {2727, 6}, {2787, 7}, {2976, 6}, {3042, 7}, {3109, 6}, {3178, 7}, {3248, 8}, {3393, 7}, {3468, 8}, {3544, 7}, {3784, 8}, {3867, 6}, {3952, 7}, {4039, 8}, {4407, 7}, {4504, 8}, {4914, 7}, {5022, 8}, {6957, 9}, {7756, 8}, {8460, 9}, {8836, 8}, {9030, 9}, {9850, 8}, {10513, 9}, {11976, 8}, {12239, 9}, {13939, 8}, {14245, 9}, {15876, 8}, {16224, 9}, {22461, 10}, {23970, 9}, {26142, 10}, {27898, 9}, {28509, 10}, {31772, 9}, {33906, 10}, {36184, 9}, {36977, 10}, {40326, 9}, {41210, 10}, {44943, 11}, {45928, 10}, {46934, 11}, {47962, 10}, {57042, 11}, {62207, 12}, {63570, 10}, {73983, 11}, {80681, 10}, {86099, 11}, {98051, 10}, {102394, 11}, {114110, 12}, {127165, 11}, {164920, 12}, {196129, 11}, {228243, 12}, {233241, 13}, {254354, 12}, {259924, 11}, {277377, 12}, {456509, 13}, {519871, 12}, {659749, 13}, {784582, 14}, {MP_SIZE_T_MAX,0}}