From d942415a1c9ef8714b518cef8170149bf68a80bc Mon Sep 17 00:00:00 2001 From: gladman Date: Sat, 9 May 2009 13:26:27 +0000 Subject: [PATCH] 1. Update Windows Powershell scripts in mpirbench to refer to MPIR rather than GMP 2. Update MPIR version number in Windows config files to 1.1.2 3. Add an MSVC inline definition in gmp-h.in 4. Correct locale test (as per GMP correction) 5. Add Windows x64 set/copy intrinsics to mul_fft.c (improves FFT speed score by 2%) --- bench/build.vc9/runbench.32.ps1 | 8 ++++---- bench/build.vc9/runbench.64.ps1 | 8 ++++---- build.vc9/config.amd64 | 2 +- build.vc9/config.core2 | 2 +- build.vc9/config.gc | 2 +- build.vc9/config.p0 | 2 +- build.vc9/config.p3 | 2 +- build.vc9/config.p4 | 2 +- gmp-h.in | 4 ++++ mpn/generic/mul_fft.c | 12 ++++++++++++ mpn/x86_64w/amd64/gmp-mparam.h | 32 ++++++++++++++++---------------- mpn/x86_64w/core2/gmp-mparam.h | 30 +++++++++++++++--------------- tests/cxx/t-locale.cc | 6 ++++-- 13 files changed, 65 insertions(+), 47 deletions(-) diff --git a/bench/build.vc9/runbench.32.ps1 b/bench/build.vc9/runbench.32.ps1 index 6e699e36..eb029ede 100644 --- a/bench/build.vc9/runbench.32.ps1 +++ b/bench/build.vc9/runbench.32.ps1 @@ -40,19 +40,19 @@ foreach ($cat in $tests) " $t $parms" | Out-Host $out = Invoke-Expression ".\$dir\$t $parms" foreach ($s in $out) { if($s -match "RESULT: [0-9]*") { $res = $s.Split(' ')[1] } } - " GMPbench.$cat.$t.$pcm result: $res" | Out-Host + " MPIRbench.$cat.$t.$pcm result: $res" | Out-Host $acc = Invoke-Expression ".\$dir\gexpr -prec 10 `"$acc*$res`"" $n++ } $out = Invoke-Expression ".\$dir\gexpr `"$acc^(1/$n)`"" - " GMPbench.$cat.$t result: $out" | Out-Host + " MPIRbench.$cat.$t result: $out" | Out-Host $acc1 = Invoke-Expression ".\$dir\gexpr -prec 10 `"$acc1*$acc^(1/$n)`"" $n1++ } $out = Invoke-Expression ".\$dir\gexpr `"$acc1^(1/$n1)`"" - " GMPbench.$cat result: $out" | Out-Host + " MPIRbench.$cat result: $out" | Out-Host $acc2 = Invoke-Expression ".\$dir\gexpr -round -prec 10 
`"$acc2*$acc1^(1/$n1)`"" $n2++ } $out = Invoke-Expression ".\$dir\gexpr -round -prec 10 `"$acc2^(1/$n2)`"" -"GMPbench result: $out" | Out-Host +"MPIRbench result: $out" | Out-Host diff --git a/bench/build.vc9/runbench.64.ps1 b/bench/build.vc9/runbench.64.ps1 index b0879a81..eaa5b29e 100644 --- a/bench/build.vc9/runbench.64.ps1 +++ b/bench/build.vc9/runbench.64.ps1 @@ -40,19 +40,19 @@ foreach ($cat in $tests) " $t $parms" | Out-Host $out = Invoke-Expression ".\$dir\$t $parms" foreach ($s in $out) { if($s -match "RESULT: [0-9]*") { $res = $s.Split(' ')[1] } } - " GMPbench.$cat.$t.$pcm result: $res" | Out-Host + " MPIRbench.$cat.$t.$pcm result: $res" | Out-Host $acc = Invoke-Expression ".\$dir\gexpr -prec 10 `"$acc*$res`"" $n++ } $out = Invoke-Expression ".\$dir\gexpr `"$acc^(1/$n)`"" - " GMPbench.$cat.$t result: $out" | Out-Host + " MPIRbench.$cat.$t result: $out" | Out-Host $acc1 = Invoke-Expression ".\$dir\gexpr -prec 10 `"$acc1*$acc^(1/$n)`"" $n1++ } $out = Invoke-Expression ".\$dir\gexpr `"$acc1^(1/$n1)`"" - " GMPbench.$cat result: $out" | Out-Host + " MPIRbench.$cat result: $out" | Out-Host $acc2 = Invoke-Expression ".\$dir\gexpr -round -prec 10 `"$acc2*$acc1^(1/$n1)`"" $n2++ } $out = Invoke-Expression ".\$dir\gexpr -round -prec 10 `"$acc2^(1/$n2)`"" -"GMPbench result: $out" | Out-Host +"MPIRbench result: $out" | Out-Host diff --git a/build.vc9/config.amd64 b/build.vc9/config.amd64 index 0ea79349..b944d9f0 100644 --- a/build.vc9/config.amd64 +++ b/build.vc9/config.amd64 @@ -479,7 +479,7 @@ MA 02111-1307, USA. */ /* Version number of package */ #define GMP_VERSION "4.2.1" -#define VERSION "1.0.0" +#define VERSION "1.1.2" /* ./configure --enable-assert option, to enable some ASSERT()s */ #undef WANT_ASSERT diff --git a/build.vc9/config.core2 b/build.vc9/config.core2 index 284f4c2a..1365dc82 100644 --- a/build.vc9/config.core2 +++ b/build.vc9/config.core2 @@ -471,7 +471,7 @@ MA 02111-1307, USA. 
*/ /* Version number of package */ #define GMP_VERSION "4.2.1" -#define VERSION "1.0.0" +#define VERSION "1.1.2" /* ./configure --enable-assert option, to enable some ASSERT()s */ #undef WANT_ASSERT diff --git a/build.vc9/config.gc b/build.vc9/config.gc index f01444b1..6c692419 100644 --- a/build.vc9/config.gc +++ b/build.vc9/config.gc @@ -468,7 +468,7 @@ MA 02111-1307, USA. */ /* Version number of package */ #define GMP_VERSION "4.2.1" -#define VERSION "1.0.0" +#define VERSION "1.1.2" /* ./configure --enable-assert option, to enable some ASSERT()s */ #undef WANT_ASSERT diff --git a/build.vc9/config.p0 b/build.vc9/config.p0 index 534e6068..1aa1e55b 100644 --- a/build.vc9/config.p0 +++ b/build.vc9/config.p0 @@ -493,7 +493,7 @@ MA 02111-1307, USA. */ /* Version number of package */ #define GMP_VERSION "4.2.1" -#define VERSION "1.0.0" +#define VERSION "1.1.2" /* ./configure --enable-assert option, to enable some ASSERT()s */ #undef WANT_ASSERT diff --git a/build.vc9/config.p3 b/build.vc9/config.p3 index bc24f199..25a7ad95 100644 --- a/build.vc9/config.p3 +++ b/build.vc9/config.p3 @@ -499,7 +499,7 @@ MA 02111-1307, USA. */ /* Version number of package */ #define GMP_VERSION "4.2.1" -#define VERSION "1.0.0" +#define VERSION "1.1.2" /* ./configure --enable-assert option, to enable some ASSERT()s */ #undef WANT_ASSERT diff --git a/build.vc9/config.p4 b/build.vc9/config.p4 index f81b71c3..04ded4d9 100644 --- a/build.vc9/config.p4 +++ b/build.vc9/config.p4 @@ -503,7 +503,7 @@ MA 02111-1307, USA. */ /* Version number of package */ #define GMP_VERSION "4.2.1" -#define VERSION "1.0.0" +#define VERSION "1.1.2" /* ./configure --enable-assert option, to enable some ASSERT()s */ #undef WANT_ASSERT diff --git a/gmp-h.in b/gmp-h.in index f87aa5a5..f55c96da 100644 --- a/gmp-h.in +++ b/gmp-h.in @@ -442,6 +442,10 @@ typedef __mpq_struct *mpq_ptr; #endif #endif +#ifdef _MSC_VER +#define __GMP_EXTERN_INLINE __inline +#endif + /* DEC C (eg. 
version 5.9) supports "static __inline foo()", even in -std1 strict ANSI mode. Inlining is done even when not optimizing (ie. -O0 mode, which is the default), but an unnecessary local copy of foo is diff --git a/mpn/generic/mul_fft.c b/mpn/generic/mul_fft.c index 0d48bc15..71b4bd66 100644 --- a/mpn/generic/mul_fft.c +++ b/mpn/generic/mul_fft.c @@ -116,6 +116,10 @@ MPN_FFT_ZERO (void *dst, long int n) __asm__ __volatile__ ("rep stosl" : "+c" (n), "+D" (dst) : "a" (0) : "memory"); } +#elif defined( _MSC_VER ) && defined( _M_X64 ) +#include <intrin.h> +#pragma intrinsic(__stosq) +#define MPN_FFT_ZERO(d, l) __stosq(d, 0, l) #else /* Fall back to GMP's MPN_ZERO() macro */ #define MPN_FFT_ZERO(dst, n) MPN_ZERO(dst,n) @@ -140,6 +144,10 @@ MPN_FFT_STORE (void *dst, long int n, long int d) __asm__ __volatile__ ("rep stosl" : "+c" (n), "+D" (dst) : "a" (d) : "memory"); } +#elif defined( _MSC_VER ) && defined( _M_X64 ) +#include <intrin.h> +#pragma intrinsic(__stosq) +#define MPN_FFT_STORE(d, l, v) __stosq(d, v, l) #else void static inline MPN_FFT_STORE (mp_limb_t *dst, mp_size_t n, mp_limb_t d) @@ -168,6 +176,10 @@ MPN_FFT_COPY (void *dst, const void *src, long int n) __asm__ __volatile__ ("rep movsl" : "+c" (n), "+S" (src), "+D" (dst) : "memory"); } +#elif defined( _MSC_VER ) && defined( _M_X64 ) +#include <intrin.h> +#pragma intrinsic(__movsq) +#define MPN_FFT_COPY(d, s, l) __movsq(d, s, l) #else /* Fall back to GMP's MPN_COPY() macro */ #define MPN_FFT_COPY(dst, src, n) MPN_COPY(dst,src,n) diff --git a/mpn/x86_64w/amd64/gmp-mparam.h b/mpn/x86_64w/amd64/gmp-mparam.h index d6837526..13af7352 100644 --- a/mpn/x86_64w/amd64/gmp-mparam.h +++ b/mpn/x86_64w/amd64/gmp-mparam.h @@ -1,24 +1,24 @@ -/* Generated by tuneup.c, 2009-05-06, system compiler */ +/* Generated by tuneup.c, 2009-05-08, system compiler */ -#define MUL_KARATSUBA_THRESHOLD 26 -#define MUL_TOOM3_THRESHOLD 168 -#define MUL_TOOM4_THRESHOLD 842 -#define MUL_TOOM7_THRESHOLD 842 +#define MUL_KARATSUBA_THRESHOLD 28 +#define MUL_TOOM3_THRESHOLD 129 
+#define MUL_TOOM4_THRESHOLD 746 +#define MUL_TOOM7_THRESHOLD 746 #define SQR_BASECASE_THRESHOLD 0 /* always (native) */ #define SQR_KARATSUBA_THRESHOLD 34 #define SQR_TOOM3_THRESHOLD 218 -#define MULLOW_BASECASE_THRESHOLD 7 -#define MULLOW_DC_THRESHOLD 99 -#define MULLOW_MUL_N_THRESHOLD 210 +#define MULLOW_BASECASE_THRESHOLD 38 +#define MULLOW_DC_THRESHOLD 87 +#define MULLOW_MUL_N_THRESHOLD 177 #define DIV_SB_PREINV_THRESHOLD 0 /* always */ -#define DIV_DC_THRESHOLD 108 -#define POWM_THRESHOLD 170 +#define DIV_DC_THRESHOLD 106 +#define POWM_THRESHOLD 190 -#define GCD_ACCEL_THRESHOLD 3 -#define GCDEXT_THRESHOLD 224 +#define GCD_ACCEL_THRESHOLD 14 +#define GCDEXT_THRESHOLD 208 #define JACOBI_BASE_METHOD 2 #define DIVREM_1_NORM_THRESHOLD MP_SIZE_T_MAX /* never */ @@ -35,15 +35,15 @@ #define GET_STR_PRECOMPUTE_THRESHOLD 11 #define SET_STR_THRESHOLD 8540 -#define MUL_FFT_TABLE { 464, 1056, 3392, 3840, 7168, 28672, 0 } +#define MUL_FFT_TABLE { 432, 1056, 3264, 3840, 7168, 36864, 0 } #define MUL_FFT_MODF_THRESHOLD 1488 #define MUL_FFT_THRESHOLD 14336 -#define SQR_FFT_TABLE { 560, 1056, 3392, 3840, 7168, 36864, 0 } +#define SQR_FFT_TABLE { 464, 1056, 3264, 3840, 7168, 36864, 0 } #define SQR_FFT_MODF_THRESHOLD 1488 -#define SQR_FFT_THRESHOLD 13120 +#define SQR_FFT_THRESHOLD 8916 -/* Tuneup completed successfully, took 1381 seconds */ +/* Tuneup completed successfully, took 1073 seconds */ #define MUL_FFT_TABLE2 {{1, 2}, {215, 3}, {220, 2}, {404, 3}, {413, 2}, {443, 3}, {453, 2}, {507, 3}, {519, 2}, {695, 3}, {711, 2}, {727, 3}, {777, 2}, {831, 3}, {850, 2}, {1015, 3}, {1038, 2}, {1185, 3}, {1211, 2}, {1266, 3}, {1352, 2}, {1382, 3}, {1476, 4}, {1509, 2}, {1543, 3}, {1577, 2}, {1612, 3}, {1648, 2}, {2340, 3}, {2392, 2}, {2499, 3}, {2610, 2}, {2668, 3}, {2787, 2}, {3248, 3}, {3393, 2}, {3468, 3}, {3544, 2}, {3622, 3}, {3702, 2}, {3784, 3}, {3867, 4}, {3952, 3}, {4128, 2}, {4914, 3}, {5022, 4}, {5132, 3}, {5245, 2}, {5360, 3}, {5478, 4}, {5598, 2}, {5721, 3}, {6107, 2}, 
{6378, 3}, {6661, 2}, {6807, 3}, {6957, 2}, {7110, 3}, {7426, 2}, {7926, 3}, {8100, 4}, {8460, 2}, {9030, 3}, {9228, 4}, {9431, 2}, {9850, 3}, {10287, 2}, {10744, 3}, {10980, 4}, {11221, 3}, {11467, 4}, {11976, 3}, {13061, 4}, {13640, 3}, {13939, 4}, {14245, 3}, {14557, 4}, {14876, 5}, {15202, 4}, {16224, 3}, {17316, 4}, {18084, 2}, {18480, 3}, {19299, 2}, {19722, 3}, {20154, 2}, {20596, 3}, {21508, 4}, {21979, 3}, {22461, 2}, {23970, 3}, {24495, 4}, {25032, 5}, {25581, 6}, {26142, 7}, {26715, 8}, {27300, 9}, {28509, 7}, {29134, 8}, {31091, 9}, {33179, 10}, {33906, 9}, {37787, 10}, {39461, 9}, {42113, 10}, {43036, 9}, {43979, 10}, {45928, 11}, {46934, 10}, {47962, 9}, {51184, 10}, {52305, 9}, {54622, 10}, {55819, 8}, {57042, 9}, {59569, 10}, {60874, 11}, {62207, 12}, {63570, 10}, {64963, 9}, {67840, 10}, {72397, 9}, {75604, 10}, {77260, 9}, {78952, 10}, {86099, 11}, {87985, 10}, {91881, 11}, {93893, 10}, {95949, 11}, {98051, 12}, {MP_SIZE_T_MAX,0}} diff --git a/mpn/x86_64w/core2/gmp-mparam.h b/mpn/x86_64w/core2/gmp-mparam.h index 32730711..442c6857 100644 --- a/mpn/x86_64w/core2/gmp-mparam.h +++ b/mpn/x86_64w/core2/gmp-mparam.h @@ -1,24 +1,24 @@ -/* Generated by tuneup.c, 2009-05-06, system compiler */ +/* Generated by tuneup.c, 2009-05-08, system compiler */ #define MUL_KARATSUBA_THRESHOLD 16 -#define MUL_TOOM3_THRESHOLD 113 -#define MUL_TOOM4_THRESHOLD 502 -#define MUL_TOOM7_THRESHOLD 502 +#define MUL_TOOM3_THRESHOLD 114 +#define MUL_TOOM4_THRESHOLD 199 +#define MUL_TOOM7_THRESHOLD 390 #define SQR_BASECASE_THRESHOLD 0 /* always (native) */ #define SQR_KARATSUBA_THRESHOLD 26 -#define SQR_TOOM3_THRESHOLD 177 +#define SQR_TOOM3_THRESHOLD 176 -#define MULLOW_BASECASE_THRESHOLD 6 -#define MULLOW_DC_THRESHOLD 79 -#define MULLOW_MUL_N_THRESHOLD 378 +#define MULLOW_BASECASE_THRESHOLD 7 +#define MULLOW_DC_THRESHOLD 71 +#define MULLOW_MUL_N_THRESHOLD 339 #define DIV_SB_PREINV_THRESHOLD 0 /* always */ -#define DIV_DC_THRESHOLD 93 -#define POWM_THRESHOLD 79 +#define 
DIV_DC_THRESHOLD 88 +#define POWM_THRESHOLD 89 -#define GCD_ACCEL_THRESHOLD 16 -#define GCDEXT_THRESHOLD 155 +#define GCD_ACCEL_THRESHOLD 3 +#define GCDEXT_THRESHOLD 173 #define JACOBI_BASE_METHOD 1 #define DIVREM_1_NORM_THRESHOLD MP_SIZE_T_MAX /* never */ @@ -37,13 +37,13 @@ #define MUL_FFT_TABLE { 432, 864, 2368, 2816, 7168, 20480, 0 } #define MUL_FFT_MODF_THRESHOLD 1120 -#define MUL_FFT_THRESHOLD 5760 +#define MUL_FFT_THRESHOLD 6016 -#define SQR_FFT_TABLE { 432, 928, 2496, 2816, 7168, 36864, 0 } +#define SQR_FFT_TABLE { 432, 928, 2496, 2816, 7168, 20480, 49152, 0 } #define SQR_FFT_MODF_THRESHOLD 1120 #define SQR_FFT_THRESHOLD 5408 -/* Tuneup completed successfully, took 202 seconds */ +/* Tuneup completed successfully, took 194 seconds */ #define MUL_FFT_TABLE2 {{1, 2}, {248, 3}, {254, 2}, {299, 3}, {306, 2}, {360, 3}, {368, 2}, {760, 3}, {777, 2}, {831, 3}, {869, 2}, {971, 3}, {993, 2}, {1015, 3}, {1038, 2}, {1085, 3}, {1109, 4}, {1159, 2}, {1211, 3}, {1238, 2}, {1266, 3}, {1294, 4}, {1323, 2}, {1444, 3}, {1476, 2}, {1799, 3}, {1839, 2}, {2098, 3}, {2144, 2}, {2392, 3}, {2445, 2}, {2668, 3}, {2727, 2}, {2787, 3}, {2849, 2}, {2912, 3}, {3042, 2}, {3109, 3}, {3248, 4}, {3320, 5}, {3468, 6}, {3544, 5}, {3622, 3}, {3784, 2}, {4504, 3}, {4603, 2}, {4808, 3}, {4914, 4}, {5022, 3}, {5245, 2}, {5478, 3}, {5721, 2}, {5976, 3}, {6107, 2}, {6378, 3}, {6518, 2}, {7110, 3}, {7756, 4}, {7926, 3}, {8100, 2}, {9431, 3}, {10066, 4}, {10287, 5}, {10513, 6}, {10744, 7}, {11221, 8}, {11467, 7}, {11719, 8}, {12507, 9}, {12781, 8}, {13347, 9}, {13939, 10}, {14876, 8}, {15202, 9}, {15876, 8}, {16224, 9}, {16944, 8}, {17316, 9}, {18480, 8}, {18885, 9}, {19722, 10}, {20596, 9}, {21047, 10}, {21508, 9}, {21979, 10}, {23970, 9}, {25581, 10}, {26715, 8}, {27300, 9}, {29134, 10}, {29772, 11}, {31091, 9}, {33179, 10}, {36977, 11}, {37787, 9}, {38615, 10}, {41210, 11}, {42113, 10}, {43036, 11}, {43979, 10}, {46934, 11}, {47962, 10}, {49013, 11}, {50087, 10}, {52305, 11}, {54622, 12}, {55819, 
11}, {57042, 12}, {58292, 11}, {62207, 12}, {63570, 10}, {69326, 11}, {70845, 10}, {72397, 11}, {75604, 10}, {77260, 11}, {87985, 10}, {89912, 11}, {MP_SIZE_T_MAX,0}} diff --git a/tests/cxx/t-locale.cc b/tests/cxx/t-locale.cc index 7f068983..21c8ba4e 100644 --- a/tests/cxx/t-locale.cc +++ b/tests/cxx/t-locale.cc @@ -29,8 +29,10 @@ MA 02110-1301, USA. */ using namespace std; - -char point_string[2]; +extern "C" +{ + char point_string[2]; +}; #if HAVE_STD__LOCALE // Like std::numpunct, but with decimal_point coming from point_string[].