1. Update Windows PowerShell scripts in mpirbench to refer to MPIR rather than GMP

2. Update MPIR version number in Windows config files to 1.1.2 
3. Add an MSVC inline definition in gmp-h.in
4. Correct locale test (as per GMP correction)
5. Add Windows x64 set/copy intrinsics to mul_fft.c (improves FFT speed score by 2%)
This commit is contained in:
gladman 2009-05-09 13:26:27 +00:00
parent 0ba06242c6
commit d942415a1c
13 changed files with 65 additions and 47 deletions

View File

@ -40,19 +40,19 @@ foreach ($cat in $tests)
" $t $parms" | Out-Host
$out = Invoke-Expression ".\$dir\$t $parms"
foreach ($s in $out) { if($s -match "RESULT: [0-9]*") { $res = $s.Split(' ')[1] } }
" GMPbench.$cat.$t.$pcm result: $res" | Out-Host
" MPIRbench.$cat.$t.$pcm result: $res" | Out-Host
$acc = Invoke-Expression ".\$dir\gexpr -prec 10 `"$acc*$res`""
$n++
}
$out = Invoke-Expression ".\$dir\gexpr `"$acc^(1/$n)`""
" GMPbench.$cat.$t result: $out" | Out-Host
" MPIRbench.$cat.$t result: $out" | Out-Host
$acc1 = Invoke-Expression ".\$dir\gexpr -prec 10 `"$acc1*$acc^(1/$n)`""
$n1++
}
$out = Invoke-Expression ".\$dir\gexpr `"$acc1^(1/$n1)`""
" GMPbench.$cat result: $out" | Out-Host
" MPIRbench.$cat result: $out" | Out-Host
$acc2 = Invoke-Expression ".\$dir\gexpr -round -prec 10 `"$acc2*$acc1^(1/$n1)`""
$n2++
}
$out = Invoke-Expression ".\$dir\gexpr -round -prec 10 `"$acc2^(1/$n2)`""
"GMPbench result: $out" | Out-Host
"MPIRbench result: $out" | Out-Host

View File

@ -40,19 +40,19 @@ foreach ($cat in $tests)
" $t $parms" | Out-Host
$out = Invoke-Expression ".\$dir\$t $parms"
foreach ($s in $out) { if($s -match "RESULT: [0-9]*") { $res = $s.Split(' ')[1] } }
" GMPbench.$cat.$t.$pcm result: $res" | Out-Host
" MPIRbench.$cat.$t.$pcm result: $res" | Out-Host
$acc = Invoke-Expression ".\$dir\gexpr -prec 10 `"$acc*$res`""
$n++
}
$out = Invoke-Expression ".\$dir\gexpr `"$acc^(1/$n)`""
" GMPbench.$cat.$t result: $out" | Out-Host
" MPIRbench.$cat.$t result: $out" | Out-Host
$acc1 = Invoke-Expression ".\$dir\gexpr -prec 10 `"$acc1*$acc^(1/$n)`""
$n1++
}
$out = Invoke-Expression ".\$dir\gexpr `"$acc1^(1/$n1)`""
" GMPbench.$cat result: $out" | Out-Host
" MPIRbench.$cat result: $out" | Out-Host
$acc2 = Invoke-Expression ".\$dir\gexpr -round -prec 10 `"$acc2*$acc1^(1/$n1)`""
$n2++
}
$out = Invoke-Expression ".\$dir\gexpr -round -prec 10 `"$acc2^(1/$n2)`""
"GMPbench result: $out" | Out-Host
"MPIRbench result: $out" | Out-Host

View File

@ -479,7 +479,7 @@ MA 02111-1307, USA. */
/* Version number of package */
#define GMP_VERSION "4.2.1"
#define VERSION "1.0.0"
#define VERSION "1.1.2"
/* ./configure --enable-assert option, to enable some ASSERT()s */
#undef WANT_ASSERT

View File

@ -471,7 +471,7 @@ MA 02111-1307, USA. */
/* Version number of package */
#define GMP_VERSION "4.2.1"
#define VERSION "1.0.0"
#define VERSION "1.1.2"
/* ./configure --enable-assert option, to enable some ASSERT()s */
#undef WANT_ASSERT

View File

@ -468,7 +468,7 @@ MA 02111-1307, USA. */
/* Version number of package */
#define GMP_VERSION "4.2.1"
#define VERSION "1.0.0"
#define VERSION "1.1.2"
/* ./configure --enable-assert option, to enable some ASSERT()s */
#undef WANT_ASSERT

View File

@ -493,7 +493,7 @@ MA 02111-1307, USA. */
/* Version number of package */
#define GMP_VERSION "4.2.1"
#define VERSION "1.0.0"
#define VERSION "1.1.2"
/* ./configure --enable-assert option, to enable some ASSERT()s */
#undef WANT_ASSERT

View File

@ -499,7 +499,7 @@ MA 02111-1307, USA. */
/* Version number of package */
#define GMP_VERSION "4.2.1"
#define VERSION "1.0.0"
#define VERSION "1.1.2"
/* ./configure --enable-assert option, to enable some ASSERT()s */
#undef WANT_ASSERT

View File

@ -503,7 +503,7 @@ MA 02111-1307, USA. */
/* Version number of package */
#define GMP_VERSION "4.2.1"
#define VERSION "1.0.0"
#define VERSION "1.1.2"
/* ./configure --enable-assert option, to enable some ASSERT()s */
#undef WANT_ASSERT

View File

@ -442,6 +442,10 @@ typedef __mpq_struct *mpq_ptr;
#endif
#endif
#ifdef _MSC_VER
#define __GMP_EXTERN_INLINE __inline
#endif
/* DEC C (eg. version 5.9) supports "static __inline foo()", even in -std1
strict ANSI mode. Inlining is done even when not optimizing (ie. -O0
mode, which is the default), but an unnecessary local copy of foo is

View File

@ -116,6 +116,10 @@ MPN_FFT_ZERO (void *dst, long int n)
__asm__ __volatile__ ("rep stosl" : "+c" (n), "+D" (dst) : "a" (0) :
"memory");
}
#elif defined( _MSC_VER ) && defined( _M_X64 )
#include <intrin.h>
#pragma intrinsic(__stosq)
#define MPN_FFT_ZERO(d, l) __stosq(d, 0, l)
#else
/* Fall back to GMP's MPN_ZERO() macro */
#define MPN_FFT_ZERO(dst, n) MPN_ZERO(dst,n)
@ -140,6 +144,10 @@ MPN_FFT_STORE (void *dst, long int n, long int d)
__asm__ __volatile__ ("rep stosl" : "+c" (n), "+D" (dst) : "a" (d) :
"memory");
}
#elif defined( _MSC_VER ) && defined( _M_X64 )
#include <intrin.h>
#pragma intrinsic(__stosq)
#define MPN_FFT_STORE(d, l, v) __stosq(d, v, l)
#else
void static inline
MPN_FFT_STORE (mp_limb_t *dst, mp_size_t n, mp_limb_t d)
@ -168,6 +176,10 @@ MPN_FFT_COPY (void *dst, const void *src, long int n)
__asm__ __volatile__ ("rep movsl" : "+c" (n), "+S" (src), "+D" (dst) :
"memory");
}
#elif defined( _MSC_VER ) && defined( _M_X64 )
#include <intrin.h>
#pragma intrinsic(__movsq)
#define MPN_FFT_COPY(d, s, l) __movsq(d, s, l)
#else
/* Fall back to GMP's MPN_COPY() macro */
#define MPN_FFT_COPY(dst, src, n) MPN_COPY(dst,src,n)

View File

@ -1,24 +1,24 @@
/* Generated by tuneup.c, 2009-05-06, system compiler */
/* Generated by tuneup.c, 2009-05-08, system compiler */
#define MUL_KARATSUBA_THRESHOLD 26
#define MUL_TOOM3_THRESHOLD 168
#define MUL_TOOM4_THRESHOLD 842
#define MUL_TOOM7_THRESHOLD 842
#define MUL_KARATSUBA_THRESHOLD 28
#define MUL_TOOM3_THRESHOLD 129
#define MUL_TOOM4_THRESHOLD 746
#define MUL_TOOM7_THRESHOLD 746
#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
#define SQR_KARATSUBA_THRESHOLD 34
#define SQR_TOOM3_THRESHOLD 218
#define MULLOW_BASECASE_THRESHOLD 7
#define MULLOW_DC_THRESHOLD 99
#define MULLOW_MUL_N_THRESHOLD 210
#define MULLOW_BASECASE_THRESHOLD 38
#define MULLOW_DC_THRESHOLD 87
#define MULLOW_MUL_N_THRESHOLD 177
#define DIV_SB_PREINV_THRESHOLD 0 /* always */
#define DIV_DC_THRESHOLD 108
#define POWM_THRESHOLD 170
#define DIV_DC_THRESHOLD 106
#define POWM_THRESHOLD 190
#define GCD_ACCEL_THRESHOLD 3
#define GCDEXT_THRESHOLD 224
#define GCD_ACCEL_THRESHOLD 14
#define GCDEXT_THRESHOLD 208
#define JACOBI_BASE_METHOD 2
#define DIVREM_1_NORM_THRESHOLD MP_SIZE_T_MAX /* never */
@ -35,15 +35,15 @@
#define GET_STR_PRECOMPUTE_THRESHOLD 11
#define SET_STR_THRESHOLD 8540
#define MUL_FFT_TABLE { 464, 1056, 3392, 3840, 7168, 28672, 0 }
#define MUL_FFT_TABLE { 432, 1056, 3264, 3840, 7168, 36864, 0 }
#define MUL_FFT_MODF_THRESHOLD 1488
#define MUL_FFT_THRESHOLD 14336
#define SQR_FFT_TABLE { 560, 1056, 3392, 3840, 7168, 36864, 0 }
#define SQR_FFT_TABLE { 464, 1056, 3264, 3840, 7168, 36864, 0 }
#define SQR_FFT_MODF_THRESHOLD 1488
#define SQR_FFT_THRESHOLD 13120
#define SQR_FFT_THRESHOLD 8916
/* Tuneup completed successfully, took 1381 seconds */
/* Tuneup completed successfully, took 1073 seconds */
#define MUL_FFT_TABLE2 {{1, 2}, {215, 3}, {220, 2}, {404, 3}, {413, 2}, {443, 3}, {453, 2}, {507, 3}, {519, 2}, {695, 3}, {711, 2}, {727, 3}, {777, 2}, {831, 3}, {850, 2}, {1015, 3}, {1038, 2}, {1185, 3}, {1211, 2}, {1266, 3}, {1352, 2}, {1382, 3}, {1476, 4}, {1509, 2}, {1543, 3}, {1577, 2}, {1612, 3}, {1648, 2}, {2340, 3}, {2392, 2}, {2499, 3}, {2610, 2}, {2668, 3}, {2787, 2}, {3248, 3}, {3393, 2}, {3468, 3}, {3544, 2}, {3622, 3}, {3702, 2}, {3784, 3}, {3867, 4}, {3952, 3}, {4128, 2}, {4914, 3}, {5022, 4}, {5132, 3}, {5245, 2}, {5360, 3}, {5478, 4}, {5598, 2}, {5721, 3}, {6107, 2}, {6378, 3}, {6661, 2}, {6807, 3}, {6957, 2}, {7110, 3}, {7426, 2}, {7926, 3}, {8100, 4}, {8460, 2}, {9030, 3}, {9228, 4}, {9431, 2}, {9850, 3}, {10287, 2}, {10744, 3}, {10980, 4}, {11221, 3}, {11467, 4}, {11976, 3}, {13061, 4}, {13640, 3}, {13939, 4}, {14245, 3}, {14557, 4}, {14876, 5}, {15202, 4}, {16224, 3}, {17316, 4}, {18084, 2}, {18480, 3}, {19299, 2}, {19722, 3}, {20154, 2}, {20596, 3}, {21508, 4}, {21979, 3}, {22461, 2}, {23970, 3}, {24495, 4}, {25032, 5}, {25581, 6}, {26142, 7}, {26715, 8}, {27300, 9}, {28509, 7}, {29134, 8}, {31091, 9}, {33179, 10}, {33906, 9}, {37787, 10}, {39461, 9}, {42113, 10}, {43036, 9}, {43979, 10}, {45928, 11}, {46934, 10}, {47962, 9}, {51184, 10}, {52305, 9}, {54622, 10}, {55819, 8}, {57042, 9}, {59569, 10}, {60874, 11}, {62207, 12}, {63570, 10}, {64963, 9}, {67840, 10}, {72397, 9}, {75604, 10}, {77260, 9}, {78952, 10}, {86099, 11}, {87985, 10}, {91881, 11}, {93893, 10}, {95949, 11}, {98051, 12}, {MP_SIZE_T_MAX,0}}

View File

@ -1,24 +1,24 @@
/* Generated by tuneup.c, 2009-05-06, system compiler */
/* Generated by tuneup.c, 2009-05-08, system compiler */
#define MUL_KARATSUBA_THRESHOLD 16
#define MUL_TOOM3_THRESHOLD 113
#define MUL_TOOM4_THRESHOLD 502
#define MUL_TOOM7_THRESHOLD 502
#define MUL_TOOM3_THRESHOLD 114
#define MUL_TOOM4_THRESHOLD 199
#define MUL_TOOM7_THRESHOLD 390
#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
#define SQR_KARATSUBA_THRESHOLD 26
#define SQR_TOOM3_THRESHOLD 177
#define SQR_TOOM3_THRESHOLD 176
#define MULLOW_BASECASE_THRESHOLD 6
#define MULLOW_DC_THRESHOLD 79
#define MULLOW_MUL_N_THRESHOLD 378
#define MULLOW_BASECASE_THRESHOLD 7
#define MULLOW_DC_THRESHOLD 71
#define MULLOW_MUL_N_THRESHOLD 339
#define DIV_SB_PREINV_THRESHOLD 0 /* always */
#define DIV_DC_THRESHOLD 93
#define POWM_THRESHOLD 79
#define DIV_DC_THRESHOLD 88
#define POWM_THRESHOLD 89
#define GCD_ACCEL_THRESHOLD 16
#define GCDEXT_THRESHOLD 155
#define GCD_ACCEL_THRESHOLD 3
#define GCDEXT_THRESHOLD 173
#define JACOBI_BASE_METHOD 1
#define DIVREM_1_NORM_THRESHOLD MP_SIZE_T_MAX /* never */
@ -37,13 +37,13 @@
#define MUL_FFT_TABLE { 432, 864, 2368, 2816, 7168, 20480, 0 }
#define MUL_FFT_MODF_THRESHOLD 1120
#define MUL_FFT_THRESHOLD 5760
#define MUL_FFT_THRESHOLD 6016
#define SQR_FFT_TABLE { 432, 928, 2496, 2816, 7168, 36864, 0 }
#define SQR_FFT_TABLE { 432, 928, 2496, 2816, 7168, 20480, 49152, 0 }
#define SQR_FFT_MODF_THRESHOLD 1120
#define SQR_FFT_THRESHOLD 5408
/* Tuneup completed successfully, took 202 seconds */
/* Tuneup completed successfully, took 194 seconds */
#define MUL_FFT_TABLE2 {{1, 2}, {248, 3}, {254, 2}, {299, 3}, {306, 2}, {360, 3}, {368, 2}, {760, 3}, {777, 2}, {831, 3}, {869, 2}, {971, 3}, {993, 2}, {1015, 3}, {1038, 2}, {1085, 3}, {1109, 4}, {1159, 2}, {1211, 3}, {1238, 2}, {1266, 3}, {1294, 4}, {1323, 2}, {1444, 3}, {1476, 2}, {1799, 3}, {1839, 2}, {2098, 3}, {2144, 2}, {2392, 3}, {2445, 2}, {2668, 3}, {2727, 2}, {2787, 3}, {2849, 2}, {2912, 3}, {3042, 2}, {3109, 3}, {3248, 4}, {3320, 5}, {3468, 6}, {3544, 5}, {3622, 3}, {3784, 2}, {4504, 3}, {4603, 2}, {4808, 3}, {4914, 4}, {5022, 3}, {5245, 2}, {5478, 3}, {5721, 2}, {5976, 3}, {6107, 2}, {6378, 3}, {6518, 2}, {7110, 3}, {7756, 4}, {7926, 3}, {8100, 2}, {9431, 3}, {10066, 4}, {10287, 5}, {10513, 6}, {10744, 7}, {11221, 8}, {11467, 7}, {11719, 8}, {12507, 9}, {12781, 8}, {13347, 9}, {13939, 10}, {14876, 8}, {15202, 9}, {15876, 8}, {16224, 9}, {16944, 8}, {17316, 9}, {18480, 8}, {18885, 9}, {19722, 10}, {20596, 9}, {21047, 10}, {21508, 9}, {21979, 10}, {23970, 9}, {25581, 10}, {26715, 8}, {27300, 9}, {29134, 10}, {29772, 11}, {31091, 9}, {33179, 10}, {36977, 11}, {37787, 9}, {38615, 10}, {41210, 11}, {42113, 10}, {43036, 11}, {43979, 10}, {46934, 11}, {47962, 10}, {49013, 11}, {50087, 10}, {52305, 11}, {54622, 12}, {55819, 11}, {57042, 12}, {58292, 11}, {62207, 12}, {63570, 10}, {69326, 11}, {70845, 10}, {72397, 11}, {75604, 10}, {77260, 11}, {87985, 10}, {89912, 11}, {MP_SIZE_T_MAX,0}}

View File

@ -29,8 +29,10 @@ MA 02110-1301, USA. */
using namespace std;
char point_string[2];
extern "C"
{
char point_string[2];
};
#if HAVE_STD__LOCALE
// Like std::numpunct, but with decimal_point coming from point_string[].