Removed the unneeded tune-fft files and made FFT tuning stop once at least 25
tuning values have been found and the most recent five or more are all 2.
This commit is contained in:
parent
bde720d23e
commit
a9a73c77ee
@ -1,201 +0,0 @@
|
||||
/*
|
||||
|
||||
Copyright 2009, 2011 William Hart. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification, are
|
||||
permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice, this list of
|
||||
conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright notice, this list
|
||||
of conditions and the following disclaimer in the documentation and/or other materials
|
||||
provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
||||
WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
||||
FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
||||
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
The views and conclusions contained in the software and documentation are those of the
|
||||
authors and should not be interpreted as representing official policies, either expressed
|
||||
or implied, of William Hart.
|
||||
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <mpir.h>
|
||||
#include <time.h>
|
||||
#include <mpir.h>
|
||||
#include "gmp-impl.h"
|
||||
|
||||
int
|
||||
main(void)
|
||||
{
|
||||
mp_bitcnt_t depth, w, depth1, w1;
|
||||
clock_t start, end;
|
||||
double elapsed;
|
||||
double best = 0.0;
|
||||
mp_size_t best_off, off, best_d, best_w;
|
||||
|
||||
gmp_randstate_t state;
|
||||
|
||||
printf("/* fft_tuning.h -- autogenerated by tune-fft */\n\n");
|
||||
printf("#ifndef FFT_TUNING_H\n");
|
||||
printf("#define FFT_TUNING_H\n\n");
|
||||
printf("#include \"mpir.h\"\n\n");
|
||||
printf("#define FFT_TAB \\\n");
|
||||
fflush(stdout);
|
||||
|
||||
gmp_randinit_default(state);
|
||||
|
||||
printf(" { "); fflush(stdout);
|
||||
for (depth = 6; depth <= 10; depth++)
|
||||
{
|
||||
printf("{ "); fflush(stdout);
|
||||
for (w = 1; w <= 2; w++)
|
||||
{
|
||||
int iters = 100*((mp_size_t) 1 << (3*(10 - depth)/2)), i;
|
||||
|
||||
mp_size_t n = ((mp_limb_t)1<<depth);
|
||||
mp_bitcnt_t bits1 = (n*w - (depth + 1))/2;
|
||||
mp_size_t len1 = 2*n;
|
||||
mp_size_t len2 = 2*n;
|
||||
|
||||
mp_bitcnt_t b1 = len1*bits1, b2 = len2*bits1;
|
||||
mp_size_t n1, n2;
|
||||
mp_size_t j;
|
||||
mp_limb_t * i1, *i2, *r1;
|
||||
|
||||
n1 = (b1 - 1)/GMP_LIMB_BITS + 1;
|
||||
n2 = (b2 - 1)/GMP_LIMB_BITS + 1;
|
||||
|
||||
i1 = malloc(2*(n1 + n2)*sizeof(mp_limb_t));
|
||||
i2 = i1 + n1;
|
||||
r1 = i2 + n2;
|
||||
|
||||
mpn_urandomb(i1, state, b1);
|
||||
mpn_urandomb(i2, state, b2);
|
||||
|
||||
best_off = -1;
|
||||
|
||||
for (off = 0; off <= 4; off++)
|
||||
{
|
||||
start = clock();
|
||||
for (i = 0; i < iters; i++)
|
||||
mpn_mul_trunc_sqrt2(r1, i1, n1, i2, n2, depth - off, w*((mp_size_t)1 << (off*2)));
|
||||
end = clock();
|
||||
|
||||
elapsed = ((double) (end - start)) / CLOCKS_PER_SEC;
|
||||
|
||||
if (elapsed < best || best_off == -1)
|
||||
{
|
||||
best_off = off;
|
||||
best = elapsed;
|
||||
}
|
||||
}
|
||||
|
||||
printf("%ld", best_off);
|
||||
if (w != 2) printf(",");
|
||||
printf(" "); fflush(stdout);
|
||||
|
||||
free(i1);
|
||||
}
|
||||
printf("}");
|
||||
if (depth != 10) printf(",");
|
||||
printf(" "); fflush(stdout);
|
||||
}
|
||||
|
||||
printf("}\n\n");
|
||||
|
||||
best_d = 12;
|
||||
best_w = 1;
|
||||
best_off = -1;
|
||||
|
||||
printf("#define MULMOD_TAB \\\n");
|
||||
fflush(stdout);
|
||||
printf(" { "); fflush(stdout);
|
||||
for (depth = 12; best_off != 1 ; depth++)
|
||||
{
|
||||
for (w = 1; w <= 2; w++)
|
||||
{
|
||||
int iters = 100*((mp_size_t) 1 << (3*(18 - depth)/2)), i;
|
||||
mp_size_t n = ((mp_limb_t)1<<depth);
|
||||
mp_bitcnt_t bits = n*w;
|
||||
mp_size_t int_limbs = (bits - 1)/GMP_LIMB_BITS + 1;
|
||||
mp_size_t j;
|
||||
mp_limb_t c, * i1, * i2, * r1, * tt;
|
||||
|
||||
if (depth <= 21) iters = 32*((mp_size_t) 1 << (21 - depth));
|
||||
else iters = MAX(32/((mp_size_t) 1 << (depth - 21)), 1);
|
||||
|
||||
i1 = malloc(6*(int_limbs+1)*sizeof(mp_limb_t));
|
||||
i2 = i1 + int_limbs + 1;
|
||||
r1 = i2 + int_limbs + 1;
|
||||
tt = r1 + 2*(int_limbs + 1);
|
||||
|
||||
mpn_urandomb(i1, state, int_limbs*GMP_LIMB_BITS);
|
||||
mpn_urandomb(i2, state, int_limbs*GMP_LIMB_BITS);
|
||||
i1[int_limbs] = 0;
|
||||
i2[int_limbs] = 0;
|
||||
|
||||
depth1 = 1;
|
||||
while ((((mp_limb_t)1)<<depth1) < bits) depth1++;
|
||||
depth1 = depth1/2;
|
||||
|
||||
w1 = bits/(((mp_limb_t)1)<<(2*depth1));
|
||||
|
||||
best_off = -1;
|
||||
|
||||
for (off = 0; off <= 4; off++)
|
||||
{
|
||||
start = clock();
|
||||
for (i = 0; i < iters; i++)
|
||||
mpir_fft_mulmod_2expp1(r1, i1, i2, int_limbs, depth1 - off, w1*((mp_size_t)1 << (off*2)));
|
||||
end = clock();
|
||||
|
||||
elapsed = ((double) (end - start)) / CLOCKS_PER_SEC;
|
||||
|
||||
if (best_off == -1 || elapsed < best)
|
||||
{
|
||||
best_off = off;
|
||||
best = elapsed;
|
||||
}
|
||||
}
|
||||
|
||||
start = clock();
|
||||
for (i = 0; i < iters; i++)
|
||||
mpn_mulmod_2expp1_basecase(r1, i1, i2, 0, bits, tt);
|
||||
end = clock();
|
||||
|
||||
elapsed = ((double) (end - start)) / CLOCKS_PER_SEC;
|
||||
if (elapsed < best)
|
||||
{
|
||||
best_d = depth + (w == 2);
|
||||
best_w = w + 1 - 2*(w == 2);
|
||||
}
|
||||
|
||||
printf("%ld", best_off);
|
||||
if (w != 2) printf(", "); fflush(stdout);
|
||||
|
||||
free(i1);
|
||||
}
|
||||
printf(", "); fflush(stdout);
|
||||
}
|
||||
printf("1 }\n\n");
|
||||
|
||||
printf("#define FFT_N_NUM %ld\n\n", 2*(depth - 12) + 1);
|
||||
|
||||
printf("#define FFT_MULMOD_2EXPP1_CUTOFF %ld\n\n", ((mp_limb_t) 1 << best_d)*best_w/(2*GMP_LIMB_BITS));
|
||||
|
||||
gmp_randclear(state);
|
||||
|
||||
printf("#endif\n");
|
||||
return 0;
|
||||
}
|
@ -137,7 +137,7 @@ TUNE_MPN_SRCS_BASIC = divrem_2.c gcd.c gcdext.c get_str.c set_str.c \
|
||||
TUNE_FFT_SRCS_BASIC = split_bits.c revbin.c normmod_2expp1.c mulmod_2expp1.c \
|
||||
mul_trunc_sqrt2.c mul_mfa_trunc_sqrt2.c mul_fft_main.c \
|
||||
mul_2expmod_2expp1.c ifft_trunc_sqrt2.c ifft_trunc.c ifft_radix2.c \
|
||||
ifft_negacyclic.c fft_trunc.c fft_radix2.c fft_negacylic.c \
|
||||
ifft_negacyclic.c fft_trunc.c fft_radix2.c fft_negacyclic.c \
|
||||
fft_mfa_trunc_sqrt2.c fft_mfa_trunc_sqrt2_inner.c fermat_to_mpz.c \
|
||||
div_2expmod_2expp1.c combine_bits.c butterfly_rshB.c butterfly_lshB.c \
|
||||
adjust_sqrt2.c adjust.c
|
||||
|
@ -144,7 +144,7 @@ am__objects_2 = split_bits.$(OBJEXT) revbin.$(OBJEXT) \
|
||||
ifft_trunc_sqrt2.$(OBJEXT) ifft_trunc.$(OBJEXT) \
|
||||
ifft_radix2.$(OBJEXT) ifft_negacyclic.$(OBJEXT) \
|
||||
fft_trunc.$(OBJEXT) fft_radix2.$(OBJEXT) \
|
||||
fft_negacylic.$(OBJEXT) fft_mfa_trunc_sqrt2.$(OBJEXT) \
|
||||
fft_negacyclic.$(OBJEXT) fft_mfa_trunc_sqrt2.$(OBJEXT) \
|
||||
fft_mfa_trunc_sqrt2_inner.$(OBJEXT) fermat_to_mpz.$(OBJEXT) \
|
||||
div_2expmod_2expp1.$(OBJEXT) combine_bits.$(OBJEXT) \
|
||||
butterfly_rshB.$(OBJEXT) butterfly_lshB.$(OBJEXT) \
|
||||
@ -408,7 +408,7 @@ TUNE_MPN_SRCS_BASIC = divrem_2.c gcd.c gcdext.c get_str.c set_str.c \
|
||||
TUNE_FFT_SRCS_BASIC = split_bits.c revbin.c normmod_2expp1.c mulmod_2expp1.c \
|
||||
mul_trunc_sqrt2.c mul_mfa_trunc_sqrt2.c mul_fft_main.c \
|
||||
mul_2expmod_2expp1.c ifft_trunc_sqrt2.c ifft_trunc.c ifft_radix2.c \
|
||||
ifft_negacyclic.c fft_trunc.c fft_radix2.c fft_negacylic.c \
|
||||
ifft_negacyclic.c fft_trunc.c fft_radix2.c fft_negacyclic.c \
|
||||
fft_mfa_trunc_sqrt2.c fft_mfa_trunc_sqrt2_inner.c fermat_to_mpz.c \
|
||||
div_2expmod_2expp1.c combine_bits.c butterfly_rshB.c butterfly_lshB.c \
|
||||
adjust_sqrt2.c adjust.c
|
||||
|
201
tune/tune-fft.c
201
tune/tune-fft.c
@ -1,201 +0,0 @@
|
||||
/*
|
||||
|
||||
Copyright 2009, 2011 William Hart. All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without modification, are
|
||||
permitted provided that the following conditions are met:
|
||||
|
||||
1. Redistributions of source code must retain the above copyright notice, this list of
|
||||
conditions and the following disclaimer.
|
||||
|
||||
2. Redistributions in binary form must reproduce the above copyright notice, this list
|
||||
of conditions and the following disclaimer in the documentation and/or other materials
|
||||
provided with the distribution.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED
|
||||
WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
|
||||
FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR
|
||||
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
||||
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
|
||||
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
|
||||
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
The views and conclusions contained in the software and documentation are those of the
|
||||
authors and should not be interpreted as representing official policies, either expressed
|
||||
or implied, of William Hart.
|
||||
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <mpir.h>
|
||||
#include <time.h>
|
||||
#include <mpir.h>
|
||||
#include "gmp-impl.h"
|
||||
|
||||
int
|
||||
main(void)
|
||||
{
|
||||
mp_bitcnt_t depth, w, depth1, w1;
|
||||
clock_t start, end;
|
||||
double elapsed;
|
||||
double best = 0.0;
|
||||
mp_size_t best_off, off, best_d, best_w;
|
||||
|
||||
gmp_randstate_t state;
|
||||
|
||||
printf("/* fft_tuning.h -- autogenerated by tune-fft */\n\n");
|
||||
printf("#ifndef FFT_TUNING_H\n");
|
||||
printf("#define FFT_TUNING_H\n\n");
|
||||
printf("#include \"mpir.h\"\n\n");
|
||||
printf("#define FFT_TAB \\\n");
|
||||
fflush(stdout);
|
||||
|
||||
gmp_randinit_default(state);
|
||||
|
||||
printf(" { "); fflush(stdout);
|
||||
for (depth = 6; depth <= 10; depth++)
|
||||
{
|
||||
printf("{ "); fflush(stdout);
|
||||
for (w = 1; w <= 2; w++)
|
||||
{
|
||||
int iters = 100*((mp_size_t) 1 << (3*(10 - depth)/2)), i;
|
||||
|
||||
mp_size_t n = ((mp_limb_t)1<<depth);
|
||||
mp_bitcnt_t bits1 = (n*w - (depth + 1))/2;
|
||||
mp_size_t len1 = 2*n;
|
||||
mp_size_t len2 = 2*n;
|
||||
|
||||
mp_bitcnt_t b1 = len1*bits1, b2 = len2*bits1;
|
||||
mp_size_t n1, n2;
|
||||
mp_size_t j;
|
||||
mp_limb_t * i1, *i2, *r1;
|
||||
|
||||
n1 = (b1 - 1)/GMP_LIMB_BITS + 1;
|
||||
n2 = (b2 - 1)/GMP_LIMB_BITS + 1;
|
||||
|
||||
i1 = malloc(2*(n1 + n2)*sizeof(mp_limb_t));
|
||||
i2 = i1 + n1;
|
||||
r1 = i2 + n2;
|
||||
|
||||
mpn_urandomb(i1, state, b1);
|
||||
mpn_urandomb(i2, state, b2);
|
||||
|
||||
best_off = -1;
|
||||
|
||||
for (off = 0; off <= 4; off++)
|
||||
{
|
||||
start = clock();
|
||||
for (i = 0; i < iters; i++)
|
||||
mpn_mul_trunc_sqrt2(r1, i1, n1, i2, n2, depth - off, w*((mp_size_t)1 << (off*2)));
|
||||
end = clock();
|
||||
|
||||
elapsed = ((double) (end - start)) / CLOCKS_PER_SEC;
|
||||
|
||||
if (elapsed < best || best_off == -1)
|
||||
{
|
||||
best_off = off;
|
||||
best = elapsed;
|
||||
}
|
||||
}
|
||||
|
||||
printf("%ld", best_off);
|
||||
if (w != 2) printf(",");
|
||||
printf(" "); fflush(stdout);
|
||||
|
||||
free(i1);
|
||||
}
|
||||
printf("}");
|
||||
if (depth != 10) printf(",");
|
||||
printf(" "); fflush(stdout);
|
||||
}
|
||||
|
||||
printf("}\n\n");
|
||||
|
||||
best_d = 12;
|
||||
best_w = 1;
|
||||
best_off = -1;
|
||||
|
||||
printf("#define MULMOD_TAB \\\n");
|
||||
fflush(stdout);
|
||||
printf(" { "); fflush(stdout);
|
||||
for (depth = 12; best_off != 1 ; depth++)
|
||||
{
|
||||
for (w = 1; w <= 2; w++)
|
||||
{
|
||||
int iters = 100*((mp_size_t) 1 << (3*(18 - depth)/2)), i;
|
||||
mp_size_t n = ((mp_limb_t)1<<depth);
|
||||
mp_bitcnt_t bits = n*w;
|
||||
mp_size_t int_limbs = (bits - 1)/GMP_LIMB_BITS + 1;
|
||||
mp_size_t j;
|
||||
mp_limb_t c, * i1, * i2, * r1, * tt;
|
||||
|
||||
if (depth <= 21) iters = 32*((mp_size_t) 1 << (21 - depth));
|
||||
else iters = MAX(32/((mp_size_t) 1 << (depth - 21)), 1);
|
||||
|
||||
i1 = malloc(6*(int_limbs+1)*sizeof(mp_limb_t));
|
||||
i2 = i1 + int_limbs + 1;
|
||||
r1 = i2 + int_limbs + 1;
|
||||
tt = r1 + 2*(int_limbs + 1);
|
||||
|
||||
mpn_urandomb(i1, state, int_limbs*GMP_LIMB_BITS);
|
||||
mpn_urandomb(i2, state, int_limbs*GMP_LIMB_BITS);
|
||||
i1[int_limbs] = 0;
|
||||
i2[int_limbs] = 0;
|
||||
|
||||
depth1 = 1;
|
||||
while ((((mp_limb_t)1)<<depth1) < bits) depth1++;
|
||||
depth1 = depth1/2;
|
||||
|
||||
w1 = bits/(((mp_limb_t)1)<<(2*depth1));
|
||||
|
||||
best_off = -1;
|
||||
|
||||
for (off = 0; off <= 4; off++)
|
||||
{
|
||||
start = clock();
|
||||
for (i = 0; i < iters; i++)
|
||||
mpir_fft_mulmod_2expp1(r1, i1, i2, int_limbs, depth1 - off, w1*((mp_size_t)1 << (off*2)));
|
||||
end = clock();
|
||||
|
||||
elapsed = ((double) (end - start)) / CLOCKS_PER_SEC;
|
||||
|
||||
if (best_off == -1 || elapsed < best)
|
||||
{
|
||||
best_off = off;
|
||||
best = elapsed;
|
||||
}
|
||||
}
|
||||
|
||||
start = clock();
|
||||
for (i = 0; i < iters; i++)
|
||||
mpn_mulmod_2expp1_basecase(r1, i1, i2, 0, bits, tt);
|
||||
end = clock();
|
||||
|
||||
elapsed = ((double) (end - start)) / CLOCKS_PER_SEC;
|
||||
if (elapsed < best)
|
||||
{
|
||||
best_d = depth + (w == 2);
|
||||
best_w = w + 1 - 2*(w == 2);
|
||||
}
|
||||
|
||||
printf("%ld", best_off);
|
||||
if (w != 2) printf(", "); fflush(stdout);
|
||||
|
||||
free(i1);
|
||||
}
|
||||
printf(", "); fflush(stdout);
|
||||
}
|
||||
printf("1 }\n\n");
|
||||
|
||||
printf("#define FFT_N_NUM %ld\n\n", 2*(depth - 12) + 1);
|
||||
|
||||
printf("#define FFT_MULMOD_2EXPP1_CUTOFF %ld\n\n", ((mp_limb_t) 1 << best_d)*best_w/(2*GMP_LIMB_BITS));
|
||||
|
||||
gmp_randclear(state);
|
||||
|
||||
printf("#endif\n");
|
||||
return 0;
|
||||
}
|
@ -1855,7 +1855,7 @@ tune_fft(gmp_randstate_t state)
|
||||
clock_t start, end;
|
||||
double elapsed;
|
||||
double best = 0.0;
|
||||
mp_size_t best_off, off, best_d, best_w;
|
||||
mp_size_t best_off, off, best_d, best_w, num_twos, num_printed;
|
||||
|
||||
if (option_fft_max_size == 0)
|
||||
return;
|
||||
@ -1926,11 +1926,13 @@ tune_fft(gmp_randstate_t state)
|
||||
best_d = 12;
|
||||
best_w = 1;
|
||||
best_off = -1;
|
||||
num_printed = 0;
|
||||
num_twos = 0;
|
||||
|
||||
printf("#define MULMOD_TAB \\\n");
|
||||
fflush(stdout);
|
||||
printf(" { "); fflush(stdout);
|
||||
for (depth = 12; best_off != 1 ; depth++)
|
||||
for (depth = 12; best_off != 1 && !(num_printed >= 25 && best_off == 2 && num_twos >= 5) ; depth++)
|
||||
{
|
||||
for (w = 1; w <= 2; w++)
|
||||
{
|
||||
@ -1991,15 +1993,25 @@ tune_fft(gmp_randstate_t state)
|
||||
}
|
||||
|
||||
printf("%ld", best_off);
|
||||
if (best_off == 2)
|
||||
num_twos++;
|
||||
else
|
||||
num_twos = 0;
|
||||
num_printed++;
|
||||
if (w != 2) printf(", "); fflush(stdout);
|
||||
|
||||
free(i1);
|
||||
}
|
||||
printf(", "); fflush(stdout);
|
||||
}
|
||||
printf("1 }\n\n");
|
||||
if (best_off == 2)
|
||||
{
|
||||
printf("2, 2, 2, 2, 2, 1, 1 }\n\n");
|
||||
num_printed += 6;
|
||||
} else
|
||||
printf("1 }\n\n");
|
||||
|
||||
printf("#define FFT_N_NUM %ld\n\n", 2*(depth - 12) + 1);
|
||||
printf("#define FFT_N_NUM %ld\n\n", num_printed + 1);
|
||||
|
||||
printf("#define FFT_MULMOD_2EXPP1_CUTOFF %ld\n\n", ((mp_limb_t) 1 << best_d)*best_w/(2*GMP_LIMB_BITS));
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user