From 1253dff1711f1ffe0a89fa0a8638b9f7406349d8 Mon Sep 17 00:00:00 2001 From: gladman Date: Tue, 10 Jan 2012 16:26:07 +0000 Subject: [PATCH] Final integration of Bills new FFT code --- .../lib_mpir_nehalem/lib_mpir_nehalem.vcxproj | 1703 +++---- .../lib_mpir_nehalem.vcxproj.filters | 3013 ++++++------- build.vc10/lib_speed/lib_speed.vcxproj | 8 +- build.vc10/speed/speed.vcxproj | 16 +- build.vc10/try/try.vcxproj | 16 +- build.vc10/tune/tune.vcxproj | 16 +- fft/fft_mfa_truncate_sqrt2_inner.c | 4 +- fft/mul_truncate_sqrt2.c | 2 +- fft/mulmod_2expp1.c | 30 +- fft/tune/tune-fft.c | 2 +- gmp-h.in | 4 - gmp-impl.h | 3 + mpn/generic/mul_n.c | 6 +- mpn/generic/mulmod_2expm1.c | 4 +- mpn/generic/mulmod_2expp1.c | 221 +- new_fft.c | 3745 ---------------- new_fft_with_flint.c | 3972 ----------------- tests/fft/t-mulmod_2expp1.c | 8 +- tests/mpn/t-mulmod_2expp1.c | 18 +- tune/common.c | 4 +- tune/speed.c | 2 +- tune/speed.h | 6 +- tune/tune-fft.c | 2 +- 23 files changed, 2490 insertions(+), 10315 deletions(-) delete mode 100644 new_fft.c delete mode 100644 new_fft_with_flint.c diff --git a/build.vc10/lib_mpir_nehalem/lib_mpir_nehalem.vcxproj b/build.vc10/lib_mpir_nehalem/lib_mpir_nehalem.vcxproj index 9f7c2f2a..c1351af5 100644 --- a/build.vc10/lib_mpir_nehalem/lib_mpir_nehalem.vcxproj +++ b/build.vc10/lib_mpir_nehalem/lib_mpir_nehalem.vcxproj @@ -1,4 +1,4 @@ - + @@ -45,12 +45,13 @@ - cd ..\ + cd ..\ prebuild nehalem x64 - + + ..\..\mpn\x86_64w\ true $(IntDir)mpn\ @@ -65,21 +66,21 @@ prebuild nehalem x64 $(TargetDir)$(TargetName).pdb ProgramDatabase - - + cd ..\ postbuild "$(TargetPath)" - + - cd ..\ + cd ..\ prebuild nehalem x64 - + + ..\..\mpn\x86_64w\ true $(IntDir)mpn\ @@ -94,12 +95,11 @@ prebuild nehalem x64 $(TargetDir)$(TargetName).pdb ProgramDatabase - - + cd ..\ postbuild "$(TargetPath)" - + @@ -114,6 +114,7 @@ postbuild "$(TargetPath)" + @@ -135,1680 +136,1680 @@ postbuild "$(TargetPath)" - $(IntDir)fft\ - $(IntDir)fft\ + $(IntDir)fft\ + $(IntDir)fft\ - $(IntDir)fft\ - $(IntDir)fft\ + $(IntDir)fft\ + $(IntDir)fft\ - $(IntDir)fft\ - $(IntDir)fft\ + $(IntDir)fft\ + $(IntDir)fft\ - $(IntDir)fft\ - $(IntDir)fft\ + $(IntDir)fft\ + $(IntDir)fft\ - $(IntDir)fft\ - $(IntDir)fft\ + $(IntDir)fft\ + $(IntDir)fft\ - $(IntDir)fft\ - $(IntDir)fft\ + $(IntDir)fft\ + $(IntDir)fft\ - $(IntDir)fft\ - $(IntDir)fft\ + $(IntDir)fft\ + $(IntDir)fft\ - $(IntDir)fft\ - $(IntDir)fft\ + $(IntDir)fft\ + $(IntDir)fft\ - $(IntDir)fft\ - $(IntDir)fft\ + $(IntDir)fft\ + $(IntDir)fft\ - $(IntDir)fft\ - $(IntDir)fft\ + $(IntDir)fft\ + $(IntDir)fft\ - $(IntDir)fft\ - $(IntDir)fft\ + $(IntDir)fft\ + $(IntDir)fft\ - $(IntDir)fft\ - $(IntDir)fft\ + $(IntDir)fft\ + $(IntDir)fft\ - $(IntDir)fft\ - $(IntDir)fft\ + $(IntDir)fft\ + $(IntDir)fft\ - $(IntDir)fft\ - $(IntDir)fft\ + $(IntDir)fft\ + $(IntDir)fft\ - $(IntDir)fft\ - $(IntDir)fft\ + $(IntDir)fft\ + $(IntDir)fft\ - $(IntDir)fft\ - $(IntDir)fft\ + $(IntDir)fft\ + $(IntDir)fft\ - $(IntDir)fft\ - $(IntDir)fft\ + $(IntDir)fft\ + $(IntDir)fft\ - $(IntDir)fft\ - $(IntDir)fft\ + $(IntDir)fft\ + $(IntDir)fft\ - $(IntDir)fft\ - $(IntDir)fft\ + $(IntDir)fft\ + $(IntDir)fft\ - $(IntDir)fft\ - $(IntDir)fft\ + $(IntDir)fft\ + $(IntDir)fft\ - $(IntDir)fft\ - $(IntDir)fft\ + $(IntDir)fft\ + $(IntDir)fft\ - $(IntDir)fft\ - $(IntDir)fft\ + $(IntDir)fft\ + $(IntDir)fft\ - $(IntDir)fft\ - $(IntDir)fft\ + $(IntDir)fft\ + $(IntDir)fft\ - $(IntDir)fft\ - $(IntDir)fft\ + $(IntDir)fft\ + $(IntDir)fft\ - $(IntDir)fft\ - $(IntDir)fft\ + $(IntDir)fft\ + $(IntDir)fft\ - $(IntDir)fft\ - $(IntDir)fft\ + $(IntDir)fft\ + $(IntDir)fft\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpf\ - $(IntDir)mpf\ + $(IntDir)mpf\ + $(IntDir)mpf\ - $(IntDir)mpq\ - $(IntDir)mpq\ + $(IntDir)mpq\ + $(IntDir)mpq\ - $(IntDir)mpq\ - $(IntDir)mpq\ + $(IntDir)mpq\ + $(IntDir)mpq\ - $(IntDir)mpq\ - $(IntDir)mpq\ + $(IntDir)mpq\ + $(IntDir)mpq\ - $(IntDir)mpq\ - $(IntDir)mpq\ + $(IntDir)mpq\ + $(IntDir)mpq\ - $(IntDir)mpq\ - $(IntDir)mpq\ + $(IntDir)mpq\ + $(IntDir)mpq\ - $(IntDir)mpq\ - $(IntDir)mpq\ + $(IntDir)mpq\ + $(IntDir)mpq\ - $(IntDir)mpq\ - $(IntDir)mpq\ + $(IntDir)mpq\ + $(IntDir)mpq\ - $(IntDir)mpq\ - $(IntDir)mpq\ + $(IntDir)mpq\ + $(IntDir)mpq\ - $(IntDir)mpq\ - $(IntDir)mpq\ + $(IntDir)mpq\ + $(IntDir)mpq\ - $(IntDir)mpq\ - $(IntDir)mpq\ + $(IntDir)mpq\ + $(IntDir)mpq\ - $(IntDir)mpq\ - $(IntDir)mpq\ + $(IntDir)mpq\ + $(IntDir)mpq\ - $(IntDir)mpq\ - $(IntDir)mpq\ + $(IntDir)mpq\ + $(IntDir)mpq\ - $(IntDir)mpq\ - $(IntDir)mpq\ + $(IntDir)mpq\ + $(IntDir)mpq\ - $(IntDir)mpq\ - $(IntDir)mpq\ + $(IntDir)mpq\ + $(IntDir)mpq\ - $(IntDir)mpq\ - $(IntDir)mpq\ + $(IntDir)mpq\ + $(IntDir)mpq\ - $(IntDir)mpq\ - $(IntDir)mpq\ + $(IntDir)mpq\ + $(IntDir)mpq\ - $(IntDir)mpq\ - $(IntDir)mpq\ + $(IntDir)mpq\ + $(IntDir)mpq\ - $(IntDir)mpq\ - $(IntDir)mpq\ + $(IntDir)mpq\ + $(IntDir)mpq\ - $(IntDir)mpq\ - $(IntDir)mpq\ + $(IntDir)mpq\ + $(IntDir)mpq\ - $(IntDir)mpq\ - $(IntDir)mpq\ + $(IntDir)mpq\ + $(IntDir)mpq\ - $(IntDir)mpq\ - $(IntDir)mpq\ + $(IntDir)mpq\ + $(IntDir)mpq\ - $(IntDir)mpq\ - $(IntDir)mpq\ + $(IntDir)mpq\ + $(IntDir)mpq\ - $(IntDir)mpq\ - $(IntDir)mpq\ + $(IntDir)mpq\ + $(IntDir)mpq\ - $(IntDir)mpq\ - $(IntDir)mpq\ + $(IntDir)mpq\ + $(IntDir)mpq\ - $(IntDir)mpq\ - $(IntDir)mpq\ + $(IntDir)mpq\ + $(IntDir)mpq\ - $(IntDir)mpq\ - $(IntDir)mpq\ + $(IntDir)mpq\ + $(IntDir)mpq\ - $(IntDir)mpq\ - $(IntDir)mpq\ + $(IntDir)mpq\ + $(IntDir)mpq\ - $(IntDir)mpq\ - $(IntDir)mpq\ + $(IntDir)mpq\ + $(IntDir)mpq\ - $(IntDir)mpq\ - $(IntDir)mpq\ + $(IntDir)mpq\ + $(IntDir)mpq\ - $(IntDir)mpq\ - $(IntDir)mpq\ + $(IntDir)mpq\ + $(IntDir)mpq\ - $(IntDir)mpq\ - $(IntDir)mpq\ + $(IntDir)mpq\ + $(IntDir)mpq\ - $(IntDir)mpq\ - $(IntDir)mpq\ + $(IntDir)mpq\ + $(IntDir)mpq\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)mpz\ - $(IntDir)mpz\ + $(IntDir)mpz\ + $(IntDir)mpz\ - $(IntDir)printf\ - $(IntDir)printf\ + $(IntDir)printf\ + $(IntDir)printf\ - $(IntDir)printf\ - $(IntDir)printf\ + $(IntDir)printf\ + $(IntDir)printf\ - $(IntDir)printf\ - $(IntDir)printf\ + $(IntDir)printf\ + $(IntDir)printf\ - $(IntDir)printf\ - $(IntDir)printf\ + $(IntDir)printf\ + $(IntDir)printf\ - $(IntDir)printf\ - $(IntDir)printf\ + $(IntDir)printf\ + $(IntDir)printf\ - $(IntDir)printf\ - $(IntDir)printf\ + $(IntDir)printf\ + $(IntDir)printf\ - $(IntDir)printf\ - $(IntDir)printf\ + $(IntDir)printf\ + $(IntDir)printf\ - $(IntDir)printf\ - $(IntDir)printf\ + $(IntDir)printf\ + $(IntDir)printf\ - $(IntDir)printf\ - $(IntDir)printf\ + $(IntDir)printf\ + $(IntDir)printf\ - $(IntDir)printf\ - $(IntDir)printf\ + $(IntDir)printf\ + $(IntDir)printf\ - $(IntDir)printf\ - $(IntDir)printf\ + $(IntDir)printf\ + $(IntDir)printf\ - $(IntDir)printf\ - $(IntDir)printf\ + $(IntDir)printf\ + $(IntDir)printf\ - $(IntDir)printf\ - $(IntDir)printf\ + $(IntDir)printf\ + $(IntDir)printf\ - $(IntDir)printf\ - $(IntDir)printf\ + $(IntDir)printf\ + $(IntDir)printf\ - $(IntDir)printf\ - $(IntDir)printf\ + $(IntDir)printf\ + $(IntDir)printf\ - $(IntDir)printf\ - $(IntDir)printf\ + $(IntDir)printf\ + $(IntDir)printf\ - $(IntDir)printf\ - $(IntDir)printf\ + $(IntDir)printf\ + $(IntDir)printf\ - $(IntDir)printf\ - $(IntDir)printf\ + $(IntDir)printf\ + $(IntDir)printf\ - $(IntDir)printf\ - $(IntDir)printf\ + $(IntDir)printf\ + $(IntDir)printf\ - $(IntDir)printf\ - $(IntDir)printf\ + $(IntDir)printf\ + $(IntDir)printf\ - $(IntDir)printf\ - $(IntDir)printf\ + $(IntDir)printf\ + $(IntDir)printf\ - $(IntDir)scanf\ - $(IntDir)scanf\ + $(IntDir)scanf\ + $(IntDir)scanf\ - $(IntDir)scanf\ - $(IntDir)scanf\ + $(IntDir)scanf\ + $(IntDir)scanf\ - $(IntDir)scanf\ - $(IntDir)scanf\ + $(IntDir)scanf\ + $(IntDir)scanf\ - $(IntDir)scanf\ - $(IntDir)scanf\ + $(IntDir)scanf\ + $(IntDir)scanf\ - $(IntDir)scanf\ - $(IntDir)scanf\ + $(IntDir)scanf\ + $(IntDir)scanf\ - $(IntDir)scanf\ - $(IntDir)scanf\ + $(IntDir)scanf\ + $(IntDir)scanf\ - $(IntDir)scanf\ - $(IntDir)scanf\ + $(IntDir)scanf\ + $(IntDir)scanf\ - $(IntDir)scanf\ - $(IntDir)scanf\ + $(IntDir)scanf\ + $(IntDir)scanf\ - $(IntDir)scanf\ - $(IntDir)scanf\ + $(IntDir)scanf\ + $(IntDir)scanf\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ - $(IntDir)mpn\ - $(IntDir)mpn\ + $(IntDir)mpn\ + $(IntDir)mpn\ @@ -1874,7 +1875,7 @@ postbuild "$(TargetPath)" - + - + \ No newline at end of file diff --git a/build.vc10/lib_mpir_nehalem/lib_mpir_nehalem.vcxproj.filters b/build.vc10/lib_mpir_nehalem/lib_mpir_nehalem.vcxproj.filters index b5acc401..28c25897 100644 --- a/build.vc10/lib_mpir_nehalem/lib_mpir_nehalem.vcxproj.filters +++ b/build.vc10/lib_mpir_nehalem/lib_mpir_nehalem.vcxproj.filters @@ -1,4 +1,4 @@ - + @@ -30,1513 +30,1516 @@ - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files - - - Source Files\fft - - - Source Files\fft - - - Source Files\fft - - - Source Files\fft - - - Source Files\fft - - - Source Files\fft - - - Source Files\fft - - - Source Files\fft - - - Source Files\fft - - - Source Files\fft - - - Source Files\fft - - - Source Files\fft - - - Source Files\fft - - - Source Files\fft - - - Source Files\fft - - - Source Files\fft - - - Source Files\fft - - - Source Files\fft - - - Source Files\fft - - - Source Files\fft - - - Source Files\fft - - - Source Files\fft - - - Source Files\fft - - - Source Files\fft - - - Source Files\fft - - - Source Files\fft - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpf - - - Source Files\mpq - - - Source Files\mpq - - - Source Files\mpq - - - Source Files\mpq - - - Source Files\mpq - - - Source Files\mpq - - - Source Files\mpq - - - Source Files\mpq - - - Source Files\mpq - - - Source Files\mpq - - - Source Files\mpq - - - Source Files\mpq - - - Source Files\mpq - - - Source Files\mpq - - - Source Files\mpq - - - Source Files\mpq - - - Source Files\mpq - - - Source Files\mpq - - - Source Files\mpq - - - Source Files\mpq - - - Source Files\mpq - - - Source Files\mpq - - - Source Files\mpq - - - Source Files\mpq - - - Source Files\mpq - - - Source Files\mpq - - - Source Files\mpq - - - Source Files\mpq - - - Source Files\mpq - - - Source Files\mpq - - - Source Files\mpq - - - Source Files\mpq - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\mpz - - - Source Files\printf - - - Source Files\printf - - - Source Files\printf - - - Source Files\printf - - - Source Files\printf - - - Source Files\printf - - - Source Files\printf - - - Source Files\printf - - - Source Files\printf - - - Source Files\printf - - - Source Files\printf - - - Source Files\printf - - - Source Files\printf - - - Source Files\printf - - - Source Files\printf - - - Source Files\printf - - - Source Files\printf - - - Source Files\printf - - - Source Files\printf - - - Source Files\printf - - - Source Files\printf - - - Source Files\scanf - - - Source Files\scanf - - - Source Files\scanf - - - Source Files\scanf - - - Source Files\scanf - - - Source Files\scanf - - - Source Files\scanf - - - Source Files\scanf - - - Source Files\scanf - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - - - Source Files\mpn - + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files + + + Source Files\fft + + + Source Files\fft + + + Source Files\fft + + + Source Files\fft + + + Source Files\fft + + + Source Files\fft + + + Source Files\fft + + + Source Files\fft + + + Source Files\fft + + + Source Files\fft + + + Source Files\fft + + + Source Files\fft + + + Source Files\fft + + + Source Files\fft + + + Source Files\fft + + + Source Files\fft + + + Source Files\fft + + + Source Files\fft + + + Source Files\fft + + + Source Files\fft + + + Source Files\fft + + + Source Files\fft + + + Source Files\fft + + + Source Files\fft + + + Source Files\fft + + + Source Files\fft + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpf + + + Source Files\mpq + + + Source Files\mpq + + + Source Files\mpq + + + Source Files\mpq + + + Source Files\mpq + + + Source Files\mpq + + + Source Files\mpq + + + Source Files\mpq + + + Source Files\mpq + + + Source Files\mpq + + + Source Files\mpq + + + Source Files\mpq + + + Source Files\mpq + + + Source Files\mpq + + + Source Files\mpq + + + Source Files\mpq + + + Source Files\mpq + + + Source Files\mpq + + + Source Files\mpq + + + Source Files\mpq + + + Source Files\mpq + + + Source Files\mpq + + + Source Files\mpq + + + Source Files\mpq + + + Source Files\mpq + + + Source Files\mpq + + + Source Files\mpq + + + Source Files\mpq + + + Source Files\mpq + + + Source Files\mpq + + + Source Files\mpq + + + Source Files\mpq + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\mpz + + + Source Files\printf + + + Source Files\printf + + + Source Files\printf + + + Source Files\printf + + + Source Files\printf + + + Source Files\printf + + + Source Files\printf + + + Source Files\printf + + + Source Files\printf + + + Source Files\printf + + + Source Files\printf + + + Source Files\printf + + + Source Files\printf + + + Source Files\printf + + + Source Files\printf + + + Source Files\printf + + + Source Files\printf + + + Source Files\printf + + + Source Files\printf + + + Source Files\printf + + + Source Files\printf + + + Source Files\scanf + + + Source Files\scanf + + + Source Files\scanf + + + Source Files\scanf + + + Source Files\scanf + + + Source Files\scanf + + + Source Files\scanf + + + Source Files\scanf + + + Source Files\scanf + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + + + Source Files\mpn + - - Source Files\mpn\yasm - - - Source Files\mpn\yasm - - - Source Files\mpn\yasm - - - Source Files\mpn\yasm - - - Source Files\mpn\yasm - - - Source Files\mpn\yasm - - - Source Files\mpn\yasm - - - Source Files\mpn\yasm - - - Source Files\mpn\yasm - - - Source Files\mpn\yasm - - - Source Files\mpn\yasm - - - Source Files\mpn\yasm - - - Source Files\mpn\yasm - - - Source Files\mpn\yasm - - - Source Files\mpn\yasm - - - Source Files\mpn\yasm - - - Source Files\mpn\yasm - - - Source Files\mpn\yasm - - - Source Files\mpn\yasm - - - Source Files\mpn\yasm - - - Source Files\mpn\yasm - - - Source Files\mpn\yasm - - - Source Files\mpn\yasm - - - Source Files\mpn\yasm - - - Source Files\mpn\yasm - - - Source Files\mpn\yasm - - - Source Files\mpn\yasm - - - Source Files\mpn\yasm - - - Source Files\mpn\yasm - - - Source Files\mpn\yasm - - - Source Files\mpn\yasm - - - Source Files\mpn\yasm - - - Source Files\mpn\yasm - - - Source Files\mpn\yasm - - - Source Files\mpn\yasm - - - Source Files\mpn\yasm - - - Source Files\mpn\yasm - - - Source Files\mpn\yasm - - - Source Files\mpn\yasm - - - Source Files\mpn\yasm - - - Source Files\mpn\yasm - - - Source Files\mpn\yasm - - - Source Files\mpn\yasm - - - Source Files\mpn\yasm - - - Source Files\mpn\yasm - - - Source Files\mpn\yasm - - - Source Files\mpn\yasm - - - Source Files\mpn\yasm - - - Source Files\mpn\yasm - - - Source Files\mpn\yasm - - - Source Files\mpn\yasm - - - Source Files\mpn\yasm - - - Source Files\mpn\yasm - - - Source Files\mpn\yasm - - - Source Files\mpn\yasm - - - Source Files\mpn\yasm - - - Source Files\mpn\yasm - + + Source Files\mpn\yasm + + + Source Files\mpn\yasm + + + Source Files\mpn\yasm + + + Source Files\mpn\yasm + + + Source Files\mpn\yasm + + + Source Files\mpn\yasm + + + Source Files\mpn\yasm + + + Source Files\mpn\yasm + + + Source Files\mpn\yasm + + + Source Files\mpn\yasm + + + Source Files\mpn\yasm + + + Source Files\mpn\yasm + + + Source Files\mpn\yasm + + + Source Files\mpn\yasm + + + Source Files\mpn\yasm + + + Source Files\mpn\yasm + + + Source Files\mpn\yasm + + + Source Files\mpn\yasm + + + Source Files\mpn\yasm + + + Source Files\mpn\yasm + + + Source Files\mpn\yasm + + + Source Files\mpn\yasm + + + Source Files\mpn\yasm + + + Source Files\mpn\yasm + + + Source Files\mpn\yasm + + + Source Files\mpn\yasm + + + Source Files\mpn\yasm + + + Source Files\mpn\yasm + + + Source Files\mpn\yasm + + + Source Files\mpn\yasm + + + Source Files\mpn\yasm + + + Source Files\mpn\yasm + + + Source Files\mpn\yasm + + + Source Files\mpn\yasm + + + Source Files\mpn\yasm + + + Source Files\mpn\yasm + + + Source Files\mpn\yasm + + + Source Files\mpn\yasm + + + Source Files\mpn\yasm + + + Source Files\mpn\yasm + + + Source Files\mpn\yasm + + + Source Files\mpn\yasm + + + Source Files\mpn\yasm + + + Source Files\mpn\yasm + + + Source Files\mpn\yasm + + + Source Files\mpn\yasm + + + Source Files\mpn\yasm + + + Source Files\mpn\yasm + + + Source Files\mpn\yasm + + + Source Files\mpn\yasm + + + Source Files\mpn\yasm + + + Source Files\mpn\yasm + + + Source Files\mpn\yasm + + + Source Files\mpn\yasm + + + Source Files\mpn\yasm + + + Source Files\mpn\yasm + + + Source Files\mpn\yasm + - + \ No newline at end of file diff --git a/build.vc10/lib_speed/lib_speed.vcxproj b/build.vc10/lib_speed/lib_speed.vcxproj index 01755d6a..2e10c27a 100644 --- a/build.vc10/lib_speed/lib_speed.vcxproj +++ b/build.vc10/lib_speed/lib_speed.vcxproj @@ -77,7 +77,7 @@ Level3 EditAndContinue - .\;..\;..\..\;..\..\tests\;"..\$(PlatformName)\" + .\;..\;..\..\lib\$(intDir);..\..\;..\..\tests\;..\$(PlatformName)\ psapi.lib;%(AdditionalDependencies) @@ -96,7 +96,7 @@ Level3 ProgramDatabase - .\;..\;..\..\;..\..\tests\;"..\$(PlatformName)\" + .\;..\;..\..\lib\$(intDir);..\..\;..\..\tests\;..\$(PlatformName)\ psapi.lib;%(AdditionalDependencies) @@ -112,7 +112,7 @@ Level3 ProgramDatabase - .\;..\;..\..\;..\..\tests\;"..\$(PlatformName)\" + .\;..\;..\..\lib\$(intDir);..\..\;..\..\tests\;..\$(PlatformName)\ psapi.lib;%(AdditionalDependencies) @@ -130,7 +130,7 @@ Level3 ProgramDatabase - .\;..\;..\..\;..\..\tests\;"..\$(PlatformName)\" + .\;..\;..\..\lib\$(intDir);..\..\;..\..\tests\;..\$(PlatformName)\ false %(PreprocessorDefinitions) diff --git a/build.vc10/speed/speed.vcxproj b/build.vc10/speed/speed.vcxproj index 6e1391cf..a0f1e9cc 100644 --- a/build.vc10/speed/speed.vcxproj +++ b/build.vc10/speed/speed.vcxproj @@ -87,10 +87,10 @@ Level3 EditAndContinue Default - .\;..\;..\..\;..\..\tests\;"..\$(PlatformName)\" + .\;..\;..\..\lib\$(IntDir);..\..\;..\..\tests\;..\$(PlatformName)\ - ..\lib\$(IntDir)mpir.lib;%(AdditionalDependencies) + ..\..\lib\$(IntDir)mpir.lib;%(AdditionalDependencies) true Console MachineX86 @@ -115,10 +115,10 @@ Level3 ProgramDatabase Default - .\;..\;..\..\;..\..\tests\;"..\$(PlatformName)\" + .\;..\;..\..\lib\$(IntDir);..\..\;..\..\tests\;..\$(PlatformName)\ - ..\lib\$(IntDir)mpir.lib;%(AdditionalDependencies) + ..\..\lib\$(IntDir)mpir.lib;%(AdditionalDependencies) true Console MachineX64 @@ -140,10 +140,10 @@ Level3 ProgramDatabase Default - .\;..\;..\..\;..\..\tests\;"..\$(PlatformName)\" + .\;..\;..\..\lib\$(IntDir);..\..\;..\..\tests\;..\$(PlatformName)\ - ..\lib\$(IntDir)mpir.lib;%(AdditionalDependencies) + ..\..\lib\$(IntDir)mpir.lib;%(AdditionalDependencies) true Console true @@ -170,10 +170,10 @@ Level3 ProgramDatabase Default - .\;..\;..\..\;..\..\tests\;"..\$(PlatformName)\" + .\;..\;..\..\lib\$(IntDir);..\..\;..\..\tests\;..\$(PlatformName)\ - ..\lib\$(IntDir)mpir.lib;%(AdditionalDependencies) + ..\..\lib\$(IntDir)mpir.lib;%(AdditionalDependencies) true Console true diff --git a/build.vc10/try/try.vcxproj b/build.vc10/try/try.vcxproj index 8ddffb5c..4e57ad52 100644 --- a/build.vc10/try/try.vcxproj +++ b/build.vc10/try/try.vcxproj @@ -76,7 +76,7 @@ Disabled - ..\$(Platform);..\;..\..\;..\..\tests\;%(AdditionalIncludeDirectories) + ..\$(Platform);..\;..\..\;..\..\tests\;..\..\lib\$(IntDir);%(AdditionalIncludeDirectories) WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) true EnableFastChecks @@ -87,7 +87,7 @@ ProgramDatabase - ..\lib\$(IntDir)mpir.lib;%(AdditionalDependencies) + ..\..\lib\$(IntDir)mpir.lib;%(AdditionalDependencies) true Console MachineX86 @@ -99,7 +99,7 @@ Disabled - ..\$(Platform);..\;..\..\;..\..\tests\;%(AdditionalIncludeDirectories) + ..\$(Platform);..\;..\..\;..\..\tests\;..\..\lib\$(IntDir);%(AdditionalIncludeDirectories) WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) true EnableFastChecks @@ -110,7 +110,7 @@ ProgramDatabase - ..\lib\$(IntDir)mpir.lib;%(AdditionalDependencies) + ..\..\lib\$(IntDir)mpir.lib;%(AdditionalDependencies) true Console MachineX64 @@ -120,7 +120,7 @@ MaxSpeed true - ..\$(Platform);..\;..\..\;..\..\tests\;%(AdditionalIncludeDirectories) + ..\$(Platform);..\;..\..\;..\..\tests\;..\..\lib\$(IntDir);%(AdditionalIncludeDirectories) WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) MultiThreaded true @@ -130,7 +130,7 @@ ProgramDatabase - ..\lib\$(IntDir)mpir.lib;%(AdditionalDependencies) + ..\..\lib\$(IntDir)mpir.lib;%(AdditionalDependencies) true Console true @@ -145,7 +145,7 @@ Full true - ..\$(Platform);..\;..\..\;..\..\tests\;%(AdditionalIncludeDirectories) + ..\$(Platform);..\;..\..\;..\..\tests\;..\..\lib\$(IntDir);%(AdditionalIncludeDirectories) WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) MultiThreaded true @@ -155,7 +155,7 @@ ProgramDatabase - ..\lib\$(IntDir)mpir.lib;%(AdditionalDependencies) + ..\..\lib\$(IntDir)mpir.lib;%(AdditionalDependencies) true Console true diff --git a/build.vc10/tune/tune.vcxproj b/build.vc10/tune/tune.vcxproj index 370ec4e9..fce05965 100644 --- a/build.vc10/tune/tune.vcxproj +++ b/build.vc10/tune/tune.vcxproj @@ -84,10 +84,10 @@ Level3 EditAndContinue - .\;..\;..\..\;..\..\tests\;"..\$(PlatformName)\" + .\;..\;..\..\..\lib\$(intDir);..\..\;..\..\tests\;..\$(PlatformName)\ - ..\lib\$(IntDir)mpir.lib;%(AdditionalDependencies) + ..\..\lib\$(IntDir)mpir.lib;%(AdditionalDependencies) true Console MachineX86 @@ -110,10 +110,10 @@ Level3 ProgramDatabase - .\;..\;..\..\;..\..\tests\;"..\$(PlatformName)\" + .\;..\;..\..\..\lib\$(intDir);..\..\;..\..\tests\;..\$(PlatformName)\ - ..\lib\$(IntDir)mpir.lib;%(AdditionalDependencies) + ..\..\lib\$(IntDir)mpir.lib;%(AdditionalDependencies) true Console MachineX64 @@ -133,10 +133,10 @@ Level3 ProgramDatabase - .\;..\;..\..\;..\..\tests\;"..\$(PlatformName)\" + .\;..\;..\..\..\lib\$(intDir);..\..\;..\..\tests\;..\$(PlatformName)\ - ..\lib\$(IntDir)mpir.lib;%(AdditionalDependencies) + ..\..\lib\$(IntDir)mpir.lib;%(AdditionalDependencies) true Console true @@ -160,11 +160,11 @@ Level3 ProgramDatabase - .\;..\;..\..\;..\..\tests\;"..\$(PlatformName)\" + .\;..\;..\..\..\lib\$(intDir);..\..\;..\..\tests\;..\$(PlatformName)\ false - ..\lib\$(IntDir)mpir.lib;%(AdditionalDependencies) + ..\..\lib\$(IntDir)mpir.lib;%(AdditionalDependencies) true Console true diff --git a/fft/fft_mfa_truncate_sqrt2_inner.c b/fft/fft_mfa_truncate_sqrt2_inner.c index 8a2a8d11..5ba56b4f 100644 --- a/fft/fft_mfa_truncate_sqrt2_inner.c +++ b/fft/fft_mfa_truncate_sqrt2_inner.c @@ -60,7 +60,7 @@ void fft_mfa_truncate_sqrt2_inner(mp_limb_t ** ii, mp_limb_t ** jj, mp_size_t n, mp_size_t t = i*n1 + j; mpn_normmod_2expp1(ii[t], limbs); if (ii != jj) mpn_normmod_2expp1(jj[t], limbs); - mpn_fft_mulmod_2expp1(ii[t], ii[t], jj[t], n, w, tt); + mpn_mulmod_2expp1(ii[t], ii[t], jj[t], n, w, tt); } ifft_radix2(ii + i*n1, n1/2, w*n2, t1, t2); @@ -80,7 +80,7 @@ void fft_mfa_truncate_sqrt2_inner(mp_limb_t ** ii, mp_limb_t ** jj, mp_size_t n, mp_size_t t = i*n1 + j; mpn_normmod_2expp1(ii[t], limbs); if (ii != jj) mpn_normmod_2expp1(jj[t], limbs); - mpn_fft_mulmod_2expp1(ii[t], ii[t], jj[t], n, w, tt); + mpn_mulmod_2expp1(ii[t], ii[t], jj[t], n, w, tt); } ifft_radix2(ii + i*n1, n1/2, w*n2, t1, t2); diff --git a/fft/mul_truncate_sqrt2.c b/fft/mul_truncate_sqrt2.c index 05406b23..f7e706f5 100644 --- a/fft/mul_truncate_sqrt2.c +++ b/fft/mul_truncate_sqrt2.c @@ -96,7 +96,7 @@ void mpn_mul_truncate_sqrt2(mp_limb_t * r1, mp_limb_t * i1, mp_size_t n1, if (i1 != i2) mpn_normmod_2expp1(jj[j], limbs); c = 2*ii[j][limbs] + jj[j][limbs]; - ii[j][limbs] = mpn_mulmod_2expp1(ii[j], ii[j], jj[j], c, n*w, tt); + ii[j][limbs] = mpn_mulmod_2expp1_basecase(ii[j], ii[j], jj[j], c, n*w, tt); } ifft_truncate_sqrt2(ii, n, w, &t1, &t2, &s1, trunc); diff --git a/fft/mulmod_2expp1.c b/fft/mulmod_2expp1.c index f07e6e17..10e43577 100644 --- a/fft/mulmod_2expp1.c +++ b/fft/mulmod_2expp1.c @@ -122,7 +122,7 @@ void fft_mulmod_2expp1(mp_limb_t * r1, mp_limb_t * i1, mp_limb_t * i2, if (i1 != i2) mpn_normmod_2expp1(jj[j], limbs); c = 2*ii[j][limbs] + jj[j][limbs]; - ii[j][limbs] = mpn_mulmod_2expp1(ii[j], ii[j], jj[j], c, n*w, tt); + ii[j][limbs] = mpn_mulmod_2expp1_basecase(ii[j], ii[j], jj[j], c, n*w, tt); } ifft_negacyclic(ii, n, w, &t1, &t2, &s1); @@ -176,33 +176,7 @@ void fft_mulmod_2expp1(mp_limb_t * r1, mp_limb_t * i1, mp_limb_t * i2, mpn_normmod_2expp1(r1, r_limbs); TMP_FREE; -} - -void mpn_fft_mulmod_2expp1(mp_limb_t * r, mp_limb_t * i1, mp_limb_t * i2, - mp_size_t n, mp_size_t w, mp_limb_t * tt) -{ - mp_size_t bits = n*w; - mp_size_t limbs = bits/GMP_LIMB_BITS; - mp_bitcnt_t depth1, depth = 1; - - mp_size_t w1, off; - - if (limbs <= FFT_MULMOD_2EXPP1_CUTOFF) - { - mp_limb_t c = 2 * i1[limbs] + i2[limbs]; - r[limbs] = mpn_mulmod_2expp1(r, i1, i2, c, bits, tt); - return; - } - - while ((((mp_limb_t)1)<> k; } mpn_mulmod_2expm1 (S, typm, tzpm, h, temp); // unroll this recursion S=A rename - c = mpn_mulmod_2expp1 (D, typp, tzpp, c1 * 2 + c2, h, temp); // D=B rename + c = mpn_mulmod_2expp1_basecase (D, typp, tzpp, c1 * 2 + c2, h, temp); // D=B rename if (LIKELY (c == 0)) { c1 = mpn_sumdiff_n (S, D, S, D, m); @@ -361,7 +361,7 @@ else tzpp[m-1]&=GMP_NUMB_MASK>>k;} mpn_mulmod_2expm1_new(S,typm,tzpm,h,temp);// unroll this recursion S=A rename -c=mpn_mulmod_2expp1(D,typp,tzpp,c1*2+c2,h,temp); // D=B rename +c=mpn_mulmod_2expp1_basecase(D,typp,tzpp,c1*2+c2,h,temp); // D=B rename __GMP_FREE_FUNC_LIMBS(typm,m);__GMP_FREE_FUNC_LIMBS(tzpm,m);__GMP_FREE_FUNC_LIMBS(typp,m);__GMP_FREE_FUNC_LIMBS(tzpp,m); __GMP_FREE_FUNC_LIMBS(temp,2*m); if(LIKELY(c==0)) diff --git a/mpn/generic/mulmod_2expp1.c b/mpn/generic/mulmod_2expp1.c index 2fef58dd..f1458b8e 100644 --- a/mpn/generic/mulmod_2expp1.c +++ b/mpn/generic/mulmod_2expp1.c @@ -1,153 +1,68 @@ -/* mpn_mulmod_2expp1 - -Copyright 2009 Jason Moxham - -This file is part of the MPIR Library. - -The MPIR Library is free software; you can redistribute it and/or modify -it under the terms of the GNU Lesser General Public License as published -by the Free Software Foundation; either version 2.1 of the License, or (at -your option) any later version. - -The MPIR Library is distributed in the hope that it will be useful, but -WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY -or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public -License for more details. - -You should have received a copy of the GNU Lesser General Public License -along with the MPIR Library; see the file COPYING.LIB. If not, write -to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -Boston, MA 02110-1301, USA. -*/ - -#include "mpir.h" -#include "gmp-impl.h" -#include "longlong.h" - -// k 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 -static mp_size_t tab[]={0,0,0,2632,304,448,1024,2304,6400,11264,45056,114688,327680,1310720,3145728,12582912 };// OUT OF DATE -// from K8 old mul_fft_table 560 1184 1856 3840 -// these thresholds could do with tuning , and extending - -// ret+(xp,n)=(yp,n)*(zp,n) % 2^b+1 -// needs (tp,2n) temp space , everything reduced mod 2^b -// inputs,outputs are fully reduced -// NOTE: 2n is not the same as 2b rounded up to nearest limb -inline static int -mpn_mulmod_2expp1_internal (mp_ptr xp, mp_srcptr yp, mp_srcptr zp, - gmp_ui b, mp_ptr tp) -{ - mp_size_t n, k; - mp_limb_t c; - - n = BITS_TO_LIMBS (b); - k = GMP_NUMB_BITS * n - b; - ASSERT (b > 0); - ASSERT (n > 0); - ASSERT_MPN (yp, n); - ASSERT_MPN (zp, n); - ASSERT (!MPN_OVERLAP_P (tp, 2 * n, yp, n)); - ASSERT (!MPN_OVERLAP_P (tp, 2 * n, zp, n)); - ASSERT (!MPN_OVERLAP_P (xp, n, yp, n)); - ASSERT (!MPN_OVERLAP_P (xp, n, zp, n)); - ASSERT (MPN_SAME_OR_SEPARATE_P (xp, tp, n)); - ASSERT (MPN_SAME_OR_SEPARATE_P (xp, tp + n, n)); - ASSERT (k == 0 || yp[n - 1] >> (GMP_NUMB_BITS - k) == 0); - ASSERT (k == 0 || zp[n - 1] >> (GMP_NUMB_BITS - k) == 0); -#if 0 && GMP_NAIL_BITS == 0 -// mpn_mul_fft dont do nails -// fft has changed cant use this like this , but can use it HOW? - if (k == 0 && n % 8 == 0) - { - count_trailing_zeros (c, n); - if (c > 15) - c = 15; - for (c = c; c > 2; c--) - if (n >= tab[c]) - return mpn_mul_fft (xp, n, yp, n, zp, n, c); - } -#endif - mpn_mul_n (tp, yp, zp, n); - if (k == 0) - { - c = mpn_sub_n (xp, tp, tp + n, n); - return mpn_add_1 (xp, xp, n, c); - } - c = tp[n - 1]; - tp[n - 1] &= GMP_NUMB_MASK >> k; -#if HAVE_NATIVE_mpn_sublsh_nc - c = mpn_sublsh_nc (xp, tp, tp + n, n, k, c); -#else - { - mp_limb_t c1; - c1 = mpn_lshift (tp + n, tp + n, n, k); - tp[n] |= c >> (GMP_NUMB_BITS - k); - c = mpn_sub_n (xp, tp, tp + n, n) + c1; - } -#endif - c = mpn_add_1 (xp, xp, n, c); - xp[n - 1] &= GMP_NUMB_MASK >> k; - return c; -} - -// c is the top bits of the inputs, must be fully reduced -int -mpn_mulmod_2expp1 (mp_ptr xp, mp_srcptr yp, mp_srcptr zp, int c, - gmp_ui b, mp_ptr tp) -{ - int cy, cz; - mp_size_t n, k; - - cy = c & 2; - cz = c & 1; - n = BITS_TO_LIMBS (b); - k = GMP_NUMB_BITS * n - b; - ASSERT (b > 0); - ASSERT (n > 0); - ASSERT_MPN (yp, n); - ASSERT_MPN (zp, n); - ASSERT (!MPN_OVERLAP_P (tp, 2 * n, yp, n)); - ASSERT (!MPN_OVERLAP_P (tp, 2 * n, zp, n)); - ASSERT (MPN_SAME_OR_SEPARATE_P (xp, tp, n)); - ASSERT (MPN_SAME_OR_SEPARATE_P (xp, tp + n, n)); - ASSERT (k == 0 || yp[n - 1] >> (GMP_NUMB_BITS - k) == 0); - ASSERT (k == 0 || zp[n - 1] >> (GMP_NUMB_BITS - k) == 0); -#if WANT_ASSERT - mp_size_t t; - t = n; - MPN_NORMALIZE (yp, t); - ASSERT (cy == 0 || t == 0); - t = n; - MPN_NORMALIZE (zp, t); - ASSERT (cz == 0 || t == 0); -#endif - if (LIKELY (cy == 0)) - { - if (LIKELY (cz == 0)) - { - c = mpn_mulmod_2expp1_internal (xp, yp, zp, b, tp); - } - else - { - c = mpn_neg_n (xp, yp, n); - c = mpn_add_1 (xp, xp, n, c); - xp[n - 1] &= GMP_NUMB_MASK >> k; - } - } - else - { - if (LIKELY (cz == 0)) - { - c = mpn_neg_n (xp, zp, n); - c = mpn_add_1 (xp, xp, n, c); - xp[n - 1] &= GMP_NUMB_MASK >> k; - } - else - { - c = 0; - xp[0] = 1; - MPN_ZERO (xp + 1, n - 1); - } - } - return c; -} +/* + +Copyright 2009, 2011 William Hart. All rights reserved. + +Redistribution and use in source and binary forms, with or without modification, are +permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, this list of + conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, this list + of conditions and the following disclaimer in the documentation and/or other materials + provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED +WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND +FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON +ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +The views and conclusions contained in the software and documentation are those of the +authors and should not be interpreted as representing official policies, either expressed +or implied, of William Hart. + +*/ + +#include "mpir.h" +#include "gmp-impl.h" +#include "longlong.h" +#include "fft\fft_tuning.h" + +static mp_size_t mulmod_2expp1_table_n[FFT_N_NUM] = MULMOD_TAB; + +int mpn_mulmod_2expp1(mp_limb_t * r, mp_limb_t * i1, mp_limb_t * i2, mp_size_t n, mp_size_t w, mp_limb_t * tt) +{ +#if defined( OLD_FFT ) + return mpn_mulmod_2expp1_basecase(r, i1, i2, c, bits, tt); +#else + mp_size_t bits = n*w; + mp_size_t limbs = bits/GMP_LIMB_BITS; + mp_bitcnt_t depth1, depth = 1; + + mp_size_t w1, off; + + if (limbs <= FFT_MULMOD_2EXPP1_CUTOFF) + { + mp_limb_t c = 2 * i1[limbs] + i2[limbs]; + r[limbs] = mpn_mulmod_2expp1_basecase(r, i1, i2, c, bits, tt); + return r[limbs]; + } + + while ((((mp_limb_t)1)< -#include -#include "mpir.h" -#include "gmp-impl.h" -#include "longlong.h" - -#define ulong unsigned long - -void mpn_to_mpz(mpz_t m, mp_limb_t * i, ulong limbs); -void set_p(mpz_t p, ulong n, ulong w); -void rand_n(mp_limb_t * n, gmp_randstate_t state, ulong limbs); -void ref_mul_2expmod(mpz_t m, mpz_t i2, mpz_t p, ulong n, ulong w, ulong d); -void ref_norm(mpz_t m, mpz_t p); -void ref_sumdiff_rshBmod(mpz_t t, mpz_t u, mpz_t i1, - mpz_t i2, mpz_t p, ulong n, ulong w, ulong x, ulong y); - -/* - FIXME: This can be done faster for the case where c is small. - The function adds c to the integer r modulo 2^l + 1. -*/ -void mpn_addmod_2expp1_1(mp_limb_t * r, ulong l, mp_limb_signed_t c) -{ - if (c >= 0) mpn_add_1(r, r, l + 1, c); - else mpn_sub_1(r, r, l + 1, -c); -} - -/* - FIXME: this is the most inefficient implementation I could come up with. ;-) -*/ -ulong revbin(ulong c, ulong depth) -{ - ulong i = 0; - ulong ret = 0; - for (i = 0; i < depth + 1; i++) - if (c & (1UL< skip) MPN_COPY(poly[i], limbs + skip, total_limbs - skip); - - return length; -} - -/* - Splits an mpn into segments of length _bits_ and stores in - zero padded coefficients of length output_limbs, for use in FFT - convolution code. Assumes that the input is total_limbs in length. - Returns the total number of coefficient written. -*/ - -ulong FFT_split_bits(mp_limb_t ** poly, mp_limb_t * limbs, - ulong total_limbs, ulong bits, ulong output_limbs) -{ - ulong length = (GMP_LIMB_BITS*total_limbs - 1)/bits + 1; - ulong i, j; - - ulong top_bits = ((GMP_LIMB_BITS - 1) & bits); - if (top_bits == 0) - { - return FFT_split(poly, limbs, total_limbs, bits/GMP_LIMB_BITS, output_limbs); - } - - ulong coeff_limbs = (bits/GMP_LIMB_BITS) + 1; - ulong mask = (1L<coeffs[i+1], j); - - MPN_ZERO(poly[i], output_limbs + 1); - // convert a coefficient - if (!shift_bits) - { - MPN_COPY(poly[i], limb_ptr, coeff_limbs); - poly[i][coeff_limbs - 1] &= mask; - limb_ptr += (coeff_limbs - 1); - shift_bits += top_bits; - } else - { - mpn_rshift(poly[i], limb_ptr, coeff_limbs, shift_bits); - limb_ptr += (coeff_limbs - 1); - shift_bits += top_bits; - if (shift_bits >= GMP_LIMB_BITS) - { - limb_ptr++; - poly[i][coeff_limbs - 1] += (limb_ptr[0] << (GMP_LIMB_BITS - (shift_bits - top_bits))); - shift_bits -= GMP_LIMB_BITS; - } - poly[i][coeff_limbs - 1] &= mask; - } - } - - MPN_ZERO(poly[i], output_limbs + 1); - ulong limbs_left = total_limbs - (limb_ptr - limbs); - if (!shift_bits) - { - MPN_COPY(poly[i], limb_ptr, limbs_left); - } else - { - mpn_rshift(poly[i], limb_ptr, limbs_left, shift_bits); - } - - return length; -} - -/* - Recombines coefficients after doing a convolution. Assumes each of the - coefficients of the poly of the given length is output_limbs long, that each - of the coefficients is being shifted by a multiple of coeff_limbs and added - to an mpn which is total_limbs long. It is assumed that the mpn has been - zeroed in advance. -*/ - -void FFT_combine(mp_limb_t * res, mp_limb_t ** poly, ulong length, ulong coeff_limbs, - ulong output_limbs, ulong total_limbs) -{ - ulong skip, i, j; - - for (skip = 0, i = 0; (i < length) && (skip + output_limbs <= total_limbs); i++, skip += coeff_limbs) - { - //for (j = 0; j < output_limbs; j += 8) PREFETCH(poly->coeffs[i+1], j); - mpn_add(res + skip, res + skip, output_limbs + 1, poly[i], output_limbs); - } - - while ((skip < total_limbs) && (i < length)) - { - mpn_add(res + skip, res + skip, total_limbs - skip, poly[i], MIN(total_limbs - skip, output_limbs)); - i++; - skip += coeff_limbs; - } -} - -/* - Recombines coefficients of a poly after doing a convolution. Assumes - each of the coefficients of the poly of the given length is output_limbs - long, that each is being shifted by a multiple of _bits_ and added - to an mpn which is total_limbs long. It is assumed that the mpn has been - zeroed in advance. -*/ - -void FFT_combine_bits(mp_limb_t * res, mp_limb_t ** poly, ulong length, ulong bits, - ulong output_limbs, ulong total_limbs) -{ - ulong top_bits = ((GMP_LIMB_BITS - 1) & bits); - if (top_bits == 0) - { - FFT_combine(res, poly, length, bits/GMP_LIMB_BITS, output_limbs, total_limbs); - return; - } - TMP_DECL; - TMP_MARK; - - ulong coeff_limbs = (bits/GMP_LIMB_BITS) + 1; - ulong i, j; - ulong * temp = (ulong *) TMP_BALLOC_LIMBS(output_limbs + 1); - ulong shift_bits = 0; - ulong * limb_ptr = res; - ulong * end = res + total_limbs; - - for (i = 0; (i < length) && (limb_ptr + output_limbs < end); i++) - { - //for (j = 0; j < output_limbs; j += 8) PREFETCH(poly->coeffs[i+1], j); - if (shift_bits) - { - mpn_lshift(temp, poly[i], output_limbs + 1, shift_bits); - mpn_add_n(limb_ptr, limb_ptr, temp, output_limbs + 1); - } else - { - mpn_add(limb_ptr, limb_ptr, output_limbs + 1, poly[i], output_limbs); - } - shift_bits += top_bits; - limb_ptr += (coeff_limbs - 1); - if (shift_bits >= GMP_LIMB_BITS) - { - limb_ptr++; - shift_bits -= GMP_LIMB_BITS; - } - } - - while ((limb_ptr < end) && (i < length)) - { - if (shift_bits) - { - mpn_lshift(temp, poly[i], output_limbs + 1, shift_bits); - mpn_add_n(limb_ptr, limb_ptr, temp, end - limb_ptr); - } else - { - mpn_add_n(limb_ptr, limb_ptr, poly[i], end - limb_ptr); - } - shift_bits += top_bits; - limb_ptr += (coeff_limbs - 1); - if (shift_bits >= GMP_LIMB_BITS) - { - limb_ptr++; - shift_bits -= GMP_LIMB_BITS; - } - i++; - } - - TMP_FREE; -} - -/* - Normalise t to be in the range [0, 2^nw] -*/ -void mpn_normmod_2expp1(mp_limb_t * t, ulong l) -{ - mp_limb_signed_t hi = t[l]; - - if (hi) - { - t[l] = CNST_LIMB(0); - mpn_addmod_2expp1_1(t, l, -hi); - hi = t[l]; - - // hi will be in [-1,1] - if (t[l]) - { - t[l] = CNST_LIMB(0); - mpn_addmod_2expp1_1(t, l, -hi); - if (t[l] == ~CNST_LIMB(0)) // if we now have -1 (very unlikely) - { - t[l] = CNST_LIMB(0); - mpn_addmod_2expp1_1(t, l, 1); - } - } - } -} - -/* - We are given two integers modulo 2^wn+1, i1 and i2, which are not - necessarily normalised. We compute t = sqrt(-1)*(i1 - i2) and are given - n and w. Note 2^wn/2 = sqrt(-1) mod 2^wn+1. Requires wn to be divisible - by 2*GMP_LIMB_BITS. -*/ -void mpn_submod_i_2expp1(mp_limb_t * t, mp_limb_t * i1, mp_limb_t * i2, ulong l) -{ - mp_limb_t cy; - mp_limb_t l2 = l/2; - - t[l] = -mpn_sub_n(t + l2, i1, i2, l2); - cy = i2[l] - i1[l] - mpn_sub_n(t, i2 + l2, i1 + l2, l2); - mpn_addmod_2expp1_1(t + l2, l2, cy); -} - -/* - We are given two integers modulo 2^wn+1, i1 and i2, which are - not necessarily normalised and are given n and w. We compute - t = (i1 + i2)*B^x, u = (i1 - i2)*B^y. Aliasing between inputs and - outputs is not permitted. We require x and y to be less than limbs. -*/ -void mpn_lshB_sumdiffmod_2expp1(mp_limb_t * t, mp_limb_t * u, mp_limb_t * i1, - mp_limb_t * i2, ulong limbs, ulong x, ulong y) -{ - mp_limb_t cy, cy1, cy2; - - if (x == 0) - { - if (y == 0) - { - mpn_sumdiff_n(t + x, u + y, i1, i2, limbs + 1); - } else - { - cy = mpn_sumdiff_n(t, u + y, i1, i2, limbs - y); - u[limbs] = -(cy&1); - cy1 = cy>>1; - cy = mpn_sumdiff_n(t + limbs - y, u, i2 + limbs - y, i1 + limbs - y, y); - t[limbs] = cy>>1; - mpn_add_1(t + limbs - y, t + limbs - y, y + 1, cy1); - cy1 = -(cy&1) + (i2[limbs] - i1[limbs]); - mpn_addmod_2expp1_1(u + y, limbs - y, cy1); - cy1 = -(i1[limbs] + i2[limbs]); - mpn_addmod_2expp1_1(t, limbs, cy1); - } - } else if (y == 0) - { - cy = mpn_sumdiff_n(t + x, u, i1, i2, limbs - x); - t[limbs] = cy>>1; - cy1 = cy&1; - cy = mpn_sumdiff_n(t, u + limbs - x, i1 + limbs - x, i2 + limbs - x, x); - cy2 = mpn_neg_n(t, t, x); - u[limbs] = -(cy&1); - mpn_sub_1(u + limbs - x, u + limbs - x, x + 1, cy1); - cy1 = -(cy>>1) - cy2; - cy1 -= (i1[limbs] + i2[limbs]); - mpn_addmod_2expp1_1(t + x, limbs - x, cy1); - cy1 = (i2[limbs] - i1[limbs]); - mpn_addmod_2expp1_1(u, limbs, cy1); - } else if (x > y) - { - cy = mpn_sumdiff_n(t + x, u + y, i1, i2, limbs - x); - t[limbs] = cy>>1; - cy1 = cy&1; - cy = mpn_sumdiff_n(t, u + y + limbs - x, i1 + limbs - x, i2 + limbs - x, x - y); - cy2 = mpn_neg_n(t, t, x - y); - u[limbs] = -(cy&1); - mpn_sub_1(u + y + limbs - x, u + y + limbs - x, x - y + 1, cy1); - cy1 = (cy>>1) + cy2; - cy = mpn_sumdiff_n(t + x - y, u, i2 + limbs - y, i1 + limbs - y, y); - cy2 = mpn_neg_n(t + x - y, t + x - y, y); - cy1 = -(cy>>1) - mpn_sub_1(t + x - y, t + x - y, y, cy1) - cy2; - cy1 -= (i1[limbs] + i2[limbs]); - mpn_addmod_2expp1_1(t + x, limbs - x, cy1); - cy1 = -(cy&1) + (i2[limbs] - i1[limbs]); - mpn_addmod_2expp1_1(u + y, limbs - y, cy1); - } else if (x < y) - { - cy = mpn_sumdiff_n(t + x, u + y, i1, i2, limbs - y); - u[limbs] = -(cy&1); - cy1 = cy>>1; - cy = mpn_sumdiff_n(t + x + limbs - y, u, i2 + limbs - y, i1 + limbs - y, y - x); - t[limbs] = cy>>1; - mpn_add_1(t + x + limbs - y, t + x + limbs - y, y - x + 1, cy1); - cy1 = cy&1; - cy = mpn_sumdiff_n(t, u + y - x, i2 + limbs - x, i1 + limbs - x, x); - cy1 = -(cy&1) - mpn_sub_1(u + y - x, u + y - x, x, cy1); - cy1 += (i2[limbs] - i1[limbs]); - mpn_addmod_2expp1_1(u + y, limbs - y, cy1); - cy2 = mpn_neg_n(t, t, x); - cy1 = -(cy>>1) - (i1[limbs] + i2[limbs]) - cy2; - mpn_addmod_2expp1_1(t + x, limbs - x, cy1); - } else // x == y - { - cy = mpn_sumdiff_n(t + x, u + x, i1, i2, limbs - x); - t[limbs] = cy>>1; - u[limbs] = -(cy&1); - cy = mpn_sumdiff_n(t, u, i2 + limbs - x, i1 + limbs - x, x); - cy2 = mpn_neg_n(t, t, x); - cy1 = -(cy>>1) - (i1[limbs] + i2[limbs]) - cy2; - mpn_addmod_2expp1_1(t + x, limbs - x, cy1); - cy1 = -(cy&1) + i2[limbs] - i1[limbs]; - mpn_addmod_2expp1_1(u + x, limbs - x, cy1); - } -} - -/* - We are given two integers modulo 2^wn+1, i1 and i2, which are - not necessarily normalised and are given n and w. We compute - t = i1 + i2/B^x, u = i1 - i2/B^y. Aliasing between inputs and - outputs is not permitted. We require x, y be less than the - number of limbs of i1 and i2. -*/ -void mpn_sumdiff_rshBmod_2expp1(mp_limb_t * t, mp_limb_t * u, mp_limb_t * i1, - mp_limb_t * i2, ulong limbs, ulong x, ulong y) -{ - mp_limb_t cy, cy1, cy2, cy3; - - if (x == 0) - { - if (y == 0) - { - mpn_sumdiff_n(t, u, i1, i2, limbs + 1); - } else // y != 0 - { - cy = mpn_sumdiff_n(t, u, i1, i2 + y, limbs - y); - cy1 = (cy>>1); - cy2 = -(cy&1); - cy = mpn_sumdiff_n(u + limbs - y, t + limbs - y, i1 + limbs - y, i2, y); - u[limbs] = (cy>>1) + i1[limbs]; - t[limbs] = i1[limbs] - (cy&1); - mpn_addmod_2expp1_1(t + limbs - y, y, cy1 + i2[limbs]); - mpn_addmod_2expp1_1(u + limbs - y, y, cy2 - i2[limbs]); - } - } else if (y == 0) // x != 0 - { - cy = mpn_sumdiff_n(t, u, i1 + x, i2, limbs - x); - cy1 = (cy>>1); - cy2 = -(cy&1); - cy3 = mpn_neg_n(i1, i1, x); - cy = mpn_sumdiff_n(t + limbs - x, u + limbs - x, i1, i2 + limbs - x, x); - u[limbs] = -cy3 - (cy&1) - i2[limbs]; - t[limbs] = -cy3 + i2[limbs] + (cy>>1); - mpn_addmod_2expp1_1(t + limbs - x, x, cy1 + i1[limbs]); - mpn_addmod_2expp1_1(u + limbs - x, x, cy2 + i1[limbs]); - } else if (x == y) - { - cy = mpn_sumdiff_n(t, u, i1 + x, i2 + x, limbs - x); - cy1 = (cy>>1); - cy2 = -(cy&1); - cy = mpn_sumdiff_n(t + limbs - x, u + limbs - x, i2, i1, x); - cy3 = mpn_neg_n(t + limbs - x, t + limbs - x, x); - u[limbs] = -(cy&1); - t[limbs] = -(cy>>1) - cy3; - mpn_addmod_2expp1_1(t + limbs - x, x, cy1 + i1[limbs] + i2[limbs]); - mpn_addmod_2expp1_1(u + limbs - x, x, cy2 + i1[limbs] - i2[limbs]); - } else if (x > y) - { - cy = mpn_sumdiff_n(t + limbs - y, u + limbs - y, i2, i1 + x - y, y); - cy3 = mpn_neg_n(t + limbs - y, t + limbs - y, y); - t[limbs] = -(cy>>1) - cy3; - u[limbs] = -(cy&1); - cy3 = mpn_neg_n(i1, i1, x - y); - cy = mpn_sumdiff_n(t + limbs - x, u + limbs - x, i1, i2 + limbs - x + y, x - y); - mpn_addmod_2expp1_1(t + limbs - y, y, (cy>>1) + i2[limbs] - cy3); - mpn_addmod_2expp1_1(u + limbs - y, y, -(cy&1) - i2[limbs] - cy3); - cy = mpn_sumdiff_n(t, u, i1 + x, i2 + y, limbs - x); - mpn_addmod_2expp1_1(t + limbs - x, x, (cy>>1) + i1[limbs]); - mpn_addmod_2expp1_1(u + limbs - x, x, -(cy&1) + i1[limbs]); - } else //(x < y) - { - cy = mpn_sumdiff_n(t + limbs - x, u + limbs - x, i2 + y - x, i1, x); - cy3 = mpn_neg_n(t + limbs - x, t + limbs - x, x); - t[limbs] = -(cy>>1) - cy3; - u[limbs] = -(cy&1); - cy3 = mpn_neg_n(i2, i2, y - x); - cy = mpn_sumdiff_n(t + limbs - y, u + limbs - y, i1 + limbs - y + x, i2, y - x); - mpn_addmod_2expp1_1(t + limbs - x, x, (cy>>1) + i1[limbs] - cy3); - mpn_addmod_2expp1_1(u + limbs - x, x, -(cy&1) + i1[limbs] + cy3); - cy = mpn_sumdiff_n(t, u, i1 + x, i2 + y, limbs - y); - mpn_addmod_2expp1_1(t + limbs - y, y, (cy>>1) + i2[limbs]); - mpn_addmod_2expp1_1(u + limbs - y, y, -(cy&1) - i2[limbs]); - } -} - -/* - Given an integer i1 modulo 2^wn+1, set t to 2^d*i1 modulo 2^wm+1. - We must have GMP_LIMB_BITS > d >= 0. -*/ -void mpn_mul_2expmod_2expp1(mp_limb_t * t, mp_limb_t * i1, ulong limbs, ulong d) -{ - mp_limb_signed_t hi, hi2; - - if (d == 0) - { - if (t != i1) - MPN_COPY(t, i1, limbs + 1); - } else - { - hi = i1[limbs]; - mpn_lshift(t, i1, limbs + 1, d); - hi2 = t[limbs]; - t[limbs] = CNST_LIMB(0); - mpn_sub_1(t, t, limbs + 1, hi2); - hi >>= (GMP_LIMB_BITS - d); - mpn_addmod_2expp1_1(t + 1, limbs - 1, -hi); - } -} - -/* - Given an integer i1 modulo 2^wn+1, set t to i1/2^d modulo 2^wm+1. - We must have GMP_LIMB_BITS > d >= 0. -*/ -void mpn_div_2expmod_2expp1(mp_limb_t * t, mp_limb_t * i1, ulong limbs, ulong d) -{ - mp_limb_t lo; - mp_limb_t * ptr; - mp_limb_signed_t hi; - - if (d == 0) - { - if (t != i1) - MPN_COPY(t, i1, limbs + 1); - } else - { - hi = i1[limbs]; - lo = mpn_rshift(t, i1, limbs + 1, d); - t[limbs] = (hi>>d); - ptr = t + limbs - 1; - sub_ddmmss(ptr[1], ptr[0], ptr[1], ptr[0], CNST_LIMB(0), lo); - } -} - -/* - Set {s, t} to { z1^i*(s+t), z2^i*(s-t) } where - z1 = exp(2*Pi*I/(2*n)), z2 = exp(2*Pi*I*3/(2*n)), z1=>w bits -*/ -void FFT_split_radix_butterfly(mp_limb_t * s, mp_limb_t * t, ulong i, ulong n, ulong w, mp_limb_t * u) -{ - mp_limb_t size = (w*n)/GMP_LIMB_BITS + 1; - mp_limb_t * v; - ulong x, y, b1, b2; - int negate = 0; - int negate2 = 0; - - v = u + size; - - b1 = i; - while (b1 >= n) - { - negate2 = 1 - negate2; - b1 -= n; - } - b1 = b1*w; - x = b1/GMP_LIMB_BITS; - b1 -= x*GMP_LIMB_BITS; - b2 = 3*i; - while (b2 >= n) - { - negate = 1 - negate; - b2 -= n; - } - b2 = b2*w; - y = b2/GMP_LIMB_BITS; - b2 -= y*GMP_LIMB_BITS; - - mpn_lshB_sumdiffmod_2expp1(u, v, s, t, size - 1, x, y); - mpn_mul_2expmod_2expp1(s, u, size - 1, b1); - mpn_mul_2expmod_2expp1(t, v, size - 1, b2); - if (negate) mpn_neg_n(t, t, size); - if (negate2) mpn_neg_n(s, s, size); -} - -void FFT_split_radix_butterfly2(mp_limb_t * u, mp_limb_t * v, mp_limb_t * s, mp_limb_t * t, ulong i, ulong n, ulong w) -{ - mp_limb_t size = (w*n)/GMP_LIMB_BITS + 1; - ulong x, y, b1, b2; - int negate = 0; - int negate2 = 0; - - b1 = i; - while (b1 >= n) - { - negate2 = 1 - negate2; - b1 -= n; - } - b1 = b1*w; - x = b1/GMP_LIMB_BITS; - b1 -= x*GMP_LIMB_BITS; - b2 = 3*i; - while (b2 >= n) - { - negate = 1 - negate; - b2 -= n; - } - b2 = b2*w; - y = b2/GMP_LIMB_BITS; - b2 -= y*GMP_LIMB_BITS; - - mpn_lshB_sumdiffmod_2expp1(u, v, s, t, size - 1, x, y); - mpn_mul_2expmod_2expp1(u, u, size - 1, b1); - if (negate2) mpn_neg_n(u, u, size); - mpn_mul_2expmod_2expp1(v, v, size - 1, b2); - if (negate) mpn_neg_n(v, v, size); -} - -/* - Set u = 2^{b1}*(s + t), v = 2^{b2}*(s - t) - Note NW is just n*w. -*/ -void FFT_radix2_twiddle_butterfly(mp_limb_t * u, mp_limb_t * v, - mp_limb_t * s, mp_limb_t * t, ulong NW, ulong b1, ulong b2) -{ - mp_limb_t size = NW/GMP_LIMB_BITS + 1; - ulong x, y; - int negate = 0; - int negate2 = 0; - - b1 %= (2*NW); - if (b1 >= NW) - { - negate2 = 1; - b1 -= NW; - } - x = b1/GMP_LIMB_BITS; - b1 -= x*GMP_LIMB_BITS; - - b2 %= (2*NW); - if (b2 >= NW) - { - negate = 1; - b2 -= NW; - } - y = b2/GMP_LIMB_BITS; - b2 -= y*GMP_LIMB_BITS; - - mpn_lshB_sumdiffmod_2expp1(u, v, s, t, size - 1, x, y); - mpn_mul_2expmod_2expp1(u, u, size - 1, b1); - if (negate2) mpn_neg_n(u, u, size); - mpn_mul_2expmod_2expp1(v, v, size - 1, b2); - if (negate) mpn_neg_n(v, v, size); -} - -/* - Set s = i1 + i2, t = z1*(i1 - i2) where z1 = exp(2*Pi*I/m) => w bits -*/ -void FFT_radix2_butterfly(mp_limb_t * s, mp_limb_t * t, - mp_limb_t * i1, mp_limb_t * i2, ulong i, ulong n, ulong w) -{ - mp_limb_t size = (w*n)/GMP_LIMB_BITS + 1; - ulong x, y, b1; - int negate = 0; - - x = 0; - - b1 = i; - while (b1 >= n) - { - negate = 1 - negate; - b1 -= n; - } - b1 = b1*w; - y = b1/GMP_LIMB_BITS; - b1 -= y*GMP_LIMB_BITS; - - mpn_lshB_sumdiffmod_2expp1(s, t, i1, i2, size - 1, x, y); - mpn_mul_2expmod_2expp1(t, t, size - 1, b1); - if (negate) mpn_neg_n(t, t, size); -} - -/* - Set s = i1 + z1*i2, t = i1 - z1*i2 where z1 = exp(-2*Pi*I/m) => w bits -*/ -void FFT_radix2_inverse_butterfly(mp_limb_t * s, mp_limb_t * t, - mp_limb_t * i1, mp_limb_t * i2, ulong i, ulong n, ulong w) -{ - mp_limb_t limbs = (w*n)/GMP_LIMB_BITS; - ulong y, b1; - - b1 = i*w; - y = b1/GMP_LIMB_BITS; - b1 -= y*GMP_LIMB_BITS; - - mpn_div_2expmod_2expp1(i2, i2, limbs, b1); - mpn_sumdiff_rshBmod_2expp1(s, t, i1, i2, limbs, 0, y); -} - -/* - s = 2^-b1*i1 + 2^-b2*i2, t = 2^-b1*i1 - 2^-b1*i2. - Note NW is just n*w -*/ -void FFT_radix2_twiddle_inverse_butterfly(mp_limb_t * s, mp_limb_t * t, - mp_limb_t * i1, mp_limb_t * i2, ulong NW, ulong b1, ulong b2) -{ - mp_limb_t limbs = NW/GMP_LIMB_BITS; - ulong x, y; - int negate = 0; - int negate2 = 0; - - b1 %= (2*NW); - if (b1 >= NW) - { - negate = 1; - b1 -= NW; - } - x = b1/GMP_LIMB_BITS; - b1 -= x*GMP_LIMB_BITS; - - b2 %= (2*NW); - if (b2 >= NW) - { - negate2 = 1; - b2 -= NW; - } - y = b2/GMP_LIMB_BITS; - b2 -= y*GMP_LIMB_BITS; - - if (negate) mpn_neg_n(i1, i1, limbs + 1); - mpn_div_2expmod_2expp1(i1, i1, limbs, b1); - if (negate2) mpn_neg_n(i2, i2, limbs + 1); - mpn_div_2expmod_2expp1(i2, i2, limbs, b2); - mpn_sumdiff_rshBmod_2expp1(s, t, i1, i2, limbs, x, y); -} - -void FFT_radix2(mp_limb_t ** rr, ulong rs, mp_limb_t ** ii, - ulong n, ulong w, mp_limb_t ** t1, mp_limb_t ** t2, mp_limb_t ** temp); - -/* - The split radix DIF FFT works as follows: - Given: inputs [i0, i1, ..., i{m-1}], for m a power of 2 - - Output: Fsplit[i0, i1, ..., i{m-1}] = [r0, r1, ..., r{m-1}] - (Inputs and outputs may be subject to a stride and not in consecutive locations.) - - Algorithm: * Recursively compute [r0, r2, r4, ...., r{m-2}] - = Fsplit[i0+i{m/2}, i1+i{m/2+1}, ..., i{m/2-1}+i{m-1}] - * Let [s0, s1, ..., s{m/4-1}] - = [i0-i{m/2}, i1-i{m/2+1}, ..., i{m/4-1}-i{3m/4-1}] - * Let [t0, t1, ..., t{m/4-1}] - = sqrt(-1)[i{m/4}-i{3m/4}, i{m/4+1}-i{3m/4+1}, ..., i{m/2-1}-i{m-1}] - * Let [u0, u1, ..., u{m/4-1}] - = [z1^0*(s0+t0), z1^1(s1+t1), ..., z1^{m/4-1}*(s{m/4-1}+t{m/4-1})] - where z1 = exp(2*Pi*I/m) - * Let [v0, v1, ..., v{m/4-1}] - = [z2^0*(s0-t0), z2^1(s1-t1), ..., z2^{m/4-1}*(s{m/4-1}-t{m/4-1})] - where z2 = exp(2*Pi*I*3/m) - * Recursively compute [r1, r5, ..., r{m-3}] - = Fsplit[u0, u1, ..., u{m/4-1}] - * Recursively compute [r3, r7, ..., r{m-1}] - = Fsplit[v0, v1, ..., v{m/4-1}] - - The parameters are as follows: - * 2*n is the length of the input and output arrays (m in the above) - * w is such that 2^w is an 2n-th root of unity in the ring Z/pZ that - we are working in, i.e. p = 2^wn + 1 (here n is divisible by - GMP_LIMB_BITS) - * ii is the array of inputs (each input is an array of limbs of length - wn/GMP_LIMB_BITS + 1 (the extra limb being a "carry limb") - * rr is the array of outputs (each being an array of limbs of length - wn/GMP_LIMB_BITS + 1) - * rs is the stride with which entries of rr should be written (e.g. - rs = 4 means that the output is to be written in every 4th entry of - rr) - * is is the stride with which entries of ii should be read -*/ - -void FFT_split_radix(mp_limb_t ** rr, ulong rs, mp_limb_t ** ii, - ulong n, ulong w, mp_limb_t ** t1, mp_limb_t ** t2, mp_limb_t ** temp) -{ - mp_limb_t * ptr; - ulong i; - ulong size = (w*n)/GMP_LIMB_BITS + 1; - - if (n < 4) - { - FFT_radix2(rr, rs, ii, n, w, t1, t2, temp); - return; - } - - // [s0, s1, ..., s{m/4-1}] = [i0-i{m/2}, i1-i{m/2+1}, ..., i{m/4-1}-i{3m/4-1}] - for (i = 0; i < n/2; i++) - { - mpn_sub_n(temp[i], ii[i], ii[i+n], size); - mpn_add_n(ii[i+n], ii[i], ii[i+n], size); - } - - // [t0, t1, ..., t{m/4-1}] = sqrt(-1)[i{m/4}-i{3m/4}, i{m/4+1}-i{3m/4+1}, ..., i{m/2-1}-i{m-1}] - for (i = 0; i < n/2; i++) - { - mpn_submod_i_2expp1(temp[i+n/2], ii[i+n/2], ii[i+3*n/2], size - 1); - mpn_add_n(ii[i+3*n/2], ii[i+n/2], ii[i+3*n/2], size); - } - - // [u0, u1, ..., u{m/4-1}] = [z1^0*(s0+t0), z1^1(s1+t1), ..., z1^{m/4-1}*(s{m/4-1}+t{m/4-1})] - // [v0, v1, ..., v{m/4-1}] = [z2^0*(s0-t0), z2^1(s1-t1), ..., z2^{m/4-1}*(s{m/4-1}-t{m/4-1})] - // where z1 = exp(2*Pi*I/m), z2 = exp(2*Pi*I*3/m), z1 => w bits - // note {u, v} = {ss, tt} - for (i = 0; i < n/2; i++) - { - FFT_split_radix_butterfly2(*t1, *t2, temp[i], temp[i+n/2], i, n, w); - ptr = temp[i]; - temp[i] = *t1; - *t1 = ptr; - ptr = temp[i+n/2]; - temp[i+n/2] = *t2; - *t2 = ptr; - } - - // [r0, r2, r4, ...., r{m-2}] = Fsplit[i0+i{m/2}, i1+i{m/2+1}, ..., i{m/2-1}+i{m-1}] - FFT_split_radix(rr, 2*rs, ii+n, n/2, 2*w, t1, t2, temp + n); - - // [r1, r5, ..., r{m-3}] = Fsplit[u0, u1, ..., u{m/4-1}] - FFT_split_radix(rr + rs, 4*rs, temp, n/4, 4*w, t1, t2, temp + n); - - // [r3, r7, ..., r{m-1}] = Fsplit[v0, v1, ..., v{m/4-1}] - FFT_split_radix(rr + 3*rs, 4*rs, temp+n/2, n/4, 4*w, t1, t2, temp + n); -} - -/* - The radix 2 DIF FFT works as follows: - Given: inputs [i0, i1, ..., i{m-1}], for m a power of 2 - - Output: Fradix2[i0, i1, ..., i{m-1}] = [r0, r1, ..., r{m-1}] - (Inputs and outputs may be subject to a stride and not in consecutive locations.) - - Algorithm: * Recursively compute [r0, r2, r4, ...., r{m-2}] - = Fsplit[i0+i{m/2}, i1+i{m/2+1}, ..., i{m/2-1}+i{m-1}] - * Let [t0, t1, ..., t{m/2-1}] - = [i0-i{m/2}, i1-i{m/2+1}, ..., i{m/2-1}-i{m-1}] - * Let [u0, u1, ..., u{m/2-1}] - = [z1^0*t0, z1^1*t1, ..., z1^{m/2-1}*t{m/2-1}] - where z1 = exp(2*Pi*I/m) - * Recursively compute [r1, r3, ..., r{m-1}] - = Fsplit[u0, u1, ..., u{m/2-1}] - - The parameters are as follows: - * 2*n is the length of the input and output arrays (m in the above) - * w is such that 2^w is an 2n-th root of unity in the ring Z/pZ that - we are working in, i.e. p = 2^wn + 1 (here n is divisible by - GMP_LIMB_BITS) - * ii is the array of inputs (each input is an array of limbs of length - wn/GMP_LIMB_BITS + 1 (the extra limb being a "carry limb") - * rr is the array of outputs (each being an array of limbs of length - wn/GMP_LIMB_BITS + 1) - * rs is the stride with which entries of rr should be written (e.g. - rs = 4 means that the output is to be written in every 4th entry of - rr) - * is is the stride with which entries of ii should be read -*/ - -void FFT_radix2(mp_limb_t ** rr, ulong rs, mp_limb_t ** ii, - ulong n, ulong w, mp_limb_t ** t1, mp_limb_t ** t2, mp_limb_t ** temp) -{ - mp_limb_t ** ss, ** tt; - mp_limb_t * ptr; - ulong i; - ulong size = (w*n)/GMP_LIMB_BITS + 1; - - if (n == 1) - { - FFT_radix2_butterfly(*t1, *t2, ii[0], ii[1], 0, n, w); - ptr = rr[0]; - rr[0] = *t1; - *t1 = ptr; - ptr = rr[rs]; - rr[rs] = *t2; - *t2 = ptr; - return; - } - - // [s0, s1, ..., s{m/2}] = [i0+i{m/2}, i1+i{m/2+1}, ..., i{m/2-1}+i{m-1}] - // [t0, t1, ..., t{m/2-1}] - // = [z1^0*(i0-i{m/2}), z1^1*(i1-i{m/2+1}), ..., z1^{m/2-1}*(i{m/2-1}-i{m-1})] - // where z1 = exp(2*Pi*I/m), z1 => w bits - for (i = 0; i < n; i++) - { - FFT_radix2_butterfly(*t1, temp[i], ii[i], ii[n+i], i, n, w); - - ptr = ii[i]; - ii[i] = *t1; - *t1 = ptr; - } - - // [r0, r2, ..., r{m-2}] = Fradix2[s0, s1, ..., s{m/2-1}] - FFT_radix2(rr, 2*rs, ii, n/2, 2*w, t1, t2, temp + n); - - // [r1, r3, ..., r{m-1}] = Fradix2[t0, t1, ..., t{m/2-1}] - FFT_radix2(rr + rs, 2*rs, temp, n/2, 2*w, t1, t2, temp + n); -} - -void FFT_radix2_new(mp_limb_t ** rr, ulong rs, mp_limb_t ** ii, - ulong n, ulong w, mp_limb_t ** t1, mp_limb_t ** t2, mp_limb_t ** temp) -{ - mp_limb_t ** ss, ** tt; - mp_limb_t * ptr; - ulong i, j, k, l; - ulong size = (w*n)/GMP_LIMB_BITS + 1; - - if (n == 1) - { - FFT_radix2_butterfly(*t1, *t2, ii[0], ii[1], 0, n, w); - ptr = rr[0]; - rr[0] = *t1; - *t1 = ptr; - ptr = rr[rs]; - rr[rs] = *t2; - *t2 = ptr; - return; - } - - // [s0, s1, ..., s{m/2}] = [i0+i{m/2}, i1+i{m/2+1}, ..., i{m/2-1}+i{m-1}] - // [t0, t1, ..., t{m/2-1}] - // = [z1^0*(i0-i{m/2}), z1^1*(i1-i{m/2+1}), ..., z1^{m/2-1}*(i{m/2-1}-i{m-1})] - // where z1 = exp(2*Pi*I/m), z1 => w bits - for (i = 0; i < n; i++) - { - FFT_radix2_butterfly(*t1, *t2, ii[i], ii[n+i], i, n, w); - - ptr = ii[i]; - ii[i] = *t1; - *t1 = ptr; - ptr = ii[n+i]; - ii[n+i] = *t2; - *t2 = ptr; - } - - // [r0, r2, ..., r{m-2}] = Fradix2[s0, s1, ..., s{m/2-1}] - FFT_radix2_new(rr, 1, ii, n/2, 2*w, t1, t2, temp); - - // [r1, r3, ..., r{m-1}] = Fradix2[t0, t1, ..., t{m/2-1}] - FFT_radix2_new(rr + n, 1, ii+n, n/2, 2*w, t1, t2, temp); -} - -int FFT_negacyclic_twiddle(mp_limb_t * r, mp_limb_t * i1, ulong i, ulong n, ulong w) -{ - mp_limb_t cy; - ulong limbs = (n*w)/GMP_LIMB_BITS; - int negate = 0; - while (i >= 2*n) - { - negate = 1 - negate; - i -= 2*n; - } - ulong b1 = (w*i)/2; - ulong x = b1/GMP_LIMB_BITS; - b1 -= x*GMP_LIMB_BITS; - //if ((!x) && (negate)) - if (negate) mpn_neg_n(i1, i1, limbs + 1); - mpn_mul_2expmod_2expp1(i1, i1, limbs, b1); - if (x) - { - /*if (negate) - { - r[limbs] = -mpn_neg_n(r + x, i1, limbs - x); - MPN_COPY(r, i1 + limbs - x, x); - mpn_addmod_2expp1_1(r + x, limbs - x, i1[limbs]); - } else - {*/ - MPN_COPY(r + x, i1, limbs - x); - r[limbs] = CNST_LIMB(0); - cy = mpn_neg_n(r, i1 + limbs - x, x); - mpn_addmod_2expp1_1(r + x, limbs - x, -cy - i1[limbs]); - //} - return 1; - } - return 0; -} - -void FFT_twiddle(mp_limb_t * r, mp_limb_t * i1, ulong i, ulong n, ulong w) -{ - mp_limb_t cy; - ulong limbs = (n*w)/GMP_LIMB_BITS; - int negate = 0; - while (i >= n) - { - negate = 1 - negate; - i -= n; - } - ulong b1 = w*i; - ulong x = b1/GMP_LIMB_BITS; - b1 -= x*GMP_LIMB_BITS; - if (x) - { - MPN_COPY(r + x, i1, limbs - x); - r[limbs] = CNST_LIMB(0); - cy = mpn_neg_n(r, i1 + limbs - x, x); - mpn_addmod_2expp1_1(r + x, limbs - x, -cy - i1[limbs]); - if (negate) mpn_neg_n(r, r, limbs + 1); - mpn_mul_2expmod_2expp1(r, r, limbs, b1); - } else - { - if (negate) - { - mpn_neg_n(r, i1, limbs + 1); - mpn_mul_2expmod_2expp1(r, r, limbs, b1); - } else - mpn_mul_2expmod_2expp1(r, i1, limbs, b1); - } -} - -void FFT_radix2_twiddle(mp_limb_t ** ii, ulong is, - ulong n, ulong w, mp_limb_t ** t1, mp_limb_t ** t2, mp_limb_t ** temp, - ulong ws, ulong r, ulong c, ulong rs); - -/* - Truncate FFT to any length given by trunc, so long as trunc is - divisible by 2. -*/ -void FFT_radix2_truncate1(mp_limb_t ** rr, ulong rs, mp_limb_t ** ii, - ulong n, ulong w, mp_limb_t ** t1, mp_limb_t ** t2, mp_limb_t ** temp, - ulong trunc) -{ - mp_limb_t ** ss, ** tt; - mp_limb_t * ptr; - ulong i, j, k, l; - ulong size = (w*n)/GMP_LIMB_BITS + 1; - - if (trunc == 2*n) - { - FFT_radix2_new(rr, rs, ii, n, w, t1, t2, temp); - return; - } - - if (trunc <= n) - { - for (i = 0; i < n; i++) - mpn_add_n(ii[i], ii[i], ii[i+n], size); - - FFT_radix2_truncate1(rr, rs, ii, n/2, 2*w, t1, t2, temp, trunc); - } else - { - - // [s0, s1, ..., s{m/2}] = [i0+i{m/2}, i1+i{m/2+1}, ..., i{m/2-1}+i{m-1}] - // [t0, t1, ..., t{m/2-1}] - // = [z1^0*(i0-i{m/2}), z1^1*(i1-i{m/2+1}), ..., z1^{m/2-1}*(i{m/2-1}-i{m-1})] - // where z1 = exp(2*Pi*I/m), z1 => w bits - for (i = 0; i < n; i++) - { - FFT_radix2_butterfly(*t1, *t2, ii[i], ii[n+i], i, n, w); - - ptr = ii[i]; - ii[i] = *t1; - *t1 = ptr; - ptr = ii[n+i]; - ii[n+i] = *t2; - *t2 = ptr; - } - - // [r0, r2, ..., r{m-2}] = Fradix2[s0, s1, ..., s{m/2-1}] - FFT_radix2_new(rr, 1, ii, n/2, 2*w, t1, t2, temp); - - // [r1, r3, ..., r{m-1}] = Fradix2[t0, t1, ..., t{m/2-1}] - FFT_radix2_truncate1(rr + n, 1, ii+n, n/2, 2*w, t1, t2, temp, trunc - n); - } -} - -void FFT_radix2_truncate1_twiddle(mp_limb_t ** ii, ulong is, - ulong n, ulong w, mp_limb_t ** t1, mp_limb_t ** t2, mp_limb_t ** temp, - ulong ws, ulong r, ulong c, ulong rs, ulong trunc) -{ - mp_limb_t ** ss, ** tt; - mp_limb_t * ptr; - ulong i, j, k, l; - ulong size = (w*n)/GMP_LIMB_BITS + 1; - - if (trunc == 2*n) - { - FFT_radix2_twiddle(ii, is, n, w, t1, t2, temp, ws, r, c, rs); - return; - } - - if (trunc <= n) - { - for (i = 0; i < n; i++) - mpn_add_n(ii[i*is], ii[i*is], ii[(i+n)*is], size); - - FFT_radix2_truncate1_twiddle(ii, is, n/2, 2*w, t1, t2, temp, ws, r, c, 2*rs, trunc); - } else - { - - // [s0, s1, ..., s{m/2}] = [i0+i{m/2}, i1+i{m/2+1}, ..., i{m/2-1}+i{m-1}] - // [t0, t1, ..., t{m/2-1}] - // = [z1^0*(i0-i{m/2}), z1^1*(i1-i{m/2+1}), ..., z1^{m/2-1}*(i{m/2-1}-i{m-1})] - // where z1 = exp(2*Pi*I/m), z1 => w bits - for (i = 0; i < n; i++) - { - FFT_radix2_butterfly(*t1, *t2, ii[i*is], ii[(n+i)*is], i, n, w); - - ptr = ii[i*is]; - ii[i*is] = *t1; - *t1 = ptr; - ptr = ii[(n+i)*is]; - ii[(n+i)*is] = *t2; - *t2 = ptr; - } - - // [r0, r2, ..., r{m-2}] = Fradix2[s0, s1, ..., s{m/2-1}] - FFT_radix2_twiddle(ii, is, n/2, 2*w, t1, t2, temp, ws, r, c, 2*rs); - - // [r1, r3, ..., r{m-1}] = Fradix2[t0, t1, ..., t{m/2-1}] - FFT_radix2_truncate1_twiddle(ii + n*is, is, n/2, 2*w, t1, t2, temp, ws, r + rs, c, 2*rs, trunc - n); - } -} - -/* - Truncate FFT to any length given by trunc, so long as trunc is - divisible by 2. Assumes zeros from trunc to n. -*/ -void FFT_radix2_truncate(mp_limb_t ** rr, ulong rs, mp_limb_t ** ii, - ulong n, ulong w, mp_limb_t ** t1, mp_limb_t ** t2, mp_limb_t ** temp, - ulong trunc) -{ - mp_limb_t ** ss, ** tt; - mp_limb_t * ptr; - ulong i, j, k, l; - ulong size = (w*n)/GMP_LIMB_BITS + 1; - - if (trunc == 2*n) - { - FFT_radix2_new(rr, rs, ii, n, w, t1, t2, temp); - return; - } - - if (trunc <= n) - { - // PASS - FFT_radix2_truncate(rr, rs, ii, n/2, 2*w, t1, t2, temp, trunc); - } else - { - // PASS - // [s0, s1, ..., s{m/2}] = [i0+i{m/2}, i1+i{m/2+1}, ..., i{m/2-1}+i{m-1}] - // [t0, t1, ..., t{m/2-1}] - // = [z1^0*(i0-i{m/2}), z1^1*(i1-i{m/2+1}), ..., z1^{m/2-1}*(i{m/2-1}-i{m-1})] - // where z1 = exp(2*Pi*I/m), z1 => w bits - for (i = 0; i < trunc - n; i++) - { - FFT_radix2_butterfly(*t1, *t2, ii[i], ii[n+i], i, n, w); - - ptr = ii[i]; - ii[i] = *t1; - *t1 = ptr; - ptr = ii[n+i]; - ii[n+i] = *t2; - *t2 = ptr; - } - - for (i = trunc; i < 2*n; i++) - { - FFT_twiddle(ii[i], ii[i-n], i - n, n, w); - } - - // [r0, r2, ..., r{m-2}] = Fradix2[s0, s1, ..., s{m/2-1}] - FFT_radix2_new(rr, 1, ii, n/2, 2*w, t1, t2, temp); - - // [r1, r3, ..., r{m-1}] = Fradix2[t0, t1, ..., t{m/2-1}] - FFT_radix2_truncate1(rr + n, 1, ii+n, n/2, 2*w, t1, t2, temp, trunc - n); - } -} - -void FFT_radix2_truncate_twiddle(mp_limb_t ** ii, ulong is, - ulong n, ulong w, mp_limb_t ** t1, mp_limb_t ** t2, mp_limb_t ** temp, - ulong ws, ulong r, ulong c, ulong rs, ulong trunc) -{ - mp_limb_t ** ss, ** tt; - mp_limb_t * ptr; - ulong i, j, k, l; - ulong size = (w*n)/GMP_LIMB_BITS + 1; - - if (trunc == 2*n) - { - FFT_radix2_twiddle(ii, is, n, w, t1, t2, temp, ws, r, c, rs); - return; - } - - if (trunc <= n) - { - // PASS - FFT_radix2_truncate_twiddle(ii, is, n/2, 2*w, t1, t2, temp, ws, r, c, 2*rs, trunc); - } else - { - // PASS - // [s0, s1, ..., s{m/2}] = [i0+i{m/2}, i1+i{m/2+1}, ..., i{m/2-1}+i{m-1}] - // [t0, t1, ..., t{m/2-1}] - // = [z1^0*(i0-i{m/2}), z1^1*(i1-i{m/2+1}), ..., z1^{m/2-1}*(i{m/2-1}-i{m-1})] - // where z1 = exp(2*Pi*I/m), z1 => w bits - for (i = 0; i < trunc - n; i++) - { - FFT_radix2_butterfly(*t1, *t2, ii[i*is], ii[(n+i)*is], i, n, w); - - ptr = ii[i*is]; - ii[i*is] = *t1; - *t1 = ptr; - ptr = ii[(n+i)*is]; - ii[(n+i)*is] = *t2; - *t2 = ptr; - } - - for (i = trunc; i < 2*n; i++) - { - FFT_twiddle(ii[i*is], ii[(i-n)*is], i - n, n, w); - } - - // [r0, r2, ..., r{m-2}] = Fradix2[s0, s1, ..., s{m/2-1}] - FFT_radix2_twiddle(ii, is, n/2, 2*w, t1, t2, temp, ws, r, c, 2*rs); - - // [r1, r3, ..., r{m-1}] = Fradix2[t0, t1, ..., t{m/2-1}] - FFT_radix2_truncate1_twiddle(ii + n*is, is, n/2, 2*w, t1, t2, temp, ws, r + rs, c, 2*rs, trunc - n); - } -} - - -void FFT_radix2_negacyclic(mp_limb_t ** rr, ulong rs, mp_limb_t ** ii, - ulong n, ulong w, mp_limb_t ** t1, mp_limb_t ** t2, mp_limb_t ** temp) -{ - mp_limb_t ** ss, ** tt; - mp_limb_t * ptr; - ulong i, j, k, l; - ulong size = (w*n)/GMP_LIMB_BITS + 1; - - // we first apply twiddle factors corresponding to shifts of w*i/2 bits - // this could partially be combined with the butterflies below - - // [s0, s1, ..., s{m/2}] = [i0+i{m/2}, i1+i{m/2+1}, ..., i{m/2-1}+i{m-1}] - // [t0, t1, ..., t{m/2-1}] - // = [z1^0*(i0-i{m/2}), z1^1*(i1-i{m/2+1}), ..., z1^{m/2-1}*(i{m/2-1}-i{m-1})] - // where z1 = exp(2*Pi*I/m), z1 => w bits - for (i = 0; i < n; i++) - { - if (FFT_negacyclic_twiddle(*t1, ii[i], i, n, w)) - { - ptr = ii[i]; - ii[i] = *t1; - *t1 = ptr; - } - - if (FFT_negacyclic_twiddle(*t1, ii[n + i], n + i, n, w)) - { - ptr = ii[n + i]; - ii[n + i] = *t1; - *t1 = ptr; - } - - FFT_radix2_butterfly(*t1, *t2, ii[i], ii[n+i], i, n, w); - - ptr = ii[i]; - ii[i] = *t1; - *t1 = ptr; - ptr = ii[n+i]; - ii[n+i] = *t2; - *t2 = ptr; - } - - // [r0, r2, ..., r{m-2}] = Fradix2[s0, s1, ..., s{m/2-1}] - FFT_radix2_new(rr, 1, ii, n/2, 2*w, t1, t2, temp); - - // [r1, r3, ..., r{m-1}] = Fradix2[t0, t1, ..., t{m/2-1}] - FFT_radix2_new(rr + n, 1, ii+n, n/2, 2*w, t1, t2, temp); - - j = 0; k = 1; l = n+1; - temp[0] = ii[1]; - ii[1] = ii[n]; - for (i = 2; i < n; i += 2) - { - temp[k++] = ii[i]; - ii[i] = temp[j++]; - temp[k++] = ii[i+1]; - ii[i+1] = ii[l++]; - } - for ( ; i < 2*n; i+=2) - { - ii[i] = temp[j++]; - ii[i+1] = ii[l++]; - } -} - -/* - FFT of length 2*n on entries of ii with stride is. - Apply additional twists by z^{c*i} where i starts at r and increases by rs - for each coeff. Note z => ws bits. -*/ -void FFT_radix2_twiddle(mp_limb_t ** ii, ulong is, - ulong n, ulong w, mp_limb_t ** t1, mp_limb_t ** t2, mp_limb_t ** temp, - ulong ws, ulong r, ulong c, ulong rs) -{ - mp_limb_t ** ss, ** tt; - mp_limb_t * ptr; - ulong i, j, k, l; - ulong size = (w*n)/GMP_LIMB_BITS + 1; - - if (n == 1) - { - ulong tw1, tw2; - tw1 = r*c; - tw2 = tw1 + rs*c; - FFT_radix2_twiddle_butterfly(*t1, *t2, ii[0], ii[is], n*w, tw1*ws, tw2*ws); - ptr = ii[0]; - ii[0] = *t1; - *t1 = ptr; - ptr = ii[is]; - ii[is] = *t2; - *t2 = ptr; - return; - } - - // [s0, s1, ..., s{m/2}] = [i0+i{m/2}, i1+i{m/2+1}, ..., i{m/2-1}+i{m-1}] - // [t0, t1, ..., t{m/2-1}] - // = [z1^0*(i0-i{m/2}), z1^1*(i1-i{m/2+1}), ..., z1^{m/2-1}*(i{m/2-1}-i{m-1})] - // where z1 = exp(2*Pi*I/m), z1 => w bits - for (i = 0; i < n; i++) - { - FFT_radix2_butterfly(*t1, *t2, ii[i*is], ii[(n+i)*is], i, n, w); - - ptr = ii[i*is]; - ii[i*is] = *t1; - *t1 = ptr; - ptr = ii[(n+i)*is]; - ii[(n+i)*is] = *t2; - *t2 = ptr; - } - - // [r0, r2, ..., r{m-2}] = Fradix2[s0, s1, ..., s{m/2-1}] - FFT_radix2_twiddle(ii, is, n/2, 2*w, t1, t2, temp, ws, r, c, 2*rs); - - // [r1, r3, ..., r{m-1}] = Fradix2[t0, t1, ..., t{m/2-1}] - FFT_radix2_twiddle(ii+n*is, is, n/2, 2*w, t1, t2, temp, ws, r + rs, c, 2*rs); -} - -void IFFT_radix2_new(mp_limb_t ** rr, ulong rs, mp_limb_t ** ii, - ulong n, ulong w, mp_limb_t ** t1, mp_limb_t ** t2, mp_limb_t ** temp) -{ - mp_limb_t ** ss, ** tt; - mp_limb_t * ptr; - ulong i, j, k, l; - ulong size = (w*n)/GMP_LIMB_BITS + 1; - - if (n == 1) - { - FFT_radix2_inverse_butterfly(*t1, *t2, ii[0], ii[1], 0, n, w); - ptr = rr[0]; - rr[0] = *t1; - *t1 = ptr; - ptr = rr[rs]; - rr[rs] = *t2; - *t2 = ptr; - return; - } - - // [s0, s1, ..., s{m/2-1}] = Fradix2_inverse[i0, i2, ..., i{m-2}] - IFFT_radix2_new(ii, 1, ii, n/2, 2*w, t1, t2, temp); - - // [t{m/2}, t{m/2+1}, ..., t{m-1}] = Fradix2_inverse[i1, i3, ..., i{m-1}] - IFFT_radix2_new(ii+n, 1, ii+n, n/2, 2*w, t1, t2, temp); - - // [r0, r1, ..., r{m/2-1}] - // = [s0+z1^0*t0, s1+z1^1*t1, ..., s{m/2-1}+z1^{m/2-1}*t{m-1}] - // [r{m/2}, t{m/2+1}, ..., r{m-1}] - // = [s0-z1^0*t{m/2}, s1-z1^1*t{m/2+1}, ..., s{m/2-1}-z1^{m/2-1}*t{m-1}] - // where z1 = exp(-2*Pi*I/m), z1 => w bits - for (i = 0; i < n; i++) - { - FFT_radix2_inverse_butterfly(*t1, *t2, ii[i], ii[n+i], i, n, w); - - ptr = rr[i]; - rr[i] = *t1; - *t1 = ptr; - ptr = rr[n+i]; - rr[n+i] = *t2; - *t2 = ptr; - } -} - -void IFFT_radix2_twiddle(mp_limb_t ** ii, ulong is, - ulong n, ulong w, mp_limb_t ** t1, mp_limb_t ** t2, mp_limb_t ** temp, - ulong ws, ulong r, ulong c, ulong rs); - -void IFFT_radix2_truncate1(mp_limb_t ** rr, ulong rs, mp_limb_t ** ii, - ulong n, ulong w, mp_limb_t ** t1, mp_limb_t ** t2, mp_limb_t ** temp, - ulong trunc) -{ - mp_limb_t ** ss, ** tt; - mp_limb_t * ptr; - ulong i, j, k, l; - ulong size = (w*n)/GMP_LIMB_BITS + 1; - - if (trunc == 2*n) - { - IFFT_radix2_new(rr, rs, ii, n, w, t1, t2, temp); - return; - } - - if (trunc <= n) - { - // PASS - for (i = trunc; i < n; i++) - { - mpn_add_n(ii[i], ii[i], ii[i+n], size); - mpn_div_2expmod_2expp1(ii[i], ii[i], size - 1, 1); - } - - IFFT_radix2_truncate1(rr, rs, ii, n/2, 2*w, t1, t2, temp, trunc); - - for (i = 0; i < trunc; i++) - mpn_addsub_n(ii[i], ii[i], ii[i], ii[n+i], size); - - return; - } - - // [s0, s1, ..., s{m/2-1}] = Fradix2_inverse[i0, i2, ..., i{m-2}] - IFFT_radix2_new(ii, 1, ii, n/2, 2*w, t1, t2, temp); - - for (i = trunc; i < 2*n; i++) - { - mpn_sub_n(ii[i], ii[i-n], ii[i], size); - FFT_twiddle(*t1, ii[i], i - n, n, w); - ptr = ii[i]; - ii[i] = *t1; - *t1 = ptr; - } - - for (i = trunc - n; i < n; i++) - { - FFT_twiddle(*t1, ii[i + n], 2*n - i, n, w); - mpn_add_n(ii[i], ii[i], *t1, size); - } - - // [t{m/2}, t{m/2+1}, ..., t{m-1}] = Fradix2_inverse[i1, i3, ..., i{m-1}] - IFFT_radix2_truncate1(ii+n, 1, ii+n, n/2, 2*w, t1, t2, temp, trunc - n); - - // [r0, r1, ..., r{m/2-1}] - // = [s0+z1^0*t0, s1+z1^1*t1, ..., s{m/2-1}+z1^{m/2-1}*t{m-1}] - // [r{m/2}, t{m/2+1}, ..., r{m-1}] - // = [s0-z1^0*t{m/2}, s1-z1^1*t{m/2+1}, ..., s{m/2-1}-z1^{m/2-1}*t{m-1}] - // where z1 = exp(-2*Pi*I/m), z1 => w bits - for (i = 0; i < trunc - n; i++) - { - FFT_radix2_inverse_butterfly(*t1, *t2, ii[i], ii[n+i], i, n, w); - - ptr = rr[i]; - rr[i] = *t1; - *t1 = ptr; - ptr = rr[n+i]; - rr[n+i] = *t2; - *t2 = ptr; - } -} - -void IFFT_radix2_truncate1_twiddle(mp_limb_t ** ii, ulong is, - ulong n, ulong w, mp_limb_t ** t1, mp_limb_t ** t2, mp_limb_t ** temp, - ulong ws, ulong r, ulong c, ulong rs, ulong trunc) -{ - mp_limb_t ** ss, ** tt; - mp_limb_t * ptr; - ulong i, j, k, l; - ulong size = (w*n)/GMP_LIMB_BITS + 1; - - if (trunc == 2*n) - { - IFFT_radix2_twiddle(ii, is, n, w, t1, t2, temp, ws, r, c, rs); - return; - } - - if (trunc <= n) - { - // PASS - for (i = trunc; i < n; i++) - { - mpn_add_n(ii[i*is], ii[i*is], ii[(i+n)*is], size); - mpn_div_2expmod_2expp1(ii[i*is], ii[i*is], size - 1, 1); - } - - IFFT_radix2_truncate1_twiddle(ii, is, n/2, 2*w, t1, t2, temp, ws, r, c, 2*rs, trunc); - - for (i = 0; i < trunc; i++) - mpn_addsub_n(ii[i*is], ii[i*is], ii[i*is], ii[(n+i)*is], size); - - return; - } - - // [s0, s1, ..., s{m/2-1}] = Fradix2_inverse[i0, i2, ..., i{m-2}] - IFFT_radix2_twiddle(ii, is, n/2, 2*w, t1, t2, temp, ws, r, c, 2*rs); - - for (i = trunc; i < 2*n; i++) - { - mpn_sub_n(ii[i*is], ii[(i-n)*is], ii[i*is], size); - FFT_twiddle(*t1, ii[i*is], i - n, n, w); - ptr = ii[i*is]; - ii[i*is] = *t1; - *t1 = ptr; - } - - for (i = trunc - n; i < n; i++) - { - FFT_twiddle(*t1, ii[(i+n)*is], 2*n - i, n, w); - mpn_add_n(ii[i*is], ii[i*is], *t1, size); - } - - // [t{m/2}, t{m/2+1}, ..., t{m-1}] = Fradix2_inverse[i1, i3, ..., i{m-1}] - IFFT_radix2_truncate1_twiddle(ii + n*is, is, n/2, 2*w, t1, t2, temp, ws, r + rs, c, 2*rs, trunc - n); - - // [r0, r1, ..., r{m/2-1}] - // = [s0+z1^0*t0, s1+z1^1*t1, ..., s{m/2-1}+z1^{m/2-1}*t{m-1}] - // [r{m/2}, t{m/2+1}, ..., r{m-1}] - // = [s0-z1^0*t{m/2}, s1-z1^1*t{m/2+1}, ..., s{m/2-1}-z1^{m/2-1}*t{m-1}] - // where z1 = exp(-2*Pi*I/m), z1 => w bits - for (i = 0; i < trunc - n; i++) - { - FFT_radix2_inverse_butterfly(*t1, *t2, ii[i*is], ii[(n+i)*is], i, n, w); - - ptr = ii[i*is]; - ii[i*is] = *t1; - *t1 = ptr; - ptr = ii[(n+i)*is]; - ii[(n+i)*is] = *t2; - *t2 = ptr; - } -} - -/* - Truncate IFFT to given length. Requires trunc a multiple of 8. - Assumes (conceptually) zeroes from trunc to n. -*/ -void IFFT_radix2_truncate(mp_limb_t ** rr, ulong rs, mp_limb_t ** ii, - ulong n, ulong w, mp_limb_t ** t1, mp_limb_t ** t2, mp_limb_t ** temp, - ulong trunc) -{ - mp_limb_t ** ss, ** tt; - mp_limb_t * ptr; - ulong i, j, k, l; - ulong size = (w*n)/GMP_LIMB_BITS + 1; - - if (trunc == 2*n) - { - IFFT_radix2_new(rr, rs, ii, n, w, t1, t2, temp); - return; - } - - if (trunc <= n) - { - // PASS - IFFT_radix2_truncate(rr, rs, ii, n/2, 2*w, t1, t2, temp, trunc); - - for (i = 0; i < trunc; i++) - mpn_add_n(ii[i], ii[i], ii[i], size); - - return; - } - - //PASS - // [s0, s1, ..., s{m/2-1}] = Fradix2_inverse[i0, i2, ..., i{m-2}] - IFFT_radix2_new(ii, 1, ii, n/2, 2*w, t1, t2, temp); - - for (i = trunc; i < 2*n; i++) - { - FFT_twiddle(ii[i], ii[i-n], i - n, n, w); - } - - // [t{m/2}, t{m/2+1}, ..., t{m-1}] = Fradix2_inverse[i1, i3, ..., i{m-1}] - IFFT_radix2_truncate1(ii+n, 1, ii+n, n/2, 2*w, t1, t2, temp, trunc - n); - - // [r0, r1, ..., r{m/2-1}] - // = [s0+z1^0*t0, s1+z1^1*t1, ..., s{m/2-1}+z1^{m/2-1}*t{m-1}] - // [r{m/2}, t{m/2+1}, ..., r{m-1}] - // = [s0-z1^0*t{m/2}, s1-z1^1*t{m/2+1}, ..., s{m/2-1}-z1^{m/2-1}*t{m-1}] - // where z1 = exp(-2*Pi*I/m), z1 => w bits - for (i = 0; i < trunc - n; i++) - { - FFT_radix2_inverse_butterfly(*t1, *t2, ii[i], ii[n+i], i, n, w); - - ptr = rr[i]; - rr[i] = *t1; - *t1 = ptr; - ptr = rr[n+i]; - rr[n+i] = *t2; - *t2 = ptr; - } - - for (i = trunc - n; i < n; i++) - mpn_add_n(ii[i], ii[i], ii[i], size); -} - -void IFFT_radix2_truncate_twiddle(mp_limb_t ** ii, ulong is, - ulong n, ulong w, mp_limb_t ** t1, mp_limb_t ** t2, mp_limb_t ** temp, - ulong ws, ulong r, ulong c, ulong rs, ulong trunc) -{ - mp_limb_t ** ss, ** tt; - mp_limb_t * ptr; - ulong i, j, k, l; - ulong size = (w*n)/GMP_LIMB_BITS + 1; - - if (trunc == 2*n) - { - IFFT_radix2_twiddle(ii, is, n, w, t1, t2, temp, ws, r, c, rs); - return; - } - - if (trunc <= n) - { - // PASS - IFFT_radix2_truncate_twiddle(ii, is, n/2, 2*w, t1, t2, temp, ws, r, c, 2*rs, trunc); - - for (i = 0; i < trunc; i++) - mpn_add_n(ii[i*is], ii[i*is], ii[i*is], size); - - return; - } - - //PASS - // [s0, s1, ..., s{m/2-1}] = Fradix2_inverse[i0, i2, ..., i{m-2}] - IFFT_radix2_twiddle(ii, is, n/2, 2*w, t1, t2, temp, ws, r, c, 2*rs); - - for (i = trunc; i < 2*n; i++) - { - FFT_twiddle(ii[i*is], ii[(i-n)*is], i - n, n, w); - } - - // [t{m/2}, t{m/2+1}, ..., t{m-1}] = Fradix2_inverse[i1, i3, ..., i{m-1}] - IFFT_radix2_truncate1_twiddle(ii+n*is, is, n/2, 2*w, t1, t2, temp, ws, r + rs, c, 2*rs, trunc - n); - - // [r0, r1, ..., r{m/2-1}] - // = [s0+z1^0*t0, s1+z1^1*t1, ..., s{m/2-1}+z1^{m/2-1}*t{m-1}] - // [r{m/2}, t{m/2+1}, ..., r{m-1}] - // = [s0-z1^0*t{m/2}, s1-z1^1*t{m/2+1}, ..., s{m/2-1}-z1^{m/2-1}*t{m-1}] - // where z1 = exp(-2*Pi*I/m), z1 => w bits - for (i = 0; i < trunc - n; i++) - { - FFT_radix2_inverse_butterfly(*t1, *t2, ii[i*is], ii[(n+i)*is], i, n, w); - - ptr = ii[i*is]; - ii[i*is] = *t1; - *t1 = ptr; - ptr = ii[(n+i)*is]; - ii[(n+i)*is] = *t2; - *t2 = ptr; - } - - for (i = trunc - n; i < n; i++) - mpn_add_n(ii[i*is], ii[i*is], ii[i*is], size); -} - -void IFFT_radix2_negacyclic(mp_limb_t ** rr, ulong rs, mp_limb_t ** ii, - ulong n, ulong w, mp_limb_t ** t1, mp_limb_t ** t2, mp_limb_t ** temp) -{ - mp_limb_t ** ss, ** tt; - mp_limb_t * ptr; - ulong i, j, k, l; - ulong size = (w*n)/GMP_LIMB_BITS + 1; - - j = 0; k = 0; - for (i = 0; i < n; i += 2) - { - ii[i] = ii[2*i]; - temp[k++] = ii[i+1]; - ii[i+1] = ii[2*(i+1)]; - } - for ( ; i < 2*n; i+=2) - { - ii[i] = temp[j++]; - temp[k++] = ii[i+1]; - ii[i+1] = temp[j++]; - } - - // [s0, s1, ..., s{m/2-1}] = Fradix2_inverse[i0, i2, ..., i{m-2}] - IFFT_radix2_new(ii, 1, ii, n/2, 2*w, t1, t2, temp); - - // [t{m/2}, t{m/2+1}, ..., t{m-1}] = Fradix2_inverse[i1, i3, ..., i{m-1}] - IFFT_radix2_new(ii+n, 1, ii+n, n/2, 2*w, t1, t2, temp); - - // [r0, r1, ..., r{m/2-1}] - // = [s0+z1^0*t0, s1+z1^1*t1, ..., s{m/2-1}+z1^{m/2-1}*t{m-1}] - // [r{m/2}, t{m/2+1}, ..., r{m-1}] - // = [s0-z1^0*t{m/2}, s1-z1^1*t{m/2+1}, ..., s{m/2-1}-z1^{m/2-1}*t{m-1}] - // where z1 = exp(-2*Pi*I/m), z1 => w bits - for (i = 0; i < n; i++) - { - FFT_radix2_inverse_butterfly(*t1, *t2, ii[i], ii[n+i], i, n, w); - - ptr = rr[i]; - rr[i] = *t1; - *t1 = ptr; - ptr = rr[n+i]; - rr[n+i] = *t2; - *t2 = ptr; - - // we finally apply twiddle factors corresponding to shifts of w*i bits - // this could partially be combined with the butterflies above - - if (FFT_negacyclic_twiddle(*t1, ii[i], 4*n - i, n, w)) - { - ptr = ii[i]; - ii[i] = *t1; - *t1 = ptr; - } - - if (FFT_negacyclic_twiddle(*t1, ii[n + i], 3*n - i, n, w)) - { - ptr = ii[n + i]; - ii[n + i] = *t1; - *t1 = ptr; - } - } -} - -void IFFT_radix2_twiddle(mp_limb_t ** ii, ulong is, - ulong n, ulong w, mp_limb_t ** t1, mp_limb_t ** t2, mp_limb_t ** temp, - ulong ws, ulong r, ulong c, ulong rs) -{ - mp_limb_t ** ss, ** tt; - mp_limb_t * ptr; - ulong i, j, k, l; - ulong size = (w*n)/GMP_LIMB_BITS + 1; - - if (n == 1) - { - ulong tw1, tw2; - tw1 = r*c; - tw2 = tw1 + rs*c; - FFT_radix2_twiddle_inverse_butterfly(*t1, *t2, ii[0], ii[is], n*w, tw1*ws, tw2*ws); - ptr = ii[0]; - ii[0] = *t1; - *t1 = ptr; - ptr = ii[is]; - ii[is] = *t2; - *t2 = ptr; - return; - } - - // [s0, s1, ..., s{m/2-1}] = Fradix2_inverse[i0, i2, ..., i{m-2}] - IFFT_radix2_twiddle(ii, is, n/2, 2*w, t1, t2, temp, ws, r, c, 2*rs); - - // [t{m/2}, t{m/2+1}, ..., t{m-1}] = Fradix2_inverse[i1, i3, ..., i{m-1}] - IFFT_radix2_twiddle(ii+n*is, is, n/2, 2*w, t1, t2, temp, ws, r + rs, c, 2*rs); - - // [r0, r1, ..., r{m/2-1}] - // = [s0+z1^0*t0, s1+z1^1*t1, ..., s{m/2-1}+z1^{m/2-1}*t{m-1}] - // [r{m/2}, t{m/2+1}, ..., r{m-1}] - // = [s0-z1^0*t{m/2}, s1-z1^1*t{m/2+1}, ..., s{m/2-1}-z1^{m/2-1}*t{m-1}] - // where z1 = exp(-2*Pi*I/m), z1 => w bits - for (i = 0; i < n; i++) - { - FFT_radix2_inverse_butterfly(*t1, *t2, ii[i*is], ii[(n+i)*is], i, n, w); - - ptr = ii[i*is]; - ii[i*is] = *t1; - *t1 = ptr; - ptr = ii[(n+i)*is]; - ii[(n+i)*is] = *t2; - *t2 = ptr; - } -} - -void FFT_mulmod_2expp1(mp_limb_t * r1, mp_limb_t * i1, mp_limb_t * i2, - ulong r_limbs, ulong depth, ulong w) -{ - ulong n = (1UL<= GMP_LIMB_BITS) - { - b -= GMP_LIMB_BITS; - ll++; - } - bit = (1UL<w bits - * Perform a length C FFT on each row, i.e. with an input stride of 1 -*/ -void FFT_radix2_mfa(mp_limb_t ** ii, ulong n, ulong w, - mp_limb_t ** t1, mp_limb_t ** t2, mp_limb_t ** temp, ulong n1) -{ - ulong i, j; - ulong n2 = (2*n)/n1; - ulong depth = 0; - ulong depth2 = 0; - mp_limb_t * ptr; - - while ((1UL< w bits - - FFT_radix2_twiddle(ii + i, n1, n2/2, w*n1, t1, t2, temp, w, 0, i, 1); - for (j = 0; j < n2; j++) - { - ulong s = revbin(j, depth); - if (j < s) - { - ptr = ii[i + j*n1]; - ii[i + j*n1] = ii[i + s*n1]; - ii[i + s*n1] = ptr; - } - } - } - - for (i = 0; i < n2; i++) - { - FFT_radix2_new(ii + i*n1, 1, ii + i*n1, n1/2, w*n2, t1, t2, temp); - - for (j = 0; j < n1; j++) - { - ulong s = revbin(j, depth2); - if (j < s) - { - ptr = ii[i*n1 + j]; - ii[i*n1 + j] = ii[i*n1 + s]; - ii[i*n1 + s] = ptr; - } - } - } -} - -void FFT_radix2_mfa_truncate(mp_limb_t ** ii, ulong n, ulong w, - mp_limb_t ** t1, mp_limb_t ** t2, mp_limb_t ** temp, ulong n1, ulong trunc) -{ - ulong i, j, s; - ulong n2 = (2*n)/n1; - ulong limbs = (n*w)/GMP_LIMB_BITS; - ulong depth = 0; - ulong depth2 = 0; - mp_limb_t * ptr; - - while ((1UL< w bits - - FFT_radix2_truncate_twiddle(ii + i, n1, n2/2, w*n1, t1, t2, temp, w, 0, i, 1, trunc); - for (j = 0; j < n2; j++) - { - s = revbin(j, depth); - if (j < s) - { - ptr = ii[i + j*n1]; - ii[i + j*n1] = ii[i + s*n1]; - ii[i + s*n1] = ptr; - } - } - } - - for (s = 0; s < trunc; s++) - { - i = revbin(s, depth); - FFT_radix2_new(ii + i*n1, 1, ii + i*n1, n1/2, w*n2, t1, t2, temp); - - for (j = 0; j < n1; j++) - mpn_normmod_2expp1(ii[i*n1 + j], limbs); - } -} - -void IFFT_radix2_mfa(mp_limb_t ** ii, ulong n, ulong w, - mp_limb_t ** t1, mp_limb_t ** t2, mp_limb_t ** temp, ulong n1) -{ - ulong i, j; - ulong n2 = (2*n)/n1; - ulong depth = 0; - ulong depth2 = 0; - mp_limb_t * ptr; - - while ((1UL< w bits - IFFT_radix2_twiddle(ii + i, n1, n2/2, w*n1, t1, t2, temp, w, 0, i, 1); - } - -} - -void convolution_mfa_truncate(mp_limb_t ** ii, mp_limb_t ** jj, ulong n, ulong w, - mp_limb_t ** t1, mp_limb_t ** t2, mp_limb_t ** temp, - ulong n1, ulong trunc, mp_limb_t * tt) -{ - ulong i, j, s; - ulong n2 = (2*n)/n1; - ulong limbs = (n*w)/GMP_LIMB_BITS; - ulong depth = 0; - ulong depth2 = 0; - mp_limb_t * ptr, c; - - while ((1UL< w bits - - FFT_radix2_truncate_twiddle(ii + i, n1, n2/2, w*n1, t1, t2, temp, w, 0, i, 1, trunc); - for (j = 0; j < n2; j++) - { - s = revbin(j, depth); - if (j < s) - { - ptr = ii[i + j*n1]; - ii[i + j*n1] = ii[i + s*n1]; - ii[i + s*n1] = ptr; - } - } - } - - for (s = 0; s < trunc; s++) - { - i = revbin(s, depth); - FFT_radix2_new(ii + i*n1, 1, ii + i*n1, n1/2, w*n2, t1, t2, temp); - - for (j = 0; j < n1; j++) - { - mpn_normmod_2expp1(ii[i*n1 + j], limbs); - c = ii[i*n1 + j][limbs] + 2*jj[i*n1 + j][limbs]; - ii[i*n1 + j][limbs] = new_mpn_mulmod_2expp1(ii[i*n1 + j], ii[i*n1 + j], jj[i*n1 + j], c, n*w, tt); - } - - IFFT_radix2_new(ii + i*n1, 1, ii + i*n1, n1/2, w*n2, t1, t2, temp); - } - - for (i = 0; i < n1; i++) - { - for (j = 0; j < n2; j++) - { - s = revbin(j, depth); - if (j < s) - { - ptr = ii[i + j*n1]; - ii[i + j*n1] = ii[i + s*n1]; - ii[i + s*n1] = ptr; - } - } - - // IFFT of length n2 on column i, applying z^{r*i} for rows going up in steps - // of 1 starting at row 0, where z => w bits - IFFT_radix2_truncate_twiddle(ii + i, n1, n2/2, w*n1, t1, t2, temp, w, 0, i, 1, trunc); - for (j = 0; j < trunc; j++) - { - mpn_normmod_2expp1(ii[i + n1*j], limbs); - mpn_div_2expmod_2expp1(ii[i + n1*j], ii[i + n1*j], limbs, depth + depth2 + 2); - mpn_normmod_2expp1(ii[i + n1*j], limbs); - } - } -} - -void IFFT_radix2_mfa_truncate(mp_limb_t ** ii, ulong n, ulong w, - mp_limb_t ** t1, mp_limb_t ** t2, mp_limb_t ** temp, ulong n1, ulong trunc) -{ - ulong i, j, s; - ulong n2 = (2*n)/n1; - ulong limbs = (n*w)/GMP_LIMB_BITS; - ulong depth = 0; - ulong depth2 = 0; - mp_limb_t * ptr; - - while ((1UL< w bits - IFFT_radix2_truncate_twiddle(ii + i, n1, n2/2, w*n1, t1, t2, temp, w, 0, i, 1, trunc); - for (j = 0; j < trunc; j++) - mpn_normmod_2expp1(ii[i + j], limbs); - } - -} - -/* - The main integer multiplication routine. Multiplies i1 of n1 limbs by i2 of - n2 limbs and puts the result in r1, which must have space for n1 + n2 limbs. - - Currently there is no sqrt2 trick, thus a convolution of depth d gives a - convolution length 2*n where n = 2^d with coefficients of size n*w, where - w = w2^2. - - Note the *output* polynomial must fit in that convolution size. - - The computation, bits1 = (n*w - depth)/2, gives the maximum size of input - coefficients for a given n and w if you want the output to be meaningful. - - The function automatically truncates the FFT, pointwise mults and IFFT, - thus the inputs can be different lengths. The function will just segfault - if n and w2 are not sufficiently large. Except for the smallest multiplications - (where w2 should be 2), the value of w2 should probably always just be 1. -*/ -void new_mpn_mul(mp_limb_t * r1, mp_limb_t * i1, ulong n1, mp_limb_t * i2, ulong n2, - ulong depth, ulong w) -{ - ulong n = (1UL<_mp_d, i, limbs + 1); - hi = i[limbs]; - if (hi < 0L) - { - mpn_neg_n(m->_mp_d, m->_mp_d, limbs + 1); - m->_mp_size = limbs + 1; - while ((m->_mp_size) && (!m->_mp_d[m->_mp_size - 1])) - m->_mp_size--; - m->_mp_size = -m->_mp_size; - } else - { - m->_mp_size = limbs + 1; - while ((m->_mp_size) && (!m->_mp_d[m->_mp_size - 1])) - m->_mp_size--; - } -} - -void ref_norm(mpz_t m, mpz_t p) -{ - mpz_mod(m, m, p); -} - -void ref_submod_i(mpz_t m, mpz_t i1, mpz_t i2, mpz_t p, ulong n, ulong w) -{ - mpz_sub(m, i1, i2); - mpz_mul_2exp(m, m, (n*w)/2); - mpz_mod(m, m, p); -} - -void ref_mul_2expmod(mpz_t m, mpz_t i2, mpz_t p, ulong n, ulong w, ulong d) -{ - mpz_mul_2exp(m, i2, d); - mpz_mod(m, m, p); -} - -void ref_div_2expmod(mpz_t m, mpz_t i2, mpz_t p, ulong n, ulong w, ulong d) -{ - mpz_t temp; - mpz_init(temp); - mpz_set_ui(temp, 1); - mpz_mul_2exp(temp, temp, d); - mpz_invert(temp, temp, p); - mpz_mul(m, i2, temp); - mpz_mod(m, m, p); - mpz_clear(temp); -} - -void ref_lshB_sumdiffmod(mpz_t t, mpz_t u, mpz_t i1, - mpz_t i2, mpz_t p, ulong n, ulong w, ulong x, ulong y) -{ - mpz_add(t, i1, i2); - mpz_sub(u, i1, i2); - mpz_mul_2exp(t, t, x*GMP_LIMB_BITS); - mpz_mul_2exp(u, u, y*GMP_LIMB_BITS); - mpz_mod(t, t, p); - mpz_mod(u, u, p); -} - -void ref_sumdiff_rshBmod(mpz_t t, mpz_t u, mpz_t i1, - mpz_t i2, mpz_t p, ulong n, ulong w, ulong x, ulong y) -{ - mpz_t mult1, mult2; - mpz_init(mult1); - mpz_init(mult2); - mpz_set_ui(mult1, 1); - mpz_mul_2exp(mult1, mult1, x*GMP_LIMB_BITS); - mpz_invert(mult1, mult1, p); - mpz_set_ui(mult2, 1); - mpz_mul_2exp(mult2, mult2, y*GMP_LIMB_BITS); - mpz_invert(mult2, mult2, p); - mpz_mul(mult1, mult1, i1); - mpz_mul(mult2, mult2, i2); - mpz_add(t, mult1, mult2); - mpz_sub(u, mult1, mult2); - mpz_mod(t, t, p); - mpz_mod(u, u, p); - mpz_clear(mult1); - mpz_clear(mult2); -} - -void ref_FFT_split_radix_butterfly(mpz_t s, mpz_t t, mpz_t p, ulong i, ulong n, ulong w) -{ - mpz_t temp; - mpz_init(temp); - mpz_add(temp, s, t); - mpz_sub(t, s, t); - mpz_set(s, temp); - mpz_mul_2exp(s, s, i*w); - mpz_mul_2exp(t, t, 3*i*w); - mpz_mod(s, s, p); - mpz_mod(t, t, p); - mpz_clear(temp); -} - -void set_p(mpz_t p, ulong n, ulong w) -{ - mpz_set_ui(p, 1); - mpz_mul_2exp(p, p, n*w); - mpz_add_ui(p, p, 1); -} - -void rand_n(mp_limb_t * n, gmp_randstate_t state, ulong limbs) -{ - mpn_rrandom(n, state, limbs); - n[limbs] = gmp_urandomm_ui(state, 10); - if (gmp_urandomm_ui(state, 2)) n[limbs] = -n[limbs]; -} - -void test_norm() -{ - ulong i, j, k, l, n, w, limbs; - mpz_t p, m, m2; - mpz_init(p); - mpz_init(m); - mpz_init(m2); - mp_limb_t * nn; - gmp_randstate_t state; - gmp_randinit_default(state); - TMP_DECL; - - for (i = GMP_LIMB_BITS; i < 32*GMP_LIMB_BITS; i += GMP_LIMB_BITS) - { - for (j = 1; j < 32; j++) - { - for (k = 1; k <= GMP_NUMB_BITS; k <<= 1) - { - n = i/k; - w = j*k; - limbs = (n*w)/GMP_LIMB_BITS; - TMP_MARK; - nn = TMP_BALLOC_LIMBS(limbs + 1); - mpn_rrandom(nn, state, limbs + 1); - mpn_to_mpz(m, nn, limbs); - set_p(p, n, w); - - mpn_normmod_2expp1(nn, limbs); - mpn_to_mpz(m2, nn, limbs); - ref_norm(m, p); - - if (mpz_cmp(m, m2) != 0) - { - printf("mpn_normmod_2expp1 error\n"); - gmp_printf("want %Zx\n\n", m); - gmp_printf("got %Zx\n", m2); - abort(); - } - TMP_FREE; - } - } - } - mpz_clear(p); - mpz_clear(m); - mpz_clear(m2); - gmp_randclear(state); -} - -void test_submod_i() -{ - ulong i, j, k, l, n, w, limbs; - mpz_t p, m, m2, mn1, mn2; - mpz_init(p); - mpz_init(m); - mpz_init(m2); - mpz_init(mn1); - mpz_init(mn2); - mp_limb_t * nn1, * nn2, * r; - gmp_randstate_t state; - gmp_randinit_default(state); - TMP_DECL; - - for (i = 2*GMP_LIMB_BITS; i < 64*GMP_LIMB_BITS; i += 2*GMP_LIMB_BITS) - { - for (j = 1; j < 32; j++) - { - for (k = 1; k <= 2*GMP_NUMB_BITS; k <<= 1) - { - n = i/k; - w = j*k; - limbs = (n*w)/GMP_LIMB_BITS; - TMP_MARK; - nn1 = TMP_BALLOC_LIMBS(limbs + 1); - nn2 = TMP_BALLOC_LIMBS(limbs + 1); - r = TMP_BALLOC_LIMBS(limbs + 1); - rand_n(nn1, state, limbs); - rand_n(nn2, state, limbs); - - mpn_to_mpz(mn1, nn1, limbs); - mpn_to_mpz(mn2, nn2, limbs); - set_p(p, n, w); - - mpn_submod_i_2expp1(r, nn1, nn2, limbs); - mpn_to_mpz(m2, r, limbs); - ref_norm(m2, p); - ref_submod_i(m, mn1, mn2, p, n, w); - - if (mpz_cmp(m, m2) != 0) - { - printf("mpn_submmod_i_2expp1 error\n"); - gmp_printf("want %Zx\n\n", m); - gmp_printf("got %Zx\n", m2); - abort(); - } - TMP_FREE; - } - } - } - mpz_clear(p); - mpz_clear(m); - mpz_clear(m2); - mpz_clear(mn1); - mpz_clear(mn2); - gmp_randclear(state); -} - -void test_mul_2expmod() -{ - ulong i, j, k, l, n, w, limbs, d; - mpz_t p, m, m2, mn1, mn2; - mpz_init(p); - mpz_init(m); - mpz_init(m2); - mpz_init(mn1); - mp_limb_t * nn1, * r; - gmp_randstate_t state; - gmp_randinit_default(state); - TMP_DECL; - - for (i = 2*GMP_LIMB_BITS; i < 64*GMP_LIMB_BITS; i += 2*GMP_LIMB_BITS) - { - for (j = 1; j < 32; j++) - { - for (k = 1; k <= 2*GMP_NUMB_BITS; k <<= 1) - { - for (d = 0; d < GMP_LIMB_BITS; d++) - { - n = i/k; - w = j*k; - limbs = (n*w)/GMP_LIMB_BITS; - TMP_MARK; - nn1 = TMP_BALLOC_LIMBS(limbs + 1); - r = TMP_BALLOC_LIMBS(limbs + 1); - rand_n(nn1, state, limbs); - mpn_to_mpz(mn1, nn1, limbs); - set_p(p, n, w); - - mpn_mul_2expmod_2expp1(r, nn1, limbs, d); - mpn_to_mpz(m2, r, limbs); - ref_norm(m2, p); - ref_mul_2expmod(m, mn1, p, n, w, d); - - if (mpz_cmp(m, m2) != 0) - { - printf("mpn_mul_2expmod_2expp1 error\n"); - gmp_printf("want %Zx\n\n", m); - gmp_printf("got %Zx\n", m2); - abort(); - } - TMP_FREE; - } - } - } - } - mpz_clear(p); - mpz_clear(m); - mpz_clear(m2); - mpz_clear(mn1); - gmp_randclear(state); -} - -void test_FFT_negacyclic_twiddle() -{ - ulong i, j, k, l, n, w, limbs, d; - mpz_t p, m, m2, mn1, mn2; - mpz_init(p); - mpz_init(m); - mpz_init(m2); - mpz_init(mn1); - mp_limb_t * nn1, * r; - gmp_randstate_t state; - gmp_randinit_default(state); - TMP_DECL; - - for (i = 2*GMP_LIMB_BITS; i < 20*GMP_LIMB_BITS; i += 2*GMP_LIMB_BITS) - { - for (j = 1; j < 10; j++) - { - for (k = 1; k <= 2*GMP_NUMB_BITS; k <<= 1) - { - n = i/k; - w = 2*j*k; - for (d = 0; d < 2*n; d++) - { - limbs = (n*w)/GMP_LIMB_BITS; - TMP_MARK; - nn1 = TMP_BALLOC_LIMBS(limbs + 1); - r = TMP_BALLOC_LIMBS(limbs + 1); - rand_n(nn1, state, limbs); - mpn_to_mpz(mn1, nn1, limbs); - set_p(p, n, w); - - if (!FFT_negacyclic_twiddle(r, nn1, d, n, w)) - MPN_COPY(r, nn1, limbs + 1); - mpn_to_mpz(m2, r, limbs); - ref_norm(m2, p); - ref_mul_2expmod(m, mn1, p, n, w, d*w/2); - - if (mpz_cmp(m, m2) != 0) - { - printf("FFT_negacyclic_twiddle error\n"); - gmp_printf("want %Zx\n\n", m); - gmp_printf("got %Zx\n", m2); - abort(); - } - TMP_FREE; - } - } - } - } - - for (i = 2*GMP_LIMB_BITS; i < 20*GMP_LIMB_BITS; i += 2*GMP_LIMB_BITS) - { - for (j = 1; j < 10; j++) - { - for (k = 1; k <= 2*GMP_NUMB_BITS; k <<= 1) - { - n = i/k; - w = 2*j*k; - for (d = 0; d < 2*n; d++) - { - limbs = (n*w)/GMP_LIMB_BITS; - TMP_MARK; - nn1 = TMP_BALLOC_LIMBS(limbs + 1); - r = TMP_BALLOC_LIMBS(limbs + 1); - rand_n(nn1, state, limbs); - mpn_to_mpz(mn1, nn1, limbs); - set_p(p, n, w); - - if (!FFT_negacyclic_twiddle(r, nn1, 4*n - d, n, w)) - MPN_COPY(r, nn1, limbs + 1); - mpn_to_mpz(m2, r, limbs); - ref_norm(m2, p); - ref_div_2expmod(m, mn1, p, n, w, d*w/2); - - if (mpz_cmp(m, m2) != 0) - { - printf("FFT_negacyclic_twiddle error\n"); - gmp_printf("want %Zx\n\n", m); - gmp_printf("got %Zx\n", m2); - abort(); - } - TMP_FREE; - } - } - } - } - mpz_clear(p); - mpz_clear(m); - mpz_clear(m2); - mpz_clear(mn1); - gmp_randclear(state); -} - -void test_div_2expmod() -{ - ulong i, j, k, l, n, w, limbs, d; - mpz_t p, m, m2, mn1, mn2; - mpz_init(p); - mpz_init(m); - mpz_init(m2); - mpz_init(mn1); - mp_limb_t * nn1, * r; - gmp_randstate_t state; - gmp_randinit_default(state); - TMP_DECL; - - for (i = 2*GMP_LIMB_BITS; i < 64*GMP_LIMB_BITS; i += 2*GMP_LIMB_BITS) - { - for (j = 1; j < 32; j++) - { - for (k = 1; k <= 2*GMP_NUMB_BITS; k <<= 1) - { - for (d = 0; d < GMP_LIMB_BITS; d++) - { - n = i/k; - w = j*k; - limbs = (n*w)/GMP_LIMB_BITS; - TMP_MARK; - nn1 = TMP_BALLOC_LIMBS(limbs + 1); - r = TMP_BALLOC_LIMBS(limbs + 1); - rand_n(nn1, state, limbs); - - mpn_to_mpz(mn1, nn1, limbs); - set_p(p, n, w); - - mpn_div_2expmod_2expp1(r, nn1, limbs, d); - mpn_to_mpz(m2, r, limbs); - ref_norm(m2, p); - ref_norm(mn1, p); - ref_mul_2expmod(m, m2, p, n, w, d); - - if (mpz_cmp(m, mn1) != 0) - { - printf("mpn_div_2expmod_2expp1 error\n"); - gmp_printf("want %Zx\n\n", mn1); - gmp_printf("got %Zx\n", m); - abort(); - } - TMP_FREE; - } - } - } - } - mpz_clear(p); - mpz_clear(m); - mpz_clear(m2); - mpz_clear(mn1); - gmp_randclear(state); -} - -void test_lshB_sumdiffmod() -{ - ulong c, i, j, k, l, x, y, n, w, limbs; - mpz_t p, ma, mb, m2a, m2b, mn1, mn2; - mpz_init(p); - mpz_init(ma); - mpz_init(mb); - mpz_init(m2a); - mpz_init(m2b); - mpz_init(mn1); - mpz_init(mn2); - mp_limb_t * nn1, * nn2, * r1, * r2; - gmp_randstate_t state; - gmp_randinit_default(state); - TMP_DECL; - - for (i = 2*GMP_LIMB_BITS; i < 20*GMP_LIMB_BITS; i += 2*GMP_LIMB_BITS) - { - for (j = 1; j < 10; j++) - { - for (k = 1; k <= 2*GMP_NUMB_BITS; k <<= 1) - { - n = i/k; - w = j*k; - limbs = (n*w)/GMP_LIMB_BITS; - for (c = 0; c < limbs; c++) - { - x = gmp_urandomm_ui(state, limbs + 1); - y = gmp_urandomm_ui(state, limbs + 1); - TMP_MARK; - nn1 = TMP_BALLOC_LIMBS(limbs + 1); - nn2 = TMP_BALLOC_LIMBS(limbs + 1); - r1 = TMP_BALLOC_LIMBS(limbs + 1); - r2 = TMP_BALLOC_LIMBS(limbs + 1); - rand_n(nn1, state, limbs); - rand_n(nn2, state, limbs); - - mpn_to_mpz(mn1, nn1, limbs); - mpn_to_mpz(mn2, nn2, limbs); - set_p(p, n, w); - - mpn_lshB_sumdiffmod_2expp1(r1, r2, nn1, nn2, limbs, x, y); - mpn_to_mpz(m2a, r1, limbs); - mpn_to_mpz(m2b, r2, limbs); - ref_norm(m2a, p); - ref_norm(m2b, p); - ref_lshB_sumdiffmod(ma, mb, mn1, mn2, p, n, w, x, y); - - if (mpz_cmp(ma, m2a) != 0) - { - printf("mpn_lshB_sumdiffmod_2expp1 error a\n"); - printf("x = %ld, y = %ld\n", x, y); - gmp_printf("want %Zx\n\n", ma); - gmp_printf("got %Zx\n", m2a); - abort(); - } - if (mpz_cmp(mb, m2b) != 0) - { - printf("mpn_lshB_sumdiffmod_2expp1 error b\n"); - printf("x = %ld, y = %ld\n", x, y); - gmp_printf("want %Zx\n\n", mb); - gmp_printf("got %Zx\n", m2b); - abort(); - } - TMP_FREE; - } - } - } - } - mpz_clear(p); - mpz_clear(ma); - mpz_clear(mb); - mpz_clear(m2a); - mpz_clear(m2b); - mpz_clear(mn1); - mpz_clear(mn2); - gmp_randclear(state); -} - -void test_sumdiff_rshBmod() -{ - ulong c, i, j, k, l, x, y, n, w, limbs; - mpz_t p, ma, mb, m2a, m2b, mn1, mn2; - mpz_init(p); - mpz_init(ma); - mpz_init(mb); - mpz_init(m2a); - mpz_init(m2b); - mpz_init(mn1); - mpz_init(mn2); - mp_limb_t * nn1, * nn2, * r1, * r2; - gmp_randstate_t state; - gmp_randinit_default(state); - TMP_DECL; - - for (i = 2*GMP_LIMB_BITS; i < 20*GMP_LIMB_BITS; i += 2*GMP_LIMB_BITS) - { - for (j = 1; j < 10; j++) - { - for (k = 1; k <= 2*GMP_NUMB_BITS; k <<= 1) - { - n = i/k; - w = j*k; - limbs = (n*w)/GMP_LIMB_BITS; - for (c = 0; c < limbs; c++) - { - x = gmp_urandomm_ui(state, limbs); - y = gmp_urandomm_ui(state, limbs); - TMP_MARK; - nn1 = TMP_BALLOC_LIMBS(limbs + 1); - nn2 = TMP_BALLOC_LIMBS(limbs + 1); - r1 = TMP_BALLOC_LIMBS(limbs + 1); - r2 = TMP_BALLOC_LIMBS(limbs + 1); - rand_n(nn1, state, limbs); - rand_n(nn2, state, limbs); - - mpn_to_mpz(mn1, nn1, limbs); - mpn_to_mpz(mn2, nn2, limbs); - set_p(p, n, w); - - mpn_sumdiff_rshBmod_2expp1(r1, r2, nn1, nn2, limbs, x, y); - mpn_to_mpz(m2a, r1, limbs); - mpn_to_mpz(m2b, r2, limbs); - ref_norm(m2a, p); - ref_norm(m2b, p); - ref_sumdiff_rshBmod(ma, mb, mn1, mn2, p, n, w, x, y); - - if (mpz_cmp(ma, m2a) != 0) - { - printf("mpn_sumdiff_rshBmod_2expp1 error a\n"); - printf("x = %ld, y = %ld, limbs = %ld\n", x, y, limbs); - gmp_printf("want %Zx\n\n", ma); - gmp_printf("got %Zx\n", m2a); - abort(); - } - if (mpz_cmp(mb, m2b) != 0) - { - printf("mpn_sumdiff_rshBmod_2expp1 error b\n"); - printf("x = %ld, y = %ld, limbs = %ld\n", x, y, limbs); - gmp_printf("want %Zx\n\n", mb); - gmp_printf("got %Zx\n", m2b); - abort(); - } - TMP_FREE; - } - } - } - } - mpz_clear(p); - mpz_clear(ma); - mpz_clear(mb); - mpz_clear(m2a); - mpz_clear(m2b); - mpz_clear(mn1); - mpz_clear(mn2); - gmp_randclear(state); -} - -void test_FFT_split_radix_butterfly() -{ - ulong i, j, k, x, n, w, limbs; - mpz_t p, ma, mb, m2a, m2b, mn1, mn2; - mpz_init(p); - mpz_init(ma); - mpz_init(mb); - mpz_init(m2a); - mpz_init(m2b); - mpz_init(mn1); - mpz_init(mn2); - mp_limb_t * nn1, * nn2, * temp; - gmp_randstate_t state; - gmp_randinit_default(state); - TMP_DECL; - - for (i = 2*GMP_LIMB_BITS; i < 20*GMP_LIMB_BITS; i += 2*GMP_LIMB_BITS) - { - for (j = 1; j < 10; j++) - { - for (k = 1; k <= 2*GMP_NUMB_BITS; k <<= 1) - { - n = i/k; - w = j*k; - limbs = (n*w)/GMP_LIMB_BITS; - for (x = 0; x < n + 1; x++) - { - TMP_MARK; - nn1 = TMP_BALLOC_LIMBS(limbs + 1); - nn2 = TMP_BALLOC_LIMBS(limbs + 1); - temp = TMP_BALLOC_LIMBS(2*(limbs + 1)); - rand_n(nn1, state, limbs); - rand_n(nn2, state, limbs); - - mpn_to_mpz(mn1, nn1, limbs); - mpn_to_mpz(mn2, nn2, limbs); - set_p(p, n, w); - - FFT_split_radix_butterfly(nn1, nn2, x, n, w, temp); - mpn_to_mpz(m2a, nn1, limbs); - mpn_to_mpz(m2b, nn2, limbs); - ref_norm(m2a, p); - ref_norm(m2b, p); - ref_FFT_split_radix_butterfly(mn1, mn2, p, x, n, w); - - if (mpz_cmp(m2a, mn1) != 0) - { - printf("FFT_split_radix_butterfly error a\n"); - printf("x = %ld\n", x); - gmp_printf("want %Zx\n\n", mn1); - gmp_printf("got %Zx\n", m2a); - abort(); - } - if (mpz_cmp(m2b, mn2) != 0) - { - printf("FFT_split_radix_butterfly error b\n"); - printf("x = %ld\n", x); - gmp_printf("want %Zx\n\n", mn2); - gmp_printf("got %Zx\n", m2b); - abort(); - } - TMP_FREE; - } - } - } - } - mpz_clear(p); - mpz_clear(ma); - mpz_clear(mb); - mpz_clear(m2a); - mpz_clear(m2b); - mpz_clear(mn1); - mpz_clear(mn2); - gmp_randclear(state); -} - -void time_mul_with_negacyclic() -{ - ulong depth = 17UL; - ulong w2 = 1UL; - ulong iters = 1; - - ulong n = (1UL< -#include -#include "mpir.h" -#include "gmp-impl.h" -#include "flint.h" -#include "ZmodF.h" -#include "ZmodF_poly.h" -#include "longlong.h" - -void mpn_to_mpz(mpz_t m, mp_limb_t * i, ulong limbs); -void set_p(mpz_t p, ulong n, ulong w); -void rand_n(mp_limb_t * n, gmp_randstate_t state, ulong limbs); -void ref_mul_2expmod(mpz_t m, mpz_t i2, mpz_t p, ulong n, ulong w, ulong d); -void ref_norm(mpz_t m, mpz_t p); -void ref_sumdiff_rshBmod(mpz_t t, mpz_t u, mpz_t i1, - mpz_t i2, mpz_t p, ulong n, ulong w, ulong x, ulong y); - -/* - FIXME: This can be done faster for the case where c is small. - The function adds c to the integer r modulo 2^l + 1. -*/ -void mpn_addmod_2expp1_1(mp_limb_t * r, ulong l, mp_limb_signed_t c) -{ - if (c >= 0) mpn_add_1(r, r, l + 1, c); - else mpn_sub_1(r, r, l + 1, -c); -} - -/* - FIXME: this is the most inefficient implementation I could come up with. ;-) -*/ -ulong revbin(ulong c, ulong depth) -{ - ulong i = 0; - ulong ret = 0; - for (i = 0; i < depth + 1; i++) - if (c & (1UL< skip) MPN_COPY(poly[i], limbs + skip, total_limbs - skip); - - return length; -} - -/* - Splits an mpn into segments of length _bits_ and stores in - zero padded coefficients of length output_limbs, for use in FFT - convolution code. Assumes that the input is total_limbs in length. - Returns the total number of coefficient written. -*/ - -ulong FFT_split_bits(mp_limb_t ** poly, mp_limb_t * limbs, - ulong total_limbs, ulong bits, ulong output_limbs) -{ - ulong length = (GMP_LIMB_BITS*total_limbs - 1)/bits + 1; - ulong i, j; - - ulong top_bits = ((GMP_LIMB_BITS - 1) & bits); - if (top_bits == 0) - { - return FFT_split(poly, limbs, total_limbs, bits/GMP_LIMB_BITS, output_limbs); - } - - ulong coeff_limbs = (bits/GMP_LIMB_BITS) + 1; - ulong mask = (1L<coeffs[i+1], j); - - MPN_ZERO(poly[i], output_limbs + 1); - // convert a coefficient - if (!shift_bits) - { - MPN_COPY(poly[i], limb_ptr, coeff_limbs); - poly[i][coeff_limbs - 1] &= mask; - limb_ptr += (coeff_limbs - 1); - shift_bits += top_bits; - } else - { - mpn_rshift(poly[i], limb_ptr, coeff_limbs, shift_bits); - limb_ptr += (coeff_limbs - 1); - shift_bits += top_bits; - if (shift_bits >= GMP_LIMB_BITS) - { - limb_ptr++; - poly[i][coeff_limbs - 1] += (limb_ptr[0] << (GMP_LIMB_BITS - (shift_bits - top_bits))); - shift_bits -= GMP_LIMB_BITS; - } - poly[i][coeff_limbs - 1] &= mask; - } - } - - MPN_ZERO(poly[i], output_limbs + 1); - ulong limbs_left = total_limbs - (limb_ptr - limbs); - if (!shift_bits) - { - MPN_COPY(poly[i], limb_ptr, limbs_left); - } else - { - mpn_rshift(poly[i], limb_ptr, limbs_left, shift_bits); - } - - return length; -} - -/* - Recombines coefficients after doing a convolution. Assumes each of the - coefficients of the poly of the given length is output_limbs long, that each - of the coefficients is being shifted by a multiple of coeff_limbs and added - to an mpn which is total_limbs long. It is assumed that the mpn has been - zeroed in advance. -*/ - -void FFT_combine(mp_limb_t * res, mp_limb_t ** poly, ulong length, ulong coeff_limbs, - ulong output_limbs, ulong total_limbs) -{ - ulong skip, i, j; - - for (skip = 0, i = 0; (i < length) && (skip + output_limbs <= total_limbs); i++, skip += coeff_limbs) - { - //for (j = 0; j < output_limbs; j += 8) PREFETCH(poly->coeffs[i+1], j); - mpn_add(res + skip, res + skip, output_limbs + 1, poly[i], output_limbs); - } - - while ((skip < total_limbs) && (i < length)) - { - mpn_add(res + skip, res + skip, total_limbs - skip, poly[i], MIN(total_limbs - skip, output_limbs)); - i++; - skip += coeff_limbs; - } -} - -/* - Recombines coefficients of a poly after doing a convolution. Assumes - each of the coefficients of the poly of the given length is output_limbs - long, that each is being shifted by a multiple of _bits_ and added - to an mpn which is total_limbs long. It is assumed that the mpn has been - zeroed in advance. -*/ - -void FFT_combine_bits(mp_limb_t * res, mp_limb_t ** poly, ulong length, ulong bits, - ulong output_limbs, ulong total_limbs) -{ - ulong top_bits = ((GMP_LIMB_BITS - 1) & bits); - if (top_bits == 0) - { - FFT_combine(res, poly, length, bits/GMP_LIMB_BITS, output_limbs, total_limbs); - return; - } - TMP_DECL; - TMP_MARK; - - ulong coeff_limbs = (bits/GMP_LIMB_BITS) + 1; - ulong i, j; - ulong * temp = (ulong *) TMP_BALLOC_LIMBS(output_limbs + 1); - ulong shift_bits = 0; - ulong * limb_ptr = res; - ulong * end = res + total_limbs; - - for (i = 0; (i < length) && (limb_ptr + output_limbs < end); i++) - { - //for (j = 0; j < output_limbs; j += 8) PREFETCH(poly->coeffs[i+1], j); - if (shift_bits) - { - mpn_lshift(temp, poly[i], output_limbs + 1, shift_bits); - mpn_add_n(limb_ptr, limb_ptr, temp, output_limbs + 1); - } else - { - mpn_add(limb_ptr, limb_ptr, output_limbs + 1, poly[i], output_limbs); - } - shift_bits += top_bits; - limb_ptr += (coeff_limbs - 1); - if (shift_bits >= GMP_LIMB_BITS) - { - limb_ptr++; - shift_bits -= GMP_LIMB_BITS; - } - } - - while ((limb_ptr < end) && (i < length)) - { - if (shift_bits) - { - mpn_lshift(temp, poly[i], output_limbs + 1, shift_bits); - mpn_add_n(limb_ptr, limb_ptr, temp, end - limb_ptr); - } else - { - mpn_add_n(limb_ptr, limb_ptr, poly[i], end - limb_ptr); - } - shift_bits += top_bits; - limb_ptr += (coeff_limbs - 1); - if (shift_bits >= GMP_LIMB_BITS) - { - limb_ptr++; - shift_bits -= GMP_LIMB_BITS; - } - i++; - } - - TMP_FREE; -} - -/* - Normalise t to be in the range [0, 2^nw] -*/ -void mpn_normmod_2expp1(mp_limb_t * t, ulong l) -{ - mp_limb_signed_t hi = t[l]; - - if (hi) - { - t[l] = CNST_LIMB(0); - mpn_addmod_2expp1_1(t, l, -hi); - hi = t[l]; - - // hi will be in [-1,1] - if (t[l]) - { - t[l] = CNST_LIMB(0); - mpn_addmod_2expp1_1(t, l, -hi); - if (t[l] == ~CNST_LIMB(0)) // if we now have -1 (very unlikely) - { - t[l] = CNST_LIMB(0); - mpn_addmod_2expp1_1(t, l, 1); - } - } - } -} - -/* - We are given two integers modulo 2^wn+1, i1 and i2, which are not - necessarily normalised. We compute t = sqrt(-1)*(i1 - i2) and are given - n and w. Note 2^wn/2 = sqrt(-1) mod 2^wn+1. Requires wn to be divisible - by 2*GMP_LIMB_BITS. -*/ -void mpn_submod_i_2expp1(mp_limb_t * t, mp_limb_t * i1, mp_limb_t * i2, ulong l) -{ - mp_limb_t cy; - mp_limb_t l2 = l/2; - - t[l] = -mpn_sub_n(t + l2, i1, i2, l2); - cy = i2[l] - i1[l] - mpn_sub_n(t, i2 + l2, i1 + l2, l2); - mpn_addmod_2expp1_1(t + l2, l2, cy); -} - -/* - We are given two integers modulo 2^wn+1, i1 and i2, which are - not necessarily normalised and are given n and w. We compute - t = (i1 + i2)*B^x, u = (i1 - i2)*B^y. Aliasing between inputs and - outputs is not permitted. We require x and y to be less than the - number of limbs of i1 and i2. -*/ -void mpn_lshB_sumdiffmod_2expp1(mp_limb_t * t, mp_limb_t * u, mp_limb_t * i1, - mp_limb_t * i2, ulong limbs, ulong x, ulong y) -{ - mp_limb_t cy, cy1, cy2; - - if (x == 0) - { - if (y == 0) - { - mpn_sumdiff_n(t + x, u + y, i1, i2, limbs + 1); - } else - { - cy = mpn_sumdiff_n(t, u + y, i1, i2, limbs - y); - u[limbs] = -(cy&1); - cy1 = cy>>1; - cy = mpn_sumdiff_n(t + limbs - y, u, i2 + limbs - y, i1 + limbs - y, y); - t[limbs] = cy>>1; - mpn_add_1(t + limbs - y, t + limbs - y, y + 1, cy1); - cy1 = -(cy&1) + (i2[limbs] - i1[limbs]); - mpn_addmod_2expp1_1(u + y, limbs - y, cy1); - cy1 = -(i1[limbs] + i2[limbs]); - mpn_addmod_2expp1_1(t, limbs, cy1); - } - } else if (y == 0) - { - cy = mpn_sumdiff_n(t + x, u, i1, i2, limbs - x); - t[limbs] = cy>>1; - cy1 = cy&1; - cy = mpn_sumdiff_n(t, u + limbs - x, i1 + limbs - x, i2 + limbs - x, x); - cy2 = mpn_neg_n(t, t, x); - u[limbs] = -(cy&1); - mpn_sub_1(u + limbs - x, u + limbs - x, x + 1, cy1); - cy1 = -(cy>>1) - cy2; - cy1 -= (i1[limbs] + i2[limbs]); - mpn_addmod_2expp1_1(t + x, limbs - x, cy1); - cy1 = (i2[limbs] - i1[limbs]); - mpn_addmod_2expp1_1(u, limbs, cy1); - } else if (x > y) - { - cy = mpn_sumdiff_n(t + x, u + y, i1, i2, limbs - x); - t[limbs] = cy>>1; - cy1 = cy&1; - cy = mpn_sumdiff_n(t, u + y + limbs - x, i1 + limbs - x, i2 + limbs - x, x - y); - cy2 = mpn_neg_n(t, t, x - y); - u[limbs] = -(cy&1); - mpn_sub_1(u + y + limbs - x, u + y + limbs - x, x - y + 1, cy1); - cy1 = (cy>>1) + cy2; - cy = mpn_sumdiff_n(t + x - y, u, i2 + limbs - y, i1 + limbs - y, y); - cy2 = mpn_neg_n(t + x - y, t + x - y, y); - cy1 = -(cy>>1) - mpn_sub_1(t + x - y, t + x - y, y, cy1) - cy2; - cy1 -= (i1[limbs] + i2[limbs]); - mpn_addmod_2expp1_1(t + x, limbs - x, cy1); - cy1 = -(cy&1) + (i2[limbs] - i1[limbs]); - mpn_addmod_2expp1_1(u + y, limbs - y, cy1); - } else if (x < y) - { - cy = mpn_sumdiff_n(t + x, u + y, i1, i2, limbs - y); - u[limbs] = -(cy&1); - cy1 = cy>>1; - cy = mpn_sumdiff_n(t + x + limbs - y, u, i2 + limbs - y, i1 + limbs - y, y - x); - t[limbs] = cy>>1; - mpn_add_1(t + x + limbs - y, t + x + limbs - y, y - x + 1, cy1); - cy1 = cy&1; - cy = mpn_sumdiff_n(t, u + y - x, i2 + limbs - x, i1 + limbs - x, x); - cy1 = -(cy&1) - mpn_sub_1(u + y - x, u + y - x, x, cy1); - cy1 += (i2[limbs] - i1[limbs]); - mpn_addmod_2expp1_1(u + y, limbs - y, cy1); - cy2 = mpn_neg_n(t, t, x); - cy1 = -(cy>>1) - (i1[limbs] + i2[limbs]) - cy2; - mpn_addmod_2expp1_1(t + x, limbs - x, cy1); - } else // x == y - { - cy = mpn_sumdiff_n(t + x, u + x, i1, i2, limbs - x); - t[limbs] = cy>>1; - u[limbs] = -(cy&1); - cy = mpn_sumdiff_n(t, u, i2 + limbs - x, i1 + limbs - x, x); - cy2 = mpn_neg_n(t, t, x); - cy1 = -(cy>>1) - (i1[limbs] + i2[limbs]) - cy2; - mpn_addmod_2expp1_1(t + x, limbs - x, cy1); - cy1 = -(cy&1) + i2[limbs] - i1[limbs]; - mpn_addmod_2expp1_1(u + x, limbs - x, cy1); - } -} - -/* - We are given two integers modulo 2^wn+1, i1 and i2, which are - not necessarily normalised and are given n and w. We compute - t = i1 + i2/B^x, u = i1 - i2/B^x. Aliasing between inputs and - outputs is not permitted. We require x be less than the - number of limbs of i1 and i2. -*/ -void mpn_sumdiff_rshBmod_2expp1(mp_limb_t * t, mp_limb_t * u, mp_limb_t * i1, - mp_limb_t * i2, ulong limbs, ulong x, ulong y) -{ - mp_limb_t cy, cy1, cy2, cy3; - - if (x == 0) - { - if (y == 0) - { - mpn_sumdiff_n(t, u, i1, i2, limbs + 1); - } else // y != 0 - { - cy = mpn_sumdiff_n(t, u, i1, i2 + y, limbs - y); - cy1 = (cy>>1); - cy2 = -(cy&1); - cy = mpn_sumdiff_n(u + limbs - y, t + limbs - y, i1 + limbs - y, i2, y); - u[limbs] = (cy>>1) + i1[limbs]; - t[limbs] = i1[limbs] - (cy&1); - mpn_addmod_2expp1_1(t + limbs - y, y, cy1 + i2[limbs]); - mpn_addmod_2expp1_1(u + limbs - y, y, cy2 - i2[limbs]); - } - } else if (y == 0) // x != 0 - { - cy = mpn_sumdiff_n(t, u, i1 + x, i2, limbs - x); - cy1 = (cy>>1); - cy2 = -(cy&1); - cy3 = mpn_neg_n(i1, i1, x); - cy = mpn_sumdiff_n(t + limbs - x, u + limbs - x, i1, i2 + limbs - x, x); - u[limbs] = -cy3 - (cy&1) - i2[limbs]; - t[limbs] = -cy3 + i2[limbs] + (cy>>1); - mpn_addmod_2expp1_1(t + limbs - x, x, cy1 + i1[limbs]); - mpn_addmod_2expp1_1(u + limbs - x, x, cy2 + i1[limbs]); - } else if (x == y) - { - cy = mpn_sumdiff_n(t, u, i1 + x, i2 + x, limbs - x); - cy1 = (cy>>1); - cy2 = -(cy&1); - cy = mpn_sumdiff_n(t + limbs - x, u + limbs - x, i2, i1, x); - cy3 = mpn_neg_n(t + limbs - x, t + limbs - x, x); - u[limbs] = -(cy&1); - t[limbs] = -(cy>>1) - cy3; - mpn_addmod_2expp1_1(t + limbs - x, x, cy1 + i1[limbs] + i2[limbs]); - mpn_addmod_2expp1_1(u + limbs - x, x, cy2 + i1[limbs] - i2[limbs]); - } else if (x > y) - { - cy = mpn_sumdiff_n(t + limbs - y, u + limbs - y, i2, i1 + x - y, y); - cy3 = mpn_neg_n(t + limbs - y, t + limbs - y, y); - t[limbs] = -(cy>>1) - cy3; - u[limbs] = -(cy&1); - cy3 = mpn_neg_n(i1, i1, x - y); - cy = mpn_sumdiff_n(t + limbs - x, u + limbs - x, i1, i2 + limbs - x + y, x - y); - mpn_addmod_2expp1_1(t + limbs - y, y, (cy>>1) + i2[limbs] - cy3); - mpn_addmod_2expp1_1(u + limbs - y, y, -(cy&1) - i2[limbs] - cy3); - cy = mpn_sumdiff_n(t, u, i1 + x, i2 + y, limbs - x); - mpn_addmod_2expp1_1(t + limbs - x, x, (cy>>1) + i1[limbs]); - mpn_addmod_2expp1_1(u + limbs - x, x, -(cy&1) + i1[limbs]); - } else //(x < y) - { - cy = mpn_sumdiff_n(t + limbs - x, u + limbs - x, i2 + y - x, i1, x); - cy3 = mpn_neg_n(t + limbs - x, t + limbs - x, x); - t[limbs] = -(cy>>1) - cy3; - u[limbs] = -(cy&1); - cy3 = mpn_neg_n(i2, i2, y - x); - cy = mpn_sumdiff_n(t + limbs - y, u + limbs - y, i1 + limbs - y + x, i2, y - x); - mpn_addmod_2expp1_1(t + limbs - x, x, (cy>>1) + i1[limbs] - cy3); - mpn_addmod_2expp1_1(u + limbs - x, x, -(cy&1) + i1[limbs] + cy3); - cy = mpn_sumdiff_n(t, u, i1 + x, i2 + y, limbs - y); - mpn_addmod_2expp1_1(t + limbs - y, y, (cy>>1) + i2[limbs]); - mpn_addmod_2expp1_1(u + limbs - y, y, -(cy&1) - i2[limbs]); - } -} - -/* - Given an integer i1 modulo 2^wn+1, set t to 2^d*i1 modulo 2^wm+1. - We must have GMP_LIMB_BITS > d >= 0. -*/ -void mpn_mul_2expmod_2expp1(mp_limb_t * t, mp_limb_t * i1, ulong limbs, ulong d) -{ - mp_limb_signed_t hi, hi2; - - if (d == 0) - { - if (t != i1) - MPN_COPY(t, i1, limbs + 1); - } else - { - hi = i1[limbs]; - mpn_lshift(t, i1, limbs + 1, d); - hi2 = t[limbs]; - t[limbs] = CNST_LIMB(0); - mpn_sub_1(t, t, limbs + 1, hi2); - hi >>= (GMP_LIMB_BITS - d); - mpn_addmod_2expp1_1(t + 1, limbs - 1, -hi); - } -} - -/* - Given an integer i1 modulo 2^wn+1, set t to i1/2^d modulo 2^wm+1. - We must have GMP_LIMB_BITS > d >= 0. -*/ -void mpn_div_2expmod_2expp1(mp_limb_t * t, mp_limb_t * i1, ulong limbs, ulong d) -{ - mp_limb_t lo; - mp_limb_t * ptr; - mp_limb_signed_t hi; - - if (d == 0) - { - if (t != i1) - MPN_COPY(t, i1, limbs + 1); - } else - { - hi = i1[limbs]; - lo = mpn_rshift(t, i1, limbs + 1, d); - t[limbs] = (hi>>d); - ptr = t + limbs - 1; - sub_ddmmss(ptr[1], ptr[0], ptr[1], ptr[0], CNST_LIMB(0), lo); - } -} - -/* - Set {s, t} to { z1^i*(s+t), z2^i*(s-t) } where - z1 = exp(2*Pi*I/(2*n)), z2 = exp(2*Pi*I*3/(2*n)), z1=>w bits -*/ -void FFT_split_radix_butterfly(mp_limb_t * s, mp_limb_t * t, ulong i, ulong n, ulong w, mp_limb_t * u) -{ - mp_limb_t size = (w*n)/GMP_LIMB_BITS + 1; - mp_limb_t * v; - ulong x, y, b1, b2; - int negate = 0; - int negate2 = 0; - - v = u + size; - - b1 = i; - while (b1 >= n) - { - negate2 = 1 - negate2; - b1 -= n; - } - b1 = b1*w; - x = b1/GMP_LIMB_BITS; - b1 -= x*GMP_LIMB_BITS; - b2 = 3*i; - while (b2 >= n) - { - negate = 1 - negate; - b2 -= n; - } - b2 = b2*w; - y = b2/GMP_LIMB_BITS; - b2 -= y*GMP_LIMB_BITS; - - mpn_lshB_sumdiffmod_2expp1(u, v, s, t, size - 1, x, y); - mpn_mul_2expmod_2expp1(s, u, size - 1, b1); - mpn_mul_2expmod_2expp1(t, v, size - 1, b2); - if (negate) mpn_neg_n(t, t, size); - if (negate2) mpn_neg_n(s, s, size); -} - -void FFT_split_radix_butterfly2(mp_limb_t * u, mp_limb_t * v, mp_limb_t * s, mp_limb_t * t, ulong i, ulong n, ulong w) -{ - mp_limb_t size = (w*n)/GMP_LIMB_BITS + 1; - ulong x, y, b1, b2; - int negate = 0; - int negate2 = 0; - - b1 = i; - while (b1 >= n) - { - negate2 = 1 - negate2; - b1 -= n; - } - b1 = b1*w; - x = b1/GMP_LIMB_BITS; - b1 -= x*GMP_LIMB_BITS; - b2 = 3*i; - while (b2 >= n) - { - negate = 1 - negate; - b2 -= n; - } - b2 = b2*w; - y = b2/GMP_LIMB_BITS; - b2 -= y*GMP_LIMB_BITS; - - mpn_lshB_sumdiffmod_2expp1(u, v, s, t, size - 1, x, y); - mpn_mul_2expmod_2expp1(u, u, size - 1, b1); - if (negate2) mpn_neg_n(u, u, size); - mpn_mul_2expmod_2expp1(v, v, size - 1, b2); - if (negate) mpn_neg_n(v, v, size); -} - -/* - Set u = 2^{ws*tw1}*(s + t), v = 2^{w+ws*tw2}*(s - t) -*/ -void FFT_radix2_twiddle_butterfly(mp_limb_t * u, mp_limb_t * v, - mp_limb_t * s, mp_limb_t * t, ulong NW, ulong b1, ulong b2) -{ - mp_limb_t size = NW/GMP_LIMB_BITS + 1; - ulong x, y; - int negate = 0; - int negate2 = 0; - - b1 %= (2*NW); - if (b1 >= NW) - { - negate2 = 1; - b1 -= NW; - } - x = b1/GMP_LIMB_BITS; - b1 -= x*GMP_LIMB_BITS; - - b2 %= (2*NW); - if (b2 >= NW) - { - negate = 1; - b2 -= NW; - } - y = b2/GMP_LIMB_BITS; - b2 -= y*GMP_LIMB_BITS; - - mpn_lshB_sumdiffmod_2expp1(u, v, s, t, size - 1, x, y); - mpn_mul_2expmod_2expp1(u, u, size - 1, b1); - if (negate2) mpn_neg_n(u, u, size); - mpn_mul_2expmod_2expp1(v, v, size - 1, b2); - if (negate) mpn_neg_n(v, v, size); -} - -/* - Set s = i1 + i2, t = z1*(i1 - i2) where z1 = exp(2*Pi*I/m) => w bits -*/ -void FFT_radix2_butterfly(mp_limb_t * s, mp_limb_t * t, - mp_limb_t * i1, mp_limb_t * i2, ulong i, ulong n, ulong w) -{ - mp_limb_t size = (w*n)/GMP_LIMB_BITS + 1; - ulong x, y, b1; - int negate = 0; - - x = 0; - - b1 = i; - while (b1 >= n) - { - negate = 1 - negate; - b1 -= n; - } - b1 = b1*w; - y = b1/GMP_LIMB_BITS; - b1 -= y*GMP_LIMB_BITS; - - mpn_lshB_sumdiffmod_2expp1(s, t, i1, i2, size - 1, x, y); - mpn_mul_2expmod_2expp1(t, t, size - 1, b1); - if (negate) mpn_neg_n(t, t, size); -} - -/* - Set s = i1 + z1*i2, t = i1 - z1*i2 where z1 = exp(-2*Pi*I/m) => w bits -*/ -void FFT_radix2_inverse_butterfly(mp_limb_t * s, mp_limb_t * t, - mp_limb_t * i1, mp_limb_t * i2, ulong i, ulong n, ulong w) -{ - mp_limb_t limbs = (w*n)/GMP_LIMB_BITS; - ulong y, b1; - - b1 = i*w; - y = b1/GMP_LIMB_BITS; - b1 -= y*GMP_LIMB_BITS; - - mpn_div_2expmod_2expp1(i2, i2, limbs, b1); - mpn_sumdiff_rshBmod_2expp1(s, t, i1, i2, limbs, 0, y); -} - -void FFT_radix2_twiddle_inverse_butterfly(mp_limb_t * s, mp_limb_t * t, - mp_limb_t * i1, mp_limb_t * i2, ulong NW, ulong b1, ulong b2) -{ - mp_limb_t limbs = NW/GMP_LIMB_BITS; - ulong x, y; - int negate = 0; - int negate2 = 0; - - b1 %= (2*NW); - if (b1 >= NW) - { - negate = 1; - b1 -= NW; - } - x = b1/GMP_LIMB_BITS; - b1 -= x*GMP_LIMB_BITS; - - b2 %= (2*NW); - if (b2 >= NW) - { - negate2 = 1; - b2 -= NW; - } - y = b2/GMP_LIMB_BITS; - b2 -= y*GMP_LIMB_BITS; - - if (negate) mpn_neg_n(i1, i1, limbs + 1); - mpn_div_2expmod_2expp1(i1, i1, limbs, b1); - if (negate2) mpn_neg_n(i2, i2, limbs + 1); - mpn_div_2expmod_2expp1(i2, i2, limbs, b2); - mpn_sumdiff_rshBmod_2expp1(s, t, i1, i2, limbs, x, y); -} - -void FFT_radix2(mp_limb_t ** rr, ulong rs, mp_limb_t ** ii, - ulong n, ulong w, mp_limb_t ** t1, mp_limb_t ** t2, mp_limb_t ** temp); - -/* - The split radix DIF FFT works as follows: - Given: inputs [i0, i1, ..., i{m-1}], for m a power of 2 - - Output: Fsplit[i0, i1, ..., i{m-1}] = [r0, r1, ..., r{m-1}] - (Inputs and outputs may be subject to a stride and not in consecutive locations.) - - Algorithm: * Recursively compute [r0, r2, r4, ...., r{m-2}] - = Fsplit[i0+i{m/2}, i1+i{m/2+1}, ..., i{m/2-1}+i{m-1}] - * Let [s0, s1, ..., s{m/4-1}] - = [i0-i{m/2}, i1-i{m/2+1}, ..., i{m/4-1}-i{3m/4-1}] - * Let [t0, t1, ..., t{m/4-1}] - = sqrt(-1)[i{m/4}-i{3m/4}, i{m/4+1}-i{3m/4+1}, ..., i{m/2-1}-i{m-1}] - * Let [u0, u1, ..., u{m/4-1}] - = [z1^0*(s0+t0), z1^1(s1+t1), ..., z1^{m/4-1}*(s{m/4-1}+t{m/4-1})] - where z1 = exp(2*Pi*I/m) - * Let [v0, v1, ..., v{m/4-1}] - = [z2^0*(s0-t0), z2^1(s1-t1), ..., z2^{m/4-1}*(s{m/4-1}-t{m/4-1})] - where z2 = exp(2*Pi*I*3/m) - * Recursively compute [r1, r5, ..., r{m-3}] - = Fsplit[u0, u1, ..., u{m/4-1}] - * Recursively compute [r3, r7, ..., r{m-1}] - = Fsplit[v0, v1, ..., v{m/4-1}] - - The parameters are as follows: - * 2*n is the length of the input and output arrays (m in the above) - * w is such that 2^w is an 2n-th root of unity in the ring Z/pZ that - we are working in, i.e. p = 2^wn + 1 (here n is divisible by - GMP_LIMB_BITS) - * ii is the array of inputs (each input is an array of limbs of length - wn/GMP_LIMB_BITS + 1 (the extra limb being a "carry limb") - * rr is the array of outputs (each being an array of limbs of length - wn/GMP_LIMB_BITS + 1) - * rs is the stride with which entries of rr should be written (e.g. - rs = 4 means that the output is to be written in every 4th entry of - rr) - * is is the stride with which entries of ii should be read -*/ - -void FFT_split_radix(mp_limb_t ** rr, ulong rs, mp_limb_t ** ii, - ulong n, ulong w, mp_limb_t ** t1, mp_limb_t ** t2, mp_limb_t ** temp) -{ - mp_limb_t * ptr; - ulong i; - ulong size = (w*n)/GMP_LIMB_BITS + 1; - - if (n < 4) - { - FFT_radix2(rr, rs, ii, n, w, t1, t2, temp); - return; - } - - // [s0, s1, ..., s{m/4-1}] = [i0-i{m/2}, i1-i{m/2+1}, ..., i{m/4-1}-i{3m/4-1}] - for (i = 0; i < n/2; i++) - { - mpn_sub_n(temp[i], ii[i], ii[i+n], size); - mpn_add_n(ii[i+n], ii[i], ii[i+n], size); - } - - // [t0, t1, ..., t{m/4-1}] = sqrt(-1)[i{m/4}-i{3m/4}, i{m/4+1}-i{3m/4+1}, ..., i{m/2-1}-i{m-1}] - for (i = 0; i < n/2; i++) - { - mpn_submod_i_2expp1(temp[i+n/2], ii[i+n/2], ii[i+3*n/2], size - 1); - mpn_add_n(ii[i+3*n/2], ii[i+n/2], ii[i+3*n/2], size); - } - - // [u0, u1, ..., u{m/4-1}] = [z1^0*(s0+t0), z1^1(s1+t1), ..., z1^{m/4-1}*(s{m/4-1}+t{m/4-1})] - // [v0, v1, ..., v{m/4-1}] = [z2^0*(s0-t0), z2^1(s1-t1), ..., z2^{m/4-1}*(s{m/4-1}-t{m/4-1})] - // where z1 = exp(2*Pi*I/m), z2 = exp(2*Pi*I*3/m), z1 => w bits - // note {u, v} = {ss, tt} - for (i = 0; i < n/2; i++) - { - FFT_split_radix_butterfly2(*t1, *t2, temp[i], temp[i+n/2], i, n, w); - ptr = temp[i]; - temp[i] = *t1; - *t1 = ptr; - ptr = temp[i+n/2]; - temp[i+n/2] = *t2; - *t2 = ptr; - } - - // [r0, r2, r4, ...., r{m-2}] = Fsplit[i0+i{m/2}, i1+i{m/2+1}, ..., i{m/2-1}+i{m-1}] - FFT_split_radix(rr, 2*rs, ii+n, n/2, 2*w, t1, t2, temp + n); - - // [r1, r5, ..., r{m-3}] = Fsplit[u0, u1, ..., u{m/4-1}] - FFT_split_radix(rr + rs, 4*rs, temp, n/4, 4*w, t1, t2, temp + n); - - // [r3, r7, ..., r{m-1}] = Fsplit[v0, v1, ..., v{m/4-1}] - FFT_split_radix(rr + 3*rs, 4*rs, temp+n/2, n/4, 4*w, t1, t2, temp + n); -} - -/* - The radix 2 DIF FFT works as follows: - Given: inputs [i0, i1, ..., i{m-1}], for m a power of 2 - - Output: Fradix2[i0, i1, ..., i{m-1}] = [r0, r1, ..., r{m-1}] - (Inputs and outputs may be subject to a stride and not in consecutive locations.) - - Algorithm: * Recursively compute [r0, r2, r4, ...., r{m-2}] - = Fsplit[i0+i{m/2}, i1+i{m/2+1}, ..., i{m/2-1}+i{m-1}] - * Let [t0, t1, ..., t{m/2-1}] - = [i0-i{m/2}, i1-i{m/2+1}, ..., i{m/2-1}-i{m-1}] - * Let [u0, u1, ..., u{m/2-1}] - = [z1^0*t0, z1^1*t1, ..., z1^{m/2-1}*t{m/2-1}] - where z1 = exp(2*Pi*I/m) - * Recursively compute [r1, r3, ..., r{m-1}] - = Fsplit[u0, u1, ..., u{m/2-1}] - - The parameters are as follows: - * 2*n is the length of the input and output arrays (m in the above) - * w is such that 2^w is an 2n-th root of unity in the ring Z/pZ that - we are working in, i.e. p = 2^wn + 1 (here n is divisible by - GMP_LIMB_BITS) - * ii is the array of inputs (each input is an array of limbs of length - wn/GMP_LIMB_BITS + 1 (the extra limb being a "carry limb") - * rr is the array of outputs (each being an array of limbs of length - wn/GMP_LIMB_BITS + 1) - * rs is the stride with which entries of rr should be written (e.g. - rs = 4 means that the output is to be written in every 4th entry of - rr) - * is is the stride with which entries of ii should be read -*/ - -void FFT_radix2(mp_limb_t ** rr, ulong rs, mp_limb_t ** ii, - ulong n, ulong w, mp_limb_t ** t1, mp_limb_t ** t2, mp_limb_t ** temp) -{ - mp_limb_t ** ss, ** tt; - mp_limb_t * ptr; - ulong i; - ulong size = (w*n)/GMP_LIMB_BITS + 1; - - if (n == 1) - { - FFT_radix2_butterfly(*t1, *t2, ii[0], ii[1], 0, n, w); - ptr = rr[0]; - rr[0] = *t1; - *t1 = ptr; - ptr = rr[rs]; - rr[rs] = *t2; - *t2 = ptr; - return; - } - - // [s0, s1, ..., s{m/2}] = [i0+i{m/2}, i1+i{m/2+1}, ..., i{m/2-1}+i{m-1}] - // [t0, t1, ..., t{m/2-1}] - // = [z1^0*(i0-i{m/2}), z1^1*(i1-i{m/2+1}), ..., z1^{m/2-1}*(i{m/2-1}-i{m-1})] - // where z1 = exp(2*Pi*I/m), z1 => w bits - for (i = 0; i < n; i++) - { - FFT_radix2_butterfly(*t1, temp[i], ii[i], ii[n+i], i, n, w); - - ptr = ii[i]; - ii[i] = *t1; - *t1 = ptr; - } - - // [r0, r2, ..., r{m-2}] = Fradix2[s0, s1, ..., s{m/2-1}] - FFT_radix2(rr, 2*rs, ii, n/2, 2*w, t1, t2, temp + n); - - // [r1, r3, ..., r{m-1}] = Fradix2[t0, t1, ..., t{m/2-1}] - FFT_radix2(rr + rs, 2*rs, temp, n/2, 2*w, t1, t2, temp + n); -} - -void FFT_radix2_new(mp_limb_t ** rr, ulong rs, mp_limb_t ** ii, - ulong n, ulong w, mp_limb_t ** t1, mp_limb_t ** t2, mp_limb_t ** temp) -{ - mp_limb_t ** ss, ** tt; - mp_limb_t * ptr; - ulong i, j, k, l; - ulong size = (w*n)/GMP_LIMB_BITS + 1; - - if (n == 1) - { - FFT_radix2_butterfly(*t1, *t2, ii[0], ii[1], 0, n, w); - ptr = rr[0]; - rr[0] = *t1; - *t1 = ptr; - ptr = rr[rs]; - rr[rs] = *t2; - *t2 = ptr; - return; - } - - // [s0, s1, ..., s{m/2}] = [i0+i{m/2}, i1+i{m/2+1}, ..., i{m/2-1}+i{m-1}] - // [t0, t1, ..., t{m/2-1}] - // = [z1^0*(i0-i{m/2}), z1^1*(i1-i{m/2+1}), ..., z1^{m/2-1}*(i{m/2-1}-i{m-1})] - // where z1 = exp(2*Pi*I/m), z1 => w bits - for (i = 0; i < n; i++) - { - FFT_radix2_butterfly(*t1, *t2, ii[i], ii[n+i], i, n, w); - - ptr = ii[i]; - ii[i] = *t1; - *t1 = ptr; - ptr = ii[n+i]; - ii[n+i] = *t2; - *t2 = ptr; - } - - // [r0, r2, ..., r{m-2}] = Fradix2[s0, s1, ..., s{m/2-1}] - FFT_radix2_new(rr, 1, ii, n/2, 2*w, t1, t2, temp); - - // [r1, r3, ..., r{m-1}] = Fradix2[t0, t1, ..., t{m/2-1}] - FFT_radix2_new(rr + n, 1, ii+n, n/2, 2*w, t1, t2, temp); -} - -int FFT_negacyclic_twiddle(mp_limb_t * r, mp_limb_t * i1, ulong i, ulong n, ulong w) -{ - mp_limb_t cy; - ulong limbs = (n*w)/GMP_LIMB_BITS; - int negate = 0; - while (i >= 2*n) - { - negate = 1 - negate; - i -= 2*n; - } - ulong b1 = (w*i)/2; - ulong x = b1/GMP_LIMB_BITS; - b1 -= x*GMP_LIMB_BITS; - //if ((!x) && (negate)) - if (negate) mpn_neg_n(i1, i1, limbs + 1); - mpn_mul_2expmod_2expp1(i1, i1, limbs, b1); - if (x) - { - /*if (negate) - { - r[limbs] = -mpn_neg_n(r + x, i1, limbs - x); - MPN_COPY(r, i1 + limbs - x, x); - mpn_addmod_2expp1_1(r + x, limbs - x, i1[limbs]); - } else - {*/ - MPN_COPY(r + x, i1, limbs - x); - r[limbs] = CNST_LIMB(0); - cy = mpn_neg_n(r, i1 + limbs - x, x); - mpn_addmod_2expp1_1(r + x, limbs - x, -cy - i1[limbs]); - //} - return 1; - } - return 0; -} - -void FFT_twiddle(mp_limb_t * r, mp_limb_t * i1, ulong i, ulong n, ulong w) -{ - mp_limb_t cy; - ulong limbs = (n*w)/GMP_LIMB_BITS; - int negate = 0; - while (i >= n) - { - negate = 1 - negate; - i -= n; - } - ulong b1 = w*i; - ulong x = b1/GMP_LIMB_BITS; - b1 -= x*GMP_LIMB_BITS; - if (x) - { - MPN_COPY(r + x, i1, limbs - x); - r[limbs] = CNST_LIMB(0); - cy = mpn_neg_n(r, i1 + limbs - x, x); - mpn_addmod_2expp1_1(r + x, limbs - x, -cy - i1[limbs]); - if (negate) mpn_neg_n(r, r, limbs + 1); - mpn_mul_2expmod_2expp1(r, r, limbs, b1); - } else - { - if (negate) - { - mpn_neg_n(r, i1, limbs + 1); - mpn_mul_2expmod_2expp1(r, r, limbs, b1); - } else - mpn_mul_2expmod_2expp1(r, i1, limbs, b1); - } -} - -void FFT_radix2_twiddle(mp_limb_t ** ii, ulong is, - ulong n, ulong w, mp_limb_t ** t1, mp_limb_t ** t2, mp_limb_t ** temp, - ulong ws, ulong r, ulong c, ulong rs); - -/* - Truncate FFT to any length given by trunc, so long as trunc is - divisible by 8. -*/ -void FFT_radix2_truncate1(mp_limb_t ** rr, ulong rs, mp_limb_t ** ii, - ulong n, ulong w, mp_limb_t ** t1, mp_limb_t ** t2, mp_limb_t ** temp, - ulong trunc) -{ - mp_limb_t ** ss, ** tt; - mp_limb_t * ptr; - ulong i, j, k, l; - ulong size = (w*n)/GMP_LIMB_BITS + 1; - - if (trunc == 2*n) - { - FFT_radix2_new(rr, rs, ii, n, w, t1, t2, temp); - return; - } - - if (trunc <= n) - { - for (i = 0; i < n; i++) - mpn_add_n(ii[i], ii[i], ii[i+n], size); - - FFT_radix2_truncate1(rr, rs, ii, n/2, 2*w, t1, t2, temp, trunc); - } else - { - - // [s0, s1, ..., s{m/2}] = [i0+i{m/2}, i1+i{m/2+1}, ..., i{m/2-1}+i{m-1}] - // [t0, t1, ..., t{m/2-1}] - // = [z1^0*(i0-i{m/2}), z1^1*(i1-i{m/2+1}), ..., z1^{m/2-1}*(i{m/2-1}-i{m-1})] - // where z1 = exp(2*Pi*I/m), z1 => w bits - for (i = 0; i < n; i++) - { - FFT_radix2_butterfly(*t1, *t2, ii[i], ii[n+i], i, n, w); - - ptr = ii[i]; - ii[i] = *t1; - *t1 = ptr; - ptr = ii[n+i]; - ii[n+i] = *t2; - *t2 = ptr; - } - - // [r0, r2, ..., r{m-2}] = Fradix2[s0, s1, ..., s{m/2-1}] - FFT_radix2_new(rr, 1, ii, n/2, 2*w, t1, t2, temp); - - // [r1, r3, ..., r{m-1}] = Fradix2[t0, t1, ..., t{m/2-1}] - FFT_radix2_truncate1(rr + n, 1, ii+n, n/2, 2*w, t1, t2, temp, trunc - n); - } -} - -void FFT_radix2_truncate1_twiddle(mp_limb_t ** ii, ulong is, - ulong n, ulong w, mp_limb_t ** t1, mp_limb_t ** t2, mp_limb_t ** temp, - ulong ws, ulong r, ulong c, ulong rs, ulong trunc) -{ - mp_limb_t ** ss, ** tt; - mp_limb_t * ptr; - ulong i, j, k, l; - ulong size = (w*n)/GMP_LIMB_BITS + 1; - - if (trunc == 2*n) - { - FFT_radix2_twiddle(ii, is, n, w, t1, t2, temp, ws, r, c, rs); - return; - } - - if (trunc <= n) - { - for (i = 0; i < n; i++) - mpn_add_n(ii[i*is], ii[i*is], ii[(i+n)*is], size); - - FFT_radix2_truncate1_twiddle(ii, is, n/2, 2*w, t1, t2, temp, ws, r, c, 2*rs, trunc); - } else - { - - // [s0, s1, ..., s{m/2}] = [i0+i{m/2}, i1+i{m/2+1}, ..., i{m/2-1}+i{m-1}] - // [t0, t1, ..., t{m/2-1}] - // = [z1^0*(i0-i{m/2}), z1^1*(i1-i{m/2+1}), ..., z1^{m/2-1}*(i{m/2-1}-i{m-1})] - // where z1 = exp(2*Pi*I/m), z1 => w bits - for (i = 0; i < n; i++) - { - FFT_radix2_butterfly(*t1, *t2, ii[i*is], ii[(n+i)*is], i, n, w); - - ptr = ii[i*is]; - ii[i*is] = *t1; - *t1 = ptr; - ptr = ii[(n+i)*is]; - ii[(n+i)*is] = *t2; - *t2 = ptr; - } - - // [r0, r2, ..., r{m-2}] = Fradix2[s0, s1, ..., s{m/2-1}] - FFT_radix2_twiddle(ii, is, n/2, 2*w, t1, t2, temp, ws, r, c, 2*rs); - - // [r1, r3, ..., r{m-1}] = Fradix2[t0, t1, ..., t{m/2-1}] - FFT_radix2_truncate1_twiddle(ii + n*is, is, n/2, 2*w, t1, t2, temp, ws, r + rs, c, 2*rs, trunc - n); - } -} - -/* - Truncate FFT to any length given by trunc, so long as trunc is - divisible by 8. Assumes zeros from trunc to n. -*/ -void FFT_radix2_truncate(mp_limb_t ** rr, ulong rs, mp_limb_t ** ii, - ulong n, ulong w, mp_limb_t ** t1, mp_limb_t ** t2, mp_limb_t ** temp, - ulong trunc) -{ - mp_limb_t ** ss, ** tt; - mp_limb_t * ptr; - ulong i, j, k, l; - ulong size = (w*n)/GMP_LIMB_BITS + 1; - - if (trunc == 2*n) - { - FFT_radix2_new(rr, rs, ii, n, w, t1, t2, temp); - return; - } - - if (trunc <= n) - { - // PASS - FFT_radix2_truncate(rr, rs, ii, n/2, 2*w, t1, t2, temp, trunc); - } else - { - // PASS - // [s0, s1, ..., s{m/2}] = [i0+i{m/2}, i1+i{m/2+1}, ..., i{m/2-1}+i{m-1}] - // [t0, t1, ..., t{m/2-1}] - // = [z1^0*(i0-i{m/2}), z1^1*(i1-i{m/2+1}), ..., z1^{m/2-1}*(i{m/2-1}-i{m-1})] - // where z1 = exp(2*Pi*I/m), z1 => w bits - for (i = 0; i < trunc - n; i++) - { - FFT_radix2_butterfly(*t1, *t2, ii[i], ii[n+i], i, n, w); - - ptr = ii[i]; - ii[i] = *t1; - *t1 = ptr; - ptr = ii[n+i]; - ii[n+i] = *t2; - *t2 = ptr; - } - - for (i = trunc; i < 2*n; i++) - { - FFT_twiddle(ii[i], ii[i-n], i - n, n, w); - } - - // [r0, r2, ..., r{m-2}] = Fradix2[s0, s1, ..., s{m/2-1}] - FFT_radix2_new(rr, 1, ii, n/2, 2*w, t1, t2, temp); - - // [r1, r3, ..., r{m-1}] = Fradix2[t0, t1, ..., t{m/2-1}] - FFT_radix2_truncate1(rr + n, 1, ii+n, n/2, 2*w, t1, t2, temp, trunc - n); - } -} - -void FFT_radix2_truncate_twiddle(mp_limb_t ** ii, ulong is, - ulong n, ulong w, mp_limb_t ** t1, mp_limb_t ** t2, mp_limb_t ** temp, - ulong ws, ulong r, ulong c, ulong rs, ulong trunc) -{ - mp_limb_t ** ss, ** tt; - mp_limb_t * ptr; - ulong i, j, k, l; - ulong size = (w*n)/GMP_LIMB_BITS + 1; - - if (trunc == 2*n) - { - FFT_radix2_twiddle(ii, is, n, w, t1, t2, temp, ws, r, c, rs); - return; - } - - if (trunc <= n) - { - // PASS - FFT_radix2_truncate_twiddle(ii, is, n/2, 2*w, t1, t2, temp, ws, r, c, 2*rs, trunc); - } else - { - // PASS - // [s0, s1, ..., s{m/2}] = [i0+i{m/2}, i1+i{m/2+1}, ..., i{m/2-1}+i{m-1}] - // [t0, t1, ..., t{m/2-1}] - // = [z1^0*(i0-i{m/2}), z1^1*(i1-i{m/2+1}), ..., z1^{m/2-1}*(i{m/2-1}-i{m-1})] - // where z1 = exp(2*Pi*I/m), z1 => w bits - for (i = 0; i < trunc - n; i++) - { - FFT_radix2_butterfly(*t1, *t2, ii[i*is], ii[(n+i)*is], i, n, w); - - ptr = ii[i*is]; - ii[i*is] = *t1; - *t1 = ptr; - ptr = ii[(n+i)*is]; - ii[(n+i)*is] = *t2; - *t2 = ptr; - } - - for (i = trunc; i < 2*n; i++) - { - FFT_twiddle(ii[i*is], ii[(i-n)*is], i - n, n, w); - } - - // [r0, r2, ..., r{m-2}] = Fradix2[s0, s1, ..., s{m/2-1}] - FFT_radix2_twiddle(ii, is, n/2, 2*w, t1, t2, temp, ws, r, c, 2*rs); - - // [r1, r3, ..., r{m-1}] = Fradix2[t0, t1, ..., t{m/2-1}] - FFT_radix2_truncate1_twiddle(ii + n*is, is, n/2, 2*w, t1, t2, temp, ws, r + rs, c, 2*rs, trunc - n); - } -} - - -void FFT_radix2_negacyclic(mp_limb_t ** rr, ulong rs, mp_limb_t ** ii, - ulong n, ulong w, mp_limb_t ** t1, mp_limb_t ** t2, mp_limb_t ** temp) -{ - mp_limb_t ** ss, ** tt; - mp_limb_t * ptr; - ulong i, j, k, l; - ulong size = (w*n)/GMP_LIMB_BITS + 1; - - // we first apply twiddle factors corresponding to shifts of w*i/2 bits - // this could partially be combined with the butterflies below - - // [s0, s1, ..., s{m/2}] = [i0+i{m/2}, i1+i{m/2+1}, ..., i{m/2-1}+i{m-1}] - // [t0, t1, ..., t{m/2-1}] - // = [z1^0*(i0-i{m/2}), z1^1*(i1-i{m/2+1}), ..., z1^{m/2-1}*(i{m/2-1}-i{m-1})] - // where z1 = exp(2*Pi*I/m), z1 => w bits - for (i = 0; i < n; i++) - { - if (FFT_negacyclic_twiddle(*t1, ii[i], i, n, w)) - { - ptr = ii[i]; - ii[i] = *t1; - *t1 = ptr; - } - - if (FFT_negacyclic_twiddle(*t1, ii[n + i], n + i, n, w)) - { - ptr = ii[n + i]; - ii[n + i] = *t1; - *t1 = ptr; - } - - FFT_radix2_butterfly(*t1, *t2, ii[i], ii[n+i], i, n, w); - - ptr = ii[i]; - ii[i] = *t1; - *t1 = ptr; - ptr = ii[n+i]; - ii[n+i] = *t2; - *t2 = ptr; - } - - // [r0, r2, ..., r{m-2}] = Fradix2[s0, s1, ..., s{m/2-1}] - FFT_radix2_new(rr, 1, ii, n/2, 2*w, t1, t2, temp); - - // [r1, r3, ..., r{m-1}] = Fradix2[t0, t1, ..., t{m/2-1}] - FFT_radix2_new(rr + n, 1, ii+n, n/2, 2*w, t1, t2, temp); - - j = 0; k = 1; l = n+1; - temp[0] = ii[1]; - ii[1] = ii[n]; - for (i = 2; i < n; i += 2) - { - temp[k++] = ii[i]; - ii[i] = temp[j++]; - temp[k++] = ii[i+1]; - ii[i+1] = ii[l++]; - } - for ( ; i < 2*n; i+=2) - { - ii[i] = temp[j++]; - ii[i+1] = ii[l++]; - } -} - -/* - FFT of length 2*n on entries of ii with stride is. - Apply additional twists by z^{c*i} where i starts at r and increases by rs - for each coeff. Note z => ws bits. -*/ -void FFT_radix2_twiddle(mp_limb_t ** ii, ulong is, - ulong n, ulong w, mp_limb_t ** t1, mp_limb_t ** t2, mp_limb_t ** temp, - ulong ws, ulong r, ulong c, ulong rs) -{ - mp_limb_t ** ss, ** tt; - mp_limb_t * ptr; - ulong i, j, k, l; - ulong size = (w*n)/GMP_LIMB_BITS + 1; - - if (n == 1) - { - ulong tw1, tw2; - tw1 = r*c; - tw2 = tw1 + rs*c; - FFT_radix2_twiddle_butterfly(*t1, *t2, ii[0], ii[is], n*w, tw1*ws, tw2*ws); - ptr = ii[0]; - ii[0] = *t1; - *t1 = ptr; - ptr = ii[is]; - ii[is] = *t2; - *t2 = ptr; - return; - } - - // [s0, s1, ..., s{m/2}] = [i0+i{m/2}, i1+i{m/2+1}, ..., i{m/2-1}+i{m-1}] - // [t0, t1, ..., t{m/2-1}] - // = [z1^0*(i0-i{m/2}), z1^1*(i1-i{m/2+1}), ..., z1^{m/2-1}*(i{m/2-1}-i{m-1})] - // where z1 = exp(2*Pi*I/m), z1 => w bits - for (i = 0; i < n; i++) - { - FFT_radix2_butterfly(*t1, *t2, ii[i*is], ii[(n+i)*is], i, n, w); - - ptr = ii[i*is]; - ii[i*is] = *t1; - *t1 = ptr; - ptr = ii[(n+i)*is]; - ii[(n+i)*is] = *t2; - *t2 = ptr; - } - - // [r0, r2, ..., r{m-2}] = Fradix2[s0, s1, ..., s{m/2-1}] - FFT_radix2_twiddle(ii, is, n/2, 2*w, t1, t2, temp, ws, r, c, 2*rs); - - // [r1, r3, ..., r{m-1}] = Fradix2[t0, t1, ..., t{m/2-1}] - FFT_radix2_twiddle(ii+n*is, is, n/2, 2*w, t1, t2, temp, ws, r + rs, c, 2*rs); -} - -void IFFT_radix2_new(mp_limb_t ** rr, ulong rs, mp_limb_t ** ii, - ulong n, ulong w, mp_limb_t ** t1, mp_limb_t ** t2, mp_limb_t ** temp) -{ - mp_limb_t ** ss, ** tt; - mp_limb_t * ptr; - ulong i, j, k, l; - ulong size = (w*n)/GMP_LIMB_BITS + 1; - - if (n == 1) - { - FFT_radix2_inverse_butterfly(*t1, *t2, ii[0], ii[1], 0, n, w); - ptr = rr[0]; - rr[0] = *t1; - *t1 = ptr; - ptr = rr[rs]; - rr[rs] = *t2; - *t2 = ptr; - return; - } - - // [s0, s1, ..., s{m/2-1}] = Fradix2_inverse[i0, i2, ..., i{m-2}] - IFFT_radix2_new(ii, 1, ii, n/2, 2*w, t1, t2, temp); - - // [t{m/2}, t{m/2+1}, ..., t{m-1}] = Fradix2_inverse[i1, i3, ..., i{m-1}] - IFFT_radix2_new(ii+n, 1, ii+n, n/2, 2*w, t1, t2, temp); - - // [r0, r1, ..., r{m/2-1}] - // = [s0+z1^0*t0, s1+z1^1*t1, ..., s{m/2-1}+z1^{m/2-1}*t{m-1}] - // [r{m/2}, t{m/2+1}, ..., r{m-1}] - // = [s0-z1^0*t{m/2}, s1-z1^1*t{m/2+1}, ..., s{m/2-1}-z1^{m/2-1}*t{m-1}] - // where z1 = exp(-2*Pi*I/m), z1 => w bits - for (i = 0; i < n; i++) - { - FFT_radix2_inverse_butterfly(*t1, *t2, ii[i], ii[n+i], i, n, w); - - ptr = rr[i]; - rr[i] = *t1; - *t1 = ptr; - ptr = rr[n+i]; - rr[n+i] = *t2; - *t2 = ptr; - } -} - -void IFFT_radix2_twiddle(mp_limb_t ** ii, ulong is, - ulong n, ulong w, mp_limb_t ** t1, mp_limb_t ** t2, mp_limb_t ** temp, - ulong ws, ulong r, ulong c, ulong rs); - -void IFFT_radix2_truncate1(mp_limb_t ** rr, ulong rs, mp_limb_t ** ii, - ulong n, ulong w, mp_limb_t ** t1, mp_limb_t ** t2, mp_limb_t ** temp, - ulong trunc) -{ - mp_limb_t ** ss, ** tt; - mp_limb_t * ptr; - ulong i, j, k, l; - ulong size = (w*n)/GMP_LIMB_BITS + 1; - - if (trunc == 2*n) - { - IFFT_radix2_new(rr, rs, ii, n, w, t1, t2, temp); - return; - } - - if (trunc <= n) - { - // PASS - for (i = trunc; i < n; i++) - { - mpn_add_n(ii[i], ii[i], ii[i+n], size); - mpn_div_2expmod_2expp1(ii[i], ii[i], size - 1, 1); - } - - IFFT_radix2_truncate1(rr, rs, ii, n/2, 2*w, t1, t2, temp, trunc); - - for (i = 0; i < trunc; i++) - mpn_addsub_n(ii[i], ii[i], ii[i], ii[n+i], size); - - return; - } - - // [s0, s1, ..., s{m/2-1}] = Fradix2_inverse[i0, i2, ..., i{m-2}] - IFFT_radix2_new(ii, 1, ii, n/2, 2*w, t1, t2, temp); - - for (i = trunc; i < 2*n; i++) - { - mpn_sub_n(ii[i], ii[i-n], ii[i], size); - FFT_twiddle(*t1, ii[i], i - n, n, w); - ptr = ii[i]; - ii[i] = *t1; - *t1 = ptr; - } - - for (i = trunc - n; i < n; i++) - { - FFT_twiddle(*t1, ii[i + n], 2*n - i, n, w); - mpn_add_n(ii[i], ii[i], *t1, size); - } - - // [t{m/2}, t{m/2+1}, ..., t{m-1}] = Fradix2_inverse[i1, i3, ..., i{m-1}] - IFFT_radix2_truncate1(ii+n, 1, ii+n, n/2, 2*w, t1, t2, temp, trunc - n); - - // [r0, r1, ..., r{m/2-1}] - // = [s0+z1^0*t0, s1+z1^1*t1, ..., s{m/2-1}+z1^{m/2-1}*t{m-1}] - // [r{m/2}, t{m/2+1}, ..., r{m-1}] - // = [s0-z1^0*t{m/2}, s1-z1^1*t{m/2+1}, ..., s{m/2-1}-z1^{m/2-1}*t{m-1}] - // where z1 = exp(-2*Pi*I/m), z1 => w bits - for (i = 0; i < trunc - n; i++) - { - FFT_radix2_inverse_butterfly(*t1, *t2, ii[i], ii[n+i], i, n, w); - - ptr = rr[i]; - rr[i] = *t1; - *t1 = ptr; - ptr = rr[n+i]; - rr[n+i] = *t2; - *t2 = ptr; - } -} - -void IFFT_radix2_truncate1_twiddle(mp_limb_t ** ii, ulong is, - ulong n, ulong w, mp_limb_t ** t1, mp_limb_t ** t2, mp_limb_t ** temp, - ulong ws, ulong r, ulong c, ulong rs, ulong trunc) -{ - mp_limb_t ** ss, ** tt; - mp_limb_t * ptr; - ulong i, j, k, l; - ulong size = (w*n)/GMP_LIMB_BITS + 1; - - if (trunc == 2*n) - { - IFFT_radix2_twiddle(ii, is, n, w, t1, t2, temp, ws, r, c, rs); - return; - } - - if (trunc <= n) - { - // PASS - for (i = trunc; i < n; i++) - { - mpn_add_n(ii[i*is], ii[i*is], ii[(i+n)*is], size); - mpn_div_2expmod_2expp1(ii[i*is], ii[i*is], size - 1, 1); - } - - IFFT_radix2_truncate1_twiddle(ii, is, n/2, 2*w, t1, t2, temp, ws, r, c, 2*rs, trunc); - - for (i = 0; i < trunc; i++) - mpn_addsub_n(ii[i*is], ii[i*is], ii[i*is], ii[(n+i)*is], size); - - return; - } - - // [s0, s1, ..., s{m/2-1}] = Fradix2_inverse[i0, i2, ..., i{m-2}] - IFFT_radix2_twiddle(ii, is, n/2, 2*w, t1, t2, temp, ws, r, c, 2*rs); - - for (i = trunc; i < 2*n; i++) - { - mpn_sub_n(ii[i*is], ii[(i-n)*is], ii[i*is], size); - FFT_twiddle(*t1, ii[i*is], i - n, n, w); - ptr = ii[i*is]; - ii[i*is] = *t1; - *t1 = ptr; - } - - for (i = trunc - n; i < n; i++) - { - FFT_twiddle(*t1, ii[(i+n)*is], 2*n - i, n, w); - mpn_add_n(ii[i*is], ii[i*is], *t1, size); - } - - // [t{m/2}, t{m/2+1}, ..., t{m-1}] = Fradix2_inverse[i1, i3, ..., i{m-1}] - IFFT_radix2_truncate1_twiddle(ii + n*is, is, n/2, 2*w, t1, t2, temp, ws, r + rs, c, 2*rs, trunc - n); - - // [r0, r1, ..., r{m/2-1}] - // = [s0+z1^0*t0, s1+z1^1*t1, ..., s{m/2-1}+z1^{m/2-1}*t{m-1}] - // [r{m/2}, t{m/2+1}, ..., r{m-1}] - // = [s0-z1^0*t{m/2}, s1-z1^1*t{m/2+1}, ..., s{m/2-1}-z1^{m/2-1}*t{m-1}] - // where z1 = exp(-2*Pi*I/m), z1 => w bits - for (i = 0; i < trunc - n; i++) - { - FFT_radix2_inverse_butterfly(*t1, *t2, ii[i*is], ii[(n+i)*is], i, n, w); - - ptr = ii[i*is]; - ii[i*is] = *t1; - *t1 = ptr; - ptr = ii[(n+i)*is]; - ii[(n+i)*is] = *t2; - *t2 = ptr; - } -} - -/* - Truncate IFFT to given length. Requires trunc a multiple of 8. - Assumes (conceptually) zeroes from trunc to n. -*/ -void IFFT_radix2_truncate(mp_limb_t ** rr, ulong rs, mp_limb_t ** ii, - ulong n, ulong w, mp_limb_t ** t1, mp_limb_t ** t2, mp_limb_t ** temp, - ulong trunc) -{ - mp_limb_t ** ss, ** tt; - mp_limb_t * ptr; - ulong i, j, k, l; - ulong size = (w*n)/GMP_LIMB_BITS + 1; - - if (trunc == 2*n) - { - IFFT_radix2_new(rr, rs, ii, n, w, t1, t2, temp); - return; - } - - if (trunc <= n) - { - // PASS - IFFT_radix2_truncate(rr, rs, ii, n/2, 2*w, t1, t2, temp, trunc); - - for (i = 0; i < trunc; i++) - mpn_add_n(ii[i], ii[i], ii[i], size); - - return; - } - - //PASS - // [s0, s1, ..., s{m/2-1}] = Fradix2_inverse[i0, i2, ..., i{m-2}] - IFFT_radix2_new(ii, 1, ii, n/2, 2*w, t1, t2, temp); - - for (i = trunc; i < 2*n; i++) - { - FFT_twiddle(ii[i], ii[i-n], i - n, n, w); - } - - // [t{m/2}, t{m/2+1}, ..., t{m-1}] = Fradix2_inverse[i1, i3, ..., i{m-1}] - IFFT_radix2_truncate1(ii+n, 1, ii+n, n/2, 2*w, t1, t2, temp, trunc - n); - - // [r0, r1, ..., r{m/2-1}] - // = [s0+z1^0*t0, s1+z1^1*t1, ..., s{m/2-1}+z1^{m/2-1}*t{m-1}] - // [r{m/2}, t{m/2+1}, ..., r{m-1}] - // = [s0-z1^0*t{m/2}, s1-z1^1*t{m/2+1}, ..., s{m/2-1}-z1^{m/2-1}*t{m-1}] - // where z1 = exp(-2*Pi*I/m), z1 => w bits - for (i = 0; i < trunc - n; i++) - { - FFT_radix2_inverse_butterfly(*t1, *t2, ii[i], ii[n+i], i, n, w); - - ptr = rr[i]; - rr[i] = *t1; - *t1 = ptr; - ptr = rr[n+i]; - rr[n+i] = *t2; - *t2 = ptr; - } - - for (i = trunc - n; i < n; i++) - mpn_add_n(ii[i], ii[i], ii[i], size); -} - -void IFFT_radix2_truncate_twiddle(mp_limb_t ** ii, ulong is, - ulong n, ulong w, mp_limb_t ** t1, mp_limb_t ** t2, mp_limb_t ** temp, - ulong ws, ulong r, ulong c, ulong rs, ulong trunc) -{ - mp_limb_t ** ss, ** tt; - mp_limb_t * ptr; - ulong i, j, k, l; - ulong size = (w*n)/GMP_LIMB_BITS + 1; - - if (trunc == 2*n) - { - IFFT_radix2_twiddle(ii, is, n, w, t1, t2, temp, ws, r, c, rs); - return; - } - - if (trunc <= n) - { - // PASS - IFFT_radix2_truncate_twiddle(ii, is, n/2, 2*w, t1, t2, temp, ws, r, c, 2*rs, trunc); - - for (i = 0; i < trunc; i++) - mpn_add_n(ii[i*is], ii[i*is], ii[i*is], size); - - return; - } - - //PASS - // [s0, s1, ..., s{m/2-1}] = Fradix2_inverse[i0, i2, ..., i{m-2}] - IFFT_radix2_twiddle(ii, is, n/2, 2*w, t1, t2, temp, ws, r, c, 2*rs); - - for (i = trunc; i < 2*n; i++) - { - FFT_twiddle(ii[i*is], ii[(i-n)*is], i - n, n, w); - } - - // [t{m/2}, t{m/2+1}, ..., t{m-1}] = Fradix2_inverse[i1, i3, ..., i{m-1}] - IFFT_radix2_truncate1_twiddle(ii+n*is, is, n/2, 2*w, t1, t2, temp, ws, r + rs, c, 2*rs, trunc - n); - - // [r0, r1, ..., r{m/2-1}] - // = [s0+z1^0*t0, s1+z1^1*t1, ..., s{m/2-1}+z1^{m/2-1}*t{m-1}] - // [r{m/2}, t{m/2+1}, ..., r{m-1}] - // = [s0-z1^0*t{m/2}, s1-z1^1*t{m/2+1}, ..., s{m/2-1}-z1^{m/2-1}*t{m-1}] - // where z1 = exp(-2*Pi*I/m), z1 => w bits - for (i = 0; i < trunc - n; i++) - { - FFT_radix2_inverse_butterfly(*t1, *t2, ii[i*is], ii[(n+i)*is], i, n, w); - - ptr = ii[i*is]; - ii[i*is] = *t1; - *t1 = ptr; - ptr = ii[(n+i)*is]; - ii[(n+i)*is] = *t2; - *t2 = ptr; - } - - for (i = trunc - n; i < n; i++) - mpn_add_n(ii[i*is], ii[i*is], ii[i*is], size); -} - -void IFFT_radix2_negacyclic(mp_limb_t ** rr, ulong rs, mp_limb_t ** ii, - ulong n, ulong w, mp_limb_t ** t1, mp_limb_t ** t2, mp_limb_t ** temp) -{ - mp_limb_t ** ss, ** tt; - mp_limb_t * ptr; - ulong i, j, k, l; - ulong size = (w*n)/GMP_LIMB_BITS + 1; - - j = 0; k = 0; - for (i = 0; i < n; i += 2) - { - ii[i] = ii[2*i]; - temp[k++] = ii[i+1]; - ii[i+1] = ii[2*(i+1)]; - } - for ( ; i < 2*n; i+=2) - { - ii[i] = temp[j++]; - temp[k++] = ii[i+1]; - ii[i+1] = temp[j++]; - } - - // [s0, s1, ..., s{m/2-1}] = Fradix2_inverse[i0, i2, ..., i{m-2}] - IFFT_radix2_new(ii, 1, ii, n/2, 2*w, t1, t2, temp); - - // [t{m/2}, t{m/2+1}, ..., t{m-1}] = Fradix2_inverse[i1, i3, ..., i{m-1}] - IFFT_radix2_new(ii+n, 1, ii+n, n/2, 2*w, t1, t2, temp); - - // [r0, r1, ..., r{m/2-1}] - // = [s0+z1^0*t0, s1+z1^1*t1, ..., s{m/2-1}+z1^{m/2-1}*t{m-1}] - // [r{m/2}, t{m/2+1}, ..., r{m-1}] - // = [s0-z1^0*t{m/2}, s1-z1^1*t{m/2+1}, ..., s{m/2-1}-z1^{m/2-1}*t{m-1}] - // where z1 = exp(-2*Pi*I/m), z1 => w bits - for (i = 0; i < n; i++) - { - FFT_radix2_inverse_butterfly(*t1, *t2, ii[i], ii[n+i], i, n, w); - - ptr = rr[i]; - rr[i] = *t1; - *t1 = ptr; - ptr = rr[n+i]; - rr[n+i] = *t2; - *t2 = ptr; - - // we finally apply twiddle factors corresponding to shifts of w*i bits - // this could partially be combined with the butterflies above - - if (FFT_negacyclic_twiddle(*t1, ii[i], 4*n - i, n, w)) - { - ptr = ii[i]; - ii[i] = *t1; - *t1 = ptr; - } - - if (FFT_negacyclic_twiddle(*t1, ii[n + i], 3*n - i, n, w)) - { - ptr = ii[n + i]; - ii[n + i] = *t1; - *t1 = ptr; - } - } -} - -void IFFT_radix2_twiddle(mp_limb_t ** ii, ulong is, - ulong n, ulong w, mp_limb_t ** t1, mp_limb_t ** t2, mp_limb_t ** temp, - ulong ws, ulong r, ulong c, ulong rs) -{ - mp_limb_t ** ss, ** tt; - mp_limb_t * ptr; - ulong i, j, k, l; - ulong size = (w*n)/GMP_LIMB_BITS + 1; - - if (n == 1) - { - ulong tw1, tw2; - tw1 = r*c; - tw2 = tw1 + rs*c; - FFT_radix2_twiddle_inverse_butterfly(*t1, *t2, ii[0], ii[is], n*w, tw1*ws, tw2*ws); - ptr = ii[0]; - ii[0] = *t1; - *t1 = ptr; - ptr = ii[is]; - ii[is] = *t2; - *t2 = ptr; - return; - } - - // [s0, s1, ..., s{m/2-1}] = Fradix2_inverse[i0, i2, ..., i{m-2}] - IFFT_radix2_twiddle(ii, is, n/2, 2*w, t1, t2, temp, ws, r, c, 2*rs); - - // [t{m/2}, t{m/2+1}, ..., t{m-1}] = Fradix2_inverse[i1, i3, ..., i{m-1}] - IFFT_radix2_twiddle(ii+n*is, is, n/2, 2*w, t1, t2, temp, ws, r + rs, c, 2*rs); - - // [r0, r1, ..., r{m/2-1}] - // = [s0+z1^0*t0, s1+z1^1*t1, ..., s{m/2-1}+z1^{m/2-1}*t{m-1}] - // [r{m/2}, t{m/2+1}, ..., r{m-1}] - // = [s0-z1^0*t{m/2}, s1-z1^1*t{m/2+1}, ..., s{m/2-1}-z1^{m/2-1}*t{m-1}] - // where z1 = exp(-2*Pi*I/m), z1 => w bits - for (i = 0; i < n; i++) - { - FFT_radix2_inverse_butterfly(*t1, *t2, ii[i*is], ii[(n+i)*is], i, n, w); - - ptr = ii[i*is]; - ii[i*is] = *t1; - *t1 = ptr; - ptr = ii[(n+i)*is]; - ii[(n+i)*is] = *t2; - *t2 = ptr; - } -} - -/* - The matrix Fourier algorithm for a 1D Fourier transform of length m = 2*n, - works as follows: - - * Split the coefficients into R rows of C columns (here C = n1, R = m/n1 = n2) - * Perform a length R FFT on each column, i.e. with an input stride of n1 - * Multiply each coefficient by z^{r*c} where z = exp(2*Pi*I/m), note z=>w bits - * Perform a length C FFT on each row, i.e. with an input stride of 1 -*/ -void FFT_radix2_mfa(mp_limb_t ** ii, ulong n, ulong w, - mp_limb_t ** t1, mp_limb_t ** t2, mp_limb_t ** temp, ulong n1) -{ - ulong i, j; - ulong n2 = (2*n)/n1; - ulong depth = 0; - ulong depth2 = 0; - mp_limb_t * ptr; - - while ((1UL< w bits - - FFT_radix2_twiddle(ii + i, n1, n2/2, w*n1, t1, t2, temp, w, 0, i, 1); - for (j = 0; j < n2; j++) - { - ulong s = revbin(j, depth); - if (j < s) - { - ptr = ii[i + j*n1]; - ii[i + j*n1] = ii[i + s*n1]; - ii[i + s*n1] = ptr; - } - } - } - - for (i = 0; i < n2; i++) - { - FFT_radix2_new(ii + i*n1, 1, ii + i*n1, n1/2, w*n2, t1, t2, temp); - - for (j = 0; j < n1; j++) - { - ulong s = revbin(j, depth2); - if (j < s) - { - ptr = ii[i*n1 + j]; - ii[i*n1 + j] = ii[i*n1 + s]; - ii[i*n1 + s] = ptr; - } - } - } -} - -void FFT_radix2_mfa_truncate(mp_limb_t ** ii, ulong n, ulong w, - mp_limb_t ** t1, mp_limb_t ** t2, mp_limb_t ** temp, ulong n1, ulong trunc) -{ - ulong i, j, s; - ulong n2 = (2*n)/n1; - ulong limbs = (n*w)/GMP_LIMB_BITS; - ulong depth = 0; - ulong depth2 = 0; - mp_limb_t * ptr; - - while ((1UL< w bits - - FFT_radix2_truncate_twiddle(ii + i, n1, n2/2, w*n1, t1, t2, temp, w, 0, i, 1, trunc); - for (j = 0; j < n2; j++) - { - s = revbin(j, depth); - if (j < s) - { - ptr = ii[i + j*n1]; - ii[i + j*n1] = ii[i + s*n1]; - ii[i + s*n1] = ptr; - } - } - } - - for (s = 0; s < trunc; s++) - { - i = revbin(s, depth); - FFT_radix2_new(ii + i*n1, 1, ii + i*n1, n1/2, w*n2, t1, t2, temp); - - for (j = 0; j < n1; j++) - { - ulong t = revbin(j, depth2); - if (j < t) - { - ptr = ii[i*n1 + j]; - ii[i*n1 + j] = ii[i*n1 + t]; - ii[i*n1 + t] = ptr; - } - mpn_normmod_2expp1(ii[i*n1 + j], limbs); - } - } -} - -void IFFT_radix2_mfa(mp_limb_t ** ii, ulong n, ulong w, - mp_limb_t ** t1, mp_limb_t ** t2, mp_limb_t ** temp, ulong n1) -{ - ulong i, j; - ulong n2 = (2*n)/n1; - ulong depth = 0; - ulong depth2 = 0; - mp_limb_t * ptr; - - while ((1UL< w bits - IFFT_radix2_twiddle(ii + i, n1, n2/2, w*n1, t1, t2, temp, w, 0, i, 1); - } - -} - -void IFFT_radix2_mfa_truncate(mp_limb_t ** ii, ulong n, ulong w, - mp_limb_t ** t1, mp_limb_t ** t2, mp_limb_t ** temp, ulong n1, ulong trunc) -{ - ulong i, j, s; - ulong n2 = (2*n)/n1; - ulong limbs = (n*w)/GMP_LIMB_BITS; - ulong depth = 0; - ulong depth2 = 0; - mp_limb_t * ptr; - - while ((1UL< w bits - IFFT_radix2_truncate_twiddle(ii + i, n1, n2/2, w*n1, t1, t2, temp, w, 0, i, 1, trunc); - for (j = 0; j < trunc; j++) - mpn_normmod_2expp1(ii[i + j], limbs); - } - -} - -void FFT_mulmod_2expp1(mp_limb_t * r1, mp_limb_t * i1, mp_limb_t * i2, - ulong r_limbs, ulong depth, ulong w) -{ - ulong n = (1UL<= GMP_LIMB_BITS) - { - b -= GMP_LIMB_BITS; - ll++; - } - bit = (1UL<_mp_d, i, limbs + 1); - hi = i[limbs]; - if (hi < 0L) - { - mpn_neg_n(m->_mp_d, m->_mp_d, limbs + 1); - m->_mp_size = limbs + 1; - while ((m->_mp_size) && (!m->_mp_d[m->_mp_size - 1])) - m->_mp_size--; - m->_mp_size = -m->_mp_size; - } else - { - m->_mp_size = limbs + 1; - while ((m->_mp_size) && (!m->_mp_d[m->_mp_size - 1])) - m->_mp_size--; - } -} - -void ref_norm(mpz_t m, mpz_t p) -{ - mpz_mod(m, m, p); -} - -void ref_submod_i(mpz_t m, mpz_t i1, mpz_t i2, mpz_t p, ulong n, ulong w) -{ - mpz_sub(m, i1, i2); - mpz_mul_2exp(m, m, (n*w)/2); - mpz_mod(m, m, p); -} - -void ref_mul_2expmod(mpz_t m, mpz_t i2, mpz_t p, ulong n, ulong w, ulong d) -{ - mpz_mul_2exp(m, i2, d); - mpz_mod(m, m, p); -} - -void ref_div_2expmod(mpz_t m, mpz_t i2, mpz_t p, ulong n, ulong w, ulong d) -{ - mpz_t temp; - mpz_init(temp); - mpz_set_ui(temp, 1); - mpz_mul_2exp(temp, temp, d); - mpz_invert(temp, temp, p); - mpz_mul(m, i2, temp); - mpz_mod(m, m, p); - mpz_clear(temp); -} - -void ref_lshB_sumdiffmod(mpz_t t, mpz_t u, mpz_t i1, - mpz_t i2, mpz_t p, ulong n, ulong w, ulong x, ulong y) -{ - mpz_add(t, i1, i2); - mpz_sub(u, i1, i2); - mpz_mul_2exp(t, t, x*GMP_LIMB_BITS); - mpz_mul_2exp(u, u, y*GMP_LIMB_BITS); - mpz_mod(t, t, p); - mpz_mod(u, u, p); -} - -void ref_sumdiff_rshBmod(mpz_t t, mpz_t u, mpz_t i1, - mpz_t i2, mpz_t p, ulong n, ulong w, ulong x, ulong y) -{ - mpz_t mult1, mult2; - mpz_init(mult1); - mpz_init(mult2); - mpz_set_ui(mult1, 1); - mpz_mul_2exp(mult1, mult1, x*GMP_LIMB_BITS); - mpz_invert(mult1, mult1, p); - mpz_set_ui(mult2, 1); - mpz_mul_2exp(mult2, mult2, y*GMP_LIMB_BITS); - mpz_invert(mult2, mult2, p); - mpz_mul(mult1, mult1, i1); - mpz_mul(mult2, mult2, i2); - mpz_add(t, mult1, mult2); - mpz_sub(u, mult1, mult2); - mpz_mod(t, t, p); - mpz_mod(u, u, p); - mpz_clear(mult1); - mpz_clear(mult2); -} - -void ref_FFT_split_radix_butterfly(mpz_t s, mpz_t t, mpz_t p, ulong i, ulong n, ulong w) -{ - mpz_t temp; - mpz_init(temp); - mpz_add(temp, s, t); - mpz_sub(t, s, t); - mpz_set(s, temp); - mpz_mul_2exp(s, s, i*w); - mpz_mul_2exp(t, t, 3*i*w); - mpz_mod(s, s, p); - mpz_mod(t, t, p); - mpz_clear(temp); -} - -void set_p(mpz_t p, ulong n, ulong w) -{ - mpz_set_ui(p, 1); - mpz_mul_2exp(p, p, n*w); - mpz_add_ui(p, p, 1); -} - -void rand_n(mp_limb_t * n, gmp_randstate_t state, ulong limbs) -{ - mpn_rrandom(n, state, limbs); - n[limbs] = gmp_urandomm_ui(state, 10); - if (gmp_urandomm_ui(state, 2)) n[limbs] = -n[limbs]; -} - -void test_norm() -{ - ulong i, j, k, l, n, w, limbs; - mpz_t p, m, m2; - mpz_init(p); - mpz_init(m); - mpz_init(m2); - mp_limb_t * nn; - gmp_randstate_t state; - gmp_randinit_default(state); - TMP_DECL; - - for (i = GMP_LIMB_BITS; i < 32*GMP_LIMB_BITS; i += GMP_LIMB_BITS) - { - for (j = 1; j < 32; j++) - { - for (k = 1; k <= GMP_NUMB_BITS; k <<= 1) - { - n = i/k; - w = j*k; - limbs = (n*w)/GMP_LIMB_BITS; - TMP_MARK; - nn = TMP_BALLOC_LIMBS(limbs + 1); - mpn_rrandom(nn, state, limbs + 1); - mpn_to_mpz(m, nn, limbs); - set_p(p, n, w); - - mpn_normmod_2expp1(nn, limbs); - mpn_to_mpz(m2, nn, limbs); - ref_norm(m, p); - - if (mpz_cmp(m, m2) != 0) - { - printf("mpn_normmod_2expp1 error\n"); - gmp_printf("want %Zx\n\n", m); - gmp_printf("got %Zx\n", m2); - abort(); - } - TMP_FREE; - } - } - } - mpz_clear(p); - mpz_clear(m); - mpz_clear(m2); - gmp_randclear(state); -} - -void test_submod_i() -{ - ulong i, j, k, l, n, w, limbs; - mpz_t p, m, m2, mn1, mn2; - mpz_init(p); - mpz_init(m); - mpz_init(m2); - mpz_init(mn1); - mpz_init(mn2); - mp_limb_t * nn1, * nn2, * r; - gmp_randstate_t state; - gmp_randinit_default(state); - TMP_DECL; - - for (i = 2*GMP_LIMB_BITS; i < 64*GMP_LIMB_BITS; i += 2*GMP_LIMB_BITS) - { - for (j = 1; j < 32; j++) - { - for (k = 1; k <= 2*GMP_NUMB_BITS; k <<= 1) - { - n = i/k; - w = j*k; - limbs = (n*w)/GMP_LIMB_BITS; - TMP_MARK; - nn1 = TMP_BALLOC_LIMBS(limbs + 1); - nn2 = TMP_BALLOC_LIMBS(limbs + 1); - r = TMP_BALLOC_LIMBS(limbs + 1); - rand_n(nn1, state, limbs); - rand_n(nn2, state, limbs); - - mpn_to_mpz(mn1, nn1, limbs); - mpn_to_mpz(mn2, nn2, limbs); - set_p(p, n, w); - - mpn_submod_i_2expp1(r, nn1, nn2, limbs); - mpn_to_mpz(m2, r, limbs); - ref_norm(m2, p); - ref_submod_i(m, mn1, mn2, p, n, w); - - if (mpz_cmp(m, m2) != 0) - { - printf("mpn_submmod_i_2expp1 error\n"); - gmp_printf("want %Zx\n\n", m); - gmp_printf("got %Zx\n", m2); - abort(); - } - TMP_FREE; - } - } - } - mpz_clear(p); - mpz_clear(m); - mpz_clear(m2); - mpz_clear(mn1); - mpz_clear(mn2); - gmp_randclear(state); -} - -void test_mul_2expmod() -{ - ulong i, j, k, l, n, w, limbs, d; - mpz_t p, m, m2, mn1, mn2; - mpz_init(p); - mpz_init(m); - mpz_init(m2); - mpz_init(mn1); - mp_limb_t * nn1, * r; - gmp_randstate_t state; - gmp_randinit_default(state); - TMP_DECL; - - for (i = 2*GMP_LIMB_BITS; i < 64*GMP_LIMB_BITS; i += 2*GMP_LIMB_BITS) - { - for (j = 1; j < 32; j++) - { - for (k = 1; k <= 2*GMP_NUMB_BITS; k <<= 1) - { - for (d = 0; d < GMP_LIMB_BITS; d++) - { - n = i/k; - w = j*k; - limbs = (n*w)/GMP_LIMB_BITS; - TMP_MARK; - nn1 = TMP_BALLOC_LIMBS(limbs + 1); - r = TMP_BALLOC_LIMBS(limbs + 1); - rand_n(nn1, state, limbs); - mpn_to_mpz(mn1, nn1, limbs); - set_p(p, n, w); - - mpn_mul_2expmod_2expp1(r, nn1, limbs, d); - mpn_to_mpz(m2, r, limbs); - ref_norm(m2, p); - ref_mul_2expmod(m, mn1, p, n, w, d); - - if (mpz_cmp(m, m2) != 0) - { - printf("mpn_mul_2expmod_2expp1 error\n"); - gmp_printf("want %Zx\n\n", m); - gmp_printf("got %Zx\n", m2); - abort(); - } - TMP_FREE; - } - } - } - } - mpz_clear(p); - mpz_clear(m); - mpz_clear(m2); - mpz_clear(mn1); - gmp_randclear(state); -} - -void test_FFT_negacyclic_twiddle() -{ - ulong i, j, k, l, n, w, limbs, d; - mpz_t p, m, m2, mn1, mn2; - mpz_init(p); - mpz_init(m); - mpz_init(m2); - mpz_init(mn1); - mp_limb_t * nn1, * r; - gmp_randstate_t state; - gmp_randinit_default(state); - TMP_DECL; - - for (i = 2*GMP_LIMB_BITS; i < 20*GMP_LIMB_BITS; i += 2*GMP_LIMB_BITS) - { - for (j = 1; j < 10; j++) - { - for (k = 1; k <= 2*GMP_NUMB_BITS; k <<= 1) - { - n = i/k; - w = 2*j*k; - for (d = 0; d < 2*n; d++) - { - limbs = (n*w)/GMP_LIMB_BITS; - TMP_MARK; - nn1 = TMP_BALLOC_LIMBS(limbs + 1); - r = TMP_BALLOC_LIMBS(limbs + 1); - rand_n(nn1, state, limbs); - mpn_to_mpz(mn1, nn1, limbs); - set_p(p, n, w); - - if (!FFT_negacyclic_twiddle(r, nn1, d, n, w)) - MPN_COPY(r, nn1, limbs + 1); - mpn_to_mpz(m2, r, limbs); - ref_norm(m2, p); - ref_mul_2expmod(m, mn1, p, n, w, d*w/2); - - if (mpz_cmp(m, m2) != 0) - { - printf("FFT_negacyclic_twiddle error\n"); - gmp_printf("want %Zx\n\n", m); - gmp_printf("got %Zx\n", m2); - abort(); - } - TMP_FREE; - } - } - } - } - - for (i = 2*GMP_LIMB_BITS; i < 20*GMP_LIMB_BITS; i += 2*GMP_LIMB_BITS) - { - for (j = 1; j < 10; j++) - { - for (k = 1; k <= 2*GMP_NUMB_BITS; k <<= 1) - { - n = i/k; - w = 2*j*k; - for (d = 0; d < 2*n; d++) - { - limbs = (n*w)/GMP_LIMB_BITS; - TMP_MARK; - nn1 = TMP_BALLOC_LIMBS(limbs + 1); - r = TMP_BALLOC_LIMBS(limbs + 1); - rand_n(nn1, state, limbs); - mpn_to_mpz(mn1, nn1, limbs); - set_p(p, n, w); - - if (!FFT_negacyclic_twiddle(r, nn1, 4*n - d, n, w)) - MPN_COPY(r, nn1, limbs + 1); - mpn_to_mpz(m2, r, limbs); - ref_norm(m2, p); - ref_div_2expmod(m, mn1, p, n, w, d*w/2); - - if (mpz_cmp(m, m2) != 0) - { - printf("FFT_negacyclic_twiddle error\n"); - gmp_printf("want %Zx\n\n", m); - gmp_printf("got %Zx\n", m2); - abort(); - } - TMP_FREE; - } - } - } - } - mpz_clear(p); - mpz_clear(m); - mpz_clear(m2); - mpz_clear(mn1); - gmp_randclear(state); -} - -void test_div_2expmod() -{ - ulong i, j, k, l, n, w, limbs, d; - mpz_t p, m, m2, mn1, mn2; - mpz_init(p); - mpz_init(m); - mpz_init(m2); - mpz_init(mn1); - mp_limb_t * nn1, * r; - gmp_randstate_t state; - gmp_randinit_default(state); - TMP_DECL; - - for (i = 2*GMP_LIMB_BITS; i < 64*GMP_LIMB_BITS; i += 2*GMP_LIMB_BITS) - { - for (j = 1; j < 32; j++) - { - for (k = 1; k <= 2*GMP_NUMB_BITS; k <<= 1) - { - for (d = 0; d < GMP_LIMB_BITS; d++) - { - n = i/k; - w = j*k; - limbs = (n*w)/GMP_LIMB_BITS; - TMP_MARK; - nn1 = TMP_BALLOC_LIMBS(limbs + 1); - r = TMP_BALLOC_LIMBS(limbs + 1); - rand_n(nn1, state, limbs); - - mpn_to_mpz(mn1, nn1, limbs); - set_p(p, n, w); - - mpn_div_2expmod_2expp1(r, nn1, limbs, d); - mpn_to_mpz(m2, r, limbs); - ref_norm(m2, p); - ref_norm(mn1, p); - ref_mul_2expmod(m, m2, p, n, w, d); - - if (mpz_cmp(m, mn1) != 0) - { - printf("mpn_div_2expmod_2expp1 error\n"); - gmp_printf("want %Zx\n\n", mn1); - gmp_printf("got %Zx\n", m); - abort(); - } - TMP_FREE; - } - } - } - } - mpz_clear(p); - mpz_clear(m); - mpz_clear(m2); - mpz_clear(mn1); - gmp_randclear(state); -} - -void test_lshB_sumdiffmod() -{ - ulong c, i, j, k, l, x, y, n, w, limbs; - mpz_t p, ma, mb, m2a, m2b, mn1, mn2; - mpz_init(p); - mpz_init(ma); - mpz_init(mb); - mpz_init(m2a); - mpz_init(m2b); - mpz_init(mn1); - mpz_init(mn2); - mp_limb_t * nn1, * nn2, * r1, * r2; - gmp_randstate_t state; - gmp_randinit_default(state); - TMP_DECL; - - for (i = 2*GMP_LIMB_BITS; i < 20*GMP_LIMB_BITS; i += 2*GMP_LIMB_BITS) - { - for (j = 1; j < 10; j++) - { - for (k = 1; k <= 2*GMP_NUMB_BITS; k <<= 1) - { - n = i/k; - w = j*k; - limbs = (n*w)/GMP_LIMB_BITS; - for (c = 0; c < limbs; c++) - { - x = gmp_urandomm_ui(state, limbs + 1); - y = gmp_urandomm_ui(state, limbs + 1); - TMP_MARK; - nn1 = TMP_BALLOC_LIMBS(limbs + 1); - nn2 = TMP_BALLOC_LIMBS(limbs + 1); - r1 = TMP_BALLOC_LIMBS(limbs + 1); - r2 = TMP_BALLOC_LIMBS(limbs + 1); - rand_n(nn1, state, limbs); - rand_n(nn2, state, limbs); - - mpn_to_mpz(mn1, nn1, limbs); - mpn_to_mpz(mn2, nn2, limbs); - set_p(p, n, w); - - mpn_lshB_sumdiffmod_2expp1(r1, r2, nn1, nn2, limbs, x, y); - mpn_to_mpz(m2a, r1, limbs); - mpn_to_mpz(m2b, r2, limbs); - ref_norm(m2a, p); - ref_norm(m2b, p); - ref_lshB_sumdiffmod(ma, mb, mn1, mn2, p, n, w, x, y); - - if (mpz_cmp(ma, m2a) != 0) - { - printf("mpn_lshB_sumdiffmod_2expp1 error a\n"); - printf("x = %ld, y = %ld\n", x, y); - gmp_printf("want %Zx\n\n", ma); - gmp_printf("got %Zx\n", m2a); - abort(); - } - if (mpz_cmp(mb, m2b) != 0) - { - printf("mpn_lshB_sumdiffmod_2expp1 error b\n"); - printf("x = %ld, y = %ld\n", x, y); - gmp_printf("want %Zx\n\n", mb); - gmp_printf("got %Zx\n", m2b); - abort(); - } - TMP_FREE; - } - } - } - } - mpz_clear(p); - mpz_clear(ma); - mpz_clear(mb); - mpz_clear(m2a); - mpz_clear(m2b); - mpz_clear(mn1); - mpz_clear(mn2); - gmp_randclear(state); -} - -void test_sumdiff_rshBmod() -{ - ulong c, i, j, k, l, x, y, n, w, limbs; - mpz_t p, ma, mb, m2a, m2b, mn1, mn2; - mpz_init(p); - mpz_init(ma); - mpz_init(mb); - mpz_init(m2a); - mpz_init(m2b); - mpz_init(mn1); - mpz_init(mn2); - mp_limb_t * nn1, * nn2, * r1, * r2; - gmp_randstate_t state; - gmp_randinit_default(state); - TMP_DECL; - - for (i = 2*GMP_LIMB_BITS; i < 20*GMP_LIMB_BITS; i += 2*GMP_LIMB_BITS) - { - for (j = 1; j < 10; j++) - { - for (k = 1; k <= 2*GMP_NUMB_BITS; k <<= 1) - { - n = i/k; - w = j*k; - limbs = (n*w)/GMP_LIMB_BITS; - for (c = 0; c < limbs; c++) - { - x = gmp_urandomm_ui(state, limbs); - y = gmp_urandomm_ui(state, limbs); - TMP_MARK; - nn1 = TMP_BALLOC_LIMBS(limbs + 1); - nn2 = TMP_BALLOC_LIMBS(limbs + 1); - r1 = TMP_BALLOC_LIMBS(limbs + 1); - r2 = TMP_BALLOC_LIMBS(limbs + 1); - rand_n(nn1, state, limbs); - rand_n(nn2, state, limbs); - - mpn_to_mpz(mn1, nn1, limbs); - mpn_to_mpz(mn2, nn2, limbs); - set_p(p, n, w); - - mpn_sumdiff_rshBmod_2expp1(r1, r2, nn1, nn2, limbs, x, y); - mpn_to_mpz(m2a, r1, limbs); - mpn_to_mpz(m2b, r2, limbs); - ref_norm(m2a, p); - ref_norm(m2b, p); - ref_sumdiff_rshBmod(ma, mb, mn1, mn2, p, n, w, x, y); - - if (mpz_cmp(ma, m2a) != 0) - { - printf("mpn_sumdiff_rshBmod_2expp1 error a\n"); - printf("x = %ld, y = %ld, limbs = %ld\n", x, y, limbs); - gmp_printf("want %Zx\n\n", ma); - gmp_printf("got %Zx\n", m2a); - abort(); - } - if (mpz_cmp(mb, m2b) != 0) - { - printf("mpn_sumdiff_rshBmod_2expp1 error b\n"); - printf("x = %ld, y = %ld, limbs = %ld\n", x, y, limbs); - gmp_printf("want %Zx\n\n", mb); - gmp_printf("got %Zx\n", m2b); - abort(); - } - TMP_FREE; - } - } - } - } - mpz_clear(p); - mpz_clear(ma); - mpz_clear(mb); - mpz_clear(m2a); - mpz_clear(m2b); - mpz_clear(mn1); - mpz_clear(mn2); - gmp_randclear(state); -} - -void test_FFT_split_radix_butterfly() -{ - ulong i, j, k, x, n, w, limbs; - mpz_t p, ma, mb, m2a, m2b, mn1, mn2; - mpz_init(p); - mpz_init(ma); - mpz_init(mb); - mpz_init(m2a); - mpz_init(m2b); - mpz_init(mn1); - mpz_init(mn2); - mp_limb_t * nn1, * nn2, * temp; - gmp_randstate_t state; - gmp_randinit_default(state); - TMP_DECL; - - for (i = 2*GMP_LIMB_BITS; i < 20*GMP_LIMB_BITS; i += 2*GMP_LIMB_BITS) - { - for (j = 1; j < 10; j++) - { - for (k = 1; k <= 2*GMP_NUMB_BITS; k <<= 1) - { - n = i/k; - w = j*k; - limbs = (n*w)/GMP_LIMB_BITS; - for (x = 0; x < n + 1; x++) - { - TMP_MARK; - nn1 = TMP_BALLOC_LIMBS(limbs + 1); - nn2 = TMP_BALLOC_LIMBS(limbs + 1); - temp = TMP_BALLOC_LIMBS(2*(limbs + 1)); - rand_n(nn1, state, limbs); - rand_n(nn2, state, limbs); - - mpn_to_mpz(mn1, nn1, limbs); - mpn_to_mpz(mn2, nn2, limbs); - set_p(p, n, w); - - FFT_split_radix_butterfly(nn1, nn2, x, n, w, temp); - mpn_to_mpz(m2a, nn1, limbs); - mpn_to_mpz(m2b, nn2, limbs); - ref_norm(m2a, p); - ref_norm(m2b, p); - ref_FFT_split_radix_butterfly(mn1, mn2, p, x, n, w); - - if (mpz_cmp(m2a, mn1) != 0) - { - printf("FFT_split_radix_butterfly error a\n"); - printf("x = %ld\n", x); - gmp_printf("want %Zx\n\n", mn1); - gmp_printf("got %Zx\n", m2a); - abort(); - } - if (mpz_cmp(m2b, mn2) != 0) - { - printf("FFT_split_radix_butterfly error b\n"); - printf("x = %ld\n", x); - gmp_printf("want %Zx\n\n", mn2); - gmp_printf("got %Zx\n", m2b); - abort(); - } - TMP_FREE; - } - } - } - } - mpz_clear(p); - mpz_clear(ma); - mpz_clear(mb); - mpz_clear(m2a); - mpz_clear(m2b); - mpz_clear(mn1); - mpz_clear(mn2); - gmp_randclear(state); -} - -void test_mul() -{ - ulong depth = 13UL; - ulong w2 = 3UL; - ulong iters = 1; - - ulong n = (1UL<length = 2*n; - - for (j = 0; j < 2*n; j++) - mpn_normmod_2expp1(ii[j], limbs); - - for (i = 0; i < 2*n; i++) - MPN_COPY(poly->coeffs[i], ii[i], limbs + 1); - - tt = (mp_limb_t *) TMP_BALLOC_LIMBS(2*size); - - for (i = 0; i < 1; i++) - { - FFT_radix2_new(ii, 1, ii, n, w, &t1, &t2, s1); - - ZmodF_poly_FFT(poly, 2*n); - for (j = 0; j < 2*n; j++) - { - mpn_normmod_2expp1(ii[j], limbs); - ZmodF_normalise(poly->coeffs[j], limbs); - if (mpn_cmp(ii[j], poly->coeffs[j], limbs + 1) != 0) - { - printf("Coeff %ld incorrect\n", j); - } - } - } - - ZmodF_poly_clear(poly); - TMP_FREE; - gmp_randclear(state); -} - -void time_flint_mfa() -{ -#define NEWMFA 1 - ulong depth = 12L; - ulong iters = 1000; - ulong w2 = 1; - - ulong n = (1UL<length = 2*n; - - for (j = 0; j < 2*n; j++) - mpn_normmod_2expp1(ii[j], limbs); - - for (i = 0; i < 2*n; i++) - MPN_COPY(poly->coeffs[i], ii[i], limbs + 1); - - tt = (mp_limb_t *) TMP_BALLOC_LIMBS(2*size); - - for (i = 0; i < iters; i++) - { -#if NEWMFA - FFT_radix2_mfa(ii, n, w, &t1, &t2, s1, (1UL<<(depth/2))/w2); -#else - ZmodF_poly_FFT(poly, 2*n); -#endif - } - - ZmodF_poly_clear(poly); - TMP_FREE; - gmp_randclear(state); -} - -void test_mfa_fft() -{ - ulong depth = 12L; - ulong iters = 1; - ulong w = 1; - - ulong n = (1UL<length = 2*n; - - for (j = 0; j < 2*n; j++) - mpn_normmod_2expp1(ii[j], limbs); - - for (i = 0; i < 2*n; i++) - MPN_COPY(poly->coeffs[i], ii[i], limbs + 1); - - tt = (mp_limb_t *) TMP_BALLOC_LIMBS(2*size); - - trans = (mp_limb_t **) TMP_BALLOC_LIMBS(2*n); - n1 = (1UL<<(depth/2)); - n2 = (2*n)/n1; - FFT_radix2_mfa(ii, n, w, &t1, &t2, s1, n1); - - // transpose matrix - for (i = 0; i < n2; i++) - { - for (j = 0; j < n1; j++) - { - trans[j*n2 + i] = ii[i*n1 + j]; - } - } - - for (i = 0; i < 2*n; i++) - ii[i] = trans[i]; - - ZmodF_poly_FFT(poly, 2*n); - for (j = 0; j < 2*n; j++) - { - s = revbin(j, depth); - if (j < s) - { - ptr = ii[j]; - ii[j] = ii[s]; - ii[s] = ptr; - } - mpn_normmod_2expp1(ii[j], limbs); - ZmodF_normalise(poly->coeffs[j], limbs); - if (mpn_cmp(ii[j], poly->coeffs[j], limbs + 1) != 0) - { - printf("Coeff %ld incorrect\n", j); - } - } - - ZmodF_poly_clear(poly); - TMP_FREE; - gmp_randclear(state); -} - -void test_flint_ifft() -{ -#define NEWFFT 1 -#define IFFT 1 - ulong depth = 10L; - ulong iters = 1; - ulong w = 1; - - ulong n = (1UL<length = 2*n; - - for (j = 0; j < 2*n; j++) - mpn_normmod_2expp1(ii[j], limbs); - - for (i = 0; i < 2*n; i++) - MPN_COPY(poly->coeffs[i], ii[i], limbs + 1); - - tt = (mp_limb_t *) TMP_BALLOC_LIMBS(2*size); - - for (i = 0; i < 1; i++) - { - IFFT_radix2_new(ii, 1, ii, n, w, &t1, &t2, s1); - - ZmodF_poly_IFFT(poly); - for (j = 0; j < 2*n; j++) - { - mpn_normmod_2expp1(ii[j], limbs); - ZmodF_normalise(poly->coeffs[j], limbs); - if (mpn_cmp(ii[j], poly->coeffs[j], limbs) != 0) - { - gmp_printf("%Nx\n", ii[j], limbs + 1); - gmp_printf("%Nx\n", poly->coeffs[j], limbs + 1); - printf("Coeff %ld incorrect\n", j); - } - } - - } - - ZmodF_poly_clear(poly); - TMP_FREE; - gmp_randclear(state); -} - -void time_flint_ifft() -{ -#define NEWIFFT 0 - ulong depth = 16L; - ulong iters = 1; - ulong w = 1; - - ulong n = (1UL<length = 2*n; - - for (j = 0; j < 2*n; j++) - mpn_normmod_2expp1(ii[j], limbs); - - for (i = 0; i < 2*n; i++) - MPN_COPY(poly->coeffs[i], ii[i], limbs + 1); - - tt = (mp_limb_t *) TMP_BALLOC_LIMBS(2*size); - - for (i = 0; i < iters; i++) - { -#if NEWIFFT - IFFT_radix2_new(ii, 1, ii, n, w, &t1, &t2, s1); -#else - ZmodF_poly_IFFT(poly); -#endif - } - - ZmodF_poly_clear(poly); - TMP_FREE; - gmp_randclear(state); -} - -void time_flint_negacyclic_fft() -{ -#define NEGFFT 1 - ulong depth = 10L; - ulong iters = 10000; - ulong w = 4; - - ulong n = 512;//(1UL<length = 2*n; - - for (j = 0; j < 2*n; j++) - mpn_normmod_2expp1(ii[j], limbs); - - for (i = 0; i < 2*n; i++) - MPN_COPY(poly->coeffs[i], ii[i], limbs + 1); - - tt = (mp_limb_t *) TMP_BALLOC_LIMBS(2*size); - - for (i = 0; i < iters; i++) - { -#if NEGFFT - FFT_radix2_negacyclic(ii, 1, ii, n, w, &t1, &t2, s1); -#else - ZmodF_poly_IFFT(poly); -#endif - } - - ZmodF_poly_clear(poly); - TMP_FREE; - gmp_randclear(state); -} - -void time_flint_imfa() -{ -#define NEWIMFA 0 - ulong depth = 16L; - ulong iters = 1; - ulong w2 = 1; - - ulong w = w2*w2; - ulong n = (1UL<length = 2*n; - - for (j = 0; j < 2*n; j++) - mpn_normmod_2expp1(ii[j], limbs); - - for (i = 0; i < 2*n; i++) - MPN_COPY(poly->coeffs[i], ii[i], limbs + 1); - - tt = (mp_limb_t *) TMP_BALLOC_LIMBS(2*size); - - for (i = 0; i < iters; i++) - { -#if NEWIMFA - IFFT_radix2_mfa(ii, n, w, &t1, &t2, s1, (1UL<<(depth/2))/w2); -#else - ZmodF_poly_IFFT(poly); -#endif - } - - ZmodF_poly_clear(poly); - TMP_FREE; - gmp_randclear(state); -} - -void time_mul() -{ - ulong depth = 13UL; - ulong w2 = 1UL; - ulong iters = 100; - - ulong n = (1UL<=dn){mpn_tdiv_qr(qp,rp,0,zp,zn,dp,dn);}else{MPN_COPY(rp,zp,dn);} - cc=tp[xn]=mpn_mulmod_2expp1(tp,xp,yp,0,b,qp); + cc=tp[xn]=mpn_mulmod_2expp1_basecase(tp,xp,yp,0,b,qp); if(cc!=0 && dn==xn){tp[xn-1]|=tb;} - if(mpn_cmp(tp,rp,dn)!=0){printf("mpn_mulmod_2expp1 error %ld\n",b);abort();} + if(mpn_cmp(tp,rp,dn)!=0){printf("mpn_mulmod_2expp1_basecase error %ld\n",b);abort();} } } @@ -70,9 +70,9 @@ for( ;b<600;b++,tb*=2) ASSERT_MPN(dp,dn); mpn_mul(zp,yp,yn,xp,xn);zn=xn*2;MPN_NORMALIZE(zp,zn);MPN_ZERO(yp,xn);// set yp to 2^b if(zn>=dn){mpn_tdiv_qr(qp,rp,0,zp,zn,dp,dn);}else{MPN_COPY(rp,zp,dn);} - cc=tp[xn]=mpn_mulmod_2expp1(tp,xp,yp,1,b,qp); + cc=tp[xn]=mpn_mulmod_2expp1_basecase(tp,xp,yp,1,b,qp); if(cc!=0 && dn==xn){tp[xn-1]|=tb;} - if(mpn_cmp(tp,rp,dn)!=0){printf("mpn_mulmod_2expp1 error %ld\n",b);abort();} + if(mpn_cmp(tp,rp,dn)!=0){printf("mpn_mulmod_2expp1_basecase error %ld\n",b);abort();} } } @@ -90,9 +90,9 @@ for( ;b<600;b++,tb*=2) ASSERT_MPN(xp,xn);ASSERT_MPN(yp,yn);ASSERT_MPN(zp,zn);ASSERT_MPN(dp,dn); mpn_mul(zp,yp,yn,xp,xn);zn=xn*2;MPN_NORMALIZE(zp,zn);MPN_ZERO(yp,xn);// set yp to 2^b if(zn>=dn){mpn_tdiv_qr(qp,rp,0,zp,zn,dp,dn);}else{MPN_COPY(rp,zp,dn);} - cc=tp[xn]=mpn_mulmod_2expp1(tp,yp,xp,2,b,qp); + cc=tp[xn]=mpn_mulmod_2expp1_basecase(tp,yp,xp,2,b,qp); if(cc!=0 && dn==xn){tp[xn-1]|=tb;} - if(mpn_cmp(tp,rp,dn)!=0){printf("mpn_mulmod_2expp1 error %ld\n",b);abort();} + if(mpn_cmp(tp,rp,dn)!=0){printf("mpn_mulmod_2expp1_basecase error %ld\n",b);abort();} } } @@ -106,9 +106,9 @@ for( ;b<600;b++,tb*=2) {MPN_ZERO(xp,xn);MPN_ZERO(yp,xn);// set xp,yp to 2^b xp[xn-1]&=GMP_NUMB_MASK>>k; yp[xn-1]&=GMP_NUMB_MASK>>k; - cc=tp[xn]=mpn_mulmod_2expp1(tp,yp,xp,3,b,qp); + cc=tp[xn]=mpn_mulmod_2expp1_basecase(tp,yp,xp,3,b,qp); if(cc!=0 && dn==xn){tp[xn-1]|=tb;} - if(mpn_cmp(tp,rp,dn)!=0){printf("mpn_mulmod_2expp1 error %ld\n",b);abort();} + if(mpn_cmp(tp,rp,dn)!=0){printf("mpn_mulmod_2expp1_basecase error %ld\n",b);abort();} } } gmp_randclear(rands); diff --git a/tune/common.c b/tune/common.c index daa1adcb..fbc8992e 100644 --- a/tune/common.c +++ b/tune/common.c @@ -1237,9 +1237,9 @@ speed_mpn_mulmod_2expm1 (struct speed_params *s) } double -speed_mpn_mulmod_2expp1 (struct speed_params *s) +speed_mpn_mulmod_2expp1_basecase (struct speed_params *s) { - SPEED_ROUTINE_MPN_MULMOD_2EXPP1 (mpn_mulmod_2expp1); + SPEED_ROUTINE_MPN_MULMOD_2EXPP1_BASECASE (mpn_mulmod_2expp1_basecase); } double diff --git a/tune/speed.c b/tune/speed.c index 73b0bbf3..03672d5a 100644 --- a/tune/speed.c +++ b/tune/speed.c @@ -336,7 +336,7 @@ const struct routine_t { { "mpn_mullow_n_basecase", speed_mpn_mullow_n_basecase}, { "mpn_mulhigh_n", speed_mpn_mulhigh_n }, { "mpn_mulmod_2expm1", speed_mpn_mulmod_2expm1 }, - { "mpn_mulmod_2expp1", speed_mpn_mulmod_2expp1 }, + { "mpn_mulmod_2expp1_basecase", speed_mpn_mulmod_2expp1_basecase }, { "mpn_get_str", speed_mpn_get_str, FLAG_R_OPTIONAL }, diff --git a/tune/speed.h b/tune/speed.h index e650ccd6..8863c40a 100644 --- a/tune/speed.h +++ b/tune/speed.h @@ -260,7 +260,7 @@ double speed_mpn_mul_n_sqr _PROTO ((struct speed_params *s)); double speed_mpn_mullow_n _PROTO ((struct speed_params *s)); double speed_mpn_mulhigh_n _PROTO ((struct speed_params *s)); double speed_mpn_mulmod_2expm1 _PROTO ((struct speed_params *s)); -double speed_mpn_mulmod_2expp1 _PROTO ((struct speed_params *s)); +double speed_mpn_mulmod_2expp1_basecase _PROTO ((struct speed_params *s)); double speed_mpn_mullow_n_basecase _PROTO ((struct speed_params *s)); double speed_mpn_nand_n _PROTO ((struct speed_params *s)); double speed_mpn_nior_n _PROTO ((struct speed_params *s)); @@ -1179,8 +1179,8 @@ int speed_routine_count_zeros_setup _PROTO ((struct speed_params *s, return t; \ } -/* For mpn_mulmod_2expp1 , xsize=r, ysize=s->size. */ -#define SPEED_ROUTINE_MPN_MULMOD_2EXPP1(function) \ +/* For mpn_mulmod_2expp1_basecase , xsize=r, ysize=s->size. */ +#define SPEED_ROUTINE_MPN_MULMOD_2EXPP1_BASECASE(function) \ { \ mp_ptr wp,temps; \ unsigned i; \ diff --git a/tune/tune-fft.c b/tune/tune-fft.c index ed6a88dc..1372c3b7 100644 --- a/tune/tune-fft.c +++ b/tune/tune-fft.c @@ -171,7 +171,7 @@ main(void) start = clock(); for (i = 0; i < iters; i++) - mpn_mulmod_2expp1(r1, i1, i2, 0, bits, tt); + mpn_mulmod_2expp1_basecase(r1, i1, i2, 0, bits, tt); end = clock(); elapsed = ((double) (end - start)) / CLOCKS_PER_SEC;