Final integration of Bill's new FFT code

This commit is contained in:
gladman 2012-01-10 16:26:07 +00:00
parent d540bef9cb
commit 1253dff171
23 changed files with 2490 additions and 10315 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -77,7 +77,7 @@
</PrecompiledHeader>
<WarningLevel>Level3</WarningLevel>
<DebugInformationFormat>EditAndContinue</DebugInformationFormat>
<AdditionalIncludeDirectories>.\;..\;..\..\;..\..\tests\;"..\$(PlatformName)\"</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>.\;..\;..\..\lib\$(intDir);..\..\;..\..\tests\;..\$(PlatformName)\</AdditionalIncludeDirectories>
</ClCompile>
<Lib>
<AdditionalDependencies>psapi.lib;%(AdditionalDependencies)</AdditionalDependencies>
@ -96,7 +96,7 @@
</PrecompiledHeader>
<WarningLevel>Level3</WarningLevel>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
<AdditionalIncludeDirectories>.\;..\;..\..\;..\..\tests\;"..\$(PlatformName)\"</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>.\;..\;..\..\lib\$(intDir);..\..\;..\..\tests\;..\$(PlatformName)\</AdditionalIncludeDirectories>
</ClCompile>
<Lib>
<AdditionalDependencies>psapi.lib;%(AdditionalDependencies)</AdditionalDependencies>
@ -112,7 +112,7 @@
</PrecompiledHeader>
<WarningLevel>Level3</WarningLevel>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
<AdditionalIncludeDirectories>.\;..\;..\..\;..\..\tests\;"..\$(PlatformName)\"</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>.\;..\;..\..\lib\$(intDir);..\..\;..\..\tests\;..\$(PlatformName)\</AdditionalIncludeDirectories>
</ClCompile>
<Lib>
<AdditionalDependencies>psapi.lib;%(AdditionalDependencies)</AdditionalDependencies>
@ -130,7 +130,7 @@
</PrecompiledHeader>
<WarningLevel>Level3</WarningLevel>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
<AdditionalIncludeDirectories>.\;..\;..\..\;..\..\tests\;"..\$(PlatformName)\"</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>.\;..\;..\..\lib\$(intDir);..\..\;..\..\tests\;..\$(PlatformName)\</AdditionalIncludeDirectories>
<BufferSecurityCheck>false</BufferSecurityCheck>
<PreprocessorDefinitions>%(PreprocessorDefinitions)</PreprocessorDefinitions>
</ClCompile>

View File

@ -87,10 +87,10 @@
<WarningLevel>Level3</WarningLevel>
<DebugInformationFormat>EditAndContinue</DebugInformationFormat>
<CompileAs>Default</CompileAs>
<AdditionalIncludeDirectories>.\;..\;..\..\;..\..\tests\;"..\$(PlatformName)\"</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>.\;..\;..\..\lib\$(IntDir);..\..\;..\..\tests\;..\$(PlatformName)\</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<AdditionalDependencies>..\lib\$(IntDir)mpir.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>..\..\lib\$(IntDir)mpir.lib;%(AdditionalDependencies)</AdditionalDependencies>
<GenerateDebugInformation>true</GenerateDebugInformation>
<SubSystem>Console</SubSystem>
<TargetMachine>MachineX86</TargetMachine>
@ -115,10 +115,10 @@
<WarningLevel>Level3</WarningLevel>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
<CompileAs>Default</CompileAs>
<AdditionalIncludeDirectories>.\;..\;..\..\;..\..\tests\;"..\$(PlatformName)\"</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>.\;..\;..\..\lib\$(IntDir);..\..\;..\..\tests\;..\$(PlatformName)\</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<AdditionalDependencies>..\lib\$(IntDir)mpir.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>..\..\lib\$(IntDir)mpir.lib;%(AdditionalDependencies)</AdditionalDependencies>
<GenerateDebugInformation>true</GenerateDebugInformation>
<SubSystem>Console</SubSystem>
<TargetMachine>MachineX64</TargetMachine>
@ -140,10 +140,10 @@
<WarningLevel>Level3</WarningLevel>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
<CompileAs>Default</CompileAs>
<AdditionalIncludeDirectories>.\;..\;..\..\;..\..\tests\;"..\$(PlatformName)\"</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>.\;..\;..\..\lib\$(IntDir);..\..\;..\..\tests\;..\$(PlatformName)\</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<AdditionalDependencies>..\lib\$(IntDir)mpir.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>..\..\lib\$(IntDir)mpir.lib;%(AdditionalDependencies)</AdditionalDependencies>
<GenerateDebugInformation>true</GenerateDebugInformation>
<SubSystem>Console</SubSystem>
<OptimizeReferences>true</OptimizeReferences>
@ -170,10 +170,10 @@
<WarningLevel>Level3</WarningLevel>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
<CompileAs>Default</CompileAs>
<AdditionalIncludeDirectories>.\;..\;..\..\;..\..\tests\;"..\$(PlatformName)\"</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>.\;..\;..\..\lib\$(IntDir);..\..\;..\..\tests\;..\$(PlatformName)\</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<AdditionalDependencies>..\lib\$(IntDir)mpir.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>..\..\lib\$(IntDir)mpir.lib;%(AdditionalDependencies)</AdditionalDependencies>
<GenerateDebugInformation>true</GenerateDebugInformation>
<SubSystem>Console</SubSystem>
<OptimizeReferences>true</OptimizeReferences>

View File

@ -76,7 +76,7 @@
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile>
<Optimization>Disabled</Optimization>
<AdditionalIncludeDirectories>..\$(Platform);..\;..\..\;..\..\tests\;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>..\$(Platform);..\;..\..\;..\..\tests\;..\..\lib\$(IntDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<MinimalRebuild>true</MinimalRebuild>
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
@ -87,7 +87,7 @@
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
</ClCompile>
<Link>
<AdditionalDependencies>..\lib\$(IntDir)mpir.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>..\..\lib\$(IntDir)mpir.lib;%(AdditionalDependencies)</AdditionalDependencies>
<GenerateDebugInformation>true</GenerateDebugInformation>
<SubSystem>Console</SubSystem>
<TargetMachine>MachineX86</TargetMachine>
@ -99,7 +99,7 @@
</Midl>
<ClCompile>
<Optimization>Disabled</Optimization>
<AdditionalIncludeDirectories>..\$(Platform);..\;..\..\;..\..\tests\;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>..\$(Platform);..\;..\..\;..\..\tests\;..\..\lib\$(IntDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<MinimalRebuild>true</MinimalRebuild>
<BasicRuntimeChecks>EnableFastChecks</BasicRuntimeChecks>
@ -110,7 +110,7 @@
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
</ClCompile>
<Link>
<AdditionalDependencies>..\lib\$(IntDir)mpir.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>..\..\lib\$(IntDir)mpir.lib;%(AdditionalDependencies)</AdditionalDependencies>
<GenerateDebugInformation>true</GenerateDebugInformation>
<SubSystem>Console</SubSystem>
<TargetMachine>MachineX64</TargetMachine>
@ -120,7 +120,7 @@
<ClCompile>
<Optimization>MaxSpeed</Optimization>
<IntrinsicFunctions>true</IntrinsicFunctions>
<AdditionalIncludeDirectories>..\$(Platform);..\;..\..\;..\..\tests\;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>..\$(Platform);..\;..\..\;..\..\tests\;..\..\lib\$(IntDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
<FunctionLevelLinking>true</FunctionLevelLinking>
@ -130,7 +130,7 @@
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
</ClCompile>
<Link>
<AdditionalDependencies>..\lib\$(IntDir)mpir.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>..\..\lib\$(IntDir)mpir.lib;%(AdditionalDependencies)</AdditionalDependencies>
<GenerateDebugInformation>true</GenerateDebugInformation>
<SubSystem>Console</SubSystem>
<OptimizeReferences>true</OptimizeReferences>
@ -145,7 +145,7 @@
<ClCompile>
<Optimization>Full</Optimization>
<IntrinsicFunctions>true</IntrinsicFunctions>
<AdditionalIncludeDirectories>..\$(Platform);..\;..\..\;..\..\tests\;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>..\$(Platform);..\;..\..\;..\..\tests\;..\..\lib\$(IntDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<RuntimeLibrary>MultiThreaded</RuntimeLibrary>
<FunctionLevelLinking>true</FunctionLevelLinking>
@ -155,7 +155,7 @@
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
</ClCompile>
<Link>
<AdditionalDependencies>..\lib\$(IntDir)mpir.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>..\..\lib\$(IntDir)mpir.lib;%(AdditionalDependencies)</AdditionalDependencies>
<GenerateDebugInformation>true</GenerateDebugInformation>
<SubSystem>Console</SubSystem>
<OptimizeReferences>true</OptimizeReferences>

View File

@ -84,10 +84,10 @@
</PrecompiledHeader>
<WarningLevel>Level3</WarningLevel>
<DebugInformationFormat>EditAndContinue</DebugInformationFormat>
<AdditionalIncludeDirectories>.\;..\;..\..\;..\..\tests\;"..\$(PlatformName)\"</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>.\;..\;..\..\..\lib\$(intDir);..\..\;..\..\tests\;..\$(PlatformName)\</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<AdditionalDependencies>..\lib\$(IntDir)mpir.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>..\..\lib\$(IntDir)mpir.lib;%(AdditionalDependencies)</AdditionalDependencies>
<GenerateDebugInformation>true</GenerateDebugInformation>
<SubSystem>Console</SubSystem>
<TargetMachine>MachineX86</TargetMachine>
@ -110,10 +110,10 @@
</PrecompiledHeader>
<WarningLevel>Level3</WarningLevel>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
<AdditionalIncludeDirectories>.\;..\;..\..\;..\..\tests\;"..\$(PlatformName)\"</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>.\;..\;..\..\..\lib\$(intDir);..\..\;..\..\tests\;..\$(PlatformName)\</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<AdditionalDependencies>..\lib\$(IntDir)mpir.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>..\..\lib\$(IntDir)mpir.lib;%(AdditionalDependencies)</AdditionalDependencies>
<GenerateDebugInformation>true</GenerateDebugInformation>
<SubSystem>Console</SubSystem>
<TargetMachine>MachineX64</TargetMachine>
@ -133,10 +133,10 @@
</PrecompiledHeader>
<WarningLevel>Level3</WarningLevel>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
<AdditionalIncludeDirectories>.\;..\;..\..\;..\..\tests\;"..\$(PlatformName)\"</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>.\;..\;..\..\..\lib\$(intDir);..\..\;..\..\tests\;..\$(PlatformName)\</AdditionalIncludeDirectories>
</ClCompile>
<Link>
<AdditionalDependencies>..\lib\$(IntDir)mpir.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>..\..\lib\$(IntDir)mpir.lib;%(AdditionalDependencies)</AdditionalDependencies>
<GenerateDebugInformation>true</GenerateDebugInformation>
<SubSystem>Console</SubSystem>
<OptimizeReferences>true</OptimizeReferences>
@ -160,11 +160,11 @@
</PrecompiledHeader>
<WarningLevel>Level3</WarningLevel>
<DebugInformationFormat>ProgramDatabase</DebugInformationFormat>
<AdditionalIncludeDirectories>.\;..\;..\..\;..\..\tests\;"..\$(PlatformName)\"</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>.\;..\;..\..\..\lib\$(intDir);..\..\;..\..\tests\;..\$(PlatformName)\</AdditionalIncludeDirectories>
<BufferSecurityCheck>false</BufferSecurityCheck>
</ClCompile>
<Link>
<AdditionalDependencies>..\lib\$(IntDir)mpir.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies>..\..\lib\$(IntDir)mpir.lib;%(AdditionalDependencies)</AdditionalDependencies>
<GenerateDebugInformation>true</GenerateDebugInformation>
<SubSystem>Console</SubSystem>
<OptimizeReferences>true</OptimizeReferences>

View File

@ -60,7 +60,7 @@ void fft_mfa_truncate_sqrt2_inner(mp_limb_t ** ii, mp_limb_t ** jj, mp_size_t n,
mp_size_t t = i*n1 + j;
mpn_normmod_2expp1(ii[t], limbs);
if (ii != jj) mpn_normmod_2expp1(jj[t], limbs);
mpn_fft_mulmod_2expp1(ii[t], ii[t], jj[t], n, w, tt);
mpn_mulmod_2expp1(ii[t], ii[t], jj[t], n, w, tt);
}
ifft_radix2(ii + i*n1, n1/2, w*n2, t1, t2);
@ -80,7 +80,7 @@ void fft_mfa_truncate_sqrt2_inner(mp_limb_t ** ii, mp_limb_t ** jj, mp_size_t n,
mp_size_t t = i*n1 + j;
mpn_normmod_2expp1(ii[t], limbs);
if (ii != jj) mpn_normmod_2expp1(jj[t], limbs);
mpn_fft_mulmod_2expp1(ii[t], ii[t], jj[t], n, w, tt);
mpn_mulmod_2expp1(ii[t], ii[t], jj[t], n, w, tt);
}
ifft_radix2(ii + i*n1, n1/2, w*n2, t1, t2);

View File

@ -96,7 +96,7 @@ void mpn_mul_truncate_sqrt2(mp_limb_t * r1, mp_limb_t * i1, mp_size_t n1,
if (i1 != i2) mpn_normmod_2expp1(jj[j], limbs);
c = 2*ii[j][limbs] + jj[j][limbs];
ii[j][limbs] = mpn_mulmod_2expp1(ii[j], ii[j], jj[j], c, n*w, tt);
ii[j][limbs] = mpn_mulmod_2expp1_basecase(ii[j], ii[j], jj[j], c, n*w, tt);
}
ifft_truncate_sqrt2(ii, n, w, &t1, &t2, &s1, trunc);

View File

@ -122,7 +122,7 @@ void fft_mulmod_2expp1(mp_limb_t * r1, mp_limb_t * i1, mp_limb_t * i2,
if (i1 != i2) mpn_normmod_2expp1(jj[j], limbs);
c = 2*ii[j][limbs] + jj[j][limbs];
ii[j][limbs] = mpn_mulmod_2expp1(ii[j], ii[j], jj[j], c, n*w, tt);
ii[j][limbs] = mpn_mulmod_2expp1_basecase(ii[j], ii[j], jj[j], c, n*w, tt);
}
ifft_negacyclic(ii, n, w, &t1, &t2, &s1);
@ -176,33 +176,7 @@ void fft_mulmod_2expp1(mp_limb_t * r1, mp_limb_t * i1, mp_limb_t * i2,
mpn_normmod_2expp1(r1, r_limbs);
TMP_FREE;
}
/*
   Multiply i1 by i2 modulo 2^(n*w) + 1 and store the result in r.

   The operand size is limbs = n*w/GMP_LIMB_BITS. Index [limbs] of i1 and i2
   is read, and index [limbs] of r is written on the small-operand path, so
   each of r, i1, i2 is presumably limbs + 1 limbs long -- TODO confirm
   against callers.

   If limbs <= FFT_MULMOD_2EXPP1_CUTOFF the product is delegated to
   mpn_mulmod_2expp1 (with tt as scratch) and its return value is stored in
   r[limbs]. Otherwise a depth and per-coefficient width are derived from
   the bit size and mulmod_2expp1_table_n, and fft_mulmod_2expp1 is called.

   The function returns nothing; the original trailing "return 0;" was
   invalid in a void function and has been removed.
*/
void mpn_fft_mulmod_2expp1(mp_limb_t * r, mp_limb_t * i1, mp_limb_t * i2,
mp_size_t n, mp_size_t w, mp_limb_t * tt)
{
mp_size_t bits = n*w;
mp_size_t limbs = bits/GMP_LIMB_BITS;
mp_bitcnt_t depth1, depth = 1;
mp_size_t w1, off;
if (limbs <= FFT_MULMOD_2EXPP1_CUTOFF)
{
/* pack the two top limbs into the flag argument: bit 1 for i1, bit 0 for i2 */
mp_limb_t c = 2 * i1[limbs] + i2[limbs];
r[limbs] = mpn_mulmod_2expp1(r, i1, i2, c, bits, tt);
return;
}
/* smallest depth with 2^depth >= bits */
while ((((mp_limb_t)1)<<depth) < bits) depth++;
/* table is indexed from depth 12; smaller depths use entry 0, larger ones the last entry */
if (depth < 12) off = mulmod_2expp1_table_n[0];
else off = mulmod_2expp1_table_n[MIN(depth, FFT_N_NUM + 11) - 12];
depth1 = depth/2 - off;
w1 = bits/(((mp_limb_t)1)<<(2*depth1));
fft_mulmod_2expp1(r, i1, i2, limbs, depth1, w1);
}
gmp_si fft_adjust_limbs(mp_size_t limbs)

View File

@ -171,7 +171,7 @@ main(void)
start = clock();
for (i = 0; i < iters; i++)
mpn_mulmod_2expp1(r1, i1, i2, 0, bits, tt);
mpn_mulmod_2expp1_basecase(r1, i1, i2, 0, bits, tt);
end = clock();
elapsed = ((double) (end - start)) / CLOCKS_PER_SEC;

View File

@ -1829,10 +1829,6 @@ __GMP_DECLSPEC void mpn_zero __GMP_PROTO ((mp_ptr, mp_size_t));
/**************** MPN API for FFT ****************/
#define mpn_fft_mulmod_2expp1 __MPN(fft_mulmod_2expp1)
__GMP_DECLSPEC void mpn_fft_mulmod_2expp1 __GMP_PROTO ((mp_limb_t * r, mp_limb_t * i1, mp_limb_t * i2,
mp_size_t n, mp_size_t w, mp_limb_t * tt));
#define mpn_normmod_2expp1 __MPN(normmod_2expp1)
__GMP_DECLSPEC void mpn_normmod_2expp1 __GMP_PROTO ((mp_limb_t * t, mp_size_t limbs));

View File

@ -1192,6 +1192,9 @@ __GMP_DECLSPEC void ifft_truncate1_twiddle __GMP_PROTO ((mp_limb_t ** ii, mp_siz
__GMP_DECLSPEC void fft_naive_convolution_1 __GMP_PROTO ((mp_limb_t * r, mp_limb_t * ii,
mp_limb_t * jj, mp_size_t m));
#define mpn_mulmod_2expp1_basecase __MPN(mulmod_2expp1_basecase)
__GMP_DECLSPEC int mpn_mulmod_2expp1_basecase __GMP_PROTO ((mp_ptr, mp_srcptr, mp_srcptr, int, gmp_ui, mp_ptr));
typedef __gmp_randstate_struct *gmp_randstate_ptr;
typedef const __gmp_randstate_struct *gmp_randstate_srcptr;

View File

@ -243,10 +243,10 @@ mpn_sqr (mp_ptr p, mp_srcptr a, mp_size_t n)
#if WANT_FFT || TUNE_PROGRAM_BUILD
else
{
#if 0
mpn_mul_fft_main(p, a, n, a, n);
#else
#if defined( OLD_FFT )
mpn_mul_fft_full (p, a, n, a, n);
#else
mpn_mul_fft_main(p, a, n, a, n);
#endif
}
#endif

View File

@ -180,7 +180,7 @@ mpn_mulmod_2expm1 (mp_ptr xp, mp_ptr yp, mp_ptr zp, gmp_ui b,
tzpp[m - 1] &= GMP_NUMB_MASK >> k;
}
mpn_mulmod_2expm1 (S, typm, tzpm, h, temp); // unroll this recursion S=A rename
c = mpn_mulmod_2expp1 (D, typp, tzpp, c1 * 2 + c2, h, temp); // D=B rename
c = mpn_mulmod_2expp1_basecase (D, typp, tzpp, c1 * 2 + c2, h, temp); // D=B rename
if (LIKELY (c == 0))
{
c1 = mpn_sumdiff_n (S, D, S, D, m);
@ -361,7 +361,7 @@ else
tzpp[m-1]&=GMP_NUMB_MASK>>k;}
mpn_mulmod_2expm1_new(S,typm,tzpm,h,temp);// unroll this recursion S=A rename
c=mpn_mulmod_2expp1(D,typp,tzpp,c1*2+c2,h,temp); // D=B rename
c=mpn_mulmod_2expp1_basecase(D,typp,tzpp,c1*2+c2,h,temp); // D=B rename
__GMP_FREE_FUNC_LIMBS(typm,m);__GMP_FREE_FUNC_LIMBS(tzpm,m);__GMP_FREE_FUNC_LIMBS(typp,m);__GMP_FREE_FUNC_LIMBS(tzpp,m);
__GMP_FREE_FUNC_LIMBS(temp,2*m);
if(LIKELY(c==0))

View File

@ -1,153 +1,68 @@
/* mpn_mulmod_2expp1
Copyright 2009 Jason Moxham
This file is part of the MPIR Library.
The MPIR Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published
by the Free Software Foundation; either version 2.1 of the License, or (at
your option) any later version.
The MPIR Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.
You should have received a copy of the GNU Lesser General Public License
along with the MPIR Library; see the file COPYING.LIB. If not, write
to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.
*/
#include "mpir.h"
#include "gmp-impl.h"
#include "longlong.h"
// k 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
static mp_size_t tab[]={0,0,0,2632,304,448,1024,2304,6400,11264,45056,114688,327680,1310720,3145728,12582912 };// OUT OF DATE
// from K8 old mul_fft_table 560 1184 1856 3840
// these thresholds could do with tuning , and extending
// ret+(xp,n)=(yp,n)*(zp,n) % 2^b+1
// needs (tp,2n) temp space , everything reduced mod 2^b
// inputs,outputs are fully reduced
// NOTE: 2n is not the same as 2b rounded up to nearest limb
//
// Parameters:
//   xp  destination, n limbs (may coincide with tp or tp+n, see the asserts)
//   yp  first operand, n limbs, must not overlap xp or tp
//   zp  second operand, n limbs, must not overlap xp or tp
//   b   bit size of the modulus 2^b+1, must be > 0
//   tp  scratch space of 2n limbs that receives the full product
// where n = BITS_TO_LIMBS(b).
//
// Returns the carry out of the final mpn_add_1, i.e. the "ret" part of the
// result in the formula above.
inline static int
mpn_mulmod_2expp1_internal (mp_ptr xp, mp_srcptr yp, mp_srcptr zp,
gmp_ui b, mp_ptr tp)
{
mp_size_t n, k;
mp_limb_t c;
// n = limbs needed for b bits, k = unused high bits in the top limb
n = BITS_TO_LIMBS (b);
k = GMP_NUMB_BITS * n - b;
ASSERT (b > 0);
ASSERT (n > 0);
ASSERT_MPN (yp, n);
ASSERT_MPN (zp, n);
ASSERT (!MPN_OVERLAP_P (tp, 2 * n, yp, n));
ASSERT (!MPN_OVERLAP_P (tp, 2 * n, zp, n));
ASSERT (!MPN_OVERLAP_P (xp, n, yp, n));
ASSERT (!MPN_OVERLAP_P (xp, n, zp, n));
ASSERT (MPN_SAME_OR_SEPARATE_P (xp, tp, n));
ASSERT (MPN_SAME_OR_SEPARATE_P (xp, tp + n, n));
// the k padding bits of each operand's top limb must be zero
ASSERT (k == 0 || yp[n - 1] >> (GMP_NUMB_BITS - k) == 0);
ASSERT (k == 0 || zp[n - 1] >> (GMP_NUMB_BITS - k) == 0);
// Disabled code path (#if 0): dispatch to mpn_mul_fft using the thresholds in tab[]
#if 0 && GMP_NAIL_BITS == 0
// mpn_mul_fft does not support nails
// the FFT has changed, so it cannot be used like this; how to use it is an open question
if (k == 0 && n % 8 == 0)
{
count_trailing_zeros (c, n);
if (c > 15)
c = 15;
for (c = c; c > 2; c--)
if (n >= tab[c])
return mpn_mul_fft (xp, n, yp, n, zp, n, c);
}
#endif
// full 2n-limb product into tp
mpn_mul_n (tp, yp, zp, n);
if (k == 0)
{
// b is a whole number of limbs: since 2^b == -1 (mod 2^b+1) the result is
// low half minus high half; a borrow is compensated by adding it back
c = mpn_sub_n (xp, tp, tp + n, n);
return mpn_add_1 (xp, xp, n, c);
}
// b is not limb aligned: the top limb of the low half holds k bits that
// belong to the high part; save the limb in c and mask those bits off
c = tp[n - 1];
tp[n - 1] &= GMP_NUMB_MASK >> k;
#if HAVE_NATIVE_mpn_sublsh_nc
c = mpn_sublsh_nc (xp, tp, tp + n, n, k, c);
#else
{
mp_limb_t c1;
// shift the high half left by k and insert the k saved bits at the bottom,
// so that tp+n holds the product divided by 2^b; c1 gets the bits shifted out
c1 = mpn_lshift (tp + n, tp + n, n, k);
tp[n] |= c >> (GMP_NUMB_BITS - k);
// low part minus high part; the correction is the borrow plus the shifted-out bits
c = mpn_sub_n (xp, tp, tp + n, n) + c1;
}
#endif
// add the correction back and clear the padding bits of the top limb
c = mpn_add_1 (xp, xp, n, c);
xp[n - 1] &= GMP_NUMB_MASK >> k;
return c;
}
// (xp,n) = (yp,n)*(zp,n) mod 2^b+1, where n = BITS_TO_LIMBS(b).
// c carries the top bits of the inputs, which must be fully reduced:
//   bit 1 (value 2) set  => the y operand is 2^b and (yp,n) must be zero
//   bit 0 (value 1) set  => the z operand is 2^b and (zp,n) must be zero
// tp is scratch space of 2n limbs. The return value is the top bit of the result.
int
mpn_mulmod_2expp1 (mp_ptr xp, mp_srcptr yp, mp_srcptr zp, int c,
                   gmp_ui b, mp_ptr tp)
{
  int y_top, z_top;
  mp_size_t n, k;
  mp_srcptr other;

  y_top = c & 2;
  z_top = c & 1;
  n = BITS_TO_LIMBS (b);
  k = GMP_NUMB_BITS * n - b;

  ASSERT (b > 0);
  ASSERT (n > 0);
  ASSERT_MPN (yp, n);
  ASSERT_MPN (zp, n);
  ASSERT (!MPN_OVERLAP_P (tp, 2 * n, yp, n));
  ASSERT (!MPN_OVERLAP_P (tp, 2 * n, zp, n));
  ASSERT (MPN_SAME_OR_SEPARATE_P (xp, tp, n));
  ASSERT (MPN_SAME_OR_SEPARATE_P (xp, tp + n, n));
  ASSERT (k == 0 || yp[n - 1] >> (GMP_NUMB_BITS - k) == 0);
  ASSERT (k == 0 || zp[n - 1] >> (GMP_NUMB_BITS - k) == 0);
#if WANT_ASSERT
  {
    // an operand whose top bit is flagged must have all n low limbs zero
    mp_size_t t;

    t = n;
    MPN_NORMALIZE (yp, t);
    ASSERT (y_top == 0 || t == 0);
    t = n;
    MPN_NORMALIZE (zp, t);
    ASSERT (z_top == 0 || t == 0);
  }
#endif

  // common case: neither operand is 2^b, do a real multiplication
  if (LIKELY (y_top == 0) && LIKELY (z_top == 0))
    return mpn_mulmod_2expp1_internal (xp, yp, zp, b, tp);

  // both operands are 2^b == -1, so the product is 1
  if (y_top != 0 && z_top != 0)
    {
      xp[0] = 1;
      MPN_ZERO (xp + 1, n - 1);
      return 0;
    }

  // exactly one operand is 2^b == -1: the result is the negation of the other
  other = (y_top == 0) ? yp : zp;
  c = mpn_neg_n (xp, other, n);
  c = mpn_add_1 (xp, xp, n, c);
  xp[n - 1] &= GMP_NUMB_MASK >> k;
  return c;
}
/*
Copyright 2009, 2011 William Hart. All rights reserved.
Redistribution and use in source and binary forms, with or without modification, are
permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice, this list of
conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice, this list
of conditions and the following disclaimer in the documentation and/or other materials
provided with the distribution.
THIS SOFTWARE IS PROVIDED BY William Hart ``AS IS'' AND ANY EXPRESS OR IMPLIED
WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND
FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL William Hart OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
The views and conclusions contained in the software and documentation are those of the
authors and should not be interpreted as representing official policies, either expressed
or implied, of William Hart.
*/
#include "mpir.h"
#include "gmp-impl.h"
#include "longlong.h"
#include "fft\fft_tuning.h"
static mp_size_t mulmod_2expp1_table_n[FFT_N_NUM] = MULMOD_TAB;
/*
   Multiply i1 by i2 modulo 2^(n*w) + 1, store the product in r and return
   r[limbs], where limbs = n*w/GMP_LIMB_BITS.

   Index [limbs] of i1 and i2 is read and index [limbs] of r is written, so
   each operand is presumably limbs + 1 limbs long -- TODO confirm against
   callers. tt is scratch space handed to the basecase routine.

   Unless OLD_FFT is defined, operands above FFT_MULMOD_2EXPP1_CUTOFF limbs
   go through fft_mulmod_2expp1 with a depth taken from
   mulmod_2expp1_table_n; everything else (and every call when OLD_FFT is
   defined) goes through mpn_mulmod_2expp1_basecase.

   The OLD_FFT branch previously referenced `c' and `bits' before they were
   declared and did not store the top limb; both paths now share the same
   basecase code so the result layout is identical in either configuration.
*/
int mpn_mulmod_2expp1(mp_limb_t * r, mp_limb_t * i1, mp_limb_t * i2, mp_size_t n, mp_size_t w, mp_limb_t * tt)
{
   mp_size_t bits = n*w;
   mp_size_t limbs = bits/GMP_LIMB_BITS;
   mp_limb_t c;

#if !defined( OLD_FFT )
   if (limbs > FFT_MULMOD_2EXPP1_CUTOFF)
   {
      mp_bitcnt_t depth1, depth = 1;
      mp_size_t w1, off;

      /* smallest depth with 2^depth >= bits */
      while ((((mp_limb_t)1)<<depth) < bits) depth++;

      /* table starts at depth 12; clamp the index to the table range */
      if (depth < 12) off = mulmod_2expp1_table_n[0];
      else off = mulmod_2expp1_table_n[MIN(depth, FFT_N_NUM + 11) - 12];

      depth1 = depth/2 - off;
      w1 = bits/(((mp_limb_t)1)<<(2*depth1));

      fft_mulmod_2expp1(r, i1, i2, limbs, depth1, w1);

      return r[limbs];
   }
#endif

   /* pack the two top limbs into the flag argument: bit 1 for i1, bit 0 for i2 */
   c = 2 * i1[limbs] + i2[limbs];
   r[limbs] = mpn_mulmod_2expp1_basecase(r, i1, i2, c, bits, tt);

   return r[limbs];
}

3745
new_fft.c

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@ -70,9 +70,9 @@ main(void)
mpn_normmod_2expp1(i1, int_limbs);
mpn_normmod_2expp1(i2, int_limbs);
mpn_fft_mulmod_2expp1(r2, i1, i2, n, w, tt);
mpn_mulmod_2expp1(r2, i1, i2, n, w, tt);
c = i1[int_limbs] + 2*i2[int_limbs];
c = mpn_mulmod_2expp1(r1, i1, i2, c, int_limbs*GMP_LIMB_BITS, tt);
c = mpn_mulmod_2expp1_basecase(r1, i1, i2, c, int_limbs*GMP_LIMB_BITS, tt);
for (j = 0; j < int_limbs; j++)
{
@ -115,9 +115,9 @@ main(void)
random_fermat(i1, state, int_limbs);
mpn_normmod_2expp1(i1, int_limbs);
mpn_fft_mulmod_2expp1(r2, i1, i1, n, w, tt);
mpn_mulmod_2expp1(r2, i1, i1, n, w, tt);
c = i1[int_limbs] + 2*i1[int_limbs];
c = mpn_mulmod_2expp1(r1, i1, i1, c, int_limbs*GMP_LIMB_BITS, tt);
c = mpn_mulmod_2expp1_basecase(r1, i1, i1, c, int_limbs*GMP_LIMB_BITS, tt);
for (j = 0; j < int_limbs; j++)
{

View File

@ -1,4 +1,4 @@
/* Test mpn_mulmod_2expp1
/* Test mpn_mulmod_2expp1_basecase
Copyright 2009 Jason Moxham
@ -47,9 +47,9 @@ for( ;b<600;b++,tb*=2)
ASSERT_MPN(xp,xn);ASSERT_MPN(yp,xn);ASSERT_MPN(zp,zn);ASSERT_MPN(dp,dn);
mpn_mul_n(zp,xp,yp,xn);zn=xn*2;MPN_NORMALIZE(zp,zn);
if(zn>=dn){mpn_tdiv_qr(qp,rp,0,zp,zn,dp,dn);}else{MPN_COPY(rp,zp,dn);}
cc=tp[xn]=mpn_mulmod_2expp1(tp,xp,yp,0,b,qp);
cc=tp[xn]=mpn_mulmod_2expp1_basecase(tp,xp,yp,0,b,qp);
if(cc!=0 && dn==xn){tp[xn-1]|=tb;}
if(mpn_cmp(tp,rp,dn)!=0){printf("mpn_mulmod_2expp1 error %ld\n",b);abort();}
if(mpn_cmp(tp,rp,dn)!=0){printf("mpn_mulmod_2expp1_basecase error %ld\n",b);abort();}
}
}
@ -70,9 +70,9 @@ for( ;b<600;b++,tb*=2)
ASSERT_MPN(dp,dn);
mpn_mul(zp,yp,yn,xp,xn);zn=xn*2;MPN_NORMALIZE(zp,zn);MPN_ZERO(yp,xn);// set yp to 2^b
if(zn>=dn){mpn_tdiv_qr(qp,rp,0,zp,zn,dp,dn);}else{MPN_COPY(rp,zp,dn);}
cc=tp[xn]=mpn_mulmod_2expp1(tp,xp,yp,1,b,qp);
cc=tp[xn]=mpn_mulmod_2expp1_basecase(tp,xp,yp,1,b,qp);
if(cc!=0 && dn==xn){tp[xn-1]|=tb;}
if(mpn_cmp(tp,rp,dn)!=0){printf("mpn_mulmod_2expp1 error %ld\n",b);abort();}
if(mpn_cmp(tp,rp,dn)!=0){printf("mpn_mulmod_2expp1_basecase error %ld\n",b);abort();}
}
}
@ -90,9 +90,9 @@ for( ;b<600;b++,tb*=2)
ASSERT_MPN(xp,xn);ASSERT_MPN(yp,yn);ASSERT_MPN(zp,zn);ASSERT_MPN(dp,dn);
mpn_mul(zp,yp,yn,xp,xn);zn=xn*2;MPN_NORMALIZE(zp,zn);MPN_ZERO(yp,xn);// set yp to 2^b
if(zn>=dn){mpn_tdiv_qr(qp,rp,0,zp,zn,dp,dn);}else{MPN_COPY(rp,zp,dn);}
cc=tp[xn]=mpn_mulmod_2expp1(tp,yp,xp,2,b,qp);
cc=tp[xn]=mpn_mulmod_2expp1_basecase(tp,yp,xp,2,b,qp);
if(cc!=0 && dn==xn){tp[xn-1]|=tb;}
if(mpn_cmp(tp,rp,dn)!=0){printf("mpn_mulmod_2expp1 error %ld\n",b);abort();}
if(mpn_cmp(tp,rp,dn)!=0){printf("mpn_mulmod_2expp1_basecase error %ld\n",b);abort();}
}
}
@ -106,9 +106,9 @@ for( ;b<600;b++,tb*=2)
{MPN_ZERO(xp,xn);MPN_ZERO(yp,xn);// set xp,yp to 2^b
xp[xn-1]&=GMP_NUMB_MASK>>k;
yp[xn-1]&=GMP_NUMB_MASK>>k;
cc=tp[xn]=mpn_mulmod_2expp1(tp,yp,xp,3,b,qp);
cc=tp[xn]=mpn_mulmod_2expp1_basecase(tp,yp,xp,3,b,qp);
if(cc!=0 && dn==xn){tp[xn-1]|=tb;}
if(mpn_cmp(tp,rp,dn)!=0){printf("mpn_mulmod_2expp1 error %ld\n",b);abort();}
if(mpn_cmp(tp,rp,dn)!=0){printf("mpn_mulmod_2expp1_basecase error %ld\n",b);abort();}
}
}
gmp_randclear(rands);

View File

@ -1237,9 +1237,9 @@ speed_mpn_mulmod_2expm1 (struct speed_params *s)
}
double
speed_mpn_mulmod_2expp1 (struct speed_params *s)
speed_mpn_mulmod_2expp1_basecase (struct speed_params *s)
{
SPEED_ROUTINE_MPN_MULMOD_2EXPP1 (mpn_mulmod_2expp1);
SPEED_ROUTINE_MPN_MULMOD_2EXPP1_BASECASE (mpn_mulmod_2expp1_basecase);
}
double

View File

@ -336,7 +336,7 @@ const struct routine_t {
{ "mpn_mullow_n_basecase", speed_mpn_mullow_n_basecase},
{ "mpn_mulhigh_n", speed_mpn_mulhigh_n },
{ "mpn_mulmod_2expm1", speed_mpn_mulmod_2expm1 },
{ "mpn_mulmod_2expp1", speed_mpn_mulmod_2expp1 },
{ "mpn_mulmod_2expp1_basecase", speed_mpn_mulmod_2expp1_basecase },
{ "mpn_get_str", speed_mpn_get_str, FLAG_R_OPTIONAL },

View File

@ -260,7 +260,7 @@ double speed_mpn_mul_n_sqr _PROTO ((struct speed_params *s));
double speed_mpn_mullow_n _PROTO ((struct speed_params *s));
double speed_mpn_mulhigh_n _PROTO ((struct speed_params *s));
double speed_mpn_mulmod_2expm1 _PROTO ((struct speed_params *s));
double speed_mpn_mulmod_2expp1 _PROTO ((struct speed_params *s));
double speed_mpn_mulmod_2expp1_basecase _PROTO ((struct speed_params *s));
double speed_mpn_mullow_n_basecase _PROTO ((struct speed_params *s));
double speed_mpn_nand_n _PROTO ((struct speed_params *s));
double speed_mpn_nior_n _PROTO ((struct speed_params *s));
@ -1179,8 +1179,8 @@ int speed_routine_count_zeros_setup _PROTO ((struct speed_params *s,
return t; \
}
/* For mpn_mulmod_2expp1 , xsize=r, ysize=s->size. */
#define SPEED_ROUTINE_MPN_MULMOD_2EXPP1(function) \
/* For mpn_mulmod_2expp1_basecase , xsize=r, ysize=s->size. */
#define SPEED_ROUTINE_MPN_MULMOD_2EXPP1_BASECASE(function) \
{ \
mp_ptr wp,temps; \
unsigned i; \

View File

@ -171,7 +171,7 @@ main(void)
start = clock();
for (i = 0; i < iters; i++)
mpn_mulmod_2expp1(r1, i1, i2, 0, bits, tt);
mpn_mulmod_2expp1_basecase(r1, i1, i2, 0, bits, tt);
end = clock();
elapsed = ((double) (end - start)) / CLOCKS_PER_SEC;