34efd8413b
2. Make changes for non C99 compilers (like VC++)
181 lines
5.8 KiB
C
181 lines
5.8 KiB
C
/*
|
|
Copyright 2009 Jason Moxham
|
|
|
|
dnl This file is part of the MPIR Library.
|
|
|
|
dnl The MPIR Library is free software; you can redistribute it and/or modify
|
|
dnl it under the terms of the GNU Lesser General Public License as published
|
|
dnl by the Free Software Foundation; either version 2.1 of the License, or (at
|
|
dnl your option) any later version.
|
|
|
|
dnl The MPIR Library is distributed in the hope that it will be useful, but
|
|
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
|
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
|
dnl License for more details.
|
|
|
|
dnl You should have received a copy of the GNU Lesser General Public License
|
|
dnl along with the MPIR Library; see the file COPYING.LIB. If not, write
|
|
dnl to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
|
|
dnl Boston, MA 02110-1301, USA.
|
|
|
|
*/
|
|
|
|
#include "mpir.h"
|
|
#include "gmp-impl.h"
|
|
#include "longlong.h"
|
|
|
|
// basic divrem_hensel_1
|
|
// divisor is 1 limb and odd
|
|
mp_limb_t mpn_divrem_hensel_1(mp_ptr qp, mp_ptr xp, mp_size_t n, mp_limb_t d, mp_limb_t m)
|
|
{int j;mp_limb_t c,h,q,dummy,h1,t;
|
|
|
|
ASSERT(n>0);ASSERT_MPN(xp,n);ASSERT(MPN_SAME_OR_SEPARATE_P(qp,xp,n));
|
|
ASSERT(m*d==1);//modlimb_invert(m,d);
|
|
c=0;h=0;t=0;
|
|
for(j=0;j<=n-1;j++)
|
|
{h1=xp[j];
|
|
t=h+c;if(t>h1){h1=h1-t;c=1;}else{h1=h1-t;c=0;}// set borrow to c ; sbb t,h1 ; set c to borrow
|
|
q=h1*m;
|
|
qp[j]=q;
|
|
umul_ppmm(h,dummy,q,d);
|
|
ASSERT(dummy==h1);}
|
|
return h+c;} // so (xp,n) = (qp,n)*d -ret*B^n and 0 <= ret < d
|
|
|
|
// basic divexact
|
|
mp_limb_t divexact_shiftin(mp_ptr qp,mp_ptr xp,mp_size_t n,mp_limb_t d)
|
|
{int j,s;mp_limb_t c,h,q,dummy,h1,t,m;
|
|
|
|
ASSERT(n>0);ASSERT(d!=0);ASSERT_MPN(xp,n);ASSERT(MPN_SAME_OR_SEPARATE_P(qp,xp,n));
|
|
count_trailing_zeros(s,d);d=d>>s;modlimb_invert(m,d);
|
|
//shift local xp right by s
|
|
c=0;h=0;t=0;
|
|
for(j=0;j<=n-2;j++)
|
|
{h1=(xp[j]>>s)|(xp[j+1]<<(GMP_LIMB_BITS-1-s)<<1);
|
|
t=h+c;if(t>h1){h1=h1-t;c=1;}else{h1=h1-t;c=0;}// set borrow to c ; sbb t,h1 ; set c to borrow
|
|
q=h1*m;
|
|
qp[j]=q;
|
|
umul_ppmm(h,dummy,q,d);
|
|
ASSERT(dummy==h1);}
|
|
h1=xp[n-1]>>s;
|
|
t=h+c;if(t>h1){h1=h1-t;c=1;}else{h1=h1-t;c=0;}
|
|
q=h1*m;qp[n-1]=q;
|
|
umul_ppmm(h,dummy,q,d);
|
|
t=h+c;
|
|
return t;}
|
|
// so (xp,n) = low_s_bitsof(xp[0]) + (qp,n)*(d) -(ret<<s)*B^n and 0 <= ret < d/2^s
|
|
|
|
// basic divexact
|
|
mp_limb_t divexact_shiftout(mp_ptr qp,mp_ptr xp,mp_size_t n,mp_limb_t d)
|
|
{int j,s;mp_limb_t c,h,q,dummy,h1,t,m,qo,qb;
|
|
|
|
ASSERT(n>0);ASSERT(d!=0);ASSERT_MPN(xp,n);ASSERT(MPN_SAME_OR_SEPARATE_P(qp,xp,n));
|
|
count_trailing_zeros(s,d);d=d>>s;modlimb_invert(m,d);
|
|
h1=xp[0];q=h1*m;
|
|
qo=q>>s;qb=(q<<(GMP_LIMB_BITS-1-s)<<1);
|
|
qb=(qb>>(GMP_LIMB_BITS-1-s)>>1);// ie qb is low s bits of q
|
|
umul_ppmm(h,dummy,q,d);
|
|
c=0;
|
|
for(j=1;j<=n-1;j++)
|
|
{h1=xp[j];
|
|
t=h+c;if(t>h1){h1=h1-t;c=1;}else{h1=h1-t;c=0;}// set borrow to c ; sbb t,h1 ; set c to borrow
|
|
q=h1*m;
|
|
qo=qo|(q<<(GMP_LIMB_BITS-1-s)<<1);
|
|
qp[j-1]=qo;
|
|
qo=q>>s;
|
|
umul_ppmm(h,dummy,q,d);
|
|
ASSERT(dummy==h1);}
|
|
qp[n-1]=qo;
|
|
t=h+c;
|
|
t=(t<<s)+qb;
|
|
return t;}
|
|
// so (xp,n) = low_s_bitsof(ret)*d/2^s + (qp,n)*(d) - (ret>>s)*B^n and 0 <= (ret>>s) < d/2^s
|
|
|
|
|
|
|
|
int main(void)
|
|
{mp_limb_t qp[2000],xp[2000],yp[2000],tp[2000],d,m,r1,r2;mp_size_t s,n,j;unsigned long t1;
|
|
|
|
|
|
for(j=0;j<100;j++){
|
|
for(n=1;n<1000;n+=1)
|
|
{mpn_random(&d,1);if(d==0)continue;
|
|
mpn_random(xp,n);if(j>50)xp[n-1]=mpn_mul_1(xp,xp,n-1,d);
|
|
count_trailing_zeros(s,d);
|
|
/*
|
|
r1=divexact_basic(qp,xp,n,d);
|
|
// so (xp,n) = (qp,n)*d -ret*B^n and 0 <= ret < d
|
|
r2=mpn_mul_1(tp,qp,n,d);
|
|
if(r1!=r2){printf("error 1\n");abort();}
|
|
if(mpn_cmp(tp,xp,n)!=0){printf("error 2\n");abort();}
|
|
*/
|
|
/*
|
|
r1=divexact_shiftin(qp,xp,n,d);
|
|
// so (xp,n) = low_s_bitsof(xp[0]) + (qp,n)*(d) -(ret<<s)*B^n and 0 <= ret < d/2^s
|
|
r2=mpn_mul_1(tp,qp,n,d);
|
|
m=xp[0];m<<=63-s;m<<=1;m>>=63-s;m>>=1;
|
|
if(mpn_add_1(tp,tp,n,m)!=0){printf("error 3a\n");abort();}
|
|
if((r1<<s)!=r2){printf("error 1a\n");abort();}
|
|
if(mpn_cmp(tp,xp,n)!=0){printf("error 2a %d\n",n);abort();}
|
|
*/
|
|
/*
|
|
r1=divexact_shiftout(qp,xp,n,d);
|
|
// so (xp,n) = low_s_bitsof(ret)*d/2^s + (qp,n)*(d) - (ret>>s)*B^n and 0 <= (ret>>s) < d/2^s
|
|
r2=mpn_mul_1(tp,qp,n,d);
|
|
m=r1;m<<=63-s;m<<=1;m>>=63-s;m>>=1;
|
|
r2+=mpn_add_1(tp,tp,n,m*(d>>s));
|
|
if((r1>>s)!=r2){printf("error 1b\n");abort();}
|
|
if((r1>>s)>(d>>s)){printf("error 3b\n");abort();}
|
|
if(mpn_cmp(tp,xp,n)!=0){printf("error 2b\n");abort();}
|
|
*/
|
|
|
|
modlimb_invert(m,d>>s);
|
|
r1=jaydivexact(qp,xp,n,d,m);
|
|
// so (xp,n) = low_s_bitsof(ret)*d/2^s + (qp,n)*(d) - (ret>>s)*B^n and 0 <= (ret>>s) < d/2^s
|
|
r2=mpn_mul_1(tp,qp,n,d);
|
|
m=r1;m<<=63-s;m<<=1;m>>=63-s;m>>=1;
|
|
r2+=mpn_add_1(tp,tp,n,m*(d>>s));
|
|
if((r1>>s)!=r2){printf("error 1c\n");abort();}
|
|
if((r1>>s)>(d>>s)){printf("error 3c\n");abort();}
|
|
if(mpn_cmp(tp,xp,n)!=0){printf("error 2c\n");abort();}
|
|
|
|
}}
|
|
|
|
d=242354423522;
|
|
count_trailing_zeros(s,d);
|
|
modlimb_invert(m,d>>s);
|
|
t1=rdtsc();jaydivexact(qp,xp,1000,d,m);t1=rdtsc()-t1;
|
|
t1=rdtsc();jaydivexact(qp,xp,1000,d,m);t1=rdtsc()-t1;
|
|
t1=rdtsc();jaydivexact(qp,xp,1000,d,m);t1=rdtsc()-t1;
|
|
t1=rdtsc();jaydivexact(qp,xp,1000,d,m);t1=rdtsc()-t1;
|
|
printf("time %d\n",t1);
|
|
|
|
return 0;}
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
|
mp_limb_t jaybidivexact(mp_ptr qp,mp_ptr xp,mp_size_t n,mp_limb_t d,mp_limb_t m)
|
|
{int j;mp_limb_t c,h,q,dummy,h1,t,k,hd,ld,qd;
|
|
|
|
ASSERT(n>0);ASSERT(d!=0);ASSERT_MPN(xp,n);ASSERT(MPN_SAME_OR_SEPARATE_P(qp,xp,n));
|
|
ASSERT(d%2==1);
|
|
// if d is even then either shift the input xp or the output qp
|
|
// output is better as not on depandant path , and for div test dont need to do it
|
|
ASSERT(n%2==0);k=n/2;
|
|
c=0;h=0;t=0;hd=0;
|
|
for(j=0;j<=k-1;j++)
|
|
{h1=xp[j];ld=xp[n-1-j]
|
|
if(t>h1){h1=h1-t;c=1;}else{h1=h1-t;c=0;}
|
|
q=h1*m;
|
|
udiv_qrnnd(qd,dummy,hd,ld,d);
|
|
hd=ld-qd*d;
|
|
qp[n-1-j]=qd;
|
|
qp[j]=q;
|
|
umul_ppmm(h,dummy,q,d);
|
|
t=h+c;
|
|
}
|
|
return hd-t;} // so (xp,n) = (qp,n)*d +(hd-t)*B^k and d divides xp <=> ret=0
|
|
*/
|