mpir/mpn/generic/divrem_hensel_1.c
gladman 34efd8413b 1. Add Windows code for latest trunk revisions
2. Make changes for non C99 compilers (like VC++)
2009-04-01 14:58:15 +00:00

181 lines
5.8 KiB
C

/*
Copyright 2009 Jason Moxham
dnl This file is part of the MPIR Library.
dnl The MPIR Library is free software; you can redistribute it and/or modify
dnl it under the terms of the GNU Lesser General Public License as published
dnl by the Free Software Foundation; either version 2.1 of the License, or (at
dnl your option) any later version.
dnl The MPIR Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
dnl License for more details.
dnl You should have received a copy of the GNU Lesser General Public License
dnl along with the MPIR Library; see the file COPYING.LIB. If not, write
dnl to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
dnl Boston, MA 02110-1301, USA.
*/
#include "mpir.h"
#include "gmp-impl.h"
#include "longlong.h"
/*
 * Basic divrem_hensel_1: Hensel (low limb first) division of {xp,n} by a
 * single limb.
 *
 * The divisor d is 1 limb and odd.  m must be the inverse of d in limb
 * arithmetic, i.e. m*d == 1 as produced by modlimb_invert(m,d); this is
 * only checked by ASSERT.  qp may be the same area as xp or a separate
 * one; each xp[j] is read before qp[j] is written.
 *
 * Returns ret such that (xp,n) = (qp,n)*d - ret*B^n and 0 <= ret < d.
 */
mp_limb_t mpn_divrem_hensel_1(mp_ptr qp, mp_ptr xp, mp_size_t n, mp_limb_t d, mp_limb_t m)
{
  mp_size_t j;  /* same type as n, so the index cannot overflow for large n */
  mp_limb_t c, h, q, dummy, h1, t;

  ASSERT(n > 0);
  ASSERT_MPN(xp, n);
  ASSERT(MPN_SAME_OR_SEPARATE_P(qp, xp, n));
  ASSERT(m * d == 1);  /* modlimb_invert(m,d); */

  c = 0;  /* borrow carried from the previous limb */
  h = 0;  /* high limb of the previous q*d product */
  for (j = 0; j < n; j++)
    {
      h1 = xp[j];
      /* subtract-with-borrow: h1 = h1 - h - c, leaving the new borrow in c */
      t = h + c;
      c = (t > h1);
      h1 = h1 - t;
      /* q is chosen so that the low limb of q*d equals h1 */
      q = h1 * m;
      qp[j] = q;
      umul_ppmm(h, dummy, q, d);
      ASSERT(dummy == h1);
    }
  return h + c;
}
/*
 * Basic divexact, shifting the input.
 *
 * d must be nonzero.  With s the number of trailing zero bits of d, the
 * odd part d>>s is inverted with modlimb_invert and {xp,n} is read as if
 * it had been shifted right by s bits; the Hensel quotient of that
 * shifted value by the odd part is stored at {qp,n}.  qp may be the same
 * area as xp or a separate one.
 *
 * Original author's statement of the result (d here appears to be the
 * caller's unshifted divisor -- confirm):
 *   (xp,n) = low_s_bitsof(xp[0]) + (qp,n)*(d) - (ret<<s)*B^n
 *   and 0 <= ret < d/2^s
 */
mp_limb_t divexact_shiftin(mp_ptr qp,mp_ptr xp,mp_size_t n,mp_limb_t d)
{
  mp_size_t j;  /* same type as n, so the index cannot overflow for large n */
  int s;
  mp_limb_t c, h, q, dummy, h1, t, m;

  ASSERT(n > 0);
  ASSERT(d != 0);
  ASSERT_MPN(xp, n);
  ASSERT(MPN_SAME_OR_SEPARATE_P(qp, xp, n));

  count_trailing_zeros(s, d);
  d = d >> s;
  modlimb_invert(m, d);

  /* shift local xp right by s */
  c = 0;
  h = 0;
  for (j = 0; j < n - 1; j++)
    {
      /* The left shift is split in two (GMP_LIMB_BITS-1-s, then 1) so that
         s == 0 never produces a shift by the full limb width. */
      h1 = (xp[j] >> s) | (xp[j + 1] << (GMP_LIMB_BITS - 1 - s) << 1);
      /* subtract-with-borrow: h1 = h1 - h - c, leaving the new borrow in c */
      t = h + c;
      c = (t > h1);
      h1 = h1 - t;
      q = h1 * m;
      qp[j] = q;
      umul_ppmm(h, dummy, q, d);
      ASSERT(dummy == h1);
    }

  /* top limb: there is no higher limb to shift bits in from */
  h1 = xp[n - 1] >> s;
  t = h + c;
  c = (t > h1);
  h1 = h1 - t;
  q = h1 * m;
  qp[n - 1] = q;
  umul_ppmm(h, dummy, q, d);
  t = h + c;
  return t;
}
/*
 * Basic divexact, shifting the output.
 *
 * d must be nonzero.  With s the number of trailing zero bits of d, the
 * odd part d>>s is inverted with modlimb_invert, {xp,n} is Hensel-divided
 * by that odd part, and the quotient is shifted right by s bits as it is
 * stored at {qp,n}.  The low s bits of the first quotient limb, which the
 * shift drops, are returned in the low s bits of the result.  qp may be
 * the same area as xp or a separate one; xp[j] is read before qp[j-1] is
 * written.
 *
 * Original author's statement of the result (d here appears to be the
 * caller's unshifted divisor -- confirm):
 *   (xp,n) = low_s_bitsof(ret)*d/2^s + (qp,n)*(d) - (ret>>s)*B^n
 *   and 0 <= (ret>>s) < d/2^s
 */
mp_limb_t divexact_shiftout(mp_ptr qp,mp_ptr xp,mp_size_t n,mp_limb_t d)
{
  mp_size_t j;  /* same type as n, so the index cannot overflow for large n */
  int s;
  mp_limb_t c, h, q, dummy, h1, t, m, qo, qb;

  ASSERT(n > 0);
  ASSERT(d != 0);
  ASSERT_MPN(xp, n);
  ASSERT(MPN_SAME_OR_SEPARATE_P(qp, xp, n));

  count_trailing_zeros(s, d);
  d = d >> s;
  modlimb_invert(m, d);

  /* first limb: no incoming high part or borrow yet */
  h1 = xp[0];
  q = h1 * m;
  qo = q >> s;
  /* The shifts are split in two (GMP_LIMB_BITS-1-s, then 1) so that s == 0
     never produces a shift by the full limb width. */
  qb = (q << (GMP_LIMB_BITS - 1 - s) << 1);
  qb = (qb >> (GMP_LIMB_BITS - 1 - s) >> 1);  /* ie qb is low s bits of q */
  umul_ppmm(h, dummy, q, d);
  c = 0;

  for (j = 1; j < n; j++)
    {
      h1 = xp[j];
      /* subtract-with-borrow: h1 = h1 - h - c, leaving the new borrow in c */
      t = h + c;
      c = (t > h1);
      h1 = h1 - t;
      q = h1 * m;
      /* finish the previous output limb with the low s bits of this q */
      qo = qo | (q << (GMP_LIMB_BITS - 1 - s) << 1);
      qp[j - 1] = qo;
      qo = q >> s;
      umul_ppmm(h, dummy, q, d);
      ASSERT(dummy == h1);
    }

  qp[n - 1] = qo;
  t = h + c;
  t = (t << s) + qb;
  return t;
}
int main(void)
{mp_limb_t qp[2000],xp[2000],yp[2000],tp[2000],d,m,r1,r2;mp_size_t s,n,j;unsigned long t1;
for(j=0;j<100;j++){
for(n=1;n<1000;n+=1)
{mpn_random(&d,1);if(d==0)continue;
mpn_random(xp,n);if(j>50)xp[n-1]=mpn_mul_1(xp,xp,n-1,d);
count_trailing_zeros(s,d);
/*
r1=divexact_basic(qp,xp,n,d);
// so (xp,n) = (qp,n)*d -ret*B^n and 0 <= ret < d
r2=mpn_mul_1(tp,qp,n,d);
if(r1!=r2){printf("error 1\n");abort();}
if(mpn_cmp(tp,xp,n)!=0){printf("error 2\n");abort();}
*/
/*
r1=divexact_shiftin(qp,xp,n,d);
// so (xp,n) = low_s_bitsof(xp[0]) + (qp,n)*(d) -(ret<<s)*B^n and 0 <= ret < d/2^s
r2=mpn_mul_1(tp,qp,n,d);
m=xp[0];m<<=63-s;m<<=1;m>>=63-s;m>>=1;
if(mpn_add_1(tp,tp,n,m)!=0){printf("error 3a\n");abort();}
if((r1<<s)!=r2){printf("error 1a\n");abort();}
if(mpn_cmp(tp,xp,n)!=0){printf("error 2a %d\n",n);abort();}
*/
/*
r1=divexact_shiftout(qp,xp,n,d);
// so (xp,n) = low_s_bitsof(ret)*d/2^s + (qp,n)*(d) - (ret>>s)*B^n and 0 <= (ret>>s) < d/2^s
r2=mpn_mul_1(tp,qp,n,d);
m=r1;m<<=63-s;m<<=1;m>>=63-s;m>>=1;
r2+=mpn_add_1(tp,tp,n,m*(d>>s));
if((r1>>s)!=r2){printf("error 1b\n");abort();}
if((r1>>s)>(d>>s)){printf("error 3b\n");abort();}
if(mpn_cmp(tp,xp,n)!=0){printf("error 2b\n");abort();}
*/
modlimb_invert(m,d>>s);
r1=jaydivexact(qp,xp,n,d,m);
// so (xp,n) = low_s_bitsof(ret)*d/2^s + (qp,n)*(d) - (ret>>s)*B^n and 0 <= (ret>>s) < d/2^s
r2=mpn_mul_1(tp,qp,n,d);
m=r1;m<<=63-s;m<<=1;m>>=63-s;m>>=1;
r2+=mpn_add_1(tp,tp,n,m*(d>>s));
if((r1>>s)!=r2){printf("error 1c\n");abort();}
if((r1>>s)>(d>>s)){printf("error 3c\n");abort();}
if(mpn_cmp(tp,xp,n)!=0){printf("error 2c\n");abort();}
}}
d=242354423522;
count_trailing_zeros(s,d);
modlimb_invert(m,d>>s);
t1=rdtsc();jaydivexact(qp,xp,1000,d,m);t1=rdtsc()-t1;
t1=rdtsc();jaydivexact(qp,xp,1000,d,m);t1=rdtsc()-t1;
t1=rdtsc();jaydivexact(qp,xp,1000,d,m);t1=rdtsc()-t1;
t1=rdtsc();jaydivexact(qp,xp,1000,d,m);t1=rdtsc()-t1;
printf("time %d\n",t1);
return 0;}
/*
mp_limb_t jaybidivexact(mp_ptr qp,mp_ptr xp,mp_size_t n,mp_limb_t d,mp_limb_t m)
{int j;mp_limb_t c,h,q,dummy,h1,t,k,hd,ld,qd;
ASSERT(n>0);ASSERT(d!=0);ASSERT_MPN(xp,n);ASSERT(MPN_SAME_OR_SEPARATE_P(qp,xp,n));
ASSERT(d%2==1);
// if d is even then either shift the input xp or the output qp
// shifting the output is better as it is not on the dependent path, and for a divisibility test we don't need to do it
ASSERT(n%2==0);k=n/2;
c=0;h=0;t=0;hd=0;
for(j=0;j<=k-1;j++)
{h1=xp[j];ld=xp[n-1-j]
if(t>h1){h1=h1-t;c=1;}else{h1=h1-t;c=0;}
q=h1*m;
udiv_qrnnd(qd,dummy,hd,ld,d);
hd=ld-qd*d;
qp[n-1-j]=qd;
qp[j]=q;
umul_ppmm(h,dummy,q,d);
t=h+c;
}
return hd-t;} // so (xp,n) = (qp,n)*d +(hd-t)*B^k and d divides xp <=> ret=0
*/