mpir/mpn/generic/sqrtrem.c

/* mpn_sqrtrem -- square root and remainder

Copyright 1999, 2000, 2001, 2002, 2004, 2005 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */


/* Contributed by Paul Zimmermann.
   See "Karatsuba Square Root", reference in gmp.texi.  */


#include <stdio.h>
#include <stdlib.h>

#include "mpir.h"
#include "gmp-impl.h"
#include "longlong.h"

static mp_limb_t mpn_intdivrem (mp_ptr qp, mp_size_t qxn,
	    mp_ptr np, mp_size_t nn,
	    mp_srcptr dp, mp_size_t dn)
{
  ASSERT (qxn >= 0);
  ASSERT (nn >= dn);
  ASSERT (dn >= 1);
  ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);
  ASSERT (! MPN_OVERLAP_P (np, nn, dp, dn));
  ASSERT (! MPN_OVERLAP_P (qp, nn-dn+qxn, np, nn) || qp==np+dn+qxn);
  ASSERT (! MPN_OVERLAP_P (qp, nn-dn+qxn, dp, dn));
  ASSERT_MPN (np, nn);
  ASSERT_MPN (dp, dn);

  if (dn == 1)
    {
      mp_limb_t ret;
      mp_ptr q2p;
      mp_size_t qn;
      TMP_DECL;

      TMP_MARK;
      q2p = (mp_ptr) TMP_ALLOC ((nn + qxn) * BYTES_PER_MP_LIMB);

      np[0] = mpn_divrem_1 (q2p, qxn, np, nn, dp[0]);
      qn = nn + qxn - 1;
      MPN_COPY (qp, q2p, qn);
      ret = q2p[qn];

      TMP_FREE;
      return ret;
    }
  else if (dn == 2)
    {
      return mpn_divrem_2 (qp, qxn, np, nn, dp);
    }
  else
    {
      mp_ptr rp, q2p;
      mp_limb_t qhl;
      mp_size_t qn;
      TMP_DECL;

      TMP_MARK;
      if (UNLIKELY (qxn != 0))
	{
	  mp_ptr n2p;
	  n2p = (mp_ptr) TMP_ALLOC ((nn + qxn) * BYTES_PER_MP_LIMB);
	  MPN_ZERO (n2p, qxn);
	  MPN_COPY (n2p + qxn, np, nn);
	  q2p = (mp_ptr) TMP_ALLOC ((nn - dn + qxn + 1) * BYTES_PER_MP_LIMB);
	  rp = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);
	  mpn_tdiv_qr (q2p, rp, 0L, n2p, nn + qxn, dp, dn);
	  MPN_COPY (np, rp, dn);
	  qn = nn - dn + qxn;
	  MPN_COPY (qp, q2p, qn);
	  qhl = q2p[qn];
	}
      else
	{
	  q2p = (mp_ptr) TMP_ALLOC ((nn - dn + 1) * BYTES_PER_MP_LIMB);
	  rp = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);
	  mpn_tdiv_qr (q2p, rp, 0L, np, nn, dp, dn);
	  MPN_COPY (np, rp, dn);	/* overwrite np area with remainder */
	  qn = nn - dn;
	  MPN_COPY (qp, q2p, qn);
	  qhl = q2p[qn];
	}
      TMP_FREE;
      return qhl;
    }
}


/* Square roots table.  Generated by the following program:
#include "mpir.h"
main(){mpz_t x;int i;mpz_init(x);for(i=64;i<256;i++){mpz_set_ui(x,256*i);
mpz_sqrt(x,x);mpz_out_str(0,10,x);printf(",");if(i%16==15)printf("\n");}}
*/
static const unsigned char approx_tab[192] =
  {
    128,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,
    143,144,144,145,146,147,148,149,150,150,151,152,153,154,155,155,
    156,157,158,159,160,160,161,162,163,163,164,165,166,167,167,168,
    169,170,170,171,172,173,173,174,175,176,176,177,178,178,179,180,
    181,181,182,183,183,184,185,185,186,187,187,188,189,189,190,191,
    192,192,193,193,194,195,195,196,197,197,198,199,199,200,201,201,
    202,203,203,204,204,205,206,206,207,208,208,209,209,210,211,211,
    212,212,213,214,214,215,215,216,217,217,218,218,219,219,220,221,
    221,222,222,223,224,224,225,225,226,226,227,227,228,229,229,230,
    230,231,231,232,232,233,234,234,235,235,236,236,237,237,238,238,
    239,240,240,241,241,242,242,243,243,244,244,245,245,246,246,247,
    247,248,248,249,249,250,250,251,251,252,252,253,253,254,254,255
  };

#define HALF_NAIL (GMP_NAIL_BITS / 2)

/* same as mpn_sqrtrem, but for size=1 and {np, 1} normalized */
static mp_size_t
mpn_sqrtrem1 (mp_ptr sp, mp_ptr rp, mp_srcptr np)
{
  mp_limb_t np0, s, r, q, u;
  int prec;

  ASSERT (np[0] >= GMP_NUMB_HIGHBIT / 2);
  ASSERT (GMP_LIMB_BITS >= 16);

  /* first compute a 8-bit approximation from the high 8-bits of np[0] */
  np0 = np[0] << GMP_NAIL_BITS;
  q = np0 >> (GMP_LIMB_BITS - 8);
  /* 2^6 = 64 <= q < 256 = 2^8 */
  s = approx_tab[q - 64];				/* 128 <= s < 255 */
  r = (np0 >> (GMP_LIMB_BITS - 16)) - s * s;		/* r <= 2*s */
  if (r > 2 * s)
    {
      r -= 2 * s + 1;
      s++;
    }

  prec = 8;
  np0 <<= 2 * prec;
  while (2 * prec < GMP_LIMB_BITS)
    {
      /* invariant: s has prec bits, and r <= 2*s */
      r = (r << prec) + (np0 >> (GMP_LIMB_BITS - prec));
      np0 <<= prec;
      u = 2 * s;
      q = r / u;
      u = r - q * u;
      s = (s << prec) + q;
      u = (u << prec) + (np0 >> (GMP_LIMB_BITS - prec));
      q = q * q;
      r = u - q;
      if (u < q)
	{
	  r += 2 * s - 1;
	  s --;
	}
      np0 <<= prec;
      prec = 2 * prec;
    }

  ASSERT (2 * prec == GMP_LIMB_BITS); /* GMP_LIMB_BITS must be a power of 2 */

  /* normalize back, assuming GMP_NAIL_BITS is even */
  ASSERT (GMP_NAIL_BITS % 2 == 0);
  sp[0] = s >> HALF_NAIL;
  u = s - (sp[0] << HALF_NAIL); /* s mod 2^HALF_NAIL */
  r += u * ((sp[0] << (HALF_NAIL + 1)) + u);
  r = r >> GMP_NAIL_BITS;

  if (rp != NULL)
    rp[0] = r;
  return r != 0 ? 1 : 0;
}


#define Prec (GMP_NUMB_BITS >> 1)

/* same as mpn_sqrtrem, but for size=2 and {np, 2} normalized
   return cc such that {np, 2} = sp[0]^2 + cc*2^GMP_NUMB_BITS + rp[0] */
static mp_limb_t
mpn_sqrtrem2 (mp_ptr sp, mp_ptr rp, mp_srcptr np)
{
  mp_limb_t qhl, q, u, np0;
  int cc;

  ASSERT (np[1] >= GMP_NUMB_HIGHBIT / 2);

  np0 = np[0];
  mpn_sqrtrem1 (sp, rp, np + 1);
  qhl = 0;
  while (rp[0] >= sp[0])
    {
      qhl++;
      rp[0] -= sp[0];
    }
  /* now rp[0] < sp[0] < 2^Prec */
  rp[0] = (rp[0] << Prec) + (np0 >> Prec);
  u = 2 * sp[0];
  q = rp[0] / u;
  u = rp[0] - q * u;
  q += (qhl & 1) << (Prec - 1);
  qhl >>= 1; /* if qhl=1, necessary q=0 as qhl*2^Prec + q <= 2^Prec */
  /* now we have (initial rp[0])<<Prec + np0>>Prec = (qhl<<Prec + q) * (2sp[0]) + u */
  sp[0] = ((sp[0] + qhl) << Prec) + q;
  cc = u >> Prec;
  rp[0] = ((u << Prec) & GMP_NUMB_MASK) + (np0 & (((mp_limb_t) 1 << Prec) - 1));
  /* subtract q * q or qhl*2^(2*Prec) from rp */
  cc -= mpn_sub_1 (rp, rp, 1, q * q) + qhl;
  /* now subtract 2*q*2^Prec + 2^(2*Prec) if qhl is set */
  if (cc < 0)
    {
      cc += sp[0] != 0 ? mpn_add_1 (rp, rp, 1, sp[0]) : 1;
      cc += mpn_add_1 (rp, rp, 1, --sp[0]);
    }

  return cc;
}

/* writes in {sp, n} the square root (rounded towards zero) of {np, 2n},
   and in {np, n} the low n limbs of the remainder, returns the high
   limb of the remainder (which is 0 or 1).
   Assumes {np, 2n} is normalized, i.e. np[2n-1] >= B/4
   where B=2^GMP_NUMB_BITS.  */
static mp_limb_t
mpn_dc_sqrtrem (mp_ptr sp, mp_ptr np, mp_size_t n)
{
  mp_limb_t q;			/* carry out of {sp, n} */
  int c, b;			/* carry out of remainder */
  mp_size_t l, h;

  ASSERT (np[2 * n - 1] >= GMP_NUMB_HIGHBIT / 2);

  if (n == 1)
    c = mpn_sqrtrem2 (sp, np, np);
  else
    {
      l = n / 2;
      h = n - l;
      q = mpn_dc_sqrtrem (sp + l, np + 2 * l, h);
      if (q != 0)
        mpn_sub_n (np + 2 * l, np + 2 * l, sp + l, h);
      q += mpn_intdivrem (sp, 0, np + l, n, sp + l, h);
      c = sp[0] & 1;
      mpn_rshift1 (sp, sp, l);
      sp[l - 1] |= (q << (GMP_NUMB_BITS - 1)) & GMP_NUMB_MASK;
      q >>= 1;
      if (c != 0)
        c = mpn_add_n (np + l, np + l, sp + l, h);
      mpn_sqr_n (np + n, sp, l);
      b = q + mpn_sub_n (np, np, np + n, 2 * l);
      c -= (l == h) ? b : mpn_sub_1 (np + 2 * l, np + 2 * l, 1, (mp_limb_t) b);
      q = mpn_add_1 (sp + l, sp + l, h, q);

      if (c < 0)
        {
          c += mpn_addmul_1 (np, sp, n, CNST_LIMB(2)) + 2 * q;
          c -= mpn_sub_1 (np, np, n, CNST_LIMB(1));
          q -= mpn_sub_1 (sp, sp, n, CNST_LIMB(1));
        }
    }

  return c;
}


mp_size_t
mpn_sqrtrem (mp_ptr sp, mp_ptr rp, mp_srcptr np, mp_size_t nn)
{
  mp_limb_t *tp, s0[1], cc, high, rl;
  int c;
  mp_size_t rn, tn;
  TMP_DECL;

  ASSERT (nn >= 0);
  ASSERT_MPN (np, nn);

  /* If OP is zero, both results are zero.  */
  if (nn == 0)
    return 0;

  ASSERT (np[nn - 1] != 0);
  ASSERT (rp == NULL || MPN_SAME_OR_SEPARATE_P (np, rp, nn));
  ASSERT (rp == NULL || ! MPN_OVERLAP_P (sp, (nn + 1) / 2, rp, nn));
  ASSERT (! MPN_OVERLAP_P (sp, (nn + 1) / 2, np, nn));

  high = np[nn - 1];
  if (nn == 1 && (high & GMP_NUMB_HIGHBIT))
    return mpn_sqrtrem1 (sp, rp, np);
  count_leading_zeros (c, high);
  c -= GMP_NAIL_BITS;

  c = c / 2; /* we have to shift left by 2c bits to normalize {np, nn} */
  tn = (nn + 1) / 2; /* 2*tn is the smallest even integer >= nn */

  TMP_MARK;
  if (nn % 2 != 0 || c > 0)
    {
      tp = TMP_ALLOC_LIMBS (2 * tn);
      tp[0] = 0;	     /* needed only when 2*tn > nn, but saves a test */
      if (c != 0)
	mpn_lshift (tp + 2 * tn - nn, np, nn, 2 * c);
      else
	MPN_COPY (tp + 2 * tn - nn, np, nn);
      rl = mpn_dc_sqrtrem (sp, tp, tn);
      /* We have 2^(2k)*N = S^2 + R where k = c + (2tn-nn)*GMP_NUMB_BITS/2,
	 thus 2^(2k)*N = (S-s0)^2 + 2*S*s0 - s0^2 + R where s0=S mod 2^k */
      c += (nn % 2) * GMP_NUMB_BITS / 2;		/* c now represents k */
      s0[0] = sp[0] & (((mp_limb_t) 1 << c) - 1);	/* S mod 2^k */
      rl += mpn_addmul_1 (tp, sp, tn, 2 * s0[0]);	/* R = R + 2*s0*S */
      cc = mpn_submul_1 (tp, s0, 1, s0[0]);
      rl -= (tn > 1) ? mpn_sub_1 (tp + 1, tp + 1, tn - 1, cc) : cc;
      mpn_rshift (sp, sp, tn, c);
      tp[tn] = rl;
      if (rp == NULL)
	rp = tp;
      c = c << 1;
      if (c < GMP_NUMB_BITS)
	tn++;
      else
	{
	  tp++;
	  c -= GMP_NUMB_BITS;
	}
      if (c != 0)
	mpn_rshift (rp, tp, tn, c);
      else
	MPN_COPY_INCR (rp, tp, tn);
      rn = tn;
    }
  else
    {
      if (rp == NULL)
	rp = TMP_ALLOC_LIMBS (nn);
      if (rp != np)
	MPN_COPY (rp, np, nn);
      rn = tn + (rp[tn] = mpn_dc_sqrtrem (sp, rp, tn));
    }

  MPN_NORMALIZE (rp, rn);

  TMP_FREE;
  return rn;
}
Set native line endings for all .c, .h, as, .asm, .s, .in, .m4, .cc, am 2008-06-25 03:33:36 -04:00			`/* mpn_sqrtrem -- square root and remainder`

			`Copyright 1999, 2000, 2001, 2002, 2004, 2005 Free Software Foundation, Inc.`

			`This file is part of the GNU MP Library.`

			`The GNU MP Library is free software; you can redistribute it and/or modify`
			`it under the terms of the GNU Lesser General Public License as published by`
			`the Free Software Foundation; either version 2.1 of the License, or (at your`
			`option) any later version.`

			`The GNU MP Library is distributed in the hope that it will be useful, but`
			`WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY`
			`or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public`
			`License for more details.`

			`You should have received a copy of the GNU Lesser General Public License`
			`along with the GNU MP Library; see the file COPYING.LIB. If not, write to`
			`the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,`
			`MA 02110-1301, USA. */`


			`/* Contributed by Paul Zimmermann.`
			`See "Karatsuba Square Root", reference in gmp.texi. */`


			`#include <stdio.h>`
			`#include <stdlib.h>`

for file in $(find -name \.c ) ; do sed -e "s/#include \"gmp\.h\"/#include \"mpir.h\"/g" $file > temp ; mv temp $file ; done for file in $(find -name \.h ) ; do sed -e "s/#include \"gmp\.h\"/#include \"mpir.h\"/g" $file > temp ; mv temp $file ; done for file in $(find -name \*.cc) ; do sed -e "s/#include \"gmp\.h\"/#include \"mpir.h\"/g" $file > temp ; mv temp $file ; done 2009-02-12 05:24:24 -05:00			`#include "mpir.h"`
Set native line endings for all .c, .h, as, .asm, .s, .in, .m4, .cc, am 2008-06-25 03:33:36 -04:00			`#include "gmp-impl.h"`
			`#include "longlong.h"`

removed obsolete fns mpn_divrem mpn_divmod and all referances to them 2009-08-11 18:11:33 -04:00			`static mp_limb_t mpn_intdivrem (mp_ptr qp, mp_size_t qxn,`
			`mp_ptr np, mp_size_t nn,`
			`mp_srcptr dp, mp_size_t dn)`
			`{`
			`ASSERT (qxn >= 0);`
			`ASSERT (nn >= dn);`
			`ASSERT (dn >= 1);`
			`ASSERT (dp[dn-1] & GMP_NUMB_HIGHBIT);`
			`ASSERT (! MPN_OVERLAP_P (np, nn, dp, dn));`
			`ASSERT (! MPN_OVERLAP_P (qp, nn-dn+qxn, np, nn) \|\| qp==np+dn+qxn);`
			`ASSERT (! MPN_OVERLAP_P (qp, nn-dn+qxn, dp, dn));`
			`ASSERT_MPN (np, nn);`
			`ASSERT_MPN (dp, dn);`

			`if (dn == 1)`
			`{`
			`mp_limb_t ret;`
			`mp_ptr q2p;`
			`mp_size_t qn;`
			`TMP_DECL;`

			`TMP_MARK;`
			`q2p = (mp_ptr) TMP_ALLOC ((nn + qxn) * BYTES_PER_MP_LIMB);`

			`np[0] = mpn_divrem_1 (q2p, qxn, np, nn, dp[0]);`
			`qn = nn + qxn - 1;`
			`MPN_COPY (qp, q2p, qn);`
			`ret = q2p[qn];`

			`TMP_FREE;`
			`return ret;`
			`}`
			`else if (dn == 2)`
			`{`
			`return mpn_divrem_2 (qp, qxn, np, nn, dp);`
			`}`
			`else`
			`{`
			`mp_ptr rp, q2p;`
			`mp_limb_t qhl;`
			`mp_size_t qn;`
			`TMP_DECL;`

			`TMP_MARK;`
			`if (UNLIKELY (qxn != 0))`
			`{`
			`mp_ptr n2p;`
			`n2p = (mp_ptr) TMP_ALLOC ((nn + qxn) * BYTES_PER_MP_LIMB);`
			`MPN_ZERO (n2p, qxn);`
			`MPN_COPY (n2p + qxn, np, nn);`
			`q2p = (mp_ptr) TMP_ALLOC ((nn - dn + qxn + 1) * BYTES_PER_MP_LIMB);`
			`rp = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);`
			`mpn_tdiv_qr (q2p, rp, 0L, n2p, nn + qxn, dp, dn);`
			`MPN_COPY (np, rp, dn);`
			`qn = nn - dn + qxn;`
			`MPN_COPY (qp, q2p, qn);`
			`qhl = q2p[qn];`
			`}`
			`else`
			`{`
			`q2p = (mp_ptr) TMP_ALLOC ((nn - dn + 1) * BYTES_PER_MP_LIMB);`
			`rp = (mp_ptr) TMP_ALLOC (dn * BYTES_PER_MP_LIMB);`
			`mpn_tdiv_qr (q2p, rp, 0L, np, nn, dp, dn);`
			`MPN_COPY (np, rp, dn); /* overwrite np area with remainder */`
			`qn = nn - dn;`
			`MPN_COPY (qp, q2p, qn);`
			`qhl = q2p[qn];`
			`}`
			`TMP_FREE;`
			`return qhl;`
			`}`
			`}`


Set native line endings for all .c, .h, as, .asm, .s, .in, .m4, .cc, am 2008-06-25 03:33:36 -04:00

			`/* Square roots table. Generated by the following program:`
for file in $(find -name \.c ) ; do sed -e "s/#include \"gmp\.h\"/#include \"mpir.h\"/g" $file > temp ; mv temp $file ; done for file in $(find -name \.h ) ; do sed -e "s/#include \"gmp\.h\"/#include \"mpir.h\"/g" $file > temp ; mv temp $file ; done for file in $(find -name \*.cc) ; do sed -e "s/#include \"gmp\.h\"/#include \"mpir.h\"/g" $file > temp ; mv temp $file ; done 2009-02-12 05:24:24 -05:00			`#include "mpir.h"`
Set native line endings for all .c, .h, as, .asm, .s, .in, .m4, .cc, am 2008-06-25 03:33:36 -04:00			`main(){mpz_t x;int i;mpz_init(x);for(i=64;i<256;i++){mpz_set_ui(x,256*i);`
			`mpz_sqrt(x,x);mpz_out_str(0,10,x);printf(",");if(i%16==15)printf("\n");}}`
			`*/`
			`static const unsigned char approx_tab[192] =`
			`{`
			`128,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,`
			`143,144,144,145,146,147,148,149,150,150,151,152,153,154,155,155,`
			`156,157,158,159,160,160,161,162,163,163,164,165,166,167,167,168,`
			`169,170,170,171,172,173,173,174,175,176,176,177,178,178,179,180,`
			`181,181,182,183,183,184,185,185,186,187,187,188,189,189,190,191,`
			`192,192,193,193,194,195,195,196,197,197,198,199,199,200,201,201,`
			`202,203,203,204,204,205,206,206,207,208,208,209,209,210,211,211,`
			`212,212,213,214,214,215,215,216,217,217,218,218,219,219,220,221,`
			`221,222,222,223,224,224,225,225,226,226,227,227,228,229,229,230,`
			`230,231,231,232,232,233,234,234,235,235,236,236,237,237,238,238,`
			`239,240,240,241,241,242,242,243,243,244,244,245,245,246,246,247,`
			`247,248,248,249,249,250,250,251,251,252,252,253,253,254,254,255`
			`};`

			`#define HALF_NAIL (GMP_NAIL_BITS / 2)`

			`/* same as mpn_sqrtrem, but for size=1 and {np, 1} normalized */`
			`static mp_size_t`
			`mpn_sqrtrem1 (mp_ptr sp, mp_ptr rp, mp_srcptr np)`
			`{`
			`mp_limb_t np0, s, r, q, u;`
			`int prec;`

			`ASSERT (np[0] >= GMP_NUMB_HIGHBIT / 2);`
			`ASSERT (GMP_LIMB_BITS >= 16);`

			`/* first compute a 8-bit approximation from the high 8-bits of np[0] */`
			`np0 = np[0] << GMP_NAIL_BITS;`
			`q = np0 >> (GMP_LIMB_BITS - 8);`
			`/* 2^6 = 64 <= q < 256 = 2^8 */`
			`s = approx_tab[q - 64]; /* 128 <= s < 255 */`
			`r = (np0 >> (GMP_LIMB_BITS - 16)) - s * s; /* r <= 2s /`
			`if (r > 2 * s)`
			`{`
			`r -= 2 * s + 1;`
			`s++;`
			`}`

			`prec = 8;`
			`np0 <<= 2 * prec;`
			`while (2 * prec < GMP_LIMB_BITS)`
			`{`
			`/* invariant: s has prec bits, and r <= 2s /`
			`r = (r << prec) + (np0 >> (GMP_LIMB_BITS - prec));`
			`np0 <<= prec;`
			`u = 2 * s;`
			`q = r / u;`
			`u = r - q * u;`
			`s = (s << prec) + q;`
			`u = (u << prec) + (np0 >> (GMP_LIMB_BITS - prec));`
			`q = q * q;`
			`r = u - q;`
			`if (u < q)`
			`{`
			`r += 2 * s - 1;`
			`s --;`
			`}`
			`np0 <<= prec;`
			`prec = 2 * prec;`
			`}`

			`ASSERT (2 * prec == GMP_LIMB_BITS); /* GMP_LIMB_BITS must be a power of 2 */`

			`/* normalize back, assuming GMP_NAIL_BITS is even */`
			`ASSERT (GMP_NAIL_BITS % 2 == 0);`
			`sp[0] = s >> HALF_NAIL;`
			`u = s - (sp[0] << HALF_NAIL); /* s mod 2^HALF_NAIL */`
			`r += u * ((sp[0] << (HALF_NAIL + 1)) + u);`
			`r = r >> GMP_NAIL_BITS;`

			`if (rp != NULL)`
			`rp[0] = r;`
			`return r != 0 ? 1 : 0;`
			`}`


			`#define Prec (GMP_NUMB_BITS >> 1)`

			`/* same as mpn_sqrtrem, but for size=2 and {np, 2} normalized`
			`return cc such that {np, 2} = sp[0]^2 + cc2^GMP_NUMB_BITS + rp[0] /`
			`static mp_limb_t`
			`mpn_sqrtrem2 (mp_ptr sp, mp_ptr rp, mp_srcptr np)`
			`{`
			`mp_limb_t qhl, q, u, np0;`
			`int cc;`

			`ASSERT (np[1] >= GMP_NUMB_HIGHBIT / 2);`

			`np0 = np[0];`
			`mpn_sqrtrem1 (sp, rp, np + 1);`
			`qhl = 0;`
			`while (rp[0] >= sp[0])`
			`{`
			`qhl++;`
			`rp[0] -= sp[0];`
			`}`
			`/* now rp[0] < sp[0] < 2^Prec */`
			`rp[0] = (rp[0] << Prec) + (np0 >> Prec);`
			`u = 2 * sp[0];`
			`q = rp[0] / u;`
			`u = rp[0] - q * u;`
			`q += (qhl & 1) << (Prec - 1);`
			`qhl >>= 1; /* if qhl=1, necessary q=0 as qhl2^Prec + q <= 2^Prec /`
			`/* now we have (initial rp[0])<<Prec + np0>>Prec = (qhl<<Prec + q) * (2sp[0]) + u */`
			`sp[0] = ((sp[0] + qhl) << Prec) + q;`
			`cc = u >> Prec;`
			`rp[0] = ((u << Prec) & GMP_NUMB_MASK) + (np0 & (((mp_limb_t) 1 << Prec) - 1));`
			`/* subtract q * q or qhl2^(2Prec) from rp */`
			`cc -= mpn_sub_1 (rp, rp, 1, q * q) + qhl;`
			`/* now subtract 2q2^Prec + 2^(2Prec) if qhl is set /`
			`if (cc < 0)`
			`{`
			`cc += sp[0] != 0 ? mpn_add_1 (rp, rp, 1, sp[0]) : 1;`
			`cc += mpn_add_1 (rp, rp, 1, --sp[0]);`
			`}`

			`return cc;`
			`}`

			`/* writes in {sp, n} the square root (rounded towards zero) of {np, 2n},`
			`and in {np, n} the low n limbs of the remainder, returns the high`
			`limb of the remainder (which is 0 or 1).`
			`Assumes {np, 2n} is normalized, i.e. np[2n-1] >= B/4`
			`where B=2^GMP_NUMB_BITS. */`
			`static mp_limb_t`
			`mpn_dc_sqrtrem (mp_ptr sp, mp_ptr np, mp_size_t n)`
			`{`
			`mp_limb_t q; /* carry out of {sp, n} */`
			`int c, b; /* carry out of remainder */`
			`mp_size_t l, h;`

			`ASSERT (np[2 * n - 1] >= GMP_NUMB_HIGHBIT / 2);`

			`if (n == 1)`
			`c = mpn_sqrtrem2 (sp, np, np);`
			`else`
			`{`
			`l = n / 2;`
			`h = n - l;`
			`q = mpn_dc_sqrtrem (sp + l, np + 2 * l, h);`
			`if (q != 0)`
			`mpn_sub_n (np + 2 * l, np + 2 * l, sp + l, h);`
removed obsolete fns mpn_divrem mpn_divmod and all referances to them 2009-08-11 18:11:33 -04:00			`q += mpn_intdivrem (sp, 0, np + l, n, sp + l, h);`
Set native line endings for all .c, .h, as, .asm, .s, .in, .m4, .cc, am 2008-06-25 03:33:36 -04:00			`c = sp[0] & 1;`
merged k8-branch into trunk , autotools , few handbits , windows bits just copyed over 2009-02-19 18:22:30 -05:00			`mpn_rshift1 (sp, sp, l);`
Set native line endings for all .c, .h, as, .asm, .s, .in, .m4, .cc, am 2008-06-25 03:33:36 -04:00			`sp[l - 1] \|= (q << (GMP_NUMB_BITS - 1)) & GMP_NUMB_MASK;`
			`q >>= 1;`
			`if (c != 0)`
			`c = mpn_add_n (np + l, np + l, sp + l, h);`
			`mpn_sqr_n (np + n, sp, l);`
			`b = q + mpn_sub_n (np, np, np + n, 2 * l);`
			`c -= (l == h) ? b : mpn_sub_1 (np + 2 * l, np + 2 * l, 1, (mp_limb_t) b);`
			`q = mpn_add_1 (sp + l, sp + l, h, q);`

			`if (c < 0)`
			`{`
			`c += mpn_addmul_1 (np, sp, n, CNST_LIMB(2)) + 2 * q;`
			`c -= mpn_sub_1 (np, np, n, CNST_LIMB(1));`
			`q -= mpn_sub_1 (sp, sp, n, CNST_LIMB(1));`
			`}`
			`}`

			`return c;`
			`}`


			`mp_size_t`
			`mpn_sqrtrem (mp_ptr sp, mp_ptr rp, mp_srcptr np, mp_size_t nn)`
			`{`
			`mp_limb_t *tp, s0[1], cc, high, rl;`
			`int c;`
			`mp_size_t rn, tn;`
			`TMP_DECL;`

			`ASSERT (nn >= 0);`
			`ASSERT_MPN (np, nn);`

			`/* If OP is zero, both results are zero. */`
			`if (nn == 0)`
			`return 0;`

			`ASSERT (np[nn - 1] != 0);`
			`ASSERT (rp == NULL \|\| MPN_SAME_OR_SEPARATE_P (np, rp, nn));`
			`ASSERT (rp == NULL \|\| ! MPN_OVERLAP_P (sp, (nn + 1) / 2, rp, nn));`
			`ASSERT (! MPN_OVERLAP_P (sp, (nn + 1) / 2, np, nn));`

			`high = np[nn - 1];`
			`if (nn == 1 && (high & GMP_NUMB_HIGHBIT))`
			`return mpn_sqrtrem1 (sp, rp, np);`
			`count_leading_zeros (c, high);`
			`c -= GMP_NAIL_BITS;`

			`c = c / 2; /* we have to shift left by 2c bits to normalize {np, nn} */`
			`tn = (nn + 1) / 2; /* 2tn is the smallest even integer >= nn /`

			`TMP_MARK;`
			`if (nn % 2 != 0 \|\| c > 0)`
			`{`
			`tp = TMP_ALLOC_LIMBS (2 * tn);`
			`tp[0] = 0; /* needed only when 2tn > nn, but saves a test /`
			`if (c != 0)`
			`mpn_lshift (tp + 2 * tn - nn, np, nn, 2 * c);`
			`else`
			`MPN_COPY (tp + 2 * tn - nn, np, nn);`
			`rl = mpn_dc_sqrtrem (sp, tp, tn);`
			`/* We have 2^(2k)N = S^2 + R where k = c + (2tn-nn)GMP_NUMB_BITS/2,`
			`thus 2^(2k)N = (S-s0)^2 + 2Ss0 - s0^2 + R where s0=S mod 2^k /`
			`c += (nn % 2) * GMP_NUMB_BITS / 2; /* c now represents k */`
			`s0[0] = sp[0] & (((mp_limb_t) 1 << c) - 1); /* S mod 2^k */`
			`rl += mpn_addmul_1 (tp, sp, tn, 2 * s0[0]); /* R = R + 2s0S */`
			`cc = mpn_submul_1 (tp, s0, 1, s0[0]);`
			`rl -= (tn > 1) ? mpn_sub_1 (tp + 1, tp + 1, tn - 1, cc) : cc;`
			`mpn_rshift (sp, sp, tn, c);`
			`tp[tn] = rl;`
			`if (rp == NULL)`
			`rp = tp;`
			`c = c << 1;`
			`if (c < GMP_NUMB_BITS)`
			`tn++;`
			`else`
			`{`
			`tp++;`
			`c -= GMP_NUMB_BITS;`
			`}`
			`if (c != 0)`
			`mpn_rshift (rp, tp, tn, c);`
			`else`
			`MPN_COPY_INCR (rp, tp, tn);`
			`rn = tn;`
			`}`
			`else`
			`{`
			`if (rp == NULL)`
			`rp = TMP_ALLOC_LIMBS (nn);`
			`if (rp != np)`
			`MPN_COPY (rp, np, nn);`
			`rn = tn + (rp[tn] = mpn_dc_sqrtrem (sp, rp, tn));`
			`}`

			`MPN_NORMALIZE (rp, rn);`

			`TMP_FREE;`
			`return rn;`
			`}`