/* mpir/mpn/generic/invert.c */

/* floating-point Newton, with inversion in 3M(n) */

/* mpn_invert

Copyright 2009 Paul Zimmermann
Copyright 2009 William Hart

This file is part of the MPIR Library.

The MPIR Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.

The MPIR Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the MPIR Library; see the file COPYING.LIB.  If not, write to
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */

#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include "mpir.h"
#include "gmp-impl.h"
#include "longlong.h"

/* Limb-typed constants.  The expansions are fully parenthesized so that they
   remain a single operand wherever they are substituted. */
#define ZERO ((mp_limb_t) 0)
#define ONE  ((mp_limb_t) 1)
/* Operand size in limbs: mpn_invert uses a plain mpn_mul product while
   n <= WRAP_AROUND_BOUND and the wraparound mpn_mul_fft product above it. */
#define WRAP_AROUND_BOUND 1500

/* Check whether X = B^n + {xp, n} (implicit most significant limb) satisfies

              A*X < B^(2n) <= A*(X+1)

   for A = {ap, n}, where B = 2^GMP_NUMB_BITS.

   Returns 1 when both inequalities hold and 0 otherwise.  The operands are
   only read; scratch space of 4*n limbs comes from the TMP allocator and is
   released before returning. */
int
mpn_is_invert (mp_srcptr xp, mp_srcptr ap, mp_size_t n)
{
  int res = 0;
  mp_ptr tp, up;
  mp_limb_t cy;
  TMP_DECL;

  TMP_MARK;
  tp = TMP_ALLOC_LIMBS (2 * n);
  up = TMP_ALLOC_LIMBS (2 * n);

  /* first check X*A < B^(2*n) */
  mpn_mul_n (tp, xp, ap, n);
  cy = mpn_add_n (tp + n, tp + n, ap, n); /* A * msb(X) */

  /* A carry out means X*A >= B^(2n): X is too large and the answer is 0.
     The second test must not run in that case, because the negation below
     is only meaningful modulo B^(2n) and could wrap to a small value. */
  if (cy == 0)
    {
      /* now check B^(2n) - X*A <= A */
      mpn_com_n (tp, tp, 2 * n);
      mpn_add_1 (tp, tp, 2 * n, 1); /* B^(2n) - X*A */
      /* zero-extend A to 2n limbs so that it can be compared directly */
      MPN_ZERO (up, 2 * n);
      MPN_COPY (up, ap, n);
      res = mpn_cmp (tp, up, 2 * n) <= 0;
    }

  TMP_FREE;
  return res;
}

/* Input: A = {ap, n} with most significant bit set.
   Output: X = B^n + {xp, n} where B = 2^GMP_NUMB_BITS.

   X is a lower approximation of B^(2n)/A with implicit msb.
   More precisely, one has:

              A*X < B^(2n) <= A*(X+1)

   or X = ceil(B^(2n)/A) - 1.

   Only the n low limbs of X are written to {xp, n}; the leading B^n is
   implicit.  Three cases are handled:

     n == 1     a single invert_limb call;
     n == 2     invert the high limb, derive the low limb from the remainder,
                then adjust upwards;
     otherwise  recursively invert the h = n - (n-1)/2 high limbs, then
                compute the remaining l = (n-1)/2 low limbs from the
                remainder B^(n+h) - A*Xh.
*/
void
mpn_invert (mp_ptr xp, mp_srcptr ap, mp_size_t n)
{
  if (n == 1)
    {
      /* invert_limb returns min(B-1, floor(B^2/ap[0])-B),
	 which is B-1 when ap[0]=B/2, and 1 when ap[0]=B-1.
	 For X=B+xp[0], we have A*X < B^2 <= A*(X+1) where
	 the equality holds only when A=B/2.

	 We thus have A*X < B^2 <= A*(X+1).
      */
      invert_limb (xp[0], ap[0]);
    }
  else if (n == 2)
    {
      mp_limb_t tp[4], up[2], sp[2], cy;

      /* Invert the high limb alone, then form cy:{tp, 4} = A * Xh * B with
	 Xh = B + xp[1]: mpn_mul_1 supplies A * xp[1] and the mpn_add_n adds
	 the contribution of the implicit msb of Xh. */
      tp[0] = ZERO;
      invert_limb (xp[1], ap[1]);
      tp[3] = mpn_mul_1 (tp + 1, ap, 2, xp[1]);
      cy = mpn_add_n (tp + 2, tp + 2, ap, 2);
      /* each pass lowers Xh by one and the product by A (at limb offset 1) */
      while (cy) /* Xh is too large */
	{
	  xp[1] --;
	  cy -= mpn_sub (tp + 1, tp + 1, 3, ap, 2);
	}
      /* tp[3] should be 111...111 */

      /* {sp, 2} = two's complement of {tp + 1, 2} */
      mpn_com_n (sp, tp + 1, 2);
      cy = mpn_add_1 (sp, sp, 2, ONE);
      /* cy should be 0 */

      /* up[1] = high limb of sp[1] * xp[1], plus sp[1] for the implicit msb
	 of Xh; this becomes the low limb of X */
      up[1] = mpn_mul_1 (up, sp + 1, 1, xp[1]);
      cy = mpn_add_1 (up + 1, up + 1, 1, sp[1]);
      /* cy should be 0 */
      xp[0] = up[1];

      /* update tp */
      /* add A * xp[0] so that {tp, 4} accounts for the low limb of X too */
      cy = mpn_addmul_1 (tp, ap, 2, xp[0]);
      cy = mpn_add_1 (tp + 2, tp + 2, 2, cy);
      /* Keep adding A to {tp, 4}; every addition that does not carry out of
	 the four limbs bumps X by one.  The loop ends on the first carry. */
      do
	{
	  cy = mpn_add (tp, tp, 4, ap, 2);
	  if (cy == ZERO)
	    mpn_add_1 (xp, xp, 2, ONE);
	}
      while (cy == ZERO);

      /* now A*X < B^4 <= A*(X+1) */
    }
  else
    {
      mp_size_t l, h;
      mp_ptr tp, up;
      mp_limb_t cy, th;
      int special = 0;
      TMP_DECL;

      /* split X into l low limbs and h high limbs, with h >= l */
      l = (n - 1) / 2;
      h = n - l;

      /* recursively invert the h high limbs of A into the h high limbs of X */
      mpn_invert (xp + l, ap + l, h);

      TMP_MARK;
      tp = TMP_ALLOC_LIMBS (n + h);
      up = TMP_ALLOC_LIMBS (2 * h);

      /* Form cy:{tp, n+h} = A * Xh, where Xh = B^h + {xp + l, h}. */
      if (n <= WRAP_AROUND_BOUND)
	{
          /* full product; the mpn_add_n adds A for the implicit msb of Xh */
          mpn_mul (tp, ap, n, xp + l, h);
          cy = mpn_add_n (tp + h, tp + h, ap, n);
        }
      else
	{
          /* wraparound product: multiply modulo B^m+1 with m < n + h, then
             reconstruct the full-length value */
          mp_size_t m = n + 1;
          unsigned long k;
          int cc;

          k = mpn_fft_best_k (m, 0);
          m = mpn_fft_next_size (m, k);
          /* we have m >= n + 1 by construction, thus m > h */
          ASSERT(m < n + h);
          cy = mpn_mul_fft (tp, m, ap, n, xp + l, h, k);
          /* cy, {tp, m} = A * {xp + l, h} mod (B^m+1) */
          /* Add A * B^h for the implicit msb of Xh: the low m-h limbs of A
             are added at offset h, the remaining n+h-m limbs wrap around
             and are subtracted at offset 0. */
          cy += mpn_add_n (tp + h, tp + h, ap, m - h);
          cc = mpn_sub_n (tp, tp, ap + m - h, n + h - m);
          cc = mpn_sub_1 (tp + n + h - m, tp + n + h - m, 2 * m - n - h, cc);
          if (cc > cy) /* can only occur if cc=1 and cy=0 */
            cy = mpn_add_1 (tp, tp, m, ONE);
          else
            cy -= cc;
          /* cy, {tp, m} = A * Xh */

          /* add B^(n+h) + B^(n+h-m) */
          MPN_ZERO (tp + m, n + h - m);
          tp[m] = cy;
          /* note: since tp[n+h-1] is either 0, or cy<=1 if m=n+h-1,
             the mpn_incr_u() below cannot produce a carry */
          mpn_incr_u (tp + n + h - m, ONE);
          cy = 1;
          do /* check if T >= B^(n+h) + 2*B^n */
            {
              mp_size_t i;

              if (cy == ZERO)
                break; /* surely T < B^(n+h) */
              if (cy == ONE)
                {
                  /* scan limbs n+h-1 down to n+1 for a nonzero limb; stop
                     when everything above limb n is zero and tp[n] < 2 */
                  for (i = n + h - 1; tp[i] == ZERO && i > n; i--);
                  if (i == n && tp[i] < (mp_limb_t) 2)
                    break;
                }
              /* subtract B^m+1 */
              cy -= mpn_sub_1 (tp, tp, n + h, ONE);
              cy -= mpn_sub_1 (tp + m, tp + m, n + h - m, ONE);
            }
          while (1);
        }

      /* while the product still carries out of n+h limbs, lower Xh by one
         and the product by A */
      while (cy)
	{
	  mpn_sub_1 (xp + l, xp + l, h, ONE);
	  cy -= mpn_sub (tp, tp, n + h, ap, n);
	}

      /* Negate the low n limbs of tp in place (two's complement); th is
         the complement of limb n plus the carry out of those n limbs. */
      mpn_com_n (tp, tp, n);
      th = ~tp[n] + mpn_add_1 (tp, tp, n, ONE);
      /* {up, 2h} = (top h limbs of the negated value) * {xp + l, h}; the
         mpn_add_n adds the term for the implicit msb of Xh */
      mpn_mul_n (up, tp + l, xp + l, h);
      cy = mpn_add_n (up + h, up + h, tp + l, h);
      /* nonzero th: add Xh once more, i.e. {xp + l, h} at offset h plus ONE
         for its implicit msb (presumably th is 1 here -- TODO confirm) */
      if (th != ZERO)
      {
         cy += ONE + mpn_add_n (up + h, up + h, xp + l, h);
      }
      /* up[2*h-l-1] is the limb just below the l limbs copied into X.  The
         unsigned test is true exactly when that limb is within 4 of wrapping
         (>= B-4); the result is then re-checked with mpn_is_invert below. */
      if (up[2*h-l-1] + 4 <= CNST_LIMB(3)) special = 1;
      /* the l low limbs of X come from the top of up; cy goes into Xh */
      MPN_COPY (xp, up + 2 * h - l, l);
      mpn_add_1 (xp + l, xp + l, h, cy);
      TMP_FREE;
      /* borderline case: if X fails the check, bump it by one */
      if ((special) && !mpn_is_invert(xp, ap, n))
         mpn_add_1 (xp, xp, n, 1);
    }
}

/* Derive an m-limb inverse from an existing n-limb one.

   The m most significant limbs of {xp, n} are copied to {x_new, m} and then
   incremented until, with A' = {ap + n - m, m} (the m most significant limbs
   of A) and X' = B^m + {x_new, m}, the remainder B^(2m) - A'*X' is at most
   A'.  Scratch space of 2*m limbs comes from the TMP allocator.

   NOTE(review): presumably {xp, n} is the mpn_invert result for {ap, n},
   m <= n, and the truncated X' never exceeds the true inverse of A' (the
   carry out of the product is not examined) -- confirm against the callers. */
void mpn_invert_truncate(mp_ptr x_new, mp_size_t m, mp_srcptr xp, mp_size_t n, mp_srcptr ap)
{
  mp_ptr tp;
  TMP_DECL;

  TMP_MARK;
  tp = TMP_ALLOC_LIMBS (2 * m);
  
  /* start from the m most significant limbs of X and of A */
  MPN_COPY(x_new, xp + n - m, m);
  ap += (n - m);

  mpn_mul_n (tp, x_new, ap, m);
  mpn_add_n (tp + m, tp + m, ap, m); /* A * msb(X) */
  
  /* now check B^(2m) - X*A <= A */
  mpn_com_n (tp, tp, 2 * m);
  mpn_add_1 (tp, tp, 2 * m, 1); /* B^(2m) - X*A */
  
  /* Each pass raises X by one and lowers the remainder by A.  Only limb m
     is examined above the low m limbs of the remainder. */
  while (tp[m] || mpn_cmp (tp, ap, m) > 0)
  {
     mpn_add_1(x_new, x_new, m, 1);
     tp[m] -= mpn_sub_n(tp, tp, ap, m);
  }
  TMP_FREE;
}
Got invett and sb_divappr_q working (modulo some linker issue). 2009-09-29 18:55:10 -04:00			`/* floating-point Newton, with inversion in 3M(n) */`

			`/* mpn_invert`

			`Copyright 2009 Paul Zimmermann`
Added my copyright to the file mpn/generic/invert.c. 2009-12-09 13:56:38 -05:00			`Copyright 2009 William Hart`
Got invett and sb_divappr_q working (modulo some linker issue). 2009-09-29 18:55:10 -04:00
			`This file is part of the MPIR Library.`

			`The MPIR Library is free software; you can redistribute it and/or modify`
			`it under the terms of the GNU Lesser General Public License as published by`
			`the Free Software Foundation; either version 2.1 of the License, or (at your`
			`option) any later version.`

			`The MPIR Library is distributed in the hope that it will be useful, but`
			`WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY`
			`or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public`
			`License for more details.`

			`You should have received a copy of the GNU Lesser General Public License`
			`along with the MPIR Library; see the file COPYING.LIB. If not, write to`
			`the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,`
			`MA 02110-1301, USA. */`

			`#include <stdio.h>`
			`#include <stdlib.h>`
			`#include <assert.h>`
Fixed a missing mpir.h. 2009-09-29 19:19:37 -04:00			`#include "mpir.h"`
Got invett and sb_divappr_q working (modulo some linker issue). 2009-09-29 18:55:10 -04:00			`#include "gmp-impl.h"`
			`#include "longlong.h"`

			`#define ZERO (mp_limb_t) 0`
			`#define ONE (mp_limb_t) 1`
Removed a mid block declaration. 2009-12-09 13:40:54 -05:00			`#define WRAP_AROUND_BOUND 1500`
Got invett and sb_divappr_q working (modulo some linker issue). 2009-09-29 18:55:10 -04:00
Corrected some bugs in the division code. 2010-02-21 10:47:37 -05:00			`int`
			`mpn_is_invert (mp_srcptr xp, mp_srcptr ap, mp_size_t n)`
Got invett and sb_divappr_q working (modulo some linker issue). 2009-09-29 18:55:10 -04:00			`{`
Removed a mid block declaration. 2009-12-09 13:40:54 -05:00			`int res = 1;`
			`mp_size_t i;`
			`mp_ptr tp, up;`
			`mp_limb_t cy;`
			`TMP_DECL;`
Got invett and sb_divappr_q working (modulo some linker issue). 2009-09-29 18:55:10 -04:00
Removed a mid block declaration. 2009-12-09 13:40:54 -05:00			`TMP_MARK;`
			`tp = TMP_ALLOC_LIMBS (2 * n);`
			`up = TMP_ALLOC_LIMBS (2 * n);`

			`/* first check X*A < B^(2*n) */`
			`mpn_mul_n (tp, xp, ap, n);`
			`cy = mpn_add_n (tp + n, tp + n, ap, n); /* A * msb(X) */`
			`if (cy != 0)`
			`res = 0;`

			`/* now check B^(2n) - X*A <= A */`
			`mpn_com_n (tp, tp, 2 * n);`
			`mpn_add_1 (tp, tp, 2 * n, 1); /* B^(2n) - X*A */`
			`MPN_ZERO (up, 2 * n);`
			`MPN_COPY (up, ap, n);`
			`res = mpn_cmp (tp, up, 2 * n) <= 0;`
			`TMP_FREE;`
			`return res;`
Got invett and sb_divappr_q working (modulo some linker issue). 2009-09-29 18:55:10 -04:00			`}`

			`/* Input: A = {ap, n} with most significant bit set.`
			`Output: X = B^n + {xp, n} where B = 2^GMP_NUMB_BITS.`

			`X is a lower approximation of B^(2n)/A with implicit msb.`
			`More precisely, one has:`

			`A*X < B^(2n) <= A*(X+1)`

			`or X = ceil(B^(2n)/A) - 1.`
			`*/`
			`void`
			`mpn_invert (mp_ptr xp, mp_srcptr ap, mp_size_t n)`
			`{`
			`if (n == 1)`
			`{`
			`/* invert_limb returns min(B-1, floor(B^2/ap[0])-B),`
			`which is B-1 when ap[0]=B/2, and 1 when ap[0]=B-1.`
			`For X=B+xp[0], we have A*X < B^2 <= A*(X+1) where`
			`the equality holds only when A=B/2.`

			`We thus have A*X < B^2 <= A*(X+1).`
			`*/`
			`invert_limb (xp[0], ap[0]);`
			`}`
			`else if (n == 2)`
			`{`
			`mp_limb_t tp[4], up[2], sp[2], cy;`

			`tp[0] = ZERO;`
			`invert_limb (xp[1], ap[1]);`
			`tp[3] = mpn_mul_1 (tp + 1, ap, 2, xp[1]);`
			`cy = mpn_add_n (tp + 2, tp + 2, ap, 2);`
			`while (cy) /* Xh is too large */`
			`{`
			`xp[1] --;`
			`cy -= mpn_sub (tp + 1, tp + 1, 3, ap, 2);`
			`}`
			`/* tp[3] should be 111...111 */`

			`mpn_com_n (sp, tp + 1, 2);`
			`cy = mpn_add_1 (sp, sp, 2, ONE);`
			`/* cy should be 0 */`

			`up[1] = mpn_mul_1 (up, sp + 1, 1, xp[1]);`
			`cy = mpn_add_1 (up + 1, up + 1, 1, sp[1]);`
			`/* cy should be 0 */`
			`xp[0] = up[1];`

			`/* update tp */`
			`cy = mpn_addmul_1 (tp, ap, 2, xp[0]);`
			`cy = mpn_add_1 (tp + 2, tp + 2, 2, cy);`
			`do`
			`{`
			`cy = mpn_add (tp, tp, 4, ap, 2);`
			`if (cy == ZERO)`
			`mpn_add_1 (xp, xp, 2, ONE);`
			`}`
			`while (cy == ZERO);`

			`/* now A*X < B^4 <= A*(X+1) */`
			`}`
			`else`
			`{`
			`mp_size_t l, h;`
			`mp_ptr tp, up;`
			`mp_limb_t cy, th;`
Removed a mid block declaration. 2009-12-09 13:40:54 -05:00			`int special = 0;`
Got invett and sb_divappr_q working (modulo some linker issue). 2009-09-29 18:55:10 -04:00			`TMP_DECL;`

			`l = (n - 1) / 2;`
			`h = n - l;`

			`mpn_invert (xp + l, ap + l, h);`

			`TMP_MARK;`
			`tp = TMP_ALLOC_LIMBS (n + h);`
			`up = TMP_ALLOC_LIMBS (2 * h);`

			`if (n <= WRAP_AROUND_BOUND)`
			`{`
			`mpn_mul (tp, ap, n, xp + l, h);`
			`cy = mpn_add_n (tp + h, tp + h, ap, n);`
			`}`
			`else`
			`{`
			`mp_size_t m = n + 1;`
			`unsigned long k;`
			`int cc;`

			`k = mpn_fft_best_k (m, 0);`
			`m = mpn_fft_next_size (m, k);`
			`/* we have m >= n + 1 by construction, thus m > h */`
			`ASSERT(m < n + h);`
			`cy = mpn_mul_fft (tp, m, ap, n, xp + l, h, k);`
			`/* cy, {tp, m} = A * {xp + l, h} mod (B^m+1) */`
			`cy += mpn_add_n (tp + h, tp + h, ap, m - h);`
			`cc = mpn_sub_n (tp, tp, ap + m - h, n + h - m);`
			`cc = mpn_sub_1 (tp + n + h - m, tp + n + h - m, 2 * m - n - h, cc);`
			`if (cc > cy) /* can only occur if cc=1 and cy=0 */`
			`cy = mpn_add_1 (tp, tp, m, ONE);`
			`else`
			`cy -= cc;`
			`/* cy, {tp, m} = A * Xh */`

			`/* add B^(n+h) + B^(n+h-m) */`
			`MPN_ZERO (tp + m, n + h - m);`
			`tp[m] = cy;`
			`/* note: since tp[n+h-1] is either 0, or cy<=1 if m=n+h-1,`
			`the mpn_incr_u() below cannot produce a carry */`
			`mpn_incr_u (tp + n + h - m, ONE);`
			`cy = 1;`
			`do /* check if T >= B^(n+h) + 2*B^n */`
			`{`
			`mp_size_t i;`

			`if (cy == ZERO)`
			`break; /* surely T < B^(n+h) */`
			`if (cy == ONE)`
			`{`
			`for (i = n + h - 1; tp[i] == ZERO && i > n; i--);`
			`if (i == n && tp[i] < (mp_limb_t) 2)`
			`break;`
			`}`
			`/* subtract B^m+1 */`
			`cy -= mpn_sub_1 (tp, tp, n + h, ONE);`
			`cy -= mpn_sub_1 (tp + m, tp + m, n + h - m, ONE);`
			`}`
			`while (1);`
			`}`

			`while (cy)`
			`{`
			`mpn_sub_1 (xp + l, xp + l, h, ONE);`
			`cy -= mpn_sub (tp, tp, n + h, ap, n);`
			`}`

			`mpn_com_n (tp, tp, n);`
			`th = ~tp[n] + mpn_add_1 (tp, tp, n, ONE);`
			`mpn_mul_n (up, tp + l, xp + l, h);`
			`cy = mpn_add_n (up + h, up + h, tp + l, h);`
			`if (th != ZERO)`
Removed a mid block declaration. 2009-12-09 13:40:54 -05:00			`{`
			`cy += ONE + mpn_add_n (up + h, up + h, xp + l, h);`
			`}`
			`if (up[2*h-l-1] + 4 <= CNST_LIMB(3)) special = 1;`
Got invett and sb_divappr_q working (modulo some linker issue). 2009-09-29 18:55:10 -04:00			`MPN_COPY (xp, up + 2 * h - l, l);`
			`mpn_add_1 (xp + l, xp + l, h, cy);`
			`TMP_FREE;`
Corrected some bugs in the division code. 2010-02-21 10:47:37 -05:00			`if ((special) && !mpn_is_invert(xp, ap, n))`
Removed a mid block declaration. 2009-12-09 13:40:54 -05:00			`mpn_add_1 (xp, xp, n, 1);`
Got invett and sb_divappr_q working (modulo some linker issue). 2009-09-29 18:55:10 -04:00			`}`
			`}`
Fixed some incorrect things in the division code. 2010-02-21 19:45:04 -05:00
			`void mpn_invert_truncate(mp_ptr x_new, mp_size_t m, mp_srcptr xp, mp_size_t n, mp_srcptr ap)`
			`{`
			`mp_ptr tp;`
			`mp_limb_t cy;`
			`TMP_DECL;`

			`TMP_MARK;`
			`tp = TMP_ALLOC_LIMBS (2 * m);`

			`MPN_COPY(x_new, xp + n - m, m);`
			`ap += (n - m);`

			`mpn_mul_n (tp, x_new, ap, m);`
			`mpn_add_n (tp + m, tp + m, ap, m); /* A * msb(X) */`

			`/* now check B^(2n) - X*A <= A */`
			`mpn_com_n (tp, tp, 2 * m);`
			`mpn_add_1 (tp, tp, 2 * m, 1); /* B^(2m) - X*A */`

			`while (tp[m] || mpn_cmp (tp, ap, m) > 0)`
			`{`
			`mpn_add_1(x_new, x_new, m, 1);`
			`tp[m] -= mpn_sub_n(tp, tp, ap, m);`
			`}`
			`TMP_FREE;`
			`}`