mpir/mpn/generic/ngcd.c

/* Sch<63>nhage's 1987 algorithm, reorganized into hgcd form 

Copyright 2004, 2005 Niels M<EFBFBD>ller

This file is part of the MPIR Library.

The MPIR Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published
by the Free Software Foundation; either version 2.1 of the License, or (at
your option) any later version.

The MPIR Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the MPIR Library; see the file COPYING.LIB.  If not, write
to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
Boston, MA 02110-1301, USA.
*/

#include <stdio.h>  /* for NULL */

#include "mpir.h"
#include "gmp-impl.h"
#include "longlong.h"

/* For input of size n, matrix elements are of size at most ceil(n/2)
   - 1, but we need one limb extra. */

void
mpn_ngcd_matrix_init (struct ngcd_matrix *M, mp_size_t n, mp_ptr p);

#define NHGCD_BASE_ITCH MPN_NGCD_STEP_ITCH

/* Reduces a,b until |a-b| fits in n/2 + 1 limbs. Constructs matrix M
   with elements of size at most (n+1)/2 - 1. Returns new size of a,
   b, or zero if no reduction is possible. */
static mp_size_t
nhgcd_base (mp_ptr ap, mp_ptr bp, mp_size_t n,
	    struct ngcd_matrix *M, mp_ptr tp);

/* Size analysis for nhgcd:

   For the recursive calls, we have n1 <= ceil(n / 2). Then the
   storage need is determined by the storage for the recursive call
   computing M1, and ngcd_matrix_adjust and ngcd_matrix_mul calls that use M1
   (after this, the storage needed for M1 can be recycled).

   Let S(r) denote the required storage. For M1 we need 5 * ceil(n1/2)
   = 5 * ceil(n/4), and for the ngcd_matrix_adjust call, we need n + 2. In
   total, 5 * ceil(n/4) + n + 2 <= 9 ceil(n/4) + 2.

   For the recursive call, we need S(n1) = S(ceil(n/2)).

   S(n) <= 9*ceil(n/4) + 2 + S(ceil(n/2))
        <= 9*(ceil(n/4) + ... + ceil(n/2^(1+k))) + 2k + S(ceil(n/2^k))
        <= 9*(2 ceil(n/4) + k) + 2k + S(n/2^k)   
	<= 18 ceil(n/4) + 11k + S(n/2^k)
	
*/

mp_size_t
mpn_nhgcd_itch (mp_size_t n);


/* Reduces a,b until |a-b| fits in n/2 + 1 limbs. Constructs matrix M
   with elements of size at most (n+1)/2 - 1. Returns new size of a,
   b, or zero if no reduction is possible. */

mp_size_t
mpn_nhgcd (mp_ptr ap, mp_ptr bp, mp_size_t n,
	   struct ngcd_matrix *M, mp_ptr tp);


#define EVEN_P(x) (((x) & 1) == 0)

mp_size_t
mpn_ngcd (mp_ptr gp, mp_ptr ap, mp_size_t an, mp_ptr bp, mp_size_t n)
{
  mp_size_t init_scratch;
  mp_size_t scratch;
  mp_ptr tp;
  TMP_DECL;
  
  ASSERT (an >= n);

  if (BELOW_THRESHOLD (n, GCD_THRESHOLD))
    return mpn_lgcd (gp, ap, an, bp, n);

  init_scratch = MPN_NGCD_MATRIX_INIT_ITCH ((n+1)/2);
  scratch = mpn_nhgcd_itch ((n+1)/2);

  /* Space needed for mpn_ngcd_matrix_adjust */
  if (scratch < 2*n)
    scratch = 2*n;

  TMP_MARK;
  
  if (an + 1 > init_scratch + scratch)
    tp = TMP_ALLOC_LIMBS (an + 1);
  else
    tp = TMP_ALLOC_LIMBS (init_scratch + scratch);
  
  if (an > n)
    {
      mp_ptr rp = tp;
      mp_ptr qp = rp + n;

      mpn_tdiv_qr (qp, rp, 0, ap, an, bp, n);
      MPN_COPY (ap, rp, n);
      an = n;
      MPN_NORMALIZE (ap, an);
      if (an == 0)
	{	  
	  MPN_COPY (gp, bp, n);
	  TMP_FREE;
	  return n;
	}
    }

  while (ABOVE_THRESHOLD (n, GCD_THRESHOLD))
    {
      struct ngcd_matrix M;
      mp_size_t p = n/2;
      mp_size_t nn;
      
      mpn_ngcd_matrix_init (&M, n - p, tp);
      nn = mpn_nhgcd (ap + p, bp + p, n - p, &M, tp + init_scratch);
      if (nn > 0)
	/* Needs 2*(p + M->n) <= 2*(floor(n/2) + ceil(n/2) - 1)
	   = 2(n-1) */
	n = mpn_ngcd_matrix_adjust (&M, p + nn, ap, bp, p, tp + init_scratch);

      else	
	{
	  mp_size_t gn;
	  n = mpn_ngcd_subdiv_step (gp, &gn, ap, bp, n, tp);
	  if (n == 0)
	    {
	      TMP_FREE;
	      return gn;
	    }
	}
    }

  ASSERT (ap[n-1] > 0 || bp[n-1] > 0);
#if 0
  /* FIXME: We may want to use lehmer on some systems. */
  n = mpn_ngcd_lehmer (gp, ap, bp, n, tp);

  TMP_FREE;
  return n;
#endif

  if (ap[n-1] < bp[n-1])
    MP_PTR_SWAP (ap, bp);

  an = n;
  MPN_NORMALIZE (bp, n);

  if (n == 0)
    {
      MPN_COPY (gp, ap, an);
      TMP_FREE;
      return an;
    }

  if (EVEN_P (bp[0]))
    {
      /* Then a must be odd (since the calling convention implies that
	 there's no common factor of 2) */
      ASSERT (!EVEN_P (ap[0]));

      while (bp[0] == 0)
	{
	  bp++;
	  n--;
	}

      if (EVEN_P(bp[0]))
	{
	  int count;
	  count_trailing_zeros (count, bp[0]);
	  ASSERT_NOCARRY (mpn_rshift (bp, bp, n, count));
	  n -= (bp[n-1] == 0);
	}
    }

  TMP_FREE;  
  return mpn_lgcd (gp, ap, an, bp, n);
}
-												Added copyright info to GCD and XGCD code.

											
										
										
											2009-10-19 01:49:29 -04:00
+								/* Sch<63>nhage's 1987 algorithm, reorganized into hgcd form
-												Incorporated Niels Mollers lgpl 2.1 gcd code into trunk

											
										
										
											2008-10-11 02:56:15 -04:00
-												Fixed the xgcd normalisation issue and redid the tuning code for gcd and 
xgcd.


											
										
										
											2010-05-11 14:37:34 -04:00
+								Copyright 2004, 2005 Niels M<EFBFBD>ller
-												Incorporated Niels Mollers lgpl 2.1 gcd code into trunk

											
										
										
											2008-10-11 02:56:15 -04:00
-												Added copyright info to GCD and XGCD code.

											
										
										
											2009-10-19 01:49:29 -04:00
+								This file is part of the MPIR Library.
-												Incorporated Niels Mollers lgpl 2.1 gcd code into trunk

											
										
										
											2008-10-11 02:56:15 -04:00
-												Added copyright info to GCD and XGCD code.

											
										
										
											2009-10-19 01:49:29 -04:00
+								The MPIR Library is free software; you can redistribute it and/or modify
 								it under the terms of the GNU Lesser General Public License as published
 								by the Free Software Foundation; either version 2.1 of the License, or (at
 								your option) any later version.
-												Incorporated Niels Mollers lgpl 2.1 gcd code into trunk

											
										
										
											2008-10-11 02:56:15 -04:00
-												Added copyright info to GCD and XGCD code.

											
										
										
											2009-10-19 01:49:29 -04:00
+								The MPIR Library is distributed in the hope that it will be useful, but
 								WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 								or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
 								License for more details.
-												Fixed several problems with Moller gcd patches

											
										
										
											2008-12-28 14:03:41 -05:00
-												Added copyright info to GCD and XGCD code.

											
										
										
											2009-10-19 01:49:29 -04:00
+								You should have received a copy of the GNU Lesser General Public License
 								along with the MPIR Library; see the file COPYING.LIB.  If not, write
 								to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
 								Boston, MA 02110-1301, USA.
 								*/
-												Fixed several problems with Moller gcd patches

											
										
										
											2008-12-28 14:03:41 -05:00
-												Added copyright info to GCD and XGCD code.

											
										
										
											2009-10-19 01:49:29 -04:00
+								#include <stdio.h>  /* for NULL */
-												Fixed several problems with Moller gcd patches

											
										
										
											2008-12-28 14:03:41 -05:00
-												Added copyright info to GCD and XGCD code.

											
										
										
											2009-10-19 01:49:29 -04:00
+								#include "mpir.h"
 								#include "gmp-impl.h"
 								#include "longlong.h"
-												Fixed several problems with Moller gcd patches

											
										
										
											2008-12-28 14:03:41 -05:00
-												Incorporated Niels Mollers lgpl 2.1 gcd code into trunk

											
										
										
											2008-10-11 02:56:15 -04:00
+								/* For input of size n, matrix elements are of size at most ceil(n/2)
 								   - 1, but we need one limb extra. */
 								void
-												Fixed several problems with Moller gcd patches

											
										
										
											2008-12-28 14:03:41 -05:00
+								mpn_ngcd_matrix_init (struct ngcd_matrix *M, mp_size_t n, mp_ptr p);
-												Incorporated Niels Mollers lgpl 2.1 gcd code into trunk

											
										
										
											2008-10-11 02:56:15 -04:00
 								#define NHGCD_BASE_ITCH MPN_NGCD_STEP_ITCH
 								/* Reduces a,b until |a-b| fits in n/2 + 1 limbs. Constructs matrix M
 								   with elements of size at most (n+1)/2 - 1. Returns new size of a,
 								   b, or zero if no reduction is possible. */
 								static mp_size_t
 								nhgcd_base (mp_ptr ap, mp_ptr bp, mp_size_t n,
-												Fixed several problems with Moller gcd patches

											
										
										
											2008-12-28 14:03:41 -05:00
+									    struct ngcd_matrix *M, mp_ptr tp);
-												Incorporated Niels Mollers lgpl 2.1 gcd code into trunk

											
										
										
											2008-10-11 02:56:15 -04:00
 								/* Size analysis for nhgcd:
 								   For the recursive calls, we have n1 <= ceil(n / 2). Then the
 								   storage need is determined by the storage for the recursive call
 								   computing M1, and ngcd_matrix_adjust and ngcd_matrix_mul calls that use M1
 								   (after this, the storage needed for M1 can be recycled).
 								   Let S(r) denote the required storage. For M1 we need 5 * ceil(n1/2)
 								   = 5 * ceil(n/4), and for the ngcd_matrix_adjust call, we need n + 2. In
 								   total, 5 * ceil(n/4) + n + 2 <= 9 ceil(n/4) + 2.
 								   For the recursive call, we need S(n1) = S(ceil(n/2)).
 								   S(n) <= 9*ceil(n/4) + 2 + S(ceil(n/2))
 								        <= 9*(ceil(n/4) + ... + ceil(n/2^(1+k))) + 2k + S(ceil(n/2^k))
 								        <= 9*(2 ceil(n/4) + k) + 2k + S(n/2^k)
 									<= 18 ceil(n/4) + 11k + S(n/2^k)
 								*/
 								mp_size_t
-												Fixed several problems with Moller gcd patches

											
										
										
											2008-12-28 14:03:41 -05:00
+								mpn_nhgcd_itch (mp_size_t n);
-												Incorporated Niels Mollers lgpl 2.1 gcd code into trunk

											
										
										
											2008-10-11 02:56:15 -04:00
 								/* Reduces a,b until |a-b| fits in n/2 + 1 limbs. Constructs matrix M
 								   with elements of size at most (n+1)/2 - 1. Returns new size of a,
 								   b, or zero if no reduction is possible. */
 								mp_size_t
 								mpn_nhgcd (mp_ptr ap, mp_ptr bp, mp_size_t n,
-												Fixed several problems with Moller gcd patches

											
										
										
											2008-12-28 14:03:41 -05:00
+									   struct ngcd_matrix *M, mp_ptr tp);
-												Incorporated Niels Mollers lgpl 2.1 gcd code into trunk

											
										
										
											2008-10-11 02:56:15 -04:00
 								#define EVEN_P(x) (((x) & 1) == 0)
 								mp_size_t
 								mpn_ngcd (mp_ptr gp, mp_ptr ap, mp_size_t an, mp_ptr bp, mp_size_t n)
 								{
 								  mp_size_t init_scratch;
 								  mp_size_t scratch;
 								  mp_ptr tp;
 								  TMP_DECL;
 								  ASSERT (an >= n);
-												Fixed the xgcd normalisation issue and redid the tuning code for gcd and 
xgcd.


											
										
										
											2010-05-11 14:37:34 -04:00
+								  if (BELOW_THRESHOLD (n, GCD_THRESHOLD))
 								    return mpn_lgcd (gp, ap, an, bp, n);
-												Incorporated Niels Mollers lgpl 2.1 gcd code into trunk

											
										
										
											2008-10-11 02:56:15 -04:00
 								  init_scratch = MPN_NGCD_MATRIX_INIT_ITCH ((n+1)/2);
 								  scratch = mpn_nhgcd_itch ((n+1)/2);
 								  /* Space needed for mpn_ngcd_matrix_adjust */
 								  if (scratch < 2*n)
 								    scratch = 2*n;
 								  TMP_MARK;
 								  if (an + 1 > init_scratch + scratch)
 								    tp = TMP_ALLOC_LIMBS (an + 1);
 								  else
 								    tp = TMP_ALLOC_LIMBS (init_scratch + scratch);
 								  if (an > n)
 								    {
 								      mp_ptr rp = tp;
 								      mp_ptr qp = rp + n;
 								      mpn_tdiv_qr (qp, rp, 0, ap, an, bp, n);
 								      MPN_COPY (ap, rp, n);
 								      an = n;
 								      MPN_NORMALIZE (ap, an);
 								      if (an == 0)
 									{
 									  MPN_COPY (gp, bp, n);
 									  TMP_FREE;
 									  return n;
 									}
 								    }
-												Fixed the gcd tuning code and added core2/penryn tuning.


											
										
										
											2010-05-11 19:27:43 -04:00
+								  while (ABOVE_THRESHOLD (n, GCD_THRESHOLD))
-												Incorporated Niels Mollers lgpl 2.1 gcd code into trunk

											
										
										
											2008-10-11 02:56:15 -04:00
+								    {
 								      struct ngcd_matrix M;
 								      mp_size_t p = n/2;
 								      mp_size_t nn;
 								      mpn_ngcd_matrix_init (&M, n - p, tp);
 								      nn = mpn_nhgcd (ap + p, bp + p, n - p, &M, tp + init_scratch);
 								      if (nn > 0)
 									/* Needs 2*(p + M->n) <= 2*(floor(n/2) + ceil(n/2) - 1)
 									   = 2(n-1) */
 									n = mpn_ngcd_matrix_adjust (&M, p + nn, ap, bp, p, tp + init_scratch);
 								      else
 									{
 									  mp_size_t gn;
 									  n = mpn_ngcd_subdiv_step (gp, &gn, ap, bp, n, tp);
 									  if (n == 0)
 									    {
 									      TMP_FREE;
 									      return gn;
 									    }
 									}
 								    }
 								  ASSERT (ap[n-1] > 0 || bp[n-1] > 0);
 								#if 0
 								  /* FIXME: We may want to use lehmer on some systems. */
 								  n = mpn_ngcd_lehmer (gp, ap, bp, n, tp);
 								  TMP_FREE;
 								  return n;
 								#endif
 								  if (ap[n-1] < bp[n-1])
 								    MP_PTR_SWAP (ap, bp);
 								  an = n;
 								  MPN_NORMALIZE (bp, n);
 								  if (n == 0)
 								    {
 								      MPN_COPY (gp, ap, an);
 								      TMP_FREE;
 								      return an;
 								    }
 								  if (EVEN_P (bp[0]))
 								    {
 								      /* Then a must be odd (since the calling convention implies that
 									 there's no common factor of 2) */
 								      ASSERT (!EVEN_P (ap[0]));
 								      while (bp[0] == 0)
 									{
 									  bp++;
 									  n--;
 									}
 								      if (EVEN_P(bp[0]))
 									{
 									  int count;
 									  count_trailing_zeros (count, bp[0]);
 									  ASSERT_NOCARRY (mpn_rshift (bp, bp, n, count));
 									  n -= (bp[n-1] == 0);
 									}
 								    }
 								  TMP_FREE;
-												Fixed the xgcd normalisation issue and redid the tuning code for gcd and 
xgcd.


											
										
										
											2010-05-11 14:37:34 -04:00
+								  return mpn_lgcd (gp, ap, an, bp, n);
-												Incorporated Niels Mollers lgpl 2.1 gcd code into trunk

											
										
										
											2008-10-11 02:56:15 -04:00
+								}