2008-06-25 03:33:36 -04:00
|
|
|
/* UltraSparc 64 mpn_divrem_1 -- mpn by limb division.
|
|
|
|
|
|
|
|
Copyright 1991, 1993, 1994, 1996, 1998, 1999, 2000, 2001, 2003 Free Software
|
|
|
|
Foundation, Inc.
|
|
|
|
|
|
|
|
This file is part of the GNU MP Library.
|
|
|
|
|
|
|
|
The GNU MP Library is free software; you can redistribute it and/or modify
|
|
|
|
it under the terms of the GNU Lesser General Public License as published by
|
|
|
|
the Free Software Foundation; either version 2.1 of the License, or (at your
|
|
|
|
option) any later version.
|
|
|
|
|
|
|
|
The GNU MP Library is distributed in the hope that it will be useful, but
|
|
|
|
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
|
|
|
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
|
|
|
License for more details.
|
|
|
|
|
|
|
|
You should have received a copy of the GNU Lesser General Public License
|
|
|
|
along with the GNU MP Library; see the file COPYING.LIB. If not, write to
|
|
|
|
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
|
|
|
|
MA 02110-1301, USA. */
|
|
|
|
|
2009-02-12 05:24:24 -05:00
|
|
|
#include "mpir.h"
|
2008-06-25 03:33:36 -04:00
|
|
|
#include "gmp-impl.h"
|
|
|
|
#include "longlong.h"
|
|
|
|
|
|
|
|
#include "mpn/sparc64/sparc64.h"
|
|
|
|
|
|
|
|
|
|
|
|
/* 64-bit divisor 32-bit divisor
|
|
|
|
cycles/limb cycles/limb
|
|
|
|
(approx) (approx)
|
|
|
|
integer fraction integer fraction
|
|
|
|
Ultrasparc 2i: 160 160 122 96
|
|
|
|
*/
|
|
|
|
|
|
|
|
|
|
|
|
/* 32-bit divisors are treated in special case code. This requires 4 mulx
|
|
|
|
per limb instead of 8 in the general case.
|
|
|
|
|
|
|
|
For big endian systems we need HALF_ENDIAN_ADJ included in the src[i]
|
|
|
|
addressing, to get the two halves of each limb read in the correct order.
|
|
|
|
This is kept in an adj variable. Doing that measures about 4 c/l faster
|
|
|
|
than just writing HALF_ENDIAN_ADJ(i) in the integer loop. The latter
|
|
|
|
shouldn't be 6 cycles worth of work, but perhaps it doesn't schedule well
|
|
|
|
(on gcc 3.2.1 at least). The fraction loop doesn't seem affected, but we
|
|
|
|
still use a variable since that ought to work out best. */
|
|
|
|
|
|
|
|
mp_limb_t
|
|
|
|
mpn_divrem_1 (mp_ptr qp_limbptr, mp_size_t xsize_limbs,
|
|
|
|
mp_srcptr ap_limbptr, mp_size_t size_limbs, mp_limb_t d_limb)
|
|
|
|
{
|
|
|
|
mp_size_t total_size_limbs;
|
|
|
|
mp_size_t i;
|
|
|
|
|
|
|
|
ASSERT (xsize_limbs >= 0);
|
|
|
|
ASSERT (size_limbs >= 0);
|
|
|
|
ASSERT (d_limb != 0);
|
|
|
|
/* FIXME: What's the correct overlap rule when xsize!=0? */
|
|
|
|
ASSERT (MPN_SAME_OR_SEPARATE_P (qp_limbptr + xsize_limbs,
|
|
|
|
ap_limbptr, size_limbs));
|
|
|
|
|
|
|
|
total_size_limbs = size_limbs + xsize_limbs;
|
|
|
|
if (UNLIKELY (total_size_limbs == 0))
|
|
|
|
return 0;
|
|
|
|
|
|
|
|
/* udivx is good for total_size==1, and no need to bother checking
|
|
|
|
limb<divisor, since if that's likely the caller should check */
|
|
|
|
if (UNLIKELY (total_size_limbs == 1))
|
|
|
|
{
|
|
|
|
mp_limb_t a, q;
|
|
|
|
a = (LIKELY (size_limbs != 0) ? ap_limbptr[0] : 0);
|
|
|
|
q = a / d_limb;
|
|
|
|
qp_limbptr[0] = q;
|
|
|
|
return a - q*d_limb;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (d_limb <= CNST_LIMB(0xFFFFFFFF))
|
|
|
|
{
|
|
|
|
mp_size_t size, xsize, total_size, adj;
|
|
|
|
unsigned *qp, n1, n0, q, r, nshift, norm_rmask;
|
|
|
|
mp_limb_t dinv_limb;
|
|
|
|
const unsigned *ap;
|
|
|
|
int norm, norm_rshift;
|
|
|
|
|
|
|
|
size = 2 * size_limbs;
|
|
|
|
xsize = 2 * xsize_limbs;
|
|
|
|
total_size = size + xsize;
|
|
|
|
|
|
|
|
ap = (unsigned *) ap_limbptr;
|
|
|
|
qp = (unsigned *) qp_limbptr;
|
|
|
|
|
|
|
|
qp += xsize;
|
|
|
|
r = 0; /* initial remainder */
|
|
|
|
|
|
|
|
if (LIKELY (size != 0))
|
|
|
|
{
|
|
|
|
n1 = ap[size-1 + HALF_ENDIAN_ADJ(1)];
|
|
|
|
|
|
|
|
/* If the length of the source is uniformly distributed, then
|
|
|
|
there's a 50% chance of the high 32-bits being zero, which we
|
|
|
|
can skip. */
|
|
|
|
if (n1 == 0)
|
|
|
|
{
|
|
|
|
n1 = ap[size-2 + HALF_ENDIAN_ADJ(0)];
|
|
|
|
total_size--;
|
|
|
|
size--;
|
|
|
|
ASSERT (size > 0); /* because always even */
|
|
|
|
qp[size + HALF_ENDIAN_ADJ(1)] = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Skip a division if high < divisor (high quotient 0). Testing
|
|
|
|
here before before normalizing will still skip as often as
|
|
|
|
possible. */
|
|
|
|
if (n1 < d_limb)
|
|
|
|
{
|
|
|
|
r = n1;
|
|
|
|
size--;
|
|
|
|
qp[size + HALF_ENDIAN_ADJ(size)] = 0;
|
|
|
|
total_size--;
|
|
|
|
if (total_size == 0)
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
count_leading_zeros_32 (norm, d_limb);
|
|
|
|
norm -= 32;
|
|
|
|
d_limb <<= norm;
|
|
|
|
r <<= norm;
|
|
|
|
|
|
|
|
norm_rshift = 32 - norm;
|
|
|
|
norm_rmask = (norm == 0 ? 0 : 0xFFFFFFFF);
|
|
|
|
|
|
|
|
invert_half_limb (dinv_limb, d_limb);
|
|
|
|
|
|
|
|
if (LIKELY (size != 0))
|
|
|
|
{
|
|
|
|
i = size - 1;
|
|
|
|
adj = HALF_ENDIAN_ADJ (i);
|
|
|
|
n1 = ap[i + adj];
|
|
|
|
adj = -adj;
|
|
|
|
r |= ((n1 >> norm_rshift) & norm_rmask);
|
|
|
|
for ( ; i > 0; i--)
|
|
|
|
{
|
|
|
|
n0 = ap[i-1 + adj];
|
|
|
|
adj = -adj;
|
|
|
|
nshift = (n1 << norm) | ((n0 >> norm_rshift) & norm_rmask);
|
|
|
|
udiv_qrnnd_half_preinv (q, r, r, nshift, d_limb, dinv_limb);
|
|
|
|
qp[i + adj] = q;
|
|
|
|
n1 = n0;
|
|
|
|
}
|
|
|
|
nshift = n1 << norm;
|
|
|
|
udiv_qrnnd_half_preinv (q, r, r, nshift, d_limb, dinv_limb);
|
|
|
|
qp[0 + HALF_ENDIAN_ADJ(0)] = q;
|
|
|
|
}
|
|
|
|
qp -= xsize;
|
|
|
|
adj = HALF_ENDIAN_ADJ (0);
|
|
|
|
for (i = xsize-1; i >= 0; i--)
|
|
|
|
{
|
|
|
|
udiv_qrnnd_half_preinv (q, r, r, 0, d_limb, dinv_limb);
|
|
|
|
adj = -adj;
|
|
|
|
qp[i + adj] = q;
|
|
|
|
}
|
|
|
|
|
|
|
|
return r >> norm;
|
|
|
|
}
|
|
|
|
else
|
|
|
|
{
|
|
|
|
mp_srcptr ap;
|
|
|
|
mp_ptr qp;
|
|
|
|
mp_size_t size, xsize, total_size;
|
|
|
|
mp_limb_t d, n1, n0, q, r, dinv, nshift, norm_rmask;
|
|
|
|
int norm, norm_rshift;
|
|
|
|
|
|
|
|
ap = ap_limbptr;
|
|
|
|
qp = qp_limbptr;
|
|
|
|
size = size_limbs;
|
|
|
|
xsize = xsize_limbs;
|
|
|
|
total_size = total_size_limbs;
|
|
|
|
d = d_limb;
|
|
|
|
|
|
|
|
qp += total_size; /* above high limb */
|
|
|
|
r = 0; /* initial remainder */
|
|
|
|
|
|
|
|
if (LIKELY (size != 0))
|
|
|
|
{
|
|
|
|
/* Skip a division if high < divisor (high quotient 0). Testing
|
|
|
|
here before before normalizing will still skip as often as
|
|
|
|
possible. */
|
|
|
|
n1 = ap[size-1];
|
|
|
|
if (n1 < d)
|
|
|
|
{
|
|
|
|
r = n1;
|
|
|
|
*--qp = 0;
|
|
|
|
total_size--;
|
|
|
|
if (total_size == 0)
|
|
|
|
return r;
|
|
|
|
size--;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
count_leading_zeros (norm, d);
|
|
|
|
d <<= norm;
|
|
|
|
r <<= norm;
|
|
|
|
|
|
|
|
norm_rshift = GMP_LIMB_BITS - norm;
|
|
|
|
norm_rmask = (norm == 0 ? 0 : ~CNST_LIMB(0));
|
|
|
|
|
|
|
|
invert_limb (dinv, d);
|
|
|
|
|
|
|
|
if (LIKELY (size != 0))
|
|
|
|
{
|
|
|
|
n1 = ap[size-1];
|
|
|
|
r |= ((n1 >> norm_rshift) & norm_rmask);
|
|
|
|
for (i = size-2; i >= 0; i--)
|
|
|
|
{
|
|
|
|
n0 = ap[i];
|
|
|
|
nshift = (n1 << norm) | ((n0 >> norm_rshift) & norm_rmask);
|
|
|
|
udiv_qrnnd_preinv (q, r, r, nshift, d, dinv);
|
|
|
|
*--qp = q;
|
|
|
|
n1 = n0;
|
|
|
|
}
|
|
|
|
nshift = n1 << norm;
|
|
|
|
udiv_qrnnd_preinv (q, r, r, nshift, d, dinv);
|
|
|
|
*--qp = q;
|
|
|
|
}
|
|
|
|
for (i = 0; i < xsize; i++)
|
|
|
|
{
|
|
|
|
udiv_qrnnd_preinv (q, r, r, CNST_LIMB(0), d, dinv);
|
|
|
|
*--qp = q;
|
|
|
|
}
|
|
|
|
return r >> norm;
|
|
|
|
}
|
|
|
|
}
|