/* UltraSPARC 64 support macros.

   THE FUNCTIONS IN THIS FILE ARE FOR INTERNAL USE ONLY.  THEY'RE ALMOST
   CERTAIN TO BE SUBJECT TO INCOMPATIBLE CHANGES OR DISAPPEAR COMPLETELY IN
   FUTURE GNU MP RELEASES.

Copyright 2003 Free Software Foundation, Inc.

This file is part of the GNU MP Library.

The GNU MP Library is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.

The GNU MP Library is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with the GNU MP Library; see the file COPYING.LIB.  If not, write to
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */


#define LOW32(x)   ((x) & 0xFFFFFFFF)
#define HIGH32(x)  ((x) >> 32)

/* Halfword number i in src is accessed as src[i+HALF_ENDIAN_ADJ(i)].
   Plain src[i] would be incorrect in big endian; HALF_ENDIAN_ADJ has the
   effect of swapping the two halves in this case.  */
#if HAVE_LIMB_BIG_ENDIAN
#define HALF_ENDIAN_ADJ(i)  (1 - (((i) & 1) << 1))  /* +1 even, -1 odd */
#endif
#if HAVE_LIMB_LITTLE_ENDIAN
#define HALF_ENDIAN_ADJ(i)  0                       /* no adjust */
#endif
#ifndef HALF_ENDIAN_ADJ
Error, error, unknown limb endianness;
#endif
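
/* Illustrative sketch only, not used by anything in this file: reading the
   32-bit halfwords of a limb vector in value order on either endianness.
   The function name and the "const unsigned *" view of src are assumptions
   made for the example.  */
#if 0
static unsigned
example_get_halfword (mp_srcptr src, int i)
{
  const unsigned *hp = (const unsigned *) src;  /* limbs viewed as 32-bit halves */
  return hp[i + HALF_ENDIAN_ADJ (i)];           /* halfword i, low half of limb 0 first */
}
#endif
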
/* umul_ppmm_lowequal sets h to the high limb of q*d, assuming the low limb
   of that product is equal to l.  dh and dl are the 32-bit halves of d.

   |-----high----||----low-----|
   +------+------+
   |             |                 ph = qh * dh
   +------+------+
          +------+------+
          |             |          pm1 = ql * dh
          +------+------+
          +------+------+
          |             |          pm2 = qh * dl
          +------+------+
                 +------+------+
                 |             |   pl = ql * dl (not calculated)
                 +------+------+

   Knowing that the low 64 bits are equal to l means that LOW(pm1) + LOW(pm2)
   + HIGH(pl) == HIGH(l).  The only thing we need from those product parts
   is whether they produce a carry into the high.

   pm_l = LOW(pm1)+LOW(pm2) is done to contribute its carry, then the only
   time there's a further carry from LOW(pm_l)+HIGH(pl) is if LOW(pm_l) >
   HIGH(l).  pl is never actually calculated.  */

#define umul_ppmm_lowequal(h, q, d, dh, dl, l)  \
  do {                                          \
    mp_limb_t  ql, qh, ph, pm1, pm2, pm_l;      \
    ASSERT (dh == HIGH32(d));                   \
    ASSERT (dl == LOW32(d));                    \
    ASSERT (q*d == l);                          \
                                                \
    ql = LOW32 (q);                             \
    qh = HIGH32 (q);                            \
                                                \
    pm1 = ql * dh;                              \
    pm2 = qh * dl;                              \
    ph = qh * dh;                               \
                                                \
    pm_l = LOW32 (pm1) + LOW32 (pm2);           \
                                                \
    (h) = ph + HIGH32 (pm1) + HIGH32 (pm2)      \
          + HIGH32 (pm_l) + ((pm_l << 32) > l); \
                                                \
    ASSERT_HIGH_PRODUCT (h, q, d);              \
  } while (0)
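
/* Hedged usage sketch, illustration only (not part of this file's
   interface): when q is formed as l times a 64-bit binary inverse of an odd
   d, the low limb of q*d is exactly l by construction, so
   umul_ppmm_lowequal recovers the high limb without a full umul_ppmm.  The
   helper name and parameters below are hypothetical.  */
#if 0
static mp_limb_t
example_high_of_qd (mp_limb_t l, mp_limb_t d, mp_limb_t dinv)
{
  /* Assumes d odd and d*dinv == 1 mod 2^64.  */
  mp_limb_t  h, q, dh, dl;
  dh = HIGH32 (d);
  dl = LOW32 (d);
  q = l * dinv;                              /* low limb of q*d is exactly l */
  umul_ppmm_lowequal (h, q, d, dh, dl, l);   /* h = high limb of q*d */
  return h;
}
#endif
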
/* Set h to the high of q*d, assuming the low limb of that product is equal
   to l, and that d fits in 32 bits.

   |-----high----||----low-----|
          +------+------+
          |             |          pm = qh * dl
          +------+------+
                 +------+------+
                 |             |   pl = ql * dl (not calculated)
                 +------+------+

   Knowing that LOW(pm) + HIGH(pl) == HIGH(l) (mod 2^32) means that the only
   time there's a carry from that sum is when LOW(pm) > HIGH(l).  There's no
   need to calculate pl to determine this.  */

#define umul_ppmm_half_lowequal(h, q, d, l)     \
  do {                                          \
    mp_limb_t  pm;                              \
    ASSERT (q*d == l);                          \
    ASSERT (HIGH32(d) == 0);                    \
                                                \
    pm = HIGH32(q) * d;                         \
    (h) = HIGH32(pm) + ((pm << 32) > l);        \
    ASSERT_HIGH_PRODUCT (h, q, d);              \
  } while (0)
/* check that h is the high limb of x*y */
#if WANT_ASSERT
#define ASSERT_HIGH_PRODUCT(h, x, y)    \
  do {                                  \
    mp_limb_t  want_h, dummy;           \
    umul_ppmm (want_h, dummy, x, y);    \
    ASSERT (h == want_h);               \
  } while (0)
#else
#define ASSERT_HIGH_PRODUCT(h, q, d)    \
  do { } while (0)
#endif

/* Count the leading zeros on a limb, but assuming it fits in 32 bits.
   The count returned will be in the range 32 to 63.
   This is the 32-bit generic C count_leading_zeros from longlong.h.  */
#define count_leading_zeros_32(count, x)                                      \
  do {                                                                        \
    mp_limb_t  __xr = (x);                                                    \
    unsigned   __a;                                                           \
    ASSERT ((x) != 0);                                                        \
    ASSERT ((x) <= CNST_LIMB(0xFFFFFFFF));                                    \
    __a = __xr < ((UWtype) 1 << 16) ? (__xr < ((UWtype) 1 << 8) ? 1 : 8 + 1)  \
      : (__xr < ((UWtype) 1 << 24) ? 16 + 1 : 24 + 1);                        \
                                                                              \
    (count) = W_TYPE_SIZE + 1 - __a - __clz_tab[__xr >> __a];                 \
  } while (0)
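
/* Illustration only: one natural use of count_leading_zeros_32 is to
   left-justify a nonzero 32-bit divisor before computing its inverse with
   invert_half_limb below.  The helper name is hypothetical.  */
#if 0
static int
example_normalize_half_divisor (mp_limb_t d, mp_limb_t *dnorm)
{
  int  count;
  count_leading_zeros_32 (count, d);  /* 32 <= count <= 63, since d < 2^32 */
  count -= 32;                        /* leading zeros within the low 32 bits */
  *dnorm = d << count;                /* bit 31 of *dnorm is now set */
  return count;
}
#endif
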
/* Set inv to a 32-bit inverse floor((b*(b-d)-1) / d), where b = 2^32,
   knowing that d fits 32 bits and is normalized (high bit set).  */
#define invert_half_limb(inv, d)                \
  do {                                          \
    mp_limb_t  _n;                              \
    ASSERT ((d) <= 0xFFFFFFFF);                 \
    ASSERT ((d) & 0x80000000);                  \
    _n = (((mp_limb_t) -(d)) << 32) - 1;        \
    (inv) = (mp_limb_t) (unsigned) (_n / (d));  \
  } while (0)
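
/* Worked check, illustration only (the helper name is hypothetical): for
   d = 0xFFFFFFFF the formula gives floor((2^32*1 - 1) / 0xFFFFFFFF) = 1,
   and for d = 0x80000000 it gives floor((2^63 - 1) / 2^31) = 0xFFFFFFFF.  */
#if 0
static void
example_invert_check (void)
{
  mp_limb_t  inv;
  invert_half_limb (inv, CNST_LIMB(0xFFFFFFFF));
  ASSERT_ALWAYS (inv == 1);
  invert_half_limb (inv, CNST_LIMB(0x80000000));
  ASSERT_ALWAYS (inv == CNST_LIMB(0xFFFFFFFF));
}
#endif
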
/* Divide nh:nl by d, setting q to the quotient and r to the remainder.
   q, r, nh and nl are 32 bits each, d_limb is 32 bits but in an mp_limb_t,
   dinv_limb is similarly a 32-bit inverse but in an mp_limb_t.  */

#define udiv_qrnnd_half_preinv(q, r, nh, nl, d_limb, dinv_limb)  \
  do {                                                            \
    unsigned   _n2, _n10, _n1, _nadj, _q11n, _xh, _r, _q;         \
    mp_limb_t  _n, _x;                                            \
    ASSERT (d_limb <= 0xFFFFFFFF);                                \
    ASSERT (dinv_limb <= 0xFFFFFFFF);                             \
    ASSERT (d_limb & 0x80000000);                                 \
    ASSERT (nh < d_limb);                                         \
    _n10 = (nl);                                                  \
    _n2 = (nh);                                                   \
    _n1 = (int) _n10 >> 31;                                       \
    _nadj = _n10 + (_n1 & d_limb);                                \
    _x = dinv_limb * (_n2 - _n1) + _nadj;                         \
    _q11n = ~(_n2 + HIGH32 (_x));     /* -q1-1 */                 \
    _n = ((mp_limb_t) _n2 << 32) + _n10;                          \
    _x = _n + d_limb * _q11n;         /* n-q1*d-d */              \
    _xh = HIGH32 (_x) - d_limb;       /* high(n-q1*d-d) */        \
    ASSERT (_xh == 0 || _xh == ~0);                               \
    _r = _x + (d_limb & _xh);         /* addback */               \
    _q = _xh - _q11n;                 /* q1+1-addback */          \
    ASSERT (_r < d_limb);                                         \
    ASSERT (d_limb * _q + _r == _n);                              \
    (r) = _r;                                                     \
    (q) = _q;                                                     \
  } while (0)
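
/* Hypothetical example, illustration only: dividing a two-digit base 2^32
   number n1:n0 by a normalized 32-bit divisor, using invert_half_limb to
   set up the inverse that udiv_qrnnd_half_preinv expects.  The function
   name and parameters are assumptions for the sketch.  */
#if 0
static void
example_div_two_halves (unsigned *qp, unsigned *rp,
                        unsigned n1, unsigned n0, mp_limb_t d_limb)
{
  mp_limb_t  dinv_limb;
  unsigned   q, r;
  ASSERT (d_limb & 0x80000000);   /* divisor must be normalized */
  ASSERT (n1 < d_limb);           /* so the quotient fits one 32-bit digit */
  invert_half_limb (dinv_limb, d_limb);
  udiv_qrnnd_half_preinv (q, r, n1, n0, d_limb, dinv_limb);
  *qp = q;
  *rp = r;
}
#endif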