2011-04-10 19:42:50 -04:00
|
|
|
/* longlong.h -- definitions for mixed size 32/64 bit arithmetic.

Copyright 1991, 1992, 1993, 1994, 1996, 1997, 1999, 2000, 2001, 2002, 2003,
2004, 2005 Free Software Foundation, Inc.

This file is free software; you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as published by
the Free Software Foundation; either version 2.1 of the License, or (at your
option) any later version.

This file is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
License for more details.

You should have received a copy of the GNU Lesser General Public License
along with this file; see the file COPYING.LIB. If not, write to
the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
MA 02110-1301, USA. */
|
|
|
|
|
2011-04-10 19:06:16 -04:00
|
|
|
/* Two-word subtraction: (sh,sl) = (ah,al) - (bh,bl), each pair being a high
   word and a low word.  The low words are subtracted first; when al < bl the
   low subtraction wrapped, so one extra is taken from the high difference.

   This form encourages gcc (pre-release 3.4 at least) to emit predicated
   "sub r=r,r" and "sub r=r,r,1", giving a 2 cycle latency.  The generic
   code using "al<bl" arithmetically comes out making an actual 0 or 1 in a
   register, which takes an extra cycle.

   al and bl each appear twice in the expansion, so they must not have side
   effects.  The low difference is kept in a UWtype temporary and stored to
   sl last, after all inputs have been read, so the outputs may name the
   same objects as the inputs.  */
#define sub_ddmmss(sh, sl, ah, al, bh, bl) \
  do { \
    UWtype __x; \
    __x = (al) - (bl); \
    if ((al) < (bl)) \
      (sh) = (ah) - (bh) - 1; \
    else \
      (sh) = (ah) - (bh); \
    (sl) = __x; \
  } while (0)
|
|
|
|
#if defined (__GNUC__) && ! defined (__INTEL_COMPILER)
|
|
|
|
/* Do both product parts in assembly, since that gives better code with
   all gcc versions.  Some callers will just use the upper part, and in
   that situation we waste an instruction, but not any cycles.

   ph receives the result of "xma.hu" and pl the result of "xma.l", both
   applied to m0 and m1 with f0 as the third source operand.  All four
   operands use the "f" register constraint.  ph is marked earlyclobber
   ("=&f") because it is written by the first instruction while m0 and m1
   are still needed by the second.  */
#define umul_ppmm(ph, pl, m0, m1) \
  __asm__ ("xma.hu %0 = %2, %3, f0\n\txma.l %1 = %2, %3, f0" \
           : "=&f" (ph), "=f" (pl) \
           : "f" (m0), "f" (m1))
|
|
|
|
/* Set count to W_TYPE_SIZE - 1 - (bit index of the highest set bit of x),
   i.e. the number of leading zero bits.  x is evaluated once.

   "mux1 @rev" is the instruction BSWAP_LIMB uses, so _y is taken to be x
   with its bytes reversed; -_y | _y then keeps every bit from the lowest
   set bit of _y upwards.  NOTE(review): czx1.l is presumed to return the
   number of significant bytes of x here -- confirm against the Itanium
   instruction reference.  (_a - 1) << 3 converts that to the bit offset of
   the highest nonzero byte, and the remaining C code locates the top bit
   inside that byte by testing 4, then 2, then 1 bit positions.

   NOTE(review): x == 0 looks unsupported, because the shift count _c is
   only in range when _a is at least 1 -- confirm with callers.  */
#define count_leading_zeros(count, x) \
  do { \
    UWtype _x = (x), _y, _a, _c; \
    __asm__ ("mux1 %0 = %1, @rev" : "=r" (_y) : "r" (_x)); \
    __asm__ ("czx1.l %0 = %1" : "=r" (_a) : "r" (-_y | _y)); \
    _c = (_a - 1) << 3; \
    _x >>= _c; \
    if (_x >= 1 << 4) \
      _x >>= 4, _c += 4; \
    if (_x >= 1 << 2) \
      _x >>= 2, _c += 2; \
    _c += _x >> 1; \
    (count) = W_TYPE_SIZE - 1 - _c; \
  } while (0)
|
|
|
|
/* similar to what gcc does for __builtin_ffs, but 0 based rather than 1
   based, and we don't need a special case for x==0 here.

   (x-1) & ~x has one bits in exactly the positions below the lowest set
   bit of x, so the popcnt of that mask is the number of trailing zero
   bits.  x is evaluated once, through the UWtype temporary.  */
#define count_trailing_zeros(count, x) \
  do { \
    UWtype __ctz_x = (x); \
    __asm__ ("popcnt %0 = %1" \
             : "=r" (count) \
             : "r" ((__ctz_x-1) & ~__ctz_x)); \
  } while (0)
|
|
|
|
#endif
|
|
|
|
#if defined (__INTEL_COMPILER)
#include <ia64intrin.h>
/* Intel compiler version of umul_ppmm: ph is assigned the result of
   _m64_xmahu (m0, m1, 0) and pl the UWtype product m0 * m1.  m0 and m1 are
   each evaluated once, through UWtype temporaries.  The output arguments
   are parenthesized, like the outputs of the other macros in this file.  */
#define umul_ppmm(ph, pl, m0, m1) \
  do { \
    UWtype _m0 = (m0), _m1 = (m1); \
    (ph) = _m64_xmahu (_m0, _m1, 0); \
    (pl) = _m0 * _m1; \
  } while (0)
#endif
|
|
|
|
#ifndef LONGLONG_STANDALONE
/* udiv_qrnnd implemented on top of udiv_qrnnd_preinv: the value returned by
   __MPN(invert_limb) (d) is stored in a UWtype temporary and passed as the
   final argument, after q, r, n1, n0 and d.  d appears twice in the
   expansion, so it must not have side effects.  Only available when
   LONGLONG_STANDALONE is not defined, since it needs __MPN(invert_limb).  */
#define udiv_qrnnd(q, r, n1, n0, d) \
  do { \
    UWtype __di; \
    __di = __MPN(invert_limb) (d); \
    udiv_qrnnd_preinv (q, r, n1, n0, d, __di); \
  } while (0)
/* NOTE(review): these two flags are presumably read by the generic code to
   learn that division always goes through a precomputed inverse and that d
   must be normalized -- confirm against the users of these macros.  */
#define UDIV_PREINV_ALWAYS 1
#define UDIV_NEEDS_NORMALIZATION 1
#endif
|
2011-04-30 03:05:19 -04:00
|
|
|
|
|
|
|
#if !defined(ULONG_PARITY) && defined (__GNUC__) && ! defined (__INTEL_COMPILER)
/* unsigned long is either 32 or 64 bits depending on the ABI, zero extend
   to a 64 bit unsigned long long for popcnt.

   p receives bit 0 of the popcnt result, so it is 1 when n has an odd
   number of one bits and 0 otherwise.  n is evaluated once.  */
#define ULONG_PARITY(p, n) \
  do { \
    unsigned long long __n = (unsigned long) (n); \
    int __p; \
    __asm__ ("popcnt %0 = %1" : "=r" (__p) : "r" (__n)); \
    (p) = __p & 1; \
  } while (0)
#endif
|
|
|
|
|
|
|
|
#if !defined(BSWAP_LIMB) && defined (__GNUC__) && ! defined (__INTEL_COMPILER)
/* dst = the result of "mux1 @rev" applied to src, both held in general
   registers.  NOTE(review): per the macro name this is a byte-order
   reversal of the limb -- confirm against the Itanium instruction
   reference.  Defined only if no earlier definition of BSWAP_LIMB exists.  */
#define BSWAP_LIMB(dst, src) \
  do { \
    __asm__ ("mux1 %0 = %1, @rev" : "=r" (dst) : "r" (src)); \
  } while (0)
#endif
|
|
|
|
|
|
|
|
|
|
|
|
#if !defined(popc_limb) && defined (__GNUC__) && ! defined (__INTEL_COMPILER)
/* result = the "popcnt" instruction applied to input, both held in general
   registers.  Defined only if no earlier definition of popc_limb exists.  */
#define popc_limb(result, input) \
  do { \
    __asm__ ("popcnt %0 = %1" : "=r" (result) : "r" (input)); \
  } while (0)
#endif
|