diff --git a/Makefile.in b/Makefile.in index 0d919d7b..665a9626 100644 --- a/Makefile.in +++ b/Makefile.in @@ -125,9 +125,11 @@ host_triplet = @host@ subdir = . DIST_COMMON = README $(am__configure_deps) $(am__include_HEADERS_DIST) \ $(srcdir)/Makefile.am $(srcdir)/Makefile.in \ - $(srcdir)/config.in $(srcdir)/gmp-h.in $(top_srcdir)/configure \ - AUTHORS COPYING COPYING.LIB ChangeLog INSTALL NEWS \ - config.guess config.sub install-sh ltmain.sh missing ylwrap + $(srcdir)/config.in $(srcdir)/gmp-h.in \ + $(srcdir)/longlong_post.h $(srcdir)/longlong_pre.h \ + $(top_srcdir)/configure AUTHORS COPYING COPYING.LIB ChangeLog \ + INSTALL NEWS config.guess config.sub install-sh ltmain.sh \ + missing ylwrap ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 am__aclocal_m4_deps = $(top_srcdir)/acinclude.m4 \ $(top_srcdir)/configure.in @@ -137,7 +139,7 @@ am__CONFIG_DISTCLEAN_FILES = config.status config.cache config.log \ configure.lineno config.status.lineno mkinstalldirs = $(install_sh) -d CONFIG_HEADER = config.h -CONFIG_CLEAN_FILES = mpir.h gmp-mparam.h yasm_mac.inc +CONFIG_CLEAN_FILES = longlong.h mpir.h gmp-mparam.h yasm_mac.inc CONFIG_CLEAN_VPATH_FILES = am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; am__vpath_adj = case $$p in \ @@ -653,6 +655,8 @@ $(srcdir)/config.in: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) distclean-hdr: -rm -f config.h stamp-h1 +longlong.h: $(top_builddir)/config.status $(srcdir)/longlong_pre.h $(srcdir)/longlong_post.h + cd $(top_builddir) && $(SHELL) ./config.status $@ mpir.h: $(top_builddir)/config.status $(srcdir)/gmp-h.in cd $(top_builddir) && $(SHELL) ./config.status $@ install-libLTLIBRARIES: $(lib_LTLIBRARIES) diff --git a/configure b/configure index 9af5a3cc..206bf884 100755 --- a/configure +++ b/configure @@ -26520,13 +26520,6 @@ fi # can provide some functions too. (mpn/Makefile.am passes # -DOPERATION_ to get them to generate the right code.) 
-# Note: The following lines defining $gmp_mpn_functions_optional -# and $gmp_mpn_functions are parsed by the "macos/configure" -# Perl script. So if you change the lines in a major way -# make sure to run and examine the output from -# -# % (cd macos; perl configure) -# # Note: $gmp_mpn_functions must have mod_1 before preinv_mod_1 so the former # can optionally provide the latter as an extra entrypoint. Likewise # divrem_1 and preinv_divrem_1. @@ -28505,7 +28498,6 @@ echo "include_mpn(\`x86_64/x86_64-defs.m4')" >> $gmp_tmpconfigm4i esac fi - # Create link for gmp-mparam.h. gmp_mparam_source= for gmp_mparam_dir in $path; do @@ -28523,6 +28515,23 @@ if test -z "$gmp_mparam_source"; then as_fn_error $? "no version of gmp-mparam.h found in path: $path" "$LINENO" 5 fi +# Create longlong.h from the path +longlong_source= +for longlong_dir in $path; do + test "$no_create" = yes || rm -f longlong.h + tmp_file=$srcdir/mpn/$longlong_dir/longlong.h + if test -f $tmp_file; then + ac_config_files="$ac_config_files longlong.h:longlong_pre.h:mpn/$longlong_dir/longlong.h:longlong_post.h" + + gmp_srclinks="$gmp_srclinks longlong.h" + longlong_source=$tmp_file + break + fi +done +if test -z "$longlong_source"; then + as_fn_error $? 
"no version of longlong.h found in path: $path" "$LINENO" 5 +fi + # For a helpful message from tune/tuneup.c gmp_mparam_suggest=$gmp_mparam_source if test "$gmp_mparam_dir" = generic; then @@ -30086,6 +30095,7 @@ do "libtool") CONFIG_COMMANDS="$CONFIG_COMMANDS libtool" ;; "mpn/$tmp_fn.$tmp_ext") CONFIG_LINKS="$CONFIG_LINKS mpn/$tmp_fn.$tmp_ext:mpn/$tmp_dir/$tmp_base.$tmp_ext" ;; "gmp-mparam.h") CONFIG_LINKS="$CONFIG_LINKS gmp-mparam.h:mpn/$gmp_mparam_dir/gmp-mparam.h" ;; + "longlong.h") CONFIG_FILES="$CONFIG_FILES longlong.h:longlong_pre.h:mpn/$longlong_dir/longlong.h:longlong_post.h" ;; "yasm_mac.inc") CONFIG_LINKS="$CONFIG_LINKS yasm_mac.inc:$YASM_MAC_INC" ;; "Makefile") CONFIG_FILES="$CONFIG_FILES Makefile" ;; "mpf/Makefile") CONFIG_FILES="$CONFIG_FILES mpf/Makefile" ;; diff --git a/configure.in b/configure.in index e447cb31..3a05dfd5 100644 --- a/configure.in +++ b/configure.in @@ -2345,13 +2345,6 @@ fi # can provide some functions too. (mpn/Makefile.am passes # -DOPERATION_ to get them to generate the right code.) -# Note: The following lines defining $gmp_mpn_functions_optional -# and $gmp_mpn_functions are parsed by the "macos/configure" -# Perl script. So if you change the lines in a major way -# make sure to run and examine the output from -# -# % (cd macos; perl configure) -# # Note: $gmp_mpn_functions must have mod_1 before preinv_mod_1 so the former # can optionally provide the latter as an extra entrypoint. Likewise # divrem_1 and preinv_divrem_1. @@ -3059,7 +3052,6 @@ if test "$gmp_asm_syntax_testing" != no; then esac fi - # Create link for gmp-mparam.h. 
gmp_mparam_source= for gmp_mparam_dir in $path; do @@ -3076,6 +3068,22 @@ if test -z "$gmp_mparam_source"; then AC_MSG_ERROR([no version of gmp-mparam.h found in path: $path]) fi +# Create longlong.h from the path +longlong_source= +for longlong_dir in $path; do + test "$no_create" = yes || rm -f longlong.h + tmp_file=$srcdir/mpn/$longlong_dir/longlong.h + if test -f $tmp_file; then + AC_CONFIG_FILES(longlong.h:longlong_pre.h:mpn/$longlong_dir/longlong.h:longlong_post.h) + gmp_srclinks="$gmp_srclinks longlong.h" + longlong_source=$tmp_file + break + fi +done +if test -z "$longlong_source"; then + AC_MSG_ERROR([no version of longlong.h found in path: $path]) +fi + # For a helpful message from tune/tuneup.c gmp_mparam_suggest=$gmp_mparam_source if test "$gmp_mparam_dir" = generic; then diff --git a/longlong.h b/longlong_post.h similarity index 53% rename from longlong.h rename to longlong_post.h index 1ca34ec0..3b0faf35 100644 --- a/longlong.h +++ b/longlong_post.h @@ -32,259 +32,6 @@ MA 02110-1301, USA. */ on a 64 bit machine, UWtype should typically be UDItype. */ -#define __BITS4 (W_TYPE_SIZE / 4) -#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2)) -#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1)) -#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2)) - -/* This is used to make sure no undesirable sharing between different libraries - that use this file takes place. */ -#ifndef __MPN -#define __MPN(x) __##x -#endif - -#ifndef _PROTO -#if (__STDC__-0) || defined (__cplusplus) || defined( _MSC_VER ) -#define _PROTO(x) x -#else -#define _PROTO(x) () -#endif -#endif - -/* Define auxiliary asm macros. - - 1) umul_ppmm(high_prod, low_prod, multipler, multiplicand) multiplies two - UWtype integers MULTIPLER and MULTIPLICAND, and generates a two UWtype - word product in HIGH_PROD and LOW_PROD. 
- - 3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator, - denominator) divides a UDWtype, composed by the UWtype integers - HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient - in QUOTIENT and the remainder in REMAINDER. HIGH_NUMERATOR must be less - than DENOMINATOR for correct operation. If, in addition, the most - significant bit of DENOMINATOR must be 1, then the pre-processor symbol - UDIV_NEEDS_NORMALIZATION is defined to 1. - - 4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator, - denominator). Like udiv_qrnnd but the numbers are signed. The quotient - is rounded towards 0. - - 5) count_leading_zeros(count, x) counts the number of zero-bits from the - msb to the first non-zero bit in the UWtype X. This is the number of - steps X needs to be shifted left to set the msb. Undefined for X == 0, - unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value. - - 6) count_trailing_zeros(count, x) like count_leading_zeros, but counts - from the least significant end. - - 7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1, - high_addend_2, low_addend_2) adds two UWtype integers, composed by - HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2 - respectively. The result is placed in HIGH_SUM and LOW_SUM. Overflow - (i.e. carry out) is not stored anywhere, and is lost. - - 8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend, - high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers, - composed by HIGH_MINUEND_1 and LOW_MINUEND_1, and HIGH_SUBTRAHEND_2 and - LOW_SUBTRAHEND_2 respectively. The result is placed in HIGH_DIFFERENCE - and LOW_DIFFERENCE. Overflow (i.e. carry out) is not stored anywhere, - and is lost. - - If any of these macros are left undefined for a particular CPU, - C macros are used. 
- - - Notes: - - For add_ssaaaa the two high and two low addends can both commute, but - unfortunately gcc only supports one "%" commutative in each asm block. - This has always been so but is only documented in recent versions - (eg. pre-release 3.3). Having two or more "%"s can cause an internal - compiler error in certain rare circumstances. - - Apparently it was only the last "%" that was ever actually respected, so - the code has been updated to leave just that. Clearly there's a free - choice whether high or low should get it, if there's a reason to favour - one over the other. Also obviously when the constraints on the two - operands are identical there's no benefit to the reloader in any "%" at - all. - - */ - -/* The CPUs come in alphabetical order below. - - Please add support for more CPUs here, or improve the current support - for the CPUs below! */ - - -/* count_leading_zeros_gcc_clz is count_leading_zeros implemented with gcc - 3.4 __builtin_clzl or __builtin_clzll, according to our limb size. - Similarly count_trailing_zeros_gcc_ctz using __builtin_ctzl or - __builtin_ctzll. - - These builtins are only used when we check what code comes out, on some - chips they're merely libgcc calls, where we will instead want an inline - in that case (either asm or generic C). - - These builtins are better than an asm block of the same insn, since an - asm block doesn't give gcc any information about scheduling or resource - usage. We keep an asm block for use on prior versions of gcc though. - - For reference, __builtin_ffs existed in gcc prior to __builtin_clz, but - it's not used (for count_leading_zeros) because it generally gives extra - code to ensure the result is 0 when the input is 0, which we don't need - or want. 
*/ - -#ifdef _LONG_LONG_LIMB -#define count_leading_zeros_gcc_clz(count,x) \ - do { \ - ASSERT ((x) != 0); \ - (count) = __builtin_clzll (x); \ - } while (0) -#else -#define count_leading_zeros_gcc_clz(count,x) \ - do { \ - ASSERT ((x) != 0); \ - (count) = __builtin_clzl (x); \ - } while (0) -#endif - -#ifdef _LONG_LONG_LIMB -#define count_trailing_zeros_gcc_ctz(count,x) \ - do { \ - ASSERT ((x) != 0); \ - (count) = __builtin_ctzll (x); \ - } while (0) -#else -#define count_trailing_zeros_gcc_ctz(count,x) \ - do { \ - ASSERT ((x) != 0); \ - (count) = __builtin_ctzl (x); \ - } while (0) -#endif - -#ifdef _MSC_VER -/* temporary */ -#include "mpn/x86w/longlong.h" -#include "mpn/x86_64w/longlong.h" -#endif - -/* FIXME: The macros using external routines like __MPN(count_leading_zeros) - don't need to be under !NO_ASM */ -#if ! defined (NO_ASM) - -#if defined (__alpha) && W_TYPE_SIZE == 64 -#include "mpn/alpha/longlong.h" -#endif - -#if defined (__ia64) && W_TYPE_SIZE == 64 -#include "mpn/ia64/longlong.h" -#endif - - -#if defined (__GNUC__) || defined(INTEL_COMPILER) - -#if (defined (__i386__) || defined (__i486__)) && W_TYPE_SIZE == 32 -#include "mpn/x86/longlong.h" -#endif - -#if defined (__amd64__) && W_TYPE_SIZE == 64 -#include "mpn/x86_64/longlong.h" -#endif - -#endif - -#if defined (__GNUC__) - -/* We sometimes need to clobber "cc" with gcc2, but that would not be - understood by gcc1. Use cpp to avoid major code duplication. */ -#if __GNUC__ < 2 -#define __CLOBBER_CC -#define __AND_CLOBBER_CC -#else /* __GNUC__ >= 2 */ -#define __CLOBBER_CC : "cc" -#define __AND_CLOBBER_CC , "cc" -#endif /* __GNUC__ < 2 */ - -#if defined (__arm__) && W_TYPE_SIZE == 32 -#include "mpn/arm/longlong.h" -#endif - -#if defined (__hppa) && W_TYPE_SIZE == 32 -#include "mpn/pa32/longlong.h" -#endif - -/* These macros are for ABI=2.0w. In ABI=2.0n they can't be used, since GCC - (3.2) puts longlong into two adjacent 32-bit registers. 
Presumably this - is just a case of no direct support for 2.0n but treating it like 1.0. */ -#if defined (__hppa) && W_TYPE_SIZE == 64 && ! defined (_LONG_LONG_LIMB) -#include "mpn/pa64/longlong.h" -#endif - -#if (defined (__i370__) || defined (__s390__) || defined (__mvs__)) && W_TYPE_SIZE == 32 -#include "mpn/s390/longlong.h" -#endif - -#if (defined (__mc68000__) || defined (__mc68020__) || defined(mc68020) \ - || defined (__m68k__) || defined (__mc5200__) || defined (__mc5206e__) \ - || defined (__mc5307__)) && W_TYPE_SIZE == 32 -#include "mpn/m68k/longlong.h" -#endif - -#if defined (__mips) && W_TYPE_SIZE == 32 -#include "mpn/mips32/longlong.h" -#endif - -#if (defined (__mips) && __mips >= 3) && W_TYPE_SIZE == 64 -#include "mpn/mips64/longlong.h" -#endif - -/* In the past we had a block of various #defines tested - _ARCH_PPC - AIX - _ARCH_PWR - AIX - __powerpc__ - gcc - __POWERPC__ - BEOS - __ppc__ - Darwin - PPC - old gcc, GNU/Linux, SysV - The plain PPC test was not good for vxWorks, since PPC is defined on all - CPUs there (eg. m68k too), as a constant one is expected to compare - CPU_FAMILY against. - - At any rate, this was pretty unattractive and a bit fragile. The use of - HAVE_HOST_CPU_FAMILY is designed to cut through it all and be sure of - getting the desired effect. - - ENHANCE-ME: We should test _IBMR2 here when we add assembly support for - the system vendor compilers. (Is that vendor compilers with inline asm, - or what?) */ - -#if (HAVE_HOST_CPU_FAMILY_power || HAVE_HOST_CPU_FAMILY_powerpc) \ - && W_TYPE_SIZE == 32 -#include "mpn/powerpc32/longlong.h" -#endif - -/* We should test _IBMR2 here when we add assembly support for the system - vendor compilers. 
*/ -#if HAVE_HOST_CPU_FAMILY_powerpc && W_TYPE_SIZE == 64 -#include "mpn/powerpc64/longlong.h" -#endif - -#if defined (__sh2__) && W_TYPE_SIZE == 32 -#include "mpn/sh/longlong.h" -#endif - -#if defined (__sparc__) && W_TYPE_SIZE == 32 -#include "mpn/sparc32/longlong.h" -#endif - -#if defined (__sparc__) && W_TYPE_SIZE == 64 -#include "mpn/sparc64/longlong.h" -#endif - -#endif /* __GNUC__ */ - -#endif /* NO_ASM */ - /* Use mpn_umul_ppmm or mpn_udiv_qrnnd functions, if they exist. The "_r" forms have "reversed" arguments, meaning the pointer is last, which sometimes allows better parameter passing, in particular on 64-bit diff --git a/longlong_pre.h b/longlong_pre.h new file mode 100644 index 00000000..56d0eb55 --- /dev/null +++ b/longlong_pre.h @@ -0,0 +1,164 @@ +/* longlong.h -- definitions for mixed size 32/64 bit arithmetic. + +Copyright 1991, 1992, 1993, 1994, 1996, 1997, 1999, 2000, 2001, 2002, 2003, +2004, 2005 Free Software Foundation, Inc. + +This file is free software; you can redistribute it and/or modify +it under the terms of the GNU Lesser General Public License as published by +the Free Software Foundation; either version 2.1 of the License, or (at your +option) any later version. + +This file is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY +or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public +License for more details. + +You should have received a copy of the GNU Lesser General Public License +along with this file; see the file COPYING.LIB. If not, write to +the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, +MA 02110-1301, USA. */ + +/* You have to define the following before including this file: + + UWtype -- An unsigned type, default type for operations (typically a "word") + UHWtype -- An unsigned type, at least half the size of UWtype. 
+ UDWtype -- An unsigned type, at least twice as large as UWtype + W_TYPE_SIZE -- size in bits of UWtype + + SItype, USItype -- Signed and unsigned 32 bit types. + DItype, UDItype -- Signed and unsigned 64 bit types. + + On a 32 bit machine UWtype should typically be USItype; + on a 64 bit machine, UWtype should typically be UDItype. +*/ + +#define __BITS4 (W_TYPE_SIZE / 4) +#define __ll_B ((UWtype) 1 << (W_TYPE_SIZE / 2)) +#define __ll_lowpart(t) ((UWtype) (t) & (__ll_B - 1)) +#define __ll_highpart(t) ((UWtype) (t) >> (W_TYPE_SIZE / 2)) + +/* This is used to make sure no undesirable sharing between different libraries + that use this file takes place. */ +#ifndef __MPN +#define __MPN(x) __##x +#endif + +#ifndef _PROTO +#if (__STDC__-0) || defined (__cplusplus) || defined( _MSC_VER ) +#define _PROTO(x) x +#else +#define _PROTO(x) () +#endif +#endif + +/* Define auxiliary asm macros. + + 1) umul_ppmm(high_prod, low_prod, multiplier, multiplicand) multiplies two + UWtype integers MULTIPLIER and MULTIPLICAND, and generates a two UWtype + word product in HIGH_PROD and LOW_PROD. + + 3) udiv_qrnnd(quotient, remainder, high_numerator, low_numerator, + denominator) divides a UDWtype, composed of the UWtype integers + HIGH_NUMERATOR and LOW_NUMERATOR, by DENOMINATOR and places the quotient + in QUOTIENT and the remainder in REMAINDER. HIGH_NUMERATOR must be less + than DENOMINATOR for correct operation. If, in addition, the most + significant bit of DENOMINATOR must be 1, then the pre-processor symbol + UDIV_NEEDS_NORMALIZATION is defined to 1. + + 4) sdiv_qrnnd(quotient, remainder, high_numerator, low_numerator, + denominator). Like udiv_qrnnd but the numbers are signed. The quotient + is rounded towards 0. + + 5) count_leading_zeros(count, x) counts the number of zero-bits from the + msb to the first non-zero bit in the UWtype X. This is the number of + steps X needs to be shifted left to set the msb. 
Undefined for X == 0, + unless the symbol COUNT_LEADING_ZEROS_0 is defined to some value. + + 6) count_trailing_zeros(count, x) like count_leading_zeros, but counts + from the least significant end. + + 7) add_ssaaaa(high_sum, low_sum, high_addend_1, low_addend_1, + high_addend_2, low_addend_2) adds two two-word UWtype integers, composed of + HIGH_ADDEND_1 and LOW_ADDEND_1, and HIGH_ADDEND_2 and LOW_ADDEND_2 + respectively. The result is placed in HIGH_SUM and LOW_SUM. Overflow + (i.e. carry out) is not stored anywhere, and is lost. + + 8) sub_ddmmss(high_difference, low_difference, high_minuend, low_minuend, + high_subtrahend, low_subtrahend) subtracts two two-word UWtype integers, + composed of HIGH_MINUEND and LOW_MINUEND, and HIGH_SUBTRAHEND and + LOW_SUBTRAHEND respectively. The result is placed in HIGH_DIFFERENCE + and LOW_DIFFERENCE. Overflow (i.e. carry out) is not stored anywhere, + and is lost. + + If any of these macros are left undefined for a particular CPU, + C macros are used. + + + Notes: + + For add_ssaaaa the two high and two low addends can both commute, but + unfortunately gcc only supports one "%" commutative in each asm block. + This has always been so but is only documented in recent versions + (e.g. pre-release 3.3). Having two or more "%"s can cause an internal + compiler error in certain rare circumstances. + + Apparently it was only the last "%" that was ever actually respected, so + the code has been updated to leave just that. Clearly there's a free + choice whether high or low should get it, if there's a reason to favour + one over the other. Also obviously when the constraints on the two + operands are identical there's no benefit to the reloader in any "%" at + all. + + */ + +/* The CPUs come in alphabetical order below. + + Please add support for more CPUs here, or improve the current support + for the CPUs below! 
*/ + + +/* count_leading_zeros_gcc_clz is count_leading_zeros implemented with gcc + 3.4 __builtin_clzl or __builtin_clzll, according to our limb size. + Similarly count_trailing_zeros_gcc_ctz using __builtin_ctzl or + __builtin_ctzll. + + These builtins are only used where we have checked what code comes out; on + some chips they're merely libgcc calls, in which case we instead want an + inline (either asm or generic C). + + These builtins are better than an asm block of the same insn, since an + asm block doesn't give gcc any information about scheduling or resource + usage. We keep an asm block for use on prior versions of gcc though. + + For reference, __builtin_ffs existed in gcc prior to __builtin_clz, but + it's not used (for count_leading_zeros) because it generally gives extra + code to ensure the result is 0 when the input is 0, which we don't need + or want. */ + +#ifdef _LONG_LONG_LIMB +#define count_leading_zeros_gcc_clz(count,x) \ + do { \ + ASSERT ((x) != 0); \ + (count) = __builtin_clzll (x); \ + } while (0) +#else +#define count_leading_zeros_gcc_clz(count,x) \ + do { \ + ASSERT ((x) != 0); \ + (count) = __builtin_clzl (x); \ + } while (0) +#endif + +#ifdef _LONG_LONG_LIMB +#define count_trailing_zeros_gcc_ctz(count,x) \ + do { \ + ASSERT ((x) != 0); \ + (count) = __builtin_ctzll (x); \ + } while (0) +#else +#define count_trailing_zeros_gcc_ctz(count,x) \ + do { \ + ASSERT ((x) != 0); \ + (count) = __builtin_ctzl (x); \ + } while (0) +#endif diff --git a/mpn/generic/longlong.h b/mpn/generic/longlong.h new file mode 100644 index 00000000..51773b9c --- /dev/null +++ b/mpn/generic/longlong.h @@ -0,0 +1 @@ +/* An empty file, although later we will put the generic case in here */