mpn_half mpn_double asm for K8

This commit is contained in:
jasonmoxham 2011-07-13 09:48:02 +00:00
parent 94cc9b7a7b
commit 5955fc2424
3 changed files with 156 additions and 34 deletions

60
mpn/x86_64/k8/double.asm Normal file
View File

@ -0,0 +1,60 @@
dnl mpn_double
dnl Copyright 2011 The Code Cavern
dnl This file is part of the MPIR Library.
dnl The MPIR Library is free software; you can redistribute it and/or modify
dnl it under the terms of the GNU Lesser General Public License as published
dnl by the Free Software Foundation; either version 2.1 of the License, or (at
dnl your option) any later version.
dnl The MPIR Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
dnl License for more details.
dnl You should have received a copy of the GNU Lesser General Public License
dnl along with the MPIR Library; see the file COPYING.LIB. If not, write
dnl to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
dnl Boston, MA 02110-1301, USA.
include(`../config.m4')
ASM_START()
dnl mpn_double: shift the limb vector at (%rdi) left by one bit, in place.
dnl
dnl In:    %rdi = pointer to the lowest limb, %rsi = number of 64-bit limbs
dnl Out:   %rax = bit carried out of the highest limb processed (0 or 1)
dnl Clobb: %rdx, %rsi, %rdi, flags
dnl
dnl Layout: the (n mod 4) lowest limbs are handled one at a time, then the
dnl remaining n/4 groups of four limbs go through the unrolled loop.  The
dnl running carry lives in CF; only instructions that leave CF alone
dnl (lea, dec, jcc) are allowed between the rotates.
dnl
dnl The return value is derived solely from the carry mask in %rdx, so it
dnl does not depend on what the dispatch counter in %rax happens to hold
dnl (it is 1, not 0, after the three-limb lead-in) nor on the state of CF
dnl at the exit label.
PROLOGUE(mpn_double)
	mov	%rsi, %rax
	shr	$2, %rsi
dnl and leaves CF = 0, so the n mod 4 == 0 path enters t1 with no carry.
	and	$3, %eax
	jz	t1
	shlq	$1, (%rdi)
	lea	8(%rdi), %rdi
dnl dec is used for the countdown because it does not modify CF.
	dec	%rax
	jz	t1
	rclq	$1, (%rdi)
	lea	8(%rdi), %rdi
	dec	%rax
	jz	t1
	rclq	$1, (%rdi)
	lea	8(%rdi), %rdi
t1:
dnl Park the carry as a mask: rdx = 0 - CF, i.e. 0 or all ones.
	sbb	%rdx, %rdx
	test	%rsi, %rsi
	jz	skiplp
dnl mask + mask overflows exactly when the mask is all ones: CF is restored.
	add	%rdx, %rdx
	.align	16
lp:
	rclq	$1, (%rdi)
	nop
	rclq	$1, 8(%rdi)
	rclq	$1, 16(%rdi)
	rclq	$1, 24(%rdi)
	nop
	dec	%rsi
	lea	32(%rdi), %rdi
	jnz	lp
dnl Capture the final carry as a mask again.
	sbb	%rdx, %rdx
skiplp:
dnl rax = -mask = carry out (0 or 1).
	mov	%rdx, %rax
	neg	%rax
	ret
EPILOGUE()

View File

@ -1,20 +1,20 @@
/* Generated by tuneup.c, 2011-02-22, gcc 4.4 */
/* Generated by tuneup.c, 2011-07-11, gcc 4.5 */
#define MUL_KARATSUBA_THRESHOLD 24
#define MUL_TOOM3_THRESHOLD 84
#define MUL_TOOM4_THRESHOLD 248
#define MUL_TOOM8H_THRESHOLD 466
#define MUL_KARATSUBA_THRESHOLD 22
#define MUL_TOOM3_THRESHOLD 134
#define MUL_TOOM4_THRESHOLD 387
#define MUL_TOOM8H_THRESHOLD 446
#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
#define SQR_KARATSUBA_THRESHOLD 32
#define SQR_TOOM3_THRESHOLD 117
#define SQR_TOOM4_THRESHOLD 498
#define SQR_TOOM8_THRESHOLD 498
#define SQR_KARATSUBA_THRESHOLD 43
#define SQR_TOOM3_THRESHOLD 125
#define SQR_TOOM4_THRESHOLD 512
#define SQR_TOOM8_THRESHOLD 674
#define POWM_THRESHOLD 451
#define POWM_THRESHOLD 464
#define GCD_THRESHOLD 438
#define GCDEXT_THRESHOLD 996
#define GCD_THRESHOLD 446
#define GCDEXT_THRESHOLD 969
#define JACOBI_BASE_METHOD 1
#define DIVREM_1_NORM_THRESHOLD MP_SIZE_T_MAX /* never */
@ -28,47 +28,48 @@
#define MOD_1_1_THRESHOLD 4
#define MOD_1_2_THRESHOLD 8
#define MOD_1_3_THRESHOLD 24
#define DIVREM_HENSEL_QR_1_THRESHOLD 7
#define DIVREM_HENSEL_QR_1_THRESHOLD 8
#define RSH_DIVREM_HENSEL_QR_1_THRESHOLD 7
#define DIVREM_EUCLID_HENSEL_THRESHOLD 68
#define DIVREM_EUCLID_HENSEL_THRESHOLD 30
#define ROOTREM_THRESHOLD 11
#define GET_STR_DC_THRESHOLD 14
#define GET_STR_PRECOMPUTE_THRESHOLD 23
#define SET_STR_DC_THRESHOLD 542
#define SET_STR_PRECOMPUTE_THRESHOLD 542
#define GET_STR_DC_THRESHOLD 15
#define GET_STR_PRECOMPUTE_THRESHOLD 26
#define SET_STR_DC_THRESHOLD 27410
#define SET_STR_PRECOMPUTE_THRESHOLD 52671
#define MUL_FFT_TABLE { 400, 928, 3264, 3840, 7168, 36864, 114688, 458752, 2359296, 7340032, 0 }
#define MUL_FFT_MODF_THRESHOLD 464
#define MUL_FFT_FULL_THRESHOLD 3648
#define MUL_FFT_TABLE { 432, 1056, 3136, 3840, 7168, 45056, 114688, 589824, 0 }
#define MUL_FFT_MODF_THRESHOLD 528
#define MUL_FFT_FULL_THRESHOLD 3264
#define SQR_FFT_TABLE { 400, 928, 2368, 2816, 11264, 36864, 114688, 458752, 2359296, 7340032, 0 }
#define SQR_FFT_MODF_THRESHOLD 400
#define SQR_FFT_TABLE { 464, 928, 2368, 2816, 13312, 45056, 114688, 458752, 0 }
#define SQR_FFT_MODF_THRESHOLD 464
#define SQR_FFT_FULL_THRESHOLD 2496
#define MULLOW_BASECASE_THRESHOLD 10
#define MULLOW_BASECASE_THRESHOLD 9
#define MULLOW_DC_THRESHOLD 13
#define MULLOW_MUL_THRESHOLD 9970
#define MULHIGH_BASECASE_THRESHOLD 22
#define MULHIGH_DC_THRESHOLD 22
#define MULHIGH_BASECASE_THRESHOLD 21
#define MULHIGH_DC_THRESHOLD 21
#define MULHIGH_MUL_THRESHOLD 9970
#define MULMOD_2EXPM1_THRESHOLD 20
#define FAC_UI_THRESHOLD 32756
#define DC_DIV_QR_THRESHOLD 54
#define DC_DIVAPPR_Q_N_THRESHOLD 136
#define DC_DIV_QR_THRESHOLD 46
#define DC_DIVAPPR_Q_N_THRESHOLD 156
#define INV_DIV_QR_THRESHOLD 9894
#define INV_DIVAPPR_Q_N_THRESHOLD 136
#define DC_DIV_Q_THRESHOLD 205
#define INV_DIVAPPR_Q_N_THRESHOLD 156
#define DC_DIV_Q_THRESHOLD 195
#define INV_DIV_Q_THRESHOLD 9894
#define DC_DIVAPPR_Q_THRESHOLD 174
#define INV_DIVAPPR_Q_THRESHOLD 19921
#define DC_DIVAPPR_Q_THRESHOLD 171
#define INV_DIVAPPR_Q_THRESHOLD 19441
#define DC_BDIV_QR_THRESHOLD 42
#define DC_BDIV_Q_THRESHOLD 20
/* Tuneup completed successfully, took 794 seconds */
#define DC_BDIV_Q_THRESHOLD 24
/* Tuneup completed successfully, took 131 seconds */
#define MUL_FFT_TABLE2 {{1, 3}, {205, 4}, {377, 5}, {386, 4}, {404, 5}, {813, 6}, {850, 5}, {869, 6}, {971, 5}, {993, 6}, {2392, 7}, {2445, 6}, {2668, 7}, {2727, 6}, {2787, 7}, {2976, 6}, {3042, 7}, {3109, 6}, {3178, 7}, {3248, 8}, {3393, 7}, {3468, 8}, {3544, 7}, {3784, 8}, {3867, 6}, {3952, 7}, {4039, 8}, {4407, 7}, {4504, 8}, {4914, 7}, {5022, 8}, {6957, 9}, {7756, 8}, {8460, 9}, {8836, 8}, {9030, 9}, {9850, 8}, {10513, 9}, {11976, 8}, {12239, 9}, {13939, 8}, {14245, 9}, {15876, 8}, {16224, 9}, {22461, 10}, {23970, 9}, {26142, 10}, {27898, 9}, {28509, 10}, {31772, 9}, {33906, 10}, {36184, 9}, {36977, 10}, {40326, 9}, {41210, 10}, {44943, 11}, {45928, 10}, {46934, 11}, {47962, 10}, {57042, 11}, {62207, 12}, {63570, 10}, {73983, 11}, {80681, 10}, {86099, 11}, {98051, 10}, {102394, 11}, {114110, 12}, {127165, 11}, {164920, 12}, {196129, 11}, {228243, 12}, {233241, 13}, {254354, 12}, {259924, 11}, {277377, 12}, {456509, 13}, {519871, 12}, {659749, 13}, {784582, 14}, {MP_SIZE_T_MAX,0}}

61
mpn/x86_64/k8/half.asm Normal file
View File

@ -0,0 +1,61 @@
dnl mpn_half
dnl Copyright 2011 The Code Cavern
dnl This file is part of the MPIR Library.
dnl The MPIR Library is free software; you can redistribute it and/or modify
dnl it under the terms of the GNU Lesser General Public License as published
dnl by the Free Software Foundation; either version 2.1 of the License, or (at
dnl your option) any later version.
dnl The MPIR Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
dnl License for more details.
dnl You should have received a copy of the GNU Lesser General Public License
dnl along with the MPIR Library; see the file COPYING.LIB. If not, write
dnl to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
dnl Boston, MA 02110-1301, USA.
include(`../config.m4')
ASM_START()
dnl mpn_half: shift the limb vector at (%rdi) right by one bit, in place.
dnl
dnl In:    %rdi = pointer to the lowest limb, %rsi = number of 64-bit limbs
dnl Out:   %rax = bit moved out of the lowest limb, as 0 or 1
dnl Clobb: %rdx, %rsi, %rdi, flags
dnl
dnl NOTE(review): the 0/1 convention is what the original code produced on
dnl its working paths (n = 1, 2); confirm callers do not expect the bit in
dnl the top position of the returned limb instead.
dnl
dnl Layout: processing runs from the highest limb downwards.  The (n mod 4)
dnl highest limbs are handled one at a time, then the remaining n/4 groups
dnl of four limbs go through the unrolled loop.  The running carry lives in
dnl CF; only instructions that leave CF alone (lea, dec, jcc) are allowed
dnl between the rotates.
dnl
dnl The return value is derived solely from the carry mask in %rdx, so it
dnl does not depend on what the dispatch counter in %rax happens to hold
dnl (it is 1, not 0, after the three-limb lead-in) nor on the state of CF
dnl at the exit label.
PROLOGUE(mpn_half)
	mov	%rsi, %rax
dnl Point rdi at the highest limb: rdi + 8*n - 8.
	lea	-8(%rdi,%rsi,8), %rdi
	shr	$2, %rsi
dnl and leaves CF = 0, so the n mod 4 == 0 path enters t1 with no carry.
	and	$3, %eax
	jz	t1
	shrq	$1, (%rdi)
	lea	-8(%rdi), %rdi
dnl dec is used for the countdown because it does not modify CF.
	dec	%rax
	jz	t1
	rcrq	$1, (%rdi)
	lea	-8(%rdi), %rdi
	dec	%rax
	jz	t1
	rcrq	$1, (%rdi)
	lea	-8(%rdi), %rdi
t1:
dnl Park the carry as a mask: rdx = 0 - CF, i.e. 0 or all ones.
	sbb	%rdx, %rdx
	test	%rsi, %rsi
	jz	skiplp
dnl mask + mask overflows exactly when the mask is all ones: CF is restored.
	add	%rdx, %rdx
	.align	16
lp:
	rcrq	$1, (%rdi)
	nop
	rcrq	$1, -8(%rdi)
	rcrq	$1, -16(%rdi)
	rcrq	$1, -24(%rdi)
	nop
	dec	%rsi
	lea	-32(%rdi), %rdi
	jnz	lp
dnl Capture the final carry as a mask again.
	sbb	%rdx, %rdx
skiplp:
dnl rax = -mask = bit moved out (0 or 1).
	mov	%rdx, %rax
	neg	%rax
	ret
EPILOGUE()