mpn_half mpn_double asm for K8
This commit is contained in:
parent
94cc9b7a7b
commit
5955fc2424
60
mpn/x86_64/k8/double.asm
Normal file
60
mpn/x86_64/k8/double.asm
Normal file
@ -0,0 +1,60 @@
|
||||
dnl mpn_double
|
||||
|
||||
dnl Copyright 2011 The Code Cavern
|
||||
|
||||
dnl This file is part of the MPIR Library.
|
||||
|
||||
dnl The MPIR Library is free software; you can redistribute it and/or modify
|
||||
dnl it under the terms of the GNU Lesser General Public License as published
|
||||
dnl by the Free Software Foundation; either version 2.1 of the License, or (at
|
||||
dnl your option) any later version.
|
||||
|
||||
dnl The MPIR Library is distributed in the hope that it will be useful, but
|
||||
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
||||
dnl License for more details.
|
||||
|
||||
dnl You should have received a copy of the GNU Lesser General Public License
|
||||
dnl along with the MPIR Library; see the file COPYING.LIB. If not, write
|
||||
dnl to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
|
||||
dnl Boston, MA 02110-1301, USA.
|
||||
|
||||
include(`../config.m4')
|
||||
|
||||
ASM_START()
|
||||
PROLOGUE(mpn_double)
dnl mpn_double: shift the rsi-limb number stored at rdi left by one bit,
dnl in place, working from the lowest limb upwards.
dnl
dnl Reads:    rdi = address of the lowest limb
dnl           rsi = number of 64-bit limbs, called n below
dnl           (presumably the first two System V integer arguments;
dnl           confirm against the C prototype)
dnl Returns:  rax = bit shifted out of the highest limb, 0 or 1
dnl           (0 when n is 0, in which case no memory is touched)
dnl Modifies: rdx, rdi, rsi and the flags
dnl
dnl The carry flag links each limb to the next one. Between the shift and
dnl rotate instructions only lea, dec, nop and jcc are used, because none
dnl of them writes CF.
	mov	%rsi,%rax
	shr	$2,%rsi
	and	$3,%eax
dnl rax = n mod 4 limbs handled one at a time below, rsi = n/4 passes of
dnl the unrolled loop. The and also clears CF for the n mod 4 = 0 path.
	jz	t1
	shlq	$1,(%rdi)
	lea	8(%rdi),%rdi
	dec	%rax
	jz	t1
	rclq	$1,(%rdi)
	lea	8(%rdi),%rdi
	dec	%rax
	jz	t1
	rclq	$1,(%rdi)
	lea	8(%rdi),%rdi
dnl Third leftover limb: bring rax down to 0 as on every other path into
dnl t1. Without this the stale 1 would be added to the returned carry.
	dec	%rax
t1:
dnl Invariant: rax = 0 and CF = carry out of the limbs processed so far.
dnl Park the carry in rdx as 0 or -1 so that the zero test may clobber CF.
	sbb	%rdx,%rdx
	test	%rsi,%rsi
	jz	skiplp
dnl rdx + rdx recreates CF from the parked carry: -1 + -1 carries out.
	add	%rdx,%rdx
	.align	16
lp:
dnl Four limbs per pass. The nops are presumably K8 decode or scheduling
dnl padding; that is not verified here, so they are kept where they were.
	rclq	$1,(%rdi)
	nop
	rclq	$1,8(%rdi)
	rclq	$1,16(%rdi)
	rclq	$1,24(%rdi)
	nop
	dec	%rsi
	lea	32(%rdi),%rdi
	jnz	lp
dnl Capture the carry out of the last limb as 0 or -1. CF stays set.
	sbb	%rdx,%rdx
skiplp:
dnl On both paths rdx = 0 or -1 holds the final carry and rax = 0.
dnl This must be sub and not sbb: after the loop CF is still set, and
dnl sbb would subtract it again and cancel the carry being returned.
	sub	%rdx,%rax
	ret
EPILOGUE()
|
@ -1,20 +1,20 @@
|
||||
/* Generated by tuneup.c, 2011-02-22, gcc 4.4 */
|
||||
/* Generated by tuneup.c, 2011-07-11, gcc 4.5 */
|
||||
|
||||
#define MUL_KARATSUBA_THRESHOLD 24
|
||||
#define MUL_TOOM3_THRESHOLD 84
|
||||
#define MUL_TOOM4_THRESHOLD 248
|
||||
#define MUL_TOOM8H_THRESHOLD 466
|
||||
#define MUL_KARATSUBA_THRESHOLD 22
|
||||
#define MUL_TOOM3_THRESHOLD 134
|
||||
#define MUL_TOOM4_THRESHOLD 387
|
||||
#define MUL_TOOM8H_THRESHOLD 446
|
||||
|
||||
#define SQR_BASECASE_THRESHOLD 0 /* always (native) */
|
||||
#define SQR_KARATSUBA_THRESHOLD 32
|
||||
#define SQR_TOOM3_THRESHOLD 117
|
||||
#define SQR_TOOM4_THRESHOLD 498
|
||||
#define SQR_TOOM8_THRESHOLD 498
|
||||
#define SQR_KARATSUBA_THRESHOLD 43
|
||||
#define SQR_TOOM3_THRESHOLD 125
|
||||
#define SQR_TOOM4_THRESHOLD 512
|
||||
#define SQR_TOOM8_THRESHOLD 674
|
||||
|
||||
#define POWM_THRESHOLD 451
|
||||
#define POWM_THRESHOLD 464
|
||||
|
||||
#define GCD_THRESHOLD 438
|
||||
#define GCDEXT_THRESHOLD 996
|
||||
#define GCD_THRESHOLD 446
|
||||
#define GCDEXT_THRESHOLD 969
|
||||
#define JACOBI_BASE_METHOD 1
|
||||
|
||||
#define DIVREM_1_NORM_THRESHOLD MP_SIZE_T_MAX /* never */
|
||||
@ -28,47 +28,48 @@
|
||||
#define MOD_1_1_THRESHOLD 4
|
||||
#define MOD_1_2_THRESHOLD 8
|
||||
#define MOD_1_3_THRESHOLD 24
|
||||
#define DIVREM_HENSEL_QR_1_THRESHOLD 7
|
||||
#define DIVREM_HENSEL_QR_1_THRESHOLD 8
|
||||
#define RSH_DIVREM_HENSEL_QR_1_THRESHOLD 7
|
||||
#define DIVREM_EUCLID_HENSEL_THRESHOLD 68
|
||||
#define DIVREM_EUCLID_HENSEL_THRESHOLD 30
|
||||
|
||||
#define ROOTREM_THRESHOLD 11
|
||||
|
||||
#define GET_STR_DC_THRESHOLD 14
|
||||
#define GET_STR_PRECOMPUTE_THRESHOLD 23
|
||||
#define SET_STR_DC_THRESHOLD 542
|
||||
#define SET_STR_PRECOMPUTE_THRESHOLD 542
|
||||
#define GET_STR_DC_THRESHOLD 15
|
||||
#define GET_STR_PRECOMPUTE_THRESHOLD 26
|
||||
#define SET_STR_DC_THRESHOLD 27410
|
||||
#define SET_STR_PRECOMPUTE_THRESHOLD 52671
|
||||
|
||||
#define MUL_FFT_TABLE { 400, 928, 3264, 3840, 7168, 36864, 114688, 458752, 2359296, 7340032, 0 }
|
||||
#define MUL_FFT_MODF_THRESHOLD 464
|
||||
#define MUL_FFT_FULL_THRESHOLD 3648
|
||||
#define MUL_FFT_TABLE { 432, 1056, 3136, 3840, 7168, 45056, 114688, 589824, 0 }
|
||||
#define MUL_FFT_MODF_THRESHOLD 528
|
||||
#define MUL_FFT_FULL_THRESHOLD 3264
|
||||
|
||||
#define SQR_FFT_TABLE { 400, 928, 2368, 2816, 11264, 36864, 114688, 458752, 2359296, 7340032, 0 }
|
||||
#define SQR_FFT_MODF_THRESHOLD 400
|
||||
#define SQR_FFT_TABLE { 464, 928, 2368, 2816, 13312, 45056, 114688, 458752, 0 }
|
||||
#define SQR_FFT_MODF_THRESHOLD 464
|
||||
#define SQR_FFT_FULL_THRESHOLD 2496
|
||||
|
||||
#define MULLOW_BASECASE_THRESHOLD 10
|
||||
#define MULLOW_BASECASE_THRESHOLD 9
|
||||
#define MULLOW_DC_THRESHOLD 13
|
||||
#define MULLOW_MUL_THRESHOLD 9970
|
||||
|
||||
#define MULHIGH_BASECASE_THRESHOLD 22
|
||||
#define MULHIGH_DC_THRESHOLD 22
|
||||
#define MULHIGH_BASECASE_THRESHOLD 21
|
||||
#define MULHIGH_DC_THRESHOLD 21
|
||||
#define MULHIGH_MUL_THRESHOLD 9970
|
||||
|
||||
#define MULMOD_2EXPM1_THRESHOLD 20
|
||||
|
||||
#define FAC_UI_THRESHOLD 32756
|
||||
#define DC_DIV_QR_THRESHOLD 54
|
||||
#define DC_DIVAPPR_Q_N_THRESHOLD 136
|
||||
#define DC_DIV_QR_THRESHOLD 46
|
||||
#define DC_DIVAPPR_Q_N_THRESHOLD 156
|
||||
#define INV_DIV_QR_THRESHOLD 9894
|
||||
#define INV_DIVAPPR_Q_N_THRESHOLD 136
|
||||
#define DC_DIV_Q_THRESHOLD 205
|
||||
#define INV_DIVAPPR_Q_N_THRESHOLD 156
|
||||
#define DC_DIV_Q_THRESHOLD 195
|
||||
#define INV_DIV_Q_THRESHOLD 9894
|
||||
#define DC_DIVAPPR_Q_THRESHOLD 174
|
||||
#define INV_DIVAPPR_Q_THRESHOLD 19921
|
||||
#define DC_DIVAPPR_Q_THRESHOLD 171
|
||||
#define INV_DIVAPPR_Q_THRESHOLD 19441
|
||||
#define DC_BDIV_QR_THRESHOLD 42
|
||||
#define DC_BDIV_Q_THRESHOLD 20
|
||||
/* Tuneup completed successfully, took 794 seconds */
|
||||
#define DC_BDIV_Q_THRESHOLD 24
|
||||
/* Tuneup completed successfully, took 131 seconds */
|
||||
|
||||
|
||||
|
||||
#define MUL_FFT_TABLE2 {{1, 3}, {205, 4}, {377, 5}, {386, 4}, {404, 5}, {813, 6}, {850, 5}, {869, 6}, {971, 5}, {993, 6}, {2392, 7}, {2445, 6}, {2668, 7}, {2727, 6}, {2787, 7}, {2976, 6}, {3042, 7}, {3109, 6}, {3178, 7}, {3248, 8}, {3393, 7}, {3468, 8}, {3544, 7}, {3784, 8}, {3867, 6}, {3952, 7}, {4039, 8}, {4407, 7}, {4504, 8}, {4914, 7}, {5022, 8}, {6957, 9}, {7756, 8}, {8460, 9}, {8836, 8}, {9030, 9}, {9850, 8}, {10513, 9}, {11976, 8}, {12239, 9}, {13939, 8}, {14245, 9}, {15876, 8}, {16224, 9}, {22461, 10}, {23970, 9}, {26142, 10}, {27898, 9}, {28509, 10}, {31772, 9}, {33906, 10}, {36184, 9}, {36977, 10}, {40326, 9}, {41210, 10}, {44943, 11}, {45928, 10}, {46934, 11}, {47962, 10}, {57042, 11}, {62207, 12}, {63570, 10}, {73983, 11}, {80681, 10}, {86099, 11}, {98051, 10}, {102394, 11}, {114110, 12}, {127165, 11}, {164920, 12}, {196129, 11}, {228243, 12}, {233241, 13}, {254354, 12}, {259924, 11}, {277377, 12}, {456509, 13}, {519871, 12}, {659749, 13}, {784582, 14}, {MP_SIZE_T_MAX,0}}
|
||||
|
61
mpn/x86_64/k8/half.asm
Normal file
61
mpn/x86_64/k8/half.asm
Normal file
@ -0,0 +1,61 @@
|
||||
dnl mpn_half
|
||||
|
||||
dnl Copyright 2011 The Code Cavern
|
||||
|
||||
dnl This file is part of the MPIR Library.
|
||||
|
||||
dnl The MPIR Library is free software; you can redistribute it and/or modify
|
||||
dnl it under the terms of the GNU Lesser General Public License as published
|
||||
dnl by the Free Software Foundation; either version 2.1 of the License, or (at
|
||||
dnl your option) any later version.
|
||||
|
||||
dnl The MPIR Library is distributed in the hope that it will be useful, but
|
||||
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
||||
dnl License for more details.
|
||||
|
||||
dnl You should have received a copy of the GNU Lesser General Public License
|
||||
dnl along with the MPIR Library; see the file COPYING.LIB. If not, write
|
||||
dnl to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
|
||||
dnl Boston, MA 02110-1301, USA.
|
||||
|
||||
include(`../config.m4')
|
||||
|
||||
ASM_START()
|
||||
PROLOGUE(mpn_half)
dnl mpn_half: shift the rsi-limb number stored at rdi right by one bit,
dnl in place, working from the highest limb downwards.
dnl
dnl Reads:    rdi = address of the lowest limb
dnl           rsi = number of 64-bit limbs, called n below
dnl           (presumably the first two System V integer arguments;
dnl           confirm against the C prototype)
dnl Returns:  rax = bit shifted out of the lowest limb, 0 or 1
dnl           (0 when n is 0, in which case no memory is touched)
dnl Modifies: rdx, rdi, rsi and the flags
dnl
dnl The carry flag links each limb to the next lower one. Between the
dnl shift and rotate instructions only lea, dec, nop and jcc are used,
dnl because none of them writes CF.
	mov	%rsi,%rax
dnl Point rdi at the highest limb, rdi + 8*n - 8.
	lea	-8(%rdi,%rsi,8),%rdi
	shr	$2,%rsi
	and	$3,%eax
dnl rax = n mod 4 limbs handled one at a time below, rsi = n/4 passes of
dnl the unrolled loop. The and also clears CF for the n mod 4 = 0 path.
	jz	t1
	shrq	$1,(%rdi)
	lea	-8(%rdi),%rdi
	dec	%rax
	jz	t1
	rcrq	$1,(%rdi)
	lea	-8(%rdi),%rdi
	dec	%rax
	jz	t1
	rcrq	$1,(%rdi)
	lea	-8(%rdi),%rdi
dnl Third leftover limb: bring rax down to 0 as on every other path into
dnl t1. Without this the stale 1 would be added to the returned bit.
	dec	%rax
t1:
dnl Invariant: rax = 0 and CF = bit shifted out of the limbs done so far.
dnl Park that bit in rdx as 0 or -1 so that the zero test may clobber CF.
	sbb	%rdx,%rdx
	test	%rsi,%rsi
	jz	skiplp
dnl rdx + rdx recreates CF from the parked bit: -1 + -1 carries out.
	add	%rdx,%rdx
	.align	16
lp:
dnl Four limbs per pass, highest first. The nops are presumably K8 decode
dnl or scheduling padding; that is not verified here, so they are kept.
	rcrq	$1,(%rdi)
	nop
	rcrq	$1,-8(%rdi)
	rcrq	$1,-16(%rdi)
	rcrq	$1,-24(%rdi)
	nop
	dec	%rsi
	lea	-32(%rdi),%rdi
	jnz	lp
dnl Capture the bit shifted out of the lowest limb as 0 or -1. CF stays set.
	sbb	%rdx,%rdx
skiplp:
dnl On both paths rdx = 0 or -1 holds the final bit and rax = 0.
dnl This must be sub and not sbb: after the loop CF is still set, and
dnl sbb would subtract it again and cancel the bit being returned.
	sub	%rdx,%rax
	ret
EPILOGUE()
|
Loading…
Reference in New Issue
Block a user