New K8 32bit asm function mul_basecase
This commit is contained in:
parent
10e327969f
commit
875aba0b1c
361
mpn/x86/k7/mmx/k8/mul_basecase.asm
Normal file
361
mpn/x86/k7/mmx/k8/mul_basecase.asm
Normal file
@ -0,0 +1,361 @@
|
||||
dnl mpn_mul_basecase
|
||||
|
||||
dnl Copyright 2009 Jason Moxham
|
||||
|
||||
dnl This file is part of the MPIR Library.
|
||||
|
||||
dnl The MPIR Library is free software; you can redistribute it and/or modify
|
||||
dnl it under the terms of the GNU Lesser General Public License as published
|
||||
dnl by the Free Software Foundation; either version 2.1 of the License, or (at
|
||||
dnl your option) any later version.
|
||||
|
||||
dnl The MPIR Library is distributed in the hope that it will be useful, but
|
||||
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
||||
dnl License for more details.
|
||||
|
||||
dnl You should have received a copy of the GNU Lesser General Public License
|
||||
dnl along with the MPIR Library; see the file COPYING.LIB. If not, write
|
||||
dnl to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
|
||||
dnl Boston, MA 02110-1301, USA.
|
||||
|
||||
include(`../config.m4')
|
||||
|
||||
C ret mpn_mul_basecase(mp_ptr dst ,mp_ptr src1,mp_size_t size1, mp_ptr src2,mp_size_t size2)
|
||||
|
||||
C Caller sets up the stack like
|
||||
C size2 44
|
||||
C src2 40
|
||||
C size1 36
|
||||
C src1 32
|
||||
C dst 28
|
||||
C retaddr 24
|
||||
C MUL 20 we sub $8.esp for this
|
||||
C SIZE1T 16 and this , and remember to add 8 back when retting
|
||||
C SAVE_EBX 12 we push and pop this
|
||||
C SAVE_EBP 8 and this . we could do one big sub of esp
|
||||
C SAVE_EDI 4 and this . and just mov in and out
|
||||
C SAVE_ESI 0 and this . this would save macro-ops
|
||||
|
||||
define(`SIZE2',`44(%esp)')
|
||||
define(`SRC2',`40(%esp)')
|
||||
define(`SIZE1',`36(%esp)')
|
||||
define(`SRC1',`32(%esp)')
|
||||
define(`DST',`28(%esp)')
|
||||
define(`MUL',`20(%esp)')
|
||||
define(`SIZE1T',`16(%esp)')
|
||||
define(`SAVE_EBX',`12(%esp)')
|
||||
define(`SAVE_EBP',`8(%esp)')
|
||||
define(`SAVE_EDI',`4(%esp)')
|
||||
define(`SAVE_ESI',`(%esp)')
|
||||
|
||||
|
||||
|
||||
define(`MULSTART',`
|
||||
mov SIZE1,%eax
|
||||
mov SRC2,%ecx
|
||||
mov %eax,SIZE1T
|
||||
mov SRC1,%esi
|
||||
mov (%ecx),%edx
|
||||
mov %edx,MUL
|
||||
mov (%esi),%eax
|
||||
mul %edx
|
||||
mov %eax,%ebx
|
||||
mov %edx,%ecx
|
||||
mov DST,%edi
|
||||
mov MUL,%edx
|
||||
mov `$'0,%ebp
|
||||
subl `$'4,SIZE1T
|
||||
')
|
||||
|
||||
define(`MULLOOP',`
|
||||
mov 4(%esi),%eax
|
||||
mul %edx
|
||||
mov %ebx,(%edi)
|
||||
add %eax,%ecx
|
||||
adc %edx,%ebp
|
||||
mov 8(%esi),%eax
|
||||
mull MUL
|
||||
mov `$'0,%ebx
|
||||
mov %ecx,4(%edi)
|
||||
add %eax,%ebp
|
||||
adc %edx,%ebx
|
||||
mov 12(%esi),%eax
|
||||
mov MUL,%edx
|
||||
mov `$'0,%ecx
|
||||
mul %edx
|
||||
mov %ebp,8(%edi)
|
||||
add %eax,%ebx
|
||||
adc %edx,%ecx
|
||||
lea 12(%esi),%esi
|
||||
mov MUL,%edx
|
||||
subl `$'3,SIZE1T
|
||||
lea 12(%edi),%edi
|
||||
mov `$'0,%ebp
|
||||
')
|
||||
|
||||
define(`MULEND2',`
|
||||
mov 4(%esi),%eax
|
||||
mul %edx
|
||||
mov %ebx,(%edi)
|
||||
add %eax,%ecx
|
||||
adc %edx,%ebp
|
||||
mov 8(%esi),%eax
|
||||
mull MUL
|
||||
mov `$'0,%ebx
|
||||
mov %ecx,4(%edi)
|
||||
add %eax,%ebp
|
||||
adc %edx,%ebx
|
||||
mov %ebp,8(%edi)
|
||||
mov %ebx,12(%edi)
|
||||
decl SIZE2
|
||||
')
|
||||
|
||||
define(`MULEND1',`
|
||||
mov 4(%esi),%eax
|
||||
mul %edx
|
||||
mov %ebx,(%edi)
|
||||
add %eax,%ecx
|
||||
adc %edx,%ebp
|
||||
mov %ecx,4(%edi)
|
||||
mov %ebp,8(%edi)
|
||||
mov DST,%edi
|
||||
decl SIZE2
|
||||
')
|
||||
|
||||
define(`MULEND0',`
|
||||
mov %ebx,(%edi)
|
||||
mov %ecx,4(%edi)
|
||||
mov SRC2,%ebx
|
||||
decl SIZE2
|
||||
')
|
||||
|
||||
|
||||
define(`ADDMULLOOP',`
|
||||
mov 4(%esi),%eax
|
||||
mul %edx
|
||||
add %ebx,(%edi)
|
||||
adc %eax,%ecx
|
||||
adc %edx,%ebp
|
||||
mov 8(%esi),%eax
|
||||
mull MUL
|
||||
mov `$'0,%ebx
|
||||
add %ecx,4(%edi)
|
||||
adc %eax,%ebp
|
||||
adc %edx,%ebx
|
||||
mov 12(%esi),%eax
|
||||
mov MUL,%edx
|
||||
mov `$'0,%ecx
|
||||
mul %edx
|
||||
add %ebp,8(%edi)
|
||||
adc %eax,%ebx
|
||||
adc %edx,%ecx
|
||||
lea 12(%esi),%esi
|
||||
mov MUL,%edx
|
||||
subl `$'3,SIZE1T
|
||||
lea 12(%edi),%edi
|
||||
mov `$'0,%ebp
|
||||
')
|
||||
|
||||
define(`ADDMULSTART2',`
|
||||
mov SIZE1,%eax
|
||||
mov SRC2,%ecx
|
||||
mov %eax,SIZE1T
|
||||
lea 4(%ecx),%ecx
|
||||
mov SRC1,%esi
|
||||
mov %ecx,SRC2
|
||||
mov (%ecx),%edx
|
||||
mov %edx,MUL
|
||||
mov (%esi),%eax
|
||||
mul %edx
|
||||
mov %eax,%ebx
|
||||
mov %edx,%ecx
|
||||
mov DST,%edi
|
||||
mov MUL,%edx
|
||||
mov `$'0,%ebp
|
||||
subl `$'4,SIZE1T
|
||||
lea 4(%edi),%edi
|
||||
mov %edi,DST
|
||||
')
|
||||
|
||||
define(`ADDMULEND2',`
|
||||
mov 4(%esi),%eax
|
||||
mul %edx
|
||||
add %ebx,(%edi)
|
||||
adc %eax,%ecx
|
||||
adc %edx,%ebp
|
||||
mov 8(%esi),%eax
|
||||
mull MUL
|
||||
mov `$'0,%ebx
|
||||
add %ecx,4(%edi)
|
||||
adc %eax,%ebp
|
||||
adc %edx,%ebx
|
||||
add %ebp,8(%edi)
|
||||
adc `$'0,%ebx
|
||||
mov %ebx,12(%edi)
|
||||
decl SIZE2
|
||||
')
|
||||
|
||||
define(`ADDMULSTART1',`
|
||||
mov SRC2,%ebx
|
||||
mov SIZE1,%eax
|
||||
mov %eax,SIZE1T
|
||||
lea 4(%ebx),%ebx
|
||||
mov SRC1,%esi
|
||||
mov (%ebx),%edx
|
||||
mov %edx,MUL
|
||||
mov (%esi),%eax
|
||||
lea 4(%edi),%edi
|
||||
mov %ebx,SRC2
|
||||
mul %edx
|
||||
mov %eax,%ebx
|
||||
mov %edx,%ecx
|
||||
mov MUL,%edx
|
||||
mov `$'0,%ebp
|
||||
mov %edi,DST
|
||||
subl `$'4,SIZE1T
|
||||
')
|
||||
|
||||
|
||||
define(`ADDMULEND1',`
|
||||
mov 4(%esi),%eax
|
||||
mul %edx
|
||||
add %ebx,(%edi)
|
||||
adc %eax,%ecx
|
||||
adc %edx,%ebp
|
||||
add %ecx,4(%edi)
|
||||
adc `$'0,%ebp
|
||||
mov %ebp,8(%edi)
|
||||
mov DST,%edi
|
||||
decl SIZE2
|
||||
')
|
||||
|
||||
define(`ADDMULSTART0',`
|
||||
mov SIZE1,%eax
|
||||
lea 4(%ebx),%ebx
|
||||
mov %ebx,SRC2
|
||||
mov %eax,SIZE1T
|
||||
mov SRC1,%esi
|
||||
mov DST,%edi
|
||||
mov (%ebx),%edx
|
||||
mov %edx,MUL
|
||||
mov (%esi),%eax
|
||||
lea 4(%edi),%edi
|
||||
mul %edx
|
||||
mov %eax,%ebx
|
||||
mov %edi,DST
|
||||
mov %edx,%ecx
|
||||
mov MUL,%edx
|
||||
mov `$'0,%ebp
|
||||
subl `$'4,SIZE1T
|
||||
')
|
||||
|
||||
define(`ADDMULEND0',`
|
||||
add %ebx,(%edi)
|
||||
adc `$'0,%ecx
|
||||
mov %ecx,4(%edi)
|
||||
mov SRC2,%ebx
|
||||
decl SIZE2
|
||||
')
|
||||
|
||||
ASM_START()
|
||||
PROLOGUE(mpn_mul_basecase)
|
||||
sub $24,%esp
|
||||
mov %ebx,SAVE_EBX
|
||||
mov %ebp,SAVE_EBP
|
||||
mov %edi,SAVE_EDI
|
||||
mov %esi,SAVE_ESI
|
||||
#sub $8,%esp
|
||||
#push %ebx
|
||||
#push %ebp
|
||||
#push %esi
|
||||
#push %edi
|
||||
MULSTART
|
||||
jc skipmullp
|
||||
ALIGN(16)
|
||||
mullp:
|
||||
MULLOOP
|
||||
jnc mullp
|
||||
skipmullp:
|
||||
cmpl $-2,SIZE1T
|
||||
jl case0
|
||||
je case1
|
||||
case2:
|
||||
MULEND2
|
||||
jz theend
|
||||
ALIGN(16)
|
||||
lp2:
|
||||
ADDMULSTART2
|
||||
jc skipaddmullp2
|
||||
ALIGN(16)
|
||||
addmullp2:
|
||||
ADDMULLOOP
|
||||
jnc addmullp2
|
||||
skipaddmullp2:
|
||||
ADDMULEND2
|
||||
jnz lp2
|
||||
theend:
|
||||
mov SAVE_EBX,%ebx
|
||||
mov SAVE_EBP,%ebp
|
||||
mov SAVE_EDI,%edi
|
||||
mov SAVE_ESI,%esi
|
||||
add $24,%esp
|
||||
#pop %edi
|
||||
#pop %esi
|
||||
#pop %ebp
|
||||
#pop %ebx
|
||||
#add $8,%esp
|
||||
ret
|
||||
ALIGN(16)
|
||||
case1:
|
||||
MULEND1
|
||||
jz theend
|
||||
ALIGN(16)
|
||||
lp1:
|
||||
ADDMULSTART1
|
||||
jc skipaddmullp1
|
||||
ALIGN(16)
|
||||
addmullp1:
|
||||
ADDMULLOOP
|
||||
jnc addmullp1
|
||||
skipaddmullp1:
|
||||
ADDMULEND1
|
||||
jnz lp1
|
||||
mov SAVE_EBX,%ebx
|
||||
mov SAVE_EBP,%ebp
|
||||
mov SAVE_EDI,%edi
|
||||
mov SAVE_ESI,%esi
|
||||
add $24,%esp
|
||||
#pop %edi
|
||||
#pop %esi
|
||||
#pop %ebp
|
||||
#pop %ebx
|
||||
#add $8,%esp
|
||||
ret
|
||||
ALIGN(16)
|
||||
case0:
|
||||
MULEND0
|
||||
jz theend
|
||||
ALIGN(16)
|
||||
lp0:
|
||||
ADDMULSTART0
|
||||
jc skipaddmullp0
|
||||
ALIGN(16)
|
||||
addmullp0:
|
||||
ADDMULLOOP
|
||||
jnc addmullp0
|
||||
skipaddmullp0:
|
||||
ADDMULEND0
|
||||
jnz lp0
|
||||
mov SAVE_EBX,%ebx
|
||||
mov SAVE_EBP,%ebp
|
||||
mov SAVE_EDI,%edi
|
||||
mov SAVE_ESI,%esi
|
||||
add $24,%esp
|
||||
#pop %edi
|
||||
#pop %esi
|
||||
#pop %ebp
|
||||
#pop %ebx
|
||||
#add $8,%esp
|
||||
ret
|
||||
EPILOGUE()
|
Loading…
Reference in New Issue
Block a user