add new function core2/penryn mpn_addlsh_n

This commit is contained in:
jasonmoxham 2009-08-23 15:58:03 +00:00
parent da44b12898
commit 211e597c89


@@ -0,0 +1,143 @@
dnl mpn_addlsh_n
dnl Copyright 2009 Jason Moxham
dnl This file is part of the MPIR Library.
dnl The MPIR Library is free software; you can redistribute it and/or modify
dnl it under the terms of the GNU Lesser General Public License as published
dnl by the Free Software Foundation; either version 2.1 of the License, or (at
dnl your option) any later version.
dnl The MPIR Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
dnl License for more details.
dnl You should have received a copy of the GNU Lesser General Public License
dnl along with the MPIR Library; see the file COPYING.LIB. If not, write
dnl to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
dnl Boston, MA 02110-1301, USA.
include(`../config.m4')
C ret  mpn_addlsh_n(mp_ptr,  mp_ptr,  mp_ptr,  mp_size_t,  shift)
C rax                 rdi,     rsi,     rdx,      rcx,       r8
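C (rdi,rcx) = (rsi,rcx) + ((rdx,rcx) << r8), returning the bits
C shifted out of the top limb plus the carry from the addition;
C requires 1 <= shift <= 63 so that cl = 64-shift is a valid
C shrd count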
ASM_START()
PROLOGUE(mpn_addlsh_n)
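C bias the operand pointers so that (reg,rbx,8) with the negative
C index rbx = 4-n addresses limb 0, reaching the last four limbs
C when rbx hits zero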
lea -32(%rdi,%rcx,8),%rdi
lea -32(%rsi,%rcx,8),%rsi
lea -32(%rdx,%rcx,8),%rdx
push %r12
push %rbx
mov $4,%rbx
sub %rcx,%rbx
mov $64,%rcx
sub %r8,%rcx
mov $0,%r12
mov $0,%rax
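C rbx = 4-n counts up towards zero, cl = 64-shift turns shrd into
C a left-shift merge, r12 holds the bits shifted up from the limb
C below, and AH (via rax) carries the addition carry between passes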
mov (%rdx,%rbx,8),%r8
cmp $0,%rbx
jge skiplp
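C main loop: four limbs per pass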
ALIGN(16)
lp:
mov 8(%rdx,%rbx,8),%r9
mov 16(%rdx,%rbx,8),%r10
mov 24(%rdx,%rbx,8),%r11
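C each output limb is (src[j] << shift) | (src[j-1] >> (64-shift)),
C src being the operand in rdx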
shrd %cl,%r8,%r12
shrd %cl,%r9,%r8
shrd %cl,%r10,%r9
shrd %cl,%r11,%r10
sahf
adc (%rsi,%rbx,8),%r12
mov %r12,(%rdi,%rbx,8)
adc 8(%rsi,%rbx,8),%r8
mov %r11,%r12
mov %r8,8(%rdi,%rbx,8)
adc 16(%rsi,%rbx,8),%r9
adc 24(%rsi,%rbx,8),%r10
mov %r10,24(%rdi,%rbx,8)
mov %r9,16(%rdi,%rbx,8)
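C park the carry in AH across the flag-clobbering index update;
C add $4 to the negative rbx sets CF exactly when rbx reaches zero
C or above, ending the loop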
lahf
mov 32(%rdx,%rbx,8),%r8
add $4,%rbx
jnc lp
ALIGN(16)
skiplp:
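C rbx is now 0..3 with 4-rbx limbs left: ja/je pick off rbx = 3/2,
C and jp splits rbx = 1 from rbx = 0 by the parity of the low byte
C of rbx-2 (0xff has even parity, 0xfe odd)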
cmp $2,%rbx
ja case0
je case1
jp case2
case3:
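C four limbs left (rbx = 0)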
shrd %cl,%r8,%r12
mov 8(%rdx,%rbx,8),%r9
mov 16(%rdx,%rbx,8),%r10
mov 24(%rdx,%rbx,8),%r11
shrd %cl,%r9,%r8
shrd %cl,%r10,%r9
shrd %cl,%r11,%r10
sahf
adc (%rsi,%rbx,8),%r12
mov %r12,(%rdi,%rbx,8)
adc 8(%rsi,%rbx,8),%r8
mov %r11,%r12
mov %r8,8(%rdi,%rbx,8)
adc 16(%rsi,%rbx,8),%r9
adc 24(%rsi,%rbx,8),%r10
mov %r10,24(%rdi,%rbx,8)
mov %r9,16(%rdi,%rbx,8)
lahf
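C return the top "shift" bits of the last limb plus the addition carry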
shr %cl,%r12
sahf
adc $0,%r12
mov %r12,%rax
pop %rbx
pop %r12
ret
case2:
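C three limbs left (rbx = 1)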
shrd %cl,%r8,%r12
mov 8(%rdx,%rbx,8),%r9
shrd %cl,%r9,%r8
mov 16(%rdx,%rbx,8),%r10
shrd %cl,%r10,%r9
shr %cl,%r10
sahf
adc (%rsi,%rbx,8),%r12
mov %r12,(%rdi,%rbx,8)
adc 8(%rsi,%rbx,8),%r8
mov $0,%rax
mov %r8,8(%rdi,%rbx,8)
adc 16(%rsi,%rbx,8),%r9
adc %r10,%rax
mov %r9,16(%rdi,%rbx,8)
pop %rbx
pop %r12
ret
case1:
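C two limbs left (rbx = 2)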
shrd %cl,%r8,%r12
mov 8(%rdx,%rbx,8),%r9
shrd %cl,%r9,%r8
shr %cl,%r9
sahf
adc (%rsi,%rbx,8),%r12
mov %r12,(%rdi,%rbx,8)
adc 8(%rsi,%rbx,8),%r8
mov $0,%rax
mov %r8,8(%rdi,%rbx,8)
adc %r9,%rax
pop %rbx
pop %r12
ret
case0:
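C one limb left (rbx = 3)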
shrd %cl,%r8,%r12
shr %cl,%r8
sahf
adc (%rsi,%rbx,8),%r12
mov %r12,(%rdi,%rbx,8)
adc $0,%r8
mov %r8,%rax
pop %rbx
pop %r12
ret
EPILOGUE()
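For reference, a minimal C sketch of what this routine computes, assuming
64-bit limbs and 1 <= shift <= 63. The name ref_addlsh_n and its signature
are illustrative only, not MPIR's internal API:

#include <stdint.h>
#include <stddef.h>

/* Illustrative reference only, not MPIR code:
   {rp,n} = {xp,n} + ({yp,n} << shift), returning the bits shifted
   out of the top limb plus the carry from the addition. */
uint64_t ref_addlsh_n(uint64_t *rp, const uint64_t *xp,
                      const uint64_t *yp, size_t n, unsigned shift)
{
    uint64_t carry = 0;  /* carry from the limb additions */
    uint64_t prev = 0;   /* bits shifted up from the limb below */
    for (size_t i = 0; i < n; i++) {
        uint64_t lsh = (yp[i] << shift) | prev;  /* the shrd step */
        prev = yp[i] >> (64 - shift);
        uint64_t sum = xp[i] + lsh;
        uint64_t c1 = sum < xp[i];               /* first overflow  */
        rp[i] = sum + carry;
        carry = c1 + (rp[i] < sum);              /* second overflow */
    }
    return prev + carry;  /* matches the shr/adc $0 in the tail cases */
}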