mpir/mpn/x86_64w/k8/lshift2.asm
gladman 1485e3dc1b 1. Add Jason's new assembler code to the Windows builds
2. Tidy up assembler to prepare for Windows nehalem build
2009-12-02 16:24:00 +00:00

125 lines
2.8 KiB
NASM

; Copyright 2009 Jason Moxham
;
; Windows Conversion Copyright 2008 Brian Gladman
;
; This file is part of the MPIR Library.
;
; The MPIR Library is free software; you can redistribute it and/or modify
; it under the terms of the GNU Lesser General Public License as published
; by the Free Software Foundation; either version 2.1 of the License, or (at
; your option) any later version.
; The MPIR Library is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
; License for more details.
; You should have received a copy of the GNU Lesser General Public License
; along with the MPIR Library; see the file COPYING.LIB. If not, write
; to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
; Boston, MA 02110-1301, USA.
;
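; mp_limb_t mpn_lshift2(mp_ptr, mp_ptr, mp_size_t)
; System V AMD64 registers:  rax = return,  rdi, rsi, rdx  = arguments
; Microsoft x64 registers:   rax = return,  rcx, rdx, r8d  = arguments (used here)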
; Project macro file. FRAME_PROC, END_PROC and xalign, used below, are not
; defined in this file, so presumably they are provided here (confirm).
%include "..\yasm_mac.inc"
; Register list handed to both FRAME_PROC and END_PROC. The routine overwrites
; rsi and rdi, which are callee-saved in the Microsoft x64 calling convention.
%define reg_save_list rsi, rdi
; Instruction-set target and operating mode for the assembler.
CPU Athlon64
BITS 64
;-----------------------------------------------------------------------
; mpn_lshift2: shift an n-limb operand left by two bits.
;
; Each 64-bit limb is doubled twice with add-with-carry, working from the
; lowest-addressed limb upwards. The carry of each of the two doubling
; passes is kept in its own register between groups of limbs.
;
; In:    rcx = destination pointer
;        rdx = source pointer
;        r8d = limb count n (sign-extended to 64 bits)
; Out:   rax = 2*c1 + c2, where c1 / c2 are the final carries out of the
;        first / second doubling pass (the two bits shifted out of the
;        top limb). With n = 0 nothing is stored and rax = 0.
; Regs:  rsi / rdi = source / destination base, biased to ptr + 8*n - 24
;        rcx       = limb index, starts at 3 - n and counts upwards
;        rax, rdx  = saved carries of pass 1 / pass 2, held as 0 or -1
;        r8 - r11  = limb scratch
; NOTE(review): rsi / rdi are presumably saved and restored by the
; FRAME_PROC / END_PROC macros via reg_save_list - confirm in yasm_mac.inc.
;-----------------------------------------------------------------------
FRAME_PROC mpn_lshift2, 0, reg_save_list
; rax = n as a signed 64-bit value
movsxd rax, r8d
; Bias both pointers so that [base + rcx*8] addresses limb 0 when rcx = 3 - n.
lea rsi, [rdx+rax*8-24]
lea rdi, [rcx+rax*8-24]
; rcx = 3 - n (the 32-bit write zero-extends into rcx)
mov ecx, 3
sub rcx, rax
; Both saved carries start at zero.
xor eax, eax
xor edx, edx
; If 3 - n >= 0 (n <= 3) there is no full group of four limbs: go to the tail.
cmp rcx, 0
jge .2
xalign 16
; Main loop: four limbs per iteration.
.1:
mov r8, [rsi+rcx*8]
mov r9, [rsi+rcx*8+8]
mov r10, [rsi+rcx*8+16]
mov r11, [rsi+rcx*8+24]
; Pass 1: rax is 0 or -1, so rax + rax regenerates the saved carry in CF.
add rax, rax
adc r8, r8
adc r9, r9
adc r10, r10
adc r11, r11
; Save pass-1 carry out as 0 / -1.
sbb rax, rax
; Pass 2: same scheme with the carry held in rdx.
add rdx, rdx
adc r8, r8
adc r9, r9
adc r10, r10
adc r11, r11
; mov does not change flags, so CF from the last adc survives this store.
mov [rdi+rcx*8+24], r11
; Save pass-2 carry out as 0 / -1.
sbb rdx, rdx
mov [rdi+rcx*8], r8
; Advance the index; CF is set once rcx wraps from negative to non-negative.
add rcx, 4
; Offsets are reduced by 32 bytes to compensate for the index advance above.
mov [rdi+rcx*8-24], r9
mov [rdi+rcx*8-16], r10
; Stores left the flags alone: loop while the index is still negative.
jnc .1
; Tail dispatch: for n >= 0, rcx is now 0..3 and 3 - rcx limbs remain.
.2:
cmp rcx, 2
; rcx = 3: nothing left to do.
ja .6
; rcx = 2: one limb left.
je .5
; PF reflects the low byte of rcx - 2: rcx = 1 gives 0xFF (even parity, PF
; set) -> two limbs left; rcx = 0 gives 0xFE (PF clear) -> fall through.
jp .4
; Three limbs left.
.3:
mov r8, [rsi+rcx*8]
mov r9, [rsi+rcx*8+8]
mov r10, [rsi+rcx*8+16]
; Pass 1 with restored carry.
add rax, rax
adc r8, r8
adc r9, r9
adc r10, r10
sbb rax, rax
; Pass 2 with restored carry.
add rdx, rdx
adc r8, r8
adc r9, r9
adc r10, r10
sbb rdx, rdx
mov [rdi+rcx*8], r8
mov [rdi+rcx*8+8], r9
mov [rdi+rcx*8+16], r10
jmp .6
xalign 16
; Two limbs left.
.4:
mov r8, [rsi+rcx*8]
mov r9, [rsi+rcx*8+8]
; Pass 1 with restored carry.
add rax, rax
adc r8, r8
adc r9, r9
sbb rax, rax
; Pass 2 with restored carry.
add rdx, rdx
adc r8, r8
adc r9, r9
sbb rdx, rdx
mov [rdi+rcx*8], r8
mov [rdi+rcx*8+8], r9
jmp .6
xalign 16
; One limb left.
.5:
mov r8, [rsi+rcx*8]
; Pass 1 with restored carry.
add rax, rax
adc r8, r8
sbb rax, rax
; Pass 2 with restored carry.
add rdx, rdx
adc r8, r8
sbb rdx, rdx
mov [rdi+rcx*8], r8
; Combine the carries: rax = -c1 and rdx = -c2, so rdx + 2*rax = -(2*c1 + c2);
; negating gives the return value 2*c1 + c2.
.6:
lea rax, [rdx+rax*2]
neg rax
END_PROC reg_save_list
end