; mpir/mpn/x86_64/rshift.as
; Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
;
; This file is part of the GNU MP Library.
;
; The GNU MP Library is free software; you can redistribute it and/or
; modify it under the terms of the GNU Lesser General Public License as
; published by the Free Software Foundation; either version 2.1 of the
; License, or (at your option) any later version.
;
; The GNU MP Library is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; Lesser General Public License for more details.
;
; You should have received a copy of the GNU Lesser General Public
; License along with the GNU MP Library; see the file COPYING.LIB. If
; not, write to the Free Software Foundation, Inc., 59 Temple Place -
; Suite 330, Boston, MA 02111-1307, USA.
;
; Adapted by Brian Gladman AMD64 using the Microsoft VC++ v8 64-bit
; compiler and the YASM assembler.
; AMD64 mpn_rshift -- mpn right shift
;
; Calling interface(WIN64):
;
; mp_limb_t mpn_rshift(
; mp_ptr dst, rcx
; mp_srcptr src, rdx
; mp_size_t size, r8
; unsigned shift r9
; )
;
; Calling interface(linux):
;
; mp_limb_t mpn_rshift(
; mp_ptr dst, rdi
; mp_srcptr src, rsi
; mp_size_t size, rdx
; unsigned shift rcx
; )
;
; This is an SEH Leaf Function (no unwind support needed)
; Register-name aliases for the two supported ABIs.  Under _WIN64_ABI the
; Microsoft x64 argument registers are used (dst=rcx, src=rdx, size=r8,
; shift=r9); otherwise the System V AMD64 registers apply (dst=rdi,
; src=rsi, size=rdx, shift=rcx).  The inner %if 0 selects between the
; register sets of the two implementations below.
%ifdef _WIN64_ABI
%define s_len r8                ; limb count (mp_size_t argument)
%define s_lend r8d              ; low 32 bits of the limb count
%define r_tmp r9                ; scratch limb (integer path)
%define r_tmpd r9d              ; shift count as 32 bits (MMX path)
%define d_ptr r10               ; destination pointer (integer path)
%define s_ptr r11               ; source pointer (integer path)
%define dst rcx                 ; destination pointer (MMX path)
%define src rdx                 ; source pointer (MMX path)
%define s_tmp rdx
%define shift r9                ; NOTE(review): aliases r_tmp/r_tmpd (also r9);
                                ; the two names are used on disjoint code paths
%else
%if 0
; register set for the disabled general-register implementation (System V)
%define s_len rdx
%define s_lend edx
%define r_tmp r10
%define r_tmpd r10d
%define d_ptr rdi
%define s_ptr rsi
%define s_tmp r11
%else
; register set for the active MMX implementation (System V)
%define src rsi                 ; source pointer
%define dst rdi                 ; destination pointer
%define r_tmpd ecx              ; shift count (low 32 bits of rcx)
%define s_len rdx               ; limb count
%endif
%endif
bits 64
section .text
global __gmpn_rshift:function
%ifdef DLL
export __gmpn_rshift
%endif
%if 0
; ---------------------------------------------------------------------
; Disabled general-register implementation of mpn_rshift (kept for
; reference only; the MMX version below is what actually assembles).
;   dst[i]      = (src[i] >> shift) | (src[i+1] << (64-shift))
;   dst[size-1] =  src[size-1] >> shift
; Returns in rax the bits shifted out of the low limb,
; src[0] << (64-shift).  Assumes 1 <= shift <= 63.
; Trick used throughout: `neg cl` toggles cl between shift and -shift;
; since x86 shifts use the count mod 64, -shift acts as 64-shift.
; ---------------------------------------------------------------------
__gmpn_rshift:
movsxd s_len,s_lend             ; sign-extend the 32-bit limb count
or s_len,s_len                  ; size == 0?
jz .0                           ; nothing to do
%ifdef _WIN64_ABI
mov d_ptr,rcx                   ; free rcx so the shift count can live in cl
mov s_ptr,rdx
mov rcx,shift
%endif
cmp s_len,byte 2
jge .1                          ; two or more limbs -> main loop
mov rax,[s_ptr]                 ; single-limb case: rax = src[0]
mov r_tmp,rax
shr r_tmp,cl                    ; dst[0] = src[0] >> shift
neg cl                          ; cl = 64 - shift (count taken mod 64)
mov [d_ptr],r_tmp
shl rax,cl                      ; return the bits shifted out
.0: ret
.1: lea s_ptr,[s_ptr+s_len*8]   ; point both pointers past their arrays
lea d_ptr,[d_ptr+s_len*8]
neg s_len                       ; s_len = -size; index counts up toward 0
mov s_tmp,[s_ptr+s_len*8]       ; s_tmp = low limb src[0]
movq xmm0,s_tmp ; stash the original low limb in xmm0 for the return value
shr s_tmp,cl                    ; partial low output limb
neg cl                          ; cl = 64 - shift
inc s_len
.2: mov rax,[s_ptr+s_len*8]     ; fetch next higher limb
mov r_tmp,rax
shl r_tmp,cl                    ; bits carried down into the previous limb
neg cl
xor r_tmp,s_tmp                 ; combine halves (disjoint bits: xor == or)
shr rax,cl
neg cl
mov s_tmp,rax                   ; carry this limb's high part to next round
mov [d_ptr+s_len*8-8],r_tmp
inc s_len
jnz .2
mov [d_ptr-8],rax               ; top output limb: src[size-1] >> shift
movq rax,xmm0                   ; recover the original low limb
shl rax,cl                      ; return value: src[0] << (64-shift)
ret
%else
; ---------------------------------------------------------------------
; Active MMX implementation of mpn_rshift.
;   dst[i]      = (src[i] >> shift) | (src[i+1] << (64-shift))
;   dst[size-1] =  src[size-1] >> shift
; Returns in rax the bits shifted out of the low limb,
; src[0] << (64-shift).  Assumes size >= 1 and 1 <= shift <= 63.
; mm1 holds the shift count, mm0 holds 64-shift; the loop is unrolled
; two limbs per iteration, alternating mm7/mm3 and mm6/mm2 pairs.
; Uses MMX state, so emms is issued before returning.
; ---------------------------------------------------------------------
__gmpn_rshift:
movq mm7, [src]                 ; mm7 = src[0]
movd mm1, r_tmpd                ; mm1 = shift
mov eax, 64
sub eax, r_tmpd
movd mm0, eax                   ; mm0 = 64 - shift
movq mm3, mm7                   ; mm3 = limb carried into the loop
psllq mm7, mm0
movd rax, mm7                   ; return value: src[0] << (64-shift)
lea src, [src+s_len*8]          ; point both pointers past their arrays
lea dst, [dst+s_len*8]
neg s_len                       ; negative index counting up toward zero
add s_len, 2                    ; loop consumes two limbs per pass
jg .1                           ; size == 1: no limb pairs to combine
align 8
.0: movq mm6, [src+s_len*8-8]   ; next limb
movq mm2, mm6
psllq mm6, mm0                  ; its low bits feed the previous output limb
psrlq mm3, mm1
por mm3, mm6                    ; combined output limb
movq [dst+s_len*8-16], mm3
je .2                           ; MMX ops/stores leave flags untouched, so ZF
                                ; is still from the last `add s_len, 2`:
                                ; s_len == 0 means only the top limb remains
movq mm7, [src+s_len*8]         ; second limb of the unrolled pair
movq mm3, mm7
psllq mm7, mm0
psrlq mm2, mm1
por mm2, mm7
movq [dst+s_len*8-8], mm2
add s_len, 2
jle .0
.1: movq mm2, mm3               ; odd count: one limb still pending
.2: psrlq mm2, mm1              ; top output limb has no incoming bits
movq [dst-8], mm2
emms                            ; clear MMX state before returning to C code
ret
%endif
end