; Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
;
; Copyright 2008 Brian Gladman
;
; This file is part of the MPIR Library.
;
; The MPIR Library is free software; you can redistribute it and/or
; modify it under the terms of the GNU Lesser General Public License as
; published by the Free Software Foundation; either version 2.1 of the
; License, or (at your option) any later version.
;
; The MPIR Library is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; Lesser General Public License for more details.
;
; You should have received a copy of the GNU Lesser General Public
; License along with the MPIR Library; see the file COPYING.LIB. If
; not, write to the Free Software Foundation, Inc., 51 Franklin Street,
; Fifth Floor, Boston, MA 02110-1301, USA.

; AMD64 mpn_rshift -- mpn right shift
;
; Shift {src, size} right by 'shift' bits (1 <= shift < 64 per the mpn
; convention), store the result at {dst, size}, and return the bits
; shifted out of the low end (i.e. src[0] << (64 - shift)).
;
; Calling interface (Microsoft x64 ABI -- args in rcx, rdx, r8, r9):
;
; mp_limb_t mpn_rshift(
;     mp_ptr dst,           rcx
;     mp_srcptr src,        rdx
;     mp_size_t size,       r8
;     unsigned shift        r9
; )
;
; This is an SEH Leaf Function (no unwind support needed)

; Register role aliases used by the active implementation below.
%define s_len r8                        ; limb count (loop counter)
%define r_tmp r9                        ; scratch limb
%define d_ptr r10                       ; destination pointer
%define s_ptr r11                       ; source pointer

        bits    64
        section .text

        global  __gmpn_rshift

%ifdef DLL
        export  __gmpn_rshift
%endif

%if 1

; ---------------------------------------------------------------------
; Plain 64-bit GPR implementation (the active one).
;
; Core trick: after 'mov rcx, r9' the shift count lives in cl.  The code
; repeatedly executes 'neg cl' to toggle cl between 'shift' and
; '64 - shift' (shl/shr only use cl mod 64, so the mod-256 negate is
; harmless).  That lets one register serve both complementary counts
; without a second count register.
;
; The loop walks limbs from LOW addresses to HIGH (negative index that
; counts up to zero), the correct direction for a right shift when
; dst and src may overlap with dst <= src.
; ---------------------------------------------------------------------
__gmpn_rshift:
        movsxd  s_len,r8d               ; sign-extend 32-bit size to 64 bits
        or      s_len,s_len             ; size == 0?  (nothing to do; rax
        jz      .0                      ; is undefined on this path)
        mov     d_ptr,rcx               ; free rcx so cl can hold the count
        mov     s_ptr,rdx
        mov     rcx,r9                  ; cl = shift
        cmp     s_len,byte 2
        jge     .1                      ; two or more limbs -> main path

        ; single-limb case: dst[0] = src[0] >> cl; return src[0] << (64-cl)
        mov     rax,[s_ptr]
        mov     r_tmp,rax
        shr     r_tmp,cl
        neg     cl                      ; cl = 64 - shift (mod 64)
        mov     [d_ptr],r_tmp
        shl     rax,cl                  ; rax = bits shifted out the bottom
.0:     ret

.1:     lea     s_ptr,[s_ptr+s_len*8]   ; point both just past the arrays and
        lea     d_ptr,[d_ptr+s_len*8]   ; index with a negative counter so the
        neg     s_len                   ; loop can end with 'inc / jnz'
        mov     rdx,[s_ptr+s_len*8]     ; rdx = src[0]
        movq    xmm0, rdx               ; stash src[0] for the return value
                                        ; (r9/r_tmp gets clobbered below)
        shr     rdx,cl                  ; rdx = src[0] >> shift (carry-in half)
        neg     cl                      ; cl = 64 - shift
        inc     s_len                   ; advance to limb index 1

        ; Loop invariant at .2: rdx = src[i-1] >> shift, cl = 64 - shift.
        ; Each pass stores dst[i-1] = (src[i-1] >> shift) | (src[i] << (64-shift)).
.2:     mov     rax,[s_ptr+s_len*8]     ; rax = src[i]
        mov     r_tmp,rax
        shl     r_tmp,cl                ; low bits of src[i] for dst[i-1]
        neg     cl                      ; cl = shift
        xor     r_tmp,rdx               ; combine halves (bit ranges are
                                        ; disjoint, so xor acts as or)
        shr     rax,cl                  ; rax = src[i] >> shift
        neg     cl                      ; cl = 64 - shift for next pass
        mov     rdx,rax                 ; carry high half into next iteration
        mov     [d_ptr+s_len*8-8],r_tmp ; dst[i-1]
        inc     s_len
        jnz     .2

        mov     [d_ptr-8],rax           ; top limb: dst[size-1] = src[size-1] >> shift
        movd    rax, xmm0               ; recover src[0]
        shl     rax,cl                  ; cl = 64 - shift here: return the
        ret                             ; bits shifted out of the low end

%else

; ---------------------------------------------------------------------
; Alternative MMX implementation (disabled by the %if 1 above).
;
; Same contract; uses psllq/psrlq with the counts held in mm0/mm1 and a
; two-limb software-pipelined loop.  'emms' restores the x87 state
; before returning.
; ---------------------------------------------------------------------
__gmpn_rshift:
        movq    mm7, [rdx]              ; mm7 = src[0]
        movd    mm1, r9d                ; mm1 = shift
        mov     eax, 64
        sub     eax, r9d
        movd    mm0, eax                ; mm0 = 64 - shift
        movq    mm3, mm7
        psllq   mm7, mm0
        movd    rax, mm7                ; return value = src[0] << (64-shift)
        lea     rdx, [rdx+r8*8]         ; end pointers + negative counter,
        lea     rcx, [rcx+r8*8]         ; as in the GPR version
        neg     r8
        add     r8, 2                   ; unrolled by two limbs per pass
        jg      .1                      ; size == 1 -> skip the loop

        align   8
.0:     movq    mm6, [rdx+r8*8-8]
        movq    mm2, mm6
        psllq   mm6, mm0
        psrlq   mm3, mm1
        por     mm3, mm6
        movq    [rcx+r8*8-16], mm3
        je      .2                      ; flags still from 'add r8, 2'
                                        ; (MMX ops leave rflags untouched)
        movq    mm7, [rdx+r8*8]
        movq    mm3, mm7
        psllq   mm7, mm0
        psrlq   mm2, mm1
        por     mm2, mm7
        movq    [rcx+r8*8-8], mm2
        add     r8, 2
        jle     .0
.1:     movq    mm2, mm3
.2:     psrlq   mm2, mm1                ; top limb: src[size-1] >> shift
        movq    [rcx-8], mm2
        emms                            ; leave MMX state clean for the caller
        ret

%endif

; NOTE(review): 'end' is a MASM-style file terminator; plain NASM has no
; such directive and may reject it -- confirm which assembler variant
; builds this file.
        end