; Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc. ; ; Copyright 2008 Brian Gladman, William Hart ; ; This file is part of the MPIR Library. ; ; The MPIR Library is free software; you can redistribute it and/or ; modify it under the terms of the GNU Lesser General Public License as ; published by the Free Software Foundation; either version 2.1 of the ; License, or (at your option) any later version. ; ; The MPIR Library is distributed in the hope that it will be useful, ; but WITHOUT ANY WARRANTY; without even the implied warranty of ; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ; Lesser General Public License for more details. ; ; You should have received a copy of the GNU Lesser General Public ; License along with the MPIR Library; see the file COPYING.LIB. If ; not, write to the Free Software Foundation, Inc., 59 Temple Place - ; Suite 330, Boston, MA 02111-1307, USA. ; ; Adapted by Brian Gladman for AMD64 using the Microsoft VC++ v8 64-bit ; compiler and the YASM assembler. 
;-----------------------------------------------------------------------
;  AMD64 mpn_lshift -- shift a vector of 64-bit limbs left by a bit count
;
;  C prototype, with the register each argument arrives in:
;
;  mp_limb_t mpn_lshift(
;     mp_ptr     dst,       rdi
;     mp_srcptr  src,       rsi
;     mp_size_t  size,      rdx
;     unsigned   shift      rcx   (only ecx is read)
;  )
;
;  Out:    rax = src[size-1] >> (64 - shift), i.e. the bits pushed out
;          of the most significant limb.
;          dst[i] = (src[i] << shift) | (src[i-1] >> (64 - shift)) for
;          i = size-1 down to 1, and dst[0] = src[0] << shift.
;  Order:  limbs are read and written from the most significant end
;          towards index 0.
;  Clobb:  rax, rdx, flags, mm0, mm1, mm2, mm3, mm6, mm7.
;          emms is executed before returning.
;  Notes:  size is not checked; the first instruction already reads
;          src[size-1], so size is expected to be at least 1.
;          NOTE(review): shift is presumably required to be 1..63 by
;          the callers -- confirm against the C-level contract.
;-----------------------------------------------------------------------

%include '../yasm_mac.inc'

%define LIMB_BITS   64                  ; width of one limb in bits

%define src         rsi
%define dst         rdi
%define s_len       rdx
%define r_tmpd      ecx

%define lsh_cnt     mm1                 ; left shift count  (shift)
%define rsh_cnt     mm0                 ; right shift count (LIMB_BITS - shift)

        BITS 64

GLOBAL_FUNC mpn_lshift

        ; Build both shift counts in MMX registers.
        movd    lsh_cnt, r_tmpd                 ; lsh_cnt = shift
        mov     eax, LIMB_BITS
        sub     eax, r_tmpd
        movd    rsh_cnt, eax                    ; rsh_cnt = LIMB_BITS - shift

        ; Fetch the top limb; its shifted-out bits are the return value.
        movq    mm7, [src + s_len*8 - 8]        ; mm7 = src[size-1]
        movq    mm3, mm7                        ; mm3 keeps the unshifted limb
        psrlq   mm7, rsh_cnt
        movd    rax, mm7                        ; rax = bits shifted out of the top

        ; s_len becomes the index of the next (lower) limb to load.
        sub     s_len, 2
        jl      lshift_last_from_mm3            ; single limb: only dst[0] is left

        align   4
lshift_pair_loop:
        ; First half: mm3 holds src[s_len+1]; combine it with src[s_len].
        movq    mm6, [src + s_len*8]            ; mm6 = src[s_len]
        psllq   mm3, lsh_cnt                    ; upper limb << shift
        movq    mm2, mm6                        ; mm2 keeps src[s_len] unshifted
        psrlq   mm6, rsh_cnt                    ; bits carried up from src[s_len]
        por     mm3, mm6
        movq    [dst + s_len*8 + 8], mm3        ; dst[s_len+1] done

        ; Flags still come from the last "sub s_len, 2" (the MMX
        ; instructions above leave them alone): ZF set means s_len == 0,
        ; so mm2 already holds src[0].
        je      lshift_last_from_mm2

        ; Second half: mm2 holds src[s_len]; combine it with src[s_len-1].
        movq    mm7, [src + s_len*8 - 8]        ; mm7 = src[s_len-1]
        psllq   mm2, lsh_cnt                    ; upper limb << shift
        movq    mm3, mm7                        ; mm3 keeps src[s_len-1] unshifted
        psrlq   mm7, rsh_cnt                    ; bits carried up from src[s_len-1]
        por     mm2, mm7
        movq    [dst + s_len*8], mm2            ; dst[s_len] done

        sub     s_len, 2
        jge     lshift_pair_loop                ; more limbs below: go round again

lshift_last_from_mm3:
        ; Reached when the lowest source limb is sitting in mm3.
        movq    mm2, mm3
lshift_last_from_mm2:
        ; mm2 = src[0]; nothing below it to carry in.
        psllq   mm2, lsh_cnt
        movq    [dst], mm2                      ; dst[0] = src[0] << shift

        emms                                    ; release the MMX state
        ret