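; mpir/mpn/x86_64/nano/lshift.as
;
; Listing metadata: 81 lines, 2.7 KiB, labelled "ActionScript".
; NOTE: the content below is x86-64 assembly in YASM syntax, not ActionScript.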

; Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
;
; Copyright 2008 Brian Gladman, William Hart
;
; This file is part of the MPIR Library.
;
; The MPIR Library is free software; you can redistribute it and/or
; modify it under the terms of the GNU Lesser General Public License as
; published by the Free Software Foundation; either version 2.1 of the
; License, or (at your option) any later version.
;
; The MPIR Library is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; Lesser General Public License for more details.
;
; You should have received a copy of the GNU Lesser General Public
; License along with the MPIR Library; see the file COPYING.LIB. If
; not, write to the Free Software Foundation, Inc., 59 Temple Place -
; Suite 330, Boston, MA 02111-1307, USA.
;
; Adapted by Brian Gladman for AMD64 using the Microsoft VC++ v8 64-bit
; compiler and the YASM assembler.
; AMD64 mpn_lshift -- mpn left shift
;
; Calling interface (the register holding each argument is shown in
; parentheses):
;
; mp_limb_t mpn_lshift(
;     mp_ptr    dst,      (rdi)
;     mp_srcptr src,      (rsi)
;     mp_size_t size,     (rdx)
;     unsigned  shift     (rcx)
; )
; Project macro file.  GLOBAL_FUNC is used below but not defined in this
; file, so it presumably comes from here -- TODO confirm.
%include 'yasm_mac.inc'
; Symbolic names for the argument registers listed in the calling
; interface comment above.
; src: pointer to the source limbs.
%define src rsi
; dst: pointer to the destination limbs.
%define dst rdi
; s_len: limb count on entry; the code below also uses it as the running
; limb index.
%define s_len rdx
; r_tmpd: shift count, read through the 32-bit view of rcx.
%define r_tmpd ecx
; Assemble what follows as 64-bit code.
BITS 64
;-----------------------------------------------------------------------
; mpn_lshift -- shift a multi-limb number left by `shift` bits.
;
; In:    dst   (rdi) = destination limb array
;        src   (rsi) = source limb array
;        s_len (rdx) = number of 64-bit limbs
;        r_tmpd (ecx) = shift count
; Out:   rax = bits shifted out of the top limb,
;              i.e. src[size-1] >> (64 - shift)
; Uses:  rax, rdx, mm0-mm3, mm6, mm7 and the integer flags; emms is
;        executed before returning.
;
; Limbs are handled from the most significant one downwards, two per
; loop iteration, with mm3 and mm2 alternately carrying the unshifted
; upper limb of the pair being combined.  The MMX instructions do not
; modify the integer flags, so each conditional jump tests the result
; of the most recent `sub s_len, 2`.
;
; NOTE(review): src[size-1] is read unconditionally, so size is
; presumably required to be >= 1 -- confirm against the callers.
;-----------------------------------------------------------------------
GLOBAL_FUNC mpn_lshift
        ; Shift counts: mm1 = shift (left), mm0 = 64 - shift (right).
        mov     eax, 64
        sub     eax, r_tmpd
        movd    mm1, r_tmpd
        movd    mm0, eax

        ; Return value: the part of the top limb that falls off the end.
        movq    mm7, [src+s_len*8-8]    ; mm7 = src[size-1]
        movq    mm3, mm7                ; mm3 = unshifted copy for the loop
        psrlq   mm7, mm0
        movd    rax, mm7                ; 64-bit move: rax = src[size-1] >> (64 - shift)

        sub     s_len, 2                ; s_len = index of the limb below the top one
        jl      lshift_low_in_mm3       ; fewer than two limbs: src[0] is in mm3

        align   4
lshift_pair_loop:
        ; Here s_len = i >= 0, mm3 = unshifted src[i+1], ZF set iff i == 0.
        movq    mm6, [src+s_len*8]      ; mm6 = src[i]
        movq    mm2, mm6                ; mm2 = unshifted src[i] for the next limb
        psllq   mm3, mm1
        psrlq   mm6, mm0
        por     mm3, mm6                ; (src[i+1] << shift) | (src[i] >> (64 - shift))
        movq    [dst+s_len*8+8], mm3    ; dst[i+1]
        je      lshift_store_low        ; i == 0: only src[0] (in mm2) is left

        movq    mm7, [src+s_len*8-8]    ; mm7 = src[i-1]
        movq    mm3, mm7                ; mm3 = unshifted src[i-1] for the next pass
        psllq   mm2, mm1
        psrlq   mm7, mm0
        por     mm2, mm7                ; (src[i] << shift) | (src[i-1] >> (64 - shift))
        movq    [dst+s_len*8], mm2      ; dst[i]
        sub     s_len, 2
        jge     lshift_pair_loop
        ; Falling through means i was 1, so mm3 holds unshifted src[0].
lshift_low_in_mm3:
        movq    mm2, mm3
lshift_store_low:
        psllq   mm2, mm1                ; lowest limb has nothing shifted in from below
        movq    [dst], mm2              ; dst[0] = src[0] << shift
        emms                            ; leave MMX state before returning
        ret