; mpir/mpn/x86_64/rshift.as
;
; 85 lines
; 2.7 KiB
; ActionScript

; Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
;
; Copyright 2008 Brian Gladman, William Hart
;
; This file is part of the MPIR Library.
;
; The MPIR Library is free software; you can redistribute it and/or
; modify it under the terms of the GNU Lesser General Public License as
; published by the Free Software Foundation; either version 2.1 of the
; License, or (at your option) any later version.
;
; The MPIR Library is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; Lesser General Public License for more details.
;
; You should have received a copy of the GNU Lesser General Public
; License along with the MPIR Library; see the file COPYING.LIB. If
; not, write to the Free Software Foundation, Inc., 51 Franklin Street,
; Fifth Floor, Boston, MA 02110-1301, USA.
;
; Adapted by Brian Gladman for AMD64 using the Microsoft VC++ v8 64-bit
; compiler and the YASM assembler.
;
; AMD64 mpn_rshift -- mpn right shift
;
; Calling interface:
;
; mp_limb_t mpn_rshift(
;     mp_ptr    dst,      rdi
;     mp_srcptr src,      rsi
;     mp_size_t size,     rdx
;     unsigned  shift     rcx
; )
%include 'yasm_mac.inc'
%define src     rsi
%define dst     rdi
%define cnt_d   ecx
%define idx     rdx

        BITS    64

; Register roles inside mpn_rshift:
;   mm1        shift count (copied from ecx)
;   mm0        64 - shift count
;   mm3 / mm2  limb whose right-shifted bits are still waiting to be stored;
;              the two registers swap roles between the two loop halves
;   mm6 / mm7  next higher limb, shifted left by mm0 and OR-ed into the result
;   src, dst   advanced to one limb past the end of each operand
;   idx        limb index relative to those end pointers, starts at 2 - size
;   rax        return value: src[0] shifted left by (64 - shift)
;
; src[0] is read and the top destination limb is written unconditionally,
; so the routine expects size >= 1.

GLOBAL_FUNC mpn_rshift
        mov     eax, 64
        sub     eax, cnt_d                  ; eax = 64 - shift
        movd    mm1, cnt_d                  ; mm1 = shift
        movd    mm0, eax                    ; mm0 = 64 - shift
        movq    mm3, [src]                  ; mm3 = src[0], first pending limb
        movq    mm7, mm3
        psllq   mm7, mm0                    ; low bits of src[0] moved to the top
        movd    rax, mm7                    ; ... and returned to the caller
        lea     src, [src+idx*8]            ; src -> one past the last source limb
        lea     dst, [dst+idx*8]            ; dst -> one past the last result limb
        neg     idx
        add     idx, 2                      ; idx = 2 - size; flags feed jg and the first je
        jg      rshift_last                 ; size == 1: only the top limb to produce
        align   8
rshift_loop:
        ; first half: pending limb in mm3, next limb fetched into mm6/mm2
        movq    mm6, [src+idx*8-8]
        movq    mm2, mm6                    ; keep an unshifted copy as the new pending limb
        psrlq   mm3, mm1
        psllq   mm6, mm0
        por     mm3, mm6                    ; (pending >> shift) | (next << (64 - shift))
        movq    [dst+idx*8-16], mm3
        je      rshift_store_top            ; ZF still from the last "add idx, 2":
                                            ; MMX instructions do not modify flags
        ; second half: pending limb in mm2, next limb fetched into mm7/mm3
        movq    mm7, [src+idx*8]
        movq    mm3, mm7                    ; unshifted copy becomes the pending limb
        psrlq   mm2, mm1
        psllq   mm7, mm0
        por     mm2, mm7
        movq    [dst+idx*8-8], mm2
        add     idx, 2
        jle     rshift_loop                 ; idx <= 0: at least one more limb to fetch
rshift_last:
        movq    mm2, mm3                    ; pending limb lives in mm3 on this path
rshift_store_top:
        psrlq   mm2, mm1                    ; top limb has nothing above it to merge in
        movq    [dst-8], mm2
        emms                                ; leave MMX state before returning
        ret