; Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
;
; Copyright 2008 Brian Gladman, William Hart
;
; This file is part of the MPIR Library.
;
; The MPIR Library is free software; you can redistribute it and/or
; modify it under the terms of the GNU Lesser General Public License as
; published by the Free Software Foundation; either version 2.1 of the
; License, or (at your option) any later version.
;
; The MPIR Library is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; Lesser General Public License for more details.
;
; You should have received a copy of the GNU Lesser General Public
; License along with the MPIR Library; see the file COPYING.LIB. If
; not, write to the Free Software Foundation, Inc., 51 Franklin Street,
; Fifth Floor, Boston, MA 02110-1301, USA.
;
; Adapted by Brian Gladman AMD64 using the Microsoft VC++ v8 64-bit
; compiler and the YASM assembler.

; AMD64 mpn_rshift -- mpn right shift
;
; Calling interface:
;
; mp_limb_t mpn_rshift(
;     mp_ptr    dst,      rdi
;     mp_srcptr src,      rsi
;     mp_size_t size,     rdx
;     unsigned  shift     rcx
; )

%include 'yasm_mac.inc'

; Symbolic names for the registers used by mpn_rshift.

; Source limb pointer.
%define src     rsi
; Destination limb pointer.
%define dst     rdi
; 32-bit view of rcx; the shift count is read from here.
%define r_tmpd  ecx
; Limb count on entry.
%define s_len   rdx

        BITS 64

;-----------------------------------------------------------------------
; mp_limb_t mpn_rshift(mp_ptr dst, mp_srcptr src, mp_size_t size,
;                      unsigned shift)
;
; Shifts the size-limb number at src right by shift bits, stores the
; size result limbs at dst and returns the bits shifted out of src[0],
; left-aligned in a limb:
;
;   dst[i]      = (src[i] >> shift) | (src[i+1] << (64 - shift)), i < size-1
;   dst[size-1] =  src[size-1] >> shift
;   return      =  src[0] << (64 - shift)
;
; In:    rdi = dst, rsi = src, rdx = size, rcx = shift (only ecx is read)
; Out:   rax = return value
; Clobb: rax, rdx, rsi, rdi, mm0-mm3, mm6, mm7, flags.
;        emms is executed before returning.
; Note:  src[0] is read and dst[size-1] is written unconditionally, so
;        size must be at least 1.
;
; Register roles inside the routine:
;   mm1        = shift
;   mm0        = 64 - shift
;   mm3 / mm2  = unshifted copy of the most recently loaded limb, whose
;                right-shifted part has not been stored yet
;   src, dst   = pointers one past the last limb of each array
;   s_len      = negative limb index, stepping towards zero by 2
;
; The loop processes two limbs per pass.  The "je" inside the loop tests
; the ZF left by the most recent "add s_len, 2"; the MMX moves, shifts
; and ors executed in between leave the flags untouched, so the order of
; these instructions must not be changed carelessly.
;-----------------------------------------------------------------------
GLOBAL_FUNC mpn_rshift
        movq    mm7, [src]                  ; mm7 = src[0]
        movd    mm1, r_tmpd                 ; mm1 = shift
        mov     eax, 64
        sub     eax, r_tmpd
        movd    mm0, eax                    ; mm0 = 64 - shift
        movq    mm3, mm7                    ; keep an unshifted copy of src[0]
        psllq   mm7, mm0                    ; bits that fall off the bottom limb
        movd    rax, mm7                    ; rax = return value

        lea     src, [src+s_len*8]          ; src = &src[size]
        lea     dst, [dst+s_len*8]          ; dst = &dst[size]
        neg     s_len
        add     s_len, 2                    ; s_len = 2 - size (flags feed jg and je)
        jg      label1                      ; size == 1: only the last limb is left

        align   8                           ; padding must not disturb the flags
label0:
        ; First half: pending limb is in mm3, next limb goes to mm6/mm2.
        movq    mm6, [src+s_len*8-8]        ; mm6 = src[size + s_len - 1]
        movq    mm2, mm6                    ; unshifted copy for the second half
        psllq   mm6, mm0                    ; low bits of next limb -> high part
        psrlq   mm3, mm1                    ; high bits of pending limb -> low part
        por     mm3, mm6                    ; combine both parts
        movq    [dst+s_len*8-16], mm3       ; dst[size + s_len - 2] = result
        je      label2                      ; s_len == 0: mm2 holds the last limb

        ; Second half: pending limb is in mm2, next limb goes to mm7/mm3.
        movq    mm7, [src+s_len*8]          ; mm7 = src[size + s_len]
        movq    mm3, mm7                    ; unshifted copy for the next pass
        psllq   mm7, mm0                    ; low bits of next limb -> high part
        psrlq   mm2, mm1                    ; high bits of pending limb -> low part
        por     mm2, mm7                    ; combine both parts
        movq    [dst+s_len*8-8], mm2        ; dst[size + s_len - 1] = result
        add     s_len, 2                    ; advance two limbs, set flags
        jle     label0                      ; more limbs to combine

label1:
        movq    mm2, mm3                    ; last limb is the copy held in mm3
label2:
        psrlq   mm2, mm1                    ; top limb gets no bits from above
        movq    [dst-8], mm2                ; dst[size - 1] = src[size - 1] >> shift
        emms                                ; leave MMX state
        ret