; Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
;
; Copyright 2008 Brian Gladman, William Hart
;
; This file is part of the MPIR Library.
;
; The MPIR Library is free software; you can redistribute it and/or
; modify it under the terms of the GNU Lesser General Public License as
; published by the Free Software Foundation; either version 2.1 of the
; License, or (at your option) any later version.
;
; The MPIR Library is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; Lesser General Public License for more details.
;
; You should have received a copy of the GNU Lesser General Public
; License along with the MPIR Library; see the file COPYING.LIB. If
; not, write to the Free Software Foundation, Inc., 59 Temple Place -
; Suite 330, Boston, MA 02111-1307, USA.
;
; Adapted by Brian Gladman for AMD64 using the Microsoft VC++ v8 64-bit
; compiler and the YASM assembler.

; AMD64 mpn_lshift -- mpn left shift
;
; Calling interface:
;
; mp_limb_t mpn_lshift(
;     mp_ptr     dst,       rdi
;     mp_srcptr  src,       rsi
;     mp_size_t  size,      rdx
;     unsigned   shift      rcx
; )

%include 'yasm_mac.inc'

; Symbolic names for the argument registers used by mpn_lshift below.
;   src    - pointer to the source limbs
;   dst    - pointer to the destination limbs
;   s_len  - limb count on entry; reused as the running limb index
;   r_tmpd - shift count (32-bit view of rcx)
%define src     rsi
%define dst     rdi
%define s_len   rdx
%define r_tmpd  ecx

; Assemble everything that follows as 64-bit code.
BITS 64

;-----------------------------------------------------------------------
; mp_limb_t mpn_lshift(mp_ptr dst, mp_srcptr src, mp_size_t size,
;                      unsigned shift)
;
; Shift the size-limb number at src left by shift bits and store the
; size-limb result at dst.  Limbs are processed from the most
; significant end downwards:
;
;     dst[i] = (src[i] << shift) | (src[i-1] >> (64 - shift))   for i > 0
;     dst[0] =  src[0] << shift
;
; ABI:   arguments arrive in the System V AMD64 integer argument order
; In:    rdi = dst, rsi = src, rdx = size, ecx = shift
; Out:   rax = src[size-1] >> (64 - shift), i.e. the bits shifted out
;        of the top limb
; Clobb: rax, rdx, flags, mm0, mm1, mm2, mm3, mm6, mm7
;        (emms is executed before returning)
;
; Requires size >= 1: the top limb is loaded before size is examined.
; NOTE(review): the GMP/MPIR documentation specifies 1 <= shift <= 63;
; nothing in this routine checks the shift count - confirm callers.
;
; Register roles:
;     mm1 = shift            mm0 = 64 - shift
;     mm3 / mm2              alternately hold the unshifted copy of the
;                            most recently loaded limb, whose left-shifted
;                            part still has to be merged into the next
;                            result limb
;     s_len                  index of the next source limb to load
;
; The loop body is unrolled to produce two result limbs per pass.
;-----------------------------------------------------------------------
GLOBAL_FUNC mpn_lshift
        movq    mm7, [src+s_len*8-8]    ; mm7 = src[size-1], the top limb
        movd    mm1, r_tmpd             ; mm1 = shift
        mov     eax, 64
        sub     eax, r_tmpd
        movd    mm0, eax                ; mm0 = 64 - shift
        movq    mm3, mm7                ; mm3 = unshifted copy of the top limb
        psrlq   mm7, mm0                ; keep only the bits that leave the top
        movd    rax, mm7                ; rax = return value (64-bit move from mm7)
        sub     s_len, 2                ; s_len = size - 2, index of the limb below the top
        jl      lshift_last             ; fewer than two limbs: only dst[0] is left

        align   4
lshift_loop:
        ; Invariant: mm3 = src[s_len+1] (unshifted) and the flags are
        ; those produced by the most recent "sub s_len, 2".
        movq    mm6, [src+s_len*8]      ; mm6 = src[s_len]
        movq    mm2, mm6                ; mm2 = unshifted copy for the next result limb
        psrlq   mm6, mm0                ; bits of src[s_len] that move up a limb
        psllq   mm3, mm1
        por     mm3, mm6
        movq    [dst+s_len*8+8], mm3    ; dst[s_len+1] done
        je      lshift_low              ; ZF from the "sub" (MMX ops leave flags
                                        ; untouched): s_len == 0, so mm2 = src[0]
        movq    mm7, [src+s_len*8-8]    ; mm7 = src[s_len-1]
        movq    mm3, mm7                ; mm3 = unshifted copy for the next pass
        psrlq   mm7, mm0                ; bits of src[s_len-1] that move up a limb
        psllq   mm2, mm1
        por     mm2, mm7
        movq    [dst+s_len*8], mm2      ; dst[s_len] done
        sub     s_len, 2
        jge     lshift_loop             ; at least one more limb to load

lshift_last:
        movq    mm2, mm3                ; the remaining limb src[0] is in mm3 here
lshift_low:
        psllq   mm2, mm1
        movq    [dst], mm2              ; dst[0] = src[0] << shift
        emms                            ; leave MMX state before returning
        ret