mpir/mpn/x86_64/netburst/lshift.as

;  Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
;
;  Copyright 2008 Brian Gladman, William Hart
;
;  This file is part of the MPIR Library.
;
;  The MPIR Library is free software; you can redistribute it and/or
;  modify it under the terms of the GNU Lesser General Public License as
;  published by the Free Software Foundation; either version 2.1 of the
;  License, or (at your option) any later version.
;
;  The MPIR Library is distributed in the hope that it will be useful,
;  but WITHOUT ANY WARRANTY; without even the implied warranty of
;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;  Lesser General Public License for more details.
;
;  You should have received a copy of the GNU Lesser General Public
;  License along with the MPIR Library; see the file COPYING.LIB.  If
;  not, write to the Free Software Foundation, Inc., 59 Temple Place -
;  Suite 330, Boston, MA 02111-1307, USA.
;
;  Adapted by Brian Gladman for AMD64 using the Microsoft VC++ v8 64-bit
;  compiler and the YASM assembler.

;  AMD64 mpn_lshift -- mpn left shift
;
;  Calling interface:
;
; mp_limb_t mpn_lshift(
;     mp_ptr dst,       rdi
;     mp_srcptr src,    rsi
;     mp_size_t size,   rdx
;     unsigned shift    rcx
; )

%include 'yasm_mac.inc'

;  Symbolic names for the argument registers listed in the calling
;  interface above.
%define src    rsi                 ; source limb pointer (2nd argument)
%define dst    rdi                 ; destination limb pointer (1st argument)
%define s_len  rdx                 ; limb count on entry; reused as the running limb index
%define r_tmpd ecx                 ; shift count, taken from the low 32 bits of rcx

    BITS    64                     ; assemble for 64-bit mode

;  mpn_lshift: shift the s_len-limb number at src left by the bit count in
;  ecx and store the s_len result limbs at dst.  rax returns the top source
;  limb shifted right by (64 - shift), i.e. the bits pushed out of the top.
;
;  Limbs are handled from the highest index downwards, two per loop pass.
;  MMX register roles:
;    mm0      = 64 - shift  (right-shift count)
;    mm1      = shift       (left-shift count)
;    mm2, mm3 = limb still waiting for its left shift (the two alternate)
;    mm6, mm7 = next lower limb, right-shifted to supply the carried-in bits
;
;  The conditional jumps inside the loop test the flags left by the most
;  recent "sub s_len, 2"; only MMX moves, shifts, ORs and stores sit between
;  that sub and the jumps, and those do not write EFLAGS.  Do not insert
;  flag-writing instructions between them.
;
;  NOTE(review): no argument checking is done.  The first load reads
;  src[s_len-1], so s_len >= 1 is assumed; the shift count is presumably
;  expected to be in 1..63 as for GMP's mpn_lshift -- confirm with callers.
GLOBAL_FUNC mpn_lshift
    movq    mm7, [src+s_len*8-8]   ; mm7 = src[s_len-1], the top source limb
    movd    mm1, r_tmpd            ; mm1 = shift (left-shift count)
    mov     eax, 64                ; eax = 64 ...
    sub     eax, r_tmpd            ; ... - shift
    movd    mm0, eax               ; mm0 = 64 - shift (right-shift count)
    movq    mm3, mm7               ; mm3 = unshifted copy of the top limb
    psrlq   mm7, mm0               ; keep only the bits that leave the top limb
    movd    rax, mm7               ; return value: bits shifted out (64-bit GPR destination)
    sub     s_len, 2               ; s_len = index of the second-highest limb; sets flags
    jl      label1                 ; negative: single-limb operand, only the final store remains

    align   4                      ; align the loop entry to a 4-byte boundary
;  Loop invariant at label0: s_len >= 0, mm3 = src[s_len+1] (not yet
;  shifted), and the flags are those of the last "sub s_len, 2".
label0:  
    movq    mm6, [src+s_len*8]     ; mm6 = src[s_len]
    movq    mm2, mm6               ; mm2 = copy kept for its own left shift later
    psrlq   mm6, mm0               ; mm6 = bits of src[s_len] carried into the limb above
    psllq   mm3, mm1               ; mm3 = src[s_len+1] << shift
    por     mm3, mm6               ; merge in the carried bits
    movq    [dst+s_len*8+8], mm3   ; dst[s_len+1] = merged limb
    je      label2                 ; ZF from the last sub: s_len == 0, so mm2 = src[0] is the last limb
    movq    mm7, [src+s_len*8-8]   ; mm7 = src[s_len-1]
    movq    mm3, mm7               ; mm3 = copy kept for its own left shift later
    psrlq   mm7, mm0               ; mm7 = bits of src[s_len-1] carried into the limb above
    psllq   mm2, mm1               ; mm2 = src[s_len] << shift
    por     mm2, mm7               ; merge in the carried bits
    movq    [dst+s_len*8], mm2     ; dst[s_len] = merged limb
    sub     s_len, 2               ; step down two limbs; flags feed jge and the next je
    jge     label0                 ; s_len >= 0: at least one more limb below the pending one
;  Reached with s_len == -1 (given s_len >= 1 on entry): the lowest limb
;  src[0] is pending in mm3.
label1:  
    movq    mm2, mm3               ; move the pending lowest limb into mm2 for the shared tail
;  Common tail: mm2 = src[0], not yet shifted.
label2:  
    psllq   mm2, mm1               ; lowest limb: zeros are shifted in at the bottom
    movq    [dst], mm2             ; dst[0] = src[0] << shift
    emms                           ; empty the MMX state before returning
    ret
add missing asm functions for nano,netburst as fat requires asm (not C) functions for any function in the fat structure 2010-07-22 11:42:21 -04:00			`; Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.`
			`;`
			`; Copyright 2008 Brian Gladman, William Hart`
			`;`
			`; This file is part of the MPIR Library.`
			`;`
			`; The MPIR Library is free software; you can redistribute it and/or`
			`; modify it under the terms of the GNU Lesser General Public License as`
			`; published by the Free Software Foundation; either version 2.1 of the`
			`; License, or (at your option) any later version.`
			`;`
			`; The MPIR Library is distributed in the hope that it will be useful,`
			`; but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU`
			`; Lesser General Public License for more details.`
			`;`
			`; You should have received a copy of the GNU Lesser General Public`
			`; License along with the MPIR Library; see the file COPYING.LIB. If`
			`; not, write to the Free Software Foundation, Inc., 59 Temple Place -`
			`; Suite 330, Boston, MA 02111-1307, USA.`
			`;`
			`; Adapted by Brian Gladman for AMD64 using the Microsoft VC++ v8 64-bit`
			`; compiler and the YASM assembler.`

			`; AMD64 mpn_lshift -- mpn left shift`
			`;`
			`; Calling interface:`
			`;`
			`; mp_limb_t mpn_lshift(`
			`; mp_ptr dst, rdi`
			`; mp_srcptr src, rsi`
			`; mp_size_t size, rdx`
			`; unsigned shift rcx`
			`; )`

			`%include 'yasm_mac.inc'`

			`%define src rsi`
			`%define dst rdi`
			`%define s_len rdx`
			`%define r_tmpd ecx`

			`BITS 64`

			`GLOBAL_FUNC mpn_lshift`
			`movq mm7, [src+s_len*8-8] ; put top source chunk in mm7`
			`movd mm1, r_tmpd ; put shift value in mm1`
			`mov eax, 64`
			`sub eax, r_tmpd`
			`movd mm0, eax ; put 64 - shift value in mm0`
			`movq mm3, mm7 ; save original source chunk in mm3`
			`psrlq mm7, mm0 ; shift`
			`movd rax, mm7 ; put part shifted out top in rax to be returned`
			`sub s_len, 2`
			`jl label1`

			`align 4`
			`label0:`
			`movq mm6, [src+s_len*8] ; put next source chunk in mm6`
			`movq mm2, mm6 ; copy into mm2`
			`psrlq mm6, mm0 ; shift mm6 right`
			`psllq mm3, mm1 ; ...and mm3 left`
			`por mm3, mm6 ; and combine`
			`movq [dst+s_len*8+8], mm3 ; store result`
			`je label2`
			`movq mm7, [src+s_len*8-8] ; next source chunk`
			`movq mm3, mm7 ; copy it`
			`psrlq mm7, mm0 ; shift right`
			`psllq mm2, mm1 ; ...and left`
			`por mm2, mm7 ; and combine`
			`movq [dst+s_len*8], mm2 ; and store result`
			`sub s_len, 2`
			`jge label0`
			`label1:`
			`movq mm2, mm3`
			`label2:`
			`psllq mm2, mm1 ; final shift`
			`movq [dst], mm2 ; and store`
			`emms`
			`ret`