; mpir/mpn/x86_64/rshift.as

; Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
;
; This file is part of the GNU MP Library.
;
; The GNU MP Library is free software; you can redistribute it and/or
; modify it under the terms of the GNU Lesser General Public License as
; published by the Free Software Foundation; either version 2.1 of the
; License, or (at your option) any later version.
;
; The GNU MP Library is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; Lesser General Public License for more details.
;
; You should have received a copy of the GNU Lesser General Public
; License along with the GNU MP Library; see the file COPYING.LIB. If
; not, write to the Free Software Foundation, Inc., 59 Temple Place -
; Suite 330, Boston, MA 02111-1307, USA.
;
; Adapted for AMD64 by Brian Gladman, using the Microsoft VC++ v8 64-bit
; compiler and the YASM assembler.
; AMD64 mpn_rshift -- mpn right shift
;
; Calling interface (WIN64):
;
; mp_limb_t mpn_rshift(
;     mp_ptr    dst,      rcx
;     mp_srcptr src,      rdx
;     mp_size_t size,     r8
;     unsigned  shift     r9
; )
;
; Calling interface (Linux):
;
; mp_limb_t mpn_rshift(
;     mp_ptr    dst,      rdi
;     mp_srcptr src,      rsi
;     mp_size_t size,     rdx
;     unsigned  shift     rcx
; )
;
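; A minimal usage sketch in C (illustrative only; assumes 64-bit limbs and a
; shift count in the range 1..63, as the mpn interface requires; include
; 'mpir.h' or 'gmp.h' depending on which build of the library is in use):
;
;   mp_limb_t src[2] = { 0x8000000000000001UL, 0x3UL };
;   mp_limb_t dst[2];
;   /* dst{0..1} = src{0..1} >> 1; the bit shifted out of the low limb is
;      returned in the most significant bit of the result                 */
;   mp_limb_t out = mpn_rshift(dst, src, 2, 1);
;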
; This is an SEH Leaf Function (no unwind support needed)
%include '../yasm_mac.inc'
%ifdef _WIN64_ABI
%define s_len r8
%define s_lend r8d
%define r_tmp r9
%define r_tmpd r9d
%define d_ptr r10
%define s_ptr r11
%define dst rcx
%define src rdx
%define s_tmp rdx
%define shift r9
%else
%if 0
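; register names used by the disabled general-register version below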
%define s_len rdx
%define s_lend edx
%define r_tmp r10
%define r_tmpd r10d
%define d_ptr rdi
%define s_ptr rsi
%define s_tmp r11
%else
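; register names used by the active MMX version below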
%define src rsi
%define dst rdi
%define r_tmpd ecx
%define s_len rdx
%endif
%endif
bits 64
section .text
G_EXPORT __gmpn_rshift
%ifdef DLL
export __gmpn_rshift
%endif
%if 0
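; Alternative implementation using general-purpose shift instructions,
; currently disabled in favour of the MMX code after the %else below.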
G_LABEL __gmpn_rshift
movsxd s_len,s_lend
or s_len,s_len
jz .0
%ifdef _WIN64_ABI
mov d_ptr,rcx
mov s_ptr,rdx
mov rcx,shift
%endif
cmp s_len,byte 2
jge .1
mov rax,[s_ptr]
mov r_tmp,rax
shr r_tmp,cl
neg cl ; -shift acts as 64-shift, since shift counts are taken mod 64
mov [d_ptr],r_tmp
shl rax,cl
.0: ret
.1: lea s_ptr,[s_ptr+s_len*8]
lea d_ptr,[d_ptr+s_len*8]
neg s_len
mov s_tmp,[s_ptr+s_len*8]
movq xmm0,s_tmp ; save the low limb in xmm0 for the return value
shr s_tmp,cl
neg cl
inc s_len
.2: mov rax,[s_ptr+s_len*8]
mov r_tmp,rax
shl r_tmp,cl
neg cl
xor r_tmp,s_tmp ; combine the two parts (xor acts as or, the bit ranges are disjoint)
shr rax,cl
neg cl
mov s_tmp,rax
mov [d_ptr+s_len*8-8],r_tmp
inc s_len
jnz .2
mov [d_ptr-8],rax
movq rax,xmm0
shl rax,cl
ret
%else
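; MMX implementation: the bits shifted out of the low limb, src[0] << (64 - shift),
; are returned in rax, and each result limb is formed as
;   dst[i] = (src[i] >> shift) | (src[i+1] << (64 - shift))
; with the main loop below handling two limbs per pass.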
G_LABEL __gmpn_rshift
movq mm7, [src]             ; mm7 = src[0] (lowest limb)
movd mm1, r_tmpd            ; mm1 = shift count
mov eax, 64
sub eax, r_tmpd
movd mm0, eax               ; mm0 = 64 - shift
movq mm3, mm7
psllq mm7, mm0              ; bits shifted out of the low limb ...
movd rax, mm7               ; ... become the return value
lea src, [src+s_len*8]      ; point src and dst just past their ends
lea dst, [dst+s_len*8]
neg s_len                   ; negative index counting up towards zero
add s_len, 2
jg .1                       ; only one limb: skip the main loop
align 8
.0: movq mm6, [src+s_len*8-8] ; next (higher) source limb
movq mm2, mm6
psllq mm6, mm0              ; low bits of the next limb ...
psrlq mm3, mm1              ; ... joined to the high bits of the current limb
por mm3, mm6
movq [dst+s_len*8-16], mm3  ; store the combined result limb
je .2                       ; counter reached zero: only the top limb's shift remains
movq mm7, [src+s_len*8]     ; second limb of the pair, combined the same way
movq mm3, mm7
psllq mm7, mm0
psrlq mm2, mm1
por mm2, mm7
movq [dst+s_len*8-8], mm2
add s_len, 2
jle .0                      ; more limb pairs to process
.1: movq mm2, mm3           ; one limb left over from the loop
.2: psrlq mm2, mm1          ; top result limb is a plain right shift
movq [dst-8], mm2
emms                        ; clear MMX state before returning
ret
%endif
end