;  Copyright 2009 Jason Moxham
;
;  Windows Conversion Copyright 2008 Brian Gladman
;
;  This file is part of the MPIR Library.
;
;  The MPIR Library is free software; you can redistribute it and/or modify
;  it under the terms of the GNU Lesser General Public License as published
;  by the Free Software Foundation; either version 2.1 of the License, or (at
;  your option) any later version.

;  The MPIR Library is distributed in the hope that it will be useful, but
;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
;  License for more details.

;  You should have received a copy of the GNU Lesser General Public License
;  along with the MPIR Library; see the file COPYING.LIB.  If not, write
;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
;  Boston, MA 02110-1301, USA.
;
;  mp_limb_t mpn_lshift1(mp_ptr, mp_ptr, mp_size_t)
;  rax                      rdi     rsi        rdx
;  rax                      rcx     rdx         r8

%include "..\yasm_mac.inc"

%define reg_save_list rsi, rdi

    CPU  Athlon64
    BITS 64

    FRAME_PROC mpn_lshift2, 0, reg_save_list
    movsxd  rax, r8d
	lea     rsi, [rdx+rax*8-24]
	lea     rdi, [rcx+rax*8-24]
	mov     ecx, 3
	sub     rcx, rax
	xor     eax, eax
	xor     edx, edx
	cmp     rcx, 0
	jge     L_skiplp
	xalign  16
L_lp:
	mov     r8, [rsi+rcx*8]
	mov     r9, [rsi+rcx*8+8]
	mov     r10, [rsi+rcx*8+16]
	mov     r11, [rsi+rcx*8+24]
	add     rax, rax
	adc     r8, r8
	adc     r9, r9
	adc     r10, r10
	adc     r11, r11
	sbb     rax, rax
	add     rdx, rdx
	adc     r8, r8
	adc     r9, r9
	adc     r10, r10
	adc     r11, r11
	mov     [rdi+rcx*8+24], r11
	sbb     rdx, rdx
	mov     [rdi+rcx*8], r8
	add     rcx, 4
	mov     [rdi+rcx*8-24], r9
	mov     [rdi+rcx*8-16], r10
	jnc     L_lp
L_skiplp:
	cmp     rcx, 2
	ja      L_xit
	je      L_case1
	jp      L_case2
L_case3:
	mov     r8, [rsi+rcx*8]
	mov     r9, [rsi+rcx*8+8]
	mov     r10, [rsi+rcx*8+16]
	add     rax, rax
	adc     r8, r8
	adc     r9, r9
	adc     r10, r10
	sbb     rax, rax
	add     rdx, rdx
	adc     r8, r8
	adc     r9, r9
	adc     r10, r10
	sbb     rdx, rdx
	mov     [rdi+rcx*8], r8
	mov     [rdi+rcx*8+8], r9
	mov     [rdi+rcx*8+16], r10
	jmp     L_xit
	xalign  16
L_case2:
	mov     r8, [rsi+rcx*8]
	mov     r9, [rsi+rcx*8+8]
	add     rax, rax
	adc     r8, r8
	adc     r9, r9
	sbb     rax, rax
	add     rdx, rdx
	adc     r8, r8
	adc     r9, r9
	sbb     rdx, rdx
	mov     [rdi+rcx*8], r8
	mov     [rdi+rcx*8+8], r9
	jmp     L_xit
	xalign  16
L_case1:
	mov     r8, [rsi+rcx*8]
	add     rax, rax
	adc     r8, r8
	sbb     rax, rax
	add     rdx, rdx
	adc     r8, r8
	sbb     rdx, rdx
	mov     [rdi+rcx*8], r8
L_xit:
	lea     rax, [rdx+rax*2]
	neg     rax
    END_PROC reg_save_list

    end