;  mpir/mpn/x86_64w/aors_err2_n.asm

;
;  AMD64 mpn_add_err2_n, mpn_sub_err2_n
;
;  Copyright (C) 2009, David Harvey
;
;  Windows Conversion Copyright 2008 Brian Gladman
;
;  All rights reserved.
;
;  Redistribution and use in source and binary forms, with or without
;  modification, are permitted provided that the following conditions are
;  met:
;
;  1. Redistributions of source code must retain the above copyright notice,
;  this list of conditions and the following disclaimer.
;
;  2. Redistributions in binary form must reproduce the above copyright
;  notice, this list of conditions and the following disclaimer in the
;  documentation and/or other materials provided with the distribution.
;
;  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
;  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
;  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
;  PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
;  HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
;  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
;  TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
;  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
;  LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
;  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
;  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;
;  mp_limb_t mpn_add_err2_n (mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_ptr,  mp_ptr, mp_size_t, mp_limb_t);
;  mp_limb_t mpn_sub_err2_n (mp_ptr, mp_ptr, mp_ptr, mp_ptr, mp_ptr,  mp_ptr, mp_size_t, mp_limb_t);
;  rax                          rdi     rsi     rdx     rcx       r8      r9     8(rsp)    16(rsp)
;  rax                          rcx     rdx      r8      r9 [rsp+40] [rsp+48]  [rsp+56]   [rsp+64]

%include "yasm_mac.inc"

; Register list handed to both FRAME_PROC and END_PROC by the fun macro
; below. Every register named here is written by the routine body, and all
; of them are callee-saved under the Microsoft x64 convention, so their
; values must survive the call.
; NOTE(review): the actual save/restore is presumably performed by the
; FRAME_PROC / END_PROC macros from yasm_mac.inc, which are not visible
; in this file - confirm there.
%define reg_save_list   rbx, rsi, rdi, rbp, r12, r13, r14

;-----------------------------------------------------------------------
;  fun NAME, OP
;
;  Emits one complete procedure called NAME. OP is the carry-chained
;  instruction applied to each pair of source limbs: the invocations
;  further down pass adc (mpn_add_err2_n) and sbb (mpn_sub_err2_n).
;
;  ABI:  Microsoft x64. The argument names below are used only in these
;        comments; limbs are 8 bytes wide (all indexing is scaled by 8).
;    rcx       rp   result limbs, n of them are written
;    rdx       up   first source operand
;    r8        vp   second source operand
;    r9        ep   receives four limbs (see Out)
;    [rsp+40]  yp1  first weight vector
;    [rsp+48]  yp2  second weight vector
;    [rsp+56]  n    limb count
;    [rsp+64]  cy   carry/borrow in; only bit 0 is consumed
;
;  Out:  rp[i] = up[i] OP vp[i] with the carry chained from cy, i = 0..n-1.
;        With c(i) the carry out of limb i:
;          ep[1]:ep[0] = sum of yp1[n-1-i] over all i where c(i) = 1
;          ep[3]:ep[2] = sum of yp2[n-1-i] over all i where c(i) = 1
;        rax = carry out of the top limb (0 or 1).
;
;  Register roles once the setup is done:
;    rdi, rsi, rdx  rp, up, vp advanced by n limbs; indexed with r10 < 0
;    r10            -n, counts upward and reaches 0 when all limbs are done
;    rcx            ep
;    r8             walks yp1 from its high end downward
;    r9             yp2 - yp1 in bytes, so [r8+r9] is the matching yp2 limb
;    rax            cy on entry, afterwards a saved carry (0 or all ones)
;    r14            saved carry of the first limb of each pair
;    rbx            scratch limb
;    rbp:r11        low:high limbs of the yp1 sum
;    r12:r13        low:high limbs of the yp2 sum
;
;  NOTE(review): n = 0 is not special-cased; the even path enters the
;  two-limb loop unconditionally, so callers presumably must guarantee
;  n >= 1 - confirm against the callers.
;  NOTE(review): xalign, FRAME_PROC, END_PROC and stack_use come from
;  yasm_mac.inc and are not visible here. stack_use is presumably the
;  number of bytes the prologue has pushed, which is why it is added to
;  the caller-relative stack offsets of arguments five to eight - confirm.
;-----------------------------------------------------------------------
%macro fun 2
	xalign  16
    FRAME_PROC %1, 0, reg_save_list
    ; Fetch n first: it is needed to form the three end pointers.
    mov     rax, qword [rsp+stack_use+56]
    ; Point rp, up and vp just past their last limb so that one negative
    ; index (r10) running up to zero addresses all three arrays.
    lea     rdi, [rcx+rax*8]
    lea     rsi, [rdx+rax*8]
    lea     rdx, [r8+rax*8]
    ; rcx and r8/r9 have been consumed above and can now be reused for
    ; ep, yp1 and yp2; rax is reloaded with the incoming carry.
    mov     rcx, r9
    mov     r10, rax
    mov     r8, [rsp+stack_use+40]
    mov     r9, [rsp+stack_use+48]
    mov     rax, [rsp+stack_use+64]

	; Clear both two-limb accumulators.
	xor     rbp, rbp
	xor     r11, r11
	xor     r12, r12
	xor     r13, r13
	; Keep yp2 as a byte offset from yp1 so only r8 has to be stepped.
	sub     r9, r8
	; The main loop handles two limbs per pass; an odd n peels one limb
	; off first (label 1), an even n goes straight to the loop (label 2).
	test    r10, 1
	jnz     %%1
	; Even n: r8 = address of yp1[n-1], r10 = -n.
	lea     r8, [r8+r10*8-8]
	neg     r10
	jmp     %%2

	xalign  16
%%1:
	; Odd n: r8 = address of yp1[n-2], so [r8+8] is yp1[n-1]; r10 = -n.
	lea     r8, [r8+r10*8-16]
	neg     r10
	; Move bit 0 of cy into CF to seed the carry chain.
	shr     rax, 1
	mov     rbx, [rsi+r10*8]
	%2      rbx, [rdx+r10*8]
	; The accumulators are still zero here, so on carry a conditional
	; move of yp1[n-1] / yp2[n-1] is equivalent to adding them.
	cmovc   rbp, [r8+8]
	cmovc   r12, [r8+r9+8]
	mov     [rdi+r10*8], rbx
	; cmovc and mov leave the flags alone, so CF is still the carry out
	; of limb 0; park it in rax as 0 or all ones.
	sbb     rax, rax
	; n == 1: nothing left for the two-limb loop.
	inc     r10
	jz      %%3

	xalign  16
%%2:
	mov     rbx, [rsi+r10*8]
	; Reload CF from bit 0 of rax: the raw cy on a first pass with even
	; n, otherwise the 0 / all-ones value saved by the previous limb.
	shr     rax, 1
	%2      rbx, [rdx+r10*8]
	mov     [rdi+r10*8], rbx
	; r14 = 0 or all ones from the first limb carry. Subtracting a
	; register from itself with borrow reproduces the same CF, so the
	; chain continues into the second limb without a reload.
	sbb     r14, r14

	mov     rbx, [rsi+r10*8+8]
	%2      rbx, [rdx+r10*8+8]
	mov     [rdi+r10*8+8], rbx
	; rax = 0 or all ones from the second limb carry; it also carries
	; the chain into the next pass and supplies the return value.
	sbb     rax, rax

    ; First limb of the pair: add the yp1 limb at [r8] to rbp:r11 only
    ; if that limb carried (the mask turns it into zero otherwise).
    mov     rbx, [r8]
	and     rbx, r14
	add     rbp, rbx
	adc     r11, 0

    ; Same for the yp2 limb at the same index, into r12:r13. r14 is
    ; not needed afterwards, so it is masked in place.
    and     r14, [r8+r9]
	add     r12, r14
	adc     r13, 0

    ; Second limb of the pair: the next lower yp1 limb, gated by rax.
    mov     rbx, [r8-8]
	and     rbx, rax
	add     rbp, rbx
	adc     r11, 0

    ; And the next lower yp2 limb, gated by rax.
    mov     rbx, [r8+r9-8]
	and     rbx, rax
	add     r12, rbx
	adc     r13, 0

	; Advance two limbs. lea does not modify the flags, so the jnz
	; still tests the result of the add on r10.
	add     r10, 2
	lea     r8, [r8-16]
	jnz     %%2
%%3:
	; Write the two accumulated sums to ep[0..3].
	mov     [rcx], rbp
	mov     [rcx+8], r11
	mov     [rcx+16], r12
	mov     [rcx+24], r13
	; Reduce the 0 / all-ones carry in rax to 0 or 1 for the return
	; value; the 32-bit operation also clears the upper half of rax.
	and     eax, 1
    END_PROC reg_save_list
%endmacro

    ; Assembler settings. They come after the macro definition but before
    ; its two expansions, so they apply to every instruction emitted below.
    CPU  Athlon64
    BITS 64

    ; Expand the shared body twice: once with adc as the limb operation
    ; (addition) and once with sbb (subtraction).
    fun mpn_add_err2_n, adc
    fun mpn_sub_err2_n, sbb

    end