;  mpir/mpn/x86_64w/k8/redc_basecase.asm


;  Copyright 2009 Jason Moxham
;
;  Windows Conversion Copyright 2008 Brian Gladman
;
;  This file is part of the MPIR Library.
;
;  The MPIR Library is free software; you can redistribute it and/or modify
;  it under the terms of the GNU Lesser General Public License as published
;  by the Free Software Foundation; either version 2.1 of the License, or (at
;  your option) any later version.
;  The MPIR Library is distributed in the hope that it will be useful, but
;  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
;  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
;  License for more details.
;  You should have received a copy of the GNU Lesser General Public License
;  along with the MPIR Library; see the file COPYING.LIB.  If not, write
;  to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
;  Boston, MA 02110-1301, USA.
;
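;  mp_limb_t mpn_redc_basecase(mp_ptr, mp_ptr, mp_size_t, mp_limb_t,  mp_ptr)
;  rax                            rdi     rsi        rdx        rcx       r8    <- registers used internally, after the prologue remaps the arguments
;  rax                            rcx     rdx         r8         r9 [rsp+40]    <- registers / stack slot holding the arguments on entry (Win64)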

%include "..\yasm_mac.inc"

%define reg_save_list   rbx, rsi, rdi, rbp, r12, r13, r14, r15

    CPU  Athlon64
    BITS 64

; mpn_add -- inline multi-limb addition: [rdi] = [rsi] + [rdx].
;   In:   rsi, rdx = source operand pointers
;         rdi      = destination pointer
;         rcx      = number of 64-bit limbs
;   Out:  CF       = carry out of the last limb added
;   Uses: rax (count mod 4), rcx (count / 4), r8-r11 as scratch; rsi, rdx
;         and rdi are advanced by 32 bytes for every 4-limb iteration.
;   The short path always adds at least one limb, so a zero count does not
;   appear to be supported -- TODO confirm against callers.
%macro mpn_add 0

    ; rax = leftover limbs (count mod 4), rcx = number of 4-limb iterations
    mov     rax, rcx
    and     rax, 3
    shr     rcx, 2
    ; ZF picks the path; the compare also leaves CF clear (shr may have set
    ; it), which the first adc of the unrolled loop depends on
    cmp     rcx, 0
    jnz     %%1
    ; fewer than four limbs: the first limb uses add, later ones use adc
    mov     r11, [rsi]
    add     r11, [rdx]
    mov     [rdi], r11
    ; dec does not modify CF, so the carry survives the count checks
    dec     rax
    jz      %%2
    mov     r11, [rsi+8]
    adc     r11, [rdx+8]
    mov     [rdi+8], r11
    dec     rax
    jz      %%2
    mov     r11, [rsi+16]
    adc     r11, [rdx+16]
    mov     [rdi+16], r11
    jmp     %%2

    ; main loop: four limbs per iteration; pointers are advanced with lea and
    ; the counter with dec so that CF is carried from one adc to the next
    xalign  16
%%1:mov     r11, [rsi]
    mov     r8, [rsi+8]
    lea     rsi, [rsi+32]
    adc     r11, [rdx]
    adc     r8, [rdx+8]
    lea     rdx, [rdx+32]
    mov     [rdi], r11
    mov     [rdi+8], r8
    lea     rdi, [rdi+32]
    mov     r9, [rsi-16]
    mov     r10, [rsi-8]
    adc     r9, [rdx-16]
    adc     r10, [rdx-8]
    mov     [rdi-16], r9
    dec     rcx
    mov     [rdi-8], r10
    jnz     %%1
    ; inc followed by dec sets ZF from rax while leaving CF untouched
    inc     rax
    dec     rax
    jz      %%2
    ; tail: up to three leftover limbs, continuing the same carry chain
    mov     r11, [rsi]
    adc     r11, [rdx]
    mov     [rdi], r11
    dec     rax
    jz      %%2
    mov     r11, [rsi+8]
    adc     r11, [rdx+8]
    mov     [rdi+8], r11
    dec     rax
    jz      %%2
    mov     r11, [rsi+16]
    adc     r11, [rdx+16]
    mov     [rdi+16], r11
%%2:

%endmacro

; mpn_sub -- inline multi-limb subtraction: [rbx] = [rsi] - [rdx].
;   In:   rsi = minuend pointer, rdx = subtrahend pointer
;         rbx = destination pointer
;         rbp = number of 64-bit limbs
;   Out:  CF  = borrow out of the last limb subtracted
;   Uses: rax (count mod 4), rbp (count / 4), r8-r11 as scratch; rsi, rdx
;         and rbx are advanced by 32 bytes for every 4-limb iteration.
;   The short path always handles at least one limb, so a zero count does
;   not appear to be supported -- TODO confirm against callers.
%macro mpn_sub 0

    ; rax = leftover limbs (count mod 4), rbp = number of 4-limb iterations
    mov     rax, rbp
    and     rax, 3
    shr     rbp, 2
    ; ZF picks the path; the compare also leaves CF clear (shr may have set
    ; it), which the first sbb of the unrolled loop depends on
    cmp     rbp, 0
    jnz     %%1
    ; fewer than four limbs: the first limb uses sub, later ones use sbb
    mov     r11, [rsi]
    sub     r11, [rdx]
    mov     [rbx], r11
    ; dec does not modify CF, so the borrow survives the count checks
    dec     rax
    jz      %%2
    mov     r11, [rsi+8]
    sbb     r11, [rdx+8]
    mov     [rbx+8], r11
    dec     rax
    jz      %%2
    mov     r11, [rsi+16]
    sbb     r11, [rdx+16]
    mov     [rbx+16], r11
    jmp     %%2
    ; main loop: four limbs per iteration; pointers are advanced with lea and
    ; the counter with dec so that CF is carried from one sbb to the next
    xalign  16
%%1:mov     r11, [rsi]
    mov     r8, [rsi+8]
    lea     rsi, [rsi+32]
    sbb     r11, [rdx]
    sbb     r8, [rdx+8]
    lea     rdx, [rdx+32]
    mov     [rbx], r11
    mov     [rbx+8], r8
    lea     rbx, [rbx+32]
    mov     r9, [rsi-16]
    mov     r10, [rsi-8]
    sbb     r9, [rdx-16]
    sbb     r10, [rdx-8]
    mov     [rbx-16], r9
    dec     rbp
    mov     [rbx-8], r10
    jnz     %%1
    ; inc followed by dec sets ZF from rax while leaving CF untouched
    inc     rax
    dec     rax
    jz      %%2
    ; tail: up to three leftover limbs, continuing the same borrow chain
    mov     r11, [rsi]
    sbb     r11, [rdx]
    mov     [rbx], r11
    dec     rax
    jz      %%2
    mov     r11, [rsi+8]
    sbb     r11, [rdx+8]
    mov     [rbx+8], r11
    dec     rax
    jz      %%2
    mov     r11, [rsi+16]
    sbb     r11, [rdx+16]
    mov     [rbx+16], r11
%%2:

%endmacro

; addmulloop -- 4-way unrolled inner loop: multiplies four source limbs by
; r13 per iteration and accumulates the products into the destination.
; The macro declares one parameter, but the body never references it.
;   r13 = multiplier limb
;   rsi = source base, r8 = destination base, r11 = limb index
;   rax = next source limb, loaded before entry and again at the bottom
;   r12 = word still to be added at [r8+r11*8]; r9 = word carried into the
;         following limb; r10, rbx, r12 and r9 take turns as accumulators
; The loop repeats until "add r11, 4" produces a carry. It is only entered
; with a negative index (see the jge guard in mpn_addmul_1_int), so it runs
; until the index reaches zero or above.
; Each "db 0x26" emits a raw 0x26 byte in front of the following adc; 0x26 is
; the ES segment-override prefix, presumably used here purely as padding for
; instruction alignment/decoding -- TODO confirm.
%macro addmulloop 1

    xalign  16
%%1:mov     r10, 0
    ; product for limb r11+1; its low word joins r9, its high word r10
    mul     r13
    add     [r8+r11*8], r12
    adc     r9, rax
    db      0x26
    adc     r10, rdx
    ; product for limb r11+2
    mov     rax, [rsi+r11*8+16]
    mul     r13
    add     [r8+r11*8+8], r9
    adc     r10, rax
    mov     ebx, 0
    adc     rbx, rdx
    ; product for limb r11+3; r12 and r9 are recycled as fresh accumulators
    mov     rax, [rsi+r11*8+24]
    mov     r12, 0
    mov     r9, 0
    mul     r13
    add     [r8+r11*8+16], r10
    db      0x26
    adc     rbx, rax
    db      0x26
    adc     r12, rdx
    ; product for limb r11+4; leaves r12/r9 pending for the next iteration
    mov     rax, [rsi+r11*8+32]
    mul     r13
    add     [r8+r11*8+24], rbx
    db      0x26
    adc     r12, rax
    db      0x26
    adc     r9, rdx
    ; advance the index; the mov below does not alter the carry tested by jnc
    add     r11, 4
    mov     rax, [rsi+r11*8+8]
    jnc     %%1

%endmacro

; addmulpropro0 -- one-time setup executed before the outer loop of variant 0.
%macro addmulpropro0 0

    ; scale the first multiplier limb by rcx (later passes do this in
    ; addmulnext0)
    imul    r13, rcx
    ; addmulpro0 adds 8 to r8 at the top of every pass, including the first,
    ; so start one limb lower
    lea     r8, [r8-8]

%endmacro

; addmulpro0 -- start of one outer pass for variant 0: resets the inner
; index, advances the destination by one limb and computes the first product.
;   Out: r12 = low word, r9 = high word of [rsi+r14*8] * r13
;        rax = following source limb
;        flags = result of comparing r14 with 0 (consumed by the jge that
;                follows this macro in mpn_addmul_1_int)
%macro addmulpro0 0

    mov     r11, r14
    lea     r8, [r8+8]
    mov     rax, [rsi+r14*8]
    mul     r13
    mov     r12, rax
    mov     rax, [rsi+r14*8+8]
    mov     r9, rdx
    cmp     r14, 0

%endmacro

; addmulnext0 -- end of one outer pass for variant 0: multiplies the last
; four source limbs by r13, accumulates them into the destination, stores the
; pass's final carry word and prepares the multiplier for the next pass.
;   In:  r12/r9 = pending low/high words, rax = next source limb
;   Out: r13 = next multiplier (already scaled by rcx)
;        ZF  = set when the outer counter r15 reaches zero
%macro addmulnext0 0

    mov     r10d, 0
    mul     r13
    add     [r8+r11*8], r12
    adc     r9, rax
    adc     r10, rdx
    mov     rax, [rsi+r11*8+16]
    mul     r13
    add     [r8+r11*8+8], r9
    adc     r10, rax
    mov     ebx, 0
    adc     rbx, rdx
    mov     rax, [rsi+r11*8+24]
    mov     r12d, 0
    mov     r9d, 0
    mul     r13
    add     [r8+r11*8+16], r10
    adc     rbx, rax
    adc     r12, rdx
    mov     rax, [rsi+r11*8+32]
    mul     r13
    add     [r8+r11*8+24], rbx
    ; fetch the limb that becomes the next multiplier; mov leaves the carry
    ; from the add above intact for the adc pair below
    mov     r13, [r8+r14*8+8]
    adc     r12, rax
    adc     r9, rdx
    ; scale the next multiplier by rcx
    imul    r13, rcx
    add     [r8+r11*8+32], r12
    adc     r9, 0
    ; outer counter; the mov below does not change the ZF produced here
    dec     r15
    ; the first destination limb of this pass receives the final carry word
    mov     [r8+r14*8], r9

%endmacro

; addmulpropro1 -- intentionally empty: variant 1 needs no one-time setup
; (addmulpro1 scales the multiplier itself at the top of each pass).
%macro addmulpropro1 0

%endmacro

; addmulpro1 -- start of one outer pass for variant 1: scales the multiplier
; by rcx, resets the inner index and computes the first product.
;   Out: r12 = low word, r9 = high word of [rsi+r14*8] * r13
;        rax = following source limb
;        flags = result of comparing r14 with 0 (consumed by the jge that
;                follows this macro in mpn_addmul_1_int)
%macro addmulpro1 0

    imul    r13, rcx
    mov     rax, [rsi+r14*8]
    mov     r11, r14
    mul     r13
    mov     r12, rax
    mov     rax, [rsi+r14*8+8]
    mov     r9, rdx
    cmp     r14, 0

%endmacro

; addmulnext1 -- end of one outer pass for variant 1: multiplies the last
; three source limbs by r13, accumulates them into the destination, stores
; the pass's final carry word and steps the destination to the next limb.
;   In:  r12/r9 = pending low/high words, rax = next source limb
;   Out: r13 = next multiplier limb (unscaled; addmulpro1 applies the imul)
;        ZF  = set when the outer counter r15 reaches zero
%macro addmulnext1 0

    mov     r10d, 0
    mul     r13
    add     [r8+r11*8], r12
    adc     r9, rax
    adc     r10, rdx
    mov     rax, [rsi+r11*8+16]
    mul     r13
    add     [r8+r11*8+8], r9
    adc     r10, rax
    mov     ebx, 0
    adc     rbx, rdx
    mov     rax, [rsi+r11*8+24]
    mov     r12d, 0
    mul     r13
    add     [r8+r11*8+16], r10
    adc     rbx, rax
    adc     r12, rdx
    add     [r8+r11*8+24], rbx
    ; fetch the limb that becomes the next multiplier; mov leaves the carry
    ; from the add above intact for the adc below
    mov     r13, [r8+r14*8+8]
    adc     r12, 0
    ; outer counter; the mov and lea below do not change the ZF produced here
    dec     r15
    ; the first destination limb of this pass receives the final carry word
    mov     [r8+r14*8], r12
    lea     r8, [r8+8]

%endmacro

; addmulpropro2 -- intentionally empty: variant 2 needs no one-time setup
; (addmulpro2 scales the multiplier itself at the top of each pass).
%macro addmulpropro2 0

%endmacro

; addmulpro2 -- start of one outer pass for variant 2: scales the multiplier
; by rcx, resets the inner index and computes the first product.
;   Out: r12 = low word, r9 = high word of [rsi+r14*8] * r13
;        rax = following source limb
;        flags = result of comparing r14 with 0 (consumed by the jge that
;                follows this macro in mpn_addmul_1_int)
%macro addmulpro2 0

    imul    r13, rcx
    mov     rax, [rsi+r14*8]
    mov     r11, r14
    mul     r13
    mov     r12, rax
    mov     rax, [rsi+r14*8+8]
    mov     r9, rdx
    cmp     r14, 0

%endmacro

; addmulnext2 -- end of one outer pass for variant 2: multiplies the last
; two source limbs by r13, accumulates them into the destination, stores the
; pass's final carry word and steps the destination to the next limb.
;   In:  r12/r9 = pending low/high words, rax = next source limb
;   Out: r13 = next multiplier limb (unscaled; addmulpro2 applies the imul)
;        ZF  = set when the outer counter r15 reaches zero
%macro addmulnext2 0

    mul     r13
    add     [r8+r11*8], r12
    adc     r9, rax
    ; zeroing r10 with mov keeps the carry from the adc above alive
    mov     r10d, 0
    adc     r10, rdx
    mov     rax, [rsi+r11*8+16]
    mul     r13
    add     [r8+r11*8+8], r9
    adc     r10, rax
    mov     ebx, 0
    adc     rbx, rdx
    ; fetch the limb that becomes the next multiplier
    mov     r13, [r8+r14*8+8]
    add     [r8+r11*8+16], r10
    adc     rbx, 0
    ; the first destination limb of this pass receives the final carry word
    mov     [r8+r14*8], rbx
    ; outer counter; the lea below does not change the ZF produced here
    dec     r15
    lea     r8, [r8+8]

%endmacro

; addmulpropro3 -- intentionally empty: variant 3 needs no one-time setup
; (addmulpro3 scales the multiplier itself at the top of each pass).
%macro addmulpropro3 0

%endmacro

; addmulpro3 -- start of one outer pass for variant 3: scales the multiplier
; by rcx, resets the inner index and computes the first product.
;   Out: r12 = low word, r9 = high word of [rsi+r14*8] * r13
;        rax = following source limb
;        flags = result of comparing r14 with 0 (consumed by the jge that
;                follows this macro in mpn_addmul_1_int)
%macro addmulpro3 0

    imul    r13, rcx
    mov     rax, [rsi+r14*8]
    mov     r11, r14
    mul     r13
    mov     r12, rax
    mov     rax, [rsi+r14*8+8]
    mov     r9, rdx
    cmp     r14, 0

%endmacro

; addmulnext3 -- end of one outer pass for variant 3: multiplies the last
; source limb by r13, accumulates it into the destination, stores the pass's
; final carry word and steps the destination to the next limb.
;   In:  r12/r9 = pending low/high words, rax = last source limb
;   Out: r13 = next multiplier limb (unscaled; addmulpro3 applies the imul)
;        ZF  = set when the outer counter r15 reaches zero
%macro addmulnext3 0

    mul     r13
    add     [r8+r11*8], r12
    adc     r9, rax
    ; zeroing r10 with mov keeps the carry from the adc above alive
    mov     r10d, 0
    adc     r10, rdx
    add     [r8+r11*8+8], r9
    adc     r10, 0
    ; fetch the limb that becomes the next multiplier
    mov     r13, [r8+r14*8+8]
    ; the first destination limb of this pass receives the final carry word
    mov     [r8+r14*8], r10
    lea     r8, [r8+8]
    ; outer counter; its ZF is tested by the jnz that follows this macro
    dec     r15

%endmacro

; mpn_addmul_1_int -- outer loop of the multiply-accumulate passes for one
; unroll variant. Parameter 1 (0..3) selects which addmulpropro / addmulpro /
; addmulnext macro set is expanded.
;   r14 = value the inner index r11 is reset to at the start of each pass
;   r15 = outer pass counter, decremented inside addmulnext
; Flag flow: the jge tests the "cmp r14, 0" that ends addmulpro and skips the
; unrolled inner loop when r14 is not negative; the final jnz tests the ZF
; left by "dec r15" in addmulnext and repeats while r15 is non-zero.
%macro mpn_addmul_1_int 1

    addmulpropro%1
    xalign  16
%%1:addmulpro%1
    jge     %%2
    addmulloop %1
%%2:addmulnext%1
    jnz     %%1

%endmacro

    ; mpn_redc_basecase -- entry point; arguments arrive in the Win64
    ; registers and are remapped below to the registers the macros expect.
    ;   rcx      (arg 1) -> rdi : destination of the final add / subtract
    ;   rdx      (arg 2) -> rsi : source operand of the multiply passes and
    ;                             subtrahend of the final correction
    ;   r8       (arg 3) -> rdx : limb count n
    ;   r9       (arg 4) -> rcx : limb that scales each multiplier (imul)
    ;   [rsp+40] (arg 5) -> r8  : work area, updated in place; the final
    ;                             addition reads limbs 0..2n-1 of it
    ; LEAF_PROC / FRAME_PROC / END_PROC and stack_use come from yasm_mac.inc;
    ; FRAME_PROC presumably saves reg_save_list and defines stack_use as the
    ; resulting stack adjustment -- TODO confirm against yasm_mac.inc.
    LEAF_PROC mpn_redc_basecase
    ; n == 1 is handled by a short frameless path
    cmp     r8, 1
    je      one
    FRAME_PROC ?mpn_redc_basecase, 0, reg_save_list
    mov     rdi, rcx
    mov     rsi, rdx
    mov     rdx, r8
    mov     rcx, r9
    ; fifth argument, loaded once (the original loaded it twice with no
    ; intervening use of r8)
    mov     r8, [rsp+stack_use+40]

    ; r14 = 5 - n: starting value of the inner limb index
    mov     r14, 5
    sub     r14, rdx

    ; keep the unbiased second-argument pointer for the final correction
    ; (presumably in the caller-provided home slot -- TODO confirm)
    mov     [rsp+stack_use+16], rsi

    ; bias both pointers by (n - 5) limbs so that [base + r14*8] is limb 0
    lea     r8, [r8+rdx*8-40]
    lea     rsi, [rsi+rdx*8-40]
    mov     rbp, rdx
    mov     r15, rdx
    ; pick the unroll variant from (5 - n) mod 4; the mov below leaves the
    ; flags of the and intact, so je sees ZF (value 0) and jp sees PF
    ; (value 3 has an even number of set bits)
    mov     rax, r14
    and     rax, 3
    mov     r13, [r8+r14*8]
    je      .2
    jp      .4
    cmp     rax, 1
    je      .3
.1:
    mpn_addmul_1_int 2
    jmp     .5

    xalign  16
.2:
    mpn_addmul_1_int 0
    jmp     .5

    xalign  16
.3:
    mpn_addmul_1_int 1
    jmp     .5

    xalign  16
.4:
    mpn_addmul_1_int 3

    xalign  16
.5:
    ; destination = work[n .. 2n-1] + work[0 .. n-1]
    mov     rcx, rbp
    mov     rdx, [rsp+stack_use+40]
    lea     rsi, [rdx+rbp*8]
    mov     rbx, rdi
    mpn_add
    ; reload the saved second-argument pointer; mov keeps the carry of the
    ; addition alive for the jnc
    mov     rdx, [rsp+stack_use+16]
    jnc     .6
    ; the addition carried out: destination -= second-argument operand
    mov     rsi, rbx
    mpn_sub
.6:
    END_PROC reg_save_list

    ; Single-limb case, reached from the "cmp r8, 1" check at entry before
    ; any frame is set up, so the arguments are still where the caller put
    ; them: rcx = arg 1 (result pointer), rdx = arg 2 (pointer to the limb
    ; loaded into r11), r9 = arg 4, [rsp+40] = arg 5 (pointer to two limbs).
    xalign  16
one:
	mov     r8,[rsp+40]
    ; r10 = low limb of the fifth argument, r11 = limb of the second argument
    mov     r10, [r8]
    mov     r11, [rdx]
    ; rdx:rax = (r9 * r10) * r11
    imul    r9, r10
    mov     rax, r9
    mul     r11
    ; low word plus r10; per the original note below this sum is zero, which
    ; presumably relies on r9 being the negated inverse of r11 -- TODO confirm
    add     rax, r10
    adc     rdx, [r8+8]     ; rax is zero here
    ; no carry out: r11 = rax, so (given the note above) nothing is
    ; subtracted; on carry r11 keeps the second-argument limb
    cmovnc  r11, rax
    sub     rdx, r11
    mov     [rcx], rdx
    ret

    end
add missing Windows AMD64 assembler file to SVN 2009-03-06 19:04:23 -05:00
			`; Copyright 2009 Jason Moxham`
1. Add new x64 assembler functions to the Windows build 2009-09-02 07:41:43 -04:00			`;`
			`; Windows Conversion Copyright 2008 Brian Gladman`
			`;`
add missing Windows AMD64 assembler file to SVN 2009-03-06 19:04:23 -05:00			`; This file is part of the MPIR Library.`
1. Add new x64 assembler functions to the Windows build 2009-09-02 07:41:43 -04:00			`;`
add missing Windows AMD64 assembler file to SVN 2009-03-06 19:04:23 -05:00			`; The MPIR Library is free software; you can redistribute it and/or modify`
			`; it under the terms of the GNU Lesser General Public License as published`
			`; by the Free Software Foundation; either version 2.1 of the License, or (at`
			`; your option) any later version.`
			`; The MPIR Library is distributed in the hope that it will be useful, but`
			`; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY`
			`; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public`
			`; License for more details.`
			`; You should have received a copy of the GNU Lesser General Public License`
			`; along with the MPIR Library; see the file COPYING.LIB. If not, write`
			`; to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,`
			`; Boston, MA 02110-1301, USA.`
			`;`
1. Add new x64 assembler functions to the Windows build 2009-09-02 07:41:43 -04:00			`; mp_limb_t mpn_redc_basecase(mp_ptr, mp_ptr, mp_size_t, mp_limb_t, mp_ptr)`
			`; rax rdi rsi rdx rcx r8`
1. Update Windows k10 build to match Linux 2. Remove the now redundant 32 to 64 register mapping for mp_size_t inputs in Windows assembler 2010-07-02 07:52:24 -04:00			`; rax rcx rdx r8 r9 [rsp+40]`
add missing Windows AMD64 assembler file to SVN 2009-03-06 19:04:23 -05:00
Major tidy up of Windows x86_64 assembler code 2009-03-07 10:00:35 -05:00			`%include "..\yasm_mac.inc"`
add missing Windows AMD64 assembler file to SVN 2009-03-06 19:04:23 -05:00
			`%define reg_save_list rbx, rsi, rdi, rbp, r12, r13, r14, r15`

1. Update Windows version of getrusage 2. Use YASM's new alignment padding feature in the Windows assembler code 2009-03-24 10:40:39 -04:00			`CPU Athlon64`
Major tidy up of Windows x86_64 assembler code 2009-03-07 10:00:35 -05:00			`BITS 64`
add missing Windows AMD64 assembler file to SVN 2009-03-06 19:04:23 -05:00
			`%macro mpn_add 0`

Two minor Windows assembler changes plus code formatting to remove tabs and trailing white space 2009-03-13 16:32:09 -04:00			`mov rax, rcx`
			`and rax, 3`
			`shr rcx, 2`
			`cmp rcx, 0`
			`jnz %%1`
			`mov r11, [rsi]`
			`add r11, [rdx]`
			`mov [rdi], r11`
			`dec rax`
			`jz %%2`
			`mov r11, [rsi+8]`
			`adc r11, [rdx+8]`
			`mov [rdi+8], r11`
			`dec rax`
			`jz %%2`
			`mov r11, [rsi+16]`
			`adc r11, [rdx+16]`
			`mov [rdi+16], r11`
			`jmp %%2`

1. Update Windows version of getrusage 2. Use YASM's new alignment padding feature in the Windows assembler code 2009-03-24 10:40:39 -04:00			`xalign 16`
add missing Windows AMD64 assembler file to SVN 2009-03-06 19:04:23 -05:00			`%%1:mov r11, [rsi]`
Two minor Windows assembler changes plus code formatting to remove tabs and trailing white space 2009-03-13 16:32:09 -04:00			`mov r8, [rsi+8]`
			`lea rsi, [rsi+32]`
			`adc r11, [rdx]`
			`adc r8, [rdx+8]`
			`lea rdx, [rdx+32]`
			`mov [rdi], r11`
			`mov [rdi+8], r8`
			`lea rdi, [rdi+32]`
			`mov r9, [rsi-16]`
			`mov r10, [rsi-8]`
			`adc r9, [rdx-16]`
			`adc r10, [rdx-8]`
			`mov [rdi-16], r9`
			`dec rcx`
			`mov [rdi-8], r10`
			`jnz %%1`
			`inc rax`
			`dec rax`
			`jz %%2`
			`mov r11, [rsi]`
			`adc r11, [rdx]`
			`mov [rdi], r11`
			`dec rax`
			`jz %%2`
			`mov r11, [rsi+8]`
			`adc r11, [rdx+8]`
			`mov [rdi+8], r11`
			`dec rax`
			`jz %%2`
			`mov r11, [rsi+16]`
			`adc r11, [rdx+16]`
			`mov [rdi+16], r11`
add missing Windows AMD64 assembler file to SVN 2009-03-06 19:04:23 -05:00			`%%2:`

			`%endmacro`

			`%macro mpn_sub 0`

Two minor Windows assembler changes plus code formatting to remove tabs and trailing white space 2009-03-13 16:32:09 -04:00			`mov rax, rbp`
			`and rax, 3`
			`shr rbp, 2`
			`cmp rbp, 0`
			`jnz %%1`
			`mov r11, [rsi]`
			`sub r11, [rdx]`
			`mov [rbx], r11`
			`dec rax`
			`jz %%2`
			`mov r11, [rsi+8]`
			`sbb r11, [rdx+8]`
			`mov [rbx+8], r11`
			`dec rax`
			`jz %%2`
			`mov r11, [rsi+16]`
			`sbb r11, [rdx+16]`
			`mov [rbx+16], r11`
			`jmp %%2`
1. Update Windows version of getrusage 2. Use YASM's new alignment padding feature in the Windows assembler code 2009-03-24 10:40:39 -04:00			`xalign 16`
add missing Windows AMD64 assembler file to SVN 2009-03-06 19:04:23 -05:00			`%%1:mov r11, [rsi]`
Two minor Windows assembler changes plus code formatting to remove tabs and trailing white space 2009-03-13 16:32:09 -04:00			`mov r8, [rsi+8]`
			`lea rsi, [rsi+32]`
			`sbb r11, [rdx]`
			`sbb r8, [rdx+8]`
			`lea rdx, [rdx+32]`
			`mov [rbx], r11`
			`mov [rbx+8], r8`
			`lea rbx, [rbx+32]`
			`mov r9, [rsi-16]`
			`mov r10, [rsi-8]`
			`sbb r9, [rdx-16]`
			`sbb r10, [rdx-8]`
			`mov [rbx-16], r9`
			`dec rbp`
			`mov [rbx-8], r10`
			`jnz %%1`
			`inc rax`
			`dec rax`
			`jz %%2`
			`mov r11, [rsi]`
			`sbb r11, [rdx]`
			`mov [rbx], r11`
			`dec rax`
			`jz %%2`
			`mov r11, [rsi+8]`
			`sbb r11, [rdx+8]`
			`mov [rbx+8], r11`
			`dec rax`
			`jz %%2`
			`mov r11, [rsi+16]`
			`sbb r11, [rdx+16]`
			`mov [rbx+16], r11`
add missing Windows AMD64 assembler file to SVN 2009-03-06 19:04:23 -05:00			`%%2:`

			`%endmacro`

			`%macro addmulloop 1`

1. Update Windows version of getrusage 2. Use YASM's new alignment padding feature in the Windows assembler code 2009-03-24 10:40:39 -04:00			`xalign 16`
add missing Windows AMD64 assembler file to SVN 2009-03-06 19:04:23 -05:00			`%%1:mov r10, 0`
Two minor Windows assembler changes plus code formatting to remove tabs and trailing white space 2009-03-13 16:32:09 -04:00			`mul r13`
			`add [r8+r11*8], r12`
			`adc r9, rax`
			`db 0x26`
			`adc r10, rdx`
			`mov rax, [rsi+r11*8+16]`
			`mul r13`
			`add [r8+r11*8+8], r9`
			`adc r10, rax`
			`mov ebx, 0`
			`adc rbx, rdx`
			`mov rax, [rsi+r11*8+24]`
			`mov r12, 0`
			`mov r9, 0`
			`mul r13`
			`add [r8+r11*8+16], r10`
			`db 0x26`
			`adc rbx, rax`
			`db 0x26`
			`adc r12, rdx`
			`mov rax, [rsi+r11*8+32]`
			`mul r13`
			`add [r8+r11*8+24], rbx`
			`db 0x26`
			`adc r12, rax`
			`db 0x26`
			`adc r9, rdx`
			`add r11, 4`
			`mov rax, [rsi+r11*8+8]`
			`jnc %%1`
add missing Windows AMD64 assembler file to SVN 2009-03-06 19:04:23 -05:00
			`%endmacro`

			`%macro addmulpropro0 0`

Two minor Windows assembler changes plus code formatting to remove tabs and trailing white space 2009-03-13 16:32:09 -04:00			`imul r13, rcx`
			`lea r8, [r8-8]`
add missing Windows AMD64 assembler file to SVN 2009-03-06 19:04:23 -05:00
			`%endmacro`

			`%macro addmulpro0 0`

Two minor Windows assembler changes plus code formatting to remove tabs and trailing white space 2009-03-13 16:32:09 -04:00			`mov r11, r14`
			`lea r8, [r8+8]`
			`mov rax, [rsi+r14*8]`
			`mul r13`
			`mov r12, rax`
			`mov rax, [rsi+r14*8+8]`
			`mov r9, rdx`
			`cmp r14, 0`
add missing Windows AMD64 assembler file to SVN 2009-03-06 19:04:23 -05:00
			`%endmacro`

			`%macro addmulnext0 0`

Two minor Windows assembler changes plus code formatting to remove tabs and trailing white space 2009-03-13 16:32:09 -04:00			`mov r10d, 0`
			`mul r13`
			`add [r8+r11*8], r12`
			`adc r9, rax`
			`adc r10, rdx`
			`mov rax, [rsi+r11*8+16]`
			`mul r13`
			`add [r8+r11*8+8], r9`
			`adc r10, rax`
			`mov ebx, 0`
			`adc rbx, rdx`
			`mov rax, [rsi+r11*8+24]`
			`mov r12d, 0`
			`mov r9d, 0`
			`mul r13`
			`add [r8+r11*8+16], r10`
			`adc rbx, rax`
			`adc r12, rdx`
			`mov rax, [rsi+r11*8+32]`
			`mul r13`
			`add [r8+r11*8+24], rbx`
			`mov r13, [r8+r14*8+8]`
			`adc r12, rax`
			`adc r9, rdx`
			`imul r13, rcx`
			`add [r8+r11*8+32], r12`
			`adc r9, 0`
			`dec r15`
			`mov [r8+r14*8], r9`
add missing Windows AMD64 assembler file to SVN 2009-03-06 19:04:23 -05:00
			`%endmacro`

			`%macro addmulpropro1 0`

			`%endmacro`

			`%macro addmulpro1 0`

Two minor Windows assembler changes plus code formatting to remove tabs and trailing white space 2009-03-13 16:32:09 -04:00			`imul r13, rcx`
			`mov rax, [rsi+r14*8]`
			`mov r11, r14`
			`mul r13`
			`mov r12, rax`
			`mov rax, [rsi+r14*8+8]`
			`mov r9, rdx`
			`cmp r14, 0`
add missing Windows AMD64 assembler file to SVN 2009-03-06 19:04:23 -05:00
			`%endmacro`

			`%macro addmulnext1 0`

Two minor Windows assembler changes plus code formatting to remove tabs and trailing white space 2009-03-13 16:32:09 -04:00			`mov r10d, 0`
			`mul r13`
			`add [r8+r11*8], r12`
			`adc r9, rax`
			`adc r10, rdx`
			`mov rax, [rsi+r11*8+16]`
			`mul r13`
			`add [r8+r11*8+8], r9`
			`adc r10, rax`
			`mov ebx, 0`
			`adc rbx, rdx`
			`mov rax, [rsi+r11*8+24]`
			`mov r12d, 0`
			`mul r13`
			`add [r8+r11*8+16], r10`
			`adc rbx, rax`
			`adc r12, rdx`
			`add [r8+r11*8+24], rbx`
			`mov r13, [r8+r14*8+8]`
			`adc r12, 0`
			`dec r15`
			`mov [r8+r14*8], r12`
			`lea r8, [r8+8]`
add missing Windows AMD64 assembler file to SVN 2009-03-06 19:04:23 -05:00
			`%endmacro`

			`%macro addmulpropro2 0`

			`%endmacro`

			`%macro addmulpro2 0`

Two minor Windows assembler changes plus code formatting to remove tabs and trailing white space 2009-03-13 16:32:09 -04:00			`imul r13, rcx`
			`mov rax, [rsi+r14*8]`
			`mov r11, r14`
			`mul r13`
			`mov r12, rax`
			`mov rax, [rsi+r14*8+8]`
			`mov r9, rdx`
			`cmp r14, 0`
add missing Windows AMD64 assembler file to SVN 2009-03-06 19:04:23 -05:00
			`%endmacro`

			`%macro addmulnext2 0`

Two minor Windows assembler changes plus code formatting to remove tabs and trailing white space 2009-03-13 16:32:09 -04:00			`mul r13`
			`add [r8+r11*8], r12`
			`adc r9, rax`
			`mov r10d, 0`
			`adc r10, rdx`
			`mov rax, [rsi+r11*8+16]`
			`mul r13`
			`add [r8+r11*8+8], r9`
			`adc r10, rax`
			`mov ebx, 0`
			`adc rbx, rdx`
			`mov r13, [r8+r14*8+8]`
			`add [r8+r11*8+16], r10`
			`adc rbx, 0`
			`mov [r8+r14*8], rbx`
			`dec r15`
			`lea r8, [r8+8]`
add missing Windows AMD64 assembler file to SVN 2009-03-06 19:04:23 -05:00
			`%endmacro`

			`%macro addmulpropro3 0`

			`%endmacro`

			`%macro addmulpro3 0`

Two minor Windows assembler changes plus code formatting to remove tabs and trailing white space 2009-03-13 16:32:09 -04:00			`imul r13, rcx`
			`mov rax, [rsi+r14*8]`
			`mov r11, r14`
			`mul r13`
			`mov r12, rax`
			`mov rax, [rsi+r14*8+8]`
			`mov r9, rdx`
			`cmp r14, 0`
add missing Windows AMD64 assembler file to SVN 2009-03-06 19:04:23 -05:00
			`%endmacro`

			`%macro addmulnext3 0`

Two minor Windows assembler changes plus code formatting to remove tabs and trailing white space 2009-03-13 16:32:09 -04:00			`mul r13`
			`add [r8+r11*8], r12`
			`adc r9, rax`
			`mov r10d, 0`
			`adc r10, rdx`
			`add [r8+r11*8+8], r9`
			`adc r10, 0`
			`mov r13, [r8+r14*8+8]`
			`mov [r8+r14*8], r10`
			`lea r8, [r8+8]`
			`dec r15`
add missing Windows AMD64 assembler file to SVN 2009-03-06 19:04:23 -05:00
			`%endmacro`

			`%macro mpn_addmul_1_int 1`

Two minor Windows assembler changes plus code formatting to remove tabs and trailing white space 2009-03-13 16:32:09 -04:00			`addmulpropro%1`
1. Update Windows version of getrusage 2. Use YASM's new alignment padding feature in the Windows assembler code 2009-03-24 10:40:39 -04:00			`xalign 16`
add missing Windows AMD64 assembler file to SVN 2009-03-06 19:04:23 -05:00			`%%1:addmulpro%1`
Two minor Windows assembler changes plus code formatting to remove tabs and trailing white space 2009-03-13 16:32:09 -04:00			`jge %%2`
			`addmulloop %1`
add missing Windows AMD64 assembler file to SVN 2009-03-06 19:04:23 -05:00			`%%2:addmulnext%1`
Two minor Windows assembler changes plus code formatting to remove tabs and trailing white space 2009-03-13 16:32:09 -04:00			`jnz %%1`
add missing Windows AMD64 assembler file to SVN 2009-03-06 19:04:23 -05:00
			`%endmacro`

Major tidy up of Windows x86_64 assembler code 2009-03-07 10:00:35 -05:00			`LEAF_PROC mpn_redc_basecase`
1. Update Windows k10 build to match Linux 2. Remove the now redundant 32 to 64 register mapping for mp_size_t inputs in Windows assembler 2010-07-02 07:52:24 -04:00			`cmp r8, 1`
Two minor Windows assembler changes plus code formatting to remove tabs and trailing white space 2009-03-13 16:32:09 -04:00			`je one`
Adjust Windows assembler files to assist planned automation of aspects of the Windows build 2009-04-03 05:07:52 -04:00			`FRAME_PROC ?mpn_redc_basecase, 0, reg_save_list`
correct 32 to 64 bit sign extension in Windows assembler code 2009-03-09 17:27:31 -04:00			`mov rdi, rcx`
			`mov rsi, rdx`
1. Update Windows k10 build to match Linux 2. Remove the now redundant 32 to 64 register mapping for mp_size_t inputs in Windows assembler 2010-07-02 07:52:24 -04:00			`mov rdx, r8`
correct 32 to 64 bit sign extension in Windows assembler code 2009-03-09 17:27:31 -04:00			`mov rcx, r9`
1. Add new x64 assembler functions to the Windows build 2009-09-02 07:41:43 -04:00			`mov r8, [rsp+stack_use+40]`
add missing Windows AMD64 assembler file to SVN 2009-03-06 19:04:23 -05:00
Two minor Windows assembler changes plus code formatting to remove tabs and trailing white space 2009-03-13 16:32:09 -04:00			`mov r14, 5`
			`sub r14, rdx`

1. Add new x64 assembler functions to the Windows build 2009-09-02 07:41:43 -04:00			`mov [rsp+stack_use+16], rsi`
			`mov r8, [rsp+stack_use+40]`
Two minor Windows assembler changes plus code formatting to remove tabs and trailing white space 2009-03-13 16:32:09 -04:00
			`lea r8, [r8+rdx*8-40]`
			`lea rsi, [rsi+rdx*8-40]`
			`mov rbp, rdx`
			`mov r15, rdx`
			`mov rax, r14`
			`and rax, 3`
			`mov r13, [r8+r14*8]`
1. Add Jason's new assembler code to the Windows builds 2. Tidy up assembler to prepare for Windows nehalem build 2009-12-02 11:24:00 -05:00			`je .2`
			`jp .4`
Two minor Windows assembler changes plus code formatting to remove tabs and trailing white space 2009-03-13 16:32:09 -04:00			`cmp rax, 1`
1. Add Jason's new assembler code to the Windows builds 2. Tidy up assembler to prepare for Windows nehalem build 2009-12-02 11:24:00 -05:00			`je .3`
			`.1:`
			`mpn_addmul_1_int 2`
			`jmp .5`
Two minor Windows assembler changes plus code formatting to remove tabs and trailing white space 2009-03-13 16:32:09 -04:00
1. Update Windows version of getrusage 2. Use YASM's new alignment padding feature in the Windows assembler code 2009-03-24 10:40:39 -04:00			`xalign 16`
1. Add Jason's new assembler code to the Windows builds 2. Tidy up assembler to prepare for Windows nehalem build 2009-12-02 11:24:00 -05:00			`.2:`
			`mpn_addmul_1_int 0`
			`jmp .5`
Two minor Windows assembler changes plus code formatting to remove tabs and trailing white space 2009-03-13 16:32:09 -04:00
1. Update Windows version of getrusage 2. Use YASM's new alignment padding feature in the Windows assembler code 2009-03-24 10:40:39 -04:00			`xalign 16`
1. Add Jason's new assembler code to the Windows builds 2. Tidy up assembler to prepare for Windows nehalem build 2009-12-02 11:24:00 -05:00			`.3:`
			`mpn_addmul_1_int 1`
			`jmp .5`
Two minor Windows assembler changes plus code formatting to remove tabs and trailing white space 2009-03-13 16:32:09 -04:00
1. Update Windows version of getrusage 2. Use YASM's new alignment padding feature in the Windows assembler code 2009-03-24 10:40:39 -04:00			`xalign 16`
1. Add Jason's new assembler code to the Windows builds 2. Tidy up assembler to prepare for Windows nehalem build 2009-12-02 11:24:00 -05:00			`.4:`
			`mpn_addmul_1_int 3`
Two minor Windows assembler changes plus code formatting to remove tabs and trailing white space 2009-03-13 16:32:09 -04:00
1. Update Windows version of getrusage 2. Use YASM's new alignment padding feature in the Windows assembler code 2009-03-24 10:40:39 -04:00			`xalign 16`
1. Add Jason's new assembler code to the Windows builds 2. Tidy up assembler to prepare for Windows nehalem build 2009-12-02 11:24:00 -05:00			`.5:`
Two minor Windows assembler changes plus code formatting to remove tabs and trailing white space 2009-03-13 16:32:09 -04:00			`mov rcx, rbp`
1. Add new x64 assembler functions to the Windows build 2009-09-02 07:41:43 -04:00			`mov rdx, [rsp+stack_use+40]`
Two minor Windows assembler changes plus code formatting to remove tabs and trailing white space 2009-03-13 16:32:09 -04:00			`lea rsi, [rdx+rbp*8]`
			`mov rbx, rdi`
			`mpn_add`
1. Add new x64 assembler functions to the Windows build 2009-09-02 07:41:43 -04:00			`mov rdx, [rsp+stack_use+16]`
1. Add Jason's new assembler code to the Windows builds 2. Tidy up assembler to prepare for Windows nehalem build 2009-12-02 11:24:00 -05:00			`jnc .6`
Two minor Windows assembler changes plus code formatting to remove tabs and trailing white space 2009-03-13 16:32:09 -04:00			`mov rsi, rbx`
			`mpn_sub`
1. Add Jason's new assembler code to the Windows builds 2. Tidy up assembler to prepare for Windows nehalem build 2009-12-02 11:24:00 -05:00			`.6:`
Major tidy up of Windows x86_64 assembler code 2009-03-07 10:00:35 -05:00			`END_PROC reg_save_list`
add missing Windows AMD64 assembler file to SVN 2009-03-06 19:04:23 -05:00
1. Update Windows version of getrusage 2. Use YASM's new alignment padding feature in the Windows assembler code 2009-03-24 10:40:39 -04:00			`xalign 16`
1. Add Jason's new assembler code to the Windows builds 2. Tidy up assembler to prepare for Windows nehalem build 2009-12-02 11:24:00 -05:00			`one:`
			`mov r8,[rsp+40]`
Two minor Windows assembler changes plus code formatting to remove tabs and trailing white space 2009-03-13 16:32:09 -04:00			`mov r10, [r8]`
			`mov r11, [rdx]`
			`imul r9, r10`
			`mov rax, r9`
			`mul r11`
			`add rax, r10`
			`adc rdx, [r8+8] ; rax is zero here`
			`cmovnc r11, rax`
			`sub rdx, r11`
			`mov [rcx], rdx`
			`ret`

			`end`