mpir/mpn/x86_64w/divrem_euclidean_qr_2.asm
2009-05-20 20:09:06 +00:00

142 lines
3.1 KiB
NASM

; mpn_divrem_euclidean_qr_2
; Copyright 2009 Jason Moxham
; This file is part of the MPIR Library.
; The MPIR Library is free software; you can redistribute it and/or modify
; it under the terms of the GNU Lesser General Public License as published
; by the Free Software Foundation; either version 2.1 of the License, or (at
; your option) any later version.
; The MPIR Library is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
; License for more details.
; You should have received a copy of the GNU Lesser General Public License
; along with the MPIR Library; see the file COPYING.LIB. If not, write
; to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
; Boston, MA 02110-1301, USA.
;
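; mpn_divrem_euclidean_qr_2(mp_ptr, mp_ptr, mp_size_t, mp_srcptr dp);
; The result is returned in rax. The four arguments arrive in rcx, rdx, r8, r9
; and are moved into rdi, rsi, rdx, rcx by the first instructions of the routine.
; t[0] is r8, t[1] is r9
; l is r10, h is r11
; divisor (pointer dp) is rcx
; qn (limb index, counts down) is rbx
; i (the value produced by the initial div) is rbp
; qf (the value returned in rax) is r12
; mask, q is r13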
%include "yasm_mac.inc"
%define reg_save_list rbx, rsi, rdi, rbp, r12, r13
BITS 64
; ----------------------------------------------------------------------
; mpn_divrem_euclidean_qr_2
;
; Divides the n-limb number addressed by the 2nd argument by the two-limb
; divisor addressed by the 4th argument (d0 = [dp], d1 = [dp+8]).
;
; In  (as received): rcx = 1st arg, rdx = 2nd arg, r8d = 3rd arg (n),
;                    r9  = 4th arg (dp)
; Out: rax             = top quotient bit (0 or 1)
;      [arg1 + 8*k]    = quotient limb k, for k = n-3 down to 0
;      [arg2], [arg2+8] = low / high limb of the remainder
;
; Register roles after the argument shuffle:
;   rdi quotient pointer     rsi numerator pointer    rcx divisor pointer
;   rbx limb index           rbp value from the initial div ("i")
;   r11:r10 running two-limb remainder (h:l)
;   r9:r8   scratch remainder (t[1]:t[0])
;   r12 top quotient bit (qf)    r13 mask, then quotient limb
;
; NOTE(review): the initial div raises #DE unless ~d1 < d1, i.e. unless
;   the top bit of d1 is set -- presumably callers always pass a
;   normalised divisor; confirm.
; NOTE(review): limbs n-1 and n-2 are read unconditionally, so n >= 2 is
;   assumed -- confirm against callers.
; NOTE(review): n is sign-extended from the low 32 bits of r8; this is
;   only right if mp_size_t is a 32-bit type in this build -- confirm.
; ----------------------------------------------------------------------
FRAME_PROC mpn_divrem_euclidean_qr_2, 0, reg_save_list
        mov     rdi, rcx                ; rdi = 1st argument
        mov     rsi, rdx                ; rsi = 2nd argument
        movsxd  rdx, r8d                ; rdx = n, sign-extended from 32 bits
        mov     rcx, r9                 ; rcx = dp
        mov     rbx, rdx
        sub     rbx, 3                  ; rbx = n - 3, first index the loop consumes

        ; rbp = floor(((~d1) * 2^64 + (2^64 - 1)) / d1)
        mov     rax, -1
        mov     rdx, [rcx+8]
        not     rdx
        div     qword [rcx+8]
        mov     rbp, rax

        mov     r10, [rsi+rbx*8+16]     ; r10 = numerator limb n-1 (top limb)
        mov     r8, [rsi+rbx*8+8]       ; r8  = numerator limb n-2
        cmp     r10, [rcx+8]
        jae     L_j4

        ; top limb < d1: top quotient bit is 0, top two limbs are the
        ; starting remainder as they stand
        mov     r11, r10                ; h = limb n-1
        mov     r9, r10                 ; t[1] = limb n-1 (stored if the loop is skipped)
        mov     r10, r8                 ; l = limb n-2 (r8 already holds it)
        xor     r12d, r12d              ; qf = 0
        jmp     L_j5

L_j4:
        ; top limb >= d1: tentatively take qf = 1 and subtract d1:d0 from
        ; the top two limbs; if that borrows, undo it and take qf = 0
        mov     r12, 1
        mov     r9, r10
        sub     r9, [rcx+8]
        sub     r8, [rcx]
        sbb     r9, 0                   ; propagate the low-limb borrow into t[1]
        jnc     L_j3
        dec     r12
        add     r8, [rcx]
        adc     r9, [rcx+8]
L_j3:
        mov     r11, r9                 ; h:l = t[1]:t[0]
        mov     r10, r8
L_j5:
        test    rbx, rbx
        js      L_skiplp                ; n - 3 < 0: no quotient limbs to produce

        xalign  16
L_lp:
        ; one quotient limb per pass: divide h:l:[rsi+rbx*8] by d1:d0
        mov     r8, [rsi+rbx*8]         ; t[0] = next lower numerator limb
        cmp     r11, [rcx+8]
        je      L_j2                    ; h == d1 is handled separately below

        ; Estimate q from h:l and d1 using the precomputed value in rbp.
        ; The sequence appears to follow the classic udiv_qrnnd_preinv
        ; scheme; the carry/borrow chain below is order-sensitive.
        mov     r13, r10
        sar     r13, 63                 ; mask = all ones if top bit of l set, else 0
        mov     rax, r11
        sub     rax, r13                ; rax = h + (top bit of l)
        and     r13, [rcx+8]
        add     r13, r10                ; r13 = l + (mask & d1), wrapping
        mul     rbp                     ; rdx:rax = rax * i
        add     rax, r13
        adc     rdx, r11                ; rdx = high product + h + carry
        mov     r13, rdx                ; r13 = quotient candidate
        not     rdx
        mov     rax, [rcx+8]
        mul     rdx                     ; rdx:rax = d1 * ~candidate
        add     rax, r10
        adc     rdx, r11                ; rdx:rax += h:l
        sub     rdx, [rcx+8]            ; CF = borrow from the high limb
        sbb     r13, -1                 ; q = candidate + 1 - borrow
        and     rdx, [rcx+8]
        add     rdx, rax                ; rdx = rax + (rdx & d1)
        mov     rax, [rcx]
        mov     r9, rdx                 ; t[1] = that value
        mov     [rdi+rbx*8], r13        ; store q (may be decremented below)

        ; t[1]:t[0] -= q * d0, then add d1:d0 back (decrementing the
        ; stored q each time) while the result is still negative
        mul     r13                     ; rdx:rax = d0 * q
        sub     r8, rax
        sbb     r9, rdx
        jnc     L_j1                    ; no borrow: q is final
        dec     qword[rdi+rbx*8]
        add     r8, [rcx]
        adc     r9, [rcx+8]
        jc      L_j1                    ; carry out cancels the borrow: done
        dec     qword[rdi+rbx*8]        ; second and last correction
        add     r8, [rcx]
        adc     r9, [rcx+8]
        jmp     L_j1

L_j2:
        ; h == d1: start from q = 2^64 - 1
        mov     qword[rdi+rbx*8], -1
        mov     r9, r10
        add     r8, [rcx]               ; t[0] += d0
        adc     r9, r11                 ; t[1] = l + h + carry (h == d1 on this path)
        sbb     rax, rax                ; rax = -(carry out of t[1])
        sub     r9, [rcx]               ; t[1] -= d0
        adc     rax, 0                  ; rax += borrow; 0 means carry and borrow cancel
        jz      L_j1
        dec     qword[rdi+rbx*8]        ; otherwise lower q by one and add d1:d0 back
        add     r8, [rcx]
        adc     r9, r11
L_j1:
        mov     r11, r9                 ; h:l = t[1]:t[0] for the next pass
        mov     r10, r8
        dec     rbx
        jns     L_lp

L_skiplp:
        mov     [rsi+8], r9             ; write the two-limb remainder back
        mov     [rsi], r8
        mov     rax, r12                ; return qf
END_PROC reg_save_list
end