; mpir/mpn/x86_64/divrem_2.as
;
; 240 lines
; 3.9 KiB
; ActionScript
; Raw Normal View History
;
; 2010-02-07 09:02:39 -05:00
; x86-64 mpn_divrem_2 -- Divide an mpn number by a normalized 2-limb number.
; Copyright 2007, 2008 Free Software Foundation, Inc.
; Copyright Brian Gladman 2010 (Conversion to yasm format)
; 2010-02-07 09:02:39 -05:00
; This file is part of the GNU MP Library.
; The GNU MP Library is free software; you can redistribute it and/or modify
; it under the terms of the GNU Lesser General Public License as published
; by the Free Software Foundation; either version 3 of the License, or (at
; your option) any later version.
; The GNU MP Library is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
; License for more details.
; You should have received a copy of the GNU Lesser General Public License
; along with the GNU MP Library.  If not, see http://www.gnu.org/licenses/.
;               norm    frac
; k8             20      20
; netburst       73      73
; core2          37      37
; nehalem        33      33
; INPUT PARAMETERS
; qp rdi
; fn rsi
; np rdx
; nn rcx
; dp r8
; dinv r9 (not read by the code below: r9 is overwritten on entry)
%include 'yasm_mac.inc'
BITS 64
TEXT
%define reg_save_list rbx, rbp, rsi, rdi, r12, r13, r14, r15
align 16
; ---------------------------------------------------------------------------
; mpn_divrem_2
;
; Divides a multi-limb numerator by a 2-limb divisor (per the file header the
; divisor is expected to be normalized), developing one quotient limb per
; loop iteration.
;
; Register arguments as actually read by this code:
;   rdi = qp   quotient limbs are stored from qp[nn+fn-3] down to qp[0]
;   rsi = fn   count of extra low-order steps in which a zero limb is fed
;              to the division instead of a limb loaded from np
;   rdx = np   numerator limbs; the 2-limb remainder is written back
;              through the np read pointer (np[0], np[1] after the loop has
;              consumed nn-2 limbs)
;   rcx = nn   number of numerator limbs
;   r8  = dp   divisor: dp[0] = low limb, dp[1] = high limb
; r9 is overwritten before it is read, so no sixth argument is consumed.
;
; Returns rax = 0 or 1: 1 when the top two numerator limbs were >= the
; divisor on entry (one divisor is subtracted up front in that case).
;
; rbx, rbp, r12, r13, r14 and r15 are pushed on entry and popped on exit.
; Two loop bodies exist; defining NEW selects the alternative one.
; ---------------------------------------------------------------------------
GLOBAL_FUNC mpn_divrem_2
        push    r15
        lea     rax, [rdx+rcx*8]        ; rax = np + nn limbs (one past the top limb)
        push    r14
        push    r13
        mov     r13, rsi                ; r13 = fn
        push    r12
        lea     r12, [rax-24]           ; r12 = &np[nn-3], next numerator limb to read
        push    rbp
        mov     rbp, rdi                ; rbp = qp
        push    rbx
        mov     r11, [r8+8]             ; r11 = d1 = dp[1]
        mov     r9, [rax-8]             ; r9  = n1 = np[nn-1]
        mov     r8, [r8]                ; r8  = d0 = dp[0]
        mov     r10, [rax-16]           ; r10 = n0 = np[nn-2]
        xor     r15d, r15d              ; r15 = return value, starts at 0
        cmp     r11, r9
        ja      L_2                     ; d1 > n1: top limbs already below the divisor
        setb    dl                      ; dl = (d1 < n1)
        cmp     r8, r10
        setbe   al                      ; al = (d0 <= n0)
        or      dl, al
        jne     L_23                    ; n1:n0 >= d1:d0 -> subtract the divisor once
L_2:
        lea     rbx, [rcx+r13-3]        ; rbx = nn + fn - 3 = index of the top quotient limb
        test    rbx, rbx
        js      L_6                     ; negative: no quotient limbs, just store the remainder

; Build the multiplier used by the loop in rdi.  The div yields
; floor((~d1 * 2^64 + (2^64-1)) / d1); the code after it lowers that value
; while the 2-limb quantity rdx:r14 stays non-negative.
; NOTE(review): presumably this is the usual 2-limb reciprocal of d1:d0;
; the derivation is not spelled out in this file.
        mov     rdx, r11
        mov     rax, -1
        not     rdx                     ; rdx:rax = ~d1 : all-ones
        div     r11
        mov     rdx, r11
        mov     rdi, rax                ; rdi = initial estimate
        imul    rdx, rax                ; low limb of d1 * estimate
        mov     r14, rdx
        mul     r8                      ; rdx:rax = d0 * estimate
        mov     rcx, rdx                ; keep the high limb (nn in rcx is no longer needed)
        mov     rdx, -1
        add     r14, r8
        adc     rdx, 0
        add     r14, rcx
        adc     rdx, 0
        js      L_8                     ; already negative: no adjustment needed
L_18:
        dec     rdi
        sub     r14, r11
        sbb     rdx, 0
        jns     L_18                    ; keep lowering rdi until rdx:r14 goes negative
L_8:
%ifdef NEW
; Alternative loop.  Register roles inside the loop:
;   rbx:r14 = current 2-limb partial remainder (high:low)
;   r9      = multiplier computed above, rsi = -d1
;   rcx     = quotient index counting down to 0, rbp = quotient store pointer
        lea     rbp, [rbp+rbx*8]        ; rbp = &qp[nn+fn-3]
        mov     rcx, rbx
        mov     rbx, r9
        mov     r9, rdi
        mov     r14, r10
        mov     rsi, r11
        neg     rsi
        align   16
L_loop:
        mov     rax, r9
        mul     rbx                     ; rdx:rax = multiplier * high remainder limb
        add     rax, r14
        mov     r10, rax                ; r10 = low limb of the quotient estimate
        adc     rdx, rbx
        mov     rdi, rdx                ; rdi = candidate quotient limb
        imul    rdx, rsi                ; -d1 * candidate (low limb)
        mov     rax, r8
        lea     rbx, [rdx+r14]          ; low remainder limb - d1 * candidate
        mul     rdi                     ; rdx:rax = d0 * candidate
        xor     r14d, r14d              ; default next limb is zero
        cmp     r13, rcx
        jg      L_19                    ; fn > index: feed a zero limb
        mov     r14, [r12]              ; otherwise take the next numerator limb
        sub     r12, 8
L_19:
        sub     r14, r8
        sbb     rbx, r11                ; subtract one extra divisor (candidate is bumped below)
        sub     r14, rax
        sbb     rbx, rdx                ; subtract d0 * candidate
        inc     rdi
        xor     edx, edx
        cmp     rbx, r10
        mov     rax, r8
        adc     rdx, -1                 ; rdx = 0 if rbx < r10 (unsigned), else all-ones
        add     rdi, rdx                ; conditionally undo the bump ...
        and     rax, rdx
        and     rdx, r11
        add     r14, rax
        adc     rbx, rdx                ; ... and add the divisor back under the same mask
        cmp     rbx, r11
        jae     L_fix                   ; high remainder limb >= d1: may need one more step
L_bck:
        mov     [rbp], rdi              ; store the quotient limb
        sub     rbp, 8
        dec     rcx
        jns     L_loop
        mov     r10, r14                ; hand the remainder to the common exit
        mov     r9, rbx
%else
; Default loop.  Register roles at the top of each iteration:
;   rax:rsi = current 2-limb partial remainder (high:low)
;   rdi     = multiplier computed above
;   rcx     = quotient index counting down to 0, rbp = quotient store pointer
        lea     rbp, [rbp+rbx*8]        ; rbp = &qp[nn+fn-3]
        mov     rcx, rbx
        mov     rax, r9
        mov     rsi, r10
        align   16
L_loop:
        mov     r14, rax                ; keep the high remainder limb
        mul     rdi                     ; rdx:rax = multiplier * high remainder limb
        mov     r9, r11
        add     rax, rsi
        mov     rbx, rax                ; rbx = low limb of the quotient estimate
        adc     rdx, r14
        lea     r10, [rdx+1]            ; r10 = candidate quotient limb + 1
        mov     rax, rdx
        imul    r9, rdx                 ; d1 * candidate (low limb)
        sub     rsi, r9
        xor     r9d, r9d                ; default next limb is zero
        mul     r8                      ; rdx:rax = d0 * candidate
        cmp     r13, rcx
        jg      L_13                    ; fn > index: feed a zero limb
        mov     r9, [r12]               ; otherwise take the next numerator limb
        sub     r12, 8
L_13:
        sub     r9, r8
        sbb     rsi, r11                ; subtract one extra divisor (matches the +1 in r10)
        sub     r9, rax
        sbb     rsi, rdx                ; subtract d0 * candidate
        cmp     rsi, rbx
        sbb     rax, rax
        not     rax                     ; rax = 0 if rsi < rbx (unsigned), else all-ones
        add     r10, rax                ; conditionally take the +1 back ...
        mov     rbx, r8
        and     rbx, rax
        and     rax, r11
        add     r9, rbx
        adc     rax, rsi                ; ... and add the divisor back under the same mask
        cmp     r11, rax
        jbe     L_fix                   ; high remainder limb >= d1: may need one more step
L_bck:
        mov     [rbp], r10              ; store the quotient limb
        sub     rbp, 8
        mov     rsi, r9
        dec     rcx
        jns     L_loop
        mov     r10, rsi                ; hand the remainder to the common exit
        mov     r9, rax
%endif
L_6:
; Common exit: r9:r10 is the 2-limb remainder (high:low).
        mov     [r12+8], r10
        mov     [r12+16], r9
        pop     rbx
        pop     rbp
        pop     r12
        pop     r13
        pop     r14
        mov     rax, r15                ; return the top quotient limb (0 or 1)
        pop     r15
        ret

L_23:
; Top two numerator limbs >= divisor: record a quotient bit and subtract.
        inc     r15d
        sub     r10, r8
        sbb     r9, r11
        jmp     L_2

%ifdef NEW
L_fix:
; Reached with rbx >= d1 and the flags of "cmp rbx, r11" still live.
; One more divisor is subtracted when rbx > d1 or r14 >= d0.
        seta    dl                      ; dl = (rbx > d1)
        cmp     r14, r8
        setae   al                      ; al = (r14 >= d0)
        or      al, dl                  ; was "orb": an AT&T-style mnemonic yasm's NASM syntax rejects
        je      L_bck                   ; neither condition holds: no correction
        inc     rdi
        sub     r14, r8
        sbb     rbx, r11
        jmp     L_bck
%else
L_fix:
; Reached with d1 <= rax and the flags of "cmp r11, rax" still live.
        jb      L_88                    ; d1 < rax: correction is required
        cmp     r9, r8
        jb      L_bck                   ; high limbs equal and low limb < d0: no correction
L_88:
        inc     r10
        sub     r9, r8
        sbb     rax, r11
        jmp     L_bck
%endif