Last of Jason Moxham's K8 assembly code converted to yasm format.

This commit is contained in:
wbhart 2009-03-04 22:01:05 +00:00
parent 47be515d09
commit f596e5d3ed
6 changed files with 226 additions and 238 deletions

View File

@ -0,0 +1,78 @@
; AMD64 mpn_addlsh1_n
; Copyright 2008 Jason Moxham
; This file is part of the MPIR Library.
; The MPIR Library is free software; you can redistribute it and/or modify
; it under the terms of the GNU Lesser General Public License as published
; by the Free Software Foundation; either version 2.1 of the License, or (at
; your option) any later version.
; The MPIR Library is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
; License for more details.
; You should have received a copy of the GNU Lesser General Public License
; along with the MPIR Library; see the file COPYING.LIB. If not, write
; to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
; Boston, MA 02110-1301, USA.
; (rdi, rcx) = (rsi, rcx) + (rdx, rcx)<<1
; rax = carry
%include '../yasm_mac.inc'
BITS 64
;-----------------------------------------------------------------------
; mpn_addlsh1_n
;   (rdi, rcx) = (rsi, rcx) + ((rdx, rcx) << 1)
;
; In:    rdi = destination pointer
;        rsi = pointer to the operand that is added as-is
;        rdx = pointer to the operand that is doubled before adding
;        rcx = number of 8-byte limbs in each operand
; Out:   rax = carry out of the top limb (0, 1 or 2)
; Clobb: rcx, rdx, rsi, rdi, r8, r9, r10, r11, flags
;        rbx is used by the unrolled loop and is saved/restored here.
;
; Two independent carry chains are kept between steps as masks that are
; either 0 or -1:
;        r9  = carry of the doubling (shift) chain
;        rax = carry of the addition chain
; "add mask, 1" turns a mask back into CF, "sbb mask, mask" turns CF back
; into a mask.
;-----------------------------------------------------------------------
GLOBAL_FUNC mpn_addlsh1_n
        ; Point every operand one past its last limb and walk a negative
        ; index up towards zero.
        lea     rsi, [rsi+rcx*8]
        lea     rdx, [rdx+rcx*8]
        lea     rdi, [rdi+rcx*8]
        neg     rcx
        xor     r9d, r9d                ; shift-chain mask = 0
        xor     eax, eax                ; add-chain mask = 0
        test    rcx, 3
        jz      .unrolled               ; index already a multiple of 4

        ; One limb per pass until the index is a multiple of 4.
.single:
        mov     r10, [rdx+rcx*8]
        add     r9, 1                   ; CF = saved shift carry
        adc     r10, r10                ; r10 = 2*limb + carry in
        sbb     r9, r9                  ; save shift carry as 0 / -1
        add     rax, 1                  ; CF = saved add carry
        adc     r10, [rsi+rcx*8]
        sbb     rax, rax                ; save add carry as 0 / -1
        mov     [rdi+rcx*8], r10
        inc     rcx
        test    rcx, 3
        jnz     .single

.unrolled:
        test    rcx, rcx                ; nothing left (also covers rcx = 0 on entry)
        jz      .done
        push    rbx
        align   16
        ; Four limbs per pass; rcx is a negative multiple of 4 here.
.quad:
        mov     r10, [rdx+rcx*8]
        mov     rbx, [rdx+rcx*8+8]
        mov     r11, [rdx+rcx*8+16]
        mov     r8, [rdx+rcx*8+24]
        add     r9, 1                   ; CF = saved shift carry
        adc     r10, r10
        adc     rbx, rbx
        adc     r11, r11
        adc     r8, r8
        sbb     r9, r9                  ; save shift carry
        add     rax, 1                  ; CF = saved add carry
        adc     r10, [rsi+rcx*8]
        adc     rbx, [rsi+rcx*8+8]
        adc     r11, [rsi+rcx*8+16]
        adc     r8, [rsi+rcx*8+24]
        sbb     rax, rax                ; save add carry
        mov     [rdi+rcx*8], r10
        mov     [rdi+rcx*8+8], rbx
        mov     [rdi+rcx*8+16], r11
        mov     [rdi+rcx*8+24], r8
        add     rcx, 4
        jnz     .quad
        pop     rbx

.done:
        ; Both masks are 0 or -1, so their negated sum is the 0..2 carry.
        add     rax, r9
        neg     rax
        ret

View File

@ -1,82 +0,0 @@
dnl AMD64 mpn_addlsh1_n
dnl Copyright 2008 Jason Moxham
dnl This file is part of the MPIR Library.
dnl The MPIR Library is free software; you can redistribute it and/or modify
dnl it under the terms of the GNU Lesser General Public License as published
dnl by the Free Software Foundation; either version 2.1 of the License, or (at
dnl your option) any later version.
dnl The MPIR Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
dnl License for more details.
dnl You should have received a copy of the GNU Lesser General Public License
dnl along with the MPIR Library; see the file COPYING.LIB. If not, write
dnl to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
dnl Boston, MA 02110-1301, USA.
include(`../config.m4')
C (rdi,rcx)=(rsi,rcx)+(rdx,rcx)<<1
C rax=carry
ASM_START()
C -----------------------------------------------------------------------
C mpn_addlsh1_n
C   (rdi,rcx) = (rsi,rcx) + ((rdx,rcx) << 1)
C
C In:    rdi = destination pointer
C        rsi = pointer to the operand that is added as-is
C        rdx = pointer to the operand that is doubled before adding
C        rcx = number of 8-byte limbs in each operand
C Out:   rax = carry out of the top limb (0, 1 or 2)
C Clobb: rcx, rdx, rsi, rdi, r8, r9, r10, r11, flags
C        rbx is used by the unrolled loop and is saved/restored here.
C
C r9 (shift chain) and rax (add chain) hold the two carries between steps
C as masks that are either 0 or -1.  Adding 1 to a mask turns it back into
C CF, and sbb of a register with itself turns CF back into a mask.
C -----------------------------------------------------------------------
PROLOGUE(mpn_addlsh1_n)
C Point every operand one past its last limb and walk a negative index
C up towards zero.
	lea	(%rsi,%rcx,8),%rsi
	lea	(%rdx,%rcx,8),%rdx
	lea	(%rdi,%rcx,8),%rdi
	neg	%rcx
	xor	%r9d,%r9d
	xor	%eax,%eax
	test	$3,%rcx
	jz	next
C One limb per pass until the index is a multiple of 4.
lp1:
	mov	(%rdx,%rcx,8),%r10
	add	$1,%r9
	adc	%r10,%r10
	sbb	%r9,%r9
	add	$1,%rax
	adc	(%rsi,%rcx,8),%r10
	sbb	%rax,%rax
	mov	%r10,(%rdi,%rcx,8)
	inc	%rcx
	test	$3,%rcx
	jnz	lp1
next:
C Nothing left to do when the index has reached zero.
	test	%rcx,%rcx
	jz	end
	push	%rbx
	ALIGN(16)
C Four limbs per pass.
lp:
	mov	(%rdx,%rcx,8),%r10
	mov	8(%rdx,%rcx,8),%rbx
	mov	16(%rdx,%rcx,8),%r11
	mov	24(%rdx,%rcx,8),%r8
C Restore the shift carry, double four limbs, save the carry again.
	add	$1,%r9
	adc	%r10,%r10
	adc	%rbx,%rbx
	adc	%r11,%r11
	adc	%r8,%r8
	sbb	%r9,%r9
C Restore the add carry, add four limbs, save the carry again.
	add	$1,%rax
	adc	(%rsi,%rcx,8),%r10
	adc	8(%rsi,%rcx,8),%rbx
	adc	16(%rsi,%rcx,8),%r11
	adc	24(%rsi,%rcx,8),%r8
	sbb	%rax,%rax
	mov	%r10,(%rdi,%rcx,8)
	mov	%rbx,8(%rdi,%rcx,8)
	mov	%r11,16(%rdi,%rcx,8)
	mov	%r8,24(%rdi,%rcx,8)
	add	$4,%rcx
	jnz	lp
	pop	%rbx
end:
C Both masks are 0 or -1, so their negated sum is the 0..2 carry.
	add	%r9,%rax
	neg	%rax
	ret
EPILOGUE()

58
mpn/x86_64/amd64/com_n.as Normal file
View File

@ -0,0 +1,58 @@
; AMD64 mpn_com_n
; Copyright 2008 Jason Moxham
; This file is part of the MPIR Library.
; The MPIR Library is free software; you can redistribute it and/or modify
; it under the terms of the GNU Lesser General Public License as published
; by the Free Software Foundation; either version 2.1 of the License, or (at
; your option) any later version.
; The MPIR Library is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
; License for more details.
; You should have received a copy of the GNU Lesser General Public License
; along with the MPIR Library; see the file COPYING.LIB. If not, write
; to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
; Boston, MA 02110-1301, USA.
%include '../yasm_mac.inc'
BITS 64
;-----------------------------------------------------------------------
; mpn_com_n
;   Stores the bitwise complement of each 8-byte limb of the source into
;   the destination.
;
; In:    rdi = destination pointer
;        rsi = source pointer
;        rdx = number of limbs
; Clobb: rax, rcx, rdx, flags
;-----------------------------------------------------------------------
GLOBAL_FUNC mpn_com_n
        sub     rdx, 4
        jc      .tail                   ; fewer than 4 limbs in total
        align   8
        ; Four limbs per pass, working from the top of the operand down.
        ; rdx is the index of the lowest limb of the current group.
.quad:
        mov     rax, [rsi+rdx*8+24]
        mov     rcx, [rsi+rdx*8+16]
        not     rax
        not     rcx
        mov     [rdi+rdx*8+24], rax
        mov     [rdi+rdx*8+16], rcx
        mov     rax, [rsi+rdx*8+8]
        mov     rcx, [rsi+rdx*8]
        not     rax
        not     rcx
        mov     [rdi+rdx*8+8], rax
        mov     [rdi+rdx*8], rcx
        sub     rdx, 4
        jnc     .quad                   ; loop while no borrow

        ; Undo the last subtraction: rdx = limbs still to do (0..3).
        ; Each remaining limb sits at index rdx-1.
.tail:
        add     rdx, 4
        jz      .done
        mov     rax, [rsi+rdx*8-8]
        not     rax
        mov     [rdi+rdx*8-8], rax
        dec     rdx
        jz      .done
        mov     rax, [rsi+rdx*8-8]
        not     rax
        mov     [rdi+rdx*8-8], rax
        dec     rdx
        jz      .done
        mov     rax, [rsi+rdx*8-8]
        not     rax
        mov     [rdi+rdx*8-8], rax
.done:
        ret

View File

@ -1,62 +0,0 @@
dnl AMD64 mpn_com_n
dnl Copyright 2008 Jason Moxham
dnl This file is part of the MPIR Library.
dnl The MPIR Library is free software; you can redistribute it and/or modify
dnl it under the terms of the GNU Lesser General Public License as published
dnl by the Free Software Foundation; either version 2.1 of the License, or (at
dnl your option) any later version.
dnl The MPIR Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
dnl License for more details.
dnl You should have received a copy of the GNU Lesser General Public License
dnl along with the MPIR Library; see the file COPYING.LIB. If not, write
dnl to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
dnl Boston, MA 02110-1301, USA.
include(`../config.m4')
ASM_START()
C mpn_com_n: store the bitwise complement of each 8-byte limb of the
C source into the destination.
C   rdi = destination pointer, rsi = source pointer, rdx = number of limbs
C   Uses rax and rcx as scratch.
PROLOGUE(mpn_com_n)
sub $4,%rdx
jc next
ALIGN(8)
C Four limbs per pass, working from the top of the operand down.
C rdx is the index of the lowest limb of the current group.
loop:
mov 24(%rsi,%rdx,8),%rax
mov 16(%rsi,%rdx,8),%rcx
not %rax
not %rcx
mov %rax,24(%rdi,%rdx,8)
mov %rcx,16(%rdi,%rdx,8)
mov 8(%rsi,%rdx,8),%rax
mov (%rsi,%rdx,8),%rcx
not %rax
not %rcx
mov %rax,8(%rdi,%rdx,8)
mov %rcx,(%rdi,%rdx,8)
sub $4,%rdx
jae loop
next:
C Undo the last subtraction: rdx is now the number of limbs still to do
C (0 to 3).  Each remaining limb sits at index rdx-1.
add $4,%rdx
jz end
mov -8(%rsi,%rdx,8),%rax
not %rax
mov %rax,-8(%rdi,%rdx,8)
dec %rdx
jz end
mov -8(%rsi,%rdx,8),%rax
not %rax
mov %rax,-8(%rdi,%rdx,8)
dec %rdx
jz end
mov -8(%rsi,%rdx,8),%rax
not %rax
mov %rax,-8(%rdi,%rdx,8)
end:
ret
EPILOGUE()

View File

@ -0,0 +1,90 @@
; AMD64 mpn_sublsh1_n
; Copyright 2008 Jason Moxham
; This file is part of the MPIR Library.
; The MPIR Library is free software; you can redistribute it and/or modify
; it under the terms of the GNU Lesser General Public License as published
; by the Free Software Foundation; either version 2.1 of the License, or (at
; your option) any later version.
; The MPIR Library is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
; License for more details.
; You should have received a copy of the GNU Lesser General Public License
; along with the MPIR Library; see the file COPYING.LIB. If not, write
; to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
; Boston, MA 02110-1301, USA.
; (rdi, rcx) = (rsi, rcx) - (rdx, rcx)<<1
; rax = borrow
%include '../yasm_mac.inc'
BITS 64
;-----------------------------------------------------------------------
; mpn_sublsh1_n
;   (rdi, rcx) = (rsi, rcx) - ((rdx, rcx) << 1)
;
; In:    rdi = destination pointer
;        rsi = pointer to the minuend
;        rdx = pointer to the operand whose double is subtracted
;        rcx = number of 8-byte limbs in each operand
; Out:   rax = borrow out of the top limb (0, 1 or 2)
; Clobb: rcx, rdx, rsi, rdi, r8, r9, r10, r11, flags
;        rbx and r12-r15 are used by the unrolled loop and are
;        saved/restored here.
;
; The doubled operand is never formed: the (rdx) limbs are subtracted
; twice, each pass with its own borrow chain.  The borrows are kept
; between steps as masks that are either 0 or -1:
;        r9  = borrow of the first subtraction
;        rax = borrow of the second subtraction
; "add mask, 1" turns a mask back into CF, "sbb mask, mask" turns CF back
; into a mask.
;-----------------------------------------------------------------------
GLOBAL_FUNC mpn_sublsh1_n
        ; Point every operand one past its last limb and walk a negative
        ; index up towards zero.
        lea     rsi, [rsi+rcx*8]
        lea     rdx, [rdx+rcx*8]
        lea     rdi, [rdi+rcx*8]
        neg     rcx
        xor     r9d, r9d                ; first-pass borrow mask = 0
        xor     eax, eax                ; second-pass borrow mask = 0
        test    rcx, 3
        jz      .unrolled               ; index already a multiple of 4

        ; One limb per pass until the index is a multiple of 4.
.single:
        mov     r10, [rsi+rcx*8]
        add     r9, 1                   ; CF = saved first-pass borrow
        sbb     r10, [rdx+rcx*8]
        sbb     r9, r9                  ; save first-pass borrow as 0 / -1
        add     rax, 1                  ; CF = saved second-pass borrow
        sbb     r10, [rdx+rcx*8]
        sbb     rax, rax                ; save second-pass borrow as 0 / -1
        mov     [rdi+rcx*8], r10
        inc     rcx
        test    rcx, 3
        jnz     .single

.unrolled:
        test    rcx, rcx                ; nothing left (also covers rcx = 0 on entry)
        jz      .done
        push    r15
        push    r14
        push    r13
        push    r12
        push    rbx
        align   16
        ; Four limbs per pass; rcx is a negative multiple of 4 here.
        ; r12-r15 hold the four subtrahend limbs so that both passes
        ; reuse a single load of each.
.quad:
        mov     r10, [rsi+rcx*8]
        mov     rbx, [rsi+rcx*8+8]
        mov     r11, [rsi+rcx*8+16]
        mov     r8, [rsi+rcx*8+24]
        mov     r12, [rdx+rcx*8]
        mov     r13, [rdx+rcx*8+8]
        mov     r14, [rdx+rcx*8+16]
        mov     r15, [rdx+rcx*8+24]
        add     r9, 1                   ; CF = saved first-pass borrow
        sbb     r10, r12
        sbb     rbx, r13
        sbb     r11, r14
        sbb     r8, r15
        sbb     r9, r9                  ; save first-pass borrow
        add     rax, 1                  ; CF = saved second-pass borrow
        sbb     r10, r12
        sbb     rbx, r13
        sbb     r11, r14
        sbb     r8, r15
        sbb     rax, rax                ; save second-pass borrow
        mov     [rdi+rcx*8], r10
        mov     [rdi+rcx*8+8], rbx
        mov     [rdi+rcx*8+16], r11
        mov     [rdi+rcx*8+24], r8
        add     rcx, 4
        jnz     .quad
        pop     rbx
        pop     r12
        pop     r13
        pop     r14
        pop     r15

.done:
        ; Both masks are 0 or -1, so their negated sum is the 0..2 borrow.
        add     rax, r9
        neg     rax
        ret

View File

@ -1,94 +0,0 @@
dnl AMD64 mpn_sublsh1_n
dnl Copyright 2008 Jason Moxham
dnl This file is part of the MPIR Library.
dnl The MPIR Library is free software; you can redistribute it and/or modify
dnl it under the terms of the GNU Lesser General Public License as published
dnl by the Free Software Foundation; either version 2.1 of the License, or (at
dnl your option) any later version.
dnl The MPIR Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
dnl License for more details.
dnl You should have received a copy of the GNU Lesser General Public License
dnl along with the MPIR Library; see the file COPYING.LIB. If not, write
dnl to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
dnl Boston, MA 02110-1301, USA.
include(`../config.m4')
C (rdi,rcx)=(rsi,rcx)-(rdx,rcx)<<1
C rax=borrow
ASM_START()
C -----------------------------------------------------------------------
C mpn_sublsh1_n
C   (rdi,rcx) = (rsi,rcx) - ((rdx,rcx) << 1)
C
C In:    rdi = destination pointer
C        rsi = pointer to the minuend
C        rdx = pointer to the operand whose double is subtracted
C        rcx = number of 8-byte limbs in each operand
C Out:   rax = borrow out of the top limb (0, 1 or 2)
C Clobb: rcx, rdx, rsi, rdi, r8, r9, r10, r11, flags
C        rbx and r12-r15 are used by the unrolled loop and are
C        saved/restored here.
C
C The doubled operand is never formed: the (rdx) limbs are subtracted
C twice, each pass with its own borrow chain.  r9 (first pass) and rax
C (second pass) hold the borrows between steps as masks that are either
C 0 or -1.  Adding 1 to a mask turns it back into CF, and sbb of a
C register with itself turns CF back into a mask.
C -----------------------------------------------------------------------
PROLOGUE(mpn_sublsh1_n)
C Point every operand one past its last limb and walk a negative index
C up towards zero.
	lea	(%rsi,%rcx,8),%rsi
	lea	(%rdx,%rcx,8),%rdx
	lea	(%rdi,%rcx,8),%rdi
	neg	%rcx
	xor	%r9d,%r9d
	xor	%eax,%eax
	test	$3,%rcx
	jz	next
C One limb per pass until the index is a multiple of 4.
lp1:
	mov	(%rsi,%rcx,8),%r10
	add	$1,%r9
	sbb	(%rdx,%rcx,8),%r10
	sbb	%r9,%r9
	add	$1,%rax
	sbb	(%rdx,%rcx,8),%r10
	sbb	%rax,%rax
	mov	%r10,(%rdi,%rcx,8)
	inc	%rcx
	test	$3,%rcx
	jnz	lp1
next:
C Nothing left to do when the index has reached zero.
	test	%rcx,%rcx
	jz	end
	push	%r15
	push	%r14
	push	%r13
	push	%r12
	push	%rbx
	ALIGN(16)
C Four limbs per pass.  r12-r15 hold the four subtrahend limbs so that
C both passes reuse a single load of each.
lp:
	mov	(%rsi,%rcx,8),%r10
	mov	8(%rsi,%rcx,8),%rbx
	mov	16(%rsi,%rcx,8),%r11
	mov	24(%rsi,%rcx,8),%r8
	mov	(%rdx,%rcx,8),%r12
	mov	8(%rdx,%rcx,8),%r13
	mov	16(%rdx,%rcx,8),%r14
	mov	24(%rdx,%rcx,8),%r15
C First pass: restore its borrow, subtract, save the borrow again.
	add	$1,%r9
	sbb	%r12,%r10
	sbb	%r13,%rbx
	sbb	%r14,%r11
	sbb	%r15,%r8
	sbb	%r9,%r9
C Second pass: restore its borrow, subtract, save the borrow again.
	add	$1,%rax
	sbb	%r12,%r10
	sbb	%r13,%rbx
	sbb	%r14,%r11
	sbb	%r15,%r8
	sbb	%rax,%rax
	mov	%r10,(%rdi,%rcx,8)
	mov	%rbx,8(%rdi,%rcx,8)
	mov	%r11,16(%rdi,%rcx,8)
	mov	%r8,24(%rdi,%rcx,8)
	add	$4,%rcx
	jnz	lp
	pop	%rbx
	pop	%r12
	pop	%r13
	pop	%r14
	pop	%r15
end:
C Both masks are 0 or -1, so their negated sum is the 0..2 borrow.
	add	%r9,%rax
	neg	%rax
	ret
EPILOGUE()