Converted left and right shift assembly functions of Jason Moxham to yasm
format.
This commit is contained in:
parent
53fc1663bc
commit
f2fa962ce3
96
mpn/x86_64/amd64/lshift.as
Normal file
96
mpn/x86_64/amd64/lshift.as
Normal file
@ -0,0 +1,96 @@
|
||||
|
||||
; AMD64 mpn_lshift
|
||||
; Copyright 2008 Jason Moxham
|
||||
; This file is part of the MPIR Library.
|
||||
; The MPIR Library is free software; you can redistribute it and/or modify
|
||||
; it under the terms of the GNU Lesser General Public License as published
|
||||
; by the Free Software Foundation; either version 2.1 of the License, or (at
|
||||
; your option) any later version.
|
||||
; The MPIR Library is distributed in the hope that it will be useful, but
|
||||
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
||||
; License for more details.
|
||||
; You should have received a copy of the GNU Lesser General Public License
|
||||
; along with the MPIR Library; see the file COPYING.LIB. If not, write
|
||||
; to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
|
||||
; Boston, MA 02110-1301, USA.
|
||||
|
||||
; (rdi,rdx) = (rsi,rdx)<<rcx
|
||||
; rax = carry
|
||||
|
||||
%include '../yasm_mac.inc'
|
||||
|
||||
BITS 64
|
||||
|
||||
; mpn_lshift: shift a vector of 64-bit limbs left by rcx bits using MMX.
;
; Register roles (from the file header "(rdi,rdx) = (rsi,rdx)<<rcx"):
;   rdi  destination limb pointer
;   rsi  source limb pointer
;   rdx  limb count on entry, then a running index (starts at count - 4)
;   rcx  shift count
;   rax  return value: the bits shifted out of the most significant limb
;   mm0  shift count
;   mm1  64 - shift count
;   mm3  left-shifted part of the limb whose result has not been stored yet
; Limbs are read and written from the most significant end downwards.
; NOTE(review): assumes rdx >= 1 and 1 <= rcx <= 63 -- confirm against callers.
GLOBAL_FUNC mpn_lshift
        mov     eax, 64
        sub     rax, rcx                ; rax = 64 - shift count
        movq    mm0, rcx
        sub     rdx, 4                  ; flags set here are consumed by the jbe below
        movq    mm1, rax
        movq    mm5, [rsi+rdx*8+24]     ; most significant source limb
        movq    mm3, mm5
        psrlq   mm5, mm1
        movq    rax, mm5                ; return value = top limb >> (64 - shift count)
        psllq   mm3, mm0
        jbe     skiploop                ; count <= 4: skip the four-limb loop
        align   16
; Four limbs per pass; mm3 carries the pending high part into the next pass.
loop1:
        movq    mm2, [rsi+rdx*8+16]
        movq    mm4, mm2
        psrlq   mm2, mm1
        por     mm3, mm2
        movq    [rdi+rdx*8+24], mm3
        psllq   mm4, mm0

        movq    mm5, [rsi+rdx*8+8]
        movq    mm3, mm5
        psrlq   mm5, mm1
        por     mm4, mm5
        movq    [rdi+rdx*8+16], mm4
        psllq   mm3, mm0

        movq    mm2, [rsi+rdx*8]
        movq    mm4, mm2
        psrlq   mm2, mm1
        por     mm3, mm2
        movq    [rdi+rdx*8+8], mm3
        psllq   mm4, mm0

        movq    mm5, [rsi+rdx*8-8]
        movq    mm3, mm5
        psrlq   mm5, mm1
        por     mm4, mm5
        movq    [rdi+rdx*8], mm4
        psllq   mm3, mm0

        sub     rdx, 4
        ja      loop1                   ; continue while the index stayed above zero
; Here rdx is in -3..0 and rdx + 3 source limbs are still unread.
skiploop:
        cmp     rdx, -1
        jl      next                    ; fewer than two limbs left to read
        movq    mm2, [rsi+rdx*8+16]
        movq    mm4, mm2
        psrlq   mm2, mm1
        por     mm3, mm2
        movq    [rdi+rdx*8+24], mm3
        psllq   mm4, mm0

        movq    mm5, [rsi+rdx*8+8]
        movq    mm3, mm5
        psrlq   mm5, mm1
        por     mm4, mm5
        movq    [rdi+rdx*8+16], mm4
        psllq   mm3, mm0

        sub     rdx, 2
next:
        test    rdx, 1
        jnz     end                     ; odd index: only the pending limb remains
        movq    mm2, [rsi+rdx*8+16]     ; least significant source limb
        movq    mm4, mm2
        psrlq   mm2, mm1
        por     mm3, mm2
        movq    [rdi+rdx*8+24], mm3
        psllq   mm4, mm0
        movq    [rdi+rdx*8+16], mm4     ; lowest result limb gets zero bits shifted in
        emms                            ; release the MMX state before returning
        ret
end:
        movq    [rdi+rdx*8+24], mm3     ; store the pending (lowest) result limb
        emms
        ret
|
@ -1,101 +0,0 @@
|
||||
dnl AMD64 mpn_lshift
|
||||
|
||||
dnl Copyright 2008 Jason Moxham
|
||||
|
||||
dnl This file is part of the MPIR Library.
|
||||
|
||||
dnl The MPIR Library is free software; you can redistribute it and/or modify
|
||||
dnl it under the terms of the GNU Lesser General Public License as published
|
||||
dnl by the Free Software Foundation; either version 2.1 of the License, or (at
|
||||
dnl your option) any later version.
|
||||
|
||||
dnl The MPIR Library is distributed in the hope that it will be useful, but
|
||||
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
||||
dnl License for more details.
|
||||
|
||||
dnl You should have received a copy of the GNU Lesser General Public License
|
||||
dnl along with the MPIR Library; see the file COPYING.LIB. If not, write
|
||||
dnl to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
|
||||
dnl Boston, MA 02110-1301, USA.
|
||||
|
||||
|
||||
include(`../config.m4')
|
||||
|
||||
C (rdi,rdx)=(rsi,rdx)<<rcx
|
||||
C rax=carry
|
||||
|
||||
C mpn_lshift: shift a vector of 64-bit limbs left by rcx bits using MMX.
C
C Register roles, per the file header (rdi,rdx)=(rsi,rdx)<<rcx :
C   rdi  destination limb pointer
C   rsi  source limb pointer
C   rdx  limb count on entry, then a running index starting at count-4
C   rcx  shift count
C   rax  return value, the bits shifted out of the most significant limb
C   mm0  shift count,  mm1  64 minus shift count
C   mm3  left-shifted part of the limb whose result is not stored yet
C Limbs are handled from the most significant end downwards.
ASM_START()
PROLOGUE(mpn_lshift)
	mov	$64, %eax
	sub	%rcx, %rax
	movq	%rcx, %mm0
C the flags produced by this sub are consumed by the jbe below
	sub	$4, %rdx
	movq	%rax, %mm1
	movq	24(%rsi,%rdx,8), %mm5
	movq	%mm5, %mm3
	psrlq	%mm1, %mm5
C return value is the top limb shifted right by 64 minus shift count
	movq	%mm5, %rax
	psllq	%mm0, %mm3
	jbe	lshift_tail2
ALIGN(16)
C four limbs per pass
lshift_loop:
	movq	16(%rsi,%rdx,8), %mm2
	movq	%mm2, %mm4
	psrlq	%mm1, %mm2
	por	%mm2, %mm3
	movq	%mm3, 24(%rdi,%rdx,8)
	psllq	%mm0, %mm4

	movq	8(%rsi,%rdx,8), %mm5
	movq	%mm5, %mm3
	psrlq	%mm1, %mm5
	por	%mm5, %mm4
	movq	%mm4, 16(%rdi,%rdx,8)
	psllq	%mm0, %mm3

	movq	(%rsi,%rdx,8), %mm2
	movq	%mm2, %mm4
	psrlq	%mm1, %mm2
	por	%mm2, %mm3
	movq	%mm3, 8(%rdi,%rdx,8)
	psllq	%mm0, %mm4

	movq	-8(%rsi,%rdx,8), %mm5
	movq	%mm5, %mm3
	psrlq	%mm1, %mm5
	por	%mm5, %mm4
	movq	%mm4, (%rdi,%rdx,8)
	psllq	%mm0, %mm3

	sub	$4, %rdx
	ja	lshift_loop
C optional two-limb step, skipped when fewer than two limbs are unread
lshift_tail2:
	cmp	$-1, %rdx
	jl	lshift_tail1
	movq	16(%rsi,%rdx,8), %mm2
	movq	%mm2, %mm4
	psrlq	%mm1, %mm2
	por	%mm2, %mm3
	movq	%mm3, 24(%rdi,%rdx,8)
	psllq	%mm0, %mm4

	movq	8(%rsi,%rdx,8), %mm5
	movq	%mm5, %mm3
	psrlq	%mm1, %mm5
	por	%mm5, %mm4
	movq	%mm4, 16(%rdi,%rdx,8)
	psllq	%mm0, %mm3

	sub	$2, %rdx
C optional single-limb step, skipped when the index is odd
lshift_tail1:
	test	$1, %rdx
	jnz	lshift_last
	movq	16(%rsi,%rdx,8), %mm2
	movq	%mm2, %mm4
	psrlq	%mm1, %mm2
	por	%mm2, %mm3
	movq	%mm3, 24(%rdi,%rdx,8)
	psllq	%mm0, %mm4
	movq	%mm4, 16(%rdi,%rdx,8)
	emms
	ret
C only the pending limb in mm3 remains to be stored
lshift_last:
	movq	%mm3, 24(%rdi,%rdx,8)
	emms
	ret
EPILOGUE()
|
103
mpn/x86_64/amd64/lshift1.as
Normal file
103
mpn/x86_64/amd64/lshift1.as
Normal file
@ -0,0 +1,103 @@
|
||||
|
||||
; AMD64 mpn_lshift1
|
||||
; Copyright 2008 Jason Moxham
|
||||
; This file is part of the MPIR Library.
|
||||
; The MPIR Library is free software; you can redistribute it and/or modify
|
||||
; it under the terms of the GNU Lesser General Public License as published
|
||||
; by the Free Software Foundation; either version 2.1 of the License, or (at
|
||||
; your option) any later version.
|
||||
; The MPIR Library is distributed in the hope that it will be useful, but
|
||||
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
||||
; License for more details.
|
||||
; You should have received a copy of the GNU Lesser General Public License
|
||||
; along with the MPIR Library; see the file COPYING.LIB. If not, write
|
||||
; to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
|
||||
; Boston, MA 02110-1301, USA.
|
||||
|
||||
; (rdi,rdx) = (rsi,rdx)<<1
|
||||
; rax = carry
|
||||
|
||||
%include '../yasm_mac.inc'
|
||||
|
||||
BITS 64
|
||||
|
||||
; mpn_lshift1: shift a vector of 64-bit limbs left by one bit.
;
; Register roles (from the file header "(rdi,rdx) = (rsi,rdx)<<1"):
;   rdi  destination limb pointer, advanced by 64 bytes per loop pass
;   rsi  source limb pointer, advanced by 64 bytes per loop pass
;   rdx  limb count on entry, then the number of eight-limb passes
;   r11  (count mod 8) + 1, counted down through the tail
;   rax  return value: the carry out of the last limb processed (0 or 1)
; Each limb is doubled with adc, so the carry flag ripples from one limb to
; the next; the dec and lea used for bookkeeping leave the carry flag alone.
GLOBAL_FUNC mpn_lshift1
        xor     rax, rax
        mov     r11, rdx
        and     r11, 7
        inc     r11                     ; r11 = (count mod 8) + 1
        shr     rdx, 3                  ; rdx = count / 8
        ; the compare tests for "no full pass" and also clears the carry flag
        cmp     rdx, 0
        jz      lshift1_tail
        align   16
; Eight limbs per pass, from the least significant limb upwards.
lshift1_loop:
        mov     rcx, [rsi]
        mov     r8, [rsi + 8]
        mov     r10, [rsi + 16]
        mov     r9, [rsi + 24]
        adc     rcx, rcx
        adc     r8, r8
        adc     r10, r10
        adc     r9, r9
        mov     [rdi], rcx
        mov     [rdi + 8], r8
        mov     [rdi + 16], r10
        mov     [rdi + 24], r9

        mov     rcx, [rsi + 32]
        mov     r8, [rsi + 40]
        mov     r10, [rsi + 48]
        mov     r9, [rsi + 56]
        adc     rcx, rcx
        adc     r8, r8
        adc     r10, r10
        adc     r9, r9
        mov     [rdi + 32], rcx
        mov     [rdi + 40], r8
        mov     [rdi + 48], r10
        mov     [rdi + 56], r9

        lea     rdi, [rdi + 64]
        dec     rdx
        lea     rsi, [rsi + 64]
        jnz     lshift1_loop
; Up to seven leftover limbs, one at a time.
lshift1_tail:
        dec     r11
        jz      lshift1_done
        ; Could still have cache-bank conflicts in this tail part
        mov     rcx, [rsi]
        adc     rcx, rcx
        mov     [rdi], rcx
        dec     r11
        jz      lshift1_done

        mov     rcx, [rsi + 8]
        adc     rcx, rcx
        mov     [rdi + 8], rcx
        dec     r11
        jz      lshift1_done

        mov     rcx, [rsi + 16]
        adc     rcx, rcx
        mov     [rdi + 16], rcx
        dec     r11
        jz      lshift1_done

        mov     rcx, [rsi + 24]
        adc     rcx, rcx
        mov     [rdi + 24], rcx
        dec     r11
        jz      lshift1_done

        mov     rcx, [rsi + 32]
        adc     rcx, rcx
        mov     [rdi + 32], rcx
        dec     r11
        jz      lshift1_done

        mov     rcx, [rsi + 40]
        adc     rcx, rcx
        mov     [rdi + 40], rcx
        dec     r11
        jz      lshift1_done

        mov     rcx, [rsi + 48]
        adc     rcx, rcx
        mov     [rdi + 48], rcx
lshift1_done:
        adc     rax, rax                ; rax was zero, so rax = final carry
        ret
|
@ -1,109 +0,0 @@
|
||||
dnl AMD64 mpn_lshift1
|
||||
|
||||
dnl Copyright 2008 Jason Moxham
|
||||
|
||||
dnl This file is part of the MPIR Library.
|
||||
|
||||
dnl The MPIR Library is free software; you can redistribute it and/or modify
|
||||
dnl it under the terms of the GNU Lesser General Public License as published
|
||||
dnl by the Free Software Foundation; either version 2.1 of the License, or (at
|
||||
dnl your option) any later version.
|
||||
|
||||
dnl The MPIR Library is distributed in the hope that it will be useful, but
|
||||
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
||||
dnl License for more details.
|
||||
|
||||
dnl You should have received a copy of the GNU Lesser General Public License
|
||||
dnl along with the MPIR Library; see the file COPYING.LIB. If not, write
|
||||
dnl to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
|
||||
dnl Boston, MA 02110-1301, USA.
|
||||
|
||||
include(`../config.m4')
|
||||
|
||||
C (rdi,rdx)=(rsi,rdx)<<1
|
||||
C rax=carry
|
||||
|
||||
C mpn_lshift1: shift a vector of 64-bit limbs left by one bit.
C
C Register roles, per the file header (rdi,rdx)=(rsi,rdx)<<1 :
C   rdi  destination limb pointer, advanced by 64 bytes per loop pass
C   rsi  source limb pointer, advanced by 64 bytes per loop pass
C   rdx  limb count on entry, then the number of eight-limb passes
C   r11  count mod 8, plus one, counted down through the tail
C   rax  return value, the carry out of the last limb processed
C Each limb is doubled with adc so the carry flag ripples between limbs.
C The dec and lea bookkeeping instructions leave the carry flag alone.
ASM_START()
PROLOGUE(mpn_lshift1)
	xor	%rax, %rax
	mov	%rdx, %r11
	and	$7, %r11
	inc	%r11
	shr	$3, %rdx
C the compare tests for no full pass and also clears the carry flag
	cmp	$0, %rdx
	jz	lshift1_tail
ALIGN(16)
C eight limbs per pass, from the least significant limb upwards
lshift1_loop:
	mov	(%rsi), %rcx
	mov	8(%rsi), %r8
	mov	16(%rsi), %r10
	mov	24(%rsi), %r9
	adc	%rcx, %rcx
	adc	%r8, %r8
	adc	%r10, %r10
	adc	%r9, %r9
	mov	%rcx, (%rdi)
	mov	%r8, 8(%rdi)
	mov	%r10, 16(%rdi)
	mov	%r9, 24(%rdi)

	mov	32(%rsi), %rcx
	mov	40(%rsi), %r8
	mov	48(%rsi), %r10
	mov	56(%rsi), %r9
	adc	%rcx, %rcx
	adc	%r8, %r8
	adc	%r10, %r10
	adc	%r9, %r9
	mov	%rcx, 32(%rdi)
	mov	%r8, 40(%rdi)
	mov	%r10, 48(%rdi)
	mov	%r9, 56(%rdi)

	lea	64(%rdi), %rdi
	dec	%rdx
	lea	64(%rsi), %rsi
	jnz	lshift1_loop
C up to seven leftover limbs, one at a time
lshift1_tail:
	dec	%r11
	jz	lshift1_done
C Could still have cache-bank conflicts in this tail part
	mov	(%rsi), %rcx
	adc	%rcx, %rcx
	mov	%rcx, (%rdi)
	dec	%r11
	jz	lshift1_done

	mov	8(%rsi), %rcx
	adc	%rcx, %rcx
	mov	%rcx, 8(%rdi)
	dec	%r11
	jz	lshift1_done

	mov	16(%rsi), %rcx
	adc	%rcx, %rcx
	mov	%rcx, 16(%rdi)
	dec	%r11
	jz	lshift1_done

	mov	24(%rsi), %rcx
	adc	%rcx, %rcx
	mov	%rcx, 24(%rdi)
	dec	%r11
	jz	lshift1_done

	mov	32(%rsi), %rcx
	adc	%rcx, %rcx
	mov	%rcx, 32(%rdi)
	dec	%r11
	jz	lshift1_done

	mov	40(%rsi), %rcx
	adc	%rcx, %rcx
	mov	%rcx, 40(%rdi)
	dec	%r11
	jz	lshift1_done

	mov	48(%rsi), %rcx
	adc	%rcx, %rcx
	mov	%rcx, 48(%rdi)
C rax was zeroed on entry, so this leaves the final carry in rax
lshift1_done:
	adc	%rax, %rax
	ret
EPILOGUE()
|
101
mpn/x86_64/amd64/rshift.as
Normal file
101
mpn/x86_64/amd64/rshift.as
Normal file
@ -0,0 +1,101 @@
|
||||
|
||||
; AMD64 mpn_rshift
|
||||
; Copyright 2008 Jason Moxham
|
||||
; This file is part of the MPIR Library.
|
||||
; The MPIR Library is free software; you can redistribute it and/or modify
|
||||
; it under the terms of the GNU Lesser General Public License as published
|
||||
; by the Free Software Foundation; either version 2.1 of the License, or (at
|
||||
; your option) any later version.
|
||||
; The MPIR Library is distributed in the hope that it will be useful, but
|
||||
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
||||
; License for more details.
|
||||
; You should have received a copy of the GNU Lesser General Public License
|
||||
; along with the MPIR Library; see the file COPYING.LIB. If not, write
|
||||
; to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
|
||||
; Boston, MA 02110-1301, USA.
|
||||
|
||||
; (rdi,rdx) = (rsi,rdx)>>rcx
|
||||
; rax = carry
|
||||
|
||||
%include '../yasm_mac.inc'
|
||||
|
||||
BITS 64
|
||||
|
||||
; mpn_rshift: shift a vector of 64-bit limbs right by rcx bits using MMX.
;
; Register roles (from the file header "(rdi,rdx) = (rsi,rdx)>>rcx"):
;   rdi  destination pointer, rebased to dst + 8*count - 32
;   rsi  source pointer, rebased to src + 8*count - 32
;   rdx  limb count
;   rcx  shift count
;   r8   running index, starts at 4 - count so [rsi + r8*8] is the lowest limb
;   rax  return value: lowest source limb << (64 - shift count)
;   mm0  shift count
;   mm1  64 - shift count
;   mm3  right-shifted part of the limb whose result has not been stored yet
; Limbs are read and written from the least significant end upwards.
; NOTE(review): assumes rdx >= 1 and 1 <= rcx <= 63 -- confirm against callers.
GLOBAL_FUNC mpn_rshift
        mov     eax, 64
        lea     rsi, [rsi + rdx*8 - 32]
        lea     rdi, [rdi + rdx*8 - 32]
        sub     rax, rcx                ; rax = 64 - shift count
        movq    mm0, rcx
        mov     r8d, 4
        sub     r8, rdx                 ; borrow (CF) is set exactly when count > 4
        movq    mm1, rax
        movq    mm5, [rsi + r8*8]       ; least significant source limb
        movq    mm3, mm5
        psllq   mm5, mm1
        movq    rax, mm5                ; return value
        psrlq   mm3, mm0
        jnc     rshift_tail2            ; count <= 4: skip the four-limb loop
        align   16
; Four limbs per pass; mm3 carries the pending low part into the next pass.
rshift_loop:
        movq    mm2, [rsi + r8*8 + 8]
        movq    mm4, mm2
        psllq   mm2, mm1
        por     mm3, mm2
        movq    [rdi + r8*8], mm3
        psrlq   mm4, mm0

        movq    mm5, [rsi + r8*8 + 16]
        movq    mm3, mm5
        psllq   mm5, mm1
        por     mm4, mm5
        movq    [rdi + r8*8 + 8], mm4
        psrlq   mm3, mm0

        ; got room here for another jump out , if we can arrange our r8 to be
        ; slightly different , so we can use a jz or jp here
        movq    mm2, [rsi + r8*8 + 24]
        movq    mm4, mm2
        psllq   mm2, mm1
        por     mm3, mm2
        movq    [rdi + r8*8 + 16], mm3
        psrlq   mm4, mm0

        movq    mm5, [rsi + r8*8 + 32]
        movq    mm3, mm5
        psllq   mm5, mm1
        por     mm4, mm5
        movq    [rdi + r8*8 + 24], mm4
        psrlq   mm3, mm0

        add     r8, 4
        jnc     rshift_loop             ; carry out means the index crossed zero
; Optional two-limb step, taken when bit 1 of the index is clear.
rshift_tail2:
        test    r8, 2
        jnz     rshift_tail1
        movq    mm2, [rsi + r8*8 + 8]
        movq    mm4, mm2
        psllq   mm2, mm1
        por     mm3, mm2
        movq    [rdi + r8*8], mm3
        psrlq   mm4, mm0

        movq    mm5, [rsi + r8*8 + 16]
        movq    mm3, mm5
        psllq   mm5, mm1
        por     mm4, mm5
        movq    [rdi + r8*8 + 8], mm4
        psrlq   mm3, mm0

        add     r8, 2
; Optional single-limb step, taken when bit 0 of the index is clear.
rshift_tail1:
        test    r8, 1
        jnz     rshift_last
        movq    mm2, [rsi + r8*8 + 8]
        movq    mm4, mm2
        psllq   mm2, mm1
        por     mm3, mm2
        movq    [rdi + r8*8], mm3
        psrlq   mm4, mm0
        movq    [rdi + r8*8 + 8], mm4   ; top result limb gets zero bits shifted in
        emms                            ; release the MMX state before returning
        ret
rshift_last:
        movq    [rdi + r8*8], mm3       ; store the pending (top) result limb
        emms
        ret
|
@ -1,109 +0,0 @@
|
||||
dnl AMD64 mpn_rshift
|
||||
|
||||
dnl Copyright 2008 Jason Moxham
|
||||
|
||||
dnl This file is part of the MPIR Library.
|
||||
|
||||
dnl The MPIR Library is free software; you can redistribute it and/or modify
|
||||
dnl it under the terms of the GNU Lesser General Public License as published
|
||||
dnl by the Free Software Foundation; either version 2.1 of the License, or (at
|
||||
dnl your option) any later version.
|
||||
|
||||
dnl The MPIR Library is distributed in the hope that it will be useful, but
|
||||
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
||||
dnl License for more details.
|
||||
|
||||
dnl You should have received a copy of the GNU Lesser General Public License
|
||||
dnl along with the MPIR Library; see the file COPYING.LIB. If not, write
|
||||
dnl to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
|
||||
dnl Boston, MA 02110-1301, USA.
|
||||
|
||||
include(`../config.m4')
|
||||
|
||||
C (rdi,rdx)=(rsi,rdx)>>rcx
|
||||
C rax=carry
|
||||
|
||||
C mpn_rshift: shift a vector of 64-bit limbs right by rcx bits using MMX.
C
C Register roles, per the file header (rdi,rdx)=(rsi,rdx)>>rcx :
C   rdi  destination pointer, rebased to dst + 8*count - 32
C   rsi  source pointer, rebased to src + 8*count - 32
C   rdx  limb count
C   rcx  shift count
C   r8   running index starting at 4 - count
C   rax  return value, the lowest source limb shifted left by 64 - count bits
C   mm0  shift count,  mm1  64 minus shift count
C   mm3  right-shifted part of the limb whose result is not stored yet
C Limbs are handled from the least significant end upwards.
ASM_START()
PROLOGUE(mpn_rshift)
	mov	$64, %eax
	lea	-32(%rsi,%rdx,8), %rsi
	lea	-32(%rdi,%rdx,8), %rdi
	sub	%rcx, %rax
	movq	%rcx, %mm0
	mov	$4, %r8d
C the borrow from this sub is set exactly when the count exceeds 4
	sub	%rdx, %r8
	movq	%rax, %mm1
	movq	(%rsi,%r8,8), %mm5
	movq	%mm5, %mm3
	psllq	%mm1, %mm5
C return value
	movq	%mm5, %rax
	psrlq	%mm0, %mm3
	jnc	rshift_tail2
ALIGN(16)
C four limbs per pass
rshift_loop:
	movq	8(%rsi,%r8,8), %mm2
	movq	%mm2, %mm4
	psllq	%mm1, %mm2
	por	%mm2, %mm3
	movq	%mm3, (%rdi,%r8,8)
	psrlq	%mm0, %mm4

	movq	16(%rsi,%r8,8), %mm5
	movq	%mm5, %mm3
	psllq	%mm1, %mm5
	por	%mm5, %mm4
	movq	%mm4, 8(%rdi,%r8,8)
	psrlq	%mm0, %mm3

C got room here for another jump out , if we can arrange our r8 to be
C slightly different , so we can use a jz or jp here
	movq	24(%rsi,%r8,8), %mm2
	movq	%mm2, %mm4
	psllq	%mm1, %mm2
	por	%mm2, %mm3
	movq	%mm3, 16(%rdi,%r8,8)
	psrlq	%mm0, %mm4

	movq	32(%rsi,%r8,8), %mm5
	movq	%mm5, %mm3
	psllq	%mm1, %mm5
	por	%mm5, %mm4
	movq	%mm4, 24(%rdi,%r8,8)
	psrlq	%mm0, %mm3

	add	$4, %r8
	jnc	rshift_loop
C optional two-limb step, taken when bit 1 of the index is clear
rshift_tail2:
	test	$2, %r8
	jnz	rshift_tail1
	movq	8(%rsi,%r8,8), %mm2
	movq	%mm2, %mm4
	psllq	%mm1, %mm2
	por	%mm2, %mm3
	movq	%mm3, (%rdi,%r8,8)
	psrlq	%mm0, %mm4

	movq	16(%rsi,%r8,8), %mm5
	movq	%mm5, %mm3
	psllq	%mm1, %mm5
	por	%mm5, %mm4
	movq	%mm4, 8(%rdi,%r8,8)
	psrlq	%mm0, %mm3

	add	$2, %r8
C optional single-limb step, taken when bit 0 of the index is clear
rshift_tail1:
	test	$1, %r8
	jnz	rshift_last
	movq	8(%rsi,%r8,8), %mm2
	movq	%mm2, %mm4
	psllq	%mm1, %mm2
	por	%mm2, %mm3
	movq	%mm3, (%rdi,%r8,8)
	psrlq	%mm0, %mm4
	movq	%mm4, 8(%rdi,%r8,8)
	emms
	ret
C only the pending limb in mm3 remains to be stored
rshift_last:
	movq	%mm3, (%rdi,%r8,8)
	emms
	ret
EPILOGUE()
|
105
mpn/x86_64/amd64/rshift1.as
Normal file
105
mpn/x86_64/amd64/rshift1.as
Normal file
@ -0,0 +1,105 @@
|
||||
|
||||
; AMD64 mpn_rshift1
|
||||
; Copyright 2008 Jason Moxham
|
||||
; This file is part of the MPIR Library.
|
||||
; The MPIR Library is free software; you can redistribute it and/or modify
|
||||
; it under the terms of the GNU Lesser General Public License as published
|
||||
; by the Free Software Foundation; either version 2.1 of the License, or (at
|
||||
; your option) any later version.
|
||||
; The MPIR Library is distributed in the hope that it will be useful, but
|
||||
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
||||
; License for more details.
|
||||
; You should have received a copy of the GNU Lesser General Public License
|
||||
; along with the MPIR Library; see the file COPYING.LIB. If not, write
|
||||
; to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
|
||||
; Boston, MA 02110-1301, USA.
|
||||
|
||||
; (rdi,rdx) = (rsi,rdx)>>1
|
||||
; rax = carry
|
||||
|
||||
%include '../yasm_mac.inc'
|
||||
|
||||
BITS 64
|
||||
|
||||
; mpn_rshift1: shift a vector of 64-bit limbs right by one bit.
;
; Register roles (from the file header "(rdi,rdx) = (rsi,rdx)>>1"):
;   rdi  destination pointer, moved to the top limb and stepped downwards
;   rsi  source pointer, moved to the top limb and stepped downwards
;   rdx  limb count on entry, then the number of eight-limb passes
;   r11  (count mod 8) + 1, counted down through the tail
;   rax  return value: the bit shifted out of the lowest limb, in bit 63
; Each limb is rotated right through the carry flag, so the dropped bit of
; one limb becomes the top bit of the next lower limb; the dec and lea used
; for bookkeeping leave the carry flag alone.
GLOBAL_FUNC mpn_rshift1
        xor     rax, rax
        lea     rsi, [rsi + rdx*8 - 8]  ; rsi -> most significant source limb
        lea     rdi, [rdi + rdx*8 - 8]  ; rdi -> most significant result limb
        mov     r11, rdx
        and     r11, 7
        inc     r11                     ; r11 = (count mod 8) + 1
        shr     rdx, 3                  ; rdx = count / 8
        ; the compare tests for "no full pass" and also clears the carry flag
        cmp     rdx, 0
        jz      rshift1_tail
        align   16
; Eight limbs per pass, from the most significant limb downwards.
rshift1_loop:
        mov     rcx, [rsi]
        mov     r8, [rsi - 8]
        mov     r9, [rsi - 16]
        mov     r10, [rsi - 24]
        rcr     rcx, 1
        rcr     r8, 1
        rcr     r9, 1
        rcr     r10, 1
        mov     [rdi], rcx
        mov     [rdi - 8], r8
        mov     [rdi - 16], r9
        mov     [rdi - 24], r10

        mov     rcx, [rsi - 32]
        mov     r8, [rsi - 40]
        mov     r9, [rsi - 48]
        mov     r10, [rsi - 56]
        rcr     rcx, 1
        rcr     r8, 1
        rcr     r9, 1
        rcr     r10, 1
        mov     [rdi - 32], rcx
        mov     [rdi - 40], r8
        mov     [rdi - 48], r9
        mov     [rdi - 56], r10

        lea     rsi, [rsi - 64]
        dec     rdx
        lea     rdi, [rdi - 64]
        jnz     rshift1_loop
; Up to seven leftover limbs, one at a time.
rshift1_tail:
        dec     r11
        jz      rshift1_done
        ; Could suffer cache-bank conflicts in this tail part
        mov     rcx, [rsi]
        rcr     rcx, 1
        mov     [rdi], rcx
        dec     r11
        jz      rshift1_done

        mov     rcx, [rsi - 8]
        rcr     rcx, 1
        mov     [rdi - 8], rcx
        dec     r11
        jz      rshift1_done

        mov     rcx, [rsi - 16]
        rcr     rcx, 1
        mov     [rdi - 16], rcx
        dec     r11
        jz      rshift1_done

        mov     rcx, [rsi - 24]
        rcr     rcx, 1
        mov     [rdi - 24], rcx
        dec     r11
        jz      rshift1_done

        mov     rcx, [rsi - 32]
        rcr     rcx, 1
        mov     [rdi - 32], rcx
        dec     r11
        jz      rshift1_done

        mov     rcx, [rsi - 40]
        rcr     rcx, 1
        mov     [rdi - 40], rcx
        dec     r11
        jz      rshift1_done

        mov     rcx, [rsi - 48]
        rcr     rcx, 1
        mov     [rdi - 48], rcx
rshift1_done:
        rcr     rax, 1                  ; rax was zero: final carry lands in bit 63
        ret
|
@ -1,109 +0,0 @@
|
||||
dnl AMD64 mpn_rshift1
|
||||
|
||||
dnl Copyright 2008 Jason Moxham
|
||||
|
||||
dnl This file is part of the MPIR Library.
|
||||
|
||||
dnl The MPIR Library is free software; you can redistribute it and/or modify
|
||||
dnl it under the terms of the GNU Lesser General Public License as published
|
||||
dnl by the Free Software Foundation; either version 2.1 of the License, or (at
|
||||
dnl your option) any later version.
|
||||
|
||||
dnl The MPIR Library is distributed in the hope that it will be useful, but
|
||||
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||||
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
||||
dnl License for more details.
|
||||
|
||||
dnl You should have received a copy of the GNU Lesser General Public License
|
||||
dnl along with the MPIR Library; see the file COPYING.LIB. If not, write
|
||||
dnl to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
|
||||
dnl Boston, MA 02110-1301, USA.
|
||||
|
||||
include(`../config.m4')
|
||||
|
||||
C (rdi,rdx)=(rsi,rdx)>>1
|
||||
C rax=carry
|
||||
|
||||
C mpn_rshift1: shift a vector of 64-bit limbs right by one bit.
C
C Register roles, per the file header (rdi,rdx)=(rsi,rdx)>>1 :
C   rdi  destination pointer, moved to the top limb and stepped downwards
C   rsi  source pointer, moved to the top limb and stepped downwards
C   rdx  limb count on entry, then the number of eight-limb passes
C   r11  count mod 8, plus one, counted down through the tail
C   rax  return value, the bit shifted out of the lowest limb, in bit 63
C Each limb is rotated right through the carry flag so the dropped bit of
C one limb becomes the top bit of the next lower limb.
C The dec and lea bookkeeping instructions leave the carry flag alone.
ASM_START()
PROLOGUE(mpn_rshift1)
	xor	%rax, %rax
	lea	-8(%rsi,%rdx,8), %rsi
	lea	-8(%rdi,%rdx,8), %rdi
	mov	%rdx, %r11
	and	$7, %r11
	inc	%r11
	shr	$3, %rdx
C the compare tests for no full pass and also clears the carry flag
	cmp	$0, %rdx
	jz	rshift1_tail
ALIGN(16)
C eight limbs per pass, from the most significant limb downwards
rshift1_loop:
	mov	(%rsi), %rcx
	mov	-8(%rsi), %r8
	mov	-16(%rsi), %r9
	mov	-24(%rsi), %r10
	rcr	$1, %rcx
	rcr	$1, %r8
	rcr	$1, %r9
	rcr	$1, %r10
	mov	%rcx, (%rdi)
	mov	%r8, -8(%rdi)
	mov	%r9, -16(%rdi)
	mov	%r10, -24(%rdi)

	mov	-32(%rsi), %rcx
	mov	-40(%rsi), %r8
	mov	-48(%rsi), %r9
	mov	-56(%rsi), %r10
	rcr	$1, %rcx
	rcr	$1, %r8
	rcr	$1, %r9
	rcr	$1, %r10
	mov	%rcx, -32(%rdi)
	mov	%r8, -40(%rdi)
	mov	%r9, -48(%rdi)
	mov	%r10, -56(%rdi)

	lea	-64(%rsi), %rsi
	dec	%rdx
	lea	-64(%rdi), %rdi
	jnz	rshift1_loop
C up to seven leftover limbs, one at a time
rshift1_tail:
	dec	%r11
	jz	rshift1_done
C Could suffer cache-bank conflicts in this tail part
	mov	(%rsi), %rcx
	rcr	$1, %rcx
	mov	%rcx, (%rdi)
	dec	%r11
	jz	rshift1_done

	mov	-8(%rsi), %rcx
	rcr	$1, %rcx
	mov	%rcx, -8(%rdi)
	dec	%r11
	jz	rshift1_done

	mov	-16(%rsi), %rcx
	rcr	$1, %rcx
	mov	%rcx, -16(%rdi)
	dec	%r11
	jz	rshift1_done

	mov	-24(%rsi), %rcx
	rcr	$1, %rcx
	mov	%rcx, -24(%rdi)
	dec	%r11
	jz	rshift1_done

	mov	-32(%rsi), %rcx
	rcr	$1, %rcx
	mov	%rcx, -32(%rdi)
	dec	%r11
	jz	rshift1_done

	mov	-40(%rsi), %rcx
	rcr	$1, %rcx
	mov	%rcx, -40(%rdi)
	dec	%r11
	jz	rshift1_done

	mov	-48(%rsi), %rcx
	rcr	$1, %rcx
	mov	%rcx, -48(%rdi)
C rax was zeroed on entry, so the final carry lands in bit 63 of rax
rshift1_done:
	rcr	$1, %rax
	ret
EPILOGUE()
|
Loading…
Reference in New Issue
Block a user