Converted left and right shift assembly functions of Jason Moxham to yasm format.
wbhart 2009-03-04 16:22:48 +00:00
parent 53fc1663bc
commit f2fa962ce3
8 changed files with 405 additions and 428 deletions


@@ -0,0 +1,96 @@
; AMD64 mpn_lshift
; Copyright 2008 Jason Moxham
; This file is part of the MPIR Library.
; The MPIR Library is free software; you can redistribute it and/or modify
; it under the terms of the GNU Lesser General Public License as published
; by the Free Software Foundation; either version 2.1 of the License, or (at
; your option) any later version.
; The MPIR Library is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
; License for more details.
; You should have received a copy of the GNU Lesser General Public License
; along with the MPIR Library; see the file COPYING.LIB. If not, write
; to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
; Boston, MA 02110-1301, USA.
; (rdi,rdx) = (rsi,rdx)<<rcx
; rax = carry
%include '../yasm_mac.inc'
BITS 64
GLOBAL_FUNC mpn_lshift
mov eax, 64
sub rax, rcx
movq mm0, rcx
sub rdx, 4
movq mm1, rax
movq mm5, [rsi+rdx*8+24]
movq mm3, mm5
psrlq mm5, mm1
movq rax, mm5
psllq mm3, mm0
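; the jbe below still sees the flags from 'sub rdx, 4'; MMX ops leave EFLAGS untouched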
jbe skiploop
align 16
loop1:
movq mm2, [rsi+rdx*8+16]
movq mm4, mm2
psrlq mm2, mm1
por mm3, mm2
movq [rdi+rdx*8+24], mm3
psllq mm4, mm0
movq mm5, [rsi+rdx*8+8]
movq mm3, mm5
psrlq mm5, mm1
por mm4, mm5
movq [rdi+rdx*8+16], mm4
psllq mm3, mm0
movq mm2, [rsi+rdx*8]
movq mm4, mm2
psrlq mm2, mm1
por mm3, mm2
movq [rdi+rdx*8+8], mm3
psllq mm4, mm0
movq mm5, [rsi+rdx*8-8]
movq mm3, mm5
psrlq mm5, mm1
por mm4, mm5
movq [rdi+rdx*8], mm4
psllq mm3, mm0
sub rdx, 4
ja loop1
skiploop:
cmp rdx, -1
jl next
movq mm2, [rsi+rdx*8+16]
movq mm4, mm2
psrlq mm2, mm1
por mm3, mm2
movq [rdi+rdx*8+24], mm3
psllq mm4, mm0
movq mm5, [rsi+rdx*8+8]
movq mm3, mm5
psrlq mm5, mm1
por mm4, mm5
movq [rdi+rdx*8+16], mm4
psllq mm3, mm0
sub rdx, 2
next:
test rdx, 1
jnz end
movq mm2, [rsi+rdx*8+16]
movq mm4, mm2
psrlq mm2, mm1
por mm3, mm2
movq [rdi+rdx*8+24], mm3
psllq mm4, mm0
movq [rdi+rdx*8+16], mm4
emms
ret
end:
movq [rdi+rdx*8+24], mm3
emms
ret
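
The header comment's contract, (rdi,rdx) = (rsi,rdx)<<rcx with the out-shifted high bits returned right-adjusted in rax, is the usual mpn_lshift convention. A portable C sketch of the same semantics, assuming 64-bit limbs and 0 < cnt < 64 (ref_lshift is an illustrative name, not part of MPIR):

#include <stdint.h>

typedef uint64_t mp_limb_t;

/* dst = src << cnt over n limbs; returns the bits shifted out of the
   top limb, right-adjusted.  Walks high to low, so it is safe in place. */
mp_limb_t ref_lshift(mp_limb_t *dst, const mp_limb_t *src, long n, unsigned cnt)
{
    mp_limb_t carry = src[n - 1] >> (64 - cnt);
    for (long i = n - 1; i > 0; i--)
        dst[i] = (src[i] << cnt) | (src[i - 1] >> (64 - cnt));
    dst[0] = src[0] << cnt;
    return carry;
}

The loop body above is the same psllq/psrlq/por recombination the MMX code performs, unrolled four limbs per iteration (mm0 holds cnt, mm1 holds 64-cnt).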


@@ -1,101 +0,0 @@
dnl AMD64 mpn_lshift
dnl Copyright 2008 Jason Moxham
dnl This file is part of the MPIR Library.
dnl The MPIR Library is free software; you can redistribute it and/or modify
dnl it under the terms of the GNU Lesser General Public License as published
dnl by the Free Software Foundation; either version 2.1 of the License, or (at
dnl your option) any later version.
dnl The MPIR Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
dnl License for more details.
dnl You should have received a copy of the GNU Lesser General Public License
dnl along with the MPIR Library; see the file COPYING.LIB. If not, write
dnl to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
dnl Boston, MA 02110-1301, USA.
include(`../config.m4')
C (rdi,rdx)=(rsi,rdx)<<rcx
C rax=carry
ASM_START()
PROLOGUE(mpn_lshift)
mov $64,%eax
sub %rcx,%rax
movq %rcx,%mm0
sub $4,%rdx
movq %rax,%mm1
movq 24(%rsi,%rdx,8),%mm5
movq %mm5,%mm3
psrlq %mm1,%mm5
movq %mm5,%rax
psllq %mm0,%mm3
jbe skiploop
ALIGN(16)
loop:
movq 16(%rsi,%rdx,8),%mm2
movq %mm2,%mm4
psrlq %mm1,%mm2
por %mm2,%mm3
movq %mm3,24(%rdi,%rdx,8)
psllq %mm0,%mm4
movq 8(%rsi,%rdx,8),%mm5
movq %mm5,%mm3
psrlq %mm1,%mm5
por %mm5,%mm4
movq %mm4,16(%rdi,%rdx,8)
psllq %mm0,%mm3
movq (%rsi,%rdx,8),%mm2
movq %mm2,%mm4
psrlq %mm1,%mm2
por %mm2,%mm3
movq %mm3,8(%rdi,%rdx,8)
psllq %mm0,%mm4
movq -8(%rsi,%rdx,8),%mm5
movq %mm5,%mm3
psrlq %mm1,%mm5
por %mm5,%mm4
movq %mm4,(%rdi,%rdx,8)
psllq %mm0,%mm3
sub $4,%rdx
ja loop
skiploop:
cmp $-1,%rdx
jl next
movq 16(%rsi,%rdx,8),%mm2
movq %mm2,%mm4
psrlq %mm1,%mm2
por %mm2,%mm3
movq %mm3,24(%rdi,%rdx,8)
psllq %mm0,%mm4
movq 8(%rsi,%rdx,8),%mm5
movq %mm5,%mm3
psrlq %mm1,%mm5
por %mm5,%mm4
movq %mm4,16(%rdi,%rdx,8)
psllq %mm0,%mm3
sub $2,%rdx
next:
test $1,%rdx
jnz end
movq 16(%rsi,%rdx,8),%mm2
movq %mm2,%mm4
psrlq %mm1,%mm2
por %mm2,%mm3
movq %mm3,24(%rdi,%rdx,8)
psllq %mm0,%mm4
movq %mm4,16(%rdi,%rdx,8)
emms
ret
end:
movq %mm3,24(%rdi,%rdx,8)
emms
ret
EPILOGUE()

mpn/x86_64/amd64/lshift1.as Normal file

@@ -0,0 +1,103 @@
; AMD64 mpn_lshift1
; Copyright 2008 Jason Moxham
; This file is part of the MPIR Library.
; The MPIR Library is free software; you can redistribute it and/or modify
; it under the terms of the GNU Lesser General Public License as published
; by the Free Software Foundation; either version 2.1 of the License, or (at
; your option) any later version.
; The MPIR Library is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
; License for more details.
; You should have received a copy of the GNU Lesser General Public License
; along with the MPIR Library; see the file COPYING.LIB. If not, write
; to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
; Boston, MA 02110-1301, USA.
; (rdi,rdx) = (rsi,rdx)<<1
; rax = carry
%include '../yasm_mac.inc'
BITS 64
GLOBAL_FUNC mpn_lshift1
xor rax, rax
mov r11, rdx
and r11, 7
inc r11
shr rdx, 3
; the cmp below also clears the carry flag for the adc chain
cmp rdx, 0
jz next
align 16
loop1:
mov rcx, [rsi]
mov r8, [rsi+8]
mov r10, [rsi+16]
mov r9, [rsi+24]
adc rcx, rcx
adc r8, r8
adc r10, r10
adc r9, r9
mov [rdi], rcx
mov [rdi+8], r8
mov [rdi+16], r10
mov [rdi+24], r9
mov rcx, [rsi+32]
mov r8, [rsi+40]
mov r10, [rsi+48]
mov r9, [rsi+56]
adc rcx, rcx
adc r8, r8
adc r10, r10
adc r9, r9
mov [rdi+32], rcx
mov [rdi+40], r8
mov [rdi+48], r10
mov [rdi+56], r9
lea rdi, [rdi+64]
dec rdx
lea rsi, [rsi+64]
jnz loop1
next:
dec r11
jz end
; Could still have cache-bank conflicts in this tail part
mov rcx, [rsi]
adc rcx, rcx
mov [rdi], rcx
dec r11
jz end
mov rcx, [rsi+8]
adc rcx, rcx
mov [rdi+8], rcx
dec r11
jz end
mov rcx, [rsi+16]
adc rcx, rcx
mov [rdi+16], rcx
dec r11
jz end
mov rcx, [rsi+24]
adc rcx, rcx
mov [rdi+24], rcx
dec r11
jz end
mov rcx, [rsi+32]
adc rcx, rcx
mov [rdi+32], rcx
dec r11
jz end
mov rcx, [rsi+40]
adc rcx, rcx
mov [rdi+40], rcx
dec r11
jz end
mov rcx, [rsi+48]
adc rcx, rcx
mov [rdi+48], rcx
end:
adc rax, rax
ret
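
Shifting by a single bit lets an adc chain replace the psllq/psrlq recombination: adc reg, reg computes 2*reg + CF and leaves the outgoing top bit in CF for the next limb. A portable C sketch of the semantics, assuming 64-bit limbs (ref_lshift1 is an illustrative name, not part of MPIR):

#include <stdint.h>

typedef uint64_t mp_limb_t;

/* dst = src << 1 over n limbs; returns the bit shifted out at the top,
   which is exactly what the unrolled 'adc reg, reg' chain computes. */
mp_limb_t ref_lshift1(mp_limb_t *dst, const mp_limb_t *src, long n)
{
    mp_limb_t carry = 0;
    for (long i = 0; i < n; i++) {
        mp_limb_t out = src[i] >> 63;      /* bit that leaves this limb */
        dst[i] = (src[i] << 1) | carry;    /* previous limb's bit enters at the bottom */
        carry = out;
    }
    return carry;
}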


@@ -1,109 +0,0 @@
dnl AMD64 mpn_lshift1
dnl Copyright 2008 Jason Moxham
dnl This file is part of the MPIR Library.
dnl The MPIR Library is free software; you can redistribute it and/or modify
dnl it under the terms of the GNU Lesser General Public License as published
dnl by the Free Software Foundation; either version 2.1 of the License, or (at
dnl your option) any later version.
dnl The MPIR Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
dnl License for more details.
dnl You should have received a copy of the GNU Lesser General Public License
dnl along with the MPIR Library; see the file COPYING.LIB. If not, write
dnl to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
dnl Boston, MA 02110-1301, USA.
include(`../config.m4')
C (rdi,rdx)=(rsi,rdx)<<1
C rax=carry
ASM_START()
PROLOGUE(mpn_lshift1)
xor %rax,%rax
mov %rdx,%r11
and $7,%r11
inc %r11
shr $3,%rdx
C and clear carry flag
cmp $0,%rdx
jz next
ALIGN(16)
loop:
mov (%rsi),%rcx
mov 8(%rsi),%r8
mov 16(%rsi),%r10
mov 24(%rsi),%r9
adc %rcx,%rcx
adc %r8,%r8
adc %r10,%r10
adc %r9,%r9
mov %rcx,(%rdi)
mov %r8,8(%rdi)
mov %r10,16(%rdi)
mov %r9,24(%rdi)
mov 32(%rsi),%rcx
mov 40(%rsi),%r8
mov 48(%rsi),%r10
mov 56(%rsi),%r9
adc %rcx,%rcx
adc %r8,%r8
adc %r10,%r10
adc %r9,%r9
mov %rcx,32(%rdi)
mov %r8,40(%rdi)
mov %r10,48(%rdi)
mov %r9,56(%rdi)
lea 64(%rdi),%rdi
dec %rdx
lea 64(%rsi),%rsi
jnz loop
next:
dec %r11
jz end
C Could still have cache-bank conflicts in this tail part
mov (%rsi),%rcx
adc %rcx,%rcx
mov %rcx,(%rdi)
dec %r11
jz end
mov 8(%rsi),%rcx
adc %rcx,%rcx
mov %rcx,8(%rdi)
dec %r11
jz end
mov 16(%rsi),%rcx
adc %rcx,%rcx
mov %rcx,16(%rdi)
dec %r11
jz end
mov 24(%rsi),%rcx
adc %rcx,%rcx
mov %rcx,24(%rdi)
dec %r11
jz end
mov 32(%rsi),%rcx
adc %rcx,%rcx
mov %rcx,32(%rdi)
dec %r11
jz end
mov 40(%rsi),%rcx
adc %rcx,%rcx
mov %rcx,40(%rdi)
dec %r11
jz end
mov 48(%rsi),%rcx
adc %rcx,%rcx
mov %rcx,48(%rdi)
end:
adc %rax,%rax
ret
EPILOGUE()

mpn/x86_64/amd64/rshift.as Normal file

@@ -0,0 +1,101 @@
; AMD64 mpn_rshift
; Copyright 2008 Jason Moxham
; This file is part of the MPIR Library.
; The MPIR Library is free software; you can redistribute it and/or modify
; it under the terms of the GNU Lesser General Public License as published
; by the Free Software Foundation; either version 2.1 of the License, or (at
; your option) any later version.
; The MPIR Library is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
; License for more details.
; You should have received a copy of the GNU Lesser General Public License
; along with the MPIR Library; see the file COPYING.LIB. If not, write
; to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
; Boston, MA 02110-1301, USA.
; (rdi,rdx) = (rsi,rdx)>>rcx
; rax = carry
%include '../yasm_mac.inc'
BITS 64
GLOBAL_FUNC mpn_rshift
mov eax, 64
lea rsi, [rsi+rdx*8-32]
lea rdi, [rdi+rdx*8-32]
sub rax, rcx
movq mm0, rcx
mov r8d, 4
sub r8, rdx
movq mm1, rax
movq mm5, [rsi+r8*8]
movq mm3, mm5
psllq mm5, mm1
movq rax, mm5
psrlq mm3, mm0
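; the jnc below still sees the flags from 'sub r8, rdx'; MMX ops leave EFLAGS untouched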
jnc skiploop
align 16
loop1:
movq mm2, [rsi+r8*8+8]
movq mm4, mm2
psllq mm2, mm1
por mm3, mm2
movq [rdi+r8*8], mm3
psrlq mm4, mm0
movq mm5, [rsi+r8*8+16]
movq mm3, mm5
psllq mm5, mm1
por mm4, mm5
movq [rdi+r8*8+8], mm4
psrlq mm3, mm0
; got room here for another jump out, if we can arrange our r8 to be
; slightly different, so we can use a jz or jp here
movq mm2, [rsi+r8*8+24]
movq mm4, mm2
psllq mm2, mm1
por mm3, mm2
movq [rdi+r8*8+16], mm3
psrlq mm4, mm0
movq mm5, [rsi+r8*8+32]
movq mm3, mm5
psllq mm5, mm1
por mm4, mm5
movq [rdi+r8*8+24], mm4
psrlq mm3, mm0
add r8, 4
jnc loop1
skiploop:
test r8, 2
jnz next
movq mm2, [rsi+r8*8+8]
movq mm4, mm2
psllq mm2, mm1
por mm3, mm2
movq [rdi+r8*8], mm3
psrlq mm4, mm0
movq mm5, [rsi+r8*8+16]
movq mm3, mm5
psllq mm5, mm1
por mm4, mm5
movq [rdi+r8*8+8], mm4
psrlq mm3, mm0
add r8, 2
next:
test r8, 1
jnz end
movq mm2, [rsi+r8*8+8]
movq mm4, mm2
psllq mm2, mm1
por mm3, mm2
movq [rdi+r8*8], mm3
psrlq mm4, mm0
movq [rdi+r8*8+8], mm4
emms
ret
end:
movq [rdi+r8*8], mm3
emms
ret
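
mpn_rshift mirrors mpn_lshift, except that the out-shifted low bits of the first limb are returned in the most significant bits of rax (the psllq mm5, mm1 by 64-cnt at entry). A portable C sketch of the semantics, assuming 64-bit limbs and 0 < cnt < 64 (ref_rshift is an illustrative name, not part of MPIR):

#include <stdint.h>

typedef uint64_t mp_limb_t;

/* dst = src >> cnt over n limbs; returns the bits shifted out of the
   low limb, left-adjusted.  Walks low to high, so it is safe in place. */
mp_limb_t ref_rshift(mp_limb_t *dst, const mp_limb_t *src, long n, unsigned cnt)
{
    mp_limb_t carry = src[0] << (64 - cnt);
    for (long i = 0; i < n - 1; i++)
        dst[i] = (src[i] >> cnt) | (src[i + 1] << (64 - cnt));
    dst[n - 1] = src[n - 1] >> cnt;
    return carry;
}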


@@ -1,109 +0,0 @@
dnl AMD64 mpn_rshift
dnl Copyright 2008 Jason Moxham
dnl This file is part of the MPIR Library.
dnl The MPIR Library is free software; you can redistribute it and/or modify
dnl it under the terms of the GNU Lesser General Public License as published
dnl by the Free Software Foundation; either version 2.1 of the License, or (at
dnl your option) any later version.
dnl The MPIR Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
dnl License for more details.
dnl You should have received a copy of the GNU Lesser General Public License
dnl along with the MPIR Library; see the file COPYING.LIB. If not, write
dnl to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
dnl Boston, MA 02110-1301, USA.
include(`../config.m4')
C (rdi,rdx)=(rsi,rdx)>>rcx
C rax=carry
ASM_START()
PROLOGUE(mpn_rshift)
mov $64,%eax
lea -32(%rsi,%rdx,8),%rsi
lea -32(%rdi,%rdx,8),%rdi
sub %rcx,%rax
movq %rcx,%mm0
mov $4,%r8d
sub %rdx,%r8
movq %rax,%mm1
movq (%rsi,%r8,8),%mm5
movq %mm5,%mm3
psllq %mm1,%mm5
movq %mm5,%rax
psrlq %mm0,%mm3
jnc skiploop
ALIGN(16)
loop:
movq 8(%rsi,%r8,8),%mm2
movq %mm2,%mm4
psllq %mm1,%mm2
por %mm2,%mm3
movq %mm3,(%rdi,%r8,8)
psrlq %mm0,%mm4
movq 16(%rsi,%r8,8),%mm5
movq %mm5,%mm3
psllq %mm1,%mm5
por %mm5,%mm4
movq %mm4,8(%rdi,%r8,8)
psrlq %mm0,%mm3
C got room here for another jump out , if we can arrange our r8 to be
C slightly different , so we can use a jz or jp here
movq 24(%rsi,%r8,8),%mm2
movq %mm2,%mm4
psllq %mm1,%mm2
por %mm2,%mm3
movq %mm3,16(%rdi,%r8,8)
psrlq %mm0,%mm4
movq 32(%rsi,%r8,8),%mm5
movq %mm5,%mm3
psllq %mm1,%mm5
por %mm5,%mm4
movq %mm4,24(%rdi,%r8,8)
psrlq %mm0,%mm3
add $4,%r8
jnc loop
skiploop:
test $2,%r8
jnz next
movq 8(%rsi,%r8,8),%mm2
movq %mm2,%mm4
psllq %mm1,%mm2
por %mm2,%mm3
movq %mm3,(%rdi,%r8,8)
psrlq %mm0,%mm4
movq 16(%rsi,%r8,8),%mm5
movq %mm5,%mm3
psllq %mm1,%mm5
por %mm5,%mm4
movq %mm4,8(%rdi,%r8,8)
psrlq %mm0,%mm3
add $2,%r8
next:
test $1,%r8
jnz end
movq 8(%rsi,%r8,8),%mm2
movq %mm2,%mm4
psllq %mm1,%mm2
por %mm2,%mm3
movq %mm3,(%rdi,%r8,8)
psrlq %mm0,%mm4
movq %mm4,8(%rdi,%r8,8)
emms
ret
end:
movq %mm3,(%rdi,%r8,8)
emms
ret
EPILOGUE()

mpn/x86_64/amd64/rshift1.as Normal file

@@ -0,0 +1,105 @@
; AMD64 mpn_rshift1
; Copyright 2008 Jason Moxham
; This file is part of the MPIR Library.
; The MPIR Library is free software; you can redistribute it and/or modify
; it under the terms of the GNU Lesser General Public License as published
; by the Free Software Foundation; either version 2.1 of the License, or (at
; your option) any later version.
; The MPIR Library is distributed in the hope that it will be useful, but
; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
; License for more details.
; You should have received a copy of the GNU Lesser General Public License
; along with the MPIR Library; see the file COPYING.LIB. If not, write
; to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
; Boston, MA 02110-1301, USA.
; (rdi,rdx) = (rsi,rdx)>>1
; rax = carry
%include '../yasm_mac.inc'
BITS 64
GLOBAL_FUNC mpn_rshift1
xor rax, rax
lea rsi, [rsi+rdx*8-8]
lea rdi, [rdi+rdx*8-8]
mov r11, rdx
and r11, 7
inc r11
shr rdx, 3
; the cmp below also clears the carry flag for the rcr chain
cmp rdx, 0
jz next
align 16
loop1:
mov rcx, [rsi]
mov r8, [rsi-8]
mov r9, [rsi-16]
mov r10, [rsi-24]
rcr rcx, 1
rcr r8, 1
rcr r9, 1
rcr r10, 1
mov [rdi], rcx
mov [rdi-8], r8
mov [rdi-16], r9
mov [rdi-24], r10
mov rcx, [rsi-32]
mov r8, [rsi-40]
mov r9, [rsi-48]
mov r10, [rsi-56]
rcr rcx, 1
rcr r8, 1
rcr r9, 1
rcr r10, 1
mov [rdi-32], rcx
mov [rdi-40], r8
mov [rdi-48], r9
mov [rdi-56], r10
lea rsi, [rsi-64]
dec rdx
lea rdi, [rdi-64]
jnz loop1
next:
dec r11
jz end
; Could suffer cache-bank conflicts in this tail part
mov rcx, [rsi]
rcr rcx, 1
mov [rdi], rcx
dec r11
jz end
mov rcx, [rsi-8]
rcr rcx, 1
mov [rdi-8], rcx
dec r11
jz end
mov rcx, [rsi-16]
rcr rcx, 1
mov [rdi-16], rcx
dec r11
jz end
mov rcx, [rsi-24]
rcr rcx, 1
mov [rdi-24], rcx
dec r11
jz end
mov rcx, [rsi-32]
rcr rcx, 1
mov [rdi-32], rcx
dec r11
jz end
mov rcx, [rsi-40]
rcr rcx, 1
mov [rdi-40], rcx
dec r11
jz end
mov rcx, [rsi-48]
rcr rcx, 1
mov [rdi-48], rcx
end:
rcr rax, 1
ret
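
Here rcr reg, 1 does the whole job: it shifts the limb right by one, inserts the previous carry flag at bit 63, and leaves the ejected low bit in CF; the final rcr rax, 1 (rax starts at 0) parks the last out-shifted bit in the top bit of the return value. A portable C sketch of the semantics, assuming 64-bit limbs (ref_rshift1 is an illustrative name, not part of MPIR):

#include <stdint.h>

typedef uint64_t mp_limb_t;

/* dst = src >> 1 over n limbs; returns the out-shifted low bit in the
   top bit of the result, matching the final 'rcr rax, 1'. */
mp_limb_t ref_rshift1(mp_limb_t *dst, const mp_limb_t *src, long n)
{
    mp_limb_t carry = 0;
    for (long i = n - 1; i >= 0; i--) {
        mp_limb_t out = src[i] & 1;                /* bit that leaves this limb */
        dst[i] = (src[i] >> 1) | (carry << 63);    /* previous limb's bit enters at the top */
        carry = out;
    }
    return carry << 63;
}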


@@ -1,109 +0,0 @@
dnl AMD64 mpn_rshift1
dnl Copyright 2008 Jason Moxham
dnl This file is part of the MPIR Library.
dnl The MPIR Library is free software; you can redistribute it and/or modify
dnl it under the terms of the GNU Lesser General Public License as published
dnl by the Free Software Foundation; either version 2.1 of the License, or (at
dnl your option) any later version.
dnl The MPIR Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
dnl License for more details.
dnl You should have received a copy of the GNU Lesser General Public License
dnl along with the MPIR Library; see the file COPYING.LIB. If not, write
dnl to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
dnl Boston, MA 02110-1301, USA.
include(`../config.m4')
C (rdi,rdx)=(rsi,rdx)>>1
C rax=carry
ASM_START()
PROLOGUE(mpn_rshift1)
xor %rax,%rax
lea -8(%rsi,%rdx,8),%rsi
lea -8(%rdi,%rdx,8),%rdi
mov %rdx,%r11
and $7,%r11
inc %r11
shr $3,%rdx
C and clear carry flag
cmp $0,%rdx
jz next
ALIGN(16)
loop:
mov (%rsi),%rcx
mov -8(%rsi),%r8
mov -16(%rsi),%r9
mov -24(%rsi),%r10
rcr $1,%rcx
rcr $1,%r8
rcr $1,%r9
rcr $1,%r10
mov %rcx,(%rdi)
mov %r8,-8(%rdi)
mov %r9,-16(%rdi)
mov %r10,-24(%rdi)
mov -32(%rsi),%rcx
mov -40(%rsi),%r8
mov -48(%rsi),%r9
mov -56(%rsi),%r10
rcr $1,%rcx
rcr $1,%r8
rcr $1,%r9
rcr $1,%r10
mov %rcx,-32(%rdi)
mov %r8,-40(%rdi)
mov %r9,-48(%rdi)
mov %r10,-56(%rdi)
lea -64(%rsi),%rsi
dec %rdx
lea -64(%rdi),%rdi
jnz loop
next:
dec %r11
jz end
C Could suffer cache-bank conflicts in this tail part
mov (%rsi),%rcx
rcr $1,%rcx
mov %rcx,(%rdi)
dec %r11
jz end
mov -8(%rsi),%rcx
rcr $1,%rcx
mov %rcx,-8(%rdi)
dec %r11
jz end
mov -16(%rsi),%rcx
rcr $1,%rcx
mov %rcx,-16(%rdi)
dec %r11
jz end
mov -24(%rsi),%rcx
rcr $1,%rcx
mov %rcx,-24(%rdi)
dec %r11
jz end
mov -32(%rsi),%rcx
rcr $1,%rcx
mov %rcx,-32(%rdi)
dec %r11
jz end
mov -40(%rsi),%rcx
rcr $1,%rcx
mov %rcx,-40(%rdi)
dec %r11
jz end
mov -48(%rsi),%rcx
rcr $1,%rcx
mov %rcx,-48(%rdi)
end:
rcr $1,%rax
ret
EPILOGUE()