121 lines
2.6 KiB
NASM
121 lines
2.6 KiB
NASM
|
dnl SPARC mpn_add_n -- Add two limb vectors of the same length > 0 and store
|
||
|
dnl sum in a third limb vector.
|
||
|
|
||
|
dnl Copyright 2001 Free Software Foundation, Inc.
|
||
|
|
||
|
dnl This file is part of the GNU MP Library.
|
||
|
|
||
|
dnl The GNU MP Library is free software; you can redistribute it and/or modify
|
||
|
dnl it under the terms of the GNU Lesser General Public License as published
|
||
|
dnl by the Free Software Foundation; either version 2.1 of the License, or (at
|
||
|
dnl your option) any later version.
|
||
|
|
||
|
dnl The GNU MP Library is distributed in the hope that it will be useful, but
|
||
|
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||
|
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
||
|
dnl License for more details.
|
||
|
|
||
|
dnl You should have received a copy of the GNU Lesser General Public License
|
||
|
dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write
|
||
|
dnl to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
|
||
|
dnl Boston, MA 02110-1301, USA.
|
||
|
|
||
|
|
||
|
include(`../config.m4')
|
||
|
|
||
|
C INPUT PARAMETERS
|
||
|
C rp: destination limb pointer; the sum limbs are stored through it.
define(rp,%o0)
|
||
|
C s1p: pointer to the first source limb vector.
define(s1p,%o1)
|
||
|
C s2p: pointer to the second source limb vector.
define(s2p,%o2)
|
||
|
C n: number of limbs in each vector; must be > 0 per the file header.
define(n,%o3)
|
||
|
C cy: carry passed from one limb to the next.  This is a scratch register,
C not an incoming argument: the code always writes it before reading it.
define(cy,%g1)
|
||
|
|
||
|
C This code uses 64-bit operations on `o' and `g' registers. It doesn't
|
||
|
C require that `o' registers' upper 32 bits are preserved by the operating
|
||
|
C system, but if they are not, they must be zeroed. That is indeed what
|
||
|
C happens at least on Solaris 2.5 and 2.6.
|
||
|
|
||
|
C On UltraSPARC 1 and 2, this code runs at 3 cycles/limb from the Dcache and at
|
||
|
C about 10 cycles/limb from the Ecache.
|
||
|
|
||
|
ASM_START()
|
||
|
C mpn_add_n -- add the n-limb vectors at s1p and s2p, store the n-limb sum at
C rp, and return the carry out of the most significant limb.
C
C In:    rp (%o0), s1p (%o1), s2p (%o2) = limb pointers
C        n (%o3) = limb count, must be at least 1
C Out:   %o0 = carry out, 0 or 1
C Clobb: %o0-%o5, %g1-%g4, %f2/%f3 (destination of the fitod fillers), icc
C
C Limbs are 32 bits wide (lduw/stw with a 4-byte stride).  Each limb is loaded
C zero-extended into a 64-bit register, so a plain 64-bit add leaves the carry
C in bit 32, from where srlx by 32 extracts it.
C
C The loop handles two limbs per iteration and keeps two limb pairs preloaded:
C %o4/%o5 is the next pair to add and %g2/%g3 the pair after it.  No load is
C issued until the limb count shows that the limb exists, so neither source
C vector is read beyond limb n-1.
C
C NOTE(review): the fitod results are never read.  Presumably they are fillers
C that shape UltraSPARC instruction grouping, with the C --- lines marking the
C intended groups -- confirm before removing or moving them.

PROLOGUE(mpn_add_n)
	lduw	[s1p+0],%o4		C limb 0 of s1, zero-extended
	addcc	n,-2,n			C n = limbs beyond the first two
	bl,pn	%icc,L(end1)		C taken when n was 1
	lduw	[s2p+0],%o5		C (delay slot) limb 0 of s2, needed on both paths
C Limb 1 is loaded only on the fall-through path, where n was at least 2, so a
C one-limb operand is never read past its end.  The loads leave icc untouched,
C so the be below still tests the addcc result.
	lduw	[s1p+4],%g2
	lduw	[s2p+4],%g3
	be,pn	%icc,L(end2)		C taken when n was exactly 2
	mov	0,cy			C (delay slot) no carry into limb 0

	.align	16
L(loop):
C On entry: %o4/%o5 = pair to be stored at rp+0, %g2/%g3 = pair for rp+4,
C cy = carry into the first pair, s1p/s2p address the limbs held in %o4/%o5,
C and n >= 1 counts the limbs not yet loaded.
	add	%o4,%o5,%g4		C 64-bit sum of the first pair
	add	rp,8,rp
	lduw	[s1p+8],%o4		C preload the s1 limb two places ahead
	fitod	%f0,%f2			C filler, result unused
C ---
	add	cy,%g4,%g4		C add the incoming carry
	addcc	n,-1,n			C one more limb consumed by the preload
	lduw	[s2p+8],%o5
	fitod	%f0,%f2
C ---
	srlx	%g4,32,cy		C carry out = bit 32 of the sum
	add	s2p,8,s2p
	stw	%g4,[rp-8]
	be,pn	%icc,L(exito)+4	C nothing left to load: odd tail.  The +4 skips
C				  the first insn of exito, done in the delay slot
C ---
	add	%g2,%g3,%g4		C (delay slot) 64-bit sum of the second pair
	addcc	n,-1,n
	lduw	[s1p+12],%g2		C s1p not yet advanced, so this is limb +3
	fitod	%f0,%f2
C ---
	add	cy,%g4,%g4
	add	s1p,8,s1p
	lduw	[s2p+4],%g3		C s2p already advanced by 8, so also limb +3
	fitod	%f0,%f2
C ---
	srlx	%g4,32,cy
	bne,pt	%icc,L(loop)		C more limbs to load: go round again
	stw	%g4,[rp-4]		C (delay slot) store the second sum
C ---
L(exite):
C Reached by falling out of the loop, which happens for even n >= 4.  Both
C preloaded pairs are still pending: %o4/%o5 for rp+0 and %g2/%g3 for rp+4.
	add	%o4,%o5,%g4
	add	cy,%g4,%g4
	srlx	%g4,32,cy
	stw	%g4,[rp+0]
	add	%g2,%g3,%g4
	add	cy,%g4,%g4
	stw	%g4,[rp+4]
	retl
	srlx	%g4,32,%o0		C (delay slot) return the final carry

L(exito):
C Reached from the middle of the loop, which happens for odd n >= 3.  The loop
C enters at L(exito)+4 with the add below already done in its delay slot.
C Pending are %g2/%g3 for rp-4 and the just-loaded %o4/%o5 for rp+0.
	add	%g2,%g3,%g4
	add	cy,%g4,%g4
	srlx	%g4,32,cy
	stw	%g4,[rp-4]
	add	%o4,%o5,%g4
	add	cy,%g4,%g4
	stw	%g4,[rp+0]
	retl
	srlx	%g4,32,%o0		C (delay slot) return the final carry

L(end1):
C n was 1: a single limb and no incoming carry.
	add	%o4,%o5,%g4
	stw	%g4,[rp+0]
	retl
	srlx	%g4,32,%o0		C (delay slot) return the carry

L(end2):
C n was 2: both limbs are already loaded, so no loop is needed.
	add	%o4,%o5,%g4
	srlx	%g4,32,cy
	stw	%g4,[rp+0]
	add	%g2,%g3,%g4
	add	cy,%g4,%g4
	stw	%g4,[rp+4]
	retl
	srlx	%g4,32,%o0		C (delay slot) return the final carry
EPILOGUE(mpn_add_n)
|