220 lines
5.0 KiB
NASM
220 lines
5.0 KiB
NASM
|
dnl PA64 mpn_addlsh1_n/mpn_sublsh1_n -- rp[] = up[] +- (vp[] << 1).
|
||
|
|
||
|
dnl Copyright 2003 Free Software Foundation, Inc.
|
||
|
|
||
|
dnl This file is part of the GNU MP Library.
|
||
|
|
||
|
dnl The GNU MP Library is free software; you can redistribute it and/or modify
|
||
|
dnl it under the terms of the GNU Lesser General Public License as published
|
||
|
dnl by the Free Software Foundation; either version 2.1 of the License, or (at
|
||
|
dnl your option) any later version.
|
||
|
|
||
|
dnl The GNU MP Library is distributed in the hope that it will be useful, but
|
||
|
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
||
|
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
||
|
dnl License for more details.
|
||
|
|
||
|
dnl You should have received a copy of the GNU Lesser General Public License
|
||
|
dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write
|
||
|
dnl to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
|
||
|
dnl Boston, MA 02110-1301, USA.
|
||
|
|
||
|
include(`../config.m4')
|
||
|
|
||
|
C cycles/limb
|
||
|
C 8000,8200: 2
|
||
|
C 8500,8600,8700: 1.75
|
||
|
|
||
|
C TODO
|
||
|
C * Write special feed-in code for each (n mod 8). (See the ia64 code.)
|
||
|
C * Try to make this run at closer to 1.5 c/l.
|
||
|
C * Set up register aliases (define(`u0',`%r19')).
|
||
|
C * Explicitly align loop.
|
||
|
|
||
|
dnl INPUT PARAMETERS
|
||
|
dnl Register aliases for the four arguments, as the code below uses them:
dnl   rp  address the result limbs are stored to
dnl   up  address the operand that is added to or subtracted from is loaded from
dnl   vp  address the operand that is shifted left one bit is loaded from
dnl   n   limb count; its low three bits give the trip count of loop0
dnl NOTE(review): %r26 down to %r23 look like the first four argument
dnl registers of the PA-RISC calling convention -- confirm against the ABI.
define(`rp',`%r26')
|
||
|
define(`up',`%r25')
|
||
|
define(`vp',`%r24')
|
||
|
define(`n',`%r23')
|
||
|
|
||
|
dnl Operation selection.  Each OPERATION_ symbol that is defined sets
dnl   ADCSBC  the instruction applied to every limb (add,dc or sub,db)
dnl   INITC   an ldi with its destination left open; the use site supplies
dnl           the register, giving the initial saved-carry value
dnl   func    the name passed to PROLOGUE
dnl NOTE(review): the OPERATION_ symbols are not defined in this file;
dnl presumably the build defines exactly one of them per object -- confirm.
dnl
dnl Add variant: the saved carry starts at 0.
ifdef(`OPERATION_addlsh1_n',`
|
||
|
define(ADCSBC, `add,dc')
|
||
|
define(INITC, `ldi 0,')
|
||
|
define(func, mpn_addlsh1_n)
|
||
|
')
|
||
|
dnl Subtract variant: the saved carry starts at 1.
dnl NOTE(review): presumably because sub,db treats a set carry bit as
dnl "no borrow" -- confirm in the PA-RISC 2.0 architecture manual.
ifdef(`OPERATION_sublsh1_n',`
|
||
|
define(ADCSBC, `sub,db')
|
||
|
define(INITC, `ldi 1,')
|
||
|
define(func, mpn_sublsh1_n)
|
||
|
')
|
||
|
|
||
|
dnl Lists both entry points this file can produce.
dnl NOTE(review): MULFUNC_PROLOGUE is defined outside this file; presumably
dnl it is scanned by the build system rather than emitting code -- confirm.
MULFUNC_PROLOGUE(mpn_addlsh1_n mpn_sublsh1_n)
|
||
|
|
||
|
dnl ABI selection.  Each HAVE_ABI_ symbol that is defined sets
dnl   LEVEL    the assembler .level directive emitted before the function
dnl   RETREG   the register the function writes its return value to
dnl   CLRRET1  an optional extra instruction placed after the return value
dnl            has been computed
dnl
dnl 2.0w: return value in %r28; CLRRET1 expands to dnl, so it emits nothing.
ifdef(`HAVE_ABI_2_0w',`
|
||
|
define(LEVEL, `.level 2.0w')
|
||
|
define(RETREG, `%r28')
|
||
|
define(CLRRET1, `dnl')
|
||
|
')
|
||
|
dnl 2.0n: return value in %r29; CLRRET1 clears %r28.
dnl NOTE(review): presumably the 64-bit result is returned in the pair
dnl %r28:%r29 with %r28 as the high half -- confirm against the ABI.
ifdef(`HAVE_ABI_2_0n',`
|
||
|
define(LEVEL, `.level 2.0')
|
||
|
define(RETREG, `%r29')
|
||
|
define(CLRRET1, `ldi 0, %r28')
|
||
|
')
|
||
|
|
||
|
dnl Emit the .level directive chosen above.
LEVEL
|
||
|
C func -- rp[] = up[] +- (vp[] << 1) over n limbs.  func expands to
C mpn_addlsh1_n or mpn_sublsh1_n and ADCSBC to add,dc or sub,db, depending
C on which OPERATION_ symbol is defined.
C
C Inputs:  rp, up, vp, n (the register aliases defined earlier in the file).
C Result:  RETREG = carry or borrow out of the top limb, plus the bit that
C          was shifted out of the top of the last vp limb.
C
C Register use, as read from the code below:
C   %r1                 carry parked between blocks, seeded by INITC
C   %r28                most recent raw vp limb; its top bit feeds the next shift
C   %r31,%r4-%r9,%r3    shifted vp limbs 0..7 of the current 8-limb block
C   %r19-%r22           up limbs, four at a time
C   %r29                each result limb before it is stored
C   %r3-%r9             saved in a 0x100 byte frame and reloaded before return
C
C The code is hand scheduled.  Instructions written directly after a branch
C are relied on to execute with that branch (delay slot); do not reorder.
C
C NOTE(review): with n = 0 the count for loop0 is zero and control goes to
C the unrolled path, which processes 8 limbs.  Callers presumably guarantee
C n >= 1 -- confirm.
PROLOGUE(func)
|
||
|
C Store %r3 at %r30, then advance %r30 by 0x100.  The slots at -0xf8 to
C -0xd0 from the new %r30 are used further down for %r4-%r9.
std,ma %r3, 0x100(%r30) C save reg
|
||
|
|
||
|
C %r1 = 0 for add, 1 for sub.  Each "addi -1, %r1, %r0" below turns this
C value back into the carry bit and discards the sum.
INITC %r1 C init saved cy
|
||
|
|
||
|
C Primitive code for the first (n mod 8) limbs:
|
||
|
C %r22 = low three bits of n.  If zero, skip loop0.  The copy after the
C branch clears %r28 so that a zero bit is shifted into the very first limb;
C the unrolled path reads %r28 too, so it must run on both paths.
extrd,u n, 63, 3, %r22 C count for loop0
|
||
|
comib,= 0, %r22, L(unrolled) C skip loop0?
|
||
|
copy %r0, %r28
|
||
|
C loop0: one limb per pass, %r22 passes.
LDEF(loop0)
|
||
|
C Fetch the next vp and up limbs and step both source pointers.
ldd 0(vp), %r21
|
||
|
ldo 8(vp), vp
|
||
|
ldd 0(up), %r19
|
||
|
ldo 8(up), up
|
||
|
C %r31 = vp limb shifted left one bit, with the top bit of the previous vp
C limb (%r28) brought in at the bottom.
shrpd %r21, %r28, 63, %r31
|
||
|
C Re-create the carry from %r1, combine, store, park the new carry in %r1.
addi -1, %r1, %r0 C restore cy
|
||
|
ADCSBC %r19, %r31, %r29
|
||
|
std %r29, 0(rp)
|
||
|
add,dc %r0, %r0, %r1 C save cy
|
||
|
C Keep the raw vp limb for the next shift.
copy %r21, %r28
|
||
|
C Decrement %r22 and loop while it is still positive; rp is advanced by the
C ldo placed after the branch.
addib,> -1, %r22, L(loop0)
|
||
|
ldo 8(rp), rp
|
||
|
|
||
|
C n -= 8.  If that leaves n >= 0 there is at least one full 8-limb block
C left, handled at unrolled.  The addi after the branch re-creates the carry
C for whichever path follows.
addib,>= -8, n, L(unrolled)
|
||
|
addi -1, %r1, %r0 C restore cy
|
||
|
|
||
|
C Fewer than 8 limbs in total, all of them done by loop0.  Build the return
C value: %r28 becomes the top bit of the last vp limb, then ADCSBC folds in
C the pending carry.
shrpd %r0, %r28, 63, %r28
|
||
|
ADCSBC %r0, %r28, RETREG
|
||
|
C sub only: replace RETREG by 0 - RETREG.
C NOTE(review): presumably the sub,db above leaves minus (bit + borrow), so
C this makes the returned count non-negative -- confirm.
ifdef(`OPERATION_sublsh1_n',
|
||
|
` sub %r0, RETREG, RETREG')
|
||
|
C CLRRET1 is "ldi 0, %r28" under HAVE_ABI_2_0n and empty under HAVE_ABI_2_0w.
CLRRET1
|
||
|
|
||
|
C Return through %r2.  The load after the branch steps %r30 back by 0x100
C and reloads %r3 from there, undoing the std,ma at entry.
bve (%r2)
|
||
|
ldd,mb -0x100(%r30), %r3
|
||
|
|
||
|
|
||
|
C unrolled: 8 limbs per block.  Save %r4-%r9 in the frame while loading
C vp[0..7] into %r4,%r5,%r6,%r7,%r8,%r9,%r3,%r28, then shift them left one
C bit as a chain.  After the shifts the shifted limbs 0..7 sit in
C %r31,%r4,%r5,%r6,%r7,%r8,%r9,%r3 and %r28 still holds raw vp[7] for the
C next block.  Each shrpd reads a raw limb just before overwriting it.
LDEF(unrolled)
|
||
|
std %r4, -0xf8(%r30) C save reg
|
||
|
ldd 0(vp), %r4
|
||
|
std %r5, -0xf0(%r30) C save reg
|
||
|
ldd 8(vp), %r5
|
||
|
std %r6, -0xe8(%r30) C save reg
|
||
|
ldd 16(vp), %r6
|
||
|
std %r7, -0xe0(%r30) C save reg
|
||
|
|
||
|
ldd 24(vp), %r7
|
||
|
shrpd %r4, %r28, 63, %r31
|
||
|
std %r8, -0xd8(%r30) C save reg
|
||
|
ldd 32(vp), %r8
|
||
|
shrpd %r5, %r4, 63, %r4
|
||
|
std %r9, -0xd0(%r30) C save reg
|
||
|
ldd 40(vp), %r9
|
||
|
shrpd %r6, %r5, 63, %r5
|
||
|
ldd 48(vp), %r3
|
||
|
shrpd %r7, %r6, 63, %r6
|
||
|
ldd 56(vp), %r28
|
||
|
shrpd %r8, %r7, 63, %r7
|
||
|
C Preload up[0..3] into %r19-%r22, interleaved with the remaining shifts.
ldd 0(up), %r19
|
||
|
shrpd %r9, %r8, 63, %r8
|
||
|
ldd 8(up), %r20
|
||
|
shrpd %r3, %r9, 63, %r9
|
||
|
ldd 16(up), %r21
|
||
|
shrpd %r28, %r3, 63, %r3
|
||
|
ldd 24(up), %r22
|
||
|
|
||
|
nop C alignment FIXME
|
||
|
C n -= 8.  If n <= 0 the block just prepared is the last one: skip the loop
C and finish at end.  The carry is re-created after the branch.
addib,<= -8, n, L(end)
|
||
|
addi -1, %r1, %r0 C restore cy
|
||
|
C loop: consume the prepared block while fetching and shifting the next.
LDEF(loop)
|
||
|
C Limbs 0..3.  Each of %r19-%r22 is reloaded with up[4..7] right after its
C previous value has been consumed.
ADCSBC %r19, %r31, %r29
|
||
|
ldd 32(up), %r19
|
||
|
std %r29, 0(rp)
|
||
|
ADCSBC %r20, %r4, %r29
|
||
|
ldd 40(up), %r20
|
||
|
std %r29, 8(rp)
|
||
|
ADCSBC %r21, %r5, %r29
|
||
|
ldd 48(up), %r21
|
||
|
std %r29, 16(rp)
|
||
|
ADCSBC %r22, %r6, %r29
|
||
|
ldd 56(up), %r22
|
||
|
std %r29, 24(rp)
|
||
|
C Limbs 4..7.  %r4-%r6 are free now, so the raw limbs vp[8..10] of the next
C block are loaded into them between the arithmetic.
ADCSBC %r19, %r7, %r29
|
||
|
ldd 64(vp), %r4
|
||
|
std %r29, 32(rp)
|
||
|
ADCSBC %r20, %r8, %r29
|
||
|
ldd 72(vp), %r5
|
||
|
std %r29, 40(rp)
|
||
|
ADCSBC %r21, %r9, %r29
|
||
|
ldd 80(vp), %r6
|
||
|
std %r29, 48(rp)
|
||
|
ADCSBC %r22, %r3, %r29
|
||
|
std %r29, 56(rp)
|
||
|
|
||
|
C Park the carry of this block in %r1 before the loop-control code.
add,dc %r0, %r0, %r1 C save cy
|
||
|
|
||
|
C Load vp[11..15] and shift the next block.  The first shift still needs
C raw vp[7] in %r28, so %r28 is only overwritten (with vp[15]) afterwards.
ldd 88(vp), %r7
|
||
|
shrpd %r4, %r28, 63, %r31
|
||
|
ldd 96(vp), %r8
|
||
|
shrpd %r5, %r4, 63, %r4
|
||
|
ldd 104(vp), %r9
|
||
|
shrpd %r6, %r5, 63, %r5
|
||
|
ldd 112(vp), %r3
|
||
|
shrpd %r7, %r6, 63, %r6
|
||
|
ldd 120(vp), %r28
|
||
|
shrpd %r8, %r7, 63, %r7
|
||
|
C Preload up[8..11], the first four up limbs of the next block.
ldd 64(up), %r19
|
||
|
shrpd %r9, %r8, 63, %r8
|
||
|
ldd 72(up), %r20
|
||
|
shrpd %r3, %r9, 63, %r9
|
||
|
ldd 80(up), %r21
|
||
|
shrpd %r28, %r3, 63, %r3
|
||
|
ldd 88(up), %r22
|
||
|
|
||
|
C Step all three pointers by one block (64 bytes), then n -= 8 and loop
C while n > 0.  The carry is re-created after the branch for either path.
ldo 64(vp), vp
|
||
|
ldo 64(rp), rp
|
||
|
ldo 64(up), up
|
||
|
addib,> -8, n, L(loop)
|
||
|
addi -1, %r1, %r0 C restore cy
|
||
|
C end: final block.  Same arithmetic as the loop body but no further vp
C loads; instead each of %r4-%r7 is reloaded from the frame right after its
C shifted limb has been consumed.
LDEF(end)
|
||
|
ADCSBC %r19, %r31, %r29
|
||
|
ldd 32(up), %r19
|
||
|
std %r29, 0(rp)
|
||
|
ADCSBC %r20, %r4, %r29
|
||
|
ldd 40(up), %r20
|
||
|
std %r29, 8(rp)
|
||
|
ADCSBC %r21, %r5, %r29
|
||
|
ldd 48(up), %r21
|
||
|
std %r29, 16(rp)
|
||
|
ADCSBC %r22, %r6, %r29
|
||
|
ldd 56(up), %r22
|
||
|
std %r29, 24(rp)
|
||
|
ADCSBC %r19, %r7, %r29
|
||
|
ldd -0xf8(%r30), %r4 C restore reg
|
||
|
std %r29, 32(rp)
|
||
|
ADCSBC %r20, %r8, %r29
|
||
|
ldd -0xf0(%r30), %r5 C restore reg
|
||
|
std %r29, 40(rp)
|
||
|
ADCSBC %r21, %r9, %r29
|
||
|
ldd -0xe8(%r30), %r6 C restore reg
|
||
|
std %r29, 48(rp)
|
||
|
ADCSBC %r22, %r3, %r29
|
||
|
ldd -0xe0(%r30), %r7 C restore reg
|
||
|
std %r29, 56(rp)
|
||
|
|
||
|
C Return value, built as on the short path: %r28 becomes the top bit of the
C last raw vp limb, then ADCSBC folds in the carry of the last limb.
shrpd %r0, %r28, 63, %r28
|
||
|
ldd -0xd8(%r30), %r8 C restore reg
|
||
|
ADCSBC %r0, %r28, RETREG
|
||
|
C sub only: replace RETREG by 0 - RETREG.
C NOTE(review): presumably the sub,db above leaves minus (bit + borrow), so
C this makes the returned count non-negative -- confirm.
ifdef(`OPERATION_sublsh1_n',
|
||
|
` sub %r0, RETREG, RETREG')
|
||
|
C CLRRET1 is "ldi 0, %r28" under HAVE_ABI_2_0n and empty under HAVE_ABI_2_0w.
CLRRET1
|
||
|
|
||
|
C Reload %r9, then return through %r2.  The load after the branch steps
C %r30 back by 0x100 and reloads %r3, undoing the std,ma at entry.
ldd -0xd0(%r30), %r9 C restore reg
|
||
|
bve (%r2)
|
||
|
ldd,mb -0x100(%r30), %r3 C restore reg
|
||
|
EPILOGUE()
|