mpir/mpn/ia64/sqr_diagonal.asm
2010-03-08 00:26:51 +00:00

80 lines
2.1 KiB
NASM

dnl IA-64 mpn_sqr_diagonal. Helper for sqr_basecase.
dnl Copyright 2001, 2002, 2004 Free Software Foundation, Inc.
dnl This file is part of the GNU MP Library.
dnl The GNU MP Library is free software; you can redistribute it and/or modify
dnl it under the terms of the GNU Lesser General Public License as published
dnl by the Free Software Foundation; either version 3 of the License, or (at
dnl your option) any later version.
dnl The GNU MP Library is distributed in the hope that it will be useful, but
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
dnl License for more details.
dnl You should have received a copy of the GNU Lesser General Public License
dnl along with the GNU MP Library. If not, see http://www.gnu.org/licenses/.
include(`../config.m4')
C cycles/limb
C Itanium: 4
C Itanium 2: 2
C TODO
C * Perhaps avoid ctop loop. Unfortunately, a cloop loop running at 1 c/l
C would need prohibitive 8-way unrolling.
C * Instead of messing too much with this, write a nifty mpn_sqr_basecase.
C INPUT PARAMETERS
C rp = r32
C sp = r33
C n = r34
C mpn_sqr_diagonal: for each of the n limbs at sp, form the full square of
C the limb and store its low half at rp[2*i] and its high half at rp[2*i+1].
C
C Register usage in this routine:
C   r32           rp, store pointer for low halves, advanced 16 bytes per store
C   r18           rp + 8, store pointer for high halves, advanced 16 bytes per store
C   r33           sp, load pointer, advanced 8 bytes per load
C   r19           n - 1, the value written to ar.lc
C   r2, r14, r15  saved copies of ar.lc, ar.ec and pr
C   f32 upwards   rotating registers carrying source limbs and products
C
C NOTE(review): n = 0 would write -1 to ar.lc. Callers presumably always pass
C n >= 1, confirm against sqr_basecase.
ASM_START()
PROLOGUE(mpn_sqr_diagonal)
.prologue
C Unwind annotations: ar.lc is kept in r2 and pr in r15 for the whole routine.
.save ar.lc, r2
.save pr, r15
.body
C Under the 32-bit ABI, widen both pointers with addp4 and zero extend n
C before any of them is used.
ifdef(`HAVE_ABI_32',
` addp4 r32 = 0, r32
addp4 r33 = 0, r33
zxt4 r34 = r34
;;
')
ldf8 f32 = [r33], 8 C M load sp[0] early
mov r2 = ar.lc C I0
mov r14 = ar.ec C I0
mov r15 = pr C I0
add r19 = -1, r34 C M I decr n
add r18 = 8, r32 C M I rp for high limb
;;
C Set up the modulo-scheduled loop: n - 1 further loads, an epilogue count
C of 5, and only p16 set among the rotating predicates.
mov ar.lc = r19 C I0
mov ar.ec = 5 C I0
mov pr.rot = 1<<16 C I0
;;
C With ar.ec = 5 this branch is not expected to be taken here. It appears to
C be used for its rotation side effect, moving the limb loaded above from
C f32 to f33 so that it enters the pipeline one stage ahead of the loop.
br.cexit.spnt .Ldone C B
;;
ALIGN(32)
C Pipeline stages, selected by the rotating predicates:
C   p16  load the next source limb into f32
C   p19  three rotations later the limb is in f35, compute low and high halves
C   p21  two rotations later the halves are in f38 and f42, store them
.Loop:
(p16) ldf8 f32 = [r33], 8 C M
(p19) xma.l f36 = f35, f35, f0 C F
(p21) stf8 [r32] = f38, 16 C M2 M3
(p19) xma.hu f40 = f35, f35, f0 C F
(p21) stf8 [r18] = f42, 16 C M2 M3
br.ctop.dptk .Loop C B
;;
C The loop exits with the last square already rotated into f38 and f42 but
C before its p21 store stage has run, so that final pair is stored here.
.Ldone:
stf8 [r32] = f38 C M2 M3
stf8 [r18] = f42 C M2 M3
mov ar.ec = r14 C I0
;;
C Restore the caller state saved on entry. Mask 0x1ffff selects every
C predicate register.
mov pr = r15, 0x1ffff C I0
mov ar.lc = r2 C I0
br.ret.sptk.many b0 C B
EPILOGUE(mpn_sqr_diagonal)
ASM_END()