458 lines
7.2 KiB
NASM
458 lines
7.2 KiB
NASM
dnl MIPS64 mpn_divrem_1 -- Divide an mpn number by a single limb, which need not be normalized.
|
|
|
|
dnl Copyright 2003 Free Software Foundation, Inc.
|
|
|
|
dnl This file is part of the GNU MP Library.
|
|
|
|
dnl The GNU MP Library is free software; you can redistribute it and/or modify
|
|
dnl it under the terms of the GNU Lesser General Public License as published
|
|
dnl by the Free Software Foundation; either version 2.1 of the License, or (at
|
|
dnl your option) any later version.
|
|
|
|
dnl The GNU MP Library is distributed in the hope that it will be useful, but
|
|
dnl WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
|
|
dnl or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
|
|
dnl License for more details.
|
|
|
|
dnl You should have received a copy of the GNU Lesser General Public License
|
|
dnl along with the GNU MP Library; see the file COPYING.LIB. If not, write
|
|
dnl to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
|
|
dnl Boston, MA 02110-1301, USA.
|
|
|
|
include(`../config.m4')
|
|
|
|
C INPUT PARAMETERS
|
|
C qp = r4
|
|
C qxn = r5
|
|
C up = r6
|
|
C n = r7
|
|
C vl = r8
|
|
|
|
C cycles/limb
|
|
C R4000: ??
|
|
C R1x000: 35
|
|
|
|
C This was generated by gcc, then the code was manually edited. Lots of things
|
|
C could be streamlined. It would probably be a good idea to merge the loops
|
|
C for normalized and unnormalized divisor, since the shifting stuff is done for
|
|
C free in parallel with other operations.
|
|
|
|
ASM_START()
|
|
PROLOGUE(mpn_divrem_1)
C Divides the n limbs at up, followed by qxn implicit zero limbs, by the
C single limb vl.  n+qxn quotient limbs are stored downward starting at
C qp[n+qxn-1], and the final remainder is returned in $2.
C
C Register use after the entry sequence, as read from the code below:
C   $14  quotient store pointer, decremented by 8 after each store
C   $15  qxn (later reused as 64-$24 in the unnormalized integer loop)
C   $6   source pointer, $7 loop counter
C   $8   divisor (shifted left by $24 on the unnormalized path)
C   $13  running remainder
C   $5   multiplier used to estimate each quotient limb; presumably the
C        reciprocal of the divisor in the style of invert_limb -- TODO confirm
C   $24  normalization shift count
C   $25  qxn-1, counter for the fraction loops (overwritten after gp setup)
C
C NOTE(review): the instruction following each branch or jump is written
C as a branch delay slot (e.g. the stack pointer is restored after j $31),
C so this presumably relies on noreorder mode being set elsewhere, maybe
C by ASM_START or config.m4 -- confirm.  For the branch-likely forms
C (beql, bnel) the slot instruction takes effect only when the branch is
C taken.
C NOTE(review): pointers and counters are handled with 32-bit addu/sll,
C which suggests a 32-bit-pointer ABI such as n32 -- confirm.
|
|
C Reserve 32 bytes of stack and save $28 (gp) at 16($sp); restored at .L490.
dsubu $sp,$sp,32
|
|
sd $28,16($sp)
|
|
C Build $gp from $25 plus the %neg(%gp_rel()) offset of this function.
lui $1,%hi(%neg(%gp_rel(__gmpn_divrem_1)))
|
|
addiu $1,$1,%lo(%neg(%gp_rel(__gmpn_divrem_1)))
|
|
daddu $gp,$1,$25
|
|
C $14 = qp, $15 = qxn
move $14,$4
|
|
move $15,$5
|
|
C $4 = n + qxn, the total number of quotient limbs
addu $4,$7,$15
|
|
bne $4,$0,.L176
|
|
C (slot) remainder starts at 0
move $13,$0
|
|
C n + qxn == 0: nothing to do, return 0
b .L490
|
|
move $2,$0
|
|
C Reached from .L494 when vl<<1 == 0: use $5 = -1 instead of the division
C sequence and continue with the normalized loops.
.L491:
|
|
b .L229
|
|
dli $5,-1
|
|
.L176:
|
|
C $2 = 8*(n+qxn) - 8, byte offset of the most significant quotient limb
sll $2,$4,3
|
|
addu $2,$2,-8
|
|
C Test the most significant bit of vl
dli $3,0x8000000000000000
|
|
and $3,$8,$3
|
|
C Bit clear: divisor is unnormalized, handled at .L177
beq $3,$0,.L177
|
|
C (slot, executed for both paths) $14 = &qp[n+qxn-1]
addu $14,$14,$2
|
|
C ---- Normalized divisor ----
C n == 0: skip the high limb step
beq $7,$0,.L494
|
|
C (slot) $2 = vl << 1, tested at .L494
dsll $2,$8,1
|
|
C $13 = up[n-1], n = n-1
sll $2,$7,3
|
|
addu $2,$2,$6
|
|
ld $13,-8($2)
|
|
addu $7,$7,-1
|
|
C $2 = 1 if $13 >= vl, else 0: this is the top quotient limb
sltu $2,$13,$8
|
|
xori $2,$2,0x1
|
|
C Sign-extend then zero-extend the low 32 bits; the value is 0 or 1, so
C it is unchanged by these four shifts.
dsll $2,$2,32
|
|
dsra $2,$2,32
|
|
dsll $2,$2,32
|
|
dsrl $2,$2,32
|
|
sd $2,0($14)
|
|
addu $14,$14,-8
|
|
C $13 -= vl when the quotient limb was 1 (mask = -q)
dsubu $2,$0,$2
|
|
and $2,$8,$2
|
|
dsubu $13,$13,$2
|
|
C Recompute $2 = vl << 1 for the test at .L494
dsll $2,$8,1
|
|
.L494:
|
|
C vl << 1 == 0 (only the top bit of vl is set): take the $5 = -1 shortcut
beq $2,$0,.L491
|
|
C (slot) $9 = high 32 bits of vl
dsrl $9,$8,32
|
|
C Compute $5 from two 32-bit quotient digits: $11 (high) and $5 (low).
C Start with $4 = -vl and divide by the high half of vl.
dsubu $4,$0,$8
|
|
ddivu $0,$4,$9
|
|
C $12 = low 32 bits of vl
dli $2,0xffffffff
|
|
and $12,$8,$2
|
|
C $11 = first digit estimate
mflo $5
|
|
move $11,$5
|
|
C $3 = $11 * (high half of vl)
dmult $11,$9
|
|
mflo $3
|
|
nop
|
|
nop
|
|
C $10 = $11 * (low half of vl)
dmult $11,$12
|
|
mflo $10
|
|
C $4 = ($4 - $3) * 2^32, keeping the low 64 bits
dli $2,0x100000000
|
|
dsubu $4,$4,$3
|
|
dmult $4,$2
|
|
mflo $4
|
|
C If $4 >= $10 the estimate stands: subtract $10 (slot) and go to .L495
sltu $3,$4,$10
|
|
beql $3,$0,.L495
|
|
dsubu $4,$4,$10
|
|
C Estimate too large: add vl back and decrement $11, at most twice
daddu $4,$4,$8
|
|
sltu $2,$4,$8
|
|
C Carry out of the addition: one correction is enough
bne $2,$0,.L248
|
|
dsubu $11,$11,1
|
|
sltu $2,$4,$10
|
|
beql $2,$0,.L495
|
|
dsubu $4,$4,$10
|
|
dsubu $11,$11,1
|
|
daddu $4,$4,$8
|
|
.L248:
|
|
dsubu $4,$4,$10
|
|
.L495:
|
|
C Second digit: same steps on the new partial remainder, digit kept in $5
ddivu $0,$4,$9
|
|
mflo $2
|
|
move $5,$2
|
|
dmult $5,$9
|
|
mflo $3
|
|
nop
|
|
nop
|
|
dmult $5,$12
|
|
mflo $10
|
|
dli $2,0x100000000
|
|
dsubu $4,$4,$3
|
|
dmult $4,$2
|
|
mflo $4
|
|
sltu $3,$4,$10
|
|
C No correction needed: $2 still holds 0x100000000 for use at .L504
beq $3,$0,.L504
|
|
daddu $4,$4,$8
|
|
sltu $2,$4,$8
|
|
bne $2,$0,.L251
|
|
dsubu $5,$5,1
|
|
sltu $2,$4,$10
|
|
C Second decrement of $5 happens only when this branch is taken
bnel $2,$0,.L251
|
|
dsubu $5,$5,1
|
|
.L251:
|
|
C $2 was clobbered by the sltu above, reload 2^32
dli $2,0x100000000
|
|
.L504:
|
|
C $5 = ($11 << 32) | $5
dmult $11,$2
|
|
mflo $2
|
|
or $5,$2,$5
|
|
.L229:
|
|
C Integer part, normalized divisor.  $7 = remaining limbs - 1.
addu $7,$7,-1
|
|
C No integer limbs left: go to the fraction part
bltz $7,.L257
|
|
C (slot) $25 = qxn - 1
addu $25,$15,-1
|
|
C $6 = address of the highest remaining source limb
sll $2,$7,3
|
|
addu $6,$2,$6
|
|
.Loop1:
|
|
C $11 = next dividend limb
ld $11,0($6)
|
|
C Quotient estimate $10 = high64($13 * $5) + $13
dmultu $13,$5
|
|
mfhi $10
|
|
daddu $10,$10,$13
|
|
nop
|
|
C Product estimate * vl: low half in $2, high half in $9
dmultu $10,$8
|
|
mflo $2
|
|
mfhi $9
|
|
C Two-limb subtraction ($13:$11) - product; low in $2, high in $9
dsubu $2,$11,$2
|
|
dsubu $3,$13,$9
|
|
C Borrow out of the low limb
sltu $9,$11,$2
|
|
dsubu $9,$3,$9
|
|
C High limb zero: only the final compare at .L271 is needed
beq $9,$0,.L271
|
|
C (slot) $4 = low limb of the remainder candidate
move $4,$2
|
|
C Subtract vl once; $3 = borrow of that subtraction
dsubu $2,$4,$8
|
|
sltu $3,$4,$2
|
|
move $4,$2
|
|
C Borrow cancels the high limb: done after one increment (slot)
beq $9,$3,.L271
|
|
daddu $10,$10,1
|
|
C Otherwise subtract vl a second time and increment again
dsubu $4,$4,$8
|
|
daddu $10,$10,1
|
|
C Final adjustment: if the remainder is still >= vl, subtract once more
.L271: sltu $2,$4,$8
|
|
bne $2,$0,.L496
|
|
C (slot) new remainder
move $13,$4
|
|
dsubu $4,$4,$8
|
|
daddu $10,$10,1
|
|
move $13,$4
|
|
C Store the quotient limb and step all pointers/counters down
.L496: sd $10,0($14)
|
|
addu $14,$14,-8
|
|
addu $7,$7,-1
|
|
bgez $7,.Loop1
|
|
addu $6,$6,-8
|
|
.L257:
|
|
C Fraction part, normalized divisor: qxn iterations with a zero dividend limb
move $7,$25
|
|
C qxn == 0: return the remainder ($2 is set in the slot)
bltz $7,.L490
|
|
move $2,$13
|
|
.Loop2:
|
|
C Same step as Loop1 with the dividend limb taken as 0; estimate in $9
dmultu $13,$5
|
|
mfhi $9
|
|
daddu $9,$9,$13
|
|
nop
|
|
dmultu $9,$8
|
|
mflo $2
|
|
mfhi $6
|
|
C Low limb: 0 - low(product); $6 gets the borrow
dsubu $2,$0,$2
|
|
dsubu $3,$13,$6
|
|
sltu $6,$0,$2
|
|
dsubu $6,$3,$6
|
|
beq $6,$0,.L295
|
|
move $4,$2
|
|
dsubu $2,$4,$8
|
|
sltu $3,$4,$2
|
|
move $4,$2
|
|
beq $6,$3,.L295
|
|
daddu $9,$9,1
|
|
dsubu $4,$4,$8
|
|
daddu $9,$9,1
|
|
.L295: sltu $2,$4,$8
|
|
bne $2,$0,.L497
|
|
move $13,$4
|
|
dsubu $4,$4,$8
|
|
daddu $9,$9,1
|
|
move $13,$4
|
|
.L497: sd $9,0($14)
|
|
addu $7,$7,-1
|
|
bgez $7,.Loop2
|
|
addu $14,$14,-8
|
|
C Done: return the remainder
b .L490
|
|
move $2,$13
|
|
C ---- Unnormalized divisor (most significant bit of vl clear) ----
.L177:
|
|
C n == 0: no high limb to inspect
beq $7,$0,.L308
|
|
sll $2,$7,3
|
|
addu $2,$2,$6
|
|
C $12 = up[n-1]
ld $12,-8($2)
|
|
sltu $3,$12,$8
|
|
C High limb >= vl: process it in the general loop
beq $3,$0,.L308
|
|
C (slot) $4 = n + qxn - 1
addu $4,$4,-1
|
|
C High limb < vl: it becomes the initial remainder and the top quotient
C limb is stored as 0.
move $13,$12
|
|
sd $0,0($14)
|
|
C More quotient limbs to produce: continue at .L307
bne $4,$0,.L307
|
|
addu $14,$14,-8
|
|
C That was the only quotient limb: return the remainder
b .L490
|
|
move $2,$13
|
|
C Reached when the shifted vl has only its top bit set: use $5 = -1
.L492:
|
|
b .L395
|
|
dli $5,-1
|
|
.L307:
|
|
C The high limb has been consumed
addu $7,$7,-1
|
|
.L308:
|
|
C Find the highest nonzero byte of vl: $5 is a bit offset starting at 56
C and stepping down by 8.
dli $5,0x38
|
|
dsrl $2,$8,56
|
|
andi $2,$2,0xff
|
|
la $3,__gmpn_clz_tab
|
|
bne $2,$0,.L321
|
|
C (slot) $25 = qxn - 1
addu $25,$15,-1
|
|
dsubu $5,$5,8
|
|
.L499:
|
|
C Offset reached 0: stop searching ($5 becomes 1 in the slot)
beql $5,$0,.L498
|
|
daddu $5,$5,1
|
|
C Sign-extend the low 32 bits of $5 and use it as the shift amount
dsll $2,$5,32
|
|
dsra $2,$2,32
|
|
dsrl $2,$8,$2
|
|
andi $2,$2,0xff
|
|
C Byte is zero: step down another 8 bits (slot) and retry
beql $2,$0,.L499
|
|
dsubu $5,$5,8
|
|
.L321:
|
|
daddu $5,$5,1
|
|
.L498:
|
|
C Look up byte __gmpn_clz_tab[vl >> $5]
dsll $2,$5,32
|
|
dsra $2,$2,32
|
|
dsrl $2,$8,$2
|
|
dsll $2,$2,32
|
|
dsra $2,$2,32
|
|
addu $2,$2,$3
|
|
lbu $4,0($2)
|
|
C $24 = 0x41 - table value - $5.  Judging by its use this is the shift that
C moves the top set bit of vl to bit 63, but that depends on the contents
C of __gmpn_clz_tab, which are not visible here -- TODO confirm.
dli $3,0x41
|
|
dsubu $3,$3,$4
|
|
dsubu $3,$3,$5
|
|
dsll $24,$3,32
|
|
dsra $24,$24,32
|
|
C Shift the divisor left by $24
dsll $8,$8,$24
|
|
dsll $2,$8,1
|
|
beq $2,$0,.L492
|
|
C (slot) shift the initial remainder left by $24 as well
dsll $13,$13,$24
|
|
C Compute $5 for the shifted divisor: same two-digit sequence as on the
C normalized path, with $11 as high digit and $5 as low digit.
dsrl $9,$8,32
|
|
dsubu $4,$0,$8
|
|
ddivu $0,$4,$9
|
|
dli $2,0xffffffff
|
|
and $12,$8,$2
|
|
mflo $5
|
|
move $11,$5
|
|
dmult $11,$9
|
|
mflo $3
|
|
nop
|
|
nop
|
|
dmult $11,$12
|
|
mflo $10
|
|
dli $2,0x100000000
|
|
dsubu $4,$4,$3
|
|
dmult $4,$2
|
|
mflo $4
|
|
sltu $3,$4,$10
|
|
beql $3,$0,.L500
|
|
dsubu $4,$4,$10
|
|
daddu $4,$4,$8
|
|
sltu $2,$4,$8
|
|
bne $2,$0,.L414
|
|
dsubu $11,$11,1
|
|
sltu $2,$4,$10
|
|
beql $2,$0,.L500
|
|
dsubu $4,$4,$10
|
|
dsubu $11,$11,1
|
|
daddu $4,$4,$8
|
|
.L414:
|
|
dsubu $4,$4,$10
|
|
.L500:
|
|
C Second digit
ddivu $0,$4,$9
|
|
mflo $2
|
|
move $5,$2
|
|
dmult $5,$9
|
|
mflo $3
|
|
nop
|
|
nop
|
|
dmult $5,$12
|
|
mflo $10
|
|
dli $2,0x100000000
|
|
dsubu $4,$4,$3
|
|
dmult $4,$2
|
|
mflo $4
|
|
sltu $3,$4,$10
|
|
C No correction needed: $2 still holds 0x100000000 for use at .L505
beq $3,$0,.L505
|
|
daddu $4,$4,$8
|
|
sltu $2,$4,$8
|
|
bne $2,$0,.L417
|
|
dsubu $5,$5,1
|
|
sltu $2,$4,$10
|
|
bnel $2,$0,.L417
|
|
dsubu $5,$5,1
|
|
.L417:
|
|
dli $2,0x100000000
|
|
.L505:
|
|
C $5 = ($11 << 32) | $5
dmult $11,$2
|
|
mflo $2
|
|
or $5,$2,$5
|
|
.L395:
|
|
C Integer part, unnormalized divisor.  No source limbs left: fraction part.
beq $7,$0,.L422
|
|
sll $2,$7,3
|
|
addu $2,$2,$6
|
|
C $12 = highest remaining source limb
ld $12,-8($2)
|
|
addu $7,$7,-2
|
|
C $2 = 64 - $24; $3 = the bits of $12 that shift into the remainder
li $2,64
|
|
subu $2,$2,$24
|
|
dsrl $3,$12,$2
|
|
C Only one limb left: handle it at .L424
bltz $7,.L424
|
|
C (slot) merge those bits into the remainder
or $13,$13,$3
|
|
C $15 = 64 - $24 (qxn is no longer needed here, $25 already holds qxn-1)
move $15,$2
|
|
C $6 = address of the next lower source limb
sll $2,$7,3
|
|
addu $6,$2,$6
|
|
.Loop3:
|
|
C $11 = next lower source limb; $12 holds the previous one
ld $11,0($6)
|
|
C Quotient estimate $9 = high64($13 * $5) + $13
dmultu $13,$5
|
|
mfhi $9
|
|
daddu $9,$9,$13
|
|
nop
|
|
dmultu $9,$8
|
|
mflo $4
|
|
mfhi $10
|
|
C Shifted dividend limb $3 = ($12 << $24) | ($11 >> (64 - $24))
dsll $3,$12,$24
|
|
dsrl $2,$11,$15
|
|
or $3,$3,$2
|
|
C Two-limb subtraction ($13:$3) - product; low in $4, high in $10
dsubu $4,$3,$4
|
|
dsubu $2,$13,$10
|
|
sltu $10,$3,$4
|
|
dsubu $10,$2,$10
|
|
C Same adjustment scheme as Loop1; here the first subtract of vl sits in
C the delay slot and its result in $2 is unused when the branch is taken.
beq $10,$0,.L438
|
|
dsubu $2,$4,$8
|
|
sltu $3,$4,$2
|
|
move $4,$2
|
|
beq $10,$3,.L438
|
|
daddu $9,$9,1
|
|
dsubu $4,$4,$8
|
|
daddu $9,$9,1
|
|
.L438: sltu $2,$4,$8
|
|
bne $2,$0,.L501
|
|
move $13,$4
|
|
dsubu $4,$4,$8
|
|
daddu $9,$9,1
|
|
move $13,$4
|
|
.L501: sd $9,0($14)
|
|
addu $14,$14,-8
|
|
C Current limb becomes the previous limb for the next iteration
move $12,$11
|
|
addu $7,$7,-1
|
|
bgez $7,.Loop3
|
|
addu $6,$6,-8
|
|
.L424:
|
|
C Last integer limb: dividend limb is $12 << $24 with zeros shifted in.
C The quotient limb is built in $7.
dmultu $13,$5
|
|
mfhi $7
|
|
daddu $7,$7,$13
|
|
nop
|
|
dmultu $7,$8
|
|
mflo $2
|
|
mfhi $6
|
|
dsll $3,$12,$24
|
|
dsubu $2,$3,$2
|
|
dsubu $4,$13,$6
|
|
sltu $6,$3,$2
|
|
dsubu $6,$4,$6
|
|
beq $6,$0,.L458
|
|
move $4,$2
|
|
dsubu $2,$4,$8
|
|
sltu $3,$4,$2
|
|
move $4,$2
|
|
beq $6,$3,.L458
|
|
daddu $7,$7,1
|
|
dsubu $4,$4,$8
|
|
daddu $7,$7,1
|
|
.L458:
|
|
sltu $2,$4,$8
|
|
bne $2,$0,.L502
|
|
move $13,$4
|
|
dsubu $4,$4,$8
|
|
daddu $7,$7,1
|
|
move $13,$4
|
|
.L502:
|
|
sd $7,0($14)
|
|
addu $14,$14,-8
|
|
.L422:
|
|
C Fraction part, unnormalized divisor: qxn iterations with a zero limb
move $7,$25
|
|
C qxn == 0: return the remainder shifted back right by $24 (slot)
bltz $7,.L490
|
|
dsrl $2,$13,$24
|
|
.Loop4:
|
|
C Same step as Loop2, using the shifted divisor
dmultu $13,$5
|
|
mfhi $9
|
|
daddu $9,$9,$13
|
|
nop
|
|
dmultu $9,$8
|
|
mflo $2
|
|
mfhi $6
|
|
dsubu $2,$0,$2
|
|
dsubu $3,$13,$6
|
|
sltu $6,$0,$2
|
|
dsubu $6,$3,$6
|
|
beq $6,$0,.L481
|
|
move $4,$2
|
|
dsubu $2,$4,$8
|
|
sltu $3,$4,$2
|
|
move $4,$2
|
|
beq $6,$3,.L481
|
|
daddu $9,$9,1
|
|
dsubu $4,$4,$8
|
|
daddu $9,$9,1
|
|
.L481: sltu $2,$4,$8
|
|
bne $2,$0,.L503
|
|
move $13,$4
|
|
dsubu $4,$4,$8
|
|
daddu $9,$9,1
|
|
move $13,$4
|
|
.L503: sd $9,0($14)
|
|
addu $7,$7,-1
|
|
bgez $7,.Loop4
|
|
addu $14,$14,-8
|
|
C Return value: remainder shifted back right by $24
dsrl $2,$13,$24
|
|
C Common exit: restore $28, return, and release the stack frame in the slot
.L490:
|
|
ld $28,16($sp)
|
|
j $31
|
|
daddu $sp,$sp,32
|
|
EPILOGUE(mpn_divrem_1)
|