mpir/mpn/x86w/mod_34lsub1.asm
brgladman 9c467c6415
2008-07-04 10:39:15 +00:00

142 lines
3.4 KiB
NASM

; Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
;
; This file is part of the GNU MP Library.
;
; The GNU MP Library is free software; you can redistribute it and/or
; modify it under the terms of the GNU Lesser General Public License as
; published by the Free Software Foundation; either version 2.1 of the
; License, or (at your option) any later version.
;
; The GNU MP Library is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; Lesser General Public License for more details.
;
; You should have received a copy of the GNU Lesser General Public
; License along with the GNU MP Library; see the file COPYING.LIB. If
; not, write to the Free Software Foundation, Inc., 59 Temple Place -
; Suite 330, Boston, MA 02111-1307, USA.
;
; Translation of AT&T syntax code by Brian Gladman
%include "x86i.inc"
section .text
%define PARAM_SIZE esp+frame+8
%define PARAM_SRC esp+frame+4
; re-use parameter space
%define SAVE_EBX PARAM_SRC
global ___gmpn_mod_34lsub1
%ifdef DLL
export ___gmpn_mod_34lsub1
%endif
align 16
___gmpn_mod_34lsub1:
%assign frame 0
mov ecx,[PARAM_SIZE]
mov edx,[PARAM_SRC]
sub ecx,2
ja Lthree_or_more
mov eax,[edx]
jb Lone
mov ecx,[4+edx]
mov edx,eax
shr eax,24 ; src[0] low
and edx,0xFFFFFF ; src[0] high
add eax,edx
mov edx,ecx
and ecx,0xFFFF
shr edx,16 ; src[1] high
add eax,edx
shl ecx,8 ; src[1] low
add eax,ecx
Lone:
ret
; eax
; ebx
; ecx size-2
; edx src
; esi
; edi
; ebp
Lthree_or_more:
mov [SAVE_EBX],ebx ; and arrange 16-byte loop alignment
xor ebx,ebx
FR_push esi
xor esi,esi
FR_push edi
xor eax,eax ; and clear carry flag
; offset 0x40 here
; eax acc 0mod3
; ebx acc 1mod3
; ecx counter,limbs
; edx src
; esi acc 2mod3
; edi
; ebp
Ltop:
lea edx,[12+edx]
lea ecx,[-2+ecx]
adc eax,[-12+edx]
adc ebx,[-8+edx]
adc esi,[-4+edx]
dec ecx
jg Ltop
; ecx is -2,-1 or 0 representing 0,1 or 2 more limbs,respectively
mov edi,0xFFFFFFFF
inc ecx
js Lcombine
adc eax,[edx]
mov edi,0xFFFFFF00
dec ecx
js Lcombine
adc ebx,[4+edx]
mov edi,0xFFFF0000
; eax acc 0mod3
; ebx acc 1mod3
; ecx
; edx
; esi acc 2mod3
; edi mask
; ebp
Lcombine:
sbb ecx,ecx ; carry
mov edx,eax ; 0mod3
shr eax,24 ; 0mod3 high
and ecx,edi ; carry masked
sub eax,ecx ; apply carry
mov edi,ebx ; 1mod3
shr ebx,16 ; 1mod3 high
and edx,0x00FFFFFF ; 0mod3 low
add eax,edx ; apply 0mod3 low
and edi,0xFFFF
shl edi,8 ; 1mod3 low
add eax,ebx ; apply 1mod3 high
add eax,edi ; apply 1mod3 low
mov edx,esi ; 2mod3
shr esi,8 ; 2mod3 high
and edx,0xFF ; 2mod3 low
shl edx,16 ; 2mod3 low
add eax,esi ; apply 2mod3 high
add eax,edx ; apply 2mod3 low
FR_pop edi
mov ebx,[SAVE_EBX]
FR_pop esi
ret
end