142 lines
3.4 KiB
NASM
142 lines
3.4 KiB
NASM
|
|
; Copyright 2000, 2001, 2002 Free Software Foundation, Inc.
|
|
;
|
|
; This file is part of the GNU MP Library.
|
|
;
|
|
; The GNU MP Library is free software; you can redistribute it and/or
|
|
; modify it under the terms of the GNU Lesser General Public License as
|
|
; published by the Free Software Foundation; either version 2.1 of the
|
|
; License, or (at your option) any later version.
|
|
;
|
|
; The GNU MP Library is distributed in the hope that it will be useful,
|
|
; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
; Lesser General Public License for more details.
|
|
;
|
|
; You should have received a copy of the GNU Lesser General Public
|
|
; License along with the GNU MP Library; see the file COPYING.LIB. If
|
|
; not, write to the Free Software Foundation, Inc., 59 Temple Place -
|
|
; Suite 330, Boston, MA 02111-1307, USA.
|
|
;
|
|
; Translation of AT&T syntax code by Brian Gladman
|
|
|
|
%include "x86i.inc"
|
|
|
|
section .text
|
|
|
|
%define PARAM_SIZE esp+frame+8
|
|
%define PARAM_SRC esp+frame+4
|
|
|
|
; re-use parameter space
|
|
%define SAVE_EBX PARAM_SRC
|
|
|
|
global ___gmpn_mod_34lsub1
|
|
%ifdef DLL
|
|
export ___gmpn_mod_34lsub1
|
|
%endif
|
|
|
|
align 16
|
|
___gmpn_mod_34lsub1:
|
|
%assign frame 0
|
|
mov ecx,[PARAM_SIZE]
|
|
mov edx,[PARAM_SRC]
|
|
sub ecx,2
|
|
ja Lthree_or_more
|
|
mov eax,[edx]
|
|
jb Lone
|
|
mov ecx,[4+edx]
|
|
mov edx,eax
|
|
shr eax,24 ; src[0] low
|
|
|
|
and edx,0xFFFFFF ; src[0] high
|
|
add eax,edx
|
|
mov edx,ecx
|
|
|
|
and ecx,0xFFFF
|
|
shr edx,16 ; src[1] high
|
|
add eax,edx
|
|
|
|
shl ecx,8 ; src[1] low
|
|
add eax,ecx
|
|
Lone:
|
|
ret
|
|
|
|
; eax
|
|
; ebx
|
|
; ecx size-2
|
|
; edx src
|
|
; esi
|
|
; edi
|
|
; ebp
|
|
|
|
Lthree_or_more:
|
|
mov [SAVE_EBX],ebx ; and arrange 16-byte loop alignment
|
|
xor ebx,ebx
|
|
FR_push esi
|
|
xor esi,esi
|
|
FR_push edi
|
|
xor eax,eax ; and clear carry flag
|
|
|
|
; offset 0x40 here
|
|
; eax acc 0mod3
|
|
; ebx acc 1mod3
|
|
; ecx counter,limbs
|
|
; edx src
|
|
; esi acc 2mod3
|
|
; edi
|
|
; ebp
|
|
|
|
Ltop:
|
|
lea edx,[12+edx]
|
|
lea ecx,[-2+ecx]
|
|
adc eax,[-12+edx]
|
|
adc ebx,[-8+edx]
|
|
adc esi,[-4+edx]
|
|
dec ecx
|
|
jg Ltop
|
|
; ecx is -2,-1 or 0 representing 0,1 or 2 more limbs,respectively
|
|
mov edi,0xFFFFFFFF
|
|
inc ecx
|
|
js Lcombine
|
|
adc eax,[edx]
|
|
mov edi,0xFFFFFF00
|
|
dec ecx
|
|
js Lcombine
|
|
adc ebx,[4+edx]
|
|
mov edi,0xFFFF0000
|
|
|
|
; eax acc 0mod3
|
|
; ebx acc 1mod3
|
|
; ecx
|
|
; edx
|
|
; esi acc 2mod3
|
|
; edi mask
|
|
; ebp
|
|
|
|
Lcombine:
|
|
sbb ecx,ecx ; carry
|
|
mov edx,eax ; 0mod3
|
|
shr eax,24 ; 0mod3 high
|
|
and ecx,edi ; carry masked
|
|
sub eax,ecx ; apply carry
|
|
mov edi,ebx ; 1mod3
|
|
shr ebx,16 ; 1mod3 high
|
|
and edx,0x00FFFFFF ; 0mod3 low
|
|
add eax,edx ; apply 0mod3 low
|
|
and edi,0xFFFF
|
|
shl edi,8 ; 1mod3 low
|
|
add eax,ebx ; apply 1mod3 high
|
|
add eax,edi ; apply 1mod3 low
|
|
mov edx,esi ; 2mod3
|
|
shr esi,8 ; 2mod3 high
|
|
and edx,0xFF ; 2mod3 low
|
|
shl edx,16 ; 2mod3 low
|
|
add eax,esi ; apply 2mod3 high
|
|
add eax,edx ; apply 2mod3 low
|
|
FR_pop edi
|
|
mov ebx,[SAVE_EBX]
|
|
FR_pop esi
|
|
ret
|
|
|
|
end
|