; Copyright 1992, 1994, 1995, 1996, 1999, 2000, 2001, 2002 Free Software
; Foundation, Inc.
;
; This file is part of the GNU MP Library.
;
; The GNU MP Library is free software; you can redistribute it and/or
; modify it under the terms of the GNU Lesser General Public License as
; published by the Free Software Foundation; either version 2.1 of the
; License, or (at your option) any later version.
;
; The GNU MP Library is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; Lesser General Public License for more details.
;
; You should have received a copy of the GNU Lesser General Public
; License along with the GNU MP Library; see the file COPYING.LIB. If
; not, write to the Free Software Foundation, Inc., 59 Temple Place -
; Suite 330, Boston, MA 02111-1307, USA.
;
; Translation of AT&T syntax code by Brian Gladman

%include "x86i.inc"
|
|
|
|
; mp_limb_t M4_function_n (mp_ptr dst,mp_srcptr src1,mp_srcptr src2,
|
|
; mp_size_t size);
|
|
; mp_limb_t M4_function_nc (mp_ptr dst,mp_srcptr src1,mp_srcptr src2,
|
|
; mp_size_t size,mp_limb_t carry);
|
|
|
|
%define PARAM_SPACE 20
|
|
%define PARAM_CARRY esp+frame+20
|
|
%define PARAM_SIZE esp+frame+16
|
|
%define PARAM_SRC2 esp+frame+12
|
|
%define PARAM_SRC1 esp+frame+8
|
|
%define PARAM_DST esp+frame+4
|
|
|
|
%macro mac_sub 4
|
|
|
|
global %1%4
|
|
%ifdef DLL
|
|
export %1%4
|
|
%endif
|
|
|
|
align 8
|
|
%1%4:
|
|
%assign frame 0
|
|
FR_push edi
|
|
FR_push esi
|
|
mov edi,[PARAM_DST]
|
|
mov esi,[PARAM_SRC1]
|
|
mov edx,[PARAM_SRC2]
|
|
mov ecx,[PARAM_SIZE]
|
|
mov eax,ecx
|
|
shr ecx,3 ; compute count for unrolled %%4
|
|
neg eax
|
|
and eax,7 ; get index where to start %%4
|
|
jz %%3 ; necessary special case for 0
|
|
inc ecx ; adjust %%4 count
|
|
shl eax,2 ; adjustment for pointers...
|
|
sub edi,eax ; ... since they are offset ...
|
|
sub esi,eax ; ... by a constant when we ...
|
|
sub edx,eax ; ... enter the %%4
|
|
shr eax,2 ; restore previous value
|
|
|
|
; Calculate start address in %%4
|
|
|
|
%ifdef PIC
|
|
call %%1
|
|
%%1:
|
|
lea eax,[%%4-%%1-3+eax+eax*8]
|
|
add eax,[esp]
|
|
add esp,4
|
|
%else
|
|
lea eax,[%%4-3+eax+eax*8]
|
|
%endif
|
|
|
|
; These lines initialize carry from the 5th parameter. Should be
|
|
; possible to simplify.
|
|
|
|
FR_push ebp
|
|
mov ebp,[PARAM_CARRY]
|
|
shr ebp,1 ; shift bit 0 into carry
|
|
FR_pop ebp
|
|
jmp eax ; jump into %%4
|
|
|
|
global %1%3
|
|
%ifdef DLL
|
|
export %1%3
|
|
%endif
|
|
align 8
|
|
%1%3:
|
|
%assign frame 0
|
|
FR_push edi
|
|
FR_push esi
|
|
mov edi,[PARAM_DST]
|
|
mov esi,[PARAM_SRC1]
|
|
mov edx,[PARAM_SRC2]
|
|
mov ecx,[PARAM_SIZE]
|
|
mov eax,ecx
|
|
shr ecx,3 ; compute count for unrolled %%4
|
|
neg eax
|
|
and eax,7 ; get index where to start %%4
|
|
jz %%4 ; necessary special case for 0
|
|
inc ecx ; adjust %%4 count
|
|
shl eax,2 ; adjustment for pointers...
|
|
sub edi,eax ; ... since they are offset ...
|
|
sub esi,eax ; ... by a constant when we ...
|
|
sub edx,eax ; ... enter the %%4
|
|
shr eax,2 ; restore previous value
|
|
|
|
; Calculate start address in %%4 for PIC.
|
|
; Due to limitations in some assemblers,%%4-%%2-3
|
|
; cannot be put into the leal
|
|
|
|
%ifdef PIC
|
|
call %%2
|
|
%%2:
|
|
lea eax,[%%4-%%2-3+eax+eax*8]
|
|
add eax,[esp]
|
|
add esp,4
|
|
%else
|
|
lea eax,[%%4-3+eax+eax*8]
|
|
%endif
|
|
jmp eax ; jump into %%4
|
|
%%3:
|
|
FR_push ebp
|
|
mov ebp,[PARAM_CARRY]
|
|
shr ebp,1 ; shift bit 0 into carry
|
|
FR_pop ebp
|
|
|
|
align 8
|
|
%%4:
|
|
mov eax,[esi]
|
|
%2 eax,[edx]
|
|
mov [edi],eax
|
|
mov eax,[4+esi]
|
|
%2 eax,[edx+4]
|
|
mov [4+edi],eax
|
|
mov eax,[8+esi]
|
|
%2 eax,[edx+8]
|
|
mov [8+edi],eax
|
|
mov eax,[12+esi]
|
|
%2 eax,[edx+12]
|
|
mov [12+edi],eax
|
|
mov eax,[16+esi]
|
|
%2 eax,[edx+16]
|
|
mov [16+edi],eax
|
|
mov eax,[20+esi]
|
|
%2 eax,[edx+20]
|
|
mov [20+edi],eax
|
|
mov eax,[24+esi]
|
|
%2 eax,[edx+24]
|
|
mov [24+edi],eax
|
|
mov eax,[28+esi]
|
|
%2 eax,[edx+28]
|
|
mov [28+edi],eax
|
|
lea edi,[32+edi]
|
|
lea esi,[32+esi]
|
|
lea edx,[32+edx]
|
|
dec ecx
|
|
jnz %%4
|
|
sbb eax,eax
|
|
neg eax
|
|
pop esi
|
|
pop edi
|
|
ret
|
|
%endmacro
|
|
|
|
section .text
|
|
; global ___gmpn_sub_n
|
|
; global ___gmpn_sub_nc
|
|
|
|
mac_sub ___g,sbb,mpn_sub_n,mpn_sub_nc
|
|
|
|
end
|