mpir/mpn/x86i/aors_n.asm
brgladman 48248cda46 1. longlong.h change to add MSVC intrinsics
2. longlong.h rearrangement for Intel compiler
3. MSVC additions in test  code 
4. GMP 4.2.1 bug fixes
5. Intel format assembly code
2008-05-18 22:20:43 +00:00

176 lines
4.6 KiB
NASM

; Copyright 1992, 1994, 1995, 1996, 1999, 2000, 2001, 2002 Free Software
; Foundation, Inc.
;
; This file is part of the GNU MP Library.
;
; The GNU MP Library is free software; you can redistribute it and/or
; modify it under the terms of the GNU Lesser General Public License as
; published by the Free Software Foundation; either version 2.1 of the
; License, or (at your option) any later version.
;
; The GNU MP Library is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; Lesser General Public License for more details.
;
; You should have received a copy of the GNU Lesser General Public
; License along with the GNU MP Library; see the file COPYING.LIB. If
; not, write to the Free Software Foundation, Inc., 59 Temple Place -
; Suite 330, Boston, MA 02111-1307, USA.
;
; Translation of AT&T syntax code by Brian Gladman
%include "x86i.inc"
; C prototypes of the two routines generated by each use of the mac_sub
; macro. "M4_function" is presumably a placeholder left over from the
; original source; the real names are supplied where the macro is
; instantiated.
; mp_limb_t M4_function_n (mp_ptr dst,mp_srcptr src1,mp_srcptr src2,
; mp_size_t size);
; mp_limb_t M4_function_nc (mp_ptr dst,mp_srcptr src1,mp_srcptr src2,
; mp_size_t size,mp_limb_t carry);
;
; PARAM_SPACE is not referenced by the code in this file; 20 matches five
; 4-byte stack arguments (presumably the size of the argument area).
%define PARAM_SPACE 20
; Stack arguments, addressed relative to esp. "frame" is assigned 0 at each
; entry point and is expected to hold the number of bytes pushed since
; entry, so these expressions stay valid after registers are saved.
; NOTE(review): frame is presumably maintained by FR_push / FR_pop, which
; are not defined in this file (x86i.inc?) - confirm.
; Offsets +4..+20 follow the prototype order dst, src1, src2, size, carry;
; the carry argument exists only for the _nc form.
%define PARAM_CARRY esp+frame+20
%define PARAM_SIZE esp+frame+16
%define PARAM_SRC2 esp+frame+12
%define PARAM_SRC1 esp+frame+8
%define PARAM_DST esp+frame+4
;-----------------------------------------------------------------------
; mac_sub prefix, op, name_n, name_nc
;
; Generates two global entry points sharing one 8-way unrolled loop:
;   prefix+name_nc  carry-in is bit 0 of the fifth stack argument
;   prefix+name_n   carry-in is zero
; "op" is the carry-propagating instruction applied to each limb (the
; file instantiates the macro with adc and with sbb).
;
; In:    stack arguments dst, src1, src2, size (and carry for _nc),
;        read through the PARAM_* defines.
; Out:   eax = final carry flag (0 or 1) after the last limb.
; Regs:  edi = dst, esi = src1, edx = src2, ecx = loop pass counter,
;        eax = scratch / jump target. edi and esi are saved and restored;
;        eax, ecx, edx and the flags are overwritten.
;
; The loop handles 8 limbs per pass. When size is not a multiple of 8
; the first pass is entered part-way through by a computed jump, with the
; three pointers moved back so that the skipped slots are never touched.
;
; NOTE: size == 0 is not handled: ecx would be 0 on loop entry and
; "dec ecx / jnz" would keep looping, so callers must pass size >= 1.
;-----------------------------------------------------------------------
%macro mac_sub 4
; ---- entry point with carry-in (the _nc form) ----
global %1%4
%ifdef DLL
export %1%4
%endif
align 8
%1%4:
%assign frame 0
FR_push edi
FR_push esi
mov edi,[PARAM_DST]
mov esi,[PARAM_SRC1]
mov edx,[PARAM_SRC2]
mov ecx,[PARAM_SIZE]
mov eax,ecx
shr ecx,3 ; ecx = size / 8 = number of full loop passes
neg eax
and eax,7 ; eax = (-size) mod 8 = limb slots to skip in the first pass
jz %%3 ; size is a multiple of 8: load carry, enter loop at its top
inc ecx ; one extra (partial) pass for the leftover limbs
shl eax,2 ; convert skipped slots to a byte offset ...
sub edi,eax ; ... and move all three pointers back by it ...
sub esi,eax ; ... so the first limb processed is the ...
sub edx,eax ; ... first limb of each operand
shr eax,2 ; restore the slot count
; Calculate the start address inside the loop: loop + 9*eax - 3.
; The first limb group in the loop uses no displacement bytes and the
; other seven use one-byte displacements, so (assuming the assembler
; picks the shortest encodings) group k starts 9*k - 3 bytes past the
; loop label. eax is 1..7 on this path.
%ifdef PIC
; Position-independent variant: the call pushes the run-time address of
; the following label, which is added to the label-relative offset and
; then discarded from the stack.
call %%1
%%1:
lea eax,[%%4-%%1-3+eax+eax*8]
add eax,[esp]
add esp,4
%else
lea eax,[%%4-3+eax+eax*8]
%endif
; These lines initialize carry from the 5th parameter. Should be
; possible to simplify.
; This is done after the address arithmetic above because that
; arithmetic overwrites the carry flag; the pop and jmp that follow
; leave the flags alone. ebp is only borrowed and is restored.
FR_push ebp
mov ebp,[PARAM_CARRY]
shr ebp,1 ; shift bit 0 into carry
FR_pop ebp
jmp eax ; jump into the loop
; ---- entry point without carry-in (the _n form) ----
; There is no explicit clc: this path relies on the last flag-writing
; instruction before the loop leaving the carry flag clear ("and eax,7"
; when the jz is taken, "shr eax,2" shifting out a zero bit on the
; non-PIC path, "add esp,4" on the PIC path).
global %1%3
%ifdef DLL
export %1%3
%endif
align 8
%1%3:
%assign frame 0
FR_push edi
FR_push esi
mov edi,[PARAM_DST]
mov esi,[PARAM_SRC1]
mov edx,[PARAM_SRC2]
mov ecx,[PARAM_SIZE]
mov eax,ecx
shr ecx,3 ; ecx = size / 8 = number of full loop passes
neg eax
and eax,7 ; eax = (-size) mod 8 = limb slots to skip in the first pass
jz %%4 ; size is a multiple of 8: enter the loop at its top
inc ecx ; one extra (partial) pass for the leftover limbs
shl eax,2 ; convert skipped slots to a byte offset ...
sub edi,eax ; ... and move all three pointers back by it ...
sub esi,eax ; ... so the first limb processed is the ...
sub edx,eax ; ... first limb of each operand
shr eax,2 ; restore the slot count
; Calculate the start address inside the loop (loop + 9*eax - 3, see the
; explanation at the first entry point).
; An inherited comment here said that, due to limitations in some
; assemblers, the label difference could not be put into the lea; in
; this version it is folded into the lea displacement.
%ifdef PIC
call %%2
%%2:
lea eax,[%%4-%%2-3+eax+eax*8]
add eax,[esp]
add esp,4
%else
lea eax,[%%4-3+eax+eax*8]
%endif
jmp eax ; jump into the loop
; Reached only from the carry-in entry point when size is a multiple of
; 8: load the carry argument into the carry flag, then fall through into
; the top of the loop. "frame" still reflects the two pushes made at
; entry, which matches the stack on this path.
%%3:
FR_push ebp
mov ebp,[PARAM_CARRY]
shr ebp,1 ; shift bit 0 into carry
FR_pop ebp
; Any padding emitted by this align is executed on the fall-through
; path, so it must not disturb the carry flag (NASM pads code with NOPs
; by default).
align 8
; Unrolled loop: eight load / op-with-carry / store groups per pass.
; The exact instruction forms matter: the computed jumps above depend on
; the encoded size of each group.
%%4:
mov eax,[esi]
%2 eax,[edx]
mov [edi],eax
mov eax,[4+esi]
%2 eax,[edx+4]
mov [4+edi],eax
mov eax,[8+esi]
%2 eax,[edx+8]
mov [8+edi],eax
mov eax,[12+esi]
%2 eax,[edx+12]
mov [12+edi],eax
mov eax,[16+esi]
%2 eax,[edx+16]
mov [16+edi],eax
mov eax,[20+esi]
%2 eax,[edx+20]
mov [20+edi],eax
mov eax,[24+esi]
%2 eax,[edx+24]
mov [24+edi],eax
mov eax,[28+esi]
%2 eax,[edx+28]
mov [28+edi],eax
; Advance the pointers and count down using lea and dec, neither of
; which modifies the carry flag, so the carry chain continues into the
; next pass.
lea edi,[32+edi]
lea esi,[32+esi]
lea edx,[32+edx]
dec ecx
jnz %%4
; Turn the final carry flag into the return value: eax = 0 or 1.
sbb eax,eax
neg eax
pop esi
pop edi
ret
%endmacro
section .text
; Addition: defines ___gmpn_add_n and ___gmpn_add_nc, using adc per limb.
mac_sub ___g,adc,mpn_add_n,mpn_add_nc
; Subtraction: defines ___gmpn_sub_n and ___gmpn_sub_nc, using sbb per limb.
mac_sub ___g,sbb,mpn_sub_n,mpn_sub_nc
; NOTE(review): the bare "end" below is not preceded by a comment marker;
; presumably it is a leftover end-of-file marker from another assembler
; dialect and is taken as a plain label here - confirm before removing.
end