120 lines
3.2 KiB
NASM
120 lines
3.2 KiB
NASM
|
|
||
|
; Copyright 2001, 2002 Free Software Foundation, Inc.
|
||
|
;
|
||
|
; This file is part of the GNU MP Library.
|
||
|
;
|
||
|
; The GNU MP Library is free software; you can redistribute it and/or
|
||
|
; modify it under the terms of the GNU Lesser General Public License as
|
||
|
; published by the Free Software Foundation; either version 2.1 of the
|
||
|
; License, or (at your option) any later version.
|
||
|
;
|
||
|
; The GNU MP Library is distributed in the hope that it will be useful,
|
||
|
; but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
|
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
||
|
; Lesser General Public License for more details.
|
||
|
;
|
||
|
; You should have received a copy of the GNU Lesser General Public
|
||
|
; License along with the GNU MP Library; see the file COPYING.LIB. If
|
||
|
; not, write to the Free Software Foundation, Inc., 59 Temple Place -
|
||
|
; Suite 330, Boston, MA 02111-1307, USA.
|
||
|
;
|
||
|
; Translation of AT&T syntax code by Brian Gladman
|
||
|
|
||
|
%include "..\x86i.inc"
|
||
|
|
||
|
; Symbol made visible to the linker; additionally listed as a DLL export
; when the build defines DLL.

        global  ___gmpn_copyd

%ifdef DLL
        export  ___gmpn_copyd
%endif

; Extra displacement applied to every stack-argument address below.  It is
; zero here; the PARAM_* macros expand it at their point of use.

%define frame       0

; Stack arguments, addressed relative to esp ([esp] is the return address).

%define PARAM_DST   esp+frame+4
%define PARAM_SRC   esp+frame+8
%define PARAM_SIZE  esp+frame+12

; The routine keeps esi and edi in two of its own argument slots instead of
; pushing them, so each slot must be read before it is reused as a save area.

%define SAVE_ESI    PARAM_SIZE
%define SAVE_EDI    PARAM_SRC

        section .text
        align   16
|
||
|
|
||
|
;-----------------------------------------------------------------------
; ___gmpn_copyd -- copy `size` 4-byte limbs from src to dst.
;
; Stack arguments on entry (32-bit code, plain `ret`, so the caller
; removes the arguments):
;   [PARAM_DST]   destination pointer
;   [PARAM_SRC]   source pointer
;   [PARAM_SIZE]  number of limbs; 0 is accepted and copies nothing
;
; Returns:  nothing (eax holds no meaningful value).
; Clobbers: eax, ecx, edx, flags.  esi and edi are preserved, but they
;           are saved in the PARAM_SIZE and PARAM_SRC stack slots, so
;           those two argument slots are overwritten.  The direction
;           flag is cleared on the rep movsd path.
;
; Paths:
;   size == 0      nothing is stored
;   size == 1      single limb moved
;   size == 2, 3   straight-line tail at .done_loop
;   size >= 4      rep movsd (incrementing) when that is safe, otherwise
;                  the two-limbs-per-iteration decrementing loop
;-----------------------------------------------------------------------

___gmpn_copyd:
        mov     ecx, [PARAM_SIZE]
        mov     [SAVE_ESI], esi         ; size slot already read, reuse it
        mov     esi, [PARAM_SRC]
        mov     [SAVE_EDI], edi         ; src slot already read, reuse it
        mov     edi, [PARAM_DST]

        sub     ecx, 1                  ; ecx = size-1: CF if size==0, ZF if size==1
        jb      .zero

        mov     eax, [esi+ecx*4]        ; src[size-1]; mov leaves the flags alone
        jz      .one

        mov     edx, [esi+ecx*4-4]      ; src[size-2]
        sub     ecx, 2                  ; ecx = size-3
        jbe     .done_loop              ; 2 or 3 limbs only

        ; The usual overlap is
        ;
        ;     high                   low
        ;     +------------------+
        ;     |               dst|
        ;     +------------------+
        ;           +------------------+
        ;           |               src|
        ;           +------------------+
        ;
        ; We can use an incrementing copy in the following circumstances.
        ;
        ;     src+4*size <= dst, since then the regions are disjoint
        ;
        ;     src == dst, clearly (though this shouldn't occur normally)
        ;
        ;     src > dst, since in that case it's a requirement of the
        ;                parameters that src >= dst+size*4, and hence the
        ;                regions are disjoint

        ; edx = src + 4*size, the address just past the source.  It has to
        ; be formed from esi: a value based on edi (dst + 4*(size-3)) is
        ; always above dst here, which would make the second branch dead.
        lea     edx, [esi+ecx*4+12]
        cmp     esi, edi
        jae     .use_movsd              ; src >= dst

        cmp     edx, edi
        mov     edx, [esi+ecx*4+4]      ; src[size-2] again (lea overwrote edx)
        jbe     .use_movsd              ; src+4*size <= dst

        ; eax   prev high limb
        ; ebx
        ; ecx   counter, size-3 down to 0 or -1, inclusive, by 2s
        ; edx   prev low limb
        ; esi   src
        ; edi   dst
        ; ebp

.top:
        mov     [edi+ecx*4+8], eax
        mov     eax, [esi+ecx*4]
        mov     [edi+ecx*4+4], edx
        mov     edx, [esi+ecx*4-4]
        sub     ecx, 2
        jnbe    .top                    ; continue while the counter stayed above 0

.done_loop:
        mov     [edi+ecx*4+8], eax
        mov     [edi+ecx*4+4], edx

        ; copy low limb (needed if size was odd, but will already have been
        ; done in the loop if size was even)

        mov     eax, [esi]
.one:
        mov     [edi], eax
        mov     edi, [SAVE_EDI]
        mov     esi, [SAVE_ESI]
        ret

        ; eax
        ; ebx
        ; ecx   size-3
        ; edx
        ; esi   src
        ; edi   dst
        ; ebp

.use_movsd:
        add     ecx, 3                  ; back to the full limb count
        cld                             ; make sure movsd increments esi/edi
        rep movsd

.zero:
        mov     esi, [SAVE_ESI]
        mov     edi, [SAVE_EDI]
        ret
|
||
|
|
||
|
end
|