; mpir/mpn/x86w/p3/copyd.asm
;
; (Repository viewer metadata, kept as a comment so the file assembles:
;  120 lines, 3.1 KiB, NASM, revision dated 2012-11-25 17:13:44 -05:00)
;
; Copyright 2001, 2002 Free Software Foundation, Inc.
;
; This file is part of the GNU MP Library.
;
; The GNU MP Library is free software; you can redistribute it and/or
; modify it under the terms of the GNU Lesser General Public License as
; published by the Free Software Foundation; either version 2.1 of the
; License, or (at your option) any later version.
;
; The GNU MP Library is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; Lesser General Public License for more details.
;
; You should have received a copy of the GNU Lesser General Public
; License along with the GNU MP Library; see the file COPYING.LIB. If
; not, write to the Free Software Foundation, Inc., 59 Temple Place -
; Suite 330, Boston, MA 02111-1307, USA.
;
; Translation of AT&T syntax code by Brian Gladman
%include "..\x86i.inc"

        global  ___gmpn_copyd
%ifdef DLL
        export  ___gmpn_copyd
%endif

; Extra displacement folded into every stack reference below.  The routine
; that uses these macros never moves esp, so it stays at zero.  (%define
; bodies are expanded where they are used, so the order of these
; definitions does not matter.)
%define frame           0

; Incoming stack arguments, addressed relative to esp.
%define PARAM_DST       esp+frame+4
%define PARAM_SRC       esp+frame+8
%define PARAM_SIZE      esp+frame+12

; esi and edi are parked in argument slots instead of being pushed:
; each slot is only overwritten after its argument has been loaded.
%define SAVE_ESI        PARAM_SIZE
%define SAVE_EDI        PARAM_SRC

        section .text
        align   16
;-----------------------------------------------------------------------
; ___gmpn_copyd -- copy `size` 32-bit limbs from src to dst, working from
; the high limb down to the low limb so that an overlapping destination
; located above the source is handled correctly.
;
; In:    [PARAM_DST]   destination pointer
;        [PARAM_SRC]   source pointer
;        [PARAM_SIZE]  number of limbs (0 is accepted and copies nothing)
; Out:   nothing is placed in eax for the caller
; Clobb: eax, ecx, edx, flags; DF is cleared when the rep movsd path runs
; Saved: esi, edi (parked in the PARAM_SIZE / PARAM_SRC argument slots)
; Stack: esp is never adjusted; plain `ret`, the caller removes arguments
;-----------------------------------------------------------------------
___gmpn_copyd:
        mov     ecx, [PARAM_SIZE]
        mov     [SAVE_ESI], esi         ; size is in ecx now, its slot is free
        mov     esi, [PARAM_SRC]
        mov     [SAVE_EDI], edi         ; src is in esi now, its slot is free
        mov     edi, [PARAM_DST]

        sub     ecx, 1                  ; ecx = size-1
        jb      Lzero                   ; size == 0

        mov     eax, [esi+ecx*4]        ; src[size-1]; mov keeps the flags
        jz      Lone                    ; size == 1

        mov     edx, [-4+esi+ecx*4]     ; src[size-2]
        sub     ecx, 2                  ; ecx = size-3
        jbe     Ldone_loop              ; 2 or 3 limbs only

; The usual overlap is
;
;     high                   low
;     +------------------+
;     |               dst|
;     +------------------+
;           +------------------+
;           |               src|
;           +------------------+
;
; We can use an incrementing copy in the following circumstances.
;
;     src+4*size<=dst, since then the regions are disjoint
;
;     src==dst, clearly (though this shouldn't occur normally)
;
;     src>dst, since in that case it's a requirement of the
;              parameters that src>=dst+size*4, and hence the
;              regions are disjoint
;
; edx is borrowed for the address test and reloaded before the loop.
; It must hold src+4*size, i.e. esi + 4*(ecx+3).  Computing it from
; edi+ecx*4 instead would make the second test below unsatisfiable
; (dst+4*(size-3) is never <= dst when size-3 >= 1), so disjoint operands
; with src < dst would never reach the rep movsd path.

        lea     edx, [12+esi+ecx*4]     ; src + 4*size
        cmp     esi, edi
        jae     Luse_movsl              ; src >= dst

        cmp     edx, edi
        mov     edx, [4+esi+ecx*4]      ; src[size-2] again; mov keeps the flags
        jbe     Luse_movsl              ; src+4*size <= dst

Ltop:
; eax prev high limb
; ebx
; ecx counter, size-3 down to 0 or -1, inclusive, by 2s
; edx prev low limb
; esi src
; edi dst
; ebp
        mov     [8+edi+ecx*4], eax
        mov     eax, [esi+ecx*4]
        mov     [4+edi+ecx*4], edx
        mov     edx, [-4+esi+ecx*4]
        sub     ecx, 2
        jnbe    Ltop                    ; continue while the counter is still > 0

Ldone_loop:
; ecx is 0 (odd size) or -1 (even size) here; flush the two pending limbs.
        mov     [8+edi+ecx*4], eax
        mov     [4+edi+ecx*4], edx

; copy low limb (needed if size was odd, but will already have been
; done in the loop if size was even)
        mov     eax, [esi]
Lone:
        mov     [edi], eax
        mov     edi, [SAVE_EDI]
        mov     esi, [SAVE_ESI]
        ret

Luse_movsl:
; eax
; ebx
; ecx size-3
; edx
; esi src
; edi dst
; ebp
        add     ecx, 3                  ; back to the full limb count
        cld                             ; make sure the string copy ascends
        rep movsd

Lzero:
        mov     esi, [SAVE_ESI]
        mov     edi, [SAVE_EDI]
        ret
end