; Copyright 2001, 2002 Free Software Foundation, Inc. ; ; This file is part of the GNU MP Library. ; ; The GNU MP Library is free software; you can redistribute it and/or ; modify it under the terms of the GNU Lesser General Public License as ; published by the Free Software Foundation; either version 2.1 of the ; License, or (at your option) any later version. ; ; The GNU MP Library is distributed in the hope that it will be useful, ; but WITHOUT ANY WARRANTY; without even the implied warranty of ; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ; Lesser General Public License for more details. ; ; You should have received a copy of the GNU Lesser General Public ; License along with the GNU MP Library; see the file COPYING.LIB. If ; not, write to the Free Software Foundation, Inc., 59 Temple Place - ; Suite 330, Boston, MA 02111-1307, USA. ; ; Translation of AT&T syntax code by Brian Gladman %include "..\x86i.inc" global ___gmpn_copyd %ifdef DLL export ___gmpn_copyd %endif %define PARAM_SIZE esp+frame+12 %define PARAM_SRC esp+frame+8 %define PARAM_DST esp+frame+4 %define SAVE_ESI PARAM_SIZE %define SAVE_EDI PARAM_SRC %define frame 0 section .text align 16 ___gmpn_copyd: mov ecx,[PARAM_SIZE] mov [SAVE_ESI],esi mov esi,[PARAM_SRC] mov [SAVE_EDI],edi mov edi,[PARAM_DST] sub ecx,1 jb Lzero mov eax,[esi+ecx*4] ; src[size-1] jz Lone mov edx,[-4+esi+ecx*4] ; src[size-2] sub ecx,2 jbe Ldone_loop ; 2 or 3 limbs only ; The usual overlap is ; ; high low ; +------------------+ ; | dst| ; +------------------+ ; +------------------+ ; | src| ; +------------------+ ; ; We can use an incrementing copy in the following circumstances. ; ; src+4*size<=dst,since then the regions are disjoint ; ; src==dst,clearly (though this shouldn't occur normally) ; ; src>dst,since in that case it's a requirement of the ; parameters that src>=dst+size*4,and hence the ; regions are disjoint ; ; eax prev high limb ; ebx ; ecx counter,size-3 down to 0 or -1,inclusive,by 2s ; edx prev low limb ; esi src ; edi dst ; ebp lea edx,[edi+ecx*4] cmp esi,edi jae Luse_movsl ; src >= dst cmp edx,edi mov edx,[4+esi+ecx*4] ; src[size-2] again jbe Luse_movsl ; src+4*size <= dst Ltop: mov [8+edi+ecx*4],eax mov eax,[esi+ecx*4] mov [4+edi+ecx*4],edx mov edx,[-4+esi+ecx*4] sub ecx,2 jnbe Ltop Ldone_loop: mov [8+edi+ecx*4],eax mov [4+edi+ecx*4],edx ; copy low limb (needed if size was odd,but will already have been ; done in the loop if size was even) mov eax,[esi] Lone: mov [edi],eax mov edi,[SAVE_EDI] mov esi,[SAVE_ESI] ret Luse_movsl: add ecx,3 cld rep movsd Lzero: mov esi,[SAVE_ESI] mov edi,[SAVE_EDI] ret end