; mpir/mpn/x86w/p3/copyd.asm


;  Copyright 2001, 2002 Free Software Foundation, Inc.
; 
;  This file is part of the GNU MP Library.
; 
;  The GNU MP Library is free software; you can redistribute it and/or
;  modify it under the terms of the GNU Lesser General Public License as
;  published by the Free Software Foundation; either version 2.1 of the
;  License, or (at your option) any later version.
; 
;  The GNU MP Library is distributed in the hope that it will be useful,
;  but WITHOUT ANY WARRANTY; without even the implied warranty of
;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;  Lesser General Public License for more details.
; 
;  You should have received a copy of the GNU Lesser General Public
;  License along with the GNU MP Library; see the file COPYING.LIB.  If
;  not, write to the Free Software Foundation, Inc., 59 Temple Place -
;  Suite 330, Boston, MA 02111-1307, USA.
;
; Translation of AT&T syntax code by Brian Gladman 

%include "..\x86i.inc" 

	global  ___gmpn_copyd 

%ifdef	DLL
	export	___gmpn_copyd
%endif

%define	PARAM_SIZE	esp+frame+12 
%define PARAM_SRC   esp+frame+8 
%define PARAM_DST   esp+frame+4 

%define	SAVE_ESI	PARAM_SIZE
%define	SAVE_EDI	PARAM_SRC
%define	frame		0 

	section .text
	align   16

___gmpn_copyd: 
	mov     ecx,[PARAM_SIZE]
    mov     [SAVE_ESI],esi
    mov     esi,[PARAM_SRC]
    mov     [SAVE_EDI],edi
    mov     edi,[PARAM_DST]
    sub     ecx,1
    jb      Lzero
    mov     eax,[esi+ecx*4]			;  src[size-1] 
    jz      Lone
    mov     edx,[-4+esi+ecx*4]		;  src[size-2] 
    sub     ecx,2
    jbe     Ldone_loop              ;  2 or 3 limbs only 
        
;  The usual overlap is 
;
;      high                   low 
;      +------------------+ 
;      |               dst| 
;      +------------------+ 
;            +------------------+ 
;            |               src| 
;            +------------------+ 
;
;  We can use an incrementing copy in the following circumstances. 
;
;      src+4*size<=dst,since then the regions are disjoint 
;
;      src==dst,clearly (though this shouldn't occur normally) 
;
;      src>dst,since in that case it's a requirement of the 
;               parameters that src>=dst+size*4,and hence the 
;               regions are disjoint 
;
;  eax prev high limb 
;  ebx 
;  ecx counter,size-3 down to 0 or -1,inclusive,by 2s 
;  edx prev low limb 
;  esi src 
;  edi dst 
;  ebp 

    lea     edx,[edi+ecx*4]
    cmp     esi,edi
    jae     Luse_movsl			;  src >= dst 
    cmp     edx,edi
    mov     edx,[4+esi+ecx*4]	;  src[size-2] again 
    jbe     Luse_movsl			;  src+4*size <= dst 
Ltop: 
    mov     [8+edi+ecx*4],eax
    mov     eax,[esi+ecx*4]
    mov     [4+edi+ecx*4],edx
    mov     edx,[-4+esi+ecx*4]
    sub     ecx,2
    jnbe    Ltop
Ldone_loop: 
    mov     [8+edi+ecx*4],eax
    mov     [4+edi+ecx*4],edx

;  copy low limb (needed if size was odd,but will already have been 
;  done in the loop if size was even) 

    mov     eax,[esi]
Lone: 
    mov     [edi],eax
    mov     edi,[SAVE_EDI]
    mov     esi,[SAVE_ESI]
	ret
Luse_movsl: 
    add     ecx,3
    cld
    rep		movsd
Lzero: 
    mov     esi,[SAVE_ESI]
    mov     edi,[SAVE_EDI]
    ret

	end
Line endings 2012-11-25 17:13:44 -05:00
			`; Copyright 2001, 2002 Free Software Foundation, Inc.`
			`;`
			`; This file is part of the GNU MP Library.`
			`;`
			`; The GNU MP Library is free software; you can redistribute it and/or`
			`; modify it under the terms of the GNU Lesser General Public License as`
			`; published by the Free Software Foundation; either version 2.1 of the`
			`; License, or (at your option) any later version.`
			`;`
			`; The GNU MP Library is distributed in the hope that it will be useful,`
			`; but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU`
			`; Lesser General Public License for more details.`
			`;`
			`; You should have received a copy of the GNU Lesser General Public`
			`; License along with the GNU MP Library; see the file COPYING.LIB. If`
			`; not, write to the Free Software Foundation, Inc., 59 Temple Place -`
			`; Suite 330, Boston, MA 02111-1307, USA.`
			`;`
			`; Translation of AT&T syntax code by Brian Gladman`

			`%include "..\x86i.inc"`

			`global ___gmpn_copyd`

			`%ifdef DLL`
			`export ___gmpn_copyd`
			`%endif`

			`%define PARAM_SIZE esp+frame+12`
			`%define PARAM_SRC esp+frame+8`
			`%define PARAM_DST esp+frame+4`

			`%define SAVE_ESI PARAM_SIZE`
			`%define SAVE_EDI PARAM_SRC`
			`%define frame 0`

			`section .text`
			`align 16`

			`___gmpn_copyd:`
			`mov ecx,[PARAM_SIZE]`
			`mov [SAVE_ESI],esi`
			`mov esi,[PARAM_SRC]`
			`mov [SAVE_EDI],edi`
			`mov edi,[PARAM_DST]`
			`sub ecx,1`
			`jb Lzero`
			`mov eax,[esi+ecx*4] ; src[size-1]`
			`jz Lone`
			`mov edx,[-4+esi+ecx*4] ; src[size-2]`
			`sub ecx,2`
			`jbe Ldone_loop ; 2 or 3 limbs only`

			`; The usual overlap is`
			`;`
			`; high low`
			`; +------------------+`
			`; \| dst\|`
			`; +------------------+`
			`; +------------------+`
			`; \| src\|`
			`; +------------------+`
			`;`
			`; We can use an incrementing copy in the following circumstances.`
			`;`
			`; src+4*size<=dst,since then the regions are disjoint`
			`;`
			`; src==dst,clearly (though this shouldn't occur normally)`
			`;`
			`; src>dst,since in that case it's a requirement of the`
			`; parameters that src>=dst+size*4,and hence the`
			`; regions are disjoint`
			`;`
			`; eax prev high limb`
			`; ebx`
			`; ecx counter,size-3 down to 0 or -1,inclusive,by 2s`
			`; edx prev low limb`
			`; esi src`
			`; edi dst`
			`; ebp`

			`lea edx,[edi+ecx*4]`
			`cmp esi,edi`
			`jae Luse_movsl ; src >= dst`
			`cmp edx,edi`
			`mov edx,[4+esi+ecx*4] ; src[size-2] again`
			`jbe Luse_movsl ; src+4*size <= dst`
			`Ltop:`
			`mov [8+edi+ecx*4],eax`
			`mov eax,[esi+ecx*4]`
			`mov [4+edi+ecx*4],edx`
			`mov edx,[-4+esi+ecx*4]`
			`sub ecx,2`
			`jnbe Ltop`
			`Ldone_loop:`
			`mov [8+edi+ecx*4],eax`
			`mov [4+edi+ecx*4],edx`

			`; copy low limb (needed if size was odd,but will already have been`
			`; done in the loop if size was even)`

			`mov eax,[esi]`
			`Lone:`
			`mov [edi],eax`
			`mov edi,[SAVE_EDI]`
			`mov esi,[SAVE_ESI]`
			`ret`
			`Luse_movsl:`
			`add ecx,3`
			`cld`
			`rep movsd`
			`Lzero:`
			`mov esi,[SAVE_ESI]`
			`mov edi,[SAVE_EDI]`
			`ret`

			`end`