;  mpir/mpn/x86_64/amd64/copyi.as

;  Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
;
;  This file is part of the GNU MP Library.
;
;  The GNU MP Library is free software; you can redistribute it and/or
;  modify it under the terms of the GNU Lesser General Public License as
;  published by the Free Software Foundation; either version 2.1 of the
;  License, or (at your option) any later version.
;
;  The GNU MP Library is distributed in the hope that it will be useful,
;  but WITHOUT ANY WARRANTY; without even the implied warranty of
;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;  Lesser General Public License for more details.
;
;  You should have received a copy of the GNU Lesser General Public
;  License along with the GNU MP Library; see the file COPYING.LIB.  If
;  not, write to the Free Software Foundation, Inc., 59 Temple Place -
;  Suite 330, Boston, MA 02111-1307, USA.
;
;  Provided by Brian Gladman AMD64 using the Microsoft VC++ v8 64-bit
;  compiler and the YASM assembler.

; AMD64 mpn_copyi -- incrementing copy limb vector
;
;  Calling interface:
;
; void mpn_copyi(
;     mp_ptr dst,    rdi
;     mp_srcptr src, rsi
;     mp_size_t size rdx
; )

%include '../yasm_mac.inc'

; Limb count at or above which mpn_copyi leaves the simple one-limb-at-a-time
; loop and uses one of its unrolled copy loops instead.
%define    UNROLL_THRESHOLD 16

; Symbolic names for the argument registers listed in the calling-interface
; comment: destination pointer, source pointer and limb count.
%define  d_ptr  rdi
%define  s_ptr  rsi
%define  s_len  rdx
; 32-bit view of s_len (the low half of rdx).
%define  s_lend edx

; Assemble the following code as 64-bit.
    BITS    64

;-----------------------------------------------------------------------
; void mpn_copyi(mp_ptr dst, mp_srcptr src, mp_size_t size)
;
; Copies `size` 8-byte limbs from src to dst, working from the lowest
; address upwards.
;
; In:    d_ptr (rdi) = dst, s_ptr (rsi) = src, s_len (rdx) = size
; Out:   nothing
; Clobb: rax, rdx, rsi, rdi, r9, xmm0, xmm1, flags
;
; Both pointers are assumed to be at least 8-byte aligned.
;
; Strategy:
;   size == 0                  return at once
;   size <  UNROLL_THRESHOLD   one limb per iteration through rax
;   size >= UNROLL_THRESHOLD   and src/dst share the same offset within
;                              16 bytes: optionally copy one limb to
;                              reach a 16-byte boundary, then 32 bytes
;                              per iteration using aligned loads and
;                              non-temporal stores, then a 0..3 limb tail
;   size >= UNROLL_THRESHOLD   and the offsets differ: two limbs per
;                              iteration using 8-byte moves, then a
;                              0..1 limb tail
;
; After set-up, s_ptr and d_ptr point just past the last limb and s_len
; is a negative limb index that counts up towards zero.
;
; Every return uses the two-byte `rep ret` form because each one is the
; target of a conditional branch (see the comment at label1).
;
; NOTE(review): movsxd keeps only the low 32 bits of size.  If mp_size_t
; is 64 bits wide on this target, sizes of 2^31 limbs or more would be
; truncated -- confirm against the mp_size_t definition.
;-----------------------------------------------------------------------
GLOBAL_FUNC mpn_copyi
    movsxd  s_len,s_lend                ; sign-extend low 32 bits of size
    test    s_len,s_len                 ; none to move?
    jz      label1
    mov     rax,s_ptr                   ; find relative alignment of
    xor     rax,d_ptr                   ; source and destination (min
    mov     r9,s_ptr                    ; 8-byte alignment assumed)
    lea     s_ptr,[s_ptr+s_len*8]       ; point both one past the end
    lea     d_ptr,[d_ptr+s_len*8]
    neg     s_len                       ; index runs from -size up to 0
    cmp     s_len,byte -UNROLL_THRESHOLD
    jbe     label2                      ; if many limbs to move
label0:
    mov     rax,[s_ptr+s_len*8]         ; short move via rax
    mov     [d_ptr+s_len*8],rax
    inc     s_len
    jnz     label0
label1:
    rep     ret                         ; avoid single byte ret that
                                        ; interferes with branch prediction

label2:
    test    al,8                        ; bit 3 of (src xor dst)
    jnz     label7                      ; not 16 byte aligned
    test    r9,8                        ; see if src is on 16 byte
    jz      label3                      ; boundary
    mov     rax,[s_ptr+s_len*8]         ; if not do a one limb copy
    mov     [d_ptr+s_len*8],rax
    inc     s_len
label3:
    lea     s_len,[s_len+3]             ; now 16 byte aligned; bias index
                                        ; by 3 so that `add 4 / jl` leaves
                                        ; the loop once fewer than 4 limbs
                                        ; remain (3 - s_len limbs left)
label4:
    prefetchnta [s_ptr+s_len*8-24+3*64] ; should this be +4*64 ??
    movdqa  xmm0,[s_ptr+s_len*8-24]     ; move 32 bytes at a time
    movntdq [d_ptr+s_len*8-24],xmm0
    movdqa  xmm0,[s_ptr+s_len*8-8]
    movntdq [d_ptr+s_len*8-8],xmm0
    add     s_len,4
    jl      label4
    sfence                              ; order the non-temporal stores
    test    s_len,2                     ; s_len is 0..3 here; bit 1 clear
    jnz     label5                      ; means 2 or 3 limbs remain
    movdqa  xmm0,[s_ptr+s_len*8-24]     ; move 16 bytes if necessary
    movdqa  [d_ptr+s_len*8-24],xmm0
    add     s_len,2
label5:
    test    s_len,1                     ; s_len is 2 or 3 here; even means
    jnz     label6                      ; one limb remains
    movq    xmm0,[s_ptr+s_len*8-24]     ; move 8 bytes if necessary
    movq    [d_ptr+s_len*8-24],xmm0
label6:
    rep     ret                         ; branch target: two-byte return

label7:
    lea     s_len,[s_len+1]             ; move 8 bytes at a time; bias index
                                        ; by 1 so that `add 2 / jl` leaves
                                        ; the loop once fewer than 2 limbs
                                        ; remain (1 - s_len limbs left)
label8:
    movq    xmm0,[s_ptr+s_len*8-8]      ; load two limbs, then store both
    movq    xmm1,[s_ptr+s_len*8]
    movq    [d_ptr+s_len*8-8],xmm0
    movq    [d_ptr+s_len*8],xmm1
    add     s_len,2
    jl      label8
    test    s_len,1                     ; s_len is 0 or 1 here; 0 means
    jnz     label9                      ; the final limb is still to copy
    movq    xmm0,[s_ptr-8]
    movq    [d_ptr-8],xmm0
label9:
    rep     ret                         ; branch target: two-byte return
Set native line endings for all .c, .h, as, .asm, .s, .in, .m4, .cc, am 2008-06-25 03:33:36 -04:00			`; Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.`
			`;`
			`; This file is part of the GNU MP Library.`
			`;`
			`; The GNU MP Library is free software; you can redistribute it and/or`
			`; modify it under the terms of the GNU Lesser General Public License as`
			`; published by the Free Software Foundation; either version 2.1 of the`
			`; License, or (at your option) any later version.`
			`;`
			`; The GNU MP Library is distributed in the hope that it will be useful,`
			`; but WITHOUT ANY WARRANTY; without even the implied warranty of`
			`; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU`
			`; Lesser General Public License for more details.`
			`;`
			`; You should have received a copy of the GNU Lesser General Public`
			`; License along with the GNU MP Library; see the file COPYING.LIB. If`
			`; not, write to the Free Software Foundation, Inc., 59 Temple Place -`
			`; Suite 330, Boston, MA 02111-1307, USA.`
			`;`
			`; Provided by Brian Gladman AMD64 using the Microsoft VC++ v8 64-bit`
			`; compiler and the YASM assembler.`

			`; AMD64 mpn_copyi -- incrementing copy limb vector`
			`;`
Cleaned up /mpn/x86_64/amd64/*.as files. 2008-07-23 14:37:20 -04:00			`; Calling interface:`
Set native line endings for all .c, .h, as, .asm, .s, .in, .m4, .cc, am 2008-06-25 03:33:36 -04:00			`;`
			`; void mpn_copyi(`
			`; mp_ptr dst, rdi`
			`; mp_srcptr src, rsi`
			`; mp_size_t size rdx`
			`; )`

			`%include '../yasm_mac.inc'`

			`%define UNROLL_THRESHOLD 16`

			`%define d_ptr rdi`
			`%define s_ptr rsi`
			`%define s_len rdx`
			`%define s_lend edx`

Cleaned up /mpn/x86_64/amd64/*.as files. 2008-07-23 14:37:20 -04:00			`BITS 64`
Set native line endings for all .c, .h, as, .asm, .s, .in, .m4, .cc, am 2008-06-25 03:33:36 -04:00
Cleaned up /mpn/x86_64/amd64/*.as files. 2008-07-23 14:37:20 -04:00			`GLOBAL_FUNC mpn_copyi`
Set native line endings for all .c, .h, as, .asm, .s, .in, .m4, .cc, am 2008-06-25 03:33:36 -04:00			`movsxd s_len,s_lend`
Cleaned up /mpn/x86_64/amd64/*.as files. 2008-07-23 14:37:20 -04:00			`or s_len,s_len ; none to move?`
			`jz label1`
			`mov rax,s_ptr ; find relative alignment of`
			`xor rax,d_ptr ; source and destination (min`
			`mov r9,s_ptr ; 8-byte alignment assumed)`
Set native line endings for all .c, .h, as, .asm, .s, .in, .m4, .cc, am 2008-06-25 03:33:36 -04:00			`lea s_ptr,[s_ptr+s_len*8]`
			`lea d_ptr,[d_ptr+s_len*8]`
			`neg s_len`
			`cmp s_len,byte -UNROLL_THRESHOLD`
Cleaned up /mpn/x86_64/amd64/*.as files. 2008-07-23 14:37:20 -04:00			`jbe label2 ; if many limbs to move`
			`label0:`
			`mov rax,[s_ptr+s_len*8] ; short move via rax`
Set native line endings for all .c, .h, as, .asm, .s, .in, .m4, .cc, am 2008-06-25 03:33:36 -04:00			`mov [d_ptr+s_len*8],rax`
			`inc s_len`
Cleaned up /mpn/x86_64/amd64/*.as files. 2008-07-23 14:37:20 -04:00			`jnz label0 ; avoid single byte ret that`
			`label1:`
			`rep ret ; interferes with branch prediction`
Set native line endings for all .c, .h, as, .asm, .s, .in, .m4, .cc, am 2008-06-25 03:33:36 -04:00
Cleaned up /mpn/x86_64/amd64/*.as files. 2008-07-23 14:37:20 -04:00			`label2:`
			`test al,8`
			`jnz label7 ; not 16 byte aligned`
			`test r9,8 ; see if src is on 16 byte`
			`jz label3 ; boundary`
			`mov rax,[s_ptr+s_len*8] ; if not do a one limb copy`
Set native line endings for all .c, .h, as, .asm, .s, .in, .m4, .cc, am 2008-06-25 03:33:36 -04:00			`mov [d_ptr+s_len*8],rax`
			`inc s_len`
Cleaned up /mpn/x86_64/amd64/*.as files. 2008-07-23 14:37:20 -04:00			`label3:`
			`lea s_len,[s_len+3] ; now 16 byte aligned`
			`label4:`
			`prefetchnta [s_ptr+s_len*8-24+3*64] ; should this be +4*64 ??`
Set native line endings for all .c, .h, as, .asm, .s, .in, .m4, .cc, am 2008-06-25 03:33:36 -04:00			`movdqa xmm0,[s_ptr+s_len*8-24] ; move 32 bytes at a time`
			`movntdq [d_ptr+s_len*8-24],xmm0`
			`movdqa xmm0,[s_ptr+s_len*8-8]`
			`movntdq [d_ptr+s_len*8-8],xmm0`
			`add s_len,4`
Cleaned up /mpn/x86_64/amd64/*.as files. 2008-07-23 14:37:20 -04:00			`jl label4`
Set native line endings for all .c, .h, as, .asm, .s, .in, .m4, .cc, am 2008-06-25 03:33:36 -04:00			`sfence`
			`test s_len,2`
Cleaned up /mpn/x86_64/amd64/*.as files. 2008-07-23 14:37:20 -04:00			`jnz label5`
Set native line endings for all .c, .h, as, .asm, .s, .in, .m4, .cc, am 2008-06-25 03:33:36 -04:00			`movdqa xmm0,[s_ptr+s_len*8-24] ; move 16 bytes if necessary`
			`movdqa [d_ptr+s_len*8-24],xmm0`
			`add s_len,2`
Cleaned up /mpn/x86_64/amd64/*.as files. 2008-07-23 14:37:20 -04:00			`label5:`
			`test s_len,1`
			`jnz label6`
Set native line endings for all .c, .h, as, .asm, .s, .in, .m4, .cc, am 2008-06-25 03:33:36 -04:00			`movq xmm0,[s_ptr+s_len*8-24] ; move 8 bytes if necessary`
			`movq [d_ptr+s_len*8-24],xmm0`
Cleaned up /mpn/x86_64/amd64/*.as files. 2008-07-23 14:37:20 -04:00			`label6:`
			`ret`
Set native line endings for all .c, .h, as, .asm, .s, .in, .m4, .cc, am 2008-06-25 03:33:36 -04:00
Cleaned up /mpn/x86_64/amd64/*.as files. 2008-07-23 14:37:20 -04:00			`label7:`
			`lea s_len,[s_len+1] ; move 8 bytes at a time`
			`label8:`
			`movq xmm0,[s_ptr+s_len*8-8]`
Set native line endings for all .c, .h, as, .asm, .s, .in, .m4, .cc, am 2008-06-25 03:33:36 -04:00			`movq xmm1,[s_ptr+s_len*8]`
			`movq [d_ptr+s_len*8-8],xmm0`
			`movq [d_ptr+s_len*8],xmm1`
			`add s_len,2`
Cleaned up /mpn/x86_64/amd64/*.as files. 2008-07-23 14:37:20 -04:00			`jl label8`
Set native line endings for all .c, .h, as, .asm, .s, .in, .m4, .cc, am 2008-06-25 03:33:36 -04:00			`test s_len,1`
Cleaned up /mpn/x86_64/amd64/*.as files. 2008-07-23 14:37:20 -04:00			`jnz label9`
Set native line endings for all .c, .h, as, .asm, .s, .in, .m4, .cc, am 2008-06-25 03:33:36 -04:00			`movq xmm0,[s_ptr-8]`
			`movq [d_ptr-8],xmm0`
Cleaned up /mpn/x86_64/amd64/*.as files. 2008-07-23 14:37:20 -04:00			`label9:`
			`ret`