delete amd copyi.as and copyd.as

This commit is contained in:
jasonmoxham 2009-03-06 16:01:39 +00:00
parent 5cfca1657e
commit ea9ce09036
2 changed files with 0 additions and 220 deletions

copyd.as

@@ -1,108 +0,0 @@
; Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
;
; Copyright 2008 Brian Gladman, William Hart
;
; This file is part of the MPIR Library.
;
; The MPIR Library is free software; you can redistribute it and/or
; modify it under the terms of the GNU Lesser General Public License as
; published by the Free Software Foundation; either version 2.1 of the
; License, or (at your option) any later version.
;
; The MPIR Library is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; Lesser General Public License for more details.
;
; You should have received a copy of the GNU Lesser General Public
; License along with the MPIR Library; see the file COPYING.LIB. If
; not, write to the Free Software Foundation, Inc., 51 Franklin Street,
; Fifth Floor, Boston, MA 02110-1301, USA.
;
; Provided by Brian Gladman for AMD64, using the Microsoft VC++ v8 64-bit
; compiler and the YASM assembler.
; AMD64 mpn_copyd -- decrementing limb vector copy
;
; Calling interface:
;
; void mpn_copyd(
; mp_ptr dst, rdi
; mp_srcptr src, rsi
; mp_size_t size rdx
; )
%include 'yasm_mac.inc'

%define UNROLL_THRESHOLD 16

%define d_ptr  rdi
%define s_ptr  rsi
%define s_len  rdx
%define s_lend edx

        BITS 64

GLOBAL_FUNC mpn_copyd
        movsxd  s_len,s_lend
        cmp     s_len,byte UNROLL_THRESHOLD
        jge     label2                       ; if many limbs to move
        dec     s_len
        jl      label1
label0:
        mov     rax,[s_ptr+s_len*8]          ; short move via rax
        mov     [d_ptr+s_len*8],rax
        dec     s_len
        jge     label0                       ; avoid single byte ret that
label1:
        rep     ret                          ; interferes with branch prediction
label2:
        mov     rax,s_ptr                    ; find relative alignment of
        xor     rax,d_ptr                    ; source and destination (min
        test    al,8                         ; 8-byte alignment assumed)
        jnz     label7                       ; not 16 byte aligned
        lea     rax,[s_ptr+s_len*8]
        test    al,8                         ; see if src is on 16 byte
        jz      label3                       ; boundary
        dec     s_len
        mov     rax,[rax-8]                  ; if not do a one limb copy
        mov     [d_ptr+s_len*8],rax
label3:
        lea     s_len,[s_len-4]              ; now 16 byte aligned
label4:
        prefetchnta [s_ptr+s_len*8+16-3*64]  ; should this be -4*64 ??
        movdqa  xmm0,[s_ptr+s_len*8+16]      ; move 32 bytes at a time
        movntdq [d_ptr+s_len*8+16],xmm0
        movdqa  xmm0,[s_ptr+s_len*8]
        movntdq [d_ptr+s_len*8],xmm0
        sub     s_len,4
        jge     label4
        sfence
        test    s_len,2
        jz      label5
        movdqa  xmm0,[s_ptr+s_len*8+16]      ; move 16 bytes if necessary
        movdqa  [d_ptr+s_len*8+16],xmm0
label5:
        test    s_len,1
        jz      label6
        movq    xmm0,[s_ptr]                 ; move 8 bytes if necessary
        movq    [d_ptr],xmm0
label6:
        ret
label7:
        lea     s_len,[s_len-2]              ; move 8 bytes at a time
label8:
        movq    xmm0,[s_ptr+s_len*8+8]
        movq    xmm1,[s_ptr+s_len*8]
        movq    [d_ptr+s_len*8+8],xmm0
        movq    [d_ptr+s_len*8],xmm1
        sub     s_len,2
        jge     label8
        test    s_len,1
        jz      label9
        movq    xmm0,[s_ptr]
        movq    [d_ptr],xmm0
label9:
        ret
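
Note: the calling interface documented at the top of copyd.as amounts to a decrementing limb copy, which is the overlap-safe direction when the destination sits above the source. The C sketch below is only illustrative; the ref_mpn_copyd name is hypothetical and not part of MPIR, while mp_ptr, mp_srcptr and mp_size_t come from mpir.h as usual.

/* Illustrative C sketch of the behaviour implemented by the deleted copyd.as;
   ref_mpn_copyd is a hypothetical name, not an MPIR function definition. */
#include <mpir.h>   /* provides mp_ptr, mp_srcptr, mp_size_t */

static void ref_mpn_copyd(mp_ptr dst, mp_srcptr src, mp_size_t size)
{
    mp_size_t i;
    for (i = size - 1; i >= 0; i--)   /* highest limb first */
        dst[i] = src[i];
}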

copyi.as

@@ -1,112 +0,0 @@
; Copyright 1999, 2000, 2001, 2002 Free Software Foundation, Inc.
;
; Copyright 2008 Brian Gladman, William Hart
;
; This file is part of the MPIR Library.
;
; The MPIR Library is free software; you can redistribute it and/or
; modify it under the terms of the GNU Lesser General Public License as
; published by the Free Software Foundation; either version 2.1 of the
; License, or (at your option) any later version.
;
; The MPIR Library is distributed in the hope that it will be useful,
; but WITHOUT ANY WARRANTY; without even the implied warranty of
; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
; Lesser General Public License for more details.
;
; You should have received a copy of the GNU Lesser General Public
; License along with the MPIR Library; see the file COPYING.LIB. If
; not, write to the Free Software Foundation, Inc., 51 Franklin Street,
; Fifth Floor, Boston, MA 02110-1301, USA.
;
; Provided by Brian Gladman for AMD64, using the Microsoft VC++ v8 64-bit
; compiler and the YASM assembler.
; AMD64 mpn_copyi -- incrementing limb vector copy
;
; Calling interface:
;
; void mpn_copyi(
; mp_ptr dst, rdi
; mp_srcptr src, rsi
; mp_size_t size rdx
; )
%include 'yasm_mac.inc'

%define UNROLL_THRESHOLD 16

%define d_ptr  rdi
%define s_ptr  rsi
%define s_len  rdx
%define s_lend edx

        BITS 64

GLOBAL_FUNC mpn_copyi
        movsxd  s_len,s_lend
        or      s_len,s_len                  ; none to move?
        jz      label1
        mov     rax,s_ptr                    ; find relative alignment of
        xor     rax,d_ptr                    ; source and destination (min
        mov     r9,s_ptr                     ; 8-byte alignment assumed)
        lea     s_ptr,[s_ptr+s_len*8]
        lea     d_ptr,[d_ptr+s_len*8]
        neg     s_len
        cmp     s_len,byte -UNROLL_THRESHOLD
        jbe     label2                       ; if many limbs to move
label0:
        mov     rax,[s_ptr+s_len*8]          ; short move via rax
        mov     [d_ptr+s_len*8],rax
        inc     s_len
        jnz     label0                       ; avoid single byte ret that
label1:
        rep     ret                          ; interferes with branch prediction
label2:
        test    al,8
        jnz     label7                       ; not 16 byte aligned
        test    r9,8                         ; see if src is on 16 byte
        jz      label3                       ; boundary
        mov     rax,[s_ptr+s_len*8]          ; if not do a one limb copy
        mov     [d_ptr+s_len*8],rax
        inc     s_len
label3:
        lea     s_len,[s_len+3]              ; now 16 byte aligned
label4:
        prefetchnta [s_ptr+s_len*8-24+3*64]  ; should this be +4*64 ??
        movdqa  xmm0,[s_ptr+s_len*8-24]      ; move 32 bytes at a time
        movntdq [d_ptr+s_len*8-24],xmm0
        movdqa  xmm0,[s_ptr+s_len*8-8]
        movntdq [d_ptr+s_len*8-8],xmm0
        add     s_len,4
        jl      label4
        sfence
        test    s_len,2
        jnz     label5
        movdqa  xmm0,[s_ptr+s_len*8-24]      ; move 16 bytes if necessary
        movdqa  [d_ptr+s_len*8-24],xmm0
        add     s_len,2
label5:
        test    s_len,1
        jnz     label6
        movq    xmm0,[s_ptr+s_len*8-24]      ; move 8 bytes if necessary
        movq    [d_ptr+s_len*8-24],xmm0
label6:
        ret
label7:
        lea     s_len,[s_len+1]              ; move 8 bytes at a time
label8:
        movq    xmm0,[s_ptr+s_len*8-8]
        movq    xmm1,[s_ptr+s_len*8]
        movq    [d_ptr+s_len*8-8],xmm0
        movq    [d_ptr+s_len*8],xmm1
        add     s_len,2
        jl      label8
        test    s_len,1
        jnz     label9
        movq    xmm0,[s_ptr-8]
        movq    [d_ptr-8],xmm0
label9:
        ret
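
Similarly, the mpn_copyi interface documented at the top of copyi.as amounts to an incrementing limb copy, the overlap-safe direction when the destination starts below the source. The sketch below is again only illustrative; the ref_mpn_copyi name is hypothetical and not part of MPIR.

/* Illustrative C sketch of the behaviour implemented by the deleted copyi.as;
   ref_mpn_copyi is a hypothetical name, not an MPIR function definition. */
#include <mpir.h>   /* provides mp_ptr, mp_srcptr, mp_size_t */

static void ref_mpn_copyi(mp_ptr dst, mp_srcptr src, mp_size_t size)
{
    mp_size_t i;
    for (i = 0; i < size; i++)        /* lowest limb first */
        dst[i] = src[i];
}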