From 27b3a6de72b7a9ba6b0e2e366217a4895e8df000 Mon Sep 17 00:00:00 2001 From: gladman Date: Mon, 10 Aug 2009 16:47:45 +0000 Subject: [PATCH] Add mpn_add_nc and mpn_sub_nc to the Windows Core2 assembler --- build.vc9/config.core2 | 2 ++ mpn/x86_64w/amd64/add_n.asm | 2 +- mpn/x86_64w/amd64/sub_n.asm | 2 +- mpn/x86_64w/core2/add_n.asm | 15 ++++++++++++--- mpn/x86_64w/core2/sub_n.asm | 13 +++++++++++-- 5 files changed, 27 insertions(+), 7 deletions(-) diff --git a/build.vc9/config.core2 b/build.vc9/config.core2 index 6b8a5153..4c75358c 100644 --- a/build.vc9/config.core2 +++ b/build.vc9/config.core2 @@ -136,6 +136,7 @@ MA 02111-1307, USA. */ #undef HAVE_NATIVE_mpn_xnor_n #define HAVE_NATIVE_mpn_add_n 1 +#define HAVE_NATIVE_mpn_add_nc 1 #define HAVE_NATIVE_mpn_addadd_n 1 #define HAVE_NATIVE_mpn_subadd_n 1 #define HAVE_NATIVE_mpn_addlsh1_n 1 @@ -162,6 +163,7 @@ MA 02111-1307, USA. */ #define HAVE_NATIVE_mpn_rshift 1 #define HAVE_NATIVE_mpn_sqr_basecase 1 #define HAVE_NATIVE_mpn_sub_n 1 +#define HAVE_NATIVE_mpn_sub_nc 1 #define HAVE_NATIVE_mpn_sublsh1_n 1 #define HAVE_NATIVE_mpn_submul_1 1 #define HAVE_NATIVE_mpn_submul_1c 1 diff --git a/mpn/x86_64w/amd64/add_n.asm b/mpn/x86_64w/amd64/add_n.asm index 6c2eeb55..b596ab8c 100644 --- a/mpn/x86_64w/amd64/add_n.asm +++ b/mpn/x86_64w/amd64/add_n.asm @@ -64,7 +64,7 @@ entry: and rax, 3 shr r9, 2 lea r9,[r10+r9*2] - shr r9, 1 + sar r9, 1 jnz .2 mov r10, [rdx] diff --git a/mpn/x86_64w/amd64/sub_n.asm b/mpn/x86_64w/amd64/sub_n.asm index 82fe130c..eb92420c 100644 --- a/mpn/x86_64w/amd64/sub_n.asm +++ b/mpn/x86_64w/amd64/sub_n.asm @@ -64,7 +64,7 @@ entry: and rax, 3 shr r9, 2 lea r9,[r10+r9*2] - shr r9, 1 + sar r9, 1 jnz .2 mov r10, [rdx] diff --git a/mpn/x86_64w/core2/add_n.asm b/mpn/x86_64w/core2/add_n.asm index 092914a0..c63b3bac 100644 --- a/mpn/x86_64w/core2/add_n.asm +++ b/mpn/x86_64w/core2/add_n.asm @@ -30,7 +30,14 @@ CPU Core2 BITS 64 - LEAF_PROC mpn_add_n + LEAF_PROC mpn_add_nc + mov r10, [rsp+0x28] + jmp mpn_add_entry + + LEAF_PROC mpn_add_n + xor r10, r10 + +mpn_add_entry: movsxd rax, r9d mov r9, rcx mov rcx, rax @@ -40,8 +47,10 @@ lea rdx, [rdx+rcx*8] lea r8, [r8+rcx*8] neg rcx - cmp rcx, 0 - jz L_skiplp + lea rcx, [r10+rcx*2] + sar rcx, 1 + jz L_exitlp + xalign 16 L_lp: mov r10, [rdx+rcx*8] diff --git a/mpn/x86_64w/core2/sub_n.asm b/mpn/x86_64w/core2/sub_n.asm index bd772575..fa0e5114 100644 --- a/mpn/x86_64w/core2/sub_n.asm +++ b/mpn/x86_64w/core2/sub_n.asm @@ -30,7 +30,14 @@ CPU Core2 BITS 64 + LEAF_PROC mpn_sub_nc + mov r10, [rsp+0x28] + jmp mpn_sub_entry + LEAF_PROC mpn_sub_n + xor r10, r10 + +mpn_sub_entry: movsxd rax, r9d mov r9, rcx mov rcx, rax @@ -40,8 +47,10 @@ lea rdx, [rdx+rcx*8] lea r8, [r8+rcx*8] neg rcx - cmp rcx, 0 - jz L_skiplp + lea rcx, [r10+rcx*2] + sar rcx, 1 + jz L_exitlp + xalign 16 L_lp: mov r10, [rdx+rcx*8]