diff --git a/build.vc14/cdata/mpn/x86_64w/sandybridge/cfg.h b/build.vc14/cdata/mpn/x86_64w/sandybridge/cfg.h
index 4effa96c..f33b195a 100644
--- a/build.vc14/cdata/mpn/x86_64w/sandybridge/cfg.h
+++ b/build.vc14/cdata/mpn/x86_64w/sandybridge/cfg.h
@@ -34,6 +34,7 @@ mpn_modexact_1c_odd
mpn_mul_1
mpn_mul_2
mpn_mul_basecase
+mpn_mullow_n_basecase
mpn_mulmid_basecase
mpn_nand_n
mpn_nior_n
diff --git a/build.vc14/dll_mpir_sandybridge/dll_mpir_sandybridge.vcxproj b/build.vc14/dll_mpir_sandybridge/dll_mpir_sandybridge.vcxproj
index 2c8b07ae..f33b7bfd 100644
--- a/build.vc14/dll_mpir_sandybridge/dll_mpir_sandybridge.vcxproj
+++ b/build.vc14/dll_mpir_sandybridge/dll_mpir_sandybridge.vcxproj
@@ -354,6 +354,10 @@ postbuild "$(TargetPath)" 14
+
+
+
+
@@ -385,6 +389,7 @@ postbuild "$(TargetPath)" 14
+
@@ -469,6 +474,7 @@ postbuild "$(TargetPath)" 14
+
@@ -527,7 +533,6 @@ postbuild "$(TargetPath)" 14
-
@@ -537,11 +542,14 @@ postbuild "$(TargetPath)" 14
+
+
+
@@ -615,6 +623,7 @@ postbuild "$(TargetPath)" 14
+
diff --git a/build.vc14/dll_mpir_sandybridge/dll_mpir_sandybridge.vcxproj.filters b/build.vc14/dll_mpir_sandybridge/dll_mpir_sandybridge.vcxproj.filters
index 30484590..24d6f01d 100644
--- a/build.vc14/dll_mpir_sandybridge/dll_mpir_sandybridge.vcxproj.filters
+++ b/build.vc14/dll_mpir_sandybridge/dll_mpir_sandybridge.vcxproj.filters
@@ -778,6 +778,18 @@
Source Files\mpz
+
+ Source Files\mpz
+
+
+ Source Files\mpz
+
+
+ Source Files\mpz
+
+
+ Source Files\mpz
+
Source Files\mpz
@@ -871,6 +883,9 @@
Source Files\mpz
+
+ Source Files\mpz
+
Source Files\mpz
@@ -1123,6 +1138,9 @@
Source Files\mpn
+
+ Source Files\mpn
+
Source Files\mpn
@@ -1297,9 +1315,6 @@
Source Files\mpn
-
- Source Files\mpn
-
Source Files\mpn
@@ -1327,6 +1342,12 @@
Source Files\mpn
+
+ Source Files\mpn
+
+
+ Source Files\mpn
+
Source Files\mpn
@@ -1342,6 +1363,9 @@
Source Files\mpn
+
+ Source Files\mpn
+
Source Files\mpn
@@ -1557,6 +1581,9 @@
Source Files\mpn\yasm
+
+ Source Files\mpn\yasm
+
Source Files\mpn\yasm
diff --git a/build.vc14/lib_mpir_sandybridge/lib_mpir_sandybridge.vcxproj b/build.vc14/lib_mpir_sandybridge/lib_mpir_sandybridge.vcxproj
index 07552c54..8902ba8a 100644
--- a/build.vc14/lib_mpir_sandybridge/lib_mpir_sandybridge.vcxproj
+++ b/build.vc14/lib_mpir_sandybridge/lib_mpir_sandybridge.vcxproj
@@ -350,6 +350,10 @@ postbuild "$(TargetPath)" 14
+
+
+
+
@@ -381,6 +385,7 @@ postbuild "$(TargetPath)" 14
+
@@ -454,6 +459,7 @@ postbuild "$(TargetPath)" 14
+
@@ -512,7 +518,6 @@ postbuild "$(TargetPath)" 14
-
@@ -522,11 +527,14 @@ postbuild "$(TargetPath)" 14
+
+
+
@@ -600,6 +608,7 @@ postbuild "$(TargetPath)" 14
+
diff --git a/build.vc14/lib_mpir_sandybridge/lib_mpir_sandybridge.vcxproj.filters b/build.vc14/lib_mpir_sandybridge/lib_mpir_sandybridge.vcxproj.filters
index c47d7142..1ac34ba0 100644
--- a/build.vc14/lib_mpir_sandybridge/lib_mpir_sandybridge.vcxproj.filters
+++ b/build.vc14/lib_mpir_sandybridge/lib_mpir_sandybridge.vcxproj.filters
@@ -777,6 +777,18 @@
Source Files\mpz
+
+ Source Files\mpz
+
+
+ Source Files\mpz
+
+
+ Source Files\mpz
+
+
+ Source Files\mpz
+
Source Files\mpz
@@ -870,6 +882,9 @@
Source Files\mpz
+
+ Source Files\mpz
+
Source Files\mpz
@@ -1089,6 +1104,9 @@
Source Files\mpn
+
+ Source Files\mpn
+
Source Files\mpn
@@ -1263,9 +1281,6 @@
Source Files\mpn
-
- Source Files\mpn
-
Source Files\mpn
@@ -1293,6 +1308,12 @@
Source Files\mpn
+
+ Source Files\mpn
+
+
+ Source Files\mpn
+
Source Files\mpn
@@ -1308,6 +1329,9 @@
Source Files\mpn
+
+ Source Files\mpn
+
Source Files\mpn
@@ -1523,6 +1547,9 @@
Source Files\mpn\yasm
+
+ Source Files\mpn\yasm
+
Source Files\mpn\yasm
diff --git a/mpn/x86_64w/mullow_n_basecase.asm b/mpn/x86_64w/mullow_n_basecase.asm
new file mode 100644
index 00000000..35706248
--- /dev/null
+++ b/mpn/x86_64w/mullow_n_basecase.asm
@@ -0,0 +1,336 @@
+; AMD64 mpn_mullow_n_basecase
+;
+; Copyright 2015 Free Software Foundation, Inc.
+;
+; This file is part of the GNU MP Library.
+;
+; The GNU MP Library is free software; you can redistribute it and/or modify
+; it under the terms of either:
+;
+; * the GNU Lesser General Public License as published by the Free
+; Software Foundation; either version 3 of the License, or (at your
+; option) any later version.
+;
+; or
+;
+; * the GNU General Public License as published by the Free Software
+; Foundation; either version 2 of the License, or (at your option) any
+; later version.
+;
+; or both in parallel, as here.
+;
+; The GNU MP Library is distributed in the hope that it will be useful, but
+; WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+; for more details.
+;
+; You should have received copies of the GNU General Public License and the
+; GNU Lesser General Public License along with the GNU MP Library. If not,
+; see https://www.gnu.org/licenses/.
+;
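+; void mpn_mullow_n_basecase (mp_ptr rp, mp_srcptr up, mp_srcptr vp, mp_size_t n)
+; System V registers (the helper procedures copy the arguments into these): rdi rsi rdx rcx
+; Win64 registers (as the arguments arrive on entry):                       rcx rdx r8  r9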
+
+%include 'yasm_mac.inc'
+
+ TEXT
+
+ LEAF_PROC mpn_mullow_n_basecase      ; low n limbs of up * vp -> rp; Win64 entry: rcx = rp, rdx = up, r8 = vp, r9 = n
+ cmp r9, 3                            ; dispatch on the limb count n
+ je asm_sym(?mpn_mullow1)             ; n == 3: straight-line three-limb helper
+ jg asm_sym(?mpn_mullow2)             ; n > 3: general looped helper
+ mov rax, [rdx]                       ; rax = up[0]
+ cmp r9, 1
+ jg .3                                ; n == 2; anything else falls through to the one-limb path
+
+.2: imul rax, [r8]                    ; one limb: only vp[0] is read, so nothing past the operand is touched
+ mov [rcx], rax                       ; rp[0] = low(up[0] * vp[0])
+ ret
+
+.3: mov r9, [rdx+8]                   ; r9 = up[1]; read now because mul below overwrites rdx
+ mov r11, [r8+8]                      ; r11 = vp[1]; loaded only on this path (it was an out-of-bounds read for n == 1)
+ mov r8, [r8]                         ; r8 = vp[0]; the vp pointer is no longer needed
+ imul r11, rax                        ; r11 = low(up[0] * vp[1])
+ mul r8                               ; rdx:rax = up[0] * vp[0]
+ mov [rcx], rax                       ; rp[0] = low(up[0] * vp[0])
+ imul r8, r9                          ; r8 = low(up[1] * vp[0])
+ lea rax, [r11+rdx]                   ; high(up[0]*vp[0]) + low(up[0]*vp[1]); carries out of limb 1 are discarded
+ add rax, r8                          ; ... + low(up[1]*vp[0])
+ mov [rcx+8], rax                     ; rp[1]
+ ret
+
+%define reg_save_list rsi, rdi
+; n == 3 case, reached from mpn_mullow_n_basecase: straight-line code producing rp[0..2], the low three limbs of up[0..2] * vp[0..2].
+ FRAME_PROC ?mpn_mullow1, 0, reg_save_list   ; reg_save_list is passed to the frame macro (presumably saved here and restored by END_PROC - macro body not visible)
+ mov rdi, rcx                         ; rdi = rp (first Win64 argument)
+ mov rsi, rdx                         ; rsi = up
+ mov rdx, r8                          ; rdx = vp
+ mov rcx, r9                          ; rcx = n; never read afterwards, rcx is overwritten below
+ mov rax, [rsi]                       ; rax = up[0]
+ mov r8, [rdx]                        ; r8 = vp[0]
+ mov r9, [rdx+8]                      ; r9 = vp[1]
+ mov r11, [rdx+16]                    ; r11 = vp[2]; all vp limbs are fetched before mul overwrites rdx
+ mul r8                               ; rdx:rax = up[0] * vp[0]
+ mov [rdi], rax                       ; rp[0] = low(up[0] * vp[0])
+ mov rax, [rsi]                       ; reload up[0]
+ mov rcx, rdx                         ; rcx accumulates limb 1, starting with high(up[0] * vp[0])
+ mul r9                               ; rdx:rax = up[0] * vp[1]
+ imul r9, [rsi+8]                     ; r9 accumulates limb 2, starting with low(up[1] * vp[1])
+ mov r10, [rsi+16]                    ; r10 = up[2]
+ imul r10, r8                         ; r10 = low(up[2] * vp[0])
+ add rcx, rax                         ; limb 1 += low(up[0] * vp[1])
+ adc r9, rdx                          ; limb 2 += high(up[0] * vp[1]) + carry out of limb 1
+ add r9, r10                          ; limb 2 += low(up[2] * vp[0])
+ mov rax, [rsi+8]                     ; rax = up[1]
+ mul r8                               ; rdx:rax = up[1] * vp[0]
+ add rcx, rax                         ; limb 1 += low(up[1] * vp[0])
+ adc r9, rdx                          ; limb 2 += high(up[1] * vp[0]) + carry out of limb 1
+ mov rax, r11                         ; rax = vp[2]
+ imul rax, [rsi]                      ; rax = low(up[0] * vp[2])
+ add r9, rax                          ; limb 2 += low(up[0] * vp[2]); carries out of limb 2 are discarded
+ mov [rdi+8], rcx                     ; rp[1]
+ mov [rdi+16], r9                     ; rp[2]
+ END_PROC reg_save_list               ; no explicit ret here: presumably the macro emits the epilogue and return - confirm in yasm_mac.inc
+
+%define reg_save_list rsi, rdi, rbx, rbp, r13, r14, r15
+; General case (n > 3), reached from mpn_mullow_n_basecase. Roles: rdi = rp, rsi = up, r11 = vp, rcx = -n, r13/r14 = current vp limbs, r9 = inner index, r8 = next outer-pass entry, rbx/rbp/r10/r15 = rotating accumulators.
+ align 16
+ FRAME_PROC ?mpn_mullow2, 0, reg_save_list   ; reg_save_list is passed to the frame macro (presumably saved here and restored by END_PROC - macro body not visible)
+ mov rdi, rcx                         ; rdi = rp (first Win64 argument)
+ mov rsi, rdx                         ; rsi = up
+ mov rdx, r8                          ; rdx = vp
+ mov rcx, r9                          ; rcx = n
+.5: mov rax, [rsi]                    ; rax = up[0]
+ mov r13, [rdx]                       ; r13 = vp[0]
+ mov r11, rdx                         ; keep vp in r11; rdx is overwritten by every mul
+ lea rdi, [rdi+rcx*8]                 ; rdi = rp + n limbs
+ lea rsi, [rsi+rcx*8]                 ; rsi = up + n limbs
+ neg rcx                              ; rcx = -n, so negative indices address the operands from their ends
+ mul r13                              ; rdx:rax = up[0] * vp[0]
+ test cl, 1                           ; low bit of -n equals low bit of n
+ jz .12                               ; n even: first pass uses two vp limbs
+.6: lea rdi, [rdi-8]                  ; n odd: first pass multiplies by the single limb r13
+ lea rsi, [rsi-8]
+ test cl, 2
+ jnz .8                               ; bit 1 of -n set: n mod 4 == 1
+.7: lea r9, [rcx-1]                   ; n mod 4 == 3: r9 = inner index, negative, stepped by 4
+ lea r8, [rel .18]                    ; outer passes after this one start at .18
+ mov rbx, rax                         ; seed accumulators with up[0] * vp[0]
+ mov r15, rdx
+ xor ebp, ebp
+ xor r10d, r10d
+ mov rax, [rsi+rcx*8+16]              ; rsi was lowered by 8 above, so this is up[1]
+ jmp .11
+
+.8: lea r9, [rcx+1]                   ; n mod 4 == 1
+ lea r8, [rel .19]                    ; outer passes after this one start at .19
+ mov rbp, rax                         ; seed accumulators with up[0] * vp[0]
+ mov r10, rdx
+ xor ebx, ebx
+ jmp .10
+
+ align 16
+.9: mov [rdi+r9*8-16], rbx            ; single-limb inner loop: four mul r13 and four stores to rp per iteration
+ add r15, rax
+ mov rax, [rsi+r9*8]
+ adc rbp, rdx
+ xor ebx, ebx
+ mul r13
+ mov [rdi+r9*8-8], r15
+ add rbp, rax
+ adc r10, rdx
+.10:mov rax, [rsi+r9*8+8]             ; loop entry used when n mod 4 == 1
+ mul r13
+ mov [rdi+r9*8], rbp
+ add r10, rax
+ adc rbx, rdx
+ mov rax, [rsi+r9*8+16]
+ mul r13
+ mov [rdi+r9*8+8], r10
+ xor ebp, ebp                         ; rbp = 0, then copied to zero r10 and r15 without touching flags
+ mov r10, rbp
+ add rbx, rax
+ mov rax, [rsi+r9*8+24]
+ mov r15, rbp
+ adc r15, rdx
+.11:mul r13                           ; loop entry used when n mod 4 == 3
+ add r9, 4
+ js .9                                ; repeat while the index is still negative
+ mov [rdi-16], rbx                    ; wind-down: store the remaining limbs of this pass
+ add r15, rax
+ mov [rdi-8], r15
+ adc rbp, rdx
+ imul r13, [rsi]                      ; [rsi] is up[n-1] here; only the low half of its product feeds the top limb
+ add rbp, r13
+ mov [rdi], rbp                       ; top result limb rp[n-1]
+ add rcx, 1                           ; one vp limb consumed
+ jz .23
+ mov r13, [r11+8]                     ; next pair of vp limbs for the two-limb passes
+ mov r14, [r11+16]
+ lea rsi, [rsi+16]
+ lea r11, [r11+8]                     ; advance vp by the one limb just used
+ lea rdi, [rdi+24]
+ jmp r8                               ; continue at .18 or .19 as selected above
+
+.12:mov r14, [r11+8]                  ; n even: r14 = vp[1], first pass multiplies by r13 and r14 together
+ test cl, 2
+ jz .14                               ; bit 1 of -n clear: n mod 4 == 0
+
+ align 16
+.13:lea r9, [rcx+0]                   ; n mod 4 == 2
+ mov r10, rax                         ; seed accumulators with up[0] * vp[0]
+ mov rbx, rdx
+ xor r15d, r15d
+ mov rax, [rsi+rcx*8]                 ; rsi = up + n and rcx = -n, so this reloads up[0]
+ lea r8, [rel .19]                    ; outer passes after this one start at .19
+ jmp .17
+
+ align 16
+.14:lea r9, [rcx+2]                   ; n mod 4 == 0
+ mov r10d, 0
+ mov r15, rax                         ; seed accumulators with up[0] * vp[0]
+ mov rax, [rsi+rcx*8]                 ; reloads up[0]
+ mov rbp, rdx
+ lea r8, [rel .18]                    ; outer passes after this one start at .18
+ jmp .16
+
+ align 16
+.15:mov rax, [rsi+r9*8-32]            ; two-limb inner loop: eight muls (r13 and r14 alternating), four stores to rp per iteration
+ mul r14
+ add rbx, rax
+ adc r15, rdx
+ mov rax, [rsi+r9*8-24]
+ xor ebp, ebp
+ mul r13
+ add rbx, rax
+ mov rax, [rsi+r9*8-24]
+ adc r15, rdx
+ adc ebp, 0
+ mul r14
+ add r15, rax
+ mov [rdi+r9*8-24], rbx
+ adc rbp, rdx
+ mov rax, [rsi+r9*8-16]
+ mul r13
+ mov r10d, 0                          ; mov leaves the flags alone, unlike xor
+ add r15, rax
+ adc rbp, rdx
+ mov rax, [rsi+r9*8-16]
+ adc r10d, 0
+.16:mov ebx, 0                        ; loop entry used when n mod 4 == 0
+ mov [rdi+r9*8-16], r15
+ mul r14
+ add rbp, rax
+ mov rax, [rsi+r9*8-8]
+ adc r10, rdx
+ mov r15d, 0
+ mul r13
+ add rbp, rax
+ mov rax, [rsi+r9*8-8]
+ adc r10, rdx
+ adc ebx, r15d                        ; r15d is zero here, so this only captures the carry
+ mul r14
+ add r10, rax
+ mov [rdi+r9*8-8], rbp
+ adc rbx, rdx
+ mov rax, [rsi+r9*8]
+ mul r13
+ add r10, rax
+ adc rbx, rdx
+ adc r15d, 0
+.17:add r9, 4                         ; loop entry used when n mod 4 == 2
+ mov [rdi+r9*8-32], r10               ; mov does not change the flags set by add r9, 4
+ js .15                               ; repeat while the index is still negative
+ imul r14, [rsi-16]                   ; wind-down: low(up[n-2] * r14) ...
+ add rbx, r14
+ imul r13, [rsi-8]                    ; ... and low(up[n-1] * r13) complete the top limb
+ add rbx, r13
+ mov [rdi-8], rbx                     ; top result limb rp[n-1]
+ add rcx, 2                           ; two vp limbs consumed
+ jz .23
+ mov r13, [r11+16]                    ; next pair of vp limbs
+ mov r14, [r11+24]
+ lea r11, [r11+16]                    ; advance vp by the two limbs just used
+ lea rdi, [rdi+16]
+ jmp r8                               ; continue at .18 or .19 as selected above
+.18:lea r9, [rcx-2]                   ; outer-pass entry A: sets up a pass that adds into rp, entering the loop at .22
+ mov rax, [rsi+rcx*8-16]
+ mul r13
+ mov r10, rax
+ mov rax, [rsi+rcx*8-16]              ; same up limb again, for the mul r14 at .22
+ mov rbx, rdx
+ xor r15d, r15d
+ lea r8, [rel .19]                    ; the pass after this one uses entry B
+ jmp .22
+.19:lea r9, [rcx+0]                   ; outer-pass entry B: as above, entering the loop at .21
+ mov rax, [rsi+rcx*8-16]
+ xor r10d, r10d
+ mul r13
+ mov r15, rax
+ mov rax, [rsi+rcx*8-16]              ; same up limb again, for the mul r14 at .21
+ mov rbp, rdx
+ lea r8, [rel .18]                    ; the pass after this one uses entry A
+ jmp .21
+
+ align 16
+.20:add [rdi+r9*8-32], r10            ; two-limb accumulate loop: eight muls, four read-modify-write adds into rp per iteration
+ adc rbx, rax
+ mov rax, [rsi+r9*8-24]
+ adc r15, rdx
+ xor ebp, ebp
+ mul r13
+ add rbx, rax
+ mov rax, [rsi+r9*8-24]
+ adc r15, rdx
+ adc ebp, ebp                         ; ebp was zeroed above, so this leaves just the carry
+ mul r14
+ xor r10d, r10d
+ add [rdi+r9*8-24], rbx
+ adc r15, rax
+ mov rax, [rsi+r9*8-16]
+ adc rbp, rdx
+ mul r13
+ add r15, rax
+ mov rax, [rsi+r9*8-16]
+ adc rbp, rdx
+ adc r10d, 0
+.21:mul r14                           ; loop entry reached from .19
+ add [rdi+r9*8-16], r15
+ adc rbp, rax
+ mov rax, [rsi+r9*8-8]
+ adc r10, rdx
+ mul r13
+ xor ebx, ebx
+ add rbp, rax
+ adc r10, rdx
+ mov r15d, 0                          ; mov leaves the pending carry intact for the adc below
+ mov rax, [rsi+r9*8-8]
+ adc ebx, r15d
+ mul r14
+ add [rdi+r9*8-8], rbp
+ adc r10, rax
+ adc rbx, rdx
+ mov rax, [rsi+r9*8]
+ mul r13
+ add r10, rax
+ mov rax, [rsi+r9*8]
+ adc rbx, rdx
+ adc r15d, 0
+.22:mul r14                           ; loop entry reached from .18
+ add r9, 4
+ js .20                               ; repeat while the index is still negative
+ add [rdi-32], r10                    ; wind-down: fold the last two limbs of this pass into rp
+ adc rbx, rax
+ imul r13, [rsi-24]                   ; only the low half of this product feeds the top limb
+ add rbx, r13
+ add [rdi-24], rbx
+ add rcx, 2                           ; two more vp limbs consumed
+ jns .23                              ; counter no longer negative: all passes done
+ lea r11, [r11+16]                    ; advance vp by the two limbs just used
+ mov r13, [r11]                       ; next pair of vp limbs
+ mov r14, [r11+8]
+ lea rsi, [rsi-16]
+ jmp r8                               ; alternate between entries A (.18) and B (.19)
+.23:
+ END_PROC reg_save_list               ; no explicit ret here: presumably the macro emits the epilogue and return - confirm in yasm_mac.inc
diff --git a/mpn/x86_64w/yasm_mac.inc b/mpn/x86_64w/yasm_mac.inc
index 52804569..19ee6bbd 100644
--- a/mpn/x86_64w/yasm_mac.inc
+++ b/mpn/x86_64w/yasm_mac.inc
@@ -63,6 +63,8 @@
%define r6b bpl
%define r7b spl
+%define asm_sym(x) __g %+ x          ; pastes the __g prefix onto x, giving the decorated symbol name used by the PROC macros and by callers that jump to a procedure by name
+
; Standard macro for alignment (used to allow easy subsititution of
; alternative padding schemes)
@@ -105,13 +107,13 @@
%macro FRAME_PROC 2-*
- global __g%1
+ global asm_sym(%1)
%ifdef DLL
- export __g%1
+ export asm_sym(%1)
%endif
- PROC_FRAME __g%1
+ PROC_FRAME asm_sym(%1)
%rotate 1
%if %1 < 0
@@ -216,13 +218,13 @@
%macro LEAF_PROC 1
- global __g%1
+ global asm_sym(%1)                   ; make the decorated name (asm_sym of the macro argument) visible to the linker
%ifdef DLL
- export __g%1
+ export asm_sym(%1)                   ; DLL builds additionally export the decorated name
%endif
-__g%1:
+asm_sym(%1):                          ; entry label; the macro emits no prologue or frame code
%endmacro