extend add_n/sub_n assembler code for Skylake AVX to provide add_nc/sub_nc
This commit is contained in:
parent
bd83f26a70
commit
b961c90963
@ -37,6 +37,16 @@
|
||||
%define SizeB R9B ; check if this fits to code alignment!
|
||||
%define Count R11
|
||||
|
||||
align 32
|
||||
|
||||
; mpn_add_nc - entry point for addition with an incoming carry.
; Loads CarryD from the stack, sets up Count as (Size >> 3) + 1, then jumps
; to the label One, where bit 0 of CarryB is shifted into CF ahead of the
; first adc.
; NOTE(review): [rsp+40] looks like the fifth-argument stack slot of the
; Microsoft x64 convention (return address + 32 bytes of shadow space) -
; confirm against the register %defines and the target ABI.
LEAF_PROC mpn_add_nc
|
||||
mov CarryD, [rsp+40] ; CarryD = dword at rsp+40 (presumably the carry-in argument)
|
||||
mov Count, Size ; Count = Size ...
|
||||
shr Count, 3 ; ... >> 3 ...
|
||||
inc Count ; ... + 1
|
||||
vpor YMM0, YMM0, YMM0 ; see comment in main loop below
|
||||
jmp One ; continue at the non-local label One
|
||||
|
||||
align 32
|
||||
|
||||
LEAF_PROC mpn_add_n
|
||||
@ -44,18 +54,16 @@
|
||||
mov Count, Size
|
||||
shr Count, 3
|
||||
inc Count
|
||||
|
||||
vpor YMM0, YMM0, YMM0 ; see comment in main loop below
|
||||
|
||||
; unrolling the loop from small to high gives better timings
|
||||
; when considering all sizes 1-100 limb
|
||||
.One:
|
||||
|
||||
One:
|
||||
test SizeB, 1
|
||||
je .Two
|
||||
|
||||
mov Limb0, [byte Op1] ; I am using implicit code alignment through-
|
||||
add Limb0, [byte Op2] ; out the following to get all branch targets
|
||||
shr CarryB, 1
|
||||
mov Limb0, [Op1] ; I am using implicit code alignment through-
|
||||
adc Limb0, [Op2] ; out the following to get all branch targets
|
||||
mov [Op3], Limb0 ; on 16 byte alignments - check this if non-
|
||||
setc CarryB ; Linux register allocation is used!
|
||||
|
||||
|
@ -38,24 +38,30 @@
|
||||
%define Count R11
|
||||
|
||||
align 32
|
||||
; mpn_sub_nc - entry point for subtraction with an incoming borrow.
; Loads BorrowD from the stack, sets up Count as (Size >> 3) + 1, then jumps
; to the label One, where bit 0 of BorrowB is shifted into CF ahead of the
; first sbb.
; NOTE(review): [rsp+40] looks like the fifth-argument stack slot of the
; Microsoft x64 convention (return address + 32 bytes of shadow space) -
; confirm against the register %defines and the target ABI.
LEAF_PROC mpn_sub_nc
|
||||
mov BorrowD, [rsp+40] ; BorrowD = dword at rsp+40 (presumably the borrow-in argument)
|
||||
mov Count, Size ; Count = Size ...
|
||||
shr Count, 3 ; ... >> 3 ...
|
||||
inc Count ; ... + 1
|
||||
vpor YMM0, YMM0, YMM0 ; see comment in main loop below
|
||||
jmp One ; continue at the non-local label One
|
||||
|
||||
align 32
|
||||
LEAF_PROC mpn_sub_n
|
||||
xor BorrowD, BorrowD
|
||||
mov Count, Size
|
||||
shr Count, 3
|
||||
inc Count
|
||||
|
||||
vpor YMM0, YMM0, YMM0 ; see comment in main loop below
|
||||
|
||||
; unrolling the loop from small to high gives better timings
|
||||
; when considering all sizes 1-100 limb
|
||||
.One:
|
||||
|
||||
One:
|
||||
test SizeB, 1
|
||||
je .Two
|
||||
|
||||
mov Limb0, [byte Op1] ; I am using implicit code alignment through-
|
||||
sub Limb0, [byte Op2] ; out the following to get all branch targets
|
||||
shr BorrowB, 1
|
||||
mov Limb0, [Op1] ; I am using implicit code alignment through-
|
||||
sbb Limb0, [Op2] ; out the following to get all branch targets
|
||||
mov [Op3], Limb0 ; on 16 byte alignments - check this if non-
|
||||
setc BorrowB ; Linux register allocation is used!
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user