sandy2x: clean the upper halves of the AVX registers
On Linux, with dynamic linking, the upper halves of the AVX registers are not zero, which introduces a massive performance penalty due to state transitions. Thanks to Tung Chou and Samuel Neves for catching this, and to @theakman2 for his initial report.
This commit is contained in:
parent
593599a11a
commit
f361d1ccec
@ -17,6 +17,7 @@ ASM_HIDE_SYMBOL _ladder
|
|||||||
ladder:
|
ladder:
|
||||||
_ladder:
|
_ladder:
|
||||||
|
|
||||||
|
vzeroupper
|
||||||
mov %rsp,%r11
|
mov %rsp,%r11
|
||||||
and $31,%r11
|
and $31,%r11
|
||||||
add $1856,%r11
|
add $1856,%r11
|
||||||
|
@ -17,6 +17,7 @@ ASM_HIDE_SYMBOL _ladder_base
|
|||||||
ladder_base:
|
ladder_base:
|
||||||
_ladder_base:
|
_ladder_base:
|
||||||
|
|
||||||
|
vzeroupper
|
||||||
mov %rsp,%r11
|
mov %rsp,%r11
|
||||||
and $31,%r11
|
and $31,%r11
|
||||||
add $1568,%r11
|
add $1568,%r11
|
||||||
|
Loading…
Reference in New Issue
Block a user