sandy2x: clean the upper halves of the AVX registers

On Linux, with dynamic linking, the upper halves of the AVX registers are
not zero on entry, which introduces a massive performance penalty due to
state transitions.

Thanks to Tung Chou and Samuel Neves for catching this, and to
@theakman2 for his initial report.
This commit is contained in:
Frank Denis 2016-05-16 23:34:03 +02:00
parent 593599a11a
commit f361d1ccec
2 changed files with 2 additions and 0 deletions

View File

@ -17,6 +17,7 @@ ASM_HIDE_SYMBOL _ladder
ladder: ladder:
_ladder: _ladder:
vzeroupper
mov %rsp,%r11 mov %rsp,%r11
and $31,%r11 and $31,%r11
add $1856,%r11 add $1856,%r11

View File

@ -17,6 +17,7 @@ ASM_HIDE_SYMBOL _ladder_base
ladder_base: ladder_base:
_ladder_base: _ladder_base:
vzeroupper
mov %rsp,%r11 mov %rsp,%r11
and $31,%r11 and $31,%r11
add $1568,%r11 add $1568,%r11