Use the assembly version of salsa20_xmm6 by default, if possible
icc produces good code from the intrinsics-based translation, clang produces okay code, but gcc doesn't perform very well at the moment. It's a bummer to have a third implementation, but salsa20 is used quite a lot in the library, so it deserves special attention. If the assembly code cannot be assembled, we fall back to the reduced version of the intrinsics-based translation, so the final library always contains at most two implementations.
This commit is contained in:
parent
7d29c0fbd7
commit
e3b9907429
@ -118,6 +118,13 @@ libsodium_la_SOURCES += \
|
||||
crypto_scalarmult/curve25519/ref10/x25519_ref10.h
|
||||
endif
|
||||
|
||||
if HAVE_AMD64_ASM
|
||||
libsodium_la_SOURCES += \
|
||||
crypto_stream/salsa20/xmm6/salsa20_xmm6-asm.S \
|
||||
crypto_stream/salsa20/xmm6/salsa20_xmm6.c \
|
||||
crypto_stream/salsa20/xmm6/salsa20_xmm6.h
|
||||
endif
|
||||
|
||||
noinst_HEADERS = \
|
||||
crypto_scalarmult/curve25519/sandy2x/consts.S \
|
||||
crypto_scalarmult/curve25519/sandy2x/fe51_mul.S \
|
||||
@ -206,12 +213,16 @@ libsse2_la_CPPFLAGS = $(libsodium_la_CPPFLAGS) \
|
||||
libsse2_la_SOURCES = \
|
||||
crypto_pwhash/scryptsalsa208sha256/sse/pwhash_scryptsalsa208sha256_sse.c \
|
||||
crypto_onetimeauth/poly1305/sse2/poly1305_sse2.c \
|
||||
crypto_onetimeauth/poly1305/sse2/poly1305_sse2.h \
|
||||
crypto_onetimeauth/poly1305/sse2/poly1305_sse2.h
|
||||
|
||||
if !HAVE_AMD64_ASM
|
||||
libsse2_la_SOURCES += \
|
||||
crypto_stream/salsa20/xmm6int/salsa20_xmm6int-sse2.c \
|
||||
crypto_stream/salsa20/xmm6int/salsa20_xmm6int-sse2.h \
|
||||
crypto_stream/salsa20/xmm6int/u0.h \
|
||||
crypto_stream/salsa20/xmm6int/u1.h \
|
||||
crypto_stream/salsa20/xmm6int/u4.h
|
||||
endif
|
||||
|
||||
libssse3_la_LDFLAGS = $(libsodium_la_LDFLAGS)
|
||||
libssse3_la_CPPFLAGS = $(libsodium_la_CPPFLAGS) \
|
||||
@ -245,4 +256,7 @@ libavx2_la_SOURCES = \
|
||||
crypto_stream/chacha20/dolbeau/u8.h \
|
||||
crypto_stream/salsa20/xmm6int/salsa20_xmm6int-avx2.c \
|
||||
crypto_stream/salsa20/xmm6int/salsa20_xmm6int-avx2.h \
|
||||
crypto_stream/salsa20/xmm6int/u0.h \
|
||||
crypto_stream/salsa20/xmm6int/u1.h \
|
||||
crypto_stream/salsa20/xmm6int/u4.h
|
||||
crypto_stream/salsa20/xmm6int/u8.h
|
||||
|
@ -4,8 +4,12 @@
|
||||
#include "runtime.h"
|
||||
#include "stream_salsa20.h"
|
||||
|
||||
#include "ref/salsa20_ref.h"
|
||||
#ifdef HAVE_EMMINTRIN_H
|
||||
#ifdef HAVE_AMD64_ASM
|
||||
# include "xmm6/salsa20_xmm6.h"
|
||||
#else
|
||||
# include "ref/salsa20_ref.h"
|
||||
#endif
|
||||
#if !defined(HAVE_AMD64_ASM) && defined(HAVE_EMMINTRIN_H)
|
||||
# include "xmm6int/salsa20_xmm6int-sse2.h"
|
||||
#endif
|
||||
#if defined(HAVE_AVX2INTRIN_H) && defined(HAVE_EMMINTRIN_H) && \
|
||||
@ -13,9 +17,9 @@
|
||||
# include "xmm6int/salsa20_xmm6int-avx2.h"
|
||||
#endif
|
||||
|
||||
#if defined(HAVE_EMMINTRIN_H) && defined(__x86_64__)
|
||||
#if HAVE_AMD64_ASM
|
||||
static const crypto_stream_salsa20_implementation *implementation =
|
||||
&crypto_stream_salsa20_xmm6int_sse2_implementation;
|
||||
&crypto_stream_salsa20_xmm6_implementation;
|
||||
#else
|
||||
static const crypto_stream_salsa20_implementation *implementation =
|
||||
&crypto_stream_salsa20_ref_implementation;
|
||||
@ -66,8 +70,8 @@ crypto_stream_salsa20_keygen(unsigned char k[crypto_stream_salsa20_KEYBYTES])
|
||||
int
|
||||
_crypto_stream_salsa20_pick_best_implementation(void)
|
||||
{
|
||||
#if defined(HAVE_EMMINTRIN_H) && defined(__x86_64__)
|
||||
implementation = &crypto_stream_salsa20_xmm6int_sse2_implementation;
|
||||
#ifdef HAVE_AMD64_ASM
|
||||
implementation = &crypto_stream_salsa20_xmm6_implementation;
|
||||
#else
|
||||
implementation = &crypto_stream_salsa20_ref_implementation;
|
||||
#endif
|
||||
@ -79,7 +83,7 @@ _crypto_stream_salsa20_pick_best_implementation(void)
|
||||
return 0;
|
||||
}
|
||||
#endif
|
||||
#ifdef HAVE_EMMINTRIN_H
|
||||
#if !defined(HAVE_AMD64_ASM) && defined(HAVE_EMMINTRIN_H)
|
||||
if (sodium_runtime_has_sse2()) {
|
||||
implementation = &crypto_stream_salsa20_xmm6int_sse2_implementation;
|
||||
return 0;
|
||||
|
@ -3,14 +3,18 @@
|
||||
.text
|
||||
.p2align 5
|
||||
|
||||
.globl crypto_stream_salsa20
|
||||
.globl _crypto_stream_salsa20
|
||||
#ifdef __ELF__
|
||||
.type crypto_stream_salsa20, @function
|
||||
.type _crypto_stream_salsa20, @function
|
||||
#ifdef ASM_HIDE_SYMBOL
|
||||
ASM_HIDE_SYMBOL stream_salsa20_xmm6
|
||||
ASM_HIDE_SYMBOL _stream_salsa20_xmm6
|
||||
#endif
|
||||
crypto_stream_salsa20:
|
||||
_crypto_stream_salsa20:
|
||||
.globl stream_salsa20_xmm6
|
||||
.globl _stream_salsa20_xmm6
|
||||
#ifdef __ELF__
|
||||
.type stream_salsa20_xmm6, @function
|
||||
.type _stream_salsa20_xmm6, @function
|
||||
#endif
|
||||
stream_salsa20_xmm6:
|
||||
_stream_salsa20_xmm6:
|
||||
mov %rsp,%r11
|
||||
and $31,%r11
|
||||
add $512,%r11
|
||||
@ -39,14 +43,18 @@ jmp ._start
|
||||
.text
|
||||
.p2align 5
|
||||
|
||||
.globl crypto_stream_salsa20_xor_ic
|
||||
.globl _crypto_stream_salsa20_xor_ic
|
||||
#ifdef __ELF__
|
||||
.type crypto_stream_salsa20_xor_ic, @function
|
||||
.type _crypto_stream_salsa20_xor_ic, @function
|
||||
#ifdef ASM_HIDE_SYMBOL
|
||||
ASM_HIDE_SYMBOL stream_salsa20_xmm6_xor_ic
|
||||
ASM_HIDE_SYMBOL _stream_salsa20_xmm6_xor_ic
|
||||
#endif
|
||||
crypto_stream_salsa20_xor_ic:
|
||||
_crypto_stream_salsa20_xor_ic:
|
||||
.globl stream_salsa20_xmm6_xor_ic
|
||||
.globl _stream_salsa20_xmm6_xor_ic
|
||||
#ifdef __ELF__
|
||||
.type stream_salsa20_xmm6_xor_ic, @function
|
||||
.type _stream_salsa20_xmm6_xor_ic, @function
|
||||
#endif
|
||||
stream_salsa20_xmm6_xor_ic:
|
||||
_stream_salsa20_xmm6_xor_ic:
|
||||
|
||||
mov %rsp,%r11
|
||||
and $31,%r11
|
25
src/libsodium/crypto_stream/salsa20/xmm6/salsa20_xmm6.c
Normal file
25
src/libsodium/crypto_stream/salsa20/xmm6/salsa20_xmm6.c
Normal file
@ -0,0 +1,25 @@
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "utils.h"
|
||||
|
||||
#include "../stream_salsa20.h"
|
||||
#include "salsa20_xmm6.h"
|
||||
|
||||
#ifdef HAVE_AMD64_ASM
|
||||
|
||||
/* Prototype for a routine that is not defined in this C file; the assembly
 * hunk of this change exports a global label with the same name.
 * NOTE(review): presumably c/clen are the output buffer and its length,
 * n the nonce and k the key -- confirm against the assembly source. */
extern int stream_salsa20_xmm6(unsigned char *c, unsigned long long clen,
|
||||
const unsigned char *n, const unsigned char *k);
|
||||
|
||||
/* Prototype for the second routine, likewise exported as a global label by
 * the assembly hunk of this change. It takes an extra input pointer m and a
 * 64-bit value ic in addition to the arguments of stream_salsa20_xmm6.
 * NOTE(review): ic is presumably the initial block counter -- confirm. */
extern int stream_salsa20_xmm6_xor_ic(unsigned char *c, const unsigned char *m,
|
||||
unsigned long long mlen,
|
||||
const unsigned char *n,
|
||||
uint64_t ic, const unsigned char *k);
|
||||
|
||||
/* Implementation descriptor for the xmm6 variant: its `stream` and
 * `stream_xor_ic` members are set to the two externally defined
 * stream_salsa20_xmm6* routines. The dispatcher code in this change stores
 * the address of this object in `implementation` when HAVE_AMD64_ASM is set.
 * NOTE(review): SODIUM_C99() wraps the member designators; presumably it
 * expands to nothing on pre-C99 compilers, in which case the initializers
 * rely on member order -- confirm against its definition. */
struct crypto_stream_salsa20_implementation
|
||||
crypto_stream_salsa20_xmm6_implementation = {
|
||||
SODIUM_C99(.stream =) stream_salsa20_xmm6,
|
||||
SODIUM_C99(.stream_xor_ic =) stream_salsa20_xmm6_xor_ic,
|
||||
};
|
||||
|
||||
#endif
|
8
src/libsodium/crypto_stream/salsa20/xmm6/salsa20_xmm6.h
Normal file
8
src/libsodium/crypto_stream/salsa20/xmm6/salsa20_xmm6.h
Normal file
@ -0,0 +1,8 @@
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
#include "../stream_salsa20.h"
|
||||
#include "crypto_stream_salsa20.h"
|
||||
|
||||
/* Declaration of the xmm6 implementation descriptor, so that other
 * translation units (the salsa20 dispatcher in this change) can take its
 * address.
 * NOTE(review): no include guard is visible in this header hunk -- confirm
 * whether one is needed. */
extern struct crypto_stream_salsa20_implementation
|
||||
crypto_stream_salsa20_xmm6_implementation;
|
Loading…
Reference in New Issue
Block a user