Simple SSE2 implementation of crypto_verify*()

`z` being volatile forces more loads and stores than strictly needed, but this
should be safer if we want to stick with pure C code, and it gives us a chance
to zero the registers before returning.

It's still way faster than byte-by-byte comparisons anyway.

XORed secrets don't matter much when the comparison is done byte by byte, but a
full 128-bit XOR of the two inputs lingering in a register is more of a concern,
hence the explicit zeroing.
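
A minimal, standalone sketch of the same idea for the 16-byte case (assuming a
compiler with SSE2 enabled; verify16_demo is just an illustrative name, not part
of this commit):

#include <emmintrin.h>

/* Compare two 16-byte buffers without data-dependent branches:
 * returns 0 if they are equal, -1 otherwise. */
static int
verify16_demo(const unsigned char *a, const unsigned char *b)
{
    volatile __m128i z; /* volatile: the wiping store below cannot be dropped as dead */
    int m;

    z = _mm_xor_si128(_mm_loadu_si128((const __m128i *) (const void *) a),
                      _mm_loadu_si128((const __m128i *) (const void *) b));
    /* Each 32-bit lane of the comparison is all-ones iff that lane of z is zero;
     * movemask packs the byte sign bits, so m == 0xFFFF exactly when a == b. */
    m = _mm_movemask_epi8(_mm_cmpeq_epi32(z, _mm_setzero_si128()));
    z = _mm_setzero_si128(); /* overwrite the XORed secret material */
    (void) z;
    /* 0xFFFF + 1 == 0x10000, so (m + 1) >> 16 is 1 on a match and 0 otherwise;
     * subtracting 1 turns that into 0 (equal) or -1 (different). */
    return ((m + 1) >> 16) - 1;
}

The crypto_verify_16/_32/_64 wrappers expose the same contract: 0 when the
buffers match, -1 when they differ.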
Frank Denis 2017-07-15 19:54:23 +02:00
parent 94a8b3327f
commit 7dbbd266b5

@@ -24,9 +24,46 @@ crypto_verify_64_bytes(void)
     return crypto_verify_64_BYTES;
 }
+#ifdef HAVE_EMMINTRIN_H
+# ifdef __GNUC__
+#  pragma GCC target("sse2")
+# endif
+# include <emmintrin.h>
 static inline int
 crypto_verify_n(const unsigned char *x_, const unsigned char *y_,
-                const size_t n)
+                const int n)
+{
+    const __m128i zero = _mm_setzero_si128();
+    volatile __m128i v1, v2, z;
+    volatile int m;
+    int i;
+    const volatile __m128i *volatile x =
+        (const volatile __m128i *volatile) (const void *) x_;
+    const volatile __m128i *volatile y =
+        (const volatile __m128i *volatile) (const void *) y_;
+    v1 = _mm_loadu_si128((const __m128i *) &x[0]);
+    v2 = _mm_loadu_si128((const __m128i *) &y[0]);
+    z = _mm_xor_si128(v1, v2);
+    for (i = 1; i < n / 16; i++) {
+        v1 = _mm_loadu_si128((const __m128i *) &x[i]);
+        v2 = _mm_loadu_si128((const __m128i *) &y[i]);
+        z = _mm_or_si128(z, _mm_xor_si128(v1, v2));
+    }
+    m = _mm_movemask_epi8(_mm_cmpeq_epi32(z, zero));
+    v1 = zero; v2 = zero; z = zero;
+    (void) v1; (void) v2; (void) z;
+    return ((m + 1) >> 16) - 1;
+}
+#else
+static inline int
+crypto_verify_n(const unsigned char *x_, const unsigned char *y_,
+                const int n)
 {
     const volatile unsigned char *volatile x =
         (const volatile unsigned char *volatile) x_;
@@ -41,6 +78,8 @@ crypto_verify_n(const unsigned char *x_, const unsigned char *y_,
     return (1 & ((d - 1) >> 8)) - 1;
 }
+#endif
 int
 crypto_verify_16(const unsigned char *x, const unsigned char *y)
 {