This commit is contained in:
Frank Denis 2017-02-23 10:55:37 +01:00
parent 9626d7af59
commit 32a084222a

View File

@ -8,44 +8,46 @@
#include "private/common.h" #include "private/common.h"
#include "utils.h" #include "utils.h"
#include "stream_chacha20_vec.h"
#include "../stream_chacha20.h" #include "../stream_chacha20.h"
#include "stream_chacha20_vec.h"
#if (defined(HAVE_EMMINTRIN_H) && defined(HAVE_TMMINTRIN_H) && defined(__GNUC__)) #if (defined(HAVE_EMMINTRIN_H) && defined(HAVE_TMMINTRIN_H) && \
defined(__GNUC__))
#pragma GCC target("sse2") #pragma GCC target("sse2")
#pragma GCC target("ssse3") #pragma GCC target("ssse3")
#define CHACHA_RNDS 20 #define CHACHA_RNDS 20
typedef unsigned int vec __attribute__ ((vector_size(16))); typedef unsigned int vec __attribute__((vector_size(16)));
#include <emmintrin.h> #include <emmintrin.h>
#include <tmmintrin.h> #include <tmmintrin.h>
# if __clang__ #if __clang__
# define VBPI 4 #define VBPI 4
# else #else
# define VBPI 3 #define VBPI 3
# endif #endif
# define ONE (vec) _mm_set_epi32(0, 0, 0, 1) #define ONE (vec) _mm_set_epi32(0, 0, 0, 1)
# define LOAD(m) (vec) _mm_loadu_si128((const __m128i *) (const void *) (m)) #define LOAD(m) (vec) _mm_loadu_si128((const __m128i *) (const void *) (m))
# define LOAD_ALIGNED(m) (vec) _mm_load_si128((const __m128i *) (const void *) (m)) #define LOAD_ALIGNED(m) \
# define STORE(m, r) _mm_storeu_si128((__m128i *) (void *) (m), (__m128i) (r)) (vec) _mm_load_si128((const __m128i *) (const void *) (m))
# define ROTV1(x) (vec) _mm_shuffle_epi32((__m128i)x, _MM_SHUFFLE(0, 3, 2, 1)) #define STORE(m, r) _mm_storeu_si128((__m128i *) (void *) (m), (__m128i)(r))
# define ROTV2(x) (vec) _mm_shuffle_epi32((__m128i)x, _MM_SHUFFLE(1, 0, 3, 2)) #define ROTV1(x) (vec) _mm_shuffle_epi32((__m128i) x, _MM_SHUFFLE(0, 3, 2, 1))
# define ROTV3(x) (vec) _mm_shuffle_epi32((__m128i)x, _MM_SHUFFLE(2, 1, 0, 3)) #define ROTV2(x) (vec) _mm_shuffle_epi32((__m128i) x, _MM_SHUFFLE(1, 0, 3, 2))
# define ROTW7(x) \ #define ROTV3(x) (vec) _mm_shuffle_epi32((__m128i) x, _MM_SHUFFLE(2, 1, 0, 3))
(vec)(_mm_slli_epi32((__m128i)x, 7) ^ _mm_srli_epi32((__m128i)x, 25)) #define ROTW7(x) \
# define ROTW12(x) \ (vec)(_mm_slli_epi32((__m128i) x, 7) ^ _mm_srli_epi32((__m128i) x, 25))
(vec)(_mm_slli_epi32((__m128i)x, 12) ^ _mm_srli_epi32((__m128i)x, 20)) #define ROTW12(x) \
# define ROTW8(x) \ (vec)(_mm_slli_epi32((__m128i) x, 12) ^ _mm_srli_epi32((__m128i) x, 20))
(vec)(_mm_slli_epi32((__m128i)x, 8) ^ _mm_srli_epi32((__m128i)x, 24)) #define ROTW8(x) \
(vec)(_mm_slli_epi32((__m128i) x, 8) ^ _mm_srli_epi32((__m128i) x, 24))
#define ROTW16(x) \ #define ROTW16(x) \
(vec)(_mm_slli_epi32((__m128i)x, 16) ^ _mm_srli_epi32((__m128i)x, 16)) (vec)(_mm_slli_epi32((__m128i) x, 16) ^ _mm_srli_epi32((__m128i) x, 16))
#ifndef REVV_BE #ifndef REVV_BE
# define REVV_BE(x) (x) #define REVV_BE(x) (x)
#endif #endif
#define BPI (VBPI + 0) /* Blocks computed per loop iteration */ #define BPI (VBPI + 0) /* Blocks computed per loop iteration */
@ -105,12 +107,7 @@ chacha_ivsetup(chacha_ctx *ctx, const uint8_t *iv, uint64_t ic)
memcpy(&iv_low, iv, 4); memcpy(&iv_low, iv, 4);
memcpy(&iv_high, iv + 4, 4); memcpy(&iv_high, iv + 4, 4);
{ {
const vec s3 = { const vec s3 = { (uint32_t) ic, (uint32_t)(ic >> 32), iv_low, iv_high };
(uint32_t) ic,
(uint32_t) (ic >> 32),
iv_low,
iv_high
};
ctx->s3 = s3; ctx->s3 = s3;
} }
} }
@ -118,12 +115,9 @@ chacha_ivsetup(chacha_ctx *ctx, const uint8_t *iv, uint64_t ic)
static void static void
chacha_ietf_ivsetup(chacha_ctx *ctx, const uint8_t *iv, uint32_t ic) chacha_ietf_ivsetup(chacha_ctx *ctx, const uint8_t *iv, uint32_t ic)
{ {
const vec s3 = { const vec s3 = { ic, ((const uint32_t *) (const void *) iv)[0],
ic,
((const uint32_t *) (const void *) iv)[0],
((const uint32_t *) (const void *) iv)[1], ((const uint32_t *) (const void *) iv)[1],
((const uint32_t *) (const void *) iv)[2] ((const uint32_t *) (const void *) iv)[2] };
};
ctx->s3 = s3; ctx->s3 = s3;
} }
@ -138,10 +132,11 @@ static void
chacha_encrypt_bytes(chacha_ctx *ctx, const uint8_t *in, uint8_t *out, chacha_encrypt_bytes(chacha_ctx *ctx, const uint8_t *in, uint8_t *out,
unsigned long long inlen) unsigned long long inlen)
{ {
CRYPTO_ALIGN(16) unsigned chacha_const[] CRYPTO_ALIGN(16)
= { 0x61707865, 0x3320646E, 0x79622D32, 0x6B206574 }; unsigned chacha_const[] = { 0x61707865, 0x3320646E, 0x79622D32,
uint32_t *op = (uint32_t *) (void *) out; 0x6B206574 };
const uint32_t *ip = (const uint32_t *) (const void *) in; uint32_t * op = (uint32_t *) (void *) out;
const uint32_t * ip = (const uint32_t *) (const void *) in;
vec s0, s1, s2, s3; vec s0, s1, s2, s3;
unsigned long long iters; unsigned long long iters;
unsigned long long i; unsigned long long i;
@ -228,7 +223,6 @@ chacha_encrypt_bytes(chacha_ctx *ctx, const uint8_t *in, uint8_t *out,
for (i = CHACHA_RNDS / 2; i; i--) { for (i = CHACHA_RNDS / 2; i; i--) {
DQROUND_VECTORS(v0, v1, v2, v3); DQROUND_VECTORS(v0, v1, v2, v3);
} }
if (inlen >= 16) { if (inlen >= 16) {
STORE(op + 0, LOAD(ip + 0) ^ REVV_BE(v0 + s0)); STORE(op + 0, LOAD(ip + 0) ^ REVV_BE(v0 + s0));
if (inlen >= 32) { if (inlen >= 32) {
@ -252,8 +246,8 @@ chacha_encrypt_bytes(chacha_ctx *ctx, const uint8_t *in, uint8_t *out,
} }
static int static int
stream_vec(unsigned char *c, unsigned long long clen, stream_vec(unsigned char *c, unsigned long long clen, const unsigned char *n,
const unsigned char *n, const unsigned char *k) const unsigned char *k)
{ {
struct chacha_ctx ctx; struct chacha_ctx ctx;
@ -291,8 +285,7 @@ stream_ietf_vec(unsigned char *c, unsigned long long clen,
static int static int
stream_vec_xor_ic(unsigned char *c, const unsigned char *m, stream_vec_xor_ic(unsigned char *c, const unsigned char *m,
unsigned long long mlen, unsigned long long mlen, const unsigned char *n, uint64_t ic,
const unsigned char *n, uint64_t ic,
const unsigned char *k) const unsigned char *k)
{ {
struct chacha_ctx ctx; struct chacha_ctx ctx;
@ -310,9 +303,8 @@ stream_vec_xor_ic(unsigned char *c, const unsigned char *m,
static int static int
stream_ietf_vec_xor_ic(unsigned char *c, const unsigned char *m, stream_ietf_vec_xor_ic(unsigned char *c, const unsigned char *m,
unsigned long long mlen, unsigned long long mlen, const unsigned char *n,
const unsigned char *n, uint32_t ic, uint32_t ic, const unsigned char *k)
const unsigned char *k)
{ {
struct chacha_ctx ctx; struct chacha_ctx ctx;
@ -328,11 +320,11 @@ stream_ietf_vec_xor_ic(unsigned char *c, const unsigned char *m,
} }
struct crypto_stream_chacha20_implementation struct crypto_stream_chacha20_implementation
crypto_stream_chacha20_vec_implementation = { crypto_stream_chacha20_vec_implementation = {
SODIUM_C99(.stream =) stream_vec, SODIUM_C99(.stream =) stream_vec,
SODIUM_C99(.stream_ietf =) stream_ietf_vec, SODIUM_C99(.stream_ietf =) stream_ietf_vec,
SODIUM_C99(.stream_xor_ic =) stream_vec_xor_ic, SODIUM_C99(.stream_xor_ic =) stream_vec_xor_ic,
SODIUM_C99(.stream_ietf_xor_ic =) stream_ietf_vec_xor_ic SODIUM_C99(.stream_ietf_xor_ic =) stream_ietf_vec_xor_ic
}; };
#endif #endif