diff --git a/src/libsodium/crypto_stream/chacha20/vec/stream_chacha20_vec.c b/src/libsodium/crypto_stream/chacha20/vec/stream_chacha20_vec.c index 578b6b3e..f09944ef 100644 --- a/src/libsodium/crypto_stream/chacha20/vec/stream_chacha20_vec.c +++ b/src/libsodium/crypto_stream/chacha20/vec/stream_chacha20_vec.c @@ -8,44 +8,46 @@ #include "private/common.h" #include "utils.h" -#include "stream_chacha20_vec.h" #include "../stream_chacha20.h" +#include "stream_chacha20_vec.h" -#if (defined(HAVE_EMMINTRIN_H) && defined(HAVE_TMMINTRIN_H) && defined(__GNUC__)) +#if (defined(HAVE_EMMINTRIN_H) && defined(HAVE_TMMINTRIN_H) && \ + defined(__GNUC__)) #pragma GCC target("sse2") #pragma GCC target("ssse3") #define CHACHA_RNDS 20 -typedef unsigned int vec __attribute__ ((vector_size(16))); +typedef unsigned int vec __attribute__((vector_size(16))); #include <emmintrin.h> #include <tmmintrin.h> -# if __clang__ -# define VBPI 4 -# else -# define VBPI 3 -# endif -# define ONE (vec) _mm_set_epi32(0, 0, 0, 1) -# define LOAD(m) (vec) _mm_loadu_si128((const __m128i *) (const void *) (m)) -# define LOAD_ALIGNED(m) (vec) _mm_load_si128((const __m128i *) (const void *) (m)) -# define STORE(m, r) _mm_storeu_si128((__m128i *) (void *) (m), (__m128i) (r)) -# define ROTV1(x) (vec) _mm_shuffle_epi32((__m128i)x, _MM_SHUFFLE(0, 3, 2, 1)) -# define ROTV2(x) (vec) _mm_shuffle_epi32((__m128i)x, _MM_SHUFFLE(1, 0, 3, 2)) -# define ROTV3(x) (vec) _mm_shuffle_epi32((__m128i)x, _MM_SHUFFLE(2, 1, 0, 3)) -# define ROTW7(x) \ - (vec)(_mm_slli_epi32((__m128i)x, 7) ^ _mm_srli_epi32((__m128i)x, 25)) -# define ROTW12(x) \ - (vec)(_mm_slli_epi32((__m128i)x, 12) ^ _mm_srli_epi32((__m128i)x, 20)) -# define ROTW8(x) \ - (vec)(_mm_slli_epi32((__m128i)x, 8) ^ _mm_srli_epi32((__m128i)x, 24)) +#if __clang__ +#define VBPI 4 +#else +#define VBPI 3 +#endif +#define ONE (vec) _mm_set_epi32(0, 0, 0, 1) +#define LOAD(m) (vec) _mm_loadu_si128((const __m128i *) (const void *) (m)) +#define LOAD_ALIGNED(m) \ + (vec) _mm_load_si128((const __m128i *) (const 
void *) (m)) +#define STORE(m, r) _mm_storeu_si128((__m128i *) (void *) (m), (__m128i)(r)) +#define ROTV1(x) (vec) _mm_shuffle_epi32((__m128i) x, _MM_SHUFFLE(0, 3, 2, 1)) +#define ROTV2(x) (vec) _mm_shuffle_epi32((__m128i) x, _MM_SHUFFLE(1, 0, 3, 2)) +#define ROTV3(x) (vec) _mm_shuffle_epi32((__m128i) x, _MM_SHUFFLE(2, 1, 0, 3)) +#define ROTW7(x) \ + (vec)(_mm_slli_epi32((__m128i) x, 7) ^ _mm_srli_epi32((__m128i) x, 25)) +#define ROTW12(x) \ + (vec)(_mm_slli_epi32((__m128i) x, 12) ^ _mm_srli_epi32((__m128i) x, 20)) +#define ROTW8(x) \ + (vec)(_mm_slli_epi32((__m128i) x, 8) ^ _mm_srli_epi32((__m128i) x, 24)) #define ROTW16(x) \ - (vec)(_mm_slli_epi32((__m128i)x, 16) ^ _mm_srli_epi32((__m128i)x, 16)) + (vec)(_mm_slli_epi32((__m128i) x, 16) ^ _mm_srli_epi32((__m128i) x, 16)) #ifndef REVV_BE -# define REVV_BE(x) (x) +#define REVV_BE(x) (x) #endif #define BPI (VBPI + 0) /* Blocks computed per loop iteration */ @@ -105,25 +107,17 @@ chacha_ivsetup(chacha_ctx *ctx, const uint8_t *iv, uint64_t ic) memcpy(&iv_low, iv, 4); memcpy(&iv_high, iv + 4, 4); { - const vec s3 = { - (uint32_t) ic, - (uint32_t) (ic >> 32), - iv_low, - iv_high - }; - ctx->s3 = s3; + const vec s3 = { (uint32_t) ic, (uint32_t)(ic >> 32), iv_low, iv_high }; + ctx->s3 = s3; } } static void chacha_ietf_ivsetup(chacha_ctx *ctx, const uint8_t *iv, uint32_t ic) { - const vec s3 = { - ic, - ((const uint32_t *) (const void *) iv)[0], - ((const uint32_t *) (const void *) iv)[1], - ((const uint32_t *) (const void *) iv)[2] - }; + const vec s3 = { ic, ((const uint32_t *) (const void *) iv)[0], + ((const uint32_t *) (const void *) iv)[1], + ((const uint32_t *) (const void *) iv)[2] }; ctx->s3 = s3; } @@ -138,13 +132,14 @@ static void chacha_encrypt_bytes(chacha_ctx *ctx, const uint8_t *in, uint8_t *out, unsigned long long inlen) { - CRYPTO_ALIGN(16) unsigned chacha_const[] - = { 0x61707865, 0x3320646E, 0x79622D32, 0x6B206574 }; - uint32_t *op = (uint32_t *) (void *) out; - const uint32_t *ip = (const uint32_t *) 
(const void *) in; - vec s0, s1, s2, s3; - unsigned long long iters; - unsigned long long i; + CRYPTO_ALIGN(16) + unsigned chacha_const[] = { 0x61707865, 0x3320646E, 0x79622D32, + 0x6B206574 }; + uint32_t * op = (uint32_t *) (void *) out; + const uint32_t * ip = (const uint32_t *) (const void *) in; + vec s0, s1, s2, s3; + unsigned long long iters; + unsigned long long i; if (inlen > 64ULL * (1ULL << 32) - 64ULL) { abort(); /* LCOV_EXCL_LINE */ @@ -165,11 +160,11 @@ chacha_encrypt_bytes(chacha_ctx *ctx, const uint8_t *in, uint8_t *out, v4 = v0 = s0; v5 = v1 = s1; v6 = v2 = s2; - v3 = s3; - v7 = v3 + ONE; + v3 = s3; + v7 = v3 + ONE; #if VBPI > 2 - v8 = v4; - v9 = v5; + v8 = v4; + v9 = v5; v10 = v6; v11 = v7 + ONE; #endif @@ -220,7 +215,7 @@ chacha_encrypt_bytes(chacha_ctx *ctx, const uint8_t *in, uint8_t *out, inlen = inlen % 64; if (inlen) { CRYPTO_ALIGN(16) vec buf[4]; - vec v0, v1, v2, v3; + vec v0, v1, v2, v3; v0 = s0; v1 = s1; v2 = s2; @@ -228,7 +223,6 @@ chacha_encrypt_bytes(chacha_ctx *ctx, const uint8_t *in, uint8_t *out, for (i = CHACHA_RNDS / 2; i; i--) { DQROUND_VECTORS(v0, v1, v2, v3); } - if (inlen >= 16) { STORE(op + 0, LOAD(ip + 0) ^ REVV_BE(v0 + s0)); if (inlen >= 32) { @@ -252,8 +246,8 @@ chacha_encrypt_bytes(chacha_ctx *ctx, const uint8_t *in, uint8_t *out, } static int -stream_vec(unsigned char *c, unsigned long long clen, - const unsigned char *n, const unsigned char *k) +stream_vec(unsigned char *c, unsigned long long clen, const unsigned char *n, + const unsigned char *k) { struct chacha_ctx ctx; @@ -291,8 +285,7 @@ stream_ietf_vec(unsigned char *c, unsigned long long clen, static int stream_vec_xor_ic(unsigned char *c, const unsigned char *m, - unsigned long long mlen, - const unsigned char *n, uint64_t ic, + unsigned long long mlen, const unsigned char *n, uint64_t ic, const unsigned char *k) { struct chacha_ctx ctx; @@ -310,9 +303,8 @@ stream_vec_xor_ic(unsigned char *c, const unsigned char *m, static int stream_ietf_vec_xor_ic(unsigned char 
*c, const unsigned char *m, - unsigned long long mlen, - const unsigned char *n, uint32_t ic, - const unsigned char *k) + unsigned long long mlen, const unsigned char *n, + uint32_t ic, const unsigned char *k) { struct chacha_ctx ctx; @@ -328,11 +320,11 @@ stream_ietf_vec_xor_ic(unsigned char *c, const unsigned char *m, } struct crypto_stream_chacha20_implementation -crypto_stream_chacha20_vec_implementation = { - SODIUM_C99(.stream =) stream_vec, - SODIUM_C99(.stream_ietf =) stream_ietf_vec, - SODIUM_C99(.stream_xor_ic =) stream_vec_xor_ic, - SODIUM_C99(.stream_ietf_xor_ic =) stream_ietf_vec_xor_ic -}; + crypto_stream_chacha20_vec_implementation = { + SODIUM_C99(.stream =) stream_vec, + SODIUM_C99(.stream_ietf =) stream_ietf_vec, + SODIUM_C99(.stream_xor_ic =) stream_vec_xor_ic, + SODIUM_C99(.stream_ietf_xor_ic =) stream_ietf_vec_xor_ic + }; #endif