Summary: turn the three 8-block AES-256-GCM AES-NI helpers into macros.

aesni_encrypt8full, aesni_addmul8full and aesni_decrypt8full change from
`static inline void` functions into function-like macros wrapped in
`do { ... } while(0)`. Each macro copies its `_`-suffixed arguments into
block-scope locals (out, n, in, hv, h2v, h3v, h4v) that carry the names the
removed function parameters had; presumably this is so the MAKE8()/REDUCE4()
helper macros keep working unchanged (LOADx, visible in the hunk context,
reads `in`) -- confirm against the helper definitions.

Review notes on the added macros:
  * `rkeys` is a macro parameter but does not appear in the visible macro
    bodies, so the argument passed for it is never substituted there; if the
    helper macros reference `rkeys`, they will resolve it at the call site
    (TODO confirm in the helper definitions).
  * `accum` is not copied to a local: it is expanded twice (load and store),
    each time directly after an unparenthesised cast. Pass a plain pointer or
    array name with no side effects.
  * The locals shadow caller variables of the same name. An argument that is
    itself spelled out, n, in, hv, h2v, h3v or h4v would initialise the local
    from itself.
  * The `#pragma unroll(13)` lines of the removed functions have no
    counterpart in the macros.

diff --git a/src/libsodium/crypto_aead/aes256gcm/aesni/aead_aes256gcm_aesni.c b/src/libsodium/crypto_aead/aes256gcm/aesni/aead_aes256gcm_aesni.c
index 37e3a229..b80f9725 100644
--- a/src/libsodium/crypto_aead/aes256gcm/aesni/aead_aes256gcm_aesni.c
+++ b/src/libsodium/crypto_aead/aes256gcm/aesni/aead_aes256gcm_aesni.c
@@ -415,68 +415,72 @@ do { \
     __m128i in##a = _mm_load_si128((const __m128i *) (in + a * 16));
 
 /* full encrypt & checksum 8 blocks at once */
-static inline void
-aesni_encrypt8full(unsigned char *out, uint32_t *n, const __m128i *rkeys,
-                   const unsigned char *in, unsigned char *accum,
-                   const __m128i hv, const __m128i h2v, const __m128i h3v,
-                   const __m128i h4v)
-{
-    const __m128i pt = _mm_set_epi8(12, 13, 14, 15, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-    const __m128i rev = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
-    __m128i accv = _mm_loadu_si128((const __m128i *) accum);
-    int i;
-
-    MAKE8(NVDECLx);
-    MAKE8(TEMPDECLx);
-    MAKE8(NVx);
-    MAKE8(TEMPx);
-#pragma unroll(13)
-    for (i = 1; i < 14; i++) {
-        MAKE8(AESENCx);
-    }
-    MAKE8(AESENCLASTx);
-    MAKE8(XORx);
-    MAKE8(STOREx);
-    REDUCE4(rev, hv, h2v, h3v, h4v, temp3, temp2, temp1, temp0, accv);
-    REDUCE4(rev, hv, h2v, h3v, h4v, temp7, temp6, temp5, temp4, accv);
-    _mm_storeu_si128((__m128i *) accum, accv);
-}
+#define aesni_encrypt8full(out_, n_, rkeys, in_, accum, hv_, h2v_, h3v_, h4v_) \
+do { \
+    unsigned char *out = out_; \
+    uint32_t *n = n_; \
+    const unsigned char *in = in_; \
+    const __m128i hv = hv_; \
+    const __m128i h2v = h2v_; \
+    const __m128i h3v = h3v_; \
+    const __m128i h4v = h4v_; \
+    const __m128i pt = _mm_set_epi8(12, 13, 14, 15, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
+    const __m128i rev = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); \
+    __m128i accv = _mm_loadu_si128((const __m128i *) accum); \
+    int i; \
+\
+    MAKE8(NVDECLx); \
+    MAKE8(TEMPDECLx); \
+    MAKE8(NVx); \
+    MAKE8(TEMPx); \
+    for (i = 1; i < 14; i++) { \
+        MAKE8(AESENCx); \
+    } \
+    MAKE8(AESENCLASTx); \
+    MAKE8(XORx); \
+    MAKE8(STOREx); \
+    REDUCE4(rev, hv, h2v, h3v, h4v, temp3, temp2, temp1, temp0, accv); \
+    REDUCE4(rev, hv, h2v, h3v, h4v, temp7, temp6, temp5, temp4, accv); \
+    _mm_storeu_si128((__m128i *) accum, accv); \
+} while(0)
 
 /* checksum 8 blocks at once */
-static inline void
-aesni_addmul8full(const unsigned char *in, unsigned char *accum,
-                  const __m128i hv, const __m128i h2v,
-                  const __m128i h3v, const __m128i h4v)
-{
-    const __m128i rev = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
-    __m128i accv = _mm_loadu_si128((const __m128i *) accum);
-
-    MAKE8(LOADx);
-    REDUCE4(rev, hv, h2v, h3v, h4v, in3, in2, in1, in0, accv);
-    REDUCE4(rev, hv, h2v, h3v, h4v, in7, in6, in5, in4, accv);
-    _mm_storeu_si128((__m128i *) accum, accv);
-}
+#define aesni_addmul8full(in_, accum, hv_, h2v_, h3v_, h4v_) \
+do { \
+    const unsigned char *in = in_; \
+    const __m128i hv = hv_; \
+    const __m128i h2v = h2v_ ; \
+    const __m128i h3v = h3v_ ; \
+    const __m128i h4v = h4v_ ; \
+    const __m128i rev = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); \
+    __m128i accv = _mm_loadu_si128((const __m128i *) accum); \
+\
+    MAKE8(LOADx); \
+    REDUCE4(rev, hv, h2v, h3v, h4v, in3, in2, in1, in0, accv); \
+    REDUCE4(rev, hv, h2v, h3v, h4v, in7, in6, in5, in4, accv); \
+    _mm_storeu_si128((__m128i *) accum, accv); \
+} while(0)
 
 /* decrypt 8 blocks at once */
-static inline void
-aesni_decrypt8full(unsigned char *out, uint32_t *n, const __m128i *rkeys,
-                   const unsigned char *in)
-{
-    const __m128i pt = _mm_set_epi8(12, 13, 14, 15, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
-    int i;
-
-    MAKE8(NVDECLx);
-    MAKE8(TEMPDECLx);
-    MAKE8(NVx);
-    MAKE8(TEMPx);
-#pragma unroll(13)
-    for (i = 1; i < 14; i++) {
-        MAKE8(AESENCx);
-    }
-    MAKE8(AESENCLASTx);
-    MAKE8(XORx);
-    MAKE8(STOREx);
-}
+#define aesni_decrypt8full(out_, n_, rkeys, in_) \
+do { \
+    unsigned char *out = out_; \
+    uint32_t *n = n_; \
+    const unsigned char *in = in_; \
+    const __m128i pt = _mm_set_epi8(12, 13, 14, 15, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
+    int i; \
+\
+    MAKE8(NVDECLx); \
+    MAKE8(TEMPDECLx); \
+    MAKE8(NVx); \
+    MAKE8(TEMPx); \
+    for (i = 1; i < 14; i++) { \
+        MAKE8(AESENCx); \
+    } \
+    MAKE8(AESENCLASTx); \
+    MAKE8(XORx); \
+    MAKE8(STOREx); \
+} while(0)
 
 int
 crypto_aead_aes256gcm_aesni_beforenm(crypto_aead_aes256gcm_aesni_state *ctx_,