Convert more functions to macros

This commit is contained in:
Frank Denis 2015-10-11 00:43:44 +02:00
parent 69aac7d0af
commit 0b20d292df

View File

@ -415,68 +415,72 @@ do { \
__m128i in##a = _mm_load_si128((const __m128i *) (in + a * 16));
/* full encrypt & checksum 8 blocks at once
 *
 * Runs the 8-way helper sequence (counter/temporary setup, AES rounds, XOR,
 * store) and then folds the eight temp0..temp7 values into the running
 * accumulator stored at `accum`.
 *
 * out    - destination buffer; consumed by the STOREx helper (defined
 *          elsewhere). Presumably receives 8 * 16 bytes -- confirm there.
 * n      - consumed by the NVx helper (defined elsewhere); presumably the
 *          counter block, whose 32-bit counter is advanced -- confirm there.
 * rkeys  - consumed by the round helpers (defined elsewhere); presumably the
 *          expanded AES round keys.
 * in     - source buffer; consumed by the XORx helper (defined elsewhere).
 * accum  - 16-byte accumulator; read and written with unaligned load/store,
 *          so it has no alignment requirement here.
 * hv, h2v, h3v, h4v - passed unchanged to REDUCE4; presumably the hash key
 *          and its 2nd..4th powers -- confirm against REDUCE4.
 *
 * The helper macros reference the locals and parameters of this function by
 * name, so none of them may be renamed without updating the helpers.
 */
static inline void
aesni_encrypt8full(unsigned char *out, uint32_t *n, const __m128i *rkeys,
const unsigned char *in, unsigned char *accum,
const __m128i hv, const __m128i h2v, const __m128i h3v,
const __m128i h4v)
{
/* _mm_set_epi8 lists bytes from position 15 down to position 0, so this
 * constant holds 0..11 in place and 15,14,13,12 in positions 12..15.
 * Presumably used by NVx as a byte-shuffle mask that swaps the byte order of
 * the last 32-bit word -- confirm against NVx. */
const __m128i pt = _mm_set_epi8(12, 13, 14, 15, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
/* Byte position i holds 15 - i: the index pattern of a full 16-byte
 * reversal. Handed to REDUCE4 as its first argument. */
const __m128i rev = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
/* Current accumulator value (unaligned load). */
__m128i accv = _mm_loadu_si128((const __m128i *) accum);
int i;
/* MAKE8(X) presumably expands X once for each of the 8 lanes (0..7); the
 * temp0..temp7 names used below must come from these declarations. */
MAKE8(NVDECLx);
MAKE8(TEMPDECLx);
MAKE8(NVx);
MAKE8(TEMPx);
/* 13 iterations (i = 1..13) followed by one AESENCLASTx step; together with
 * an initial step presumably done in TEMPx this matches the 14 rounds of
 * AES-256 -- confirm against TEMPx. The pragma asks for full unrolling. */
#pragma unroll(13)
for (i = 1; i < 14; i++) {
MAKE8(AESENCx);
}
MAKE8(AESENCLASTx);
MAKE8(XORx);
MAKE8(STOREx);
/* Fold the results into accv four at a time: lanes 0..3 first, then 4..7.
 * Within each call the lanes are passed highest-numbered first. */
REDUCE4(rev, hv, h2v, h3v, h4v, temp3, temp2, temp1, temp0, accv);
REDUCE4(rev, hv, h2v, h3v, h4v, temp7, temp6, temp5, temp4, accv);
/* Write the updated accumulator back (unaligned store). */
_mm_storeu_si128((__m128i *) accum, accv);
}
/*
 * aesni_encrypt8full(out_, n_, rkeys, in_, accum, hv_, h2v_, h3v_, h4v_)
 *
 * Statement macro (do { ... } while(0)) that runs the 8-way helper sequence
 * (counter/temporary setup, 13 AESENCx iterations for i = 1..13, AESENCLASTx,
 * XORx, STOREx) and then folds temp0..temp7 into the 16-byte accumulator at
 * `accum` with two REDUCE4 calls (lanes 0..3, then lanes 4..7).
 *
 * out_, n_, in_, hv_, h2v_, h3v_, h4v_ are each evaluated once and copied
 * into block-local variables named out, n, in, hv, h2v, h3v, h4v; the helper
 * macros (defined elsewhere) refer to those locals by name.
 *
 * pt:  _mm_set_epi8 lists bytes from position 15 down to 0, so pt holds
 *      0..11 in place and 15,14,13,12 in positions 12..15; presumably a
 *      shuffle mask used by NVx -- confirm there.
 * rev: byte position i holds 15 - i (full 16-byte reversal pattern); passed
 *      to REDUCE4.
 *
 * NOTE(review): `rkeys` is a macro parameter but does not appear anywhere in
 * this replacement list. Parameter substitution does not reach into the
 * nested helper macros, so any `rkeys` they mention resolves to whatever
 * identifier of that name is in scope at the call site, not to the argument
 * passed here. Callers therefore need a variable literally named `rkeys`.
 *
 * NOTE(review): `accum` is expanded twice (load and store) without
 * parentheses, so an argument with side effects would be evaluated twice.
 *
 * NOTE(review): an argument that mentions out, n, in, hv, h2v, h3v, h4v, pt,
 * rev, accv or i may bind to the block-local of the same name instead of the
 * caller's variable (e.g. passing `n` for n_ expands to `uint32_t *n = n;`,
 * which reads an uninitialized local). Callers must use differently named
 * expressions for those arguments.
 */
#define aesni_encrypt8full(out_, n_, rkeys, in_, accum, hv_, h2v_, h3v_, h4v_) \
do { \
unsigned char *out = out_; \
uint32_t *n = n_; \
const unsigned char *in = in_; \
const __m128i hv = hv_; \
const __m128i h2v = h2v_; \
const __m128i h3v = h3v_; \
const __m128i h4v = h4v_; \
const __m128i pt = _mm_set_epi8(12, 13, 14, 15, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
const __m128i rev = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); \
__m128i accv = _mm_loadu_si128((const __m128i *) accum); \
int i; \
\
MAKE8(NVDECLx); \
MAKE8(TEMPDECLx); \
MAKE8(NVx); \
MAKE8(TEMPx); \
for (i = 1; i < 14; i++) { \
MAKE8(AESENCx); \
} \
MAKE8(AESENCLASTx); \
MAKE8(XORx); \
MAKE8(STOREx); \
REDUCE4(rev, hv, h2v, h3v, h4v, temp3, temp2, temp1, temp0, accv); \
REDUCE4(rev, hv, h2v, h3v, h4v, temp7, temp6, temp5, temp4, accv); \
_mm_storeu_si128((__m128i *) accum, accv); \
} while(0)
/* checksum 8 blocks at once
 *
 * Folds eight values in0..in7 (produced from `in` by the LOADx helper) into
 * the running 16-byte accumulator stored at `accum`. Nothing is encrypted or
 * written other than the accumulator.
 *
 * in     - source buffer read by the LOADx helper (defined elsewhere).
 *          NOTE(review): LOADx appears to use aligned 16-byte loads from
 *          in + a * 16, which would require `in` to be 16-byte aligned --
 *          confirm against the LOADx definition.
 * accum  - 16-byte accumulator; read and written with unaligned load/store.
 * hv, h2v, h3v, h4v - passed unchanged to REDUCE4; presumably the hash key
 *          and its 2nd..4th powers -- confirm against REDUCE4.
 */
static inline void
aesni_addmul8full(const unsigned char *in, unsigned char *accum,
const __m128i hv, const __m128i h2v,
const __m128i h3v, const __m128i h4v)
{
/* Byte position i holds 15 - i: the index pattern of a full 16-byte
 * reversal. Handed to REDUCE4 as its first argument. */
const __m128i rev = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
/* Current accumulator value (unaligned load). */
__m128i accv = _mm_loadu_si128((const __m128i *) accum);
/* Must declare in0..in7, since they are used below and declared nowhere
 * else in this function. */
MAKE8(LOADx);
/* Fold into accv four at a time: lanes 0..3 first, then 4..7. Within each
 * call the lanes are passed highest-numbered first. */
REDUCE4(rev, hv, h2v, h3v, h4v, in3, in2, in1, in0, accv);
REDUCE4(rev, hv, h2v, h3v, h4v, in7, in6, in5, in4, accv);
/* Write the updated accumulator back (unaligned store). */
_mm_storeu_si128((__m128i *) accum, accv);
}
/*
 * aesni_addmul8full(in_, accum, hv_, h2v_, h3v_, h4v_)
 *
 * Statement macro (do { ... } while(0)) that folds eight values in0..in7,
 * produced from `in` by MAKE8(LOADx), into the 16-byte accumulator at
 * `accum` using two REDUCE4 calls (lanes 0..3, then lanes 4..7). The
 * accumulator is read and written with unaligned load/store.
 *
 * in_, hv_, h2v_, h3v_, h4v_ are each evaluated once and copied into
 * block-local variables named in, hv, h2v, h3v, h4v; LOADx (defined
 * elsewhere) refers to `in` by name.
 *
 * rev: byte position i holds 15 - i (full 16-byte reversal pattern); passed
 *      to REDUCE4 as its first argument.
 *
 * NOTE(review): `accum` is expanded twice (load and store) without
 * parentheses, so an argument with side effects would be evaluated twice.
 *
 * NOTE(review): an argument that mentions in, hv, h2v, h3v, h4v, rev or accv
 * may bind to the block-local of the same name instead of the caller's
 * variable (e.g. passing `in + 16` for in_ expands to
 * `const unsigned char *in = in + 16;`, which reads an uninitialized local).
 * Callers must use differently named expressions for those arguments.
 */
#define aesni_addmul8full(in_, accum, hv_, h2v_, h3v_, h4v_) \
do { \
const unsigned char *in = in_; \
const __m128i hv = hv_; \
const __m128i h2v = h2v_ ; \
const __m128i h3v = h3v_ ; \
const __m128i h4v = h4v_ ; \
const __m128i rev = _mm_set_epi8(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15); \
__m128i accv = _mm_loadu_si128((const __m128i *) accum); \
\
MAKE8(LOADx); \
REDUCE4(rev, hv, h2v, h3v, h4v, in3, in2, in1, in0, accv); \
REDUCE4(rev, hv, h2v, h3v, h4v, in7, in6, in5, in4, accv); \
_mm_storeu_si128((__m128i *) accum, accv); \
} while(0)
/* decrypt 8 blocks at once
 *
 * Runs the 8-way helper sequence: counter/temporary setup, AES rounds, XOR,
 * store. No accumulator is touched here. Note that the round helpers invoked
 * are AESENCx / AESENCLASTx (not decryption-round helpers); this is
 * consistent with a counter-mode construction, where the keystream is
 * produced by encrypting counter blocks -- confirm against the helper
 * definitions.
 *
 * out    - destination buffer; consumed by the STOREx helper (defined
 *          elsewhere).
 * n      - consumed by the NVx helper (defined elsewhere); presumably the
 *          counter block -- confirm there.
 * rkeys  - consumed by the round helpers (defined elsewhere); presumably the
 *          expanded AES round keys.
 * in     - source buffer; consumed by the XORx helper (defined elsewhere).
 *
 * The helper macros reference the locals and parameters of this function by
 * name, so none of them may be renamed without updating the helpers.
 */
static inline void
aesni_decrypt8full(unsigned char *out, uint32_t *n, const __m128i *rkeys,
const unsigned char *in)
{
/* _mm_set_epi8 lists bytes from position 15 down to position 0, so this
 * constant holds 0..11 in place and 15,14,13,12 in positions 12..15.
 * Presumably used by NVx as a byte-shuffle mask -- confirm against NVx. */
const __m128i pt = _mm_set_epi8(12, 13, 14, 15, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0);
int i;
/* MAKE8(X) presumably expands X once for each of the 8 lanes (0..7). */
MAKE8(NVDECLx);
MAKE8(TEMPDECLx);
MAKE8(NVx);
MAKE8(TEMPx);
/* 13 iterations (i = 1..13) followed by one AESENCLASTx step; the pragma
 * asks the compiler to unroll the loop completely. */
#pragma unroll(13)
for (i = 1; i < 14; i++) {
MAKE8(AESENCx);
}
MAKE8(AESENCLASTx);
MAKE8(XORx);
MAKE8(STOREx);
}
/*
 * aesni_decrypt8full(out_, n_, rkeys, in_)
 *
 * Statement macro (do { ... } while(0)) that runs the 8-way helper sequence:
 * counter/temporary setup, 13 AESENCx iterations for i = 1..13, AESENCLASTx,
 * XORx, STOREx. No accumulator is read or written.
 *
 * out_, n_, in_ are each evaluated once and copied into block-local
 * variables named out, n, in; the helper macros (defined elsewhere) refer to
 * those locals by name.
 *
 * pt: _mm_set_epi8 lists bytes from position 15 down to 0, so pt holds 0..11
 *     in place and 15,14,13,12 in positions 12..15; presumably a shuffle
 *     mask used by NVx -- confirm there.
 *
 * NOTE(review): `rkeys` is a macro parameter but does not appear anywhere in
 * this replacement list. Parameter substitution does not reach into the
 * nested helper macros, so any `rkeys` they mention resolves to whatever
 * identifier of that name is in scope at the call site, not to the argument
 * passed here. Callers therefore need a variable literally named `rkeys`.
 *
 * NOTE(review): an argument that mentions out, n, in, pt or i may bind to
 * the block-local of the same name instead of the caller's variable (e.g.
 * passing `n` for n_ expands to `uint32_t *n = n;`, which reads an
 * uninitialized local). Callers must use differently named expressions for
 * those arguments.
 */
#define aesni_decrypt8full(out_, n_, rkeys, in_) \
do { \
unsigned char *out = out_; \
uint32_t *n = n_; \
const unsigned char *in = in_; \
const __m128i pt = _mm_set_epi8(12, 13, 14, 15, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0); \
int i; \
\
MAKE8(NVDECLx); \
MAKE8(TEMPDECLx); \
MAKE8(NVx); \
MAKE8(TEMPx); \
for (i = 1; i < 14; i++) { \
MAKE8(AESENCx); \
} \
MAKE8(AESENCLASTx); \
MAKE8(XORx); \
MAKE8(STOREx); \
} while(0)
int
crypto_aead_aes256gcm_aesni_beforenm(crypto_aead_aes256gcm_aesni_state *ctx_,