From ada7ebdf5d8a7e8bb5ad10925f55acf0fc3e5795 Mon Sep 17 00:00:00 2001 From: Frank Denis Date: Thu, 1 May 2014 21:42:45 -0700 Subject: [PATCH] Get ready to say goodbye to the infamous poly1305-53 implementation. Give room for optimized implementations instead. --- AUTHORS | 2 +- src/libsodium/Makefile.am | 4 +- .../poly1305/donna/auth_poly1305_donna.c | 178 ++--- .../poly1305/donna/poly1305_donna.h | 12 + .../poly1305/donna/poly1305_donna32.h | 221 ++++++ .../poly1305/donna/poly1305_donna64.h | 209 +++++ .../poly1305/donna/portable-jane.h | 712 ------------------ .../poly1305/onetimeauth_poly1305_try.c | 2 +- 8 files changed, 498 insertions(+), 842 deletions(-) create mode 100644 src/libsodium/crypto_onetimeauth/poly1305/donna/poly1305_donna.h create mode 100644 src/libsodium/crypto_onetimeauth/poly1305/donna/poly1305_donna32.h create mode 100644 src/libsodium/crypto_onetimeauth/poly1305/donna/poly1305_donna64.h delete mode 100644 src/libsodium/crypto_onetimeauth/poly1305/donna/portable-jane.h diff --git a/AUTHORS b/AUTHORS index 7dfb08d1..20be3e30 100644 --- a/AUTHORS +++ b/AUTHORS @@ -98,7 +98,7 @@ Christian Winnerlein crypto_onetimeauth/poly1305/donna --------------------------------- -Andrew "floodyberry" M. +Andrew "floodyberry" Moon. 
crypto_pwhash/scryptxsalsa208sha256 -------------------------------- diff --git a/src/libsodium/Makefile.am b/src/libsodium/Makefile.am index 7e75f28d..4b7fdbf7 100644 --- a/src/libsodium/Makefile.am +++ b/src/libsodium/Makefile.am @@ -57,7 +57,9 @@ libsodium_la_SOURCES = \ crypto_onetimeauth/poly1305/53/auth_poly1305_53.c \ crypto_onetimeauth/poly1305/53/verify_poly1305_53.c \ crypto_onetimeauth/poly1305/donna/api.h \ - crypto_onetimeauth/poly1305/donna/portable-jane.h \ + crypto_onetimeauth/poly1305/donna/poly1305_donna.h \ + crypto_onetimeauth/poly1305/donna/poly1305_donna32.h \ + crypto_onetimeauth/poly1305/donna/poly1305_donna64.h \ crypto_onetimeauth/poly1305/donna/auth_poly1305_donna.c \ crypto_onetimeauth/poly1305/donna/verify_poly1305_donna.c \ crypto_pwhash/scryptxsalsa208sha256/crypto_scrypt-common.c \ diff --git a/src/libsodium/crypto_onetimeauth/poly1305/donna/auth_poly1305_donna.c b/src/libsodium/crypto_onetimeauth/poly1305/donna/auth_poly1305_donna.c index eaea9786..212725f3 100644 --- a/src/libsodium/crypto_onetimeauth/poly1305/donna/auth_poly1305_donna.c +++ b/src/libsodium/crypto_onetimeauth/poly1305/donna/auth_poly1305_donna.c @@ -1,138 +1,62 @@ #include "api.h" -#include "crypto_onetimeauth_poly1305_donna.h" #include "utils.h" -#include "portable-jane.h" +#include "crypto_onetimeauth_poly1305_donna.h" +#include "poly1305_donna.h" + +#ifdef HAVE_TI_MODE +# include "poly1305_donna64.h" +#else +# include "poly1305_donna32.h" +#endif + +static void +poly1305_update(poly1305_context *ctx, const unsigned char *m, + unsigned long long bytes) { + poly1305_state_internal_t *st = (poly1305_state_internal_t *)ctx; + unsigned long long i; + + /* handle leftover */ + if (st->leftover) { + unsigned long long want = (poly1305_block_size - st->leftover); + if (want > bytes) + want = bytes; + for (i = 0; i < want; i++) + st->buffer[st->leftover + i] = m[i]; + bytes -= want; + m += want; + st->leftover += want; + if (st->leftover < poly1305_block_size) + return; 
+ poly1305_blocks(st, st->buffer, poly1305_block_size); + st->leftover = 0; + } + + /* process full blocks */ + if (bytes >= poly1305_block_size) { + unsigned long long want = (bytes & ~(poly1305_block_size - 1)); + poly1305_blocks(st, m, want); + m += want; + bytes -= want; + } + + /* store leftover */ + if (bytes) { + for (i = 0; i < bytes; i++) + st->buffer[st->leftover + i] = m[i]; + st->leftover += bytes; + } +} int crypto_onetimeauth(unsigned char *out, const unsigned char *m, unsigned long long inlen, const unsigned char *key) { - uint32_t t0,t1,t2,t3; - uint32_t h0,h1,h2,h3,h4; - uint32_t r0,r1,r2,r3,r4; - uint32_t s1,s2,s3,s4; - uint32_t b, nb; - unsigned long long j; - uint64_t t[5]; - uint64_t f0,f1,f2,f3; - uint32_t g0,g1,g2,g3,g4; - uint64_t c; - unsigned char mp[16]; - - /* clamp key */ - t0 = U8TO32_LE(key+0); - t1 = U8TO32_LE(key+4); - t2 = U8TO32_LE(key+8); - t3 = U8TO32_LE(key+12); - - /* precompute multipliers */ - r0 = t0 & 0x3ffffff; t0 >>= 26; t0 |= t1 << 6; - r1 = t0 & 0x3ffff03; t1 >>= 20; t1 |= t2 << 12; - r2 = t1 & 0x3ffc0ff; t2 >>= 14; t2 |= t3 << 18; - r3 = t2 & 0x3f03fff; t3 >>= 8; - r4 = t3 & 0x00fffff; - - s1 = r1 * 5; - s2 = r2 * 5; - s3 = r3 * 5; - s4 = r4 * 5; - - /* init state */ - h0 = 0; - h1 = 0; - h2 = 0; - h3 = 0; - h4 = 0; - - /* full blocks */ - if (inlen < 16) goto poly1305_donna_atmost15bytes; -poly1305_donna_16bytes: - m += 16; - inlen -= 16; - - t0 = U8TO32_LE(m-16); - t1 = U8TO32_LE(m-12); - t2 = U8TO32_LE(m-8); - t3 = U8TO32_LE(m-4); - - h0 += t0 & 0x3ffffff; - h1 += ((((uint64_t)t1 << 32) | t0) >> 26) & 0x3ffffff; - h2 += ((((uint64_t)t2 << 32) | t1) >> 20) & 0x3ffffff; - h3 += ((((uint64_t)t3 << 32) | t2) >> 14) & 0x3ffffff; - h4 += (t3 >> 8) | (1 << 24); - - -poly1305_donna_mul: - t[0] = mul32x32_64(h0,r0) + mul32x32_64(h1,s4) + mul32x32_64(h2,s3) + mul32x32_64(h3,s2) + mul32x32_64(h4,s1); - t[1] = mul32x32_64(h0,r1) + mul32x32_64(h1,r0) + mul32x32_64(h2,s4) + mul32x32_64(h3,s3) + mul32x32_64(h4,s2); - t[2] = 
mul32x32_64(h0,r2) + mul32x32_64(h1,r1) + mul32x32_64(h2,r0) + mul32x32_64(h3,s4) + mul32x32_64(h4,s3); - t[3] = mul32x32_64(h0,r3) + mul32x32_64(h1,r2) + mul32x32_64(h2,r1) + mul32x32_64(h3,r0) + mul32x32_64(h4,s4); - t[4] = mul32x32_64(h0,r4) + mul32x32_64(h1,r3) + mul32x32_64(h2,r2) + mul32x32_64(h3,r1) + mul32x32_64(h4,r0); - - h0 = (uint32_t)t[0] & 0x3ffffff; c = (t[0] >> 26); - t[1] += c; h1 = (uint32_t)t[1] & 0x3ffffff; b = (uint32_t)(t[1] >> 26); - t[2] += b; h2 = (uint32_t)t[2] & 0x3ffffff; b = (uint32_t)(t[2] >> 26); - t[3] += b; h3 = (uint32_t)t[3] & 0x3ffffff; b = (uint32_t)(t[3] >> 26); - t[4] += b; h4 = (uint32_t)t[4] & 0x3ffffff; b = (uint32_t)(t[4] >> 26); - h0 += b * 5; - - if (inlen >= 16) goto poly1305_donna_16bytes; - - /* final bytes */ -poly1305_donna_atmost15bytes: - if (!inlen) goto poly1305_donna_finish; - - for (j = 0; j < inlen; j++) mp[j] = m[j]; - mp[j++] = 1; - for (; j < 16; j++) mp[j] = 0; - inlen = 0; - - t0 = U8TO32_LE(mp+0); - t1 = U8TO32_LE(mp+4); - t2 = U8TO32_LE(mp+8); - t3 = U8TO32_LE(mp+12); - - h0 += t0 & 0x3ffffff; - h1 += ((((uint64_t)t1 << 32) | t0) >> 26) & 0x3ffffff; - h2 += ((((uint64_t)t2 << 32) | t1) >> 20) & 0x3ffffff; - h3 += ((((uint64_t)t3 << 32) | t2) >> 14) & 0x3ffffff; - h4 += (t3 >> 8); - - goto poly1305_donna_mul; - -poly1305_donna_finish: - b = h0 >> 26; h0 = h0 & 0x3ffffff; - h1 += b; b = h1 >> 26; h1 = h1 & 0x3ffffff; - h2 += b; b = h2 >> 26; h2 = h2 & 0x3ffffff; - h3 += b; b = h3 >> 26; h3 = h3 & 0x3ffffff; - h4 += b; b = h4 >> 26; h4 = h4 & 0x3ffffff; - h0 += b * 5; - - g0 = h0 + 5; b = g0 >> 26; g0 &= 0x3ffffff; - g1 = h1 + b; b = g1 >> 26; g1 &= 0x3ffffff; - g2 = h2 + b; b = g2 >> 26; g2 &= 0x3ffffff; - g3 = h3 + b; b = g3 >> 26; g3 &= 0x3ffffff; - g4 = h4 + b - (1 << 26); - - b = (g4 >> 31) - 1; - nb = ~b; - h0 = (h0 & nb) | (g0 & b); - h1 = (h1 & nb) | (g1 & b); - h2 = (h2 & nb) | (g2 & b); - h3 = (h3 & nb) | (g3 & b); - h4 = (h4 & nb) | (g4 & b); - - f0 = ((h0 ) | (h1 << 26)) + 
(uint64_t)U8TO32_LE(&key[16]); - f1 = ((h1 >> 6) | (h2 << 20)) + (uint64_t)U8TO32_LE(&key[20]); - f2 = ((h2 >> 12) | (h3 << 14)) + (uint64_t)U8TO32_LE(&key[24]); - f3 = ((h3 >> 18) | (h4 << 8)) + (uint64_t)U8TO32_LE(&key[28]); - - U32TO8_LE(&out[ 0], f0); f1 += (f0 >> 32); - U32TO8_LE(&out[ 4], f1); f2 += (f1 >> 32); - U32TO8_LE(&out[ 8], f2); f3 += (f2 >> 32); - U32TO8_LE(&out[12], f3); + poly1305_context ctx; + poly1305_init(&ctx, key); + poly1305_update(&ctx, m, inlen); + poly1305_finish(&ctx, out); return 0; } @@ -140,7 +64,7 @@ poly1305_donna_finish: const char * crypto_onetimeauth_poly1305_implementation_name(void) { - return "donna"; + return POLY1305_IMPLEMENTATION_NAME; } struct crypto_onetimeauth_poly1305_implementation diff --git a/src/libsodium/crypto_onetimeauth/poly1305/donna/poly1305_donna.h b/src/libsodium/crypto_onetimeauth/poly1305/donna/poly1305_donna.h new file mode 100644 index 00000000..e10e935f --- /dev/null +++ b/src/libsodium/crypto_onetimeauth/poly1305/donna/poly1305_donna.h @@ -0,0 +1,12 @@ +#ifndef POLY1305_DONNA_H +#define POLY1305_DONNA_H + +#include <stddef.h> + +typedef struct poly1305_context { + unsigned long long aligner; + unsigned char opaque[136]; +} poly1305_context; + +#endif /* POLY1305_DONNA_H */ + diff --git a/src/libsodium/crypto_onetimeauth/poly1305/donna/poly1305_donna32.h b/src/libsodium/crypto_onetimeauth/poly1305/donna/poly1305_donna32.h new file mode 100644 index 00000000..bcc6cccf --- /dev/null +++ b/src/libsodium/crypto_onetimeauth/poly1305/donna/poly1305_donna32.h @@ -0,0 +1,221 @@ +/* + poly1305 implementation using 32 bit * 32 bit = 64 bit multiplication and 64 bit addition +*/ + +#define POLY1305_IMPLEMENTATION_NAME "donna32" + +#if defined(_MSC_VER) +# define POLY1305_NOINLINE __declspec(noinline) +#elif defined(__GNUC__) +# define POLY1305_NOINLINE __attribute__((noinline)) +#else +# define POLY1305_NOINLINE +#endif + +#define poly1305_block_size 16 + +/* 17 + sizeof(unsigned long long) + 14*sizeof(unsigned long) */ 
+typedef struct poly1305_state_internal_t { + unsigned long r[5]; + unsigned long h[5]; + unsigned long pad[4]; + unsigned long long leftover; + unsigned char buffer[poly1305_block_size]; + unsigned char final; +} poly1305_state_internal_t; + +/* interpret four 8 bit unsigned integers as a 32 bit unsigned integer in little endian */ +static unsigned long +U8TO32(const unsigned char *p) { + return + (((unsigned long)(p[0] & 0xff) ) | + ((unsigned long)(p[1] & 0xff) << 8) | + ((unsigned long)(p[2] & 0xff) << 16) | + ((unsigned long)(p[3] & 0xff) << 24)); +} + +/* store a 32 bit unsigned integer as four 8 bit unsigned integers in little endian */ +static void +U32TO8(unsigned char *p, unsigned long v) { + p[0] = (v ) & 0xff; + p[1] = (v >> 8) & 0xff; + p[2] = (v >> 16) & 0xff; + p[3] = (v >> 24) & 0xff; +} + +static void +poly1305_init(poly1305_context *ctx, const unsigned char key[32]) { + poly1305_state_internal_t *st = (poly1305_state_internal_t *)ctx; + + /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */ + st->r[0] = (U8TO32(&key[ 0]) ) & 0x3ffffff; + st->r[1] = (U8TO32(&key[ 3]) >> 2) & 0x3ffff03; + st->r[2] = (U8TO32(&key[ 6]) >> 4) & 0x3ffc0ff; + st->r[3] = (U8TO32(&key[ 9]) >> 6) & 0x3f03fff; + st->r[4] = (U8TO32(&key[12]) >> 8) & 0x00fffff; + + /* h = 0 */ + st->h[0] = 0; + st->h[1] = 0; + st->h[2] = 0; + st->h[3] = 0; + st->h[4] = 0; + + /* save pad for later */ + st->pad[0] = U8TO32(&key[16]); + st->pad[1] = U8TO32(&key[20]); + st->pad[2] = U8TO32(&key[24]); + st->pad[3] = U8TO32(&key[28]); + + st->leftover = 0; + st->final = 0; +} + +static void +poly1305_blocks(poly1305_state_internal_t *st, const unsigned char *m, unsigned long long bytes) { + const unsigned long hibit = (st->final) ? 
0 : (1 << 24); /* 1 << 128 */ + unsigned long r0,r1,r2,r3,r4; + unsigned long s1,s2,s3,s4; + unsigned long h0,h1,h2,h3,h4; + unsigned long long d0,d1,d2,d3,d4; + unsigned long c; + + r0 = st->r[0]; + r1 = st->r[1]; + r2 = st->r[2]; + r3 = st->r[3]; + r4 = st->r[4]; + + s1 = r1 * 5; + s2 = r2 * 5; + s3 = r3 * 5; + s4 = r4 * 5; + + h0 = st->h[0]; + h1 = st->h[1]; + h2 = st->h[2]; + h3 = st->h[3]; + h4 = st->h[4]; + + while (bytes >= poly1305_block_size) { + /* h += m[i] */ + h0 += (U8TO32(m+ 0) ) & 0x3ffffff; + h1 += (U8TO32(m+ 3) >> 2) & 0x3ffffff; + h2 += (U8TO32(m+ 6) >> 4) & 0x3ffffff; + h3 += (U8TO32(m+ 9) >> 6) & 0x3ffffff; + h4 += (U8TO32(m+12) >> 8) | hibit; + + /* h *= r */ + d0 = ((unsigned long long)h0 * r0) + ((unsigned long long)h1 * s4) + ((unsigned long long)h2 * s3) + ((unsigned long long)h3 * s2) + ((unsigned long long)h4 * s1); + d1 = ((unsigned long long)h0 * r1) + ((unsigned long long)h1 * r0) + ((unsigned long long)h2 * s4) + ((unsigned long long)h3 * s3) + ((unsigned long long)h4 * s2); + d2 = ((unsigned long long)h0 * r2) + ((unsigned long long)h1 * r1) + ((unsigned long long)h2 * r0) + ((unsigned long long)h3 * s4) + ((unsigned long long)h4 * s3); + d3 = ((unsigned long long)h0 * r3) + ((unsigned long long)h1 * r2) + ((unsigned long long)h2 * r1) + ((unsigned long long)h3 * r0) + ((unsigned long long)h4 * s4); + d4 = ((unsigned long long)h0 * r4) + ((unsigned long long)h1 * r3) + ((unsigned long long)h2 * r2) + ((unsigned long long)h3 * r1) + ((unsigned long long)h4 * r0); + + /* (partial) h %= p */ + c = (unsigned long)(d0 >> 26); h0 = (unsigned long)d0 & 0x3ffffff; + d1 += c; c = (unsigned long)(d1 >> 26); h1 = (unsigned long)d1 & 0x3ffffff; + d2 += c; c = (unsigned long)(d2 >> 26); h2 = (unsigned long)d2 & 0x3ffffff; + d3 += c; c = (unsigned long)(d3 >> 26); h3 = (unsigned long)d3 & 0x3ffffff; + d4 += c; c = (unsigned long)(d4 >> 26); h4 = (unsigned long)d4 & 0x3ffffff; + h0 += c * 5; c = (h0 >> 26); h0 = h0 & 0x3ffffff; + h1 += c; + + m += 
poly1305_block_size; + bytes -= poly1305_block_size; + } + + st->h[0] = h0; + st->h[1] = h1; + st->h[2] = h2; + st->h[3] = h3; + st->h[4] = h4; +} + +static POLY1305_NOINLINE void +poly1305_finish(poly1305_context *ctx, unsigned char mac[16]) { + poly1305_state_internal_t *st = (poly1305_state_internal_t *)ctx; + unsigned long h0,h1,h2,h3,h4,c; + unsigned long g0,g1,g2,g3,g4; + unsigned long long f; + unsigned long mask; + + /* process the remaining block */ + if (st->leftover) { + unsigned long long i = st->leftover; + st->buffer[i++] = 1; + for (; i < poly1305_block_size; i++) + st->buffer[i] = 0; + st->final = 1; + poly1305_blocks(st, st->buffer, poly1305_block_size); + } + + /* fully carry h */ + h0 = st->h[0]; + h1 = st->h[1]; + h2 = st->h[2]; + h3 = st->h[3]; + h4 = st->h[4]; + + c = h1 >> 26; h1 = h1 & 0x3ffffff; + h2 += c; c = h2 >> 26; h2 = h2 & 0x3ffffff; + h3 += c; c = h3 >> 26; h3 = h3 & 0x3ffffff; + h4 += c; c = h4 >> 26; h4 = h4 & 0x3ffffff; + h0 += c * 5; c = h0 >> 26; h0 = h0 & 0x3ffffff; + h1 += c; + + /* compute h + -p */ + g0 = h0 + 5; c = g0 >> 26; g0 &= 0x3ffffff; + g1 = h1 + c; c = g1 >> 26; g1 &= 0x3ffffff; + g2 = h2 + c; c = g2 >> 26; g2 &= 0x3ffffff; + g3 = h3 + c; c = g3 >> 26; g3 &= 0x3ffffff; + g4 = h4 + c - (1 << 26); + + /* select h if h < p, or h + -p if h >= p */ + mask = (g4 >> ((sizeof(unsigned long) * 8) - 1)) - 1; + g0 &= mask; + g1 &= mask; + g2 &= mask; + g3 &= mask; + g4 &= mask; + mask = ~mask; + h0 = (h0 & mask) | g0; + h1 = (h1 & mask) | g1; + h2 = (h2 & mask) | g2; + h3 = (h3 & mask) | g3; + h4 = (h4 & mask) | g4; + + /* h = h % (2^128) */ + h0 = ((h0 ) | (h1 << 26)) & 0xffffffff; + h1 = ((h1 >> 6) | (h2 << 20)) & 0xffffffff; + h2 = ((h2 >> 12) | (h3 << 14)) & 0xffffffff; + h3 = ((h3 >> 18) | (h4 << 8)) & 0xffffffff; + + /* mac = (h + pad) % (2^128) */ + f = (unsigned long long)h0 + st->pad[0] ; h0 = (unsigned long)f; + f = (unsigned long long)h1 + st->pad[1] + (f >> 32); h1 = (unsigned long)f; + f = (unsigned long long)h2 
+ st->pad[2] + (f >> 32); h2 = (unsigned long)f; + f = (unsigned long long)h3 + st->pad[3] + (f >> 32); h3 = (unsigned long)f; + + U32TO8(mac + 0, h0); + U32TO8(mac + 4, h1); + U32TO8(mac + 8, h2); + U32TO8(mac + 12, h3); + + /* zero out the state */ + st->h[0] = 0; + st->h[1] = 0; + st->h[2] = 0; + st->h[3] = 0; + st->h[4] = 0; + st->r[0] = 0; + st->r[1] = 0; + st->r[2] = 0; + st->r[3] = 0; + st->r[4] = 0; + st->pad[0] = 0; + st->pad[1] = 0; + st->pad[2] = 0; + st->pad[3] = 0; +} + diff --git a/src/libsodium/crypto_onetimeauth/poly1305/donna/poly1305_donna64.h b/src/libsodium/crypto_onetimeauth/poly1305/donna/poly1305_donna64.h new file mode 100644 index 00000000..97b5c472 --- /dev/null +++ b/src/libsodium/crypto_onetimeauth/poly1305/donna/poly1305_donna64.h @@ -0,0 +1,209 @@ +/* + poly1305 implementation using 64 bit * 64 bit = 128 bit multiplication and 128 bit addition +*/ + +#define POLY1305_IMPLEMENTATION_NAME "donna64" + +#if defined(__SIZEOF_INT128__) +typedef unsigned __int128 uint128_t; +#else +typedef unsigned uint128_t __attribute__((mode(TI))); +#endif + +#define MUL(out, x, y) out = ((uint128_t)x * y) +#define ADD(out, in) out += in +#define ADDLO(out, in) out += in +#define SHR(in, shift) (unsigned long long)(in >> (shift)) +#define LO(in) (unsigned long long)(in) + +#define POLY1305_NOINLINE __attribute__((noinline)) + +#define poly1305_block_size 16 + +/* 17 + sizeof(unsigned long long) + 8*sizeof(unsigned long long) */ +typedef struct poly1305_state_internal_t { + unsigned long long r[3]; + unsigned long long h[3]; + unsigned long long pad[2]; + unsigned long long leftover; + unsigned char buffer[poly1305_block_size]; + unsigned char final; +} poly1305_state_internal_t; + +/* interpret eight 8 bit unsigned integers as a 64 bit unsigned integer in little endian */ +static unsigned long long +U8TO64(const unsigned char *p) { + return + (((unsigned long long)(p[0] & 0xff) ) | + ((unsigned long long)(p[1] & 0xff) << 8) | + ((unsigned long long)(p[2] & 
0xff) << 16) | + ((unsigned long long)(p[3] & 0xff) << 24) | + ((unsigned long long)(p[4] & 0xff) << 32) | + ((unsigned long long)(p[5] & 0xff) << 40) | + ((unsigned long long)(p[6] & 0xff) << 48) | + ((unsigned long long)(p[7] & 0xff) << 56)); +} + +/* store a 64 bit unsigned integer as eight 8 bit unsigned integers in little endian */ +static void +U64TO8(unsigned char *p, unsigned long long v) { + p[0] = (v ) & 0xff; + p[1] = (v >> 8) & 0xff; + p[2] = (v >> 16) & 0xff; + p[3] = (v >> 24) & 0xff; + p[4] = (v >> 32) & 0xff; + p[5] = (v >> 40) & 0xff; + p[6] = (v >> 48) & 0xff; + p[7] = (v >> 56) & 0xff; +} + +static void +poly1305_init(poly1305_context *ctx, const unsigned char key[32]) { + poly1305_state_internal_t *st = (poly1305_state_internal_t *)ctx; + unsigned long long t0,t1; + + /* r &= 0xffffffc0ffffffc0ffffffc0fffffff */ + t0 = U8TO64(&key[0]); + t1 = U8TO64(&key[8]); + + st->r[0] = ( t0 ) & 0xffc0fffffff; + st->r[1] = ((t0 >> 44) | (t1 << 20)) & 0xfffffc0ffff; + st->r[2] = ((t1 >> 24) ) & 0x00ffffffc0f; + + /* h = 0 */ + st->h[0] = 0; + st->h[1] = 0; + st->h[2] = 0; + + /* save pad for later */ + st->pad[0] = U8TO64(&key[16]); + st->pad[1] = U8TO64(&key[24]); + + st->leftover = 0; + st->final = 0; +} + +static void +poly1305_blocks(poly1305_state_internal_t *st, const unsigned char *m, unsigned long long bytes) { + const unsigned long long hibit = (st->final) ? 
0 : ((unsigned long long)1 << 40); /* 1 << 128 */ + unsigned long long r0,r1,r2; + unsigned long long s1,s2; + unsigned long long h0,h1,h2; + unsigned long long c; + uint128_t d0,d1,d2,d; + + r0 = st->r[0]; + r1 = st->r[1]; + r2 = st->r[2]; + + h0 = st->h[0]; + h1 = st->h[1]; + h2 = st->h[2]; + + s1 = r1 * (5 << 2); + s2 = r2 * (5 << 2); + + while (bytes >= poly1305_block_size) { + unsigned long long t0,t1; + + /* h += m[i] */ + t0 = U8TO64(&m[0]); + t1 = U8TO64(&m[8]); + + h0 += (( t0 ) & 0xfffffffffff); + h1 += (((t0 >> 44) | (t1 << 20)) & 0xfffffffffff); + h2 += (((t1 >> 24) ) & 0x3ffffffffff) | hibit; + + /* h *= r */ + MUL(d0, h0, r0); MUL(d, h1, s2); ADD(d0, d); MUL(d, h2, s1); ADD(d0, d); + MUL(d1, h0, r1); MUL(d, h1, r0); ADD(d1, d); MUL(d, h2, s2); ADD(d1, d); + MUL(d2, h0, r2); MUL(d, h1, r1); ADD(d2, d); MUL(d, h2, r0); ADD(d2, d); + + /* (partial) h %= p */ + c = SHR(d0, 44); h0 = LO(d0) & 0xfffffffffff; + ADDLO(d1, c); c = SHR(d1, 44); h1 = LO(d1) & 0xfffffffffff; + ADDLO(d2, c); c = SHR(d2, 42); h2 = LO(d2) & 0x3ffffffffff; + h0 += c * 5; c = (h0 >> 44); h0 = h0 & 0xfffffffffff; + h1 += c; + + m += poly1305_block_size; + bytes -= poly1305_block_size; + } + + st->h[0] = h0; + st->h[1] = h1; + st->h[2] = h2; +} + + +static POLY1305_NOINLINE void +poly1305_finish(poly1305_context *ctx, unsigned char mac[16]) { + poly1305_state_internal_t *st = (poly1305_state_internal_t *)ctx; + unsigned long long h0,h1,h2,c; + unsigned long long g0,g1,g2; + unsigned long long t0,t1; + + /* process the remaining block */ + if (st->leftover) { + unsigned long long i = st->leftover; + st->buffer[i] = 1; + for (i = i + 1; i < poly1305_block_size; i++) + st->buffer[i] = 0; + st->final = 1; + poly1305_blocks(st, st->buffer, poly1305_block_size); + } + + /* fully carry h */ + h0 = st->h[0]; + h1 = st->h[1]; + h2 = st->h[2]; + + c = (h1 >> 44); h1 &= 0xfffffffffff; + h2 += c; c = (h2 >> 42); h2 &= 0x3ffffffffff; + h0 += c * 5; c = (h0 >> 44); h0 &= 0xfffffffffff; + h1 += c; c = 
(h1 >> 44); h1 &= 0xfffffffffff; + h2 += c; c = (h2 >> 42); h2 &= 0x3ffffffffff; + h0 += c * 5; c = (h0 >> 44); h0 &= 0xfffffffffff; + h1 += c; + + /* compute h + -p */ + g0 = h0 + 5; c = (g0 >> 44); g0 &= 0xfffffffffff; + g1 = h1 + c; c = (g1 >> 44); g1 &= 0xfffffffffff; + g2 = h2 + c - ((unsigned long long)1 << 42); + + /* select h if h < p, or h + -p if h >= p */ + c = (g2 >> ((sizeof(unsigned long long) * 8) - 1)) - 1; + g0 &= c; + g1 &= c; + g2 &= c; + c = ~c; + h0 = (h0 & c) | g0; + h1 = (h1 & c) | g1; + h2 = (h2 & c) | g2; + + /* h = (h + pad) */ + t0 = st->pad[0]; + t1 = st->pad[1]; + + h0 += (( t0 ) & 0xfffffffffff) ; c = (h0 >> 44); h0 &= 0xfffffffffff; + h1 += (((t0 >> 44) | (t1 << 20)) & 0xfffffffffff) + c; c = (h1 >> 44); h1 &= 0xfffffffffff; + h2 += (((t1 >> 24) ) & 0x3ffffffffff) + c; h2 &= 0x3ffffffffff; + + /* mac = h % (2^128) */ + h0 = ((h0 ) | (h1 << 44)); + h1 = ((h1 >> 20) | (h2 << 24)); + + U64TO8(&mac[0], h0); + U64TO8(&mac[8], h1); + + /* zero out the state */ + st->h[0] = 0; + st->h[1] = 0; + st->h[2] = 0; + st->r[0] = 0; + st->r[1] = 0; + st->r[2] = 0; + st->pad[0] = 0; + st->pad[1] = 0; +} + diff --git a/src/libsodium/crypto_onetimeauth/poly1305/donna/portable-jane.h b/src/libsodium/crypto_onetimeauth/poly1305/donna/portable-jane.h deleted file mode 100644 index 2d8e8c1b..00000000 --- a/src/libsodium/crypto_onetimeauth/poly1305/donna/portable-jane.h +++ /dev/null @@ -1,712 +0,0 @@ -#ifndef PORTABLE_JANE_H -#define PORTABLE_JANE_H "+endian +uint128" -/* 0000-os-100-solaris.h */ - -#if defined(sun) || defined(__sun) || defined(__SVR4) || defined(__svr4__) - #include - #include - #include - - #define OS_SOLARIS -#endif - -/* 0000-os-100-unix.h */ - -#if defined(__unix__) || defined(unix) - #include - #include - #if !defined(USG) - #include /* need this to define BSD */ - #endif - #include - #include - - #define OS_NIX - #if defined(__linux__) - #include - #define OS_LINUX - #elif defined(BSD) - #define OS_BSD - - #if defined(MACOS_X) || 
(defined(__APPLE__) & defined(__MACH__)) - #define OS_OSX - #elif defined(macintosh) || defined(Macintosh) - #define OS_MAC - #elif defined(__OpenBSD__) - #define OS_OPENBSD - #elif defined(__FreeBSD__) - #define OS_FREEBSD - #elif defined(__NetBSD__) - #define OS_NETBSD - #endif - #endif -#endif - -/* 0000-os-100-windows.h */ - -#if defined(_WIN32) || defined(_WIN64) || defined(__TOS_WIN__) || defined(__WINDOWS__) - #include - #include - #define OS_WINDOWS -#endif - -/* 0100-compiler-000.h */ - -#undef NOINLINE -#undef INLINE -#undef FASTCALL -#undef CDECL -#undef STDCALL -#undef NAKED - -/* 0100-compiler-100-clang.h */ - -#if defined(__clang__) - #define COMPILER_CLANG ((__clang_major__ * 10000) + (__clang_minor__ * 100) + (__clang_patchlevel__)) -#endif - -/* 0100-compiler-100-gcc.h */ - -#if defined(__GNUC__) - #if (__GNUC__ >= 3) - #define COMPILER_GCC_PATCHLEVEL __GNUC_PATCHLEVEL__ - #else - #define COMPILER_GCC_PATCHLEVEL 0 - #endif - #define COMPILER_GCC ((__GNUC__ * 10000) + (__GNUC_MINOR__ * 100) + (COMPILER_GCC_PATCHLEVEL)) - - #include - - typedef unsigned int fpu_control_t; - - #define ROTL32(a,b) (((a) << (b)) | ((a) >> (32 - b))) - #define ROTR32(a,b) (((a) >> (b)) | ((a) << (32 - b))) - #define ROTL64(a,b) (((a) << (b)) | ((a) >> (64 - b))) - #define ROTR64(a,b) (((a) >> (b)) | ((a) << (64 - b))) - - #if (COMPILER_GCC >= 30000) - #define NOINLINE __attribute__((noinline)) - #else - #define NOINLINE - #endif - #if (COMPILER_GCC >= 30000) - #define INLINE inline __attribute__((always_inline)) - #else - #define INLINE inline - #endif - #if (COMPILER_GCC >= 30400) - #define FASTCALL __attribute__((fastcall)) - #else - #define FASTCALL - #endif - #define CDECL __attribute__((cdecl)) - #define STDCALL __attribute__((stdcall)) - - #define mul32x32_64(a,b) ((uint64_t)(a) * (b)) - #define mul32x32_64s(a,b) (((int64_t)(a))*(b)) -#endif - -/* 0100-compiler-100-icc.h */ - -#if defined(__ICC) - #define COMPILER_ICC __ICC -#endif - -/* 0100-compiler-100-mingw.h 
*/ - -#if defined(__MINGW32__) || defined(__MINGW64__) - #define COMPILER_MINGW -#endif - -/* 0100-compiler-100-msvc.h */ - -#if defined(_MSC_VER) -#ifndef _CRT_SECURE_NO_WARNINGS -# define _CRT_SECURE_NO_WARNINGS -#endif - - #pragma warning(disable : 4127) /* conditional expression is constant */ - #pragma warning(disable : 4100) /* unreferenced formal parameter */ - - #include - #include /* _rotl */ - #include - - #define COMPILER_MSVC_VS6 120000000 - #define COMPILER_MSVC_VS6PP 121000000 - #define COMPILER_MSVC_VS2002 130000000 - #define COMPILER_MSVC_VS2003 131000000 - #define COMPILER_MSVC_VS2005 140050727 - #define COMPILER_MSVC_VS2008 150000000 - #define COMPILER_MSVC_VS2008SP1 150030729 - #define COMPILER_MSVC_VS2010 160000000 - #define COMPILER_MSVC_VS2010SP1 160040219 - #define COMPILER_MSVC_VS2012RC 170000000 - #define COMPILER_MSVC_VS2012 170050727 - - #if _MSC_FULL_VER > 100000000 - #define COMPILER_MSVC (_MSC_FULL_VER) - #else - #define COMPILER_MSVC (_MSC_FULL_VER * 10) - #endif - - #if ((_MSC_VER == 1200) && defined(_mm_free)) - #undef COMPILER_MSVC - #define COMPILER_MSVC COMPILER_MSVC_VS6PP - #endif - - typedef unsigned char uint8_t; - typedef unsigned short uint16_t; - typedef unsigned int uint32_t; - typedef signed int int32_t; - typedef unsigned __int64 uint64_t; - typedef signed __int64 int64_t; - - typedef uint16_t fpu_control_t; - - #define ROTL32(a,b) _rotl(a,b) - #define ROTR32(a,b) _rotr(a,b) - #define ROTL64(a,b) _rotl64(a,b) - #define ROTR64(a,b) _rotr64(a,b) - - #define NOINLINE __declspec(noinline) - #define INLINE __forceinline - #define FASTCALL __fastcall - #define CDECL __cdecl - #define STDCALL __stdcall - #define NAKED __declspec(naked) - - #if defined(_DEBUG) - #define mul32x32_64(a,b) (((uint64_t)(a))*(b)) - #define mul32x32_64s(a,b) (((int64_t)(a))*(b)) - #else - #define mul32x32_64(a,b) __emulu(a,b) - #define mul32x32_64s(a,b) __emul(a,b) - #endif -#endif -/* 0100-compiler-999.h */ - -#define OPTIONAL_INLINE /* config */ 
-#if defined(OPTIONAL_INLINE) - #undef OPTIONAL_INLINE - #define OPTIONAL_INLINE INLINE -#else - #define OPTIONAL_INLINE -#endif - -#define Preprocessor_ToString(s) #s -#define Stringify(s) Preprocessor_ToString(s) - -#include -#include - -/* 0200-cpu-100-alpha.h */ - -#if defined(__alpha__) || defined(__alpha) || defined(_M_ALPHA) - #define CPU_ALPHA -#endif - -/* 0200-cpu-100-hppa.h */ - -#if defined(__hppa__) || defined(__hppa) - #define CPU_HPPA -#endif - -/* 0200-cpu-100-intel.h */ - -#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__ ) || defined(_M_X64) - #define CPU_X86_64 -#elif defined(__i586__) || defined(__i686__) || (defined(_M_IX86) && (_M_IX86 >= 500)) - #define CPU_X86 500 -#elif defined(__i486__) || (defined(_M_IX86) && (_M_IX86 >= 400)) - #define CPU_X86 400 -#elif defined(__i386__) || (defined(_M_IX86) && (_M_IX86 >= 300)) || defined(__X86__) || defined(_X86_) || defined(__I86__) - #define CPU_X86 300 -#elif defined(__ia64__) || defined(_IA64) || defined(__IA64__) || defined(_M_IA64) || defined(__ia64) - #define CPU_IA64 -#endif - -/* 0200-cpu-100-ppc.h */ - -#if defined(powerpc) || defined(__PPC__) || defined(__ppc__) || defined(_ARCH_PPC) || defined(__powerpc__) || defined(__powerpc) || defined(POWERPC) || defined(_M_PPC) - #define CPU_PPC - #if defined(_ARCH_PWR7) - #define CPU_POWER7 - #elif defined(__64BIT__) - #define CPU_PPC64 - #else - #define CPU_PPC32 - #endif -#endif - -/* 0200-cpu-100-sparc.h */ - -#if defined(__sparc__) || defined(__sparc) || defined(__sparcv9) - #define CPU_SPARC - #if defined(__sparcv9) - #define CPU_SPARC64 - #else - #define CPU_SPARC32 - #endif -#endif - -/* 0200-cpu-200-bits.h */ - -#if defined(CPU_X86_64) || defined(CPU_IA64) || defined(CPU_SPARC64) || defined(__64BIT__) || defined(__LP64__) || defined(_LP64) || (defined(_MIPS_SZLONG) && (_MIPS_SZLONG == 64)) - #define CPU_64BITS - - #undef FASTCALL - #undef CDECL - #undef STDCALL - - #define FASTCALL - #define CDECL - #define STDCALL -#endif - 
-/* 0200-cpu-200-endian.h */ - -#if ((defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && (__BYTE_ORDER == __LITTLE_ENDIAN)) || \ - (defined(BYTE_ORDER) && defined(LITTLE_ENDIAN) && (BYTE_ORDER == LITTLE_ENDIAN)) || \ - (defined(CPU_X86) || defined(CPU_X86_64)) || \ - (defined(vax) || defined(MIPSEL) || defined(_MIPSEL))) -#define CPU_LE -#elif ((defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && (__BYTE_ORDER == __BIG_ENDIAN)) || \ - (defined(BYTE_ORDER) && defined(BIG_ENDIAN) && (BYTE_ORDER == BIG_ENDIAN)) || \ - (defined(CPU_SPARC) || defined(CPU_PPC) || defined(mc68000) || defined(sel)) || defined(_MIPSEB)) -#define CPU_BE -#else - /* unknown endian! */ -#endif - -#if defined(__s390__) || defined(__zarch__) || defined(__SYSC_ZARCH__) -# define CPU_Z390 -#endif - -/* 0400-endian-100-be.h */ - -#if defined(CPU_BE) && !defined(CPU_ALIGNED_ACCESS_REQUIRED) - static INLINE uint16_t fU8TO16_BE_FAST(const uint8_t *p) { return *(const uint16_t *)p; } - static INLINE uint32_t fU8TO32_BE_FAST(const uint8_t *p) { return *(const uint32_t *)p; } - static INLINE uint64_t fU8TO64_BE_FAST(const uint8_t *p) { return *(const uint64_t *)p; } - static INLINE void fU16TO8_BE_FAST(uint8_t *p, const uint16_t v) { *(uint16_t *)p = v; } - static INLINE void fU32TO8_BE_FAST(uint8_t *p, const uint32_t v) { *(uint32_t *)p = v; } - static INLINE void fU64TO8_BE_FAST(uint8_t *p, const uint64_t v) { *(uint64_t *)p = v; } - - #define U8TO16_BE(p) fU8TO16_BE_FAST(p) - #define U8TO32_BE(p) fU8TO32_BE_FAST(p) - #define U8TO64_BE(p) fU8TO64_BE_FAST(p) - #define U16TO8_BE(p, v) fU16TO8_BE_FAST(p, v) - #define U32TO8_BE(p, v) fU32TO8_BE_FAST(p, v) - #define U64TO8_BE(p, v) fU64TO8_BE_FAST(p, v) -#endif - -/* 0400-endian-100-le.h */ - -#if defined(CPU_LE) && !defined(CPU_ALIGNED_ACCESS_REQUIRED) - static INLINE uint16_t fU8TO16_LE_FAST(const uint8_t *p) { return *(const uint16_t *)p; } - static INLINE uint32_t fU8TO32_LE_FAST(const uint8_t *p) { return *(const uint32_t *)p; } - static INLINE 
uint64_t fU8TO64_LE_FAST(const uint8_t *p) { return *(const uint64_t *)p; } - static INLINE void fU16TO8_LE_FAST(uint8_t *p, const uint16_t v) { *(uint16_t *)p = v; } - static INLINE void fU32TO8_LE_FAST(uint8_t *p, const uint32_t v) { *(uint32_t *)p = v; } - static INLINE void fU64TO8_LE_FAST(uint8_t *p, const uint64_t v) { *(uint64_t *)p = v; } - - #define U8TO16_LE(p) fU8TO16_LE_FAST(p) - #define U8TO32_LE(p) fU8TO32_LE_FAST(p) - #define U8TO64_LE(p) fU8TO64_LE_FAST(p) - #define U16TO8_LE(p, v) fU16TO8_LE_FAST(p, v) - #define U32TO8_LE(p, v) fU32TO8_LE_FAST(p, v) - #define U64TO8_LE(p, v) fU64TO8_LE_FAST(p, v) -#endif - -/* 0400-endian-100-sparc.h */ - -#if defined(CPU_SPARC) - #if defined(CPU_SPARC64) - static INLINE uint64_t - fU8TO64_LE_FAST(const uint8_t *p) { - uint64_d d; - __asm__ ("ldxa [%1]0x88,%0" : "=r"(d) : "r"(p)); - return d; - } - - static INLINE void - fU64TO8_LE_FAST(uint8_t *p, const uint64_t v) { - __asm__ ("stxa %0,[%1]0x88" : : "r"(v), "r"(p)); - } - #else - static INLINE uint64_t - fU8TO64_LE_FAST(const uint8_t *p) { - uint32_t *s4, h, l; - __asm__ ("add %3,4,%0\n\tlda [%3]0x88,%1\n\tlda [%0]0x88,%2" : "+r"(s4), "=r"(l), "=r"(h) : "r"(p)); - return ((uint64_t)h << 32) | l; - } - - static INLINE void - fU64TO8_LE_FAST(uint8_t *p, const uint64_t v) { - uint32_t *s4, h = (uint32_t)(v >> 32), l = (uint32_t)(v & (uint32_t)0xffffffff); - __asm__ ("add %3,4,%0\n\tsta %1,[%3]0x88\n\tsta %2,[%0]0x88" : "+r"(s4) : "r"(l), "r"(h), "r"(p)); - } - #endif - - static INLINE uint32_t - fU8TO32_LE_FAST(const uint8_t *p) { - uint32_t d; - __asm__ ("lda [%1]0x88,%0" : "=r"(d) : "r"(p)); - return d; - } - - static INLINE void - fU32TO8_LE_FAST(uint8_t *p, const uint32_t v) { - __asm__ ("sta %0,[%1]0x88" : : "r"(p), "r"(v)); - } - - #define U8TO32_LE(p) fU8TO32_LE_FAST(p) - #define U8TO64_LE(p) fU8TO64_LE_FAST(p) - #define U32TO8_LE(p, v) fU32TO8_LE_FAST(p, v) - #define U64TO8_LE(p, v) fU64TO8_LE_FAST(p, v) -#endif - -/* 0400-endian-100-x86.h */ - -#if 
(((defined(CPU_X86) && (CPU_X86 >= 400)) || defined(CPU_X86_64)) && (defined(COMPILER_MSVC) || defined(COMPILER_GCC))) - #if defined(COMPILER_MSVC) - static INLINE uint16_t U16_SWAP_FAST(uint16_t v) { return _byteswap_ushort(v); } - static INLINE uint32_t U32_SWAP_FAST(uint32_t v) { return _byteswap_ulong(v); } - static INLINE uint64_t U64_SWAP_FAST(uint64_t v) { return _byteswap_uint64(v); } - #else - static INLINE uint16_t U16_SWAP_FAST(uint16_t v) { __asm__("rorw $8,%0" : "+r" (v)); return v; } - static INLINE uint32_t U32_SWAP_FAST(uint32_t v) { __asm__("bswap %0" : "+r" (v)); return v; } - #if defined(CPU_X86_64) - static INLINE uint64_t U64_SWAP_FAST(uint64_t v) { __asm__("bswap %0" : "+r" (v)); return v; } - #else - static INLINE uint64_t U64_SWAP_FAST(uint64_t v) { - uint32_t lo = U32_SWAP_FAST((uint32_t)(v)), hi = U32_SWAP_FAST((uint32_t)(v >> 32)); - return ((uint64_t)lo << 32) | hi; - } - #endif - #endif - - - static INLINE uint16_t fU8TO16_BE_FAST(const uint8_t *p) { return U16_SWAP_FAST(*(const uint16_t *)p); } - static INLINE uint32_t fU8TO32_BE_FAST(const uint8_t *p) { return U32_SWAP_FAST(*(const uint32_t *)p); } - static INLINE uint64_t fU8TO64_BE_FAST(const uint8_t *p) { return U64_SWAP_FAST(*(const uint64_t *)p); } - static INLINE void fU16TO8_BE_FAST(uint8_t *p, const uint16_t v) { *(uint16_t *)p = U16_SWAP_FAST(v); } - static INLINE void fU32TO8_BE_FAST(uint8_t *p, const uint32_t v) { *(uint32_t *)p = U32_SWAP_FAST(v); } - static INLINE void fU64TO8_BE_FAST(uint8_t *p, const uint64_t v) { *(uint64_t *)p = U64_SWAP_FAST(v); } - - #define U16_SWAP(p) U16_SWAP_FAST(p) - #define U32_SWAP(p) U32_SWAP_FAST(p) - #define U64_SWAP(p) U64_SWAP_FAST(p) - #define U8TO16_BE(p) fU8TO16_BE_FAST(p) - #define U8TO32_BE(p) fU8TO32_BE_FAST(p) - #define U8TO64_BE(p) fU8TO64_BE_FAST(p) - #define U16TO8_BE(p, v) fU16TO8_BE_FAST(p, v) - #define U32TO8_BE(p, v) fU32TO8_BE_FAST(p, v) - #define U64TO8_BE(p, v) fU64TO8_BE_FAST(p, v) -#endif - -/* 
0400-endian-999-generic-be.h */ - -#if !defined(U8TO16_BE) - static INLINE uint16_t - fU8TO16_BE_SLOW(const uint8_t *p) { - return - (((uint16_t)(p[0]) << 8) | - ((uint16_t)(p[1]) )); - } - - #define U8TO16_BE(p) fU8TO16_BE_SLOW(p) -#endif - - -#if !defined(U8TO32_BE) - static INLINE uint32_t - fU8TO32_BE_SLOW(const uint8_t *p) { - return - (((uint32_t)(p[0]) << 24) | - ((uint32_t)(p[1]) << 16) | - ((uint32_t)(p[2]) << 8) | - ((uint32_t)(p[3]) )); - } - - #define U8TO32_BE(p) fU8TO32_BE_SLOW(p) -#endif - -#if !defined(U8TO64_BE) - static INLINE uint64_t - fU8TO64_BE_SLOW(const uint8_t *p) { - return - (((uint64_t)(p[0]) << 56) | - ((uint64_t)(p[1]) << 48) | - ((uint64_t)(p[2]) << 40) | - ((uint64_t)(p[3]) << 32) | - ((uint64_t)(p[4]) << 24) | - ((uint64_t)(p[5]) << 16) | - ((uint64_t)(p[6]) << 8) | - ((uint64_t)(p[7]) )); - } - - #define U8TO64_BE(p) fU8TO64_BE_SLOW(p) -#endif - -#if !defined(U16TO8_BE) - static INLINE void - fU16TO8_BE_SLOW(uint8_t *p, const uint16_t v) { - p[0] = (uint8_t)(v >> 8); - p[1] = (uint8_t)(v ); - } - - #define U16TO8_BE(p, v) fU16TO8_BE_SLOW(p, v) -#endif - -#if !defined(U32TO8_BE) - static INLINE void - fU32TO8_BE_SLOW(uint8_t *p, const uint32_t v) { - p[0] = (uint8_t)(v >> 24); - p[1] = (uint8_t)(v >> 16); - p[2] = (uint8_t)(v >> 8); - p[3] = (uint8_t)(v ); - } - - #define U32TO8_BE(p, v) fU32TO8_BE_SLOW(p, v) -#endif - -#if !defined(U64TO8_BE) - static INLINE void - fU64TO8_BE_SLOW(uint8_t *p, const uint64_t v) { - p[0] = (uint8_t)(v >> 56); - p[1] = (uint8_t)(v >> 48); - p[2] = (uint8_t)(v >> 40); - p[3] = (uint8_t)(v >> 32); - p[4] = (uint8_t)(v >> 24); - p[5] = (uint8_t)(v >> 16); - p[6] = (uint8_t)(v >> 8); - p[7] = (uint8_t)(v ); - } - - #define U64TO8_BE(p, v) fU64TO8_BE_SLOW(p, v) -#endif - -/* 0400-endian-999-generic-le.h */ - -#if !defined(U8TO16_LE) - static INLINE uint16_t - fU8TO16_LE_SLOW(const uint8_t *p) { - return - (((uint16_t)(p[0]) ) | - ((uint16_t)(p[1]) << 8)); - } - - #define U8TO16_LE(p) fU8TO16_LE_SLOW(p) 
-#endif - -#if !defined(U8TO32_LE) - static INLINE uint32_t - fU8TO32_LE_SLOW(const uint8_t *p) { - return - (((uint32_t)(p[0]) ) | - ((uint32_t)(p[1]) << 8) | - ((uint32_t)(p[2]) << 16) | - ((uint32_t)(p[3]) << 24)); - } - - #define U8TO32_LE(p) fU8TO32_LE_SLOW(p) -#endif - - -#if !defined(U8TO64_LE) - static INLINE uint64_t - fU8TO64_LE_SLOW(const uint8_t *p) { - return - (((uint64_t)(p[0]) ) | - ((uint64_t)(p[1]) << 8) | - ((uint64_t)(p[2]) << 16) | - ((uint64_t)(p[3]) << 24) | - ((uint64_t)(p[4]) << 32) | - ((uint64_t)(p[5]) << 40) | - ((uint64_t)(p[6]) << 48) | - ((uint64_t)(p[7]) << 56)); - } - - #define U8TO64_LE(p) fU8TO64_LE_SLOW(p) -#endif - -#if !defined(U16TO8_LE) - static INLINE void - fU16TO8_LE_SLOW(uint8_t *p, const uint16_t v) { - p[0] = (uint8_t)(v ); - p[1] = (uint8_t)(v >> 8); - } - - #define U16TO8_LE(p, v) fU16TO8_LE_SLOW(p, v) -#endif - -#if !defined(U32TO8_LE) - static INLINE void - fU32TO8_LE_SLOW(uint8_t *p, const uint32_t v) { - p[0] = (uint8_t)(v ); - p[1] = (uint8_t)(v >> 8); - p[2] = (uint8_t)(v >> 16); - p[3] = (uint8_t)(v >> 24); - } - - #define U32TO8_LE(p, v) fU32TO8_LE_SLOW(p, v) -#endif - -#if !defined(U64TO8_LE) - static INLINE void - fU64TO8_LE_SLOW(uint8_t *p, const uint64_t v) { - p[0] = (uint8_t)(v ); - p[1] = (uint8_t)(v >> 8); - p[2] = (uint8_t)(v >> 16); - p[3] = (uint8_t)(v >> 24); - p[4] = (uint8_t)(v >> 32); - p[5] = (uint8_t)(v >> 40); - p[6] = (uint8_t)(v >> 48); - p[7] = (uint8_t)(v >> 56); - } - - #define U64TO8_LE(p, v) fU64TO8_LE_SLOW(p, v) -#endif - -/* 0400-endian-999-generic-swap.h */ - -#if !defined(U16_SWAP) - static INLINE uint16_t - fU16_SWAP_SLOW(uint16_t v) { - v = (v << 8) | (v >> 8); - return v; - } - - #define U16_SWAP(p) fU16_SWAP_SLOW(p) -#endif - -#if !defined(U32_SWAP) - static INLINE uint32_t - fU32_SWAP_SLOW(uint32_t v) { - v = ((v << 8) & 0xFF00FF00) | ((v >> 8) & 0xFF00FF); - v = (v << 16) | (v >> 16); - return v; - } - - #define U32_SWAP(p) fU32_SWAP_SLOW(p) -#endif - -#if !defined(U64_SWAP) 
- static INLINE uint64_t - fU64_SWAP_SLOW(uint64_t v) { - v = ((v << 8) & 0xFF00FF00FF00FF00ull) | ((v >> 8) & 0x00FF00FF00FF00FFull); - v = ((v << 16) & 0xFFFF0000FFFF0000ull) | ((v >> 16) & 0x0000FFFF0000FFFFull); - v = (v << 32) | (v >> 32); - return v; - } - - #define U64_SWAP(p) fU64_SWAP_SLOW(p) -#endif - -/* 0400-uint128-000.h */ - -/* 0400-uint128-100-clang.h */ - -#ifdef HAVE_TI_MODE -# define HAVE_NATIVE_UINT128 -typedef unsigned uint128_t __attribute__((mode(TI))); -#endif - -/* 0400-uint128-100-msvc.h */ - -#if defined(CPU_64BITS) && defined(COMPILER_MSVC) - #define HAVE_UINT128 - - typedef struct uint128 { - uint64_t lo, hi; - } uint128_t; - - static INLINE uint128_t - mul64x64_128(uint64_t a, uint64_t b) { - uint128_t v; - v.lo = _umul128(a, b, &v.hi); - return v; - } - - static INLINE uint64_t - shr128_pair(uint64_t hi, uint64_t lo, const int shift) { - return __shiftright128(lo, hi, shift); - } - - static INLINE uint64_t - shr128(uint128_t v, const int shift) { - return __shiftright128(v.lo, v.hi, shift); - } - - static INLINE uint128_t - add128(uint128_t a, uint128_t b) { - uint64_t t = a.lo; - a.lo += b.lo; - a.hi += b.hi + (a.lo < t); - return a; - } - - static INLINE uint128_t - add128_64(uint128_t a, uint64_t b) { - uint64_t t = a.lo; - a.lo += b; - a.hi += (a.lo < t); - return a; - } - - static INLINE uint64_t - lo128(uint128_t a) { - return a.lo; - } - - static INLINE uint64_t - hi128(uint128_t a) { - return a.hi; - } -#endif - -/* 0400-uint128-999.h */ - -#if defined(HAVE_NATIVE_UINT128) - #define HAVE_UINT128 - - static INLINE uint128_t - mul64x64_128(uint64_t a, uint64_t b) { - return (uint128_t)a * b; - } - - static INLINE uint64_t - shr128(uint128_t v, const int shift) { - return (uint64_t)(v >> shift); - } - - static INLINE uint64_t - shr128_pair(uint64_t hi, uint64_t lo, const int shift) { - return (uint64_t)((((uint128_t)hi << 64) | lo) >> shift); - } - - static INLINE uint128_t - add128(uint128_t a, uint128_t b) { - return a + b; - } 
- - static INLINE uint128_t - add128_64(uint128_t a, uint64_t b) { - return a + b; - } - - static INLINE uint64_t - lo128(uint128_t a) { - return (uint64_t)a; - } - - static INLINE uint64_t - hi128(uint128_t a) { - return (uint64_t)(a >> 64); - } -#endif - -#endif /* PORTABLE_JANE_H */ - diff --git a/src/libsodium/crypto_onetimeauth/poly1305/onetimeauth_poly1305_try.c b/src/libsodium/crypto_onetimeauth/poly1305/onetimeauth_poly1305_try.c index 82d67867..fabf490f 100644 --- a/src/libsodium/crypto_onetimeauth/poly1305/onetimeauth_poly1305_try.c +++ b/src/libsodium/crypto_onetimeauth/poly1305/onetimeauth_poly1305_try.c @@ -124,10 +124,10 @@ crypto_onetimeauth_poly1305_implementation * crypto_onetimeauth_pick_best_implementation(void) { crypto_onetimeauth_poly1305_implementation *implementations[] = { + &crypto_onetimeauth_poly1305_donna_implementation, #ifdef HAVE_FENV_H &crypto_onetimeauth_poly1305_53_implementation, #endif - &crypto_onetimeauth_poly1305_donna_implementation, NULL }; const char *err;