Get ready to say goodbye to the infamous poly1305-53 implementation.
Make room for optimized implementations instead.
This commit is contained in:
parent
840b0f243d
commit
ada7ebdf5d
2
AUTHORS
2
AUTHORS
@ -98,7 +98,7 @@ Christian Winnerlein
|
||||
|
||||
crypto_onetimeauth/poly1305/donna
|
||||
---------------------------------
|
||||
Andrew "floodyberry" M.
|
||||
Andrew "floodyberry" Moon.
|
||||
|
||||
crypto_pwhash/scryptxsalsa208sha256
|
||||
--------------------------------
|
||||
|
@ -57,7 +57,9 @@ libsodium_la_SOURCES = \
|
||||
crypto_onetimeauth/poly1305/53/auth_poly1305_53.c \
|
||||
crypto_onetimeauth/poly1305/53/verify_poly1305_53.c \
|
||||
crypto_onetimeauth/poly1305/donna/api.h \
|
||||
crypto_onetimeauth/poly1305/donna/portable-jane.h \
|
||||
crypto_onetimeauth/poly1305/donna/poly1305_donna.h \
|
||||
crypto_onetimeauth/poly1305/donna/poly1305_donna32.h \
|
||||
crypto_onetimeauth/poly1305/donna/poly1305_donna64.h \
|
||||
crypto_onetimeauth/poly1305/donna/auth_poly1305_donna.c \
|
||||
crypto_onetimeauth/poly1305/donna/verify_poly1305_donna.c \
|
||||
crypto_pwhash/scryptxsalsa208sha256/crypto_scrypt-common.c \
|
||||
|
@ -1,138 +1,62 @@
|
||||
|
||||
#include "api.h"
|
||||
#include "crypto_onetimeauth_poly1305_donna.h"
|
||||
#include "utils.h"
|
||||
|
||||
#include "portable-jane.h"
|
||||
#include "crypto_onetimeauth_poly1305_donna.h"
|
||||
#include "poly1305_donna.h"
|
||||
|
||||
#ifdef HAVE_TI_MODE
|
||||
# include "poly1305_donna64.h"
|
||||
#else
|
||||
# include "poly1305_donna32.h"
|
||||
#endif
|
||||
|
||||
static void
|
||||
poly1305_update(poly1305_context *ctx, const unsigned char *m,
|
||||
unsigned long long bytes) {
|
||||
poly1305_state_internal_t *st = (poly1305_state_internal_t *)ctx;
|
||||
unsigned long long i;
|
||||
|
||||
/* handle leftover */
|
||||
if (st->leftover) {
|
||||
unsigned long long want = (poly1305_block_size - st->leftover);
|
||||
if (want > bytes)
|
||||
want = bytes;
|
||||
for (i = 0; i < want; i++)
|
||||
st->buffer[st->leftover + i] = m[i];
|
||||
bytes -= want;
|
||||
m += want;
|
||||
st->leftover += want;
|
||||
if (st->leftover < poly1305_block_size)
|
||||
return;
|
||||
poly1305_blocks(st, st->buffer, poly1305_block_size);
|
||||
st->leftover = 0;
|
||||
}
|
||||
|
||||
/* process full blocks */
|
||||
if (bytes >= poly1305_block_size) {
|
||||
unsigned long long want = (bytes & ~(poly1305_block_size - 1));
|
||||
poly1305_blocks(st, m, want);
|
||||
m += want;
|
||||
bytes -= want;
|
||||
}
|
||||
|
||||
/* store leftover */
|
||||
if (bytes) {
|
||||
for (i = 0; i < bytes; i++)
|
||||
st->buffer[st->leftover + i] = m[i];
|
||||
st->leftover += bytes;
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
crypto_onetimeauth(unsigned char *out, const unsigned char *m,
|
||||
unsigned long long inlen, const unsigned char *key)
|
||||
{
|
||||
uint32_t t0,t1,t2,t3;
|
||||
uint32_t h0,h1,h2,h3,h4;
|
||||
uint32_t r0,r1,r2,r3,r4;
|
||||
uint32_t s1,s2,s3,s4;
|
||||
uint32_t b, nb;
|
||||
unsigned long long j;
|
||||
uint64_t t[5];
|
||||
uint64_t f0,f1,f2,f3;
|
||||
uint32_t g0,g1,g2,g3,g4;
|
||||
uint64_t c;
|
||||
unsigned char mp[16];
|
||||
|
||||
/* clamp key */
|
||||
t0 = U8TO32_LE(key+0);
|
||||
t1 = U8TO32_LE(key+4);
|
||||
t2 = U8TO32_LE(key+8);
|
||||
t3 = U8TO32_LE(key+12);
|
||||
|
||||
/* precompute multipliers */
|
||||
r0 = t0 & 0x3ffffff; t0 >>= 26; t0 |= t1 << 6;
|
||||
r1 = t0 & 0x3ffff03; t1 >>= 20; t1 |= t2 << 12;
|
||||
r2 = t1 & 0x3ffc0ff; t2 >>= 14; t2 |= t3 << 18;
|
||||
r3 = t2 & 0x3f03fff; t3 >>= 8;
|
||||
r4 = t3 & 0x00fffff;
|
||||
|
||||
s1 = r1 * 5;
|
||||
s2 = r2 * 5;
|
||||
s3 = r3 * 5;
|
||||
s4 = r4 * 5;
|
||||
|
||||
/* init state */
|
||||
h0 = 0;
|
||||
h1 = 0;
|
||||
h2 = 0;
|
||||
h3 = 0;
|
||||
h4 = 0;
|
||||
|
||||
/* full blocks */
|
||||
if (inlen < 16) goto poly1305_donna_atmost15bytes;
|
||||
poly1305_donna_16bytes:
|
||||
m += 16;
|
||||
inlen -= 16;
|
||||
|
||||
t0 = U8TO32_LE(m-16);
|
||||
t1 = U8TO32_LE(m-12);
|
||||
t2 = U8TO32_LE(m-8);
|
||||
t3 = U8TO32_LE(m-4);
|
||||
|
||||
h0 += t0 & 0x3ffffff;
|
||||
h1 += ((((uint64_t)t1 << 32) | t0) >> 26) & 0x3ffffff;
|
||||
h2 += ((((uint64_t)t2 << 32) | t1) >> 20) & 0x3ffffff;
|
||||
h3 += ((((uint64_t)t3 << 32) | t2) >> 14) & 0x3ffffff;
|
||||
h4 += (t3 >> 8) | (1 << 24);
|
||||
|
||||
|
||||
poly1305_donna_mul:
|
||||
t[0] = mul32x32_64(h0,r0) + mul32x32_64(h1,s4) + mul32x32_64(h2,s3) + mul32x32_64(h3,s2) + mul32x32_64(h4,s1);
|
||||
t[1] = mul32x32_64(h0,r1) + mul32x32_64(h1,r0) + mul32x32_64(h2,s4) + mul32x32_64(h3,s3) + mul32x32_64(h4,s2);
|
||||
t[2] = mul32x32_64(h0,r2) + mul32x32_64(h1,r1) + mul32x32_64(h2,r0) + mul32x32_64(h3,s4) + mul32x32_64(h4,s3);
|
||||
t[3] = mul32x32_64(h0,r3) + mul32x32_64(h1,r2) + mul32x32_64(h2,r1) + mul32x32_64(h3,r0) + mul32x32_64(h4,s4);
|
||||
t[4] = mul32x32_64(h0,r4) + mul32x32_64(h1,r3) + mul32x32_64(h2,r2) + mul32x32_64(h3,r1) + mul32x32_64(h4,r0);
|
||||
|
||||
h0 = (uint32_t)t[0] & 0x3ffffff; c = (t[0] >> 26);
|
||||
t[1] += c; h1 = (uint32_t)t[1] & 0x3ffffff; b = (uint32_t)(t[1] >> 26);
|
||||
t[2] += b; h2 = (uint32_t)t[2] & 0x3ffffff; b = (uint32_t)(t[2] >> 26);
|
||||
t[3] += b; h3 = (uint32_t)t[3] & 0x3ffffff; b = (uint32_t)(t[3] >> 26);
|
||||
t[4] += b; h4 = (uint32_t)t[4] & 0x3ffffff; b = (uint32_t)(t[4] >> 26);
|
||||
h0 += b * 5;
|
||||
|
||||
if (inlen >= 16) goto poly1305_donna_16bytes;
|
||||
|
||||
/* final bytes */
|
||||
poly1305_donna_atmost15bytes:
|
||||
if (!inlen) goto poly1305_donna_finish;
|
||||
|
||||
for (j = 0; j < inlen; j++) mp[j] = m[j];
|
||||
mp[j++] = 1;
|
||||
for (; j < 16; j++) mp[j] = 0;
|
||||
inlen = 0;
|
||||
|
||||
t0 = U8TO32_LE(mp+0);
|
||||
t1 = U8TO32_LE(mp+4);
|
||||
t2 = U8TO32_LE(mp+8);
|
||||
t3 = U8TO32_LE(mp+12);
|
||||
|
||||
h0 += t0 & 0x3ffffff;
|
||||
h1 += ((((uint64_t)t1 << 32) | t0) >> 26) & 0x3ffffff;
|
||||
h2 += ((((uint64_t)t2 << 32) | t1) >> 20) & 0x3ffffff;
|
||||
h3 += ((((uint64_t)t3 << 32) | t2) >> 14) & 0x3ffffff;
|
||||
h4 += (t3 >> 8);
|
||||
|
||||
goto poly1305_donna_mul;
|
||||
|
||||
poly1305_donna_finish:
|
||||
b = h0 >> 26; h0 = h0 & 0x3ffffff;
|
||||
h1 += b; b = h1 >> 26; h1 = h1 & 0x3ffffff;
|
||||
h2 += b; b = h2 >> 26; h2 = h2 & 0x3ffffff;
|
||||
h3 += b; b = h3 >> 26; h3 = h3 & 0x3ffffff;
|
||||
h4 += b; b = h4 >> 26; h4 = h4 & 0x3ffffff;
|
||||
h0 += b * 5;
|
||||
|
||||
g0 = h0 + 5; b = g0 >> 26; g0 &= 0x3ffffff;
|
||||
g1 = h1 + b; b = g1 >> 26; g1 &= 0x3ffffff;
|
||||
g2 = h2 + b; b = g2 >> 26; g2 &= 0x3ffffff;
|
||||
g3 = h3 + b; b = g3 >> 26; g3 &= 0x3ffffff;
|
||||
g4 = h4 + b - (1 << 26);
|
||||
|
||||
b = (g4 >> 31) - 1;
|
||||
nb = ~b;
|
||||
h0 = (h0 & nb) | (g0 & b);
|
||||
h1 = (h1 & nb) | (g1 & b);
|
||||
h2 = (h2 & nb) | (g2 & b);
|
||||
h3 = (h3 & nb) | (g3 & b);
|
||||
h4 = (h4 & nb) | (g4 & b);
|
||||
|
||||
f0 = ((h0 ) | (h1 << 26)) + (uint64_t)U8TO32_LE(&key[16]);
|
||||
f1 = ((h1 >> 6) | (h2 << 20)) + (uint64_t)U8TO32_LE(&key[20]);
|
||||
f2 = ((h2 >> 12) | (h3 << 14)) + (uint64_t)U8TO32_LE(&key[24]);
|
||||
f3 = ((h3 >> 18) | (h4 << 8)) + (uint64_t)U8TO32_LE(&key[28]);
|
||||
|
||||
U32TO8_LE(&out[ 0], f0); f1 += (f0 >> 32);
|
||||
U32TO8_LE(&out[ 4], f1); f2 += (f1 >> 32);
|
||||
U32TO8_LE(&out[ 8], f2); f3 += (f2 >> 32);
|
||||
U32TO8_LE(&out[12], f3);
|
||||
poly1305_context ctx;
|
||||
poly1305_init(&ctx, key);
|
||||
poly1305_update(&ctx, m, inlen);
|
||||
poly1305_finish(&ctx, out);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -140,7 +64,7 @@ poly1305_donna_finish:
|
||||
const char *
|
||||
crypto_onetimeauth_poly1305_implementation_name(void)
|
||||
{
|
||||
return "donna";
|
||||
return POLY1305_IMPLEMENTATION_NAME;
|
||||
}
|
||||
|
||||
struct crypto_onetimeauth_poly1305_implementation
|
||||
|
@ -0,0 +1,12 @@
|
||||
#ifndef POLY1305_DONNA_H
|
||||
#define POLY1305_DONNA_H
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
/*
 * Opaque Poly1305 state passed to poly1305_init/update/finish.
 * The backends cast a pointer to this structure to their own
 * poly1305_state_internal_t, so it must be at least as large as that type.
 */
typedef struct poly1305_context {
|
||||
unsigned long long aligner; /* presumably present only to force suitable alignment of the storage - TODO confirm */
|
||||
unsigned char opaque[136]; /* raw storage for the backend's internal state */
|
||||
} poly1305_context;
|
||||
|
||||
#endif /* POLY1305_DONNA_H */
|
||||
|
@ -0,0 +1,221 @@
|
||||
/*
|
||||
poly1305 implementation using 32 bit * 32 bit = 64 bit multiplication and 64 bit addition
|
||||
*/
|
||||
|
||||
#define POLY1305_IMPLEMENTATION_NAME "donna32"
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
# define POLY1305_NOINLINE __declspec(noinline)
|
||||
#elif defined(__GNUC__)
|
||||
# define POLY1305_NOINLINE __attribute__((noinline))
|
||||
#else
|
||||
# define POLY1305_NOINLINE
|
||||
#endif
|
||||
|
||||
#define poly1305_block_size 16
|
||||
|
||||
/*
 * Internal state of the 32-bit backend, overlaid on poly1305_context.
 * Size before padding: 17 bytes (buffer + final) + sizeof(unsigned long long)
 * (leftover) + 14*sizeof(unsigned long) (r, h and pad).
 */
|
||||
typedef struct poly1305_state_internal_t {
|
||||
unsigned long r[5]; /* clamped key part r, split into five limbs of at most 26 bits */
|
||||
unsigned long h[5]; /* accumulator, five limbs, starts at zero */
|
||||
unsigned long pad[4]; /* key bytes 16..31 as four little-endian 32-bit words, added at the end */
|
||||
unsigned long long leftover; /* number of message bytes currently held in buffer */
|
||||
unsigned char buffer[poly1305_block_size]; /* partial block awaiting more input */
|
||||
unsigned char final; /* set to 1 by poly1305_finish; makes poly1305_blocks omit the 2^128 bit */
|
||||
} poly1305_state_internal_t;
|
||||
|
||||
/* read four bytes at p as a little endian 32 bit unsigned integer (p[0] is the least significant byte) */
static unsigned long
U8TO32(const unsigned char *p) {
    unsigned long word = 0;
    int idx;

    /* fold from the most significant byte down so each byte lands at 8*idx */
    for (idx = 3; idx >= 0; idx--)
        word = (word << 8) | (unsigned long)(p[idx] & 0xff);
    return word;
}
|
||||
|
||||
/* write the low 32 bits of v to p[0..3] in little endian order (p[0] gets the least significant byte) */
static void
U32TO8(unsigned char *p, unsigned long v) {
    int idx;

    for (idx = 0; idx < 4; idx++) {
        p[idx] = v & 0xff;
        v >>= 8;
    }
}
|
||||
|
||||
static void
|
||||
poly1305_init(poly1305_context *ctx, const unsigned char key[32]) {
|
||||
poly1305_state_internal_t *st = (poly1305_state_internal_t *)ctx;
|
||||
|
||||
/* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
|
||||
st->r[0] = (U8TO32(&key[ 0]) ) & 0x3ffffff;
|
||||
st->r[1] = (U8TO32(&key[ 3]) >> 2) & 0x3ffff03;
|
||||
st->r[2] = (U8TO32(&key[ 6]) >> 4) & 0x3ffc0ff;
|
||||
st->r[3] = (U8TO32(&key[ 9]) >> 6) & 0x3f03fff;
|
||||
st->r[4] = (U8TO32(&key[12]) >> 8) & 0x00fffff;
|
||||
|
||||
/* h = 0 */
|
||||
st->h[0] = 0;
|
||||
st->h[1] = 0;
|
||||
st->h[2] = 0;
|
||||
st->h[3] = 0;
|
||||
st->h[4] = 0;
|
||||
|
||||
/* save pad for later */
|
||||
st->pad[0] = U8TO32(&key[16]);
|
||||
st->pad[1] = U8TO32(&key[20]);
|
||||
st->pad[2] = U8TO32(&key[24]);
|
||||
st->pad[3] = U8TO32(&key[28]);
|
||||
|
||||
st->leftover = 0;
|
||||
st->final = 0;
|
||||
}
|
||||
|
||||
static void
|
||||
poly1305_blocks(poly1305_state_internal_t *st, const unsigned char *m, unsigned long long bytes) {
|
||||
const unsigned long hibit = (st->final) ? 0 : (1 << 24); /* 1 << 128 */
|
||||
unsigned long r0,r1,r2,r3,r4;
|
||||
unsigned long s1,s2,s3,s4;
|
||||
unsigned long h0,h1,h2,h3,h4;
|
||||
unsigned long long d0,d1,d2,d3,d4;
|
||||
unsigned long c;
|
||||
|
||||
r0 = st->r[0];
|
||||
r1 = st->r[1];
|
||||
r2 = st->r[2];
|
||||
r3 = st->r[3];
|
||||
r4 = st->r[4];
|
||||
|
||||
s1 = r1 * 5;
|
||||
s2 = r2 * 5;
|
||||
s3 = r3 * 5;
|
||||
s4 = r4 * 5;
|
||||
|
||||
h0 = st->h[0];
|
||||
h1 = st->h[1];
|
||||
h2 = st->h[2];
|
||||
h3 = st->h[3];
|
||||
h4 = st->h[4];
|
||||
|
||||
while (bytes >= poly1305_block_size) {
|
||||
/* h += m[i] */
|
||||
h0 += (U8TO32(m+ 0) ) & 0x3ffffff;
|
||||
h1 += (U8TO32(m+ 3) >> 2) & 0x3ffffff;
|
||||
h2 += (U8TO32(m+ 6) >> 4) & 0x3ffffff;
|
||||
h3 += (U8TO32(m+ 9) >> 6) & 0x3ffffff;
|
||||
h4 += (U8TO32(m+12) >> 8) | hibit;
|
||||
|
||||
/* h *= r */
|
||||
d0 = ((unsigned long long)h0 * r0) + ((unsigned long long)h1 * s4) + ((unsigned long long)h2 * s3) + ((unsigned long long)h3 * s2) + ((unsigned long long)h4 * s1);
|
||||
d1 = ((unsigned long long)h0 * r1) + ((unsigned long long)h1 * r0) + ((unsigned long long)h2 * s4) + ((unsigned long long)h3 * s3) + ((unsigned long long)h4 * s2);
|
||||
d2 = ((unsigned long long)h0 * r2) + ((unsigned long long)h1 * r1) + ((unsigned long long)h2 * r0) + ((unsigned long long)h3 * s4) + ((unsigned long long)h4 * s3);
|
||||
d3 = ((unsigned long long)h0 * r3) + ((unsigned long long)h1 * r2) + ((unsigned long long)h2 * r1) + ((unsigned long long)h3 * r0) + ((unsigned long long)h4 * s4);
|
||||
d4 = ((unsigned long long)h0 * r4) + ((unsigned long long)h1 * r3) + ((unsigned long long)h2 * r2) + ((unsigned long long)h3 * r1) + ((unsigned long long)h4 * r0);
|
||||
|
||||
/* (partial) h %= p */
|
||||
c = (unsigned long)(d0 >> 26); h0 = (unsigned long)d0 & 0x3ffffff;
|
||||
d1 += c; c = (unsigned long)(d1 >> 26); h1 = (unsigned long)d1 & 0x3ffffff;
|
||||
d2 += c; c = (unsigned long)(d2 >> 26); h2 = (unsigned long)d2 & 0x3ffffff;
|
||||
d3 += c; c = (unsigned long)(d3 >> 26); h3 = (unsigned long)d3 & 0x3ffffff;
|
||||
d4 += c; c = (unsigned long)(d4 >> 26); h4 = (unsigned long)d4 & 0x3ffffff;
|
||||
h0 += c * 5; c = (h0 >> 26); h0 = h0 & 0x3ffffff;
|
||||
h1 += c;
|
||||
|
||||
m += poly1305_block_size;
|
||||
bytes -= poly1305_block_size;
|
||||
}
|
||||
|
||||
st->h[0] = h0;
|
||||
st->h[1] = h1;
|
||||
st->h[2] = h2;
|
||||
st->h[3] = h3;
|
||||
st->h[4] = h4;
|
||||
}
|
||||
|
||||
static POLY1305_NOINLINE void
|
||||
poly1305_finish(poly1305_context *ctx, unsigned char mac[16]) {
|
||||
poly1305_state_internal_t *st = (poly1305_state_internal_t *)ctx;
|
||||
unsigned long h0,h1,h2,h3,h4,c;
|
||||
unsigned long g0,g1,g2,g3,g4;
|
||||
unsigned long long f;
|
||||
unsigned long mask;
|
||||
|
||||
/* process the remaining block */
|
||||
if (st->leftover) {
|
||||
unsigned long long i = st->leftover;
|
||||
st->buffer[i++] = 1;
|
||||
for (; i < poly1305_block_size; i++)
|
||||
st->buffer[i] = 0;
|
||||
st->final = 1;
|
||||
poly1305_blocks(st, st->buffer, poly1305_block_size);
|
||||
}
|
||||
|
||||
/* fully carry h */
|
||||
h0 = st->h[0];
|
||||
h1 = st->h[1];
|
||||
h2 = st->h[2];
|
||||
h3 = st->h[3];
|
||||
h4 = st->h[4];
|
||||
|
||||
c = h1 >> 26; h1 = h1 & 0x3ffffff;
|
||||
h2 += c; c = h2 >> 26; h2 = h2 & 0x3ffffff;
|
||||
h3 += c; c = h3 >> 26; h3 = h3 & 0x3ffffff;
|
||||
h4 += c; c = h4 >> 26; h4 = h4 & 0x3ffffff;
|
||||
h0 += c * 5; c = h0 >> 26; h0 = h0 & 0x3ffffff;
|
||||
h1 += c;
|
||||
|
||||
/* compute h + -p */
|
||||
g0 = h0 + 5; c = g0 >> 26; g0 &= 0x3ffffff;
|
||||
g1 = h1 + c; c = g1 >> 26; g1 &= 0x3ffffff;
|
||||
g2 = h2 + c; c = g2 >> 26; g2 &= 0x3ffffff;
|
||||
g3 = h3 + c; c = g3 >> 26; g3 &= 0x3ffffff;
|
||||
g4 = h4 + c - (1 << 26);
|
||||
|
||||
/* select h if h < p, or h + -p if h >= p */
|
||||
mask = (g4 >> ((sizeof(unsigned long) * 8) - 1)) - 1;
|
||||
g0 &= mask;
|
||||
g1 &= mask;
|
||||
g2 &= mask;
|
||||
g3 &= mask;
|
||||
g4 &= mask;
|
||||
mask = ~mask;
|
||||
h0 = (h0 & mask) | g0;
|
||||
h1 = (h1 & mask) | g1;
|
||||
h2 = (h2 & mask) | g2;
|
||||
h3 = (h3 & mask) | g3;
|
||||
h4 = (h4 & mask) | g4;
|
||||
|
||||
/* h = h % (2^128) */
|
||||
h0 = ((h0 ) | (h1 << 26)) & 0xffffffff;
|
||||
h1 = ((h1 >> 6) | (h2 << 20)) & 0xffffffff;
|
||||
h2 = ((h2 >> 12) | (h3 << 14)) & 0xffffffff;
|
||||
h3 = ((h3 >> 18) | (h4 << 8)) & 0xffffffff;
|
||||
|
||||
/* mac = (h + pad) % (2^128) */
|
||||
f = (unsigned long long)h0 + st->pad[0] ; h0 = (unsigned long)f;
|
||||
f = (unsigned long long)h1 + st->pad[1] + (f >> 32); h1 = (unsigned long)f;
|
||||
f = (unsigned long long)h2 + st->pad[2] + (f >> 32); h2 = (unsigned long)f;
|
||||
f = (unsigned long long)h3 + st->pad[3] + (f >> 32); h3 = (unsigned long)f;
|
||||
|
||||
U32TO8(mac + 0, h0);
|
||||
U32TO8(mac + 4, h1);
|
||||
U32TO8(mac + 8, h2);
|
||||
U32TO8(mac + 12, h3);
|
||||
|
||||
/* zero out the state */
|
||||
st->h[0] = 0;
|
||||
st->h[1] = 0;
|
||||
st->h[2] = 0;
|
||||
st->h[3] = 0;
|
||||
st->h[4] = 0;
|
||||
st->r[0] = 0;
|
||||
st->r[1] = 0;
|
||||
st->r[2] = 0;
|
||||
st->r[3] = 0;
|
||||
st->r[4] = 0;
|
||||
st->pad[0] = 0;
|
||||
st->pad[1] = 0;
|
||||
st->pad[2] = 0;
|
||||
st->pad[3] = 0;
|
||||
}
|
||||
|
@ -0,0 +1,209 @@
|
||||
/*
|
||||
poly1305 implementation using 64 bit * 64 bit = 128 bit multiplication and 128 bit addition
|
||||
*/
|
||||
|
||||
#define POLY1305_IMPLEMENTATION_NAME "donna64"
|
||||
|
||||
#if defined(__SIZEOF_INT128__)
|
||||
typedef unsigned __int128 uint128_t;
|
||||
#else
|
||||
typedef unsigned uint128_t __attribute__((mode(TI)));
|
||||
#endif
|
||||
|
||||
#define MUL(out, x, y) out = ((uint128_t)x * y)
|
||||
#define ADD(out, in) out += in
|
||||
#define ADDLO(out, in) out += in
|
||||
#define SHR(in, shift) (unsigned long long)(in >> (shift))
|
||||
#define LO(in) (unsigned long long)(in)
|
||||
|
||||
#define POLY1305_NOINLINE __attribute__((noinline))
|
||||
|
||||
#define poly1305_block_size 16
|
||||
|
||||
/*
 * Internal state of the 64-bit backend, overlaid on poly1305_context.
 * Size before padding: 17 bytes (buffer + final) + sizeof(unsigned long long)
 * (leftover) + 8*sizeof(unsigned long long) (r, h and pad).
 */
|
||||
typedef struct poly1305_state_internal_t {
|
||||
unsigned long long r[3]; /* clamped key part r, split into limbs of at most 44, 44 and 42 bits */
|
||||
unsigned long long h[3]; /* accumulator, three limbs, starts at zero */
|
||||
unsigned long long pad[2]; /* key bytes 16..31 as two little-endian 64-bit words, added at the end */
|
||||
unsigned long long leftover; /* number of message bytes currently held in buffer */
|
||||
unsigned char buffer[poly1305_block_size]; /* partial block awaiting more input */
|
||||
unsigned char final; /* set to 1 by poly1305_finish; makes poly1305_blocks omit the 2^128 bit */
|
||||
} poly1305_state_internal_t;
|
||||
|
||||
/* read eight bytes at p as a little endian 64 bit unsigned integer (p[0] is the least significant byte) */
static unsigned long long
U8TO64(const unsigned char *p) {
    unsigned long long word = 0;
    int idx;

    /* fold from the most significant byte down so each byte lands at 8*idx */
    for (idx = 7; idx >= 0; idx--)
        word = (word << 8) | (unsigned long long)(p[idx] & 0xff);
    return word;
}
|
||||
|
||||
/* write v to p[0..7] in little endian order (p[0] gets the least significant byte) */
static void
U64TO8(unsigned char *p, unsigned long long v) {
    int idx;

    for (idx = 0; idx < 8; idx++) {
        p[idx] = v & 0xff;
        v >>= 8;
    }
}
|
||||
|
||||
static void
|
||||
poly1305_init(poly1305_context *ctx, const unsigned char key[32]) {
|
||||
poly1305_state_internal_t *st = (poly1305_state_internal_t *)ctx;
|
||||
unsigned long long t0,t1;
|
||||
|
||||
/* r &= 0xffffffc0ffffffc0ffffffc0fffffff */
|
||||
t0 = U8TO64(&key[0]);
|
||||
t1 = U8TO64(&key[8]);
|
||||
|
||||
st->r[0] = ( t0 ) & 0xffc0fffffff;
|
||||
st->r[1] = ((t0 >> 44) | (t1 << 20)) & 0xfffffc0ffff;
|
||||
st->r[2] = ((t1 >> 24) ) & 0x00ffffffc0f;
|
||||
|
||||
/* h = 0 */
|
||||
st->h[0] = 0;
|
||||
st->h[1] = 0;
|
||||
st->h[2] = 0;
|
||||
|
||||
/* save pad for later */
|
||||
st->pad[0] = U8TO64(&key[16]);
|
||||
st->pad[1] = U8TO64(&key[24]);
|
||||
|
||||
st->leftover = 0;
|
||||
st->final = 0;
|
||||
}
|
||||
|
||||
static void
|
||||
poly1305_blocks(poly1305_state_internal_t *st, const unsigned char *m, unsigned long long bytes) {
|
||||
const unsigned long long hibit = (st->final) ? 0 : ((unsigned long long)1 << 40); /* 1 << 128 */
|
||||
unsigned long long r0,r1,r2;
|
||||
unsigned long long s1,s2;
|
||||
unsigned long long h0,h1,h2;
|
||||
unsigned long long c;
|
||||
uint128_t d0,d1,d2,d;
|
||||
|
||||
r0 = st->r[0];
|
||||
r1 = st->r[1];
|
||||
r2 = st->r[2];
|
||||
|
||||
h0 = st->h[0];
|
||||
h1 = st->h[1];
|
||||
h2 = st->h[2];
|
||||
|
||||
s1 = r1 * (5 << 2);
|
||||
s2 = r2 * (5 << 2);
|
||||
|
||||
while (bytes >= poly1305_block_size) {
|
||||
unsigned long long t0,t1;
|
||||
|
||||
/* h += m[i] */
|
||||
t0 = U8TO64(&m[0]);
|
||||
t1 = U8TO64(&m[8]);
|
||||
|
||||
h0 += (( t0 ) & 0xfffffffffff);
|
||||
h1 += (((t0 >> 44) | (t1 << 20)) & 0xfffffffffff);
|
||||
h2 += (((t1 >> 24) ) & 0x3ffffffffff) | hibit;
|
||||
|
||||
/* h *= r */
|
||||
MUL(d0, h0, r0); MUL(d, h1, s2); ADD(d0, d); MUL(d, h2, s1); ADD(d0, d);
|
||||
MUL(d1, h0, r1); MUL(d, h1, r0); ADD(d1, d); MUL(d, h2, s2); ADD(d1, d);
|
||||
MUL(d2, h0, r2); MUL(d, h1, r1); ADD(d2, d); MUL(d, h2, r0); ADD(d2, d);
|
||||
|
||||
/* (partial) h %= p */
|
||||
c = SHR(d0, 44); h0 = LO(d0) & 0xfffffffffff;
|
||||
ADDLO(d1, c); c = SHR(d1, 44); h1 = LO(d1) & 0xfffffffffff;
|
||||
ADDLO(d2, c); c = SHR(d2, 42); h2 = LO(d2) & 0x3ffffffffff;
|
||||
h0 += c * 5; c = (h0 >> 44); h0 = h0 & 0xfffffffffff;
|
||||
h1 += c;
|
||||
|
||||
m += poly1305_block_size;
|
||||
bytes -= poly1305_block_size;
|
||||
}
|
||||
|
||||
st->h[0] = h0;
|
||||
st->h[1] = h1;
|
||||
st->h[2] = h2;
|
||||
}
|
||||
|
||||
|
||||
static POLY1305_NOINLINE void
|
||||
poly1305_finish(poly1305_context *ctx, unsigned char mac[16]) {
|
||||
poly1305_state_internal_t *st = (poly1305_state_internal_t *)ctx;
|
||||
unsigned long long h0,h1,h2,c;
|
||||
unsigned long long g0,g1,g2;
|
||||
unsigned long long t0,t1;
|
||||
|
||||
/* process the remaining block */
|
||||
if (st->leftover) {
|
||||
unsigned long long i = st->leftover;
|
||||
st->buffer[i] = 1;
|
||||
for (i = i + 1; i < poly1305_block_size; i++)
|
||||
st->buffer[i] = 0;
|
||||
st->final = 1;
|
||||
poly1305_blocks(st, st->buffer, poly1305_block_size);
|
||||
}
|
||||
|
||||
/* fully carry h */
|
||||
h0 = st->h[0];
|
||||
h1 = st->h[1];
|
||||
h2 = st->h[2];
|
||||
|
||||
c = (h1 >> 44); h1 &= 0xfffffffffff;
|
||||
h2 += c; c = (h2 >> 42); h2 &= 0x3ffffffffff;
|
||||
h0 += c * 5; c = (h0 >> 44); h0 &= 0xfffffffffff;
|
||||
h1 += c; c = (h1 >> 44); h1 &= 0xfffffffffff;
|
||||
h2 += c; c = (h2 >> 42); h2 &= 0x3ffffffffff;
|
||||
h0 += c * 5; c = (h0 >> 44); h0 &= 0xfffffffffff;
|
||||
h1 += c;
|
||||
|
||||
/* compute h + -p */
|
||||
g0 = h0 + 5; c = (g0 >> 44); g0 &= 0xfffffffffff;
|
||||
g1 = h1 + c; c = (g1 >> 44); g1 &= 0xfffffffffff;
|
||||
g2 = h2 + c - ((unsigned long long)1 << 42);
|
||||
|
||||
/* select h if h < p, or h + -p if h >= p */
|
||||
c = (g2 >> ((sizeof(unsigned long long) * 8) - 1)) - 1;
|
||||
g0 &= c;
|
||||
g1 &= c;
|
||||
g2 &= c;
|
||||
c = ~c;
|
||||
h0 = (h0 & c) | g0;
|
||||
h1 = (h1 & c) | g1;
|
||||
h2 = (h2 & c) | g2;
|
||||
|
||||
/* h = (h + pad) */
|
||||
t0 = st->pad[0];
|
||||
t1 = st->pad[1];
|
||||
|
||||
h0 += (( t0 ) & 0xfffffffffff) ; c = (h0 >> 44); h0 &= 0xfffffffffff;
|
||||
h1 += (((t0 >> 44) | (t1 << 20)) & 0xfffffffffff) + c; c = (h1 >> 44); h1 &= 0xfffffffffff;
|
||||
h2 += (((t1 >> 24) ) & 0x3ffffffffff) + c; h2 &= 0x3ffffffffff;
|
||||
|
||||
/* mac = h % (2^128) */
|
||||
h0 = ((h0 ) | (h1 << 44));
|
||||
h1 = ((h1 >> 20) | (h2 << 24));
|
||||
|
||||
U64TO8(&mac[0], h0);
|
||||
U64TO8(&mac[8], h1);
|
||||
|
||||
/* zero out the state */
|
||||
st->h[0] = 0;
|
||||
st->h[1] = 0;
|
||||
st->h[2] = 0;
|
||||
st->r[0] = 0;
|
||||
st->r[1] = 0;
|
||||
st->r[2] = 0;
|
||||
st->pad[0] = 0;
|
||||
st->pad[1] = 0;
|
||||
}
|
||||
|
@ -1,712 +0,0 @@
|
||||
#ifndef PORTABLE_JANE_H
|
||||
#define PORTABLE_JANE_H "+endian +uint128"
|
||||
/* 0000-os-100-solaris.h */
|
||||
|
||||
#if defined(sun) || defined(__sun) || defined(__SVR4) || defined(__svr4__)
|
||||
#include <sys/mman.h>
|
||||
#include <sys/time.h>
|
||||
#include <fcntl.h>
|
||||
|
||||
#define OS_SOLARIS
|
||||
#endif
|
||||
|
||||
/* 0000-os-100-unix.h */
|
||||
|
||||
#if defined(__unix__) || defined(unix)
|
||||
#include <sys/mman.h>
|
||||
#include <sys/time.h>
|
||||
#if !defined(USG)
|
||||
#include <sys/param.h> /* need this to define BSD */
|
||||
#endif
|
||||
#include <unistd.h>
|
||||
#include <fcntl.h>
|
||||
|
||||
#define OS_NIX
|
||||
#if defined(__linux__)
|
||||
#include <endian.h>
|
||||
#define OS_LINUX
|
||||
#elif defined(BSD)
|
||||
#define OS_BSD
|
||||
|
||||
#if defined(MACOS_X) || (defined(__APPLE__) & defined(__MACH__))
|
||||
#define OS_OSX
|
||||
#elif defined(macintosh) || defined(Macintosh)
|
||||
#define OS_MAC
|
||||
#elif defined(__OpenBSD__)
|
||||
#define OS_OPENBSD
|
||||
#elif defined(__FreeBSD__)
|
||||
#define OS_FREEBSD
|
||||
#elif defined(__NetBSD__)
|
||||
#define OS_NETBSD
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* 0000-os-100-windows.h */
|
||||
|
||||
#if defined(_WIN32) || defined(_WIN64) || defined(__TOS_WIN__) || defined(__WINDOWS__)
|
||||
#include <windows.h>
|
||||
#include <wincrypt.h>
|
||||
#define OS_WINDOWS
|
||||
#endif
|
||||
|
||||
/* 0100-compiler-000.h */
|
||||
|
||||
#undef NOINLINE
|
||||
#undef INLINE
|
||||
#undef FASTCALL
|
||||
#undef CDECL
|
||||
#undef STDCALL
|
||||
#undef NAKED
|
||||
|
||||
/* 0100-compiler-100-clang.h */
|
||||
|
||||
#if defined(__clang__)
|
||||
#define COMPILER_CLANG ((__clang_major__ * 10000) + (__clang_minor__ * 100) + (__clang_patchlevel__))
|
||||
#endif
|
||||
|
||||
/* 0100-compiler-100-gcc.h */
|
||||
|
||||
#if defined(__GNUC__)
|
||||
#if (__GNUC__ >= 3)
|
||||
#define COMPILER_GCC_PATCHLEVEL __GNUC_PATCHLEVEL__
|
||||
#else
|
||||
#define COMPILER_GCC_PATCHLEVEL 0
|
||||
#endif
|
||||
#define COMPILER_GCC ((__GNUC__ * 10000) + (__GNUC_MINOR__ * 100) + (COMPILER_GCC_PATCHLEVEL))
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
typedef unsigned int fpu_control_t;
|
||||
|
||||
#define ROTL32(a,b) (((a) << (b)) | ((a) >> (32 - b)))
|
||||
#define ROTR32(a,b) (((a) >> (b)) | ((a) << (32 - b)))
|
||||
#define ROTL64(a,b) (((a) << (b)) | ((a) >> (64 - b)))
|
||||
#define ROTR64(a,b) (((a) >> (b)) | ((a) << (64 - b)))
|
||||
|
||||
#if (COMPILER_GCC >= 30000)
|
||||
#define NOINLINE __attribute__((noinline))
|
||||
#else
|
||||
#define NOINLINE
|
||||
#endif
|
||||
#if (COMPILER_GCC >= 30000)
|
||||
#define INLINE inline __attribute__((always_inline))
|
||||
#else
|
||||
#define INLINE inline
|
||||
#endif
|
||||
#if (COMPILER_GCC >= 30400)
|
||||
#define FASTCALL __attribute__((fastcall))
|
||||
#else
|
||||
#define FASTCALL
|
||||
#endif
|
||||
#define CDECL __attribute__((cdecl))
|
||||
#define STDCALL __attribute__((stdcall))
|
||||
|
||||
#define mul32x32_64(a,b) ((uint64_t)(a) * (b))
|
||||
#define mul32x32_64s(a,b) (((int64_t)(a))*(b))
|
||||
#endif
|
||||
|
||||
/* 0100-compiler-100-icc.h */
|
||||
|
||||
#if defined(__ICC)
|
||||
#define COMPILER_ICC __ICC
|
||||
#endif
|
||||
|
||||
/* 0100-compiler-100-mingw.h */
|
||||
|
||||
#if defined(__MINGW32__) || defined(__MINGW64__)
|
||||
#define COMPILER_MINGW
|
||||
#endif
|
||||
|
||||
/* 0100-compiler-100-msvc.h */
|
||||
|
||||
#if defined(_MSC_VER)
|
||||
#ifndef _CRT_SECURE_NO_WARNINGS
|
||||
# define _CRT_SECURE_NO_WARNINGS
|
||||
#endif
|
||||
|
||||
#pragma warning(disable : 4127) /* conditional expression is constant */
|
||||
#pragma warning(disable : 4100) /* unreferenced formal parameter */
|
||||
|
||||
#include <float.h>
|
||||
#include <stdlib.h> /* _rotl */
|
||||
#include <intrin.h>
|
||||
|
||||
#define COMPILER_MSVC_VS6 120000000
|
||||
#define COMPILER_MSVC_VS6PP 121000000
|
||||
#define COMPILER_MSVC_VS2002 130000000
|
||||
#define COMPILER_MSVC_VS2003 131000000
|
||||
#define COMPILER_MSVC_VS2005 140050727
|
||||
#define COMPILER_MSVC_VS2008 150000000
|
||||
#define COMPILER_MSVC_VS2008SP1 150030729
|
||||
#define COMPILER_MSVC_VS2010 160000000
|
||||
#define COMPILER_MSVC_VS2010SP1 160040219
|
||||
#define COMPILER_MSVC_VS2012RC 170000000
|
||||
#define COMPILER_MSVC_VS2012 170050727
|
||||
|
||||
#if _MSC_FULL_VER > 100000000
|
||||
#define COMPILER_MSVC (_MSC_FULL_VER)
|
||||
#else
|
||||
#define COMPILER_MSVC (_MSC_FULL_VER * 10)
|
||||
#endif
|
||||
|
||||
#if ((_MSC_VER == 1200) && defined(_mm_free))
|
||||
#undef COMPILER_MSVC
|
||||
#define COMPILER_MSVC COMPILER_MSVC_VS6PP
|
||||
#endif
|
||||
|
||||
typedef unsigned char uint8_t;
|
||||
typedef unsigned short uint16_t;
|
||||
typedef unsigned int uint32_t;
|
||||
typedef signed int int32_t;
|
||||
typedef unsigned __int64 uint64_t;
|
||||
typedef signed __int64 int64_t;
|
||||
|
||||
typedef uint16_t fpu_control_t;
|
||||
|
||||
#define ROTL32(a,b) _rotl(a,b)
|
||||
#define ROTR32(a,b) _rotr(a,b)
|
||||
#define ROTL64(a,b) _rotl64(a,b)
|
||||
#define ROTR64(a,b) _rotr64(a,b)
|
||||
|
||||
#define NOINLINE __declspec(noinline)
|
||||
#define INLINE __forceinline
|
||||
#define FASTCALL __fastcall
|
||||
#define CDECL __cdecl
|
||||
#define STDCALL __stdcall
|
||||
#define NAKED __declspec(naked)
|
||||
|
||||
#if defined(_DEBUG)
|
||||
#define mul32x32_64(a,b) (((uint64_t)(a))*(b))
|
||||
#define mul32x32_64s(a,b) (((int64_t)(a))*(b))
|
||||
#else
|
||||
#define mul32x32_64(a,b) __emulu(a,b)
|
||||
#define mul32x32_64s(a,b) __emul(a,b)
|
||||
#endif
|
||||
#endif
|
||||
/* 0100-compiler-999.h */
|
||||
|
||||
#define OPTIONAL_INLINE /* config */
|
||||
#if defined(OPTIONAL_INLINE)
|
||||
#undef OPTIONAL_INLINE
|
||||
#define OPTIONAL_INLINE INLINE
|
||||
#else
|
||||
#define OPTIONAL_INLINE
|
||||
#endif
|
||||
|
||||
#define Preprocessor_ToString(s) #s
|
||||
#define Stringify(s) Preprocessor_ToString(s)
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
|
||||
/* 0200-cpu-100-alpha.h */
|
||||
|
||||
#if defined(__alpha__) || defined(__alpha) || defined(_M_ALPHA)
|
||||
#define CPU_ALPHA
|
||||
#endif
|
||||
|
||||
/* 0200-cpu-100-hppa.h */
|
||||
|
||||
#if defined(__hppa__) || defined(__hppa)
|
||||
#define CPU_HPPA
|
||||
#endif
|
||||
|
||||
/* 0200-cpu-100-intel.h */
|
||||
|
||||
#if defined(__amd64__) || defined(__amd64) || defined(__x86_64__ ) || defined(_M_X64)
|
||||
#define CPU_X86_64
|
||||
#elif defined(__i586__) || defined(__i686__) || (defined(_M_IX86) && (_M_IX86 >= 500))
|
||||
#define CPU_X86 500
|
||||
#elif defined(__i486__) || (defined(_M_IX86) && (_M_IX86 >= 400))
|
||||
#define CPU_X86 400
|
||||
#elif defined(__i386__) || (defined(_M_IX86) && (_M_IX86 >= 300)) || defined(__X86__) || defined(_X86_) || defined(__I86__)
|
||||
#define CPU_X86 300
|
||||
#elif defined(__ia64__) || defined(_IA64) || defined(__IA64__) || defined(_M_IA64) || defined(__ia64)
|
||||
#define CPU_IA64
|
||||
#endif
|
||||
|
||||
/* 0200-cpu-100-ppc.h */
|
||||
|
||||
#if defined(powerpc) || defined(__PPC__) || defined(__ppc__) || defined(_ARCH_PPC) || defined(__powerpc__) || defined(__powerpc) || defined(POWERPC) || defined(_M_PPC)
|
||||
#define CPU_PPC
|
||||
#if defined(_ARCH_PWR7)
|
||||
#define CPU_POWER7
|
||||
#elif defined(__64BIT__)
|
||||
#define CPU_PPC64
|
||||
#else
|
||||
#define CPU_PPC32
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* 0200-cpu-100-sparc.h */
|
||||
|
||||
#if defined(__sparc__) || defined(__sparc) || defined(__sparcv9)
|
||||
#define CPU_SPARC
|
||||
#if defined(__sparcv9)
|
||||
#define CPU_SPARC64
|
||||
#else
|
||||
#define CPU_SPARC32
|
||||
#endif
|
||||
#endif
|
||||
|
||||
/* 0200-cpu-200-bits.h */
|
||||
|
||||
#if defined(CPU_X86_64) || defined(CPU_IA64) || defined(CPU_SPARC64) || defined(__64BIT__) || defined(__LP64__) || defined(_LP64) || (defined(_MIPS_SZLONG) && (_MIPS_SZLONG == 64))
|
||||
#define CPU_64BITS
|
||||
|
||||
#undef FASTCALL
|
||||
#undef CDECL
|
||||
#undef STDCALL
|
||||
|
||||
#define FASTCALL
|
||||
#define CDECL
|
||||
#define STDCALL
|
||||
#endif
|
||||
|
||||
/* 0200-cpu-200-endian.h */
|
||||
|
||||
#if ((defined(__BYTE_ORDER) && defined(__LITTLE_ENDIAN) && (__BYTE_ORDER == __LITTLE_ENDIAN)) || \
|
||||
(defined(BYTE_ORDER) && defined(LITTLE_ENDIAN) && (BYTE_ORDER == LITTLE_ENDIAN)) || \
|
||||
(defined(CPU_X86) || defined(CPU_X86_64)) || \
|
||||
(defined(vax) || defined(MIPSEL) || defined(_MIPSEL)))
|
||||
#define CPU_LE
|
||||
#elif ((defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) && (__BYTE_ORDER == __BIG_ENDIAN)) || \
|
||||
(defined(BYTE_ORDER) && defined(BIG_ENDIAN) && (BYTE_ORDER == BIG_ENDIAN)) || \
|
||||
(defined(CPU_SPARC) || defined(CPU_PPC) || defined(mc68000) || defined(sel)) || defined(_MIPSEB))
|
||||
#define CPU_BE
|
||||
#else
|
||||
/* unknown endian! */
|
||||
#endif
|
||||
|
||||
#if defined(__s390__) || defined(__zarch__) || defined(__SYSC_ZARCH__)
|
||||
# define CPU_Z390
|
||||
#endif
|
||||
|
||||
/* 0400-endian-100-be.h */
|
||||
|
||||
/*
 * Big-endian accessors for hosts where CPU_BE is defined and
 * CPU_ALIGNED_ACCESS_REQUIRED is not: every helper reads or writes the
 * value directly through a casted pointer, with no byte reordering.
 */
#if defined(CPU_BE) && !defined(CPU_ALIGNED_ACCESS_REQUIRED)
static INLINE uint16_t
fU8TO16_BE_FAST(const uint8_t *p) {
    const uint16_t *src = (const uint16_t *)p;
    return *src;
}

static INLINE uint32_t
fU8TO32_BE_FAST(const uint8_t *p) {
    const uint32_t *src = (const uint32_t *)p;
    return *src;
}

static INLINE uint64_t
fU8TO64_BE_FAST(const uint8_t *p) {
    const uint64_t *src = (const uint64_t *)p;
    return *src;
}

static INLINE void
fU16TO8_BE_FAST(uint8_t *p, const uint16_t v) {
    uint16_t *dst = (uint16_t *)p;
    *dst = v;
}

static INLINE void
fU32TO8_BE_FAST(uint8_t *p, const uint32_t v) {
    uint32_t *dst = (uint32_t *)p;
    *dst = v;
}

static INLINE void
fU64TO8_BE_FAST(uint8_t *p, const uint64_t v) {
    uint64_t *dst = (uint64_t *)p;
    *dst = v;
}

# define U8TO16_BE(p) fU8TO16_BE_FAST(p)
# define U8TO32_BE(p) fU8TO32_BE_FAST(p)
# define U8TO64_BE(p) fU8TO64_BE_FAST(p)
# define U16TO8_BE(p, v) fU16TO8_BE_FAST(p, v)
# define U32TO8_BE(p, v) fU32TO8_BE_FAST(p, v)
# define U64TO8_BE(p, v) fU64TO8_BE_FAST(p, v)
#endif
|
||||
|
||||
/* 0400-endian-100-le.h */
|
||||
|
||||
/*
 * Little-endian accessors for hosts where CPU_LE is defined and
 * CPU_ALIGNED_ACCESS_REQUIRED is not: every helper reads or writes the
 * value directly through a casted pointer, with no byte reordering.
 */
#if defined(CPU_LE) && !defined(CPU_ALIGNED_ACCESS_REQUIRED)
static INLINE uint16_t
fU8TO16_LE_FAST(const uint8_t *p) {
    const uint16_t *src = (const uint16_t *)p;
    return *src;
}

static INLINE uint32_t
fU8TO32_LE_FAST(const uint8_t *p) {
    const uint32_t *src = (const uint32_t *)p;
    return *src;
}

static INLINE uint64_t
fU8TO64_LE_FAST(const uint8_t *p) {
    const uint64_t *src = (const uint64_t *)p;
    return *src;
}

static INLINE void
fU16TO8_LE_FAST(uint8_t *p, const uint16_t v) {
    uint16_t *dst = (uint16_t *)p;
    *dst = v;
}

static INLINE void
fU32TO8_LE_FAST(uint8_t *p, const uint32_t v) {
    uint32_t *dst = (uint32_t *)p;
    *dst = v;
}

static INLINE void
fU64TO8_LE_FAST(uint8_t *p, const uint64_t v) {
    uint64_t *dst = (uint64_t *)p;
    *dst = v;
}

# define U8TO16_LE(p) fU8TO16_LE_FAST(p)
# define U8TO32_LE(p) fU8TO32_LE_FAST(p)
# define U8TO64_LE(p) fU8TO64_LE_FAST(p)
# define U16TO8_LE(p, v) fU16TO8_LE_FAST(p, v)
# define U32TO8_LE(p, v) fU32TO8_LE_FAST(p, v)
# define U64TO8_LE(p, v) fU64TO8_LE_FAST(p, v)
#endif
|
||||
|
||||
/* 0400-endian-100-sparc.h */
|
||||
|
||||
#if defined(CPU_SPARC)
|
||||
#if defined(CPU_SPARC64)
|
||||
static INLINE uint64_t
|
||||
fU8TO64_LE_FAST(const uint8_t *p) {
|
||||
uint64_d d;
|
||||
__asm__ ("ldxa [%1]0x88,%0" : "=r"(d) : "r"(p));
|
||||
return d;
|
||||
}
|
||||
|
||||
/*
 * 64-bit SPARC: store the 64-bit value v at p with a single `stxa`
 * alternate-space store using ASI 0x88.
 *
 * The destination is only passed as an address in a register, so the
 * "memory" clobber is what tells the compiler that memory is modified
 * and must not be cached or reordered across this statement.
 */
static INLINE void
fU64TO8_LE_FAST(uint8_t *p, const uint64_t v) {
    __asm__ __volatile__ ("stxa %0,[%1]0x88" : : "r"(v), "r"(p) : "memory");
}
|
||||
#else
|
||||
static INLINE uint64_t
|
||||
fU8TO64_LE_FAST(const uint8_t *p) {
|
||||
uint32_t *s4, h, l;
|
||||
__asm__ ("add %3,4,%0\n\tlda [%3]0x88,%1\n\tlda [%0]0x88,%2" : "+r"(s4), "=r"(l), "=r"(h) : "r"(p));
|
||||
return ((uint64_t)h << 32) | l;
|
||||
}
|
||||
|
||||
/*
 * 32-bit SPARC: store the 64-bit value v at p as two 32-bit `sta`
 * alternate-space stores (ASI 0x88). The low half goes to p and the high
 * half to p + 4.
 *
 * The statement has an output operand (the scratch register s4) whose
 * value is never used afterwards. Without `__volatile__` the compiler is
 * allowed to discard such an asm statement, dropping both stores. The
 * "memory" clobber records that memory is written. s4 is an early-clobber
 * output ("=&r") rather than "+r", so the uninitialized variable is
 * never read.
 */
static INLINE void
fU64TO8_LE_FAST(uint8_t *p, const uint64_t v) {
    uint32_t *s4, h = (uint32_t)(v >> 32), l = (uint32_t)(v & (uint32_t)0xffffffff);
    __asm__ __volatile__ ("add %3,4,%0\n\tsta %1,[%3]0x88\n\tsta %2,[%0]0x88"
                          : "=&r"(s4)
                          : "r"(l), "r"(h), "r"(p)
                          : "memory");
    (void)s4;
}
|
||||
#endif
|
||||
|
||||
static INLINE uint32_t
|
||||
fU8TO32_LE_FAST(const uint8_t *p) {
|
||||
uint32_t d;
|
||||
__asm__ ("lda [%1]0x88,%0" : "=r"(d) : "r"(p));
|
||||
return d;
|
||||
}
|
||||
|
||||
/*
 * SPARC: store the 32-bit value v at p with one `sta` alternate-space
 * store using ASI 0x88.
 *
 * In the template, %0 is the value to store and %1 the destination
 * address. The operands were previously listed as (p, v), which stored
 * the pointer at the address given by the value. The order now matches
 * the 64-bit `stxa` helper. The "memory" clobber records that memory is
 * written by the statement.
 */
static INLINE void
fU32TO8_LE_FAST(uint8_t *p, const uint32_t v) {
    __asm__ __volatile__ ("sta %0,[%1]0x88" : : "r"(v), "r"(p) : "memory");
}
|
||||
|
||||
#define U8TO32_LE(p) fU8TO32_LE_FAST(p)
|
||||
#define U8TO64_LE(p) fU8TO64_LE_FAST(p)
|
||||
#define U32TO8_LE(p, v) fU32TO8_LE_FAST(p, v)
|
||||
#define U64TO8_LE(p, v) fU64TO8_LE_FAST(p, v)
|
||||
#endif
|
||||
|
||||
/* 0400-endian-100-x86.h */
|
||||
|
||||
/*
 * x86 / x86-64 byte swapping (CPU_X86 >= 400 or CPU_X86_64, built with
 * COMPILER_MSVC or COMPILER_GCC). Provides U16/U32/U64_SWAP_FAST and,
 * built on top of them, big-endian loads and stores that swap a value
 * accessed directly through a casted pointer.
 */
#if (((defined(CPU_X86) && (CPU_X86 >= 400)) || defined(CPU_X86_64)) && (defined(COMPILER_MSVC) || defined(COMPILER_GCC)))
# if defined(COMPILER_MSVC)
/* MSVC: use the compiler's byte swap intrinsics. */
static INLINE uint16_t
U16_SWAP_FAST(uint16_t v) {
    return _byteswap_ushort(v);
}

static INLINE uint32_t
U32_SWAP_FAST(uint32_t v) {
    return _byteswap_ulong(v);
}

static INLINE uint64_t
U64_SWAP_FAST(uint64_t v) {
    return _byteswap_uint64(v);
}
# else
/* GCC-style inline assembly; the operand is swapped in place. */
static INLINE uint16_t
U16_SWAP_FAST(uint16_t v) {
    __asm__("rorw $8,%0" : "+r" (v));
    return v;
}

static INLINE uint32_t
U32_SWAP_FAST(uint32_t v) {
    __asm__("bswap %0" : "+r" (v));
    return v;
}

#  if defined(CPU_X86_64)
static INLINE uint64_t
U64_SWAP_FAST(uint64_t v) {
    __asm__("bswap %0" : "+r" (v));
    return v;
}
#  else
/* 32-bit x86: swap each half, then exchange the halves. */
static INLINE uint64_t
U64_SWAP_FAST(uint64_t v) {
    const uint32_t swapped_low = U32_SWAP_FAST((uint32_t)(v));
    const uint32_t swapped_high = U32_SWAP_FAST((uint32_t)(v >> 32));

    return ((uint64_t)swapped_low << 32) | swapped_high;
}
#  endif
# endif

static INLINE uint16_t
fU8TO16_BE_FAST(const uint8_t *p) {
    const uint16_t raw = *(const uint16_t *)p;
    return U16_SWAP_FAST(raw);
}

static INLINE uint32_t
fU8TO32_BE_FAST(const uint8_t *p) {
    const uint32_t raw = *(const uint32_t *)p;
    return U32_SWAP_FAST(raw);
}

static INLINE uint64_t
fU8TO64_BE_FAST(const uint8_t *p) {
    const uint64_t raw = *(const uint64_t *)p;
    return U64_SWAP_FAST(raw);
}

static INLINE void
fU16TO8_BE_FAST(uint8_t *p, const uint16_t v) {
    const uint16_t swapped = U16_SWAP_FAST(v);
    *(uint16_t *)p = swapped;
}

static INLINE void
fU32TO8_BE_FAST(uint8_t *p, const uint32_t v) {
    const uint32_t swapped = U32_SWAP_FAST(v);
    *(uint32_t *)p = swapped;
}

static INLINE void
fU64TO8_BE_FAST(uint8_t *p, const uint64_t v) {
    const uint64_t swapped = U64_SWAP_FAST(v);
    *(uint64_t *)p = swapped;
}

# define U16_SWAP(p) U16_SWAP_FAST(p)
# define U32_SWAP(p) U32_SWAP_FAST(p)
# define U64_SWAP(p) U64_SWAP_FAST(p)
# define U8TO16_BE(p) fU8TO16_BE_FAST(p)
# define U8TO32_BE(p) fU8TO32_BE_FAST(p)
# define U8TO64_BE(p) fU8TO64_BE_FAST(p)
# define U16TO8_BE(p, v) fU16TO8_BE_FAST(p, v)
# define U32TO8_BE(p, v) fU32TO8_BE_FAST(p, v)
# define U64TO8_BE(p, v) fU64TO8_BE_FAST(p, v)
#endif
|
||||
|
||||
/* 0400-endian-999-generic-be.h */
|
||||
|
||||
#if !defined(U8TO16_BE)
|
||||
static INLINE uint16_t
|
||||
fU8TO16_BE_SLOW(const uint8_t *p) {
|
||||
return
|
||||
(((uint16_t)(p[0]) << 8) |
|
||||
((uint16_t)(p[1]) ));
|
||||
}
|
||||
|
||||
#define U8TO16_BE(p) fU8TO16_BE_SLOW(p)
|
||||
#endif
|
||||
|
||||
|
||||
#if !defined(U8TO32_BE)
|
||||
static INLINE uint32_t
|
||||
fU8TO32_BE_SLOW(const uint8_t *p) {
|
||||
return
|
||||
(((uint32_t)(p[0]) << 24) |
|
||||
((uint32_t)(p[1]) << 16) |
|
||||
((uint32_t)(p[2]) << 8) |
|
||||
((uint32_t)(p[3]) ));
|
||||
}
|
||||
|
||||
#define U8TO32_BE(p) fU8TO32_BE_SLOW(p)
|
||||
#endif
|
||||
|
||||
#if !defined(U8TO64_BE)
|
||||
static INLINE uint64_t
|
||||
fU8TO64_BE_SLOW(const uint8_t *p) {
|
||||
return
|
||||
(((uint64_t)(p[0]) << 56) |
|
||||
((uint64_t)(p[1]) << 48) |
|
||||
((uint64_t)(p[2]) << 40) |
|
||||
((uint64_t)(p[3]) << 32) |
|
||||
((uint64_t)(p[4]) << 24) |
|
||||
((uint64_t)(p[5]) << 16) |
|
||||
((uint64_t)(p[6]) << 8) |
|
||||
((uint64_t)(p[7]) ));
|
||||
}
|
||||
|
||||
#define U8TO64_BE(p) fU8TO64_BE_SLOW(p)
|
||||
#endif
|
||||
|
||||
#if !defined(U16TO8_BE)
|
||||
static INLINE void
|
||||
fU16TO8_BE_SLOW(uint8_t *p, const uint16_t v) {
|
||||
p[0] = (uint8_t)(v >> 8);
|
||||
p[1] = (uint8_t)(v );
|
||||
}
|
||||
|
||||
#define U16TO8_BE(p, v) fU16TO8_BE_SLOW(p, v)
|
||||
#endif
|
||||
|
||||
#if !defined(U32TO8_BE)
|
||||
static INLINE void
|
||||
fU32TO8_BE_SLOW(uint8_t *p, const uint32_t v) {
|
||||
p[0] = (uint8_t)(v >> 24);
|
||||
p[1] = (uint8_t)(v >> 16);
|
||||
p[2] = (uint8_t)(v >> 8);
|
||||
p[3] = (uint8_t)(v );
|
||||
}
|
||||
|
||||
#define U32TO8_BE(p, v) fU32TO8_BE_SLOW(p, v)
|
||||
#endif
|
||||
|
||||
#if !defined(U64TO8_BE)
|
||||
static INLINE void
|
||||
fU64TO8_BE_SLOW(uint8_t *p, const uint64_t v) {
|
||||
p[0] = (uint8_t)(v >> 56);
|
||||
p[1] = (uint8_t)(v >> 48);
|
||||
p[2] = (uint8_t)(v >> 40);
|
||||
p[3] = (uint8_t)(v >> 32);
|
||||
p[4] = (uint8_t)(v >> 24);
|
||||
p[5] = (uint8_t)(v >> 16);
|
||||
p[6] = (uint8_t)(v >> 8);
|
||||
p[7] = (uint8_t)(v );
|
||||
}
|
||||
|
||||
#define U64TO8_BE(p, v) fU64TO8_BE_SLOW(p, v)
|
||||
#endif
|
||||
|
||||
/* 0400-endian-999-generic-le.h */
|
||||
|
||||
#if !defined(U8TO16_LE)
|
||||
static INLINE uint16_t
|
||||
fU8TO16_LE_SLOW(const uint8_t *p) {
|
||||
return
|
||||
(((uint16_t)(p[0]) ) |
|
||||
((uint16_t)(p[1]) << 8));
|
||||
}
|
||||
|
||||
#define U8TO16_LE(p) fU8TO16_LE_SLOW(p)
|
||||
#endif
|
||||
|
||||
#if !defined(U8TO32_LE)
|
||||
static INLINE uint32_t
|
||||
fU8TO32_LE_SLOW(const uint8_t *p) {
|
||||
return
|
||||
(((uint32_t)(p[0]) ) |
|
||||
((uint32_t)(p[1]) << 8) |
|
||||
((uint32_t)(p[2]) << 16) |
|
||||
((uint32_t)(p[3]) << 24));
|
||||
}
|
||||
|
||||
#define U8TO32_LE(p) fU8TO32_LE_SLOW(p)
|
||||
#endif
|
||||
|
||||
|
||||
#if !defined(U8TO64_LE)
|
||||
static INLINE uint64_t
|
||||
fU8TO64_LE_SLOW(const uint8_t *p) {
|
||||
return
|
||||
(((uint64_t)(p[0]) ) |
|
||||
((uint64_t)(p[1]) << 8) |
|
||||
((uint64_t)(p[2]) << 16) |
|
||||
((uint64_t)(p[3]) << 24) |
|
||||
((uint64_t)(p[4]) << 32) |
|
||||
((uint64_t)(p[5]) << 40) |
|
||||
((uint64_t)(p[6]) << 48) |
|
||||
((uint64_t)(p[7]) << 56));
|
||||
}
|
||||
|
||||
#define U8TO64_LE(p) fU8TO64_LE_SLOW(p)
|
||||
#endif
|
||||
|
||||
#if !defined(U16TO8_LE)
|
||||
static INLINE void
|
||||
fU16TO8_LE_SLOW(uint8_t *p, const uint16_t v) {
|
||||
p[0] = (uint8_t)(v );
|
||||
p[1] = (uint8_t)(v >> 8);
|
||||
}
|
||||
|
||||
#define U16TO8_LE(p, v) fU16TO8_LE_SLOW(p, v)
|
||||
#endif
|
||||
|
||||
#if !defined(U32TO8_LE)
|
||||
static INLINE void
|
||||
fU32TO8_LE_SLOW(uint8_t *p, const uint32_t v) {
|
||||
p[0] = (uint8_t)(v );
|
||||
p[1] = (uint8_t)(v >> 8);
|
||||
p[2] = (uint8_t)(v >> 16);
|
||||
p[3] = (uint8_t)(v >> 24);
|
||||
}
|
||||
|
||||
#define U32TO8_LE(p, v) fU32TO8_LE_SLOW(p, v)
|
||||
#endif
|
||||
|
||||
#if !defined(U64TO8_LE)
|
||||
static INLINE void
|
||||
fU64TO8_LE_SLOW(uint8_t *p, const uint64_t v) {
|
||||
p[0] = (uint8_t)(v );
|
||||
p[1] = (uint8_t)(v >> 8);
|
||||
p[2] = (uint8_t)(v >> 16);
|
||||
p[3] = (uint8_t)(v >> 24);
|
||||
p[4] = (uint8_t)(v >> 32);
|
||||
p[5] = (uint8_t)(v >> 40);
|
||||
p[6] = (uint8_t)(v >> 48);
|
||||
p[7] = (uint8_t)(v >> 56);
|
||||
}
|
||||
|
||||
#define U64TO8_LE(p, v) fU64TO8_LE_SLOW(p, v)
|
||||
#endif
|
||||
|
||||
/* 0400-endian-999-generic-swap.h */
|
||||
|
||||
#if !defined(U16_SWAP)
|
||||
static INLINE uint16_t
|
||||
fU16_SWAP_SLOW(uint16_t v) {
|
||||
v = (v << 8) | (v >> 8);
|
||||
return v;
|
||||
}
|
||||
|
||||
#define U16_SWAP(p) fU16_SWAP_SLOW(p)
|
||||
#endif
|
||||
|
||||
#if !defined(U32_SWAP)
|
||||
static INLINE uint32_t
|
||||
fU32_SWAP_SLOW(uint32_t v) {
|
||||
v = ((v << 8) & 0xFF00FF00) | ((v >> 8) & 0xFF00FF);
|
||||
v = (v << 16) | (v >> 16);
|
||||
return v;
|
||||
}
|
||||
|
||||
#define U32_SWAP(p) fU32_SWAP_SLOW(p)
|
||||
#endif
|
||||
|
||||
#if !defined(U64_SWAP)
|
||||
static INLINE uint64_t
|
||||
fU64_SWAP_SLOW(uint64_t v) {
|
||||
v = ((v << 8) & 0xFF00FF00FF00FF00ull) | ((v >> 8) & 0x00FF00FF00FF00FFull);
|
||||
v = ((v << 16) & 0xFFFF0000FFFF0000ull) | ((v >> 16) & 0x0000FFFF0000FFFFull);
|
||||
v = (v << 32) | (v >> 32);
|
||||
return v;
|
||||
}
|
||||
|
||||
#define U64_SWAP(p) fU64_SWAP_SLOW(p)
|
||||
#endif
|
||||
|
||||
/* 0400-uint128-000.h */
|
||||
|
||||
/* 0400-uint128-100-clang.h */
|
||||
|
||||
/*
 * When HAVE_TI_MODE is defined, declare uint128_t as an unsigned integer
 * in the compiler's TI machine mode and advertise it through
 * HAVE_NATIVE_UINT128.
 */
#if defined(HAVE_TI_MODE)
typedef unsigned uint128_t __attribute__((mode(TI)));
# define HAVE_NATIVE_UINT128
#endif
|
||||
|
||||
/* 0400-uint128-100-msvc.h */
|
||||
|
||||
/*
 * 128-bit helpers for 64-bit MSVC builds: uint128_t is emulated as a
 * pair of 64-bit halves and the multiply / shift operations use the
 * _umul128 and __shiftright128 intrinsics.
 */
#if defined(CPU_64BITS) && defined(COMPILER_MSVC)
# define HAVE_UINT128

typedef struct uint128 {
    uint64_t lo, hi;
} uint128_t;

/* Full 64x64 -> 128 bit product of a and b. */
static INLINE uint128_t
mul64x64_128(uint64_t a, uint64_t b) {
    uint128_t product;

    /* _umul128 returns the low half and writes the high half through the pointer. */
    product.lo = _umul128(a, b, &product.hi);
    return product;
}

/* Shift the 128-bit value hi:lo right by `shift` via __shiftright128. */
static INLINE uint64_t
shr128_pair(uint64_t hi, uint64_t lo, const int shift) {
    return __shiftright128(lo, hi, shift);
}

/* Same as shr128_pair(), taking the two halves from v. */
static INLINE uint64_t
shr128(uint128_t v, const int shift) {
    return __shiftright128(v.lo, v.hi, shift);
}

/* 128-bit addition a + b, propagating the carry out of the low halves. */
static INLINE uint128_t
add128(uint128_t a, uint128_t b) {
    uint128_t sum;

    sum.lo = a.lo + b.lo;
    /* The low half wrapped around exactly when it ended up below a.lo. */
    sum.hi = a.hi + (b.hi + (sum.lo < a.lo));
    return sum;
}

/* Adds the 64-bit value b to the 128-bit value a. */
static INLINE uint128_t
add128_64(uint128_t a, uint64_t b) {
    uint128_t sum;

    sum.lo = a.lo + b;
    sum.hi = a.hi + (sum.lo < a.lo);
    return sum;
}

/* Low 64 bits of a. */
static INLINE uint64_t
lo128(uint128_t a) {
    return a.lo;
}

/* High 64 bits of a. */
static INLINE uint64_t
hi128(uint128_t a) {
    return a.hi;
}
#endif
|
||||
|
||||
/* 0400-uint128-999.h */
|
||||
|
||||
/*
 * 128-bit helpers for compilers with a native uint128_t: every operation
 * maps directly onto the built-in integer arithmetic.
 */
#if defined(HAVE_NATIVE_UINT128)
# define HAVE_UINT128

/* Full 64x64 -> 128 bit product of a and b. */
static INLINE uint128_t
mul64x64_128(uint64_t a, uint64_t b) {
    const uint128_t wide_a = a;

    return wide_a * b;
}

/* Low 64 bits of v shifted right by `shift`. */
static INLINE uint64_t
shr128(uint128_t v, const int shift) {
    const uint128_t shifted = v >> shift;

    return (uint64_t)shifted;
}

/* Low 64 bits of the 128-bit value hi:lo shifted right by `shift`. */
static INLINE uint64_t
shr128_pair(uint64_t hi, uint64_t lo, const int shift) {
    const uint128_t joined = ((uint128_t)hi << 64) | lo;

    return (uint64_t)(joined >> shift);
}

/* 128-bit addition a + b. */
static INLINE uint128_t
add128(uint128_t a, uint128_t b) {
    const uint128_t sum = a + b;

    return sum;
}

/* Adds the 64-bit value b to the 128-bit value a. */
static INLINE uint128_t
add128_64(uint128_t a, uint64_t b) {
    const uint128_t sum = a + (uint128_t)b;

    return sum;
}

/* Low 64 bits of a. */
static INLINE uint64_t
lo128(uint128_t a) {
    return (uint64_t)a;
}

/* High 64 bits of a. */
static INLINE uint64_t
hi128(uint128_t a) {
    const uint128_t upper = a >> 64;

    return (uint64_t)upper;
}
#endif
|
||||
|
||||
#endif /* PORTABLE_JANE_H */
|
||||
|
@ -124,10 +124,10 @@ crypto_onetimeauth_poly1305_implementation *
|
||||
crypto_onetimeauth_pick_best_implementation(void)
|
||||
{
|
||||
crypto_onetimeauth_poly1305_implementation *implementations[] = {
|
||||
&crypto_onetimeauth_poly1305_donna_implementation,
|
||||
#ifdef HAVE_FENV_H
|
||||
&crypto_onetimeauth_poly1305_53_implementation,
|
||||
#endif
|
||||
&crypto_onetimeauth_poly1305_donna_implementation,
|
||||
NULL
|
||||
};
|
||||
const char *err;
|
||||
|
Loading…
Reference in New Issue
Block a user