curve25519-donna-c64: replace U8TO64/U64TO8 with load_limb/store_limb

This matches the naming used in the current @agl code.
This commit is contained in:
Frank Denis 2014-11-20 11:43:53 -08:00
parent 4f70e7c171
commit ae13df74e1

View File

@ -36,10 +36,10 @@ typedef limb felem[5];
typedef unsigned uint128_t __attribute__((mode(TI)));
#undef force_inline
#define force_inline inline __attribute__((always_inline))
#define force_inline __attribute__((always_inline))
/* Sum two numbers: output += in */
static force_inline void
static inline void force_inline
fsum(limb *output, const limb *in) {
output[0] += in[0];
output[1] += in[1];
@ -54,7 +54,7 @@ fsum(limb *output, const limb *in) {
* Assumes that out[i] < 2**52
* On return, out[i] < 2**55
*/
static force_inline void
static inline void force_inline
fdifference_backwards(felem out, const felem in) {
/* 152 is 19 << 3 */
static const limb two54m152 = (((limb)1) << 54) - 152;
@ -68,7 +68,7 @@ fdifference_backwards(felem out, const felem in) {
}
/* Multiply a number by a scalar: output = in * scalar */
static force_inline void
static inline void force_inline
fscalar_product(felem output, const felem in, const limb scalar) {
uint128_t a;
@ -98,7 +98,7 @@ fscalar_product(felem output, const felem in, const limb scalar) {
* Assumes that in[i] < 2**55 and likewise for in2.
* On return, output[i] < 2**52
*/
static force_inline void
static inline void force_inline
fmul(felem output, const felem in2, const felem in) {
uint128_t t[5];
limb r0,r1,r2,r3,r4,s0,s1,s2,s3,s4,c;
@ -147,7 +147,7 @@ fmul(felem output, const felem in2, const felem in) {
output[4] = r4;
}
static force_inline void
static inline void force_inline
fsquare_times(felem output, const felem in, limb count) {
uint128_t t[5];
limb r0,r1,r2,r3,r4,c;
@ -190,43 +190,43 @@ fsquare_times(felem output, const felem in, limb count) {
}
/*
 * Fast path, selected when CPU_ALIGNED_ACCESS_REQUIRED is not defined and
 * NATIVE_LITTLE_ENDIAN is defined: a 64-bit value is read or written by
 * casting the byte pointer to a uint64_t pointer and dereferencing it.
 * U8TO64/load_limb expand to the same expression, as do U64TO8/store_limb.
 *
 * NOTE(review): the pointer passed in may not be 8-byte aligned (callers in
 * this file use offsets such as in+6 and in+19), and the access goes through
 * a cast from a byte pointer -- confirm the targeted compilers and build
 * flags tolerate this form of access.
 */
#if !defined(CPU_ALIGNED_ACCESS_REQUIRED) && defined(NATIVE_LITTLE_ENDIAN)
/* Read 64 bits at p. */
# define U8TO64(p) (*((const uint64_t *) (p)))
/* Write the 64-bit value v at p. */
# define U64TO8(p, v) (*((uint64_t *) (p)) = (v))
/* Read 64 bits at p. */
# define load_limb(p) (*((const uint64_t *) (p)))
/* Write the 64-bit value v at p. */
# define store_limb(p, v) (*((uint64_t *) (p)) = (v))
#else
static force_inline uint64_t
U8TO64(const unsigned char *p) {
return
(((uint64_t)(p[0] & 0xff) ) |
((uint64_t)(p[1] & 0xff) << 8) |
((uint64_t)(p[2] & 0xff) << 16) |
((uint64_t)(p[3] & 0xff) << 24) |
((uint64_t)(p[4] & 0xff) << 32) |
((uint64_t)(p[5] & 0xff) << 40) |
((uint64_t)(p[6] & 0xff) << 48) |
((uint64_t)(p[7] & 0xff) << 56));
/*
 * Assemble a limb from the eight bytes in[0]..in[7], least-significant byte
 * first: in[k] supplies bits 8*k .. 8*k+7 of the result.
 * The loop has a fixed trip count, so the work done does not depend on the
 * byte values.
 */
static inline limb force_inline
load_limb(const u8 *in) {
  limb value = 0;
  unsigned k;

  for (k = 0; k < 8; k++) {
    value |= ((limb)in[k]) << (8 * k);
  }
  return value;
}
static force_inline void
U64TO8(unsigned char *p, uint64_t v) {
p[0] = (v ) & 0xff;
p[1] = (v >> 8) & 0xff;
p[2] = (v >> 16) & 0xff;
p[3] = (v >> 24) & 0xff;
p[4] = (v >> 32) & 0xff;
p[5] = (v >> 40) & 0xff;
p[6] = (v >> 48) & 0xff;
p[7] = (v >> 56) & 0xff;
/*
 * Write the low 64 bits of `in` to out[0]..out[7], least-significant byte
 * first: out[k] receives bits 8*k .. 8*k+7 of `in`.
 */
static inline void force_inline
store_limb(u8 *out, limb in) {
  unsigned k;

  for (k = 0; k < 8; k++) {
    out[k] = (in >> (8 * k)) & 0xff;
  }
}
#endif
/* Take a little-endian, 32-byte number and expand it into polynomial form.
 *
 * output: receives five limbs; each is masked with 0x7ffffffffffff
 *         (2^51 - 1), so every limb ends up below 2^51.
 * in:     packed input; the loads below touch bytes in[0] .. in[31].
 *
 * Limb i carries input bits 51*i .. 51*i+50. Each statement loads 64 bits
 * starting at a byte at or before bit 51*i, shifts right to drop the bits
 * that belong to the previous limb, then masks to 51 bits.
 */
static void
fexpand(limb *output, const u8 *in) {
/* Form written with U8TO64, the name the commit title says is being replaced. */
output[0] = U8TO64(in) & 0x7ffffffffffff;
output[1] = (U8TO64(in+6) >> 3) & 0x7ffffffffffff;
output[2] = (U8TO64(in+12) >> 6) & 0x7ffffffffffff;
output[3] = (U8TO64(in+19) >> 1) & 0x7ffffffffffff;
output[4] = (U8TO64(in+24) >> 12) & 0x7ffffffffffff;
/* Same expressions written with load_limb. */
output[0] = load_limb(in) & 0x7ffffffffffff;                /* bits   0.. 50 */
output[1] = (load_limb(in+6) >> 3) & 0x7ffffffffffff;       /* bit  51 = 6*8  + 3  */
output[2] = (load_limb(in+12) >> 6) & 0x7ffffffffffff;      /* bit 102 = 12*8 + 6  */
output[3] = (load_limb(in+19) >> 1) & 0x7ffffffffffff;      /* bit 153 = 19*8 + 1  */
/* bit 204 = 24*8 + 12; loading at byte 24 keeps the read inside the 32-byte
 * input, and the 51-bit mask discards input bit 255. */
output[4] = (load_limb(in+24) >> 12) & 0x7ffffffffffff;
}
/* Take a fully reduced polynomial form number and contract it into a
@ -281,10 +281,10 @@ fcontract(u8 *output, const felem input) {
t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffff;
t[4] &= 0x7ffffffffffff;
U64TO8(output, t[0] | (t[1] << 51));
U64TO8(output + 8, (t[1] >> 13) | (t[2] << 38));
U64TO8(output + 16, (t[2] >> 26) | (t[3] << 25));
U64TO8(output + 24, (t[3] >> 39) | (t[4] << 12));
store_limb(output, t[0] | (t[1] << 51));
store_limb(output + 8, (t[1] >> 13) | (t[2] << 38));
store_limb(output + 16, (t[2] >> 26) | (t[3] << 25));
store_limb(output + 24, (t[3] >> 39) | (t[4] << 12));
}
/* Input: Q, Q', Q-Q'