curve25519-donna-c64: replace U8TO64/U64TO8 with load_limb/store_limb
This brings these helpers in line with the current @agl code.
This commit is contained in:
parent
4f70e7c171
commit
ae13df74e1
@ -36,10 +36,10 @@ typedef limb felem[5];
|
||||
typedef unsigned uint128_t __attribute__((mode(TI)));
|
||||
|
||||
#undef force_inline
|
||||
#define force_inline inline __attribute__((always_inline))
|
||||
#define force_inline __attribute__((always_inline))
|
||||
|
||||
/* Sum two numbers: output += in */
|
||||
static force_inline void
|
||||
static inline void force_inline
|
||||
fsum(limb *output, const limb *in) {
|
||||
output[0] += in[0];
|
||||
output[1] += in[1];
|
||||
@ -54,7 +54,7 @@ fsum(limb *output, const limb *in) {
|
||||
* Assumes that out[i] < 2**52
|
||||
* On return, out[i] < 2**55
|
||||
*/
|
||||
static force_inline void
|
||||
static inline void force_inline
|
||||
fdifference_backwards(felem out, const felem in) {
|
||||
/* 152 is 19 << 3 */
|
||||
static const limb two54m152 = (((limb)1) << 54) - 152;
|
||||
@ -68,7 +68,7 @@ fdifference_backwards(felem out, const felem in) {
|
||||
}
|
||||
|
||||
/* Multiply a number by a scalar: output = in * scalar */
|
||||
static force_inline void
|
||||
static inline void force_inline
|
||||
fscalar_product(felem output, const felem in, const limb scalar) {
|
||||
uint128_t a;
|
||||
|
||||
@ -98,7 +98,7 @@ fscalar_product(felem output, const felem in, const limb scalar) {
|
||||
* Assumes that in[i] < 2**55 and likewise for in2.
|
||||
* On return, output[i] < 2**52
|
||||
*/
|
||||
static force_inline void
|
||||
static inline void force_inline
|
||||
fmul(felem output, const felem in2, const felem in) {
|
||||
uint128_t t[5];
|
||||
limb r0,r1,r2,r3,r4,s0,s1,s2,s3,s4,c;
|
||||
@ -147,7 +147,7 @@ fmul(felem output, const felem in2, const felem in) {
|
||||
output[4] = r4;
|
||||
}
|
||||
|
||||
static force_inline void
|
||||
static inline void force_inline
|
||||
fsquare_times(felem output, const felem in, limb count) {
|
||||
uint128_t t[5];
|
||||
limb r0,r1,r2,r3,r4,c;
|
||||
@ -190,43 +190,43 @@ fsquare_times(felem output, const felem in, limb count) {
|
||||
}
|
||||
|
||||
#if !defined(CPU_ALIGNED_ACCESS_REQUIRED) && defined(NATIVE_LITTLE_ENDIAN)
|
||||
# define U8TO64(p) (*((const uint64_t *) (p)))
|
||||
# define U64TO8(p, v) (*((uint64_t *) (p)) = (v))
|
||||
# define load_limb(p) (*((const uint64_t *) (p)))
|
||||
# define store_limb(p, v) (*((uint64_t *) (p)) = (v))
|
||||
#else
|
||||
static force_inline uint64_t
|
||||
U8TO64(const unsigned char *p) {
|
||||
return
|
||||
(((uint64_t)(p[0] & 0xff) ) |
|
||||
((uint64_t)(p[1] & 0xff) << 8) |
|
||||
((uint64_t)(p[2] & 0xff) << 16) |
|
||||
((uint64_t)(p[3] & 0xff) << 24) |
|
||||
((uint64_t)(p[4] & 0xff) << 32) |
|
||||
((uint64_t)(p[5] & 0xff) << 40) |
|
||||
((uint64_t)(p[6] & 0xff) << 48) |
|
||||
((uint64_t)(p[7] & 0xff) << 56));
|
||||
/* Assemble a limb from the eight bytes in[0..7], least-significant byte
 * first: in[0] supplies bits 0-7 and in[7] supplies bits 56-63.  The bytes
 * are read one at a time and combined with shifts, so the result does not
 * depend on host byte order.
 * Assumes u8 is an 8-bit unsigned byte type and limb is at least 64 bits
 * wide -- both are declared outside this view; TODO confirm. */
static inline limb force_inline
|
||||
load_limb(const u8 *in) {
|
||||
return
|
||||
((limb)in[0]) |
|
||||
(((limb)in[1]) << 8) |
|
||||
(((limb)in[2]) << 16) |
|
||||
(((limb)in[3]) << 24) |
|
||||
(((limb)in[4]) << 32) |
|
||||
(((limb)in[5]) << 40) |
|
||||
(((limb)in[6]) << 48) |
|
||||
(((limb)in[7]) << 56);
|
||||
}
|
||||
|
||||
static force_inline void
|
||||
U64TO8(unsigned char *p, uint64_t v) {
|
||||
p[0] = (v ) & 0xff;
|
||||
p[1] = (v >> 8) & 0xff;
|
||||
p[2] = (v >> 16) & 0xff;
|
||||
p[3] = (v >> 24) & 0xff;
|
||||
p[4] = (v >> 32) & 0xff;
|
||||
p[5] = (v >> 40) & 0xff;
|
||||
p[6] = (v >> 48) & 0xff;
|
||||
p[7] = (v >> 56) & 0xff;
|
||||
/* Write the low 64 bits of `in` to out[0..7], least-significant byte first:
 * out[0] receives bits 0-7 and out[7] receives bits 56-63.  Each byte is
 * stored individually, so the layout does not depend on host byte order.
 * Assumes u8 is an 8-bit unsigned byte type and limb is at least 64 bits
 * wide -- both are declared outside this view; TODO confirm. */
static inline void force_inline
|
||||
store_limb(u8 *out, limb in) {
|
||||
out[0] = in & 0xff;
|
||||
out[1] = (in >> 8) & 0xff;
|
||||
out[2] = (in >> 16) & 0xff;
|
||||
out[3] = (in >> 24) & 0xff;
|
||||
out[4] = (in >> 32) & 0xff;
|
||||
out[5] = (in >> 40) & 0xff;
|
||||
out[6] = (in >> 48) & 0xff;
|
||||
out[7] = (in >> 56) & 0xff;
|
||||
}
|
||||
#endif
|
||||
|
||||
/* Take a little-endian, 32-byte number and expand it into polynomial form.
 *
 * The input is cut into five 51-bit limbs (0x7ffffffffffff is 2**51 - 1), so
 * limb i begins at bit 51*i of the input: bits 0, 51, 102, 153 and 204.
 * Each limb is produced by one 64-bit little-endian load at a byte offset,
 * a right shift by the leftover bit offset, and a 51-bit mask:
 *   51 = 8*6 + 3,  102 = 8*12 + 6,  153 = 8*19 + 1,  204 = 8*24 + 12.
 * The last load starts at byte 24 rather than 25 so that it ends at in[31]
 * and does not read past the 32-byte input.  Bit 255 of the input is
 * discarded by the final mask.
 */
|
||||
static void
|
||||
fexpand(limb *output, const u8 *in) {
|
||||
output[0] = U8TO64(in) & 0x7ffffffffffff;
|
||||
output[1] = (U8TO64(in+6) >> 3) & 0x7ffffffffffff;
|
||||
output[2] = (U8TO64(in+12) >> 6) & 0x7ffffffffffff;
|
||||
output[3] = (U8TO64(in+19) >> 1) & 0x7ffffffffffff;
|
||||
output[4] = (U8TO64(in+24) >> 12) & 0x7ffffffffffff;
|
||||
output[0] = load_limb(in) & 0x7ffffffffffff;
|
||||
output[1] = (load_limb(in+6) >> 3) & 0x7ffffffffffff;
|
||||
output[2] = (load_limb(in+12) >> 6) & 0x7ffffffffffff;
|
||||
output[3] = (load_limb(in+19) >> 1) & 0x7ffffffffffff;
|
||||
output[4] = (load_limb(in+24) >> 12) & 0x7ffffffffffff;
|
||||
}
|
||||
|
||||
/* Take a fully reduced polynomial form number and contract it into a
|
||||
@ -281,10 +281,10 @@ fcontract(u8 *output, const felem input) {
|
||||
t[4] += t[3] >> 51; t[3] &= 0x7ffffffffffff;
|
||||
t[4] &= 0x7ffffffffffff;
|
||||
|
||||
U64TO8(output, t[0] | (t[1] << 51));
|
||||
U64TO8(output + 8, (t[1] >> 13) | (t[2] << 38));
|
||||
U64TO8(output + 16, (t[2] >> 26) | (t[3] << 25));
|
||||
U64TO8(output + 24, (t[3] >> 39) | (t[4] << 12));
|
||||
store_limb(output, t[0] | (t[1] << 51));
|
||||
store_limb(output + 8, (t[1] >> 13) | (t[2] << 38));
|
||||
store_limb(output + 16, (t[2] >> 26) | (t[3] << 25));
|
||||
store_limb(output + 24, (t[3] >> 39) | (t[4] << 12));
|
||||
}
|
||||
|
||||
/* Input: Q, Q', Q-Q'
|
||||
|
Loading…
Reference in New Issue
Block a user