From cdfd98e9086258a58f41c0a5c149caa8e2cc973b Mon Sep 17 00:00:00 2001
From: Frank Denis <github@pureftpd.org>
Date: Mon, 6 Nov 2017 10:52:03 +0100
Subject: [PATCH] Move fe_cswap and fe_scalar_product to core

---
 .../crypto_core/curve25519/ref10/fe_25_5/fe.h | 143 +++++++++++++++
 .../crypto_core/curve25519/ref10/fe_51/fe.h   |  77 ++++++++
 .../donna_c64/curve25519_donna_c64.c          |  50 +-----
 .../curve25519/ref10/x25519_ref10.c           | 165 +-----------------
 .../include/sodium/private/curve25519_ref10.h |   6 +-
 5 files changed, 230 insertions(+), 211 deletions(-)

diff --git a/src/libsodium/crypto_core/curve25519/ref10/fe_25_5/fe.h b/src/libsodium/crypto_core/curve25519/ref10/fe_25_5/fe.h
index e8d8f817..258f2715 100644
--- a/src/libsodium/crypto_core/curve25519/ref10/fe_25_5/fe.h
+++ b/src/libsodium/crypto_core/curve25519/ref10/fe_25_5/fe.h
@@ -205,6 +205,78 @@ fe_cmov(fe f, const fe g, unsigned int b)
     f[9] = f9 ^ x9;
 }
 
+void
+fe_cswap(fe f, fe g, unsigned int b) /* swap f and g iff b == 1; requires b in {0,1} */
+{
+    const uint32_t mask = (uint32_t) (-(int64_t) b); /* 0 when b == 0, all ones when b == 1 */
+
+    int32_t f0 = f[0]; /* every limb of f is copied into a local before anything is written */
+    int32_t f1 = f[1];
+    int32_t f2 = f[2];
+    int32_t f3 = f[3];
+    int32_t f4 = f[4];
+    int32_t f5 = f[5];
+    int32_t f6 = f[6];
+    int32_t f7 = f[7];
+    int32_t f8 = f[8];
+    int32_t f9 = f[9];
+
+    int32_t g0 = g[0]; /* likewise for every limb of g */
+    int32_t g1 = g[1];
+    int32_t g2 = g[2];
+    int32_t g3 = g[3];
+    int32_t g4 = g[4];
+    int32_t g5 = g[5];
+    int32_t g6 = g[6];
+    int32_t g7 = g[7];
+    int32_t g8 = g[8];
+    int32_t g9 = g[9];
+
+    int32_t x0 = f0 ^ g0; /* x_i holds the bits in which limb i of f and g differ */
+    int32_t x1 = f1 ^ g1;
+    int32_t x2 = f2 ^ g2;
+    int32_t x3 = f3 ^ g3;
+    int32_t x4 = f4 ^ g4;
+    int32_t x5 = f5 ^ g5;
+    int32_t x6 = f6 ^ g6;
+    int32_t x7 = f7 ^ g7;
+    int32_t x8 = f8 ^ g8;
+    int32_t x9 = f9 ^ g9;
+
+    x0 &= mask; /* keep the difference when swapping, clear it otherwise; no branch on b */
+    x1 &= mask;
+    x2 &= mask;
+    x3 &= mask;
+    x4 &= mask;
+    x5 &= mask;
+    x6 &= mask;
+    x7 &= mask;
+    x8 &= mask;
+    x9 &= mask;
+
+    f[0] = f0 ^ x0; /* f_i ^ x_i is g_i when swapping and f_i otherwise */
+    f[1] = f1 ^ x1;
+    f[2] = f2 ^ x2;
+    f[3] = f3 ^ x3;
+    f[4] = f4 ^ x4;
+    f[5] = f5 ^ x5;
+    f[6] = f6 ^ x6;
+    f[7] = f7 ^ x7;
+    f[8] = f8 ^ x8;
+    f[9] = f9 ^ x9;
+
+    g[0] = g0 ^ x0; /* g_i ^ x_i is f_i when swapping and g_i otherwise */
+    g[1] = g1 ^ x1;
+    g[2] = g2 ^ x2;
+    g[3] = g3 ^ x3;
+    g[4] = g4 ^ x4;
+    g[5] = g5 ^ x5;
+    g[6] = g6 ^ x6;
+    g[7] = g7 ^ x7;
+    g[8] = g8 ^ x8;
+    g[9] = g9 ^ x9;
+}
+
 /*
  h = f
  */
@@ -1136,3 +1208,74 @@ fe_sq2(fe h, const fe f)
     h[8] = (int32_t) h8;
     h[9] = (int32_t) h9;
 }
+
+void
+fe_scalar_product(fe h, const fe f, uint32_t n) /* h = f * n, followed by one carry pass */
+{
+    int64_t sn = (int64_t) n; /* 64-bit multiplier so each limb product below is 64-bit */
+    int32_t f0 = f[0]; /* all limbs of f are read before h is written, so h may alias f */
+    int32_t f1 = f[1];
+    int32_t f2 = f[2];
+    int32_t f3 = f[3];
+    int32_t f4 = f[4];
+    int32_t f5 = f[5];
+    int32_t f6 = f[6];
+    int32_t f7 = f[7];
+    int32_t f8 = f[8];
+    int32_t f9 = f[9];
+    int64_t h0 = f0 * sn;
+    int64_t h1 = f1 * sn;
+    int64_t h2 = f2 * sn;
+    int64_t h3 = f3 * sn;
+    int64_t h4 = f4 * sn;
+    int64_t h5 = f5 * sn;
+    int64_t h6 = f6 * sn;
+    int64_t h7 = f7 * sn;
+    int64_t h8 = f8 * sn;
+    int64_t h9 = f9 * sn;
+    int64_t carry0, carry1, carry2, carry3, carry4, carry5, carry6, carry7,
+            carry8, carry9;
+
+    carry9 = (h9 + ((int64_t) 1 << 24)) >> 25; /* first pass: odd limbs, carried at 25 bits */
+    h0 += carry9 * 19; /* the carry out of limb 9 wraps into limb 0 multiplied by 19 */
+    h9 -= carry9 * ((int64_t) 1 << 25);
+    carry1 = (h1 + ((int64_t) 1 << 24)) >> 25;
+    h2 += carry1;
+    h1 -= carry1 * ((int64_t) 1 << 25);
+    carry3 = (h3 + ((int64_t) 1 << 24)) >> 25;
+    h4 += carry3;
+    h3 -= carry3 * ((int64_t) 1 << 25);
+    carry5 = (h5 + ((int64_t) 1 << 24)) >> 25;
+    h6 += carry5;
+    h5 -= carry5 * ((int64_t) 1 << 25);
+    carry7 = (h7 + ((int64_t) 1 << 24)) >> 25;
+    h8 += carry7;
+    h7 -= carry7 * ((int64_t) 1 << 25);
+
+    carry0 = (h0 + ((int64_t) 1 << 25)) >> 26; /* second pass: even limbs, carried at 26 bits */
+    h1 += carry0;
+    h0 -= carry0 * ((int64_t) 1 << 26);
+    carry2 = (h2 + ((int64_t) 1 << 25)) >> 26;
+    h3 += carry2;
+    h2 -= carry2 * ((int64_t) 1 << 26);
+    carry4 = (h4 + ((int64_t) 1 << 25)) >> 26;
+    h5 += carry4;
+    h4 -= carry4 * ((int64_t) 1 << 26);
+    carry6 = (h6 + ((int64_t) 1 << 25)) >> 26;
+    h7 += carry6;
+    h6 -= carry6 * ((int64_t) 1 << 26);
+    carry8 = (h8 + ((int64_t) 1 << 25)) >> 26;
+    h9 += carry8;
+    h8 -= carry8 * ((int64_t) 1 << 26);
+
+    h[0] = (int32_t) h0; /* NOTE(review): these narrowing casts assume n is small enough */
+    h[1] = (int32_t) h1; /* that one carry pass leaves each limb within int32_t; confirm. */
+    h[2] = (int32_t) h2;
+    h[3] = (int32_t) h3;
+    h[4] = (int32_t) h4;
+    h[5] = (int32_t) h5;
+    h[6] = (int32_t) h6;
+    h[7] = (int32_t) h7;
+    h[8] = (int32_t) h8;
+    h[9] = (int32_t) h9;
+}
diff --git a/src/libsodium/crypto_core/curve25519/ref10/fe_51/fe.h b/src/libsodium/crypto_core/curve25519/ref10/fe_51/fe.h
index 9587f572..3455cbe8 100644
--- a/src/libsodium/crypto_core/curve25519/ref10/fe_51/fe.h
+++ b/src/libsodium/crypto_core/curve25519/ref10/fe_51/fe.h
@@ -147,6 +147,55 @@ fe_cmov(fe f, const fe g, unsigned int b)
     f[4] = f4 ^ x4;
 }
 
+/*
+Conditionally swap f and g: replace (f,g) with (g,f) if b == 1;
+leave (f,g) unchanged if b == 0. No branch in this code depends on b.
+
+Preconditions: b in {0,1}; any other value yields a partial bit mask.
+*/
+
+void
+fe_cswap(fe f, fe g, unsigned int b)
+{
+    const uint64_t mask = (uint64_t) (-(int64_t) b); /* 0 when b == 0, all ones when b == 1 */
+
+    uint64_t f0 = f[0]; /* every limb of f is copied into a local before anything is written */
+    uint64_t f1 = f[1];
+    uint64_t f2 = f[2];
+    uint64_t f3 = f[3];
+    uint64_t f4 = f[4];
+
+    uint64_t g0 = g[0]; /* likewise for every limb of g */
+    uint64_t g1 = g[1];
+    uint64_t g2 = g[2];
+    uint64_t g3 = g[3];
+    uint64_t g4 = g[4];
+
+    uint64_t x0 = f0 ^ g0; /* x_i holds the bits in which limb i of f and g differ */
+    uint64_t x1 = f1 ^ g1;
+    uint64_t x2 = f2 ^ g2;
+    uint64_t x3 = f3 ^ g3;
+    uint64_t x4 = f4 ^ g4;
+
+    x0 &= mask; /* keep the difference when swapping, clear it otherwise */
+    x1 &= mask;
+    x2 &= mask;
+    x3 &= mask;
+    x4 &= mask;
+
+    f[0] = f0 ^ x0; /* f_i ^ x_i is g_i when swapping and f_i otherwise */
+    f[1] = f1 ^ x1;
+    f[2] = f2 ^ x2;
+    f[3] = f3 ^ x3;
+    f[4] = f4 ^ x4;
+
+    g[0] = g0 ^ x0; /* g_i ^ x_i is f_i when swapping and g_i otherwise */
+    g[1] = g1 ^ x1;
+    g[2] = g2 ^ x2;
+    g[3] = g3 ^ x3;
+    g[4] = g4 ^ x4;
+}
+
 /*
  h = f
  */
@@ -568,3 +617,31 @@ fe_sq2(fe h, const fe f)
     h[3] = r03;
     h[4] = r04;
 }
+
+void
+fe_scalar_product(fe h, const fe f, uint32_t n) /* h = f * n, limbs carried at 51 bits */
+{
+    const uint64_t mask = 0x7ffffffffffffULL; /* 2^51 - 1: selects the low 51 bits */
+    uint128_t a; /* 128-bit accumulator: limb product plus the incoming carry */
+    uint128_t sn = (uint128_t) n; /* widened so that f[i] * sn is a 128-bit product */
+    uint64_t  h0, h1, h2, h3, h4;
+
+    a  = f[0] * sn;
+    h0 = ((uint64_t) a) & mask; /* the low 51 bits stay in this limb */
+    a  = f[1] * sn + ((uint64_t) (a >> 51)); /* bits above 51 carry into the next limb */
+    h1 = ((uint64_t) a) & mask;
+    a  = f[2] * sn + ((uint64_t) (a >> 51));
+    h2 = ((uint64_t) a) & mask;
+    a  = f[3] * sn + ((uint64_t) (a >> 51));
+    h3 = ((uint64_t) a) & mask;
+    a  = f[4] * sn + ((uint64_t) (a >> 51));
+    h4 = ((uint64_t) a) & mask;
+
+    h0 += (a >> 51) * 19ULL; /* top carry wraps into limb 0 times 19; h0 is not re-masked */
+
+    h[0] = h0; /* h is written only after all of f has been read, so h may alias f */
+    h[1] = h1;
+    h[2] = h2;
+    h[3] = h3;
+    h[4] = h4;
+}
diff --git a/src/libsodium/crypto_scalarmult/curve25519/donna_c64/curve25519_donna_c64.c b/src/libsodium/crypto_scalarmult/curve25519/donna_c64/curve25519_donna_c64.c
index c9529fb5..13fc5c4f 100644
--- a/src/libsodium/crypto_scalarmult/curve25519/donna_c64/curve25519_donna_c64.c
+++ b/src/libsodium/crypto_scalarmult/curve25519/donna_c64/curve25519_donna_c64.c
@@ -64,27 +64,6 @@ fe_sub_backwards(fe out, const fe in)
     out[4] = in[4] + two54m8   - out[4];
 }
 
-/* Multiply a number by a scalar: output = in * scalar */
-static inline void
-fe_scalar_product(fe output, const fe in, const uint64_t scalar)
-{
-    const uint64_t mask = 0x7ffffffffffffULL;
-    uint128_t a;
-
-    a = in[0] * (uint128_t) scalar;
-    output[0] = ((uint64_t) a) & mask;
-    a = in[1] * (uint128_t) scalar + ((uint64_t) (a >> 51));
-    output[1] = ((uint64_t) a) & mask;
-    a = in[2] * (uint128_t) scalar + ((uint64_t) (a >> 51));
-    output[2] = ((uint64_t) a) & mask;
-    a = in[3] * (uint128_t) scalar + ((uint64_t) (a >> 51));
-    output[3] = ((uint64_t) a) & mask;
-    a = in[4] * (uint128_t) scalar + ((uint64_t) (a >> 51));
-    output[4] = ((uint64_t) a) & mask;
-
-    output[0] += (a >> 51) * 19ULL;
-}
-
 /* Multiply two numbers: output = in2 * in
  *
  * output must be distinct to both inputs. The inputs are reduced coefficient
@@ -265,27 +244,6 @@ fe_mont_y(fe x2,     fe z2,     /* output 2Q */
     fe_mul_restrict(z2, zz, zzz);
 }
 
-/* -----------------------------------------------------------------------------
-   Maybe swap the contents of two uint64_t arrays (f and g), each 5 elements
-   long. Perform the swap iff b is non-zero.
-
-   This function performs the swap without leaking any side-channel
-   information.
-   -----------------------------------------------------------------------------
-   */
-static void
-swap_conditional(fe f, fe g, unsigned int b)
-{
-    const uint64_t mask = (uint64_t) (-(int64_t) b);
-    unsigned int   i;
-
-    for (i = 0; i < 5; i++) {
-        const uint64_t x = mask & (f[i] ^ g[i]);
-        f[i] ^= x;
-        g[i] ^= x;
-    }
-}
-
 /* Calculates nQ where Q is the x-coordinate of a point on the curve
  *
  *   resultx/resultz: the x coordinate of the resulting curve point (short form)
@@ -309,11 +267,11 @@ cmult(fe resultx, fe resultz, const uint8_t *n, const fe q)
         for (j = 0; j < 8; ++j) {
             const unsigned int bit = byte >> 7;
 
-            swap_conditional(nqx, nqpqx, bit);
-            swap_conditional(nqz, nqpqz, bit);
+            fe_cswap(nqx, nqpqx, bit);
+            fe_cswap(nqz, nqpqz, bit);
             fe_mont_y(nqx2, nqz2, nqpqx2, nqpqz2, nqx, nqz, nqpqx, nqpqz, q);
-            swap_conditional(nqx2, nqpqx2, bit);
-            swap_conditional(nqz2, nqpqz2, bit);
+            fe_cswap(nqx2, nqpqx2, bit);
+            fe_cswap(nqz2, nqpqz2, bit);
 
             t = nqx;
             nqx = nqx2;
diff --git a/src/libsodium/crypto_scalarmult/curve25519/ref10/x25519_ref10.c b/src/libsodium/crypto_scalarmult/curve25519/ref10/x25519_ref10.c
index 208ae865..c69f452a 100644
--- a/src/libsodium/crypto_scalarmult/curve25519/ref10/x25519_ref10.c
+++ b/src/libsodium/crypto_scalarmult/curve25519/ref10/x25519_ref10.c
@@ -9,169 +9,6 @@
 #include "utils.h"
 #include "x25519_ref10.h"
 
-/*
-Replace (f,g) with (g,f) if b == 1;
-replace (f,g) with (f,g) if b == 0.
-
-Preconditions: b in {0,1}.
-*/
-
-static void
-fe_cswap(fe f, fe g, unsigned int b)
-{
-    int32_t f0 = f[0];
-    int32_t f1 = f[1];
-    int32_t f2 = f[2];
-    int32_t f3 = f[3];
-    int32_t f4 = f[4];
-    int32_t f5 = f[5];
-    int32_t f6 = f[6];
-    int32_t f7 = f[7];
-    int32_t f8 = f[8];
-    int32_t f9 = f[9];
-    int32_t g0 = g[0];
-    int32_t g1 = g[1];
-    int32_t g2 = g[2];
-    int32_t g3 = g[3];
-    int32_t g4 = g[4];
-    int32_t g5 = g[5];
-    int32_t g6 = g[6];
-    int32_t g7 = g[7];
-    int32_t g8 = g[8];
-    int32_t g9 = g[9];
-    int32_t x0 = f0 ^ g0;
-    int32_t x1 = f1 ^ g1;
-    int32_t x2 = f2 ^ g2;
-    int32_t x3 = f3 ^ g3;
-    int32_t x4 = f4 ^ g4;
-    int32_t x5 = f5 ^ g5;
-    int32_t x6 = f6 ^ g6;
-    int32_t x7 = f7 ^ g7;
-    int32_t x8 = f8 ^ g8;
-    int32_t x9 = f9 ^ g9;
-
-    b = (unsigned int)(-(int)b);
-    x0 &= b;
-    x1 &= b;
-    x2 &= b;
-    x3 &= b;
-    x4 &= b;
-    x5 &= b;
-    x6 &= b;
-    x7 &= b;
-    x8 &= b;
-    x9 &= b;
-    f[0] = f0 ^ x0;
-    f[1] = f1 ^ x1;
-    f[2] = f2 ^ x2;
-    f[3] = f3 ^ x3;
-    f[4] = f4 ^ x4;
-    f[5] = f5 ^ x5;
-    f[6] = f6 ^ x6;
-    f[7] = f7 ^ x7;
-    f[8] = f8 ^ x8;
-    f[9] = f9 ^ x9;
-    g[0] = g0 ^ x0;
-    g[1] = g1 ^ x1;
-    g[2] = g2 ^ x2;
-    g[3] = g3 ^ x3;
-    g[4] = g4 ^ x4;
-    g[5] = g5 ^ x5;
-    g[6] = g6 ^ x6;
-    g[7] = g7 ^ x7;
-    g[8] = g8 ^ x8;
-    g[9] = g9 ^ x9;
-}
-
-/*
-h = f * 121666
-Can overlap h with f.
-
-Preconditions:
-   |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
-
-Postconditions:
-   |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
-*/
-
-static void
-fe_mul121666(fe h, const fe f)
-{
-    int32_t f0 = f[0];
-    int32_t f1 = f[1];
-    int32_t f2 = f[2];
-    int32_t f3 = f[3];
-    int32_t f4 = f[4];
-    int32_t f5 = f[5];
-    int32_t f6 = f[6];
-    int32_t f7 = f[7];
-    int32_t f8 = f[8];
-    int32_t f9 = f[9];
-    int64_t h0 = f0 * (int64_t)121666;
-    int64_t h1 = f1 * (int64_t)121666;
-    int64_t h2 = f2 * (int64_t)121666;
-    int64_t h3 = f3 * (int64_t)121666;
-    int64_t h4 = f4 * (int64_t)121666;
-    int64_t h5 = f5 * (int64_t)121666;
-    int64_t h6 = f6 * (int64_t)121666;
-    int64_t h7 = f7 * (int64_t)121666;
-    int64_t h8 = f8 * (int64_t)121666;
-    int64_t h9 = f9 * (int64_t)121666;
-    int64_t carry0;
-    int64_t carry1;
-    int64_t carry2;
-    int64_t carry3;
-    int64_t carry4;
-    int64_t carry5;
-    int64_t carry6;
-    int64_t carry7;
-    int64_t carry8;
-    int64_t carry9;
-
-    carry9 = (h9 + ((int64_t)1 << 24)) >> 25;
-    h0 += carry9 * 19;
-    h9 -= carry9 * ((int64_t)1 << 25);
-    carry1 = (h1 + ((int64_t)1 << 24)) >> 25;
-    h2 += carry1;
-    h1 -= carry1 * ((int64_t)1 << 25);
-    carry3 = (h3 + ((int64_t)1 << 24)) >> 25;
-    h4 += carry3;
-    h3 -= carry3 * ((int64_t)1 << 25);
-    carry5 = (h5 + ((int64_t)1 << 24)) >> 25;
-    h6 += carry5;
-    h5 -= carry5 * ((int64_t)1 << 25);
-    carry7 = (h7 + ((int64_t)1 << 24)) >> 25;
-    h8 += carry7;
-    h7 -= carry7 * ((int64_t)1 << 25);
-
-    carry0 = (h0 + ((int64_t)1 << 25)) >> 26;
-    h1 += carry0;
-    h0 -= carry0 * ((int64_t)1 << 26);
-    carry2 = (h2 + ((int64_t)1 << 25)) >> 26;
-    h3 += carry2;
-    h2 -= carry2 * ((int64_t)1 << 26);
-    carry4 = (h4 + ((int64_t)1 << 25)) >> 26;
-    h5 += carry4;
-    h4 -= carry4 * ((int64_t)1 << 26);
-    carry6 = (h6 + ((int64_t)1 << 25)) >> 26;
-    h7 += carry6;
-    h6 -= carry6 * ((int64_t)1 << 26);
-    carry8 = (h8 + ((int64_t)1 << 25)) >> 26;
-    h9 += carry8;
-    h8 -= carry8 * ((int64_t)1 << 26);
-
-    h[0] = (int32_t) h0;
-    h[1] = (int32_t) h1;
-    h[2] = (int32_t) h2;
-    h[3] = (int32_t) h3;
-    h[4] = (int32_t) h4;
-    h[5] = (int32_t) h5;
-    h[6] = (int32_t) h6;
-    h[7] = (int32_t) h7;
-    h[8] = (int32_t) h8;
-    h[9] = (int32_t) h9;
-}
-
 static int
 crypto_scalarmult_curve25519_ref10(unsigned char *q,
                                    const unsigned char *n,
@@ -223,7 +60,7 @@ crypto_scalarmult_curve25519_ref10(unsigned char *q,
         fe_mul(x2, tmp1, tmp0);
         fe_sub(tmp1, tmp1, tmp0);
         fe_sq(z2, z2);
-        fe_mul121666(z3, tmp1);
+        fe_scalar_product(z3, tmp1, 121666);
         fe_sq(x3, x3);
         fe_add(tmp0, tmp0, z3);
         fe_mul(z3, x1, z2);
diff --git a/src/libsodium/include/sodium/private/curve25519_ref10.h b/src/libsodium/include/sodium/private/curve25519_ref10.h
index 4088e8ec..50229186 100644
--- a/src/libsodium/include/sodium/private/curve25519_ref10.h
+++ b/src/libsodium/include/sodium/private/curve25519_ref10.h
@@ -30,12 +30,14 @@ typedef int32_t fe[10];
 #define fe_mul crypto_core_curve25519_ref10_fe_mul
 #define fe_sq crypto_core_curve25519_ref10_fe_sq
 #define fe_invert crypto_core_curve25519_ref10_fe_invert
+#define fe_cswap crypto_core_curve25519_fe_cswap /* NOTE(review): no ref10_ infix, unlike the macros above -- confirm intended */
+#define fe_scalar_product crypto_core_curve25519_fe_scalar_product /* NOTE(review): also lacks the ref10_ infix */
 
 extern void fe_frombytes(fe,const unsigned char *);
 extern void fe_tobytes(unsigned char *,const fe);
 
 extern void fe_copy(fe,const fe);
-extern int fe_iszero(const fe);
+extern int  fe_iszero(const fe);
 extern void fe_0(fe);
 extern void fe_1(fe);
 extern void fe_add(fe,const fe,const fe);
@@ -43,6 +45,8 @@ extern void fe_sub(fe,const fe,const fe);
 extern void fe_mul(fe,const fe,const fe);
 extern void fe_sq(fe,const fe);
 extern void fe_invert(fe,const fe);
+extern void fe_cswap(fe f, fe g, unsigned int b); /* swap f and g iff b == 1 (b in {0,1}) */
+extern void fe_scalar_product(fe h, const fe f, uint32_t n); /* h = f * n */
 
 /*
  ge means group element.