diff --git a/configure.ac b/configure.ac index 5b872dc13..4121f004b 100644 --- a/configure.ac +++ b/configure.ac @@ -630,6 +630,22 @@ then fi +# ECC25519 +AC_ARG_ENABLE([ecc25519], + [ --enable-ecc25519 Enable ECC25519 (default: disabled)], + [ ENABLED_ECC25519=$enableval ], + [ ENABLED_ECC25519=no ] + ) + +if test "$ENABLED_ECC25519" = "yes" +then + AM_CFLAGS="$AM_CFLAGS -DHAVE_ECC25519" +fi + + +AM_CONDITIONAL([BUILD_ECC25519], [test "x$ENABLED_ECC25519" = "xyes"]) + + # FP ECC, Fixed Point cache ECC AC_ARG_ENABLE([fpecc], [ --enable-fpecc Enable Fixed Point cache ECC (default: disabled)], diff --git a/src/include.am b/src/include.am index 3d7f7e010..5cc1d20ac 100644 --- a/src/include.am +++ b/src/include.am @@ -164,6 +164,11 @@ if BUILD_ECC src_libwolfssl_la_SOURCES += wolfcrypt/src/ecc.c endif +if BUILD_ECC25519 +src_libwolfssl_la_SOURCES += wolfcrypt/src/ecc25519.c +src_libwolfssl_la_SOURCES += wolfcrypt/src/ecc25519_fe.c +endif + if BUILD_LIBZ src_libwolfssl_la_SOURCES += wolfcrypt/src/compress.c endif diff --git a/wolfcrypt/benchmark/benchmark.c b/wolfcrypt/benchmark/benchmark.c index 9350f168a..d5cccd9ab 100644 --- a/wolfcrypt/benchmark/benchmark.c +++ b/wolfcrypt/benchmark/benchmark.c @@ -53,7 +53,12 @@ #include #include #include -#include +#ifdef HAVE_ECC + #include +#endif +#ifdef HAVE_ECC25519 + #include +#endif #include #ifdef HAVE_CAVIUM @@ -132,6 +137,10 @@ void bench_dh(void); void bench_eccKeyGen(void); void bench_eccKeyAgree(void); #endif +#ifdef HAVE_ECC25519 +void bench_ecc25519KeyGen(void); +void bench_ecc25519KeyAgree(void); +#endif #ifdef HAVE_NTRU void bench_ntru(void); void bench_ntruKeyGen(void); @@ -327,7 +336,7 @@ int benchmark_test(void *args) bench_ntruKeyGen(); #endif -#ifdef HAVE_ECC +#ifdef HAVE_ECC bench_eccKeyGen(); bench_eccKeyAgree(); #if defined(FP_ECC) @@ -335,6 +344,11 @@ int benchmark_test(void *args) #endif #endif +#ifdef HAVE_ECC25519 + bench_ecc25519KeyGen(); + bench_ecc25519KeyAgree(); +#endif + #if 
defined(HAVE_LOCAL_RNG) && (defined(HAVE_HASHDRBG) || defined(NO_RC4)) wc_FreeRng(&rng); #endif @@ -1582,6 +1596,87 @@ void bench_eccKeyAgree(void) } #endif /* HAVE_ECC */ +#ifdef HAVE_ECC25519 +void bench_ecc25519KeyGen(void) +{ + ecc25519_key genKey; + double start, total, each, milliEach; + int i, ret; + + ret = wc_InitRng(&rng); + if (ret < 0) { + printf("InitRNG failed\n"); + return; + } + /* 256 bit */ + start = current_time(1); + + for(i = 0; i < genTimes; i++) { + wc_ecc25519_make_key(&rng, 32, &genKey); + wc_ecc25519_free(&genKey); + } + + total = current_time(0) - start; + each = total / genTimes; /* per second */ + milliEach = each * 1000; /* millisconds */ + printf("\n"); + printf("ECC25519 256 key generation %6.3f milliseconds, avg over %d" + " iterations\n", milliEach, genTimes); +} + + +void bench_ecc25519KeyAgree(void) +{ + ecc25519_key genKey, genKey2; + double start, total, each, milliEach; + int i, ret; + byte shared[1024]; + word32 x = 0; + + wc_ecc25519_init(&genKey); + wc_ecc25519_init(&genKey2); + + ret = wc_InitRng(&rng); + if (ret < 0) { + printf("InitRNG failed\n"); + return; + } + + ret = wc_ecc25519_make_key(&rng, 32, &genKey); + if (ret != 0) { + printf("ecc25519_make_key failed\n"); + return; + } + ret = wc_ecc25519_make_key(&rng, 32, &genKey2); + if (ret != 0) { + printf("ecc25519_make_key failed\n"); + return; + } + + /* 256 bit */ + start = current_time(1); + + for(i = 0; i < agreeTimes; i++) { + x = sizeof(shared); + ret = wc_ecc25519_shared_secret(&genKey, &genKey2, shared, &x); + if (ret != 0) { + printf("ecc25519_shared_secret failed\n"); + return; + } + } + + total = current_time(0) - start; + each = total / agreeTimes; /* per second */ + milliEach = each * 1000; /* millisconds */ + printf("ECC25519 key agreement %6.3f milliseconds, avg over %d" + " iterations\n", milliEach, agreeTimes); + + wc_ecc25519_free(&genKey2); + wc_ecc25519_free(&genKey); +} +#endif /* HAVE_ECC25519 */ + + #ifdef _WIN32 #define WIN32_LEAN_AND_MEAN diff 
--git a/wolfcrypt/src/ecc25519.c b/wolfcrypt/src/ecc25519.c new file mode 100644 index 000000000..8e7dc2e0d --- /dev/null +++ b/wolfcrypt/src/ecc25519.c @@ -0,0 +1,318 @@ +/* ecc25519.c + * + * Copyright (C) 2006-2015 wolfSSL Inc. + * + * This file is part of wolfSSL. (formerly known as CyaSSL) + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * + * From Daniel J Bernstein's curve25519 ref10 work. 
+ */ + + +#ifdef HAVE_CONFIG_H + #include +#endif + +#include + +#ifdef HAVE_ECC25519 + +#include +#include + +#define MONTGOMERY_X_LE 65 + +const ecc25519_set_type ecc25519_sets[] = { +{ + 32, + "CURVE25519", +} +}; + + +/* internal function */ +static int curve25519(unsigned char* q, unsigned char* n, unsigned char* p) +{ + unsigned char e[32]; + unsigned int i; + fe x1; + fe x2; + fe z2; + fe x3; + fe z3; + fe tmp0; + fe tmp1; + int pos; + unsigned int swap; + unsigned int b; + + for (i = 0;i < 32;++i) e[i] = n[i]; + e[0] &= 248; + e[31] &= 127; + e[31] |= 64; + + fe_frombytes(x1,p); + fe_1(x2); + fe_0(z2); + fe_copy(x3,x1); + fe_1(z3); + + swap = 0; + for (pos = 254;pos >= 0;--pos) { + b = e[pos / 8] >> (pos & 7); + b &= 1; + swap ^= b; + fe_cswap(x2,x3,swap); + fe_cswap(z2,z3,swap); + swap = b; +#include + } + fe_cswap(x2,x3,swap); + fe_cswap(z2,z3,swap); + + fe_invert(z2,z2); + fe_mul(x2,x2,z2); + fe_tobytes(q,x2); + + return 0; +} + + +int wc_ecc25519_make_key(RNG* rng, int keysize, ecc25519_key* key) +{ + unsigned char basepoint[ECC25519_KEYSIZE] = {9}; + unsigned char n[ECC25519_KEYSIZE]; + unsigned char p[ECC25519_KEYSIZE]; + int i; + int err; + + if (key == NULL || rng == NULL) + return ECC_BAD_ARG_E; + + /* currently only a key size of 32 bytes is used */ + if (keysize != ECC25519_KEYSIZE) + return ECC_BAD_ARG_E; + + /* get random number from RNG */ + err = wc_RNG_GenerateBlock(rng, n, keysize); + if (err != 0) + return err; + + for (i = 0; i < keysize; ++i) key->k.point[i] = n[i]; + key->k.point[ 0] &= 248; + key->k.point[31] &= 127; + key->k.point[31] |= 64; + + /*compute public key*/ + err = curve25519(p, key->k.point, basepoint); + + /* store keys in big endian format */ + for (i = 0; i < keysize; ++i) n[i] = key->k.point[i]; + for (i = 0; i < keysize; ++i) { + key->p.point[keysize - i - 1] = p[i]; + key->k.point[keysize - i - 1] = n[i]; + } + + XMEMSET(n, 0, keysize); + + return err; +} + + +int wc_ecc25519_shared_secret(ecc25519_key* private_key, 
ecc25519_key* public_key, + byte* out, word32* outlen) +{ + unsigned char k[ECC25519_KEYSIZE]; + unsigned char p[ECC25519_KEYSIZE]; + int err = 0; + int i; + + /* sanity check */ + if (private_key == NULL || public_key == NULL || out == NULL || + outlen == NULL) + return BAD_FUNC_ARG; + + if (private_key->k.point == NULL || public_key->p.point == NULL) + return BAD_FUNC_ARG; + + /* avoid implementation fingerprinting */ + if (public_key->p.point[0] > 0x7F) + return ECC_BAD_ARG_E; + + if (*outlen < ECC25519_KEYSIZE) + return BUFFER_E; + + XMEMSET(p, 0, sizeof(p)); + XMEMSET(k, 0, sizeof(k)); + XMEMSET(out, 0, ECC25519_KEYSIZE); + + for (i = 0; i < ECC25519_KEYSIZE; ++i) { + p[i] = public_key->p.point [ECC25519_KEYSIZE - i - 1]; + k[i] = private_key->k.point[ECC25519_KEYSIZE - i - 1]; + } + + err = curve25519(out , k, p); + *outlen = ECC25519_KEYSIZE; + + XMEMSET(p, 0, sizeof(p)); + XMEMSET(k, 0, sizeof(k)); + + return err; +} + + + +/* curve25519 uses a serialized string for key representation */ +int wc_ecc25519_export_public(ecc25519_key* key, byte* out, word32* outLen) +{ + word32 keySz; + byte offset; + + if (key == NULL || out == NULL) + return BAD_FUNC_ARG; + + /* check size of outgoing key */ + keySz = wc_ecc25519_size(key); + offset = 2; + + /* copy in public key and leave room for length and type byte */ + XMEMCPY(out + offset, key->p.point, keySz); + *outLen = keySz + offset; + + /* length and type */ + out[0] = *outLen; + out[1] = key->f; + + return 0; +} + +/* import curve25519 public key + return 0 on success */ +int wc_ecc25519_import_public(const byte* in, word32 inLen, ecc25519_key* key) +{ + word32 keySz; + byte offset; + + /* sanity check */ + if (key == NULL || in == NULL) + return ECC_BAD_ARG_E; + + /* check size of incoming keys */ + keySz = wc_ecc25519_size(key); + offset = 2; + + /* check that it is correct size plus length and type */ + if ((inLen != keySz + offset) || (in[1] != MONTGOMERY_X_LE)) + return ECC_BAD_ARG_E; + + 
XMEMCPY(key->p.point, in + offset, inLen); + + key->dp = &ecc25519_sets[0]; + + return 0; +} + + +/* export curve25519 private key only raw, outLen is in/out size + return 0 on success */ +int wc_ecc25519_export_private_raw(ecc25519_key* key, byte* out, word32* outLen) +{ + word32 keySz; + + /* sanity check */ + if (key == NULL || out == NULL || outLen == NULL) + return ECC_BAD_ARG_E; + + keySz = wc_ecc25519_size(key); + + if (*outLen < keySz) { + *outLen = keySz; + return BUFFER_E; + } + *outLen = keySz; + XMEMSET(out, 0, *outLen); + XMEMCPY(out, key->k.point, *outLen); + + return 0; +} + + +/* curve25519 private key import,public key in serialized format, private raw */ +int wc_ecc25519_import_private_raw(const byte* priv, word32 privSz, + const byte* pub, word32 pubSz, ecc25519_key* key) +{ + int ret = 0; + word32 keySz; + + /* sanity check */ + if (key == NULL || priv == NULL || pub ==NULL) + return ECC_BAD_ARG_E; + + /* check size of incoming keys */ + keySz = wc_ecc25519_size(key); + if (privSz != keySz || pubSz != keySz) + return ECC_BAD_ARG_E; + + XMEMCPY(key->k.point, priv, privSz); + XMEMCPY(key->p.point, pub, pubSz); + + return ret; +} + + +int wc_ecc25519_init(ecc25519_key* key) +{ + word32 keySz; + + if (key == NULL) + return ECC_BAD_ARG_E; + + /* currently the format for curve25519 */ + key->f = MONTGOMERY_X_LE; + key->dp = &ecc25519_sets[0]; + keySz = key->dp->size; + + XMEMSET(key->k.point, 0, keySz); + XMEMSET(key->p.point, 0, keySz); + + return 0; +} + + +/** + Clean the memory of a key +*/ +void wc_ecc25519_free(ecc25519_key* key) +{ + if (key == NULL) + return; + + key->dp = NULL; + XMEMSET(key->p.point, 0, sizeof(key->p.point)); + XMEMSET(key->k.point, 0, sizeof(key->k.point)); +} + + +/* key size */ +int wc_ecc25519_size(ecc25519_key* key) +{ + if (key == NULL) return 0; + + return key->dp->size; +} + +#endif /*HAVE_ECC25519*/ + diff --git a/wolfcrypt/src/ecc25519_fe.c b/wolfcrypt/src/ecc25519_fe.c new file mode 100644 index 
000000000..9ced36bca --- /dev/null +++ b/wolfcrypt/src/ecc25519_fe.c @@ -0,0 +1,961 @@ +/* ecc25519_fe.c + * + * Copyright (C) 2006-2015 wolfSSL Inc. + * + * This file is part of wolfSSL. (formerly known as CyaSSL) + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * + * From Daniel J Bernstein's curve25519 ref10 work. + */ + + +#ifdef HAVE_ECC25519 + +#include +#include + +/* +h = 0 +*/ + +void fe_0(fe h) +{ + h[0] = 0; + h[1] = 0; + h[2] = 0; + h[3] = 0; + h[4] = 0; + h[5] = 0; + h[6] = 0; + h[7] = 0; + h[8] = 0; + h[9] = 0; +} + + + +/* +h = 1 +*/ + +void fe_1(fe h) +{ + h[0] = 1; + h[1] = 0; + h[2] = 0; + h[3] = 0; + h[4] = 0; + h[5] = 0; + h[6] = 0; + h[7] = 0; + h[8] = 0; + h[9] = 0; +} + + +/* +h = f + g +Can overlap h with f or g. + +Preconditions: + |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. + |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. + +Postconditions: + |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. 
+*/ + +void fe_add(fe h,fe f,fe g) +{ + int32_t f0 = f[0]; + int32_t f1 = f[1]; + int32_t f2 = f[2]; + int32_t f3 = f[3]; + int32_t f4 = f[4]; + int32_t f5 = f[5]; + int32_t f6 = f[6]; + int32_t f7 = f[7]; + int32_t f8 = f[8]; + int32_t f9 = f[9]; + int32_t g0 = g[0]; + int32_t g1 = g[1]; + int32_t g2 = g[2]; + int32_t g3 = g[3]; + int32_t g4 = g[4]; + int32_t g5 = g[5]; + int32_t g6 = g[6]; + int32_t g7 = g[7]; + int32_t g8 = g[8]; + int32_t g9 = g[9]; + int32_t h0 = f0 + g0; + int32_t h1 = f1 + g1; + int32_t h2 = f2 + g2; + int32_t h3 = f3 + g3; + int32_t h4 = f4 + g4; + int32_t h5 = f5 + g5; + int32_t h6 = f6 + g6; + int32_t h7 = f7 + g7; + int32_t h8 = f8 + g8; + int32_t h9 = f9 + g9; + h[0] = h0; + h[1] = h1; + h[2] = h2; + h[3] = h3; + h[4] = h4; + h[5] = h5; + h[6] = h6; + h[7] = h7; + h[8] = h8; + h[9] = h9; +} + + + +/* +h = f +*/ + +void fe_copy(fe h,fe f) +{ + int32_t f0 = f[0]; + int32_t f1 = f[1]; + int32_t f2 = f[2]; + int32_t f3 = f[3]; + int32_t f4 = f[4]; + int32_t f5 = f[5]; + int32_t f6 = f[6]; + int32_t f7 = f[7]; + int32_t f8 = f[8]; + int32_t f9 = f[9]; + h[0] = f0; + h[1] = f1; + h[2] = f2; + h[3] = f3; + h[4] = f4; + h[5] = f5; + h[6] = f6; + h[7] = f7; + h[8] = f8; + h[9] = f9; +} + + +/* +Replace (f,g) with (g,f) if b == 1; +replace (f,g) with (f,g) if b == 0. + +Preconditions: b in {0,1}. 
+*/ + +void fe_cswap(fe f,fe g,unsigned int b) +{ + int32_t f0 = f[0]; + int32_t f1 = f[1]; + int32_t f2 = f[2]; + int32_t f3 = f[3]; + int32_t f4 = f[4]; + int32_t f5 = f[5]; + int32_t f6 = f[6]; + int32_t f7 = f[7]; + int32_t f8 = f[8]; + int32_t f9 = f[9]; + int32_t g0 = g[0]; + int32_t g1 = g[1]; + int32_t g2 = g[2]; + int32_t g3 = g[3]; + int32_t g4 = g[4]; + int32_t g5 = g[5]; + int32_t g6 = g[6]; + int32_t g7 = g[7]; + int32_t g8 = g[8]; + int32_t g9 = g[9]; + int32_t x0 = f0 ^ g0; + int32_t x1 = f1 ^ g1; + int32_t x2 = f2 ^ g2; + int32_t x3 = f3 ^ g3; + int32_t x4 = f4 ^ g4; + int32_t x5 = f5 ^ g5; + int32_t x6 = f6 ^ g6; + int32_t x7 = f7 ^ g7; + int32_t x8 = f8 ^ g8; + int32_t x9 = f9 ^ g9; + b = -b; + x0 &= b; + x1 &= b; + x2 &= b; + x3 &= b; + x4 &= b; + x5 &= b; + x6 &= b; + x7 &= b; + x8 &= b; + x9 &= b; + f[0] = f0 ^ x0; + f[1] = f1 ^ x1; + f[2] = f2 ^ x2; + f[3] = f3 ^ x3; + f[4] = f4 ^ x4; + f[5] = f5 ^ x5; + f[6] = f6 ^ x6; + f[7] = f7 ^ x7; + f[8] = f8 ^ x8; + f[9] = f9 ^ x9; + g[0] = g0 ^ x0; + g[1] = g1 ^ x1; + g[2] = g2 ^ x2; + g[3] = g3 ^ x3; + g[4] = g4 ^ x4; + g[5] = g5 ^ x5; + g[6] = g6 ^ x6; + g[7] = g7 ^ x7; + g[8] = g8 ^ x8; + g[9] = g9 ^ x9; +} + + + +static uint64_t load_3(const unsigned char *in) +{ + uint64_t result; + result = (uint64_t) in[0]; + result |= ((uint64_t) in[1]) << 8; + result |= ((uint64_t) in[2]) << 16; + return result; +} + +static uint64_t load_4(const unsigned char *in) +{ + uint64_t result; + result = (uint64_t) in[0]; + result |= ((uint64_t) in[1]) << 8; + result |= ((uint64_t) in[2]) << 16; + result |= ((uint64_t) in[3]) << 24; + return result; +} + +void fe_frombytes(fe h,const unsigned char *s) +{ + int64_t h0 = load_4(s); + int64_t h1 = load_3(s + 4) << 6; + int64_t h2 = load_3(s + 7) << 5; + int64_t h3 = load_3(s + 10) << 3; + int64_t h4 = load_3(s + 13) << 2; + int64_t h5 = load_4(s + 16); + int64_t h6 = load_3(s + 20) << 7; + int64_t h7 = load_3(s + 23) << 5; + int64_t h8 = load_3(s + 26) << 4; + int64_t 
h9 = (load_3(s + 29) & 8388607) << 2; + int64_t carry0; + int64_t carry1; + int64_t carry2; + int64_t carry3; + int64_t carry4; + int64_t carry5; + int64_t carry6; + int64_t carry7; + int64_t carry8; + int64_t carry9; + + carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25; + carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25; + carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25; + carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25; + carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25; + + carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26; + carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26; + carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26; + carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26; + carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26; + + h[0] = (int32_t)h0; + h[1] = (int32_t)h1; + h[2] = (int32_t)h2; + h[3] = (int32_t)h3; + h[4] = (int32_t)h4; + h[5] = (int32_t)h5; + h[6] = (int32_t)h6; + h[7] = (int32_t)h7; + h[8] = (int32_t)h8; + h[9] = (int32_t)h9; +} + + + +void fe_invert(fe out,fe z) +{ + fe t0; + fe t1; + fe t2; + fe t3; + int i; + +#include + + return; +} + + +/* +h = f * 121666 +Can overlap h with f. + +Preconditions: + |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. + +Postconditions: + |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. 
+*/ + +void fe_mul121666(fe h,fe f) +{ + int32_t f0 = f[0]; + int32_t f1 = f[1]; + int32_t f2 = f[2]; + int32_t f3 = f[3]; + int32_t f4 = f[4]; + int32_t f5 = f[5]; + int32_t f6 = f[6]; + int32_t f7 = f[7]; + int32_t f8 = f[8]; + int32_t f9 = f[9]; + int64_t h0 = f0 * (int64_t) 121666; + int64_t h1 = f1 * (int64_t) 121666; + int64_t h2 = f2 * (int64_t) 121666; + int64_t h3 = f3 * (int64_t) 121666; + int64_t h4 = f4 * (int64_t) 121666; + int64_t h5 = f5 * (int64_t) 121666; + int64_t h6 = f6 * (int64_t) 121666; + int64_t h7 = f7 * (int64_t) 121666; + int64_t h8 = f8 * (int64_t) 121666; + int64_t h9 = f9 * (int64_t) 121666; + int64_t carry0; + int64_t carry1; + int64_t carry2; + int64_t carry3; + int64_t carry4; + int64_t carry5; + int64_t carry6; + int64_t carry7; + int64_t carry8; + int64_t carry9; + + carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25; + carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25; + carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25; + carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25; + carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25; + + carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26; + carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26; + carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26; + carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26; + carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26; + + h[0] = (int32_t)h0; + h[1] = (int32_t)h1; + h[2] = (int32_t)h2; + h[3] = (int32_t)h3; + h[4] = (int32_t)h4; + h[5] = (int32_t)h5; + h[6] = (int32_t)h6; + h[7] = (int32_t)h7; + h[8] = (int32_t)h8; + h[9] = (int32_t)h9; +} + + + +/* +h = f * g +Can overlap h with f or g. + +Preconditions: + |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. + |g| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc. 

Postconditions:
   |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
*/

/*
Notes on implementation strategy:

Using schoolbook multiplication.
Karatsuba would save a little in some cost models.

Most multiplications by 2 and 19 are 32-bit precomputations;
cheaper than 64-bit postcomputations.

There is one remaining multiplication by 19 in the carry chain;
one *19 precomputation can be merged into this,
but the resulting data flow is considerably less clean.

There are 12 carries below.
10 of them are 2-way parallelizable and vectorizable.
Can get away with 11 carries, but then data flow is much deeper.

With tighter constraints on inputs can squeeze carries into int32.
*/

void fe_mul(fe h,fe f,fe g)
{
    /* copy both operands into locals; h is only written at the very end */
    int32_t f0 = f[0];
    int32_t f1 = f[1];
    int32_t f2 = f[2];
    int32_t f3 = f[3];
    int32_t f4 = f[4];
    int32_t f5 = f[5];
    int32_t f6 = f[6];
    int32_t f7 = f[7];
    int32_t f8 = f[8];
    int32_t f9 = f[9];
    int32_t g0 = g[0];
    int32_t g1 = g[1];
    int32_t g2 = g[2];
    int32_t g3 = g[3];
    int32_t g4 = g[4];
    int32_t g5 = g[5];
    int32_t g6 = g[6];
    int32_t g7 = g[7];
    int32_t g8 = g[8];
    int32_t g9 = g[9];
    /* 32-bit precomputations: 19*g[j], used in products with i+j >= 10 */
    int32_t g1_19 = 19 * g1; /* 1.4*2^29 */
    int32_t g2_19 = 19 * g2; /* 1.4*2^30; still ok */
    int32_t g3_19 = 19 * g3;
    int32_t g4_19 = 19 * g4;
    int32_t g5_19 = 19 * g5;
    int32_t g6_19 = 19 * g6;
    int32_t g7_19 = 19 * g7;
    int32_t g8_19 = 19 * g8;
    int32_t g9_19 = 19 * g9;
    /* 32-bit precomputations: 2*f[i] for odd i, used when j is odd as well */
    int32_t f1_2 = 2 * f1;
    int32_t f3_2 = 2 * f3;
    int32_t f5_2 = 2 * f5;
    int32_t f7_2 = 2 * f7;
    int32_t f9_2 = 2 * f9;
    /* the 100 partial products, each widened to 64 bits; a name suffix
       (_2, _19, _38) gives the constant factor folded into the product */
    int64_t f0g0    = f0   * (int64_t) g0;
    int64_t f0g1    = f0   * (int64_t) g1;
    int64_t f0g2    = f0   * (int64_t) g2;
    int64_t f0g3    = f0   * (int64_t) g3;
    int64_t f0g4    = f0   * (int64_t) g4;
    int64_t f0g5    = f0   * (int64_t) g5;
    int64_t f0g6    = f0   * (int64_t) g6;
    int64_t f0g7    = f0   * (int64_t) g7;
    int64_t f0g8    = f0   * (int64_t) g8;
    int64_t f0g9    = f0   * (int64_t) g9;
    int64_t f1g0    = f1   * (int64_t) g0;
    int64_t f1g1_2  = f1_2 * (int64_t) g1;
    int64_t f1g2    = f1   * (int64_t) g2;
    int64_t f1g3_2  = f1_2 * (int64_t) g3;
    int64_t f1g4    = f1   * (int64_t) g4;
    int64_t f1g5_2  = f1_2 * (int64_t) g5;
    int64_t f1g6    = f1   * (int64_t) g6;
    int64_t f1g7_2  = f1_2 * (int64_t) g7;
    int64_t f1g8    = f1   * (int64_t) g8;
    int64_t f1g9_38 = f1_2 * (int64_t) g9_19;
    int64_t f2g0    = f2   * (int64_t) g0;
    int64_t f2g1    = f2   * (int64_t) g1;
    int64_t f2g2    = f2   * (int64_t) g2;
    int64_t f2g3    = f2   * (int64_t) g3;
    int64_t f2g4    = f2   * (int64_t) g4;
    int64_t f2g5    = f2   * (int64_t) g5;
    int64_t f2g6    = f2   * (int64_t) g6;
    int64_t f2g7    = f2   * (int64_t) g7;
    int64_t f2g8_19 = f2   * (int64_t) g8_19;
    int64_t f2g9_19 = f2   * (int64_t) g9_19;
    int64_t f3g0    = f3   * (int64_t) g0;
    int64_t f3g1_2  = f3_2 * (int64_t) g1;
    int64_t f3g2    = f3   * (int64_t) g2;
    int64_t f3g3_2  = f3_2 * (int64_t) g3;
    int64_t f3g4    = f3   * (int64_t) g4;
    int64_t f3g5_2  = f3_2 * (int64_t) g5;
    int64_t f3g6    = f3   * (int64_t) g6;
    int64_t f3g7_38 = f3_2 * (int64_t) g7_19;
    int64_t f3g8_19 = f3   * (int64_t) g8_19;
    int64_t f3g9_38 = f3_2 * (int64_t) g9_19;
    int64_t f4g0    = f4   * (int64_t) g0;
    int64_t f4g1    = f4   * (int64_t) g1;
    int64_t f4g2    = f4   * (int64_t) g2;
    int64_t f4g3    = f4   * (int64_t) g3;
    int64_t f4g4    = f4   * (int64_t) g4;
    int64_t f4g5    = f4   * (int64_t) g5;
    int64_t f4g6_19 = f4   * (int64_t) g6_19;
    int64_t f4g7_19 = f4   * (int64_t) g7_19;
    int64_t f4g8_19 = f4   * (int64_t) g8_19;
    int64_t f4g9_19 = f4   * (int64_t) g9_19;
    int64_t f5g0    = f5   * (int64_t) g0;
    int64_t f5g1_2  = f5_2 * (int64_t) g1;
    int64_t f5g2    = f5   * (int64_t) g2;
    int64_t f5g3_2  = f5_2 * (int64_t) g3;
    int64_t f5g4    = f5   * (int64_t) g4;
    int64_t f5g5_38 = f5_2 * (int64_t) g5_19;
    int64_t f5g6_19 = f5   * (int64_t) g6_19;
    int64_t f5g7_38 = f5_2 * (int64_t) g7_19;
    int64_t f5g8_19 = f5   * (int64_t) g8_19;
    int64_t f5g9_38 = f5_2 * (int64_t) g9_19;
    int64_t f6g0    = f6   * (int64_t) g0;
    int64_t f6g1    = f6   * (int64_t) g1;
    int64_t f6g2    = f6   * (int64_t) g2;
    int64_t f6g3    = f6   * (int64_t) g3;
    int64_t f6g4_19 = f6   * (int64_t) g4_19;
    int64_t f6g5_19 = f6   * (int64_t) g5_19;
    int64_t f6g6_19 = f6   * (int64_t) g6_19;
    int64_t f6g7_19 = f6   * (int64_t) g7_19;
    int64_t f6g8_19 = f6   * (int64_t) g8_19;
    int64_t f6g9_19 = f6   * (int64_t) g9_19;
    int64_t f7g0    = f7   * (int64_t) g0;
    int64_t f7g1_2  = f7_2 * (int64_t) g1;
    int64_t f7g2    = f7   * (int64_t) g2;
    int64_t f7g3_38 = f7_2 * (int64_t) g3_19;
    int64_t f7g4_19 = f7   * (int64_t) g4_19;
    int64_t f7g5_38 = f7_2 * (int64_t) g5_19;
    int64_t f7g6_19 = f7   * (int64_t) g6_19;
    int64_t f7g7_38 = f7_2 * (int64_t) g7_19;
    int64_t f7g8_19 = f7   * (int64_t) g8_19;
    int64_t f7g9_38 = f7_2 * (int64_t) g9_19;
    int64_t f8g0    = f8   * (int64_t) g0;
    int64_t f8g1    = f8   * (int64_t) g1;
    int64_t f8g2_19 = f8   * (int64_t) g2_19;
    int64_t f8g3_19 = f8   * (int64_t) g3_19;
    int64_t f8g4_19 = f8   * (int64_t) g4_19;
    int64_t f8g5_19 = f8   * (int64_t) g5_19;
    int64_t f8g6_19 = f8   * (int64_t) g6_19;
    int64_t f8g7_19 = f8   * (int64_t) g7_19;
    int64_t f8g8_19 = f8   * (int64_t) g8_19;
    int64_t f8g9_19 = f8   * (int64_t) g9_19;
    int64_t f9g0    = f9   * (int64_t) g0;
    int64_t f9g1_38 = f9_2 * (int64_t) g1_19;
    int64_t f9g2_19 = f9   * (int64_t) g2_19;
    int64_t f9g3_38 = f9_2 * (int64_t) g3_19;
    int64_t f9g4_19 = f9   * (int64_t) g4_19;
    int64_t f9g5_38 = f9_2 * (int64_t) g5_19;
    int64_t f9g6_19 = f9   * (int64_t) g6_19;
    int64_t f9g7_38 = f9_2 * (int64_t) g7_19;
    int64_t f9g8_19 = f9   * (int64_t) g8_19;
    int64_t f9g9_38 = f9_2 * (int64_t) g9_19;
    /* output limb k sums the products with i+j == k and, already scaled by
       19, those with i+j == k+10 */
    int64_t h0 = f0g0+f1g9_38+f2g8_19+f3g7_38+f4g6_19+f5g5_38+f6g4_19+f7g3_38+f8g2_19+f9g1_38;
    int64_t h1 = f0g1+f1g0   +f2g9_19+f3g8_19+f4g7_19+f5g6_19+f6g5_19+f7g4_19+f8g3_19+f9g2_19;
    int64_t h2 = f0g2+f1g1_2 +f2g0   +f3g9_38+f4g8_19+f5g7_38+f6g6_19+f7g5_38+f8g4_19+f9g3_38;
    int64_t h3 = f0g3+f1g2   +f2g1   +f3g0   +f4g9_19+f5g8_19+f6g7_19+f7g6_19+f8g5_19+f9g4_19;
    int64_t h4 = f0g4+f1g3_2 +f2g2   +f3g1_2 +f4g0   +f5g9_38+f6g8_19+f7g7_38+f8g6_19+f9g5_38;
    int64_t h5 = f0g5+f1g4   +f2g3   +f3g2   +f4g1   +f5g0   +f6g9_19+f7g8_19+f8g7_19+f9g6_19;
    int64_t h6 = f0g6+f1g5_2 +f2g4   +f3g3_2 +f4g2   +f5g1_2 +f6g0   +f7g9_38+f8g8_19+f9g7_38;
    int64_t h7 = f0g7+f1g6   +f2g5   +f3g4   +f4g3   +f5g2   +f6g1   +f7g0   +f8g9_19+f9g8_19;
    int64_t h8 = f0g8+f1g7_2 +f2g6   +f3g5_2 +f4g4   +f5g3_2 +f6g2   +f7g1_2 +f8g0   +f9g9_38;
    int64_t h9 = f0g9+f1g8   +f2g7   +f3g6   +f4g5   +f5g4   +f6g3   +f7g2   +f8g1   +f9g0   ;
    int64_t carry0;
    int64_t carry1;
    int64_t carry2;
    int64_t carry3;
    int64_t carry4;
    int64_t carry5;
    int64_t carry6;
    int64_t carry7;
    int64_t carry8;
    int64_t carry9;

    /*
    |h0| <= (1.1*1.1*2^52*(1+19+19+19+19)+1.1*1.1*2^50*(38+38+38+38+38))
      i.e. |h0| <= 1.2*2^59; narrower ranges for h2, h4, h6, h8
    |h1| <= (1.1*1.1*2^51*(1+1+19+19+19+19+19+19+19+19))
      i.e. |h1| <= 1.5*2^58; narrower ranges for h3, h5, h7, h9
    */

    /* carry chain: even limbs carry at bit 26, odd limbs at bit 25; the
       statement order below is part of the range argument in the comments */
    carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
    carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
    /* |h0| <= 2^25 */
    /* |h4| <= 2^25 */
    /* |h1| <= 1.51*2^58 */
    /* |h5| <= 1.51*2^58 */

    carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
    carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25;
    /* |h1| <= 2^24; from now on fits into int32 */
    /* |h5| <= 2^24; from now on fits into int32 */
    /* |h2| <= 1.21*2^59 */
    /* |h6| <= 1.21*2^59 */

    carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
    carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26;
    /* |h2| <= 2^25; from now on fits into int32 unchanged */
    /* |h6| <= 2^25; from now on fits into int32 unchanged */
    /* |h3| <= 1.51*2^58 */
    /* |h7| <= 1.51*2^58 */

    carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
    carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25;
    /* |h3| <= 2^24; from now on fits into int32 unchanged */
    /* |h7| <= 2^24; from now on fits into int32 unchanged */
    /* |h4| <= 1.52*2^33 */
    /* |h8| <= 1.52*2^33 */

    carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
    carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26;
    /* |h4| <= 2^25; from now on fits into int32 unchanged */
    /* |h8| <= 2^25; from now on fits into int32 unchanged */
    /* |h5| <= 1.01*2^24 */
    /* |h9| <= 1.51*2^58 */

    /* the carry out of the top limb re-enters limb 0 multiplied by 19 */
    carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;
    /* |h9| <= 2^24; from now on fits into int32 unchanged */
    /* |h0| <= 1.8*2^37 */

    carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
    /* |h0| <= 2^25; from now on fits into int32 unchanged */
    /* |h1| <= 1.01*2^24 */

    /* narrow the reduced limbs back to 32 bits */
    h[0] = (int32_t)h0;
    h[1] = (int32_t)h1;
    h[2] = (int32_t)h2;
    h[3] = (int32_t)h3;
    h[4] = (int32_t)h4;
    h[5] = (int32_t)h5;
    h[6] = (int32_t)h6;
    h[7] = (int32_t)h7;
    h[8] = (int32_t)h8;
    h[9] = (int32_t)h9;
}



/*
h = f * f
Can overlap h with f.

Preconditions:
   |f| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.

Postconditions:
   |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
*/

/*
See fe_mul.c for discussion of implementation strategy.
*/

void fe_sq(fe h,fe f)
{
    /* copy the operand into locals; h is only written at the very end */
    int32_t f0 = f[0];
    int32_t f1 = f[1];
    int32_t f2 = f[2];
    int32_t f3 = f[3];
    int32_t f4 = f[4];
    int32_t f5 = f[5];
    int32_t f6 = f[6];
    int32_t f7 = f[7];
    int32_t f8 = f[8];
    int32_t f9 = f[9];
    /* 32-bit precomputations of the doubled and 19/38-scaled limbs */
    int32_t f0_2 = 2 * f0;
    int32_t f1_2 = 2 * f1;
    int32_t f2_2 = 2 * f2;
    int32_t f3_2 = 2 * f3;
    int32_t f4_2 = 2 * f4;
    int32_t f5_2 = 2 * f5;
    int32_t f6_2 = 2 * f6;
    int32_t f7_2 = 2 * f7;
    int32_t f5_38 = 38 * f5; /* 1.31*2^30 */
    int32_t f6_19 = 19 * f6; /* 1.31*2^30 */
    int32_t f7_38 = 38 * f7; /* 1.31*2^30 */
    int32_t f8_19 = 19 * f8; /* 1.31*2^30 */
    int32_t f9_38 = 38 * f9; /* 1.31*2^30 */
    /* one 64-bit product per unordered limb pair (i <= j); a name suffix
       (_2, _4, _19, _38, _76) gives the constant factor folded in */
    int64_t f0f0    = f0   * (int64_t) f0;
    int64_t f0f1_2  = f0_2 * (int64_t) f1;
    int64_t f0f2_2  = f0_2 * (int64_t) f2;
    int64_t f0f3_2  = f0_2 * (int64_t) f3;
    int64_t f0f4_2  = f0_2 * (int64_t) f4;
    int64_t f0f5_2  = f0_2 * (int64_t) f5;
    int64_t f0f6_2  = f0_2 * (int64_t) f6;
    int64_t f0f7_2  = f0_2 * (int64_t) f7;
    int64_t f0f8_2  = f0_2 * (int64_t) f8;
    int64_t f0f9_2  = f0_2 * (int64_t) f9;
    int64_t f1f1_2  = f1_2 * (int64_t) f1;
    int64_t f1f2_2  = f1_2 * (int64_t) f2;
    int64_t f1f3_4  = f1_2 * (int64_t) f3_2;
    int64_t f1f4_2  = f1_2 * (int64_t) f4;
    int64_t f1f5_4  = f1_2 * (int64_t) f5_2;
    int64_t f1f6_2  = f1_2 * (int64_t) f6;
    int64_t f1f7_4  = f1_2 * (int64_t) f7_2;
    int64_t f1f8_2  = f1_2 * (int64_t) f8;
    int64_t f1f9_76 = f1_2 * (int64_t) f9_38;
    int64_t f2f2    = f2   * (int64_t) f2;
    int64_t f2f3_2  = f2_2 * (int64_t) f3;
    int64_t f2f4_2  = f2_2 * (int64_t) f4;
    int64_t f2f5_2  = f2_2 * (int64_t) f5;
    int64_t f2f6_2  = f2_2 * (int64_t) f6;
    int64_t f2f7_2  = f2_2 * (int64_t) f7;
    int64_t f2f8_38 = f2_2 * (int64_t) f8_19;
    int64_t f2f9_38 = f2   * (int64_t) f9_38;
    int64_t f3f3_2  = f3_2 * (int64_t) f3;
    int64_t f3f4_2  = f3_2 * (int64_t) f4;
    int64_t f3f5_4  = f3_2 * (int64_t) f5_2;
    int64_t f3f6_2  = f3_2 * (int64_t) f6;
    int64_t f3f7_76 = f3_2 * (int64_t) f7_38;
    int64_t f3f8_38 = f3_2 * (int64_t) f8_19;
    int64_t f3f9_76 = f3_2 * (int64_t) f9_38;
    int64_t f4f4    = f4   * (int64_t) f4;
    int64_t f4f5_2  = f4_2 * (int64_t) f5;
    int64_t f4f6_38 = f4_2 * (int64_t) f6_19;
    int64_t f4f7_38 = f4   * (int64_t) f7_38;
    int64_t f4f8_38 = f4_2 * (int64_t) f8_19;
    int64_t f4f9_38 = f4   * (int64_t) f9_38;
    int64_t f5f5_38 = f5   * (int64_t) f5_38;
    int64_t f5f6_38 = f5_2 * (int64_t) f6_19;
    int64_t f5f7_76 = f5_2 * (int64_t) f7_38;
    int64_t f5f8_38 = f5_2 * (int64_t) f8_19;
    int64_t f5f9_76 = f5_2 * (int64_t) f9_38;
    int64_t f6f6_19 = f6   * (int64_t) f6_19;
    int64_t f6f7_38 = f6   * (int64_t) f7_38;
    int64_t f6f8_38 = f6_2 * (int64_t) f8_19;
    int64_t f6f9_38 = f6   * (int64_t) f9_38;
    int64_t f7f7_38 = f7   * (int64_t) f7_38;
    int64_t f7f8_38 = f7_2 * (int64_t) f8_19;
    int64_t f7f9_76 = f7_2 * (int64_t) f9_38;
    int64_t f8f8_19 = f8   * (int64_t) f8_19;
    int64_t f8f9_38 = f8   * (int64_t) f9_38;
    int64_t f9f9_38 = f9   * (int64_t) f9_38;
    /* output limb k sums the pair products with i+j == k and, already
       scaled by 19, those with i+j == k+10 */
    int64_t h0 = f0f0  +f1f9_76+f2f8_38+f3f7_76+f4f6_38+f5f5_38;
    int64_t h1 = f0f1_2+f2f9_38+f3f8_38+f4f7_38+f5f6_38;
    int64_t h2 = f0f2_2+f1f1_2 +f3f9_76+f4f8_38+f5f7_76+f6f6_19;
    int64_t h3 = f0f3_2+f1f2_2 +f4f9_38+f5f8_38+f6f7_38;
    int64_t h4 = f0f4_2+f1f3_4 +f2f2   +f5f9_76+f6f8_38+f7f7_38;
    int64_t h5 = f0f5_2+f1f4_2 +f2f3_2 +f6f9_38+f7f8_38;
    int64_t h6 = f0f6_2+f1f5_4 +f2f4_2 +f3f3_2 +f7f9_76+f8f8_19;
    int64_t h7 = f0f7_2+f1f6_2 +f2f5_2 +f3f4_2 +f8f9_38;
    int64_t h8 = f0f8_2+f1f7_4 +f2f6_2 +f3f5_4 +f4f4   +f9f9_38;
    int64_t h9 = f0f9_2+f1f8_2 +f2f7_2 +f3f6_2 +f4f5_2;
    int64_t carry0;
    int64_t carry1;
    int64_t carry2;
    int64_t carry3;
    int64_t carry4;
    int64_t carry5;
    int64_t carry6;
    int64_t carry7;
    int64_t carry8;
    int64_t carry9;

    /* carry chain: even limbs carry at bit 26, odd limbs at bit 25 */
    carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;
    carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;

    carry1 = (h1 + (int64_t) (1<<24)) >> 25; h2 += carry1; h1 -= carry1 << 25;
    carry5 = (h5 + (int64_t) (1<<24)) >> 25; h6 += carry5; h5 -= carry5 << 25;

    carry2 = (h2 + (int64_t) (1<<25)) >> 26; h3 += carry2; h2 -= carry2 << 26;
    carry6 = (h6 + (int64_t) (1<<25)) >> 26; h7 += carry6; h6 -= carry6 << 26;

    carry3 = (h3 + (int64_t) (1<<24)) >> 25; h4 += carry3; h3 -= carry3 << 25;
    carry7 = (h7 + (int64_t) (1<<24)) >> 25; h8 += carry7; h7 -= carry7 << 25;

    carry4 = (h4 + (int64_t) (1<<25)) >> 26; h5 += carry4; h4 -= carry4 << 26;
    carry8 = (h8 + (int64_t) (1<<25)) >> 26; h9 += carry8; h8 -= carry8 << 26;

    /* the carry out of the top limb re-enters limb 0 multiplied by 19 */
    carry9 = (h9 + (int64_t) (1<<24)) >> 25; h0 += carry9 * 19; h9 -= carry9 << 25;

    carry0 = (h0 + (int64_t) (1<<25)) >> 26; h1 += carry0; h0 -= carry0 << 26;

    /* narrow the reduced limbs back to 32 bits */
    h[0] = (int32_t)h0;
    h[1] = (int32_t)h1;
    h[2] = (int32_t)h2;
    h[3] = (int32_t)h3;
    h[4] = (int32_t)h4;
    h[5] = (int32_t)h5;
    h[6] = (int32_t)h6;
    h[7] = (int32_t)h7;
    h[8] = (int32_t)h8;
    h[9] = (int32_t)h9;
}



/*
h = f - g
Can overlap h with f or g.

Preconditions:
   |f| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.
   |g| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc.

Postconditions:
   |h| bounded by 1.1*2^26,1.1*2^25,1.1*2^26,1.1*2^25,etc.
+*/
+
+void fe_sub(fe h,fe f,fe g)
+{
+    /* All ten differences are formed in scratch storage before any limb
+       of h is written, so the result is the same when h shares storage
+       with f or g. */
+    int32_t diff[10];
+    int i;
+
+    for (i = 0; i < 10; i++) {
+        diff[i] = f[i] - g[i];
+    }
+
+    for (i = 0; i < 10; i++) {
+        h[i] = diff[i];
+    }
+}
+ + + +/* +Preconditions: + |h| bounded by 1.1*2^25,1.1*2^24,1.1*2^25,1.1*2^24,etc. + +Write p=2^255-19; q=floor(h/p). +Basic claim: q = floor(2^(-255)(h + 19 2^(-25)h9 + 2^(-1))). + +Proof: + Have |h|<=p so |q|<=1 so |19^2 2^(-255) q|<1/4. + Also have |h-2^230 h9|<2^230 so |19 2^(-255)(h-2^230 h9)|<1/4. + + Write y=2^(-1)-19^2 2^(-255)q-19 2^(-255)(h-2^230 h9). + Then 0> 25; + q = (h0 + q) >> 26; + q = (h1 + q) >> 25; + q = (h2 + q) >> 26; + q = (h3 + q) >> 25; + q = (h4 + q) >> 26; + q = (h5 + q) >> 25; + q = (h6 + q) >> 26; + q = (h7 + q) >> 25; + q = (h8 + q) >> 26; + q = (h9 + q) >> 25; + + /* Goal: Output h-(2^255-19)q, which is between 0 and 2^255-20. */ + h0 += 19 * q; + /* Goal: Output h-2^255 q, which is between 0 and 2^255-20. 
*/ + + carry0 = h0 >> 26; h1 += carry0; h0 -= carry0 << 26; + carry1 = h1 >> 25; h2 += carry1; h1 -= carry1 << 25; + carry2 = h2 >> 26; h3 += carry2; h2 -= carry2 << 26; + carry3 = h3 >> 25; h4 += carry3; h3 -= carry3 << 25; + carry4 = h4 >> 26; h5 += carry4; h4 -= carry4 << 26; + carry5 = h5 >> 25; h6 += carry5; h5 -= carry5 << 25; + carry6 = h6 >> 26; h7 += carry6; h6 -= carry6 << 26; + carry7 = h7 >> 25; h8 += carry7; h7 -= carry7 << 25; + carry8 = h8 >> 26; h9 += carry8; h8 -= carry8 << 26; + carry9 = h9 >> 25; h9 -= carry9 << 25; + /* h10 = carry9 */ + + /* + Goal: Output h0+...+2^255 h10-2^255 q, which is between 0 and 2^255-20. + Have h0+...+2^230 h9 between 0 and 2^255-1; + evidently 2^255 h10-2^255 q = 0. + Goal: Output h0+...+2^230 h9. + */ + + s[0] = h0 >> 0; + s[1] = h0 >> 8; + s[2] = h0 >> 16; + s[3] = (h0 >> 24) | (h1 << 2); + s[4] = h1 >> 6; + s[5] = h1 >> 14; + s[6] = (h1 >> 22) | (h2 << 3); + s[7] = h2 >> 5; + s[8] = h2 >> 13; + s[9] = (h2 >> 21) | (h3 << 5); + s[10] = h3 >> 3; + s[11] = h3 >> 11; + s[12] = (h3 >> 19) | (h4 << 6); + s[13] = h4 >> 2; + s[14] = h4 >> 10; + s[15] = h4 >> 18; + s[16] = h5 >> 0; + s[17] = h5 >> 8; + s[18] = h5 >> 16; + s[19] = (h5 >> 24) | (h6 << 1); + s[20] = h6 >> 7; + s[21] = h6 >> 15; + s[22] = (h6 >> 23) | (h7 << 3); + s[23] = h7 >> 5; + s[24] = h7 >> 13; + s[25] = (h7 >> 21) | (h8 << 4); + s[26] = h8 >> 4; + s[27] = h8 >> 12; + s[28] = (h8 >> 20) | (h9 << 6); + s[29] = h9 >> 2; + s[30] = h9 >> 10; + s[31] = h9 >> 18; +} + +#endif /*HAVE_ECC25519*/ + diff --git a/wolfcrypt/src/ecc25519_montgomery.q b/wolfcrypt/src/ecc25519_montgomery.q new file mode 100644 index 000000000..39a2cb134 --- /dev/null +++ b/wolfcrypt/src/ecc25519_montgomery.q @@ -0,0 +1,99 @@ +#/* ecc25519_montgomery.q +# * +# * Copyright (C) 2006-2015 wolfSSL Inc. +# * +# * This file is part of wolfSSL. 
(formerly known as CyaSSL) +# * +# * wolfSSL is free software; you can redistribute it and/or modify +# * it under the terms of the GNU General Public License as published by +# * the Free Software Foundation; either version 2 of the License, or +# * (at your option) any later version. +# * +# * wolfSSL is distributed in the hope that it will be useful, +# * but WITHOUT ANY WARRANTY; without even the implied warranty of +# * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# * GNU General Public License for more details. +# * +# * You should have received a copy of the GNU General Public License +# * along with this program; if not, write to the Free Software +# * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA +# * +# * From Daniel J Bernstein's curve25519 ref10 work. +# */ + +:name:fe:x2:z2:x3:z3:tmp0:tmp1: +fe r:var/r=fe: + +enter f:enter/f:>X2=fe#1:>Z2=fe#2:>X3=fe#3:>Z3=fe#4: +return:nofallthrough:h=fe:asm/fe_add(>h,h=fe:asm/fe_sub(>h,h=fe:asm/fe_mul(>h,h=fe:asm/fe_sq(>h,h=fe:asm/fe_mul(>h,x1,h=fe:asm/fe_mul121666(>h,z1=fe#11: +return:nofallthrough:h=fe:asm/fe_mul(>h,h=fe:#k:asm/fe_sq(>h,h,>h);: + +: + +fe z1 +fe z2 +fe z8 +fe z9 +fe z11 +fe z22 +fe z_5_0 +fe z_10_5 +fe z_10_0 +fe z_20_10 +fe z_20_0 +fe z_40_20 +fe z_40_0 +fe z_50_10 +fe z_50_0 +fe z_100_50 +fe z_100_0 +fe z_200_100 +fe z_200_0 +fe z_250_50 +fe z_250_0 +fe z_255_5 +fe z_255_21 + +enter pow225521 + +z2 = z1^2^1 +z8 = z2^2^2 +z9 = z1*z8 +z11 = z2*z9 +z22 = z11^2^1 +z_5_0 = z9*z22 +z_10_5 = z_5_0^2^5 +z_10_0 = z_10_5*z_5_0 +z_20_10 = z_10_0^2^10 +z_20_0 = z_20_10*z_10_0 +z_40_20 = z_20_0^2^20 +z_40_0 = z_40_20*z_20_0 +z_50_10 = z_40_0^2^10 +z_50_0 = z_50_10*z_10_0 +z_100_50 = z_50_0^2^50 +z_100_0 = z_100_50*z_50_0 +z_200_100 = z_100_0^2^100 +z_200_0 = z_200_100*z_100_0 +z_250_50 = z_200_0^2^50 +z_250_0 = z_250_50*z_50_0 +z_255_5 = z_250_0^2^5 +z_255_21 = z_255_5*z11 + +return diff --git a/wolfcrypt/test/test.c b/wolfcrypt/test/test.c index 
c48a3a928..595950e69 100644 --- a/wolfcrypt/test/test.c +++ b/wolfcrypt/test/test.c @@ -62,6 +62,9 @@ #ifdef HAVE_ECC #include #endif +#ifdef HAVE_ECC25519 + #include +#endif #ifdef HAVE_BLAKE2 #include #endif @@ -184,6 +187,9 @@ int pbkdf2_test(void); int ecc_encrypt_test(void); #endif #endif +#ifdef HAVE_ECC25519 + int ecc25519_test(void); +#endif #ifdef HAVE_BLAKE2 int blake2b_test(void); #endif @@ -510,6 +516,13 @@ int wolfcrypt_test(void* args) #endif #endif +#ifdef HAVE_ECC25519 + if ( (ret = ecc25519_test()) != 0) + return err_sys("ECC25519 test failed!\n", ret); + else + printf( "ECC25519 test passed!\n"); +#endif + #ifdef HAVE_LIBZ if ( (ret = compress_test()) != 0) return err_sys("COMPRESS test failed!\n", ret); @@ -4992,6 +5005,74 @@ int ecc_encrypt_test(void) #endif /* HAVE_ECC_ENCRYPT */ #endif /* HAVE_ECC */
+
+#ifdef HAVE_ECC25519
+
+/*
+ * Self-test for the ECC25519 key-agreement API.
+ *
+ * Generates two key pairs, checks that both sides derive the same shared
+ * secret, then exports one public key, imports it into a fresh key object
+ * and checks that the secret derived from the imported copy still matches.
+ *
+ * Returns 0 on success or a distinct negative code naming the step that
+ * failed.  Once the three key objects have been initialised, every exit
+ * goes through the cleanup label so they are always released.
+ */
+int ecc25519_test(void)
+{
+    RNG    rng;
+    byte   sharedA[1024];
+    byte   sharedB[1024];
+    byte   exportBuf[1024];
+    word32 x, y;
+    ecc25519_key userA, userB, pubKey;
+    int    ret = 0;
+
+    /* nothing to release yet, so a plain return is fine here */
+    if (wc_InitRng(&rng) != 0)
+        return -1001;
+
+    wc_ecc25519_init(&userA);
+    wc_ecc25519_init(&userB);
+    wc_ecc25519_init(&pubKey);
+
+    /* make curve25519 keys */
+    if (wc_ecc25519_make_key(&rng, 32, &userA) != 0) {
+        ret = -1014;
+        goto done;
+    }
+
+    if (wc_ecc25519_make_key(&rng, 32, &userB) != 0) {
+        ret = -1002;
+        goto done;
+    }
+
+    /* find shared secret key */
+    x = sizeof(sharedA);
+    if (wc_ecc25519_shared_secret(&userA, &userB, sharedA, &x) != 0) {
+        ret = -1015;
+        goto done;
+    }
+
+    y = sizeof(sharedB);
+    if (wc_ecc25519_shared_secret(&userB, &userA, sharedB, &y) != 0) {
+        ret = -1003;
+        goto done;
+    }
+
+    /* compare shared secret keys to test they are the same */
+    if (y != x) {
+        ret = -1004;
+        goto done;
+    }
+
+    if (memcmp(sharedA, sharedB, x)) {
+        ret = -1005;
+        goto done;
+    }
+
+    /* export a public key and import it for another user */
+    x = sizeof(exportBuf);
+    if (wc_ecc25519_export_public(&userA, exportBuf, &x) != 0) {
+        ret = -1006;
+        goto done;
+    }
+
+    if (wc_ecc25519_import_public(exportBuf, x, &pubKey) != 0) {
+        ret = -1007;
+        goto done;
+    }
+
+    /* test shared key after importing a public key */
+    y = sizeof(sharedB);
+    if (wc_ecc25519_shared_secret(&userB, &pubKey, sharedB, &y) != 0) {
+        ret = -1008;
+        goto done;
+    }
+
+    if (memcmp(sharedA, sharedB, y)) {
+        ret = -1010;
+        goto done;
+    }
+
+done:
+    /* clean up keys on every path, success or failure */
+    wc_ecc25519_free(&pubKey);
+    wc_ecc25519_free(&userB);
+    wc_ecc25519_free(&userA);
+
+    return ret;
+}
+#endif /* HAVE_ECC25519 */
+ + #ifdef HAVE_LIBZ const byte sample_text[] = diff --git a/wolfssl/wolfcrypt/ecc25519.h b/wolfssl/wolfcrypt/ecc25519.h new file mode 100644 index 000000000..8fd23e088 --- /dev/null +++ b/wolfssl/wolfcrypt/ecc25519.h @@ -0,0 +1,101 @@ +/* ecc25519.h + * + * Copyright (C) 2006-2015 wolfSSL Inc. + * + * This file is part of wolfSSL. (formerly known as CyaSSL) + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */
+
+#ifdef HAVE_ECC25519
+
+#ifndef WOLF_CRYPT_ECC25519_H
+#define WOLF_CRYPT_ECC25519_H
+
+#include
+#include
+#include
+#include
+
+#ifdef __cplusplus
+    extern "C" {
+#endif
+
+#define ECC25519_KEYSIZE 32 /* number of bytes stored in an ECPoint */
+
+/* ECC set type */
+typedef struct {
+    int size;          /* The size of the curve in octets */
+    const char* name;  /* name of this curve */
+} ecc25519_set_type;
+
+
+/* ECC point */
+typedef struct {
+    byte point[ECC25519_KEYSIZE]; /* raw value, ECC25519_KEYSIZE bytes long */
+}ECPoint;
+
+/* An ECC25519 Key */
+typedef struct {
+    int type;           /* Public or Private */
+    int idx;            /* Index into the ecc_sets[] for the parameters of
+                           this curve if -1, this key is using user supplied
+                           curve in dp */
+    const ecc25519_set_type* dp; /* domain parameters, either points to
+                                    curves (idx >= 0) or user supplied */
+    byte f;             /* format of key */
+    ECPoint p;          /* public key */
+    ECPoint k;          /* private key */
+} ecc25519_key;
+
+WOLFSSL_API /* the test and benchmark in this patch pass keysize 32 and treat a 0 return as success */
+int wc_ecc25519_make_key(RNG* rng, int keysize, ecc25519_key* key);
+
+WOLFSSL_API /* callers set *outlen to the capacity of out before the call and treat 0 as success; presumably *outlen returns the secret length - confirm */
+int wc_ecc25519_shared_secret(ecc25519_key* private_key, ecc25519_key* public_key,
+        byte* out, word32* outlen);
+
+WOLFSSL_API /* the test calls this on every key object before first use */
+int wc_ecc25519_init(ecc25519_key* key);
+
+WOLFSSL_API /* the test and benchmark call this on every key object when finished with it */
+void wc_ecc25519_free(ecc25519_key* key);
+
+
+/* raw key helpers */
+WOLFSSL_API
+int wc_ecc25519_import_private_raw(const byte* priv, word32 privSz,
+        const byte* pub, word32 pubSz, ecc25519_key* key);
+WOLFSSL_API
+int wc_ecc25519_export_private_raw(ecc25519_key* key, byte* out, word32* outLen);
+
+WOLFSSL_API /* the test feeds the bytes and length produced by wc_ecc25519_export_public into a fresh key; 0 is treated as success */
+int wc_ecc25519_import_public(const byte* in, word32 inLen, ecc25519_key* key);
+
+WOLFSSL_API /* the test sets *outLen to the capacity of out first, then passes the resulting *outLen on as the import length */
+int wc_ecc25519_export_public(ecc25519_key* key, byte* out, word32* outLen);
+
+
+/* size helper */
+WOLFSSL_API
+int wc_ecc25519_size(ecc25519_key* key);
+
+#ifdef __cplusplus
+    } /* extern "C" */
+#endif + +#endif /* WOLF_CRYPT_ECC25519_H */ +#endif /* HAVE_ECC25519 */ + diff --git a/wolfssl/wolfcrypt/ecc25519_fe.h b/wolfssl/wolfcrypt/ecc25519_fe.h new file mode 100644 index 000000000..c27d9e7c7 --- /dev/null +++ b/wolfssl/wolfcrypt/ecc25519_fe.h @@ -0,0 +1,57 @@ +/* ecc25519_fe.h + * + * Copyright (C) 2006-2015 wolfSSL Inc. + * + * This file is part of wolfSSL. (formerly known as CyaSSL) + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + */
+
+#ifdef HAVE_ECC25519
+#ifndef WOLF_CRYPT_ECC25519_FE_H
+#define WOLF_CRYPT_ECC25519_FE_H
+
+#include
+#include
+
+typedef int32_t fe[10]; /* ten signed 32-bit limbs; their weights are listed in the note that follows */
+
+/*
+fe means field element.
+Here the field is \Z/(2^255-19).
+An element t, entries t[0]...t[9], represents the integer
+t[0]+2^26 t[1]+2^51 t[2]+2^77 t[3]+2^102 t[4]+...+2^230 t[9].
+Bounds on each t[i] vary depending on context.
+*/
+
+void fe_frombytes(fe,const unsigned char *);
+void fe_tobytes(unsigned char *,fe); /* (s,h): the definition in this patch stores 32 bytes, s[0]..s[31] */
+
+void fe_copy(fe,fe);
+void fe_0(fe);
+void fe_1(fe);
+void fe_cswap(fe,fe,unsigned int);
+
+void fe_add(fe,fe,fe); /* presumably (h,f,g) with h = f + g, mirroring fe_sub - confirm against the definition */
+void fe_sub(fe,fe,fe); /* (h,f,g): h = f - g limb by limb; h may alias f or g */
+void fe_mul(fe,fe,fe); /* presumably (h,f,g) with h = f * g - confirm against the definition */
+void fe_sq(fe,fe); /* (h,f): h = f * f; h may alias f */
+void fe_mul121666(fe,fe);
+void fe_invert(fe,fe); /* NOTE(review): likely built on ecc25519_pow225521.h, which raises z to 2^255-21 - confirm */
+
+#endif
+
+#endif /*HAVE_ECC25519*/
+
diff --git a/wolfssl/wolfcrypt/ecc25519_montgomery.h b/wolfssl/wolfcrypt/ecc25519_montgomery.h new file mode 100644 index 000000000..d949f3ff1 --- /dev/null +++ b/wolfssl/wolfcrypt/ecc25519_montgomery.h @@ -0,0 +1,42 @@ +/* ecc25519_montgomery.h + * + * Copyright (C) 2006-2015 wolfSSL Inc. + * + * This file is part of wolfSSL. (formerly known as CyaSSL) + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * + * From Daniel J Bernstein's curve25519 ref10 work. 
+ */
+
+fe_sub(tmp0,x3,z3); /* tmp0 = X3 - Z3   (capitals = values on entry; fe_add/fe_mul assumed destination-first like fe_sub/fe_sq - confirm) */
+fe_sub(tmp1,x2,z2); /* tmp1 = X2 - Z2 */
+fe_add(x2,x2,z2); /* x2 = X2 + Z2 */
+fe_add(z2,x3,z3); /* z2 = X3 + Z3 */
+fe_mul(z3,tmp0,x2); /* z3 = (X3 - Z3)(X2 + Z2) */
+fe_mul(z2,z2,tmp1); /* z2 = (X3 + Z3)(X2 - Z2) */
+fe_sq(tmp0,tmp1); /* tmp0 = (X2 - Z2)^2 */
+fe_sq(tmp1,x2); /* tmp1 = (X2 + Z2)^2 */
+fe_add(x3,z3,z2); /* x3 = sum of the two cross products above */
+fe_sub(z2,z3,z2); /* z2 = difference of the two cross products above */
+fe_mul(x2,tmp1,tmp0); /* x2 = (X2 + Z2)^2 (X2 - Z2)^2; x2 is not written again */
+fe_sub(tmp1,tmp1,tmp0); /* tmp1 = (X2 + Z2)^2 - (X2 - Z2)^2 = 4 X2 Z2 */
+fe_sq(z2,z2); /* z2 = (difference of cross products)^2 */
+fe_mul121666(z3,tmp1); /* z3 = fe_mul121666 applied to tmp1 (presumably tmp1 * 121666 - confirm) */
+fe_sq(x3,x3); /* x3 = (sum of cross products)^2; x3 is not written again */
+fe_add(tmp0,tmp0,z3); /* tmp0 = (X2 - Z2)^2 + z3 */
+fe_mul(z3,x1,z2); /* z3 = x1 * (difference of cross products)^2; final z3 */
+fe_mul(z2,tmp1,tmp0); /* z2 = tmp1 * tmp0; final z2 - NOTE(review): looks like one Montgomery ladder step (double x2:z2, differential-add into x3:z3) - confirm */
+
diff --git a/wolfssl/wolfcrypt/ecc25519_pow225521.h b/wolfssl/wolfcrypt/ecc25519_pow225521.h new file mode 100644 index 000000000..1082ee6bf --- /dev/null +++ b/wolfssl/wolfcrypt/ecc25519_pow225521.h @@ -0,0 +1,46 @@ +/* ecc25519_pow225521.h + * + * Copyright (C) 2006-2015 wolfSSL Inc. + * + * This file is part of wolfSSL. (formerly known as CyaSSL) + * + * wolfSSL is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * wolfSSL is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + * + * From Daniel J Bernstein's curve25519 ref10 work. 
+ */
+
+fe_sq(t0,z); for (i = 1;i < 1;++i) fe_sq(t0,t0); /* t0 = z^2 (the loop bound makes it run zero times) */
+fe_sq(t1,t0); for (i = 1;i < 2;++i) fe_sq(t1,t1); /* t1 = z^8 (two squarings of t0) */
+fe_mul(t1,z,t1); /* t1 = z^9   (taking fe_mul(h,f,g) as h = f*g; its body is not in this part of the patch) */
+fe_mul(t0,t0,t1); /* t0 = z^11 */
+fe_sq(t2,t0); for (i = 1;i < 1;++i) fe_sq(t2,t2); /* t2 = z^22 */
+fe_mul(t1,t1,t2); /* t1 = z^31 = z^(2^5 - 1) */
+fe_sq(t2,t1); for (i = 1;i < 5;++i) fe_sq(t2,t2); /* t2 = z^(2^10 - 2^5) */
+fe_mul(t1,t2,t1); /* t1 = z^(2^10 - 1) */
+fe_sq(t2,t1); for (i = 1;i < 10;++i) fe_sq(t2,t2); /* t2 = z^(2^20 - 2^10) */
+fe_mul(t2,t2,t1); /* t2 = z^(2^20 - 1) */
+fe_sq(t3,t2); for (i = 1;i < 20;++i) fe_sq(t3,t3); /* t3 = z^(2^40 - 2^20) */
+fe_mul(t2,t3,t2); /* t2 = z^(2^40 - 1) */
+fe_sq(t2,t2); for (i = 1;i < 10;++i) fe_sq(t2,t2); /* t2 = z^(2^50 - 2^10) */
+fe_mul(t1,t2,t1); /* t1 = z^(2^50 - 1) */
+fe_sq(t2,t1); for (i = 1;i < 50;++i) fe_sq(t2,t2); /* t2 = z^(2^100 - 2^50) */
+fe_mul(t2,t2,t1); /* t2 = z^(2^100 - 1) */
+fe_sq(t3,t2); for (i = 1;i < 100;++i) fe_sq(t3,t3); /* t3 = z^(2^200 - 2^100) */
+fe_mul(t2,t3,t2); /* t2 = z^(2^200 - 1) */
+fe_sq(t2,t2); for (i = 1;i < 50;++i) fe_sq(t2,t2); /* t2 = z^(2^250 - 2^50) */
+fe_mul(t1,t2,t1); /* t1 = z^(2^250 - 1) */
+fe_sq(t1,t1); for (i = 1;i < 5;++i) fe_sq(t1,t1); /* t1 = z^(2^255 - 2^5) */
+fe_mul(out,t1,t0); /* out = z^(2^255 - 32 + 11) = z^(2^255 - 21), i.e. z^(p-2) for p = 2^255-19 */
+
diff --git a/wolfssl/wolfcrypt/include.am b/wolfssl/wolfcrypt/include.am index fc9d836c0..1e7209d99 100644 --- a/wolfssl/wolfcrypt/include.am +++ b/wolfssl/wolfcrypt/include.am @@ -14,6 +14,8 @@ nobase_include_HEADERS+= \ wolfssl/wolfcrypt/dh.h \ wolfssl/wolfcrypt/dsa.h \ wolfssl/wolfcrypt/ecc.h \ + wolfssl/wolfcrypt/ecc25519.h \ + wolfssl/wolfcrypt/ecc25519_fe.h \ wolfssl/wolfcrypt/error-crypt.h \ wolfssl/wolfcrypt/fips_test.h \ wolfssl/wolfcrypt/hc128.h \