diff --git a/configure.ac b/configure.ac index f7fde4d59..5ecb76338 100644 --- a/configure.ac +++ b/configure.ac @@ -1163,6 +1163,12 @@ then ENABLED_CURVE25519=yes fi +if test "$ENABLED_CURVE25519" = "no128bit" +then + AM_CFLAGS="$AM_CFLAGS -DNO_CURVED25519_128BIT" + ENABLED_CURVE25519=yes +fi + if test "$ENABLED_CURVE25519" = "yes" then AM_CFLAGS="$AM_CFLAGS -DHAVE_CURVE25519" diff --git a/wolfcrypt/src/fe_operations.c b/wolfcrypt/src/fe_operations.c index 7da9b2f50..f02301186 100755 --- a/wolfcrypt/src/fe_operations.c +++ b/wolfcrypt/src/fe_operations.c @@ -41,7 +41,7 @@ #include #endif -#ifdef HAVE___UINT128_T +#ifdef CURVED25519_128BIT #include "fe_x25519_128.i" #else diff --git a/wolfcrypt/src/fe_x25519_128.i b/wolfcrypt/src/fe_x25519_128.i index 1b939a671..1d174488f 100644 --- a/wolfcrypt/src/fe_x25519_128.i +++ b/wolfcrypt/src/fe_x25519_128.i @@ -1,4 +1,4 @@ -/* fp_mont_small.i +/* fe_x25519_128.i * * Copyright (C) 2006-2017 wolfSSL Inc. * @@ -253,6 +253,7 @@ void fe_add(fe r, const fe a, const fe b) */ void fe_mul(fe r, const fe a, const fe b) { + const __int128_t k19 = 19; __int128_t t0 = ((__int128_t)a[0]) * b[0]; __int128_t t1 = ((__int128_t)a[0]) * b[1] + ((__int128_t)a[1]) * b[0]; @@ -280,19 +281,19 @@ void fe_mul(fe r, const fe a, const fe b) __int128_t t8 = ((__int128_t)a[4]) * b[4]; /* Modulo reduce double long word. */ - t0 += t5 * 19; - t1 += t6 * 19; - t2 += t7 * 19; - t3 += t8 * 19; + t0 += t5 * k19; + t1 += t6 * k19; + t2 += t7 * k19; + t3 += t8 * k19; /* Normalize to 51-bits of data per word. 
*/ - t0 += (t4 >> 51) * 19; t4 &= 0x7ffffffffffff; + t0 += (t4 >> 51) * k19; t4 &= 0x7ffffffffffff; t1 += t0 >> 51; r[0] = t0 & 0x7ffffffffffff; t2 += t1 >> 51; r[1] = t1 & 0x7ffffffffffff; t3 += t2 >> 51; r[2] = t2 & 0x7ffffffffffff; t4 += t3 >> 51; r[3] = t3 & 0x7ffffffffffff; - r[0] += (t4 >> 51) * 19; + r[0] += (t4 >> 51) * k19; r[4] = t4 & 0x7ffffffffffff; } @@ -304,36 +305,38 @@ void fe_mul(fe r, const fe a, const fe b) */ void fe_sq(fe r, const fe a) { + const __int128_t k19 = 19; + const __int128_t k2 = 2; __int128_t t0 = ((__int128_t)a[0]) * a[0]; - __int128_t t1 = ((__int128_t)a[0]) * a[1] * 2; - __int128_t t2 = ((__int128_t)a[0]) * a[2] * 2 + __int128_t t1 = ((__int128_t)a[0]) * a[1] * k2; + __int128_t t2 = ((__int128_t)a[0]) * a[2] * k2 + ((__int128_t)a[1]) * a[1]; - __int128_t t3 = ((__int128_t)a[0]) * a[3] * 2 - + ((__int128_t)a[1]) * a[2] * 2; - __int128_t t4 = ((__int128_t)a[0]) * a[4] * 2 - + ((__int128_t)a[1]) * a[3] * 2 + __int128_t t3 = ((__int128_t)a[0]) * a[3] * k2 + + ((__int128_t)a[1]) * a[2] * k2; + __int128_t t4 = ((__int128_t)a[0]) * a[4] * k2 + + ((__int128_t)a[1]) * a[3] * k2 + ((__int128_t)a[2]) * a[2]; - __int128_t t5 = ((__int128_t)a[1]) * a[4] * 2 - + ((__int128_t)a[2]) * a[3] * 2; - __int128_t t6 = ((__int128_t)a[2]) * a[4] * 2 + __int128_t t5 = ((__int128_t)a[1]) * a[4] * k2 + + ((__int128_t)a[2]) * a[3] * k2; + __int128_t t6 = ((__int128_t)a[2]) * a[4] * k2 + ((__int128_t)a[3]) * a[3]; - __int128_t t7 = ((__int128_t)a[3]) * a[4] * 2; + __int128_t t7 = ((__int128_t)a[3]) * a[4] * k2; __int128_t t8 = ((__int128_t)a[4]) * a[4]; /* Modulo reduce double long word. */ - t0 += t5 * 19; - t1 += t6 * 19; - t2 += t7 * 19; - t3 += t8 * 19; + t0 += t5 * k19; + t1 += t6 * k19; + t2 += t7 * k19; + t3 += t8 * k19; /* Normalize to 51-bits of data per word. 
*/ - t0 += (t4 >> 51) * 19; t4 &= 0x7ffffffffffff; + t0 += (t4 >> 51) * k19; t4 &= 0x7ffffffffffff; t1 += t0 >> 51; r[0] = t0 & 0x7ffffffffffff; t2 += t1 >> 51; r[1] = t1 & 0x7ffffffffffff; t3 += t2 >> 51; r[2] = t2 & 0x7ffffffffffff; t4 += t3 >> 51; r[3] = t3 & 0x7ffffffffffff; - r[0] += (t4 >> 51) * 19; + r[0] += (t4 >> 51) * k19; r[4] = t4 & 0x7ffffffffffff; } @@ -345,20 +348,22 @@ void fe_sq(fe r, const fe a) */ void fe_mul121666(fe r, fe a) { - __int128_t t0 = ((__int128_t)a[0]) * (int64_t)121666; - __int128_t t1 = ((__int128_t)a[1]) * (int64_t)121666; - __int128_t t2 = ((__int128_t)a[2]) * (int64_t)121666; - __int128_t t3 = ((__int128_t)a[3]) * (int64_t)121666; - __int128_t t4 = ((__int128_t)a[4]) * (int64_t)121666; + const __int128_t k19 = 19; + const __int128_t k121666 = 121666; + __int128_t t0 = ((__int128_t)a[0]) * k121666; + __int128_t t1 = ((__int128_t)a[1]) * k121666; + __int128_t t2 = ((__int128_t)a[2]) * k121666; + __int128_t t3 = ((__int128_t)a[3]) * k121666; + __int128_t t4 = ((__int128_t)a[4]) * k121666; /* Normalize to 51-bits of data per word. 
*/ - t0 += (t4 >> 51) * 19; t4 &= 0x7ffffffffffff; + t0 += (t4 >> 51) * k19; t4 &= 0x7ffffffffffff; t1 += t0 >> 51; r[0] = t0 & 0x7ffffffffffff; t2 += t1 >> 51; r[1] = t1 & 0x7ffffffffffff; t3 += t2 >> 51; r[2] = t2 & 0x7ffffffffffff; t4 += t3 >> 51; r[3] = t3 & 0x7ffffffffffff; - r[0] += (t4 >> 51) * 19; + r[0] += (t4 >> 51) * k19; r[4] = t4 & 0x7ffffffffffff; } @@ -546,36 +551,38 @@ void fe_pow22523(fe r, const fe a) */ void fe_sq2(fe r, const fe a) { - __int128_t t0 = 2 * (((__int128_t)a[0]) * a[0]); - __int128_t t1 = 2 * (((__int128_t)a[0]) * a[1] * 2); - __int128_t t2 = 2 * (((__int128_t)a[0]) * a[2] * 2 + const __int128_t k2 = 2; + const __int128_t k19 = 19; + __int128_t t0 = k2 * (((__int128_t)a[0]) * a[0]); + __int128_t t1 = k2 * (((__int128_t)a[0]) * a[1] * k2); + __int128_t t2 = k2 * (((__int128_t)a[0]) * a[2] * k2 + ((__int128_t)a[1]) * a[1]); - __int128_t t3 = 2 * (((__int128_t)a[0]) * a[3] * 2 - + ((__int128_t)a[1]) * a[2] * 2); - __int128_t t4 = 2 * (((__int128_t)a[0]) * a[4] * 2 - + ((__int128_t)a[1]) * a[3] * 2 + __int128_t t3 = k2 * (((__int128_t)a[0]) * a[3] * k2 + + ((__int128_t)a[1]) * a[2] * k2); + __int128_t t4 = k2 * (((__int128_t)a[0]) * a[4] * k2 + + ((__int128_t)a[1]) * a[3] * k2 + ((__int128_t)a[2]) * a[2]); - __int128_t t5 = 2 * (((__int128_t)a[1]) * a[4] * 2 - + ((__int128_t)a[2]) * a[3] * 2); - __int128_t t6 = 2 * (((__int128_t)a[2]) * a[4] * 2 + __int128_t t5 = k2 * (((__int128_t)a[1]) * a[4] * k2 + + ((__int128_t)a[2]) * a[3] * k2); + __int128_t t6 = k2 * (((__int128_t)a[2]) * a[4] * k2 + ((__int128_t)a[3]) * a[3]); - __int128_t t7 = 2 * (((__int128_t)a[3]) * a[4] * 2); - __int128_t t8 = 2 * (((__int128_t)a[4]) * a[4]); + __int128_t t7 = k2 * (((__int128_t)a[3]) * a[4] * k2); + __int128_t t8 = k2 * (((__int128_t)a[4]) * a[4]); /* Modulo reduce double long word. 
*/ - t0 += t5 * 19; - t1 += t6 * 19; - t2 += t7 * 19; - t3 += t8 * 19; + t0 += t5 * k19; + t1 += t6 * k19; + t2 += t7 * k19; + t3 += t8 * k19; /* Normalize to 51-bits of data per word. */ - t0 += (t4 >> 51) * 19; t4 &= 0x7ffffffffffff; + t0 += (t4 >> 51) * k19; t4 &= 0x7ffffffffffff; t1 += t0 >> 51; r[0] = t0 & 0x7ffffffffffff; t2 += t1 >> 51; r[1] = t1 & 0x7ffffffffffff; t3 += t2 >> 51; r[2] = t2 & 0x7ffffffffffff; t4 += t3 >> 51; r[3] = t3 & 0x7ffffffffffff; - r[0] += (t4 >> 51) * 19; + r[0] += (t4 >> 51) * k19; r[4] = t4 & 0x7ffffffffffff; } diff --git a/wolfcrypt/src/ge_operations.c b/wolfcrypt/src/ge_operations.c index 3da367027..f692ac31c 100644 --- a/wolfcrypt/src/ge_operations.c +++ b/wolfcrypt/src/ge_operations.c @@ -765,7 +765,7 @@ static void cmov(ge_precomp *t,const ge_precomp *u,unsigned char b) fe_cmov(t->xy2d,u->xy2d,b); } -#ifdef HAVE___UINT128_T +#ifdef CURVED25519_128BIT static const ge_precomp base[32][8] = { { { @@ -3569,7 +3569,7 @@ static void slide(signed char *r,const unsigned char *a) } } -#ifdef HAVE___UINT128_T +#ifdef CURVED25519_128BIT static const ge_precomp Bi[8] = { { { 0x493c6f58c3b85, 0x0df7181c325f7, 0x0f50b0b3e4cb7, 0x5329385a44c32, 0x07cf9d3a33d4b }, @@ -3719,7 +3719,7 @@ int ge_double_scalarmult_vartime(ge_p2 *r, const unsigned char *a, return 0; } -#ifdef HAVE___UINT128_T +#ifdef CURVED25519_128BIT static const ge d = { 0x34dca135978a3, 0x1a8283b156ebd, 0x5e7a26001c029, 0x739c663a03cbb, 0x52036cee2b6ff @@ -3732,7 +3732,7 @@ static const ge d = { #endif -#ifdef HAVE___UINT128_T +#ifdef CURVED25519_128BIT static const ge sqrtm1 = { 0x61b274a0ea0b0, 0x0d5a5fc8f189d, 0x7ef5e9cbd0c60, 0x78595a6804c9e, 0x2b8324804fc1d @@ -3921,7 +3921,7 @@ void ge_p3_dbl(ge_p1p1 *r,const ge_p3 *p) r = p */ -#ifdef HAVE___UINT128_T +#ifdef CURVED25519_128BIT static const ge d2 = { 0x69b9426b2f159, 0x35050762add7a, 0x3cf44c0038052, 0x6738cc7407977, 0x2406d9dc56dff diff --git a/wolfssl/wolfcrypt/fe_operations.h b/wolfssl/wolfcrypt/fe_operations.h index 
318712a82..4761de40f 100644 --- a/wolfssl/wolfcrypt/fe_operations.h +++ b/wolfssl/wolfcrypt/fe_operations.h @@ -33,6 +33,10 @@ #include +#if defined(HAVE___UINT128_T) && !defined(NO_CURVED25519_128BIT) + #define CURVED25519_128BIT +#endif + /* fe means field element. Here the field is \Z/(2^255-19). @@ -60,7 +64,7 @@ WOLFSSL_LOCAL int curve25519(byte * q, byte * n, byte * p); /* default to be faster but take more memory */ #if !defined(CURVE25519_SMALL) || !defined(ED25519_SMALL) -#if defined(HAVE___UINT128_T) +#if defined(CURVED25519_128BIT) typedef int64_t fe[5]; #else typedef int32_t fe[10]; diff --git a/wolfssl/wolfcrypt/ge_operations.h b/wolfssl/wolfcrypt/ge_operations.h index b4f189960..f3a110b33 100644 --- a/wolfssl/wolfcrypt/ge_operations.h +++ b/wolfssl/wolfcrypt/ge_operations.h @@ -47,7 +47,7 @@ Representations: #ifdef ED25519_SMALL typedef byte ge[F25519_SIZE]; -#elif defined(HAVE___UINT128_T) +#elif defined(CURVED25519_128BIT) typedef int64_t ge[5]; #else typedef int32_t ge[10];