From 9f0fa7500f553e19ba904129aeef70ecb8499103 Mon Sep 17 00:00:00 2001 From: David Garske Date: Wed, 4 May 2016 23:14:30 -0700 Subject: [PATCH 01/16] Added configure "--disable-eccshamir" option. --- configure.ac | 35 +++++++++++++++++++++++++++++++---- 1 file changed, 31 insertions(+), 4 deletions(-) diff --git a/configure.ac b/configure.ac index a46fcc17b..64c4231c8 100644 --- a/configure.ac +++ b/configure.ac @@ -731,6 +731,14 @@ then ECC_DEFAULT=yes fi +# ECC Shamir +AC_ARG_ENABLE([ecc], + [AS_HELP_STRING([--enable-eccshamir],[Enable ECC Shamir (default: enabled on x86_64)])], + [ ENABLED_ECC_SHAMIR=$enableval ], + [ ENABLED_ECC_SHAMIR=$ECC_DEFAULT ] + ) + + # ECC AC_ARG_ENABLE([ecc], [AS_HELP_STRING([--enable-ecc],[Enable ECC (default: enabled on x86_64)])], @@ -751,7 +759,11 @@ fi if test "$ENABLED_ECC" = "yes" then - AM_CFLAGS="$AM_CFLAGS -DHAVE_ECC -DTFM_ECC256 -DECC_SHAMIR" + AM_CFLAGS="$AM_CFLAGS -DHAVE_ECC -DTFM_ECC256" + if test "$ENABLED_ECC_SHAMIR" = "yes" + then + AM_CFLAGS="$AM_CFLAGS -DECC_SHAMIR" + fi fi AM_CONDITIONAL([BUILD_ECC], [test "x$ENABLED_ECC" = "xyes"]) @@ -1961,8 +1973,13 @@ then if test "x$ENABLED_ECC" = "xno" then ENABLED_ECC="yes" - AM_CFLAGS="$AM_CFLAGS -DHAVE_ECC -DTFM_ECC256 -DECC_SHAMIR" + AM_CFLAGS="$AM_CFLAGS -DHAVE_ECC -DTFM_ECC256" AM_CONDITIONAL([BUILD_ECC], [test "x$ENABLED_ECC" = "xyes"]) + + if test "$ENABLED_ECC_SHAMIR" = "yes" + then + AM_CFLAGS="$AM_CFLAGS -DECC_SHAMIR" + fi fi if test "x$ENABLED_OPENSSLEXTRA" = "xno" then @@ -2029,8 +2046,13 @@ then if test "x$ENABLED_ECC" = "xno" then ENABLED_ECC="yes" - AM_CFLAGS="$AM_CFLAGS -DHAVE_ECC -DTFM_ECC256 -DECC_SHAMIR" + AM_CFLAGS="$AM_CFLAGS -DHAVE_ECC -DTFM_ECC256" AM_CONDITIONAL([BUILD_ECC], [test "x$ENABLED_ECC" = "xyes"]) + + if test "$ENABLED_ECC_SHAMIR" = "yes" + then + AM_CFLAGS="$AM_CFLAGS -DECC_SHAMIR" + fi fi if test "x$ENABLED_PKCALLBACKS" = "xno" then @@ -2122,8 +2144,13 @@ then then ENABLED_OPENSSLEXTRA="yes" ENABLED_ECC="yes" - AM_CFLAGS="$AM_CFLAGS 
-DHAVE_ECC -DTFM_ECC256 -DECC_SHAMIR" + AM_CFLAGS="$AM_CFLAGS -DHAVE_ECC -DTFM_ECC256" AM_CONDITIONAL([BUILD_ECC], [test "x$ENABLED_ECC" = "xyes"]) + + if test "$ENABLED_ECC_SHAMIR" = "yes" + then + AM_CFLAGS="$AM_CFLAGS -DECC_SHAMIR" + fi fi AM_CFLAGS="$AM_CFLAGS -DHAVE_STUNNEL -DWOLFSSL_ALWAYS_VERIFY_CB" From 9001036e0926dfa2d4352b2aa3330901e6839495 Mon Sep 17 00:00:00 2001 From: David Garske Date: Wed, 4 May 2016 23:14:59 -0700 Subject: [PATCH 02/16] Fixes memory leak in the wc_RsaFunction if failure happens when using normal math (not fast math) and RSA_LOW_MEM is not defined. --- wolfcrypt/src/rsa.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/wolfcrypt/src/rsa.c b/wolfcrypt/src/rsa.c index 690a7c804..7dd775809 100644 --- a/wolfcrypt/src/rsa.c +++ b/wolfcrypt/src/rsa.c @@ -845,7 +845,9 @@ static int wc_RsaFunction(const byte* in, word32 inLen, byte* out, mp_clear(&tmpa); mp_clear(&tmpb); - if (ret != 0) return ret; + if (ret != 0) { + goto done; + } #endif /* RSA_LOW_MEM */ } From 7c3fbd76440a9f8e258f94ecd76c62d086df3c57 Mon Sep 17 00:00:00 2001 From: David Garske Date: Wed, 4 May 2016 23:15:38 -0700 Subject: [PATCH 03/16] Fix for fp_copy() when used with ALT_ECC_SIZE so any excess digits on the destination that we didn't write to are set to zero. 
--- wolfcrypt/src/tfm.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/wolfcrypt/src/tfm.c b/wolfcrypt/src/tfm.c index 81372ab8c..be71639f6 100644 --- a/wolfcrypt/src/tfm.c +++ b/wolfcrypt/src/tfm.c @@ -2177,13 +2177,20 @@ int mp_div_2d(fp_int* a, int b, fp_int* c, fp_int* d) } #ifdef ALT_ECC_SIZE -void fp_copy(fp_int *a, fp_int* b) +void fp_copy(fp_int *a, fp_int *b) { if (a != b && b->size >= a->used) { + int x, oldused; + oldused = b->used; b->used = a->used; b->sign = a->sign; XMEMCPY(b->dp, a->dp, a->used * sizeof(fp_digit)); + + /* zero any excess digits on the destination that we didn't write to */ + for (x = b->used; x < oldused; x++) { + b->dp[x] = 0; + } } } From a5d27853fa8d7cd62ba1d26eeaa6219d99d68b5a Mon Sep 17 00:00:00 2001 From: David Garske Date: Wed, 4 May 2016 23:19:24 -0700 Subject: [PATCH 04/16] Fixes to fp_mul and fp_div to clear any excess digits on the destination. Added compile-time check to confirm FP_SIZE is compatible with TFM_ acceleration defines enabled. Updated comments in other places where excess digits are cleared. 
--- wolfcrypt/src/tfm.c | 167 ++++++++++++++++++++++------------------ wolfssl/wolfcrypt/tfm.h | 72 ++--------------- 2 files changed, 99 insertions(+), 140 deletions(-) diff --git a/wolfcrypt/src/tfm.c b/wolfcrypt/src/tfm.c index be71639f6..dd1aa14b4 100644 --- a/wolfcrypt/src/tfm.c +++ b/wolfcrypt/src/tfm.c @@ -118,6 +118,8 @@ void s_fp_add(fp_int *a, fp_int *b, fp_int *c) } c->used = x; + + /* zero any excess digits on the destination that we didn't write to */ for (; x < oldused; x++) { c->dp[x] = 0; } @@ -179,6 +181,8 @@ void s_fp_sub(fp_int *a, fp_int *b, fp_int *c) c->dp[x] = (fp_digit)t; t = (t >> DIGIT_BIT)&1; } + + /* zero any excess digits on the destination that we didn't write to */ for (; x < oldused; x++) { c->dp[x] = 0; } @@ -188,7 +192,9 @@ void s_fp_sub(fp_int *a, fp_int *b, fp_int *c) /* c = a * b */ void fp_mul(fp_int *A, fp_int *B, fp_int *C) { - int y, yy; + int y, yy, oldused; + + oldused = C->used; y = MAX(A->used, B->used); yy = MIN(A->used, B->used); @@ -196,7 +202,7 @@ void fp_mul(fp_int *A, fp_int *B, fp_int *C) /* call generic if we're out of range */ if (y + yy > FP_SIZE) { fp_mul_comba(A, B, C); - return ; + goto clean; } /* pick a comba (unrolled 4/8/16/32 x or rolled) based on the size @@ -205,98 +211,104 @@ void fp_mul(fp_int *A, fp_int *B, fp_int *C) if say y=17 then we would do (32-17)^2 = 225 unneeded multiplications */ -#ifdef TFM_MUL3 +#if defined(TFM_MUL3) && FP_SIZE >= 6 if (y <= 3) { fp_mul_comba3(A,B,C); - return; + goto clean; } #endif -#ifdef TFM_MUL4 +#if defined(TFM_MUL4) && FP_SIZE >= 8 if (y == 4) { fp_mul_comba4(A,B,C); - return; + goto clean; } #endif -#ifdef TFM_MUL6 +#if defined(TFM_MUL6) && FP_SIZE >= 12 if (y <= 6) { fp_mul_comba6(A,B,C); - return; + goto clean; } #endif -#ifdef TFM_MUL7 +#if defined(TFM_MUL7) && FP_SIZE >= 14 if (y == 7) { fp_mul_comba7(A,B,C); - return; + goto clean; } #endif -#ifdef TFM_MUL8 +#if defined(TFM_MUL8) && FP_SIZE >= 16 if (y == 8) { fp_mul_comba8(A,B,C); - return; + goto clean; } 
#endif -#ifdef TFM_MUL9 +#if defined(TFM_MUL9) && FP_SIZE >= 18 if (y == 9) { fp_mul_comba9(A,B,C); - return; + goto clean; } #endif -#ifdef TFM_MUL12 +#if defined(TFM_MUL12) && FP_SIZE >= 24 if (y <= 12) { fp_mul_comba12(A,B,C); - return; + goto clean; } #endif -#ifdef TFM_MUL17 +#if defined(TFM_MUL17) && FP_SIZE >= 34 if (y <= 17) { fp_mul_comba17(A,B,C); - return; + goto clean; } #endif -#ifdef TFM_SMALL_SET +#if defined(TFM_SMALL_SET) && FP_SIZE >= 32 if (y <= 16) { fp_mul_comba_small(A,B,C); - return; + goto clean; } #endif -#if defined(TFM_MUL20) +#if defined(TFM_MUL20) && FP_SIZE >= 40 if (y <= 20) { fp_mul_comba20(A,B,C); - return; + goto clean; } #endif -#if defined(TFM_MUL24) +#if defined(TFM_MUL24) && FP_SIZE >= 48 if (yy >= 16 && y <= 24) { fp_mul_comba24(A,B,C); - return; + goto clean; } #endif -#if defined(TFM_MUL28) +#if defined(TFM_MUL28) && FP_SIZE >= 56 if (yy >= 20 && y <= 28) { fp_mul_comba28(A,B,C); - return; + goto clean; } #endif -#if defined(TFM_MUL32) +#if defined(TFM_MUL32) && FP_SIZE >= 64 if (yy >= 24 && y <= 32) { fp_mul_comba32(A,B,C); - return; + goto clean; } #endif -#if defined(TFM_MUL48) +#if defined(TFM_MUL48) && FP_SIZE >= 96 if (yy >= 40 && y <= 48) { - fp_mul_comba48(A,B,C); - return; + fp_mul_comba48(A,B,C); + goto clean; } #endif -#if defined(TFM_MUL64) +#if defined(TFM_MUL64) && FP_SIZE >= 128 if (yy >= 56 && y <= 64) { fp_mul_comba64(A,B,C); - return; + goto clean; } #endif fp_mul_comba(A,B,C); + +clean: + /* zero any excess digits on the destination that we didn't write to */ + for (y = C->used; y < oldused; y++) { + C->dp[y] = 0; + } } void fp_mul_2(fp_int * a, fp_int * b) @@ -340,9 +352,7 @@ void fp_mul_2(fp_int * a, fp_int * b) ++(b->used); } - /* now zero any excess digits on the destination - * that we didn't write to - */ + /* zero any excess digits on the destination that we didn't write to */ tmpb = b->dp + b->used; for (x = b->used; x < oldused; x++) { *tmpb++ = 0; @@ -370,6 +380,8 @@ void fp_mul_d(fp_int *a, 
fp_digit b, fp_int *c) c->dp[c->used++] = (fp_digit) w; ++x; } + + /* zero any excess digits on the destination that we didn't write to */ for (; x < oldused; x++) { c->dp[x] = 0; } @@ -627,9 +639,7 @@ int fp_div(fp_int *a, fp_int *b, fp_int *c, fp_int *d) if (d != NULL) { fp_div_2d (&x, norm, &x, NULL); -/* the following is a kludge, essentially we were seeing the right remainder but - with excess digits that should have been zero - */ + /* zero any excess digits on the destination that we didn't write to */ for (i = b->used; i < x.used; i++) { x.dp[i] = 0; } @@ -669,7 +679,7 @@ void fp_div_2(fp_int * a, fp_int * b) r = rr; } - /* zero excess digits */ + /* zero any excess digits on the destination that we didn't write to */ tmpb = b->dp + b->used; for (x = b->used; x < oldused; x++) { *tmpb++ = 0; @@ -1267,105 +1277,114 @@ void fp_2expt(fp_int *a, int b) /* b = a*a */ void fp_sqr(fp_int *A, fp_int *B) { - int y = A->used; + int y, oldused; + + oldused = B->used; + y = A->used; /* call generic if we're out of range */ if (y + y > FP_SIZE) { fp_sqr_comba(A, B); - return ; + goto clean; } -#if defined(TFM_SQR3) +#if defined(TFM_SQR3) && FP_SIZE >= 6 if (y <= 3) { fp_sqr_comba3(A,B); - return; + goto clean; } #endif -#if defined(TFM_SQR4) +#if defined(TFM_SQR4) && FP_SIZE >= 8 if (y == 4) { fp_sqr_comba4(A,B); - return; + goto clean; } #endif -#if defined(TFM_SQR6) +#if defined(TFM_SQR6) && FP_SIZE >= 12 if (y <= 6) { fp_sqr_comba6(A,B); - return; + goto clean; } #endif -#if defined(TFM_SQR7) +#if defined(TFM_SQR7) && FP_SIZE >= 14 if (y == 7) { fp_sqr_comba7(A,B); - return; + goto clean; } #endif -#if defined(TFM_SQR8) +#if defined(TFM_SQR8) && FP_SIZE >= 16 if (y == 8) { fp_sqr_comba8(A,B); - return; + goto clean; } #endif -#if defined(TFM_SQR9) +#if defined(TFM_SQR9) && FP_SIZE >= 18 if (y == 9) { fp_sqr_comba9(A,B); - return; + goto clean; } #endif -#if defined(TFM_SQR12) +#if defined(TFM_SQR12) && FP_SIZE >= 24 if (y <= 12) { fp_sqr_comba12(A,B); - return; + 
goto clean; } #endif -#if defined(TFM_SQR17) +#if defined(TFM_SQR17) && FP_SIZE >= 34 if (y <= 17) { fp_sqr_comba17(A,B); - return; + goto clean; } #endif #if defined(TFM_SMALL_SET) if (y <= 16) { fp_sqr_comba_small(A,B); - return; + goto clean; } #endif -#if defined(TFM_SQR20) +#if defined(TFM_SQR20) && FP_SIZE >= 40 if (y <= 20) { fp_sqr_comba20(A,B); - return; + goto clean; } #endif -#if defined(TFM_SQR24) +#if defined(TFM_SQR24) && FP_SIZE >= 48 if (y <= 24) { fp_sqr_comba24(A,B); - return; + goto clean; } #endif -#if defined(TFM_SQR28) +#if defined(TFM_SQR28) && FP_SIZE >= 56 if (y <= 28) { fp_sqr_comba28(A,B); - return; + goto clean; } #endif -#if defined(TFM_SQR32) +#if defined(TFM_SQR32) && FP_SIZE >= 64 if (y <= 32) { fp_sqr_comba32(A,B); - return; + goto clean; } #endif -#if defined(TFM_SQR48) +#if defined(TFM_SQR48) && FP_SIZE >= 96 if (y <= 48) { fp_sqr_comba48(A,B); - return; + goto clean; } #endif -#if defined(TFM_SQR64) +#if defined(TFM_SQR64) && FP_SIZE >= 128 if (y <= 64) { fp_sqr_comba64(A,B); - return; + goto clean; } #endif fp_sqr_comba(A, B); + +clean: + /* zero any excess digits on the destination that we didn't write to */ + for (y = B->used; y < oldused; y++) { + B->dp[y] = 0; + } } /* generic comba squarer */ @@ -1652,7 +1671,8 @@ static void fp_montgomery_reduce_mulx(fp_int *a, fp_int *m, fp_digit mp) *tmpm++ = *_c++; } - for (; x < oldused; x++) { + /* zero any excess digits on the destination that we didn't write to */ + for (; x < oldused; x++) { *tmpm++ = 0; } @@ -1733,7 +1753,8 @@ void fp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp) *tmpm++ = *_c++; } - for (; x < oldused; x++) { + /* zero any excess digits on the destination that we didn't write to */ + for (; x < oldused; x++) { *tmpm++ = 0; } diff --git a/wolfssl/wolfcrypt/tfm.h b/wolfssl/wolfcrypt/tfm.h index c0e05e4ae..ce633b43d 100644 --- a/wolfssl/wolfcrypt/tfm.h +++ b/wolfssl/wolfcrypt/tfm.h @@ -211,6 +211,7 @@ #if defined(FP_64BIT) /* for GCC only on supported 
platforms */ typedef unsigned long long fp_digit; /* 64bit, 128 uses mode(TI) below */ + #define SIZEOF_FP_DIGIT 8 typedef unsigned long fp_word __attribute__ ((mode(TI))); #else #if defined(_MSC_VER) || defined(__BORLANDC__) @@ -221,12 +222,14 @@ #ifndef NO_64BIT typedef unsigned int fp_digit; + #define SIZEOF_FP_DIGIT 4 typedef ulong64 fp_word; #define FP_32BIT #else /* some procs like coldfire prefer not to place multiply into 64bit type even though it exists */ typedef unsigned short fp_digit; + #define SIZEOF_FP_DIGIT 2 typedef unsigned int fp_word; #endif #endif @@ -234,7 +237,7 @@ #endif /* WOLFSSL_BIGINT_TYPES */ /* # of digits this is */ -#define DIGIT_BIT (int)((CHAR_BIT) * sizeof(fp_digit)) +#define DIGIT_BIT ((CHAR_BIT) * SIZEOF_FP_DIGIT) /* Max size of any number in bits. Basically the largest size you will be * multiplying should be half [or smaller] of FP_MAX_SIZE-four_digit @@ -548,103 +551,38 @@ void fp_reverse(unsigned char *s, int len); void fp_mul_comba(fp_int *a, fp_int *b, fp_int *c); -#ifdef TFM_SMALL_SET void fp_mul_comba_small(fp_int *a, fp_int *b, fp_int *c); -#endif - -#ifdef TFM_MUL3 void fp_mul_comba3(fp_int *a, fp_int *b, fp_int *c); -#endif -#ifdef TFM_MUL4 void fp_mul_comba4(fp_int *a, fp_int *b, fp_int *c); -#endif -#ifdef TFM_MUL6 void fp_mul_comba6(fp_int *a, fp_int *b, fp_int *c); -#endif -#ifdef TFM_MUL7 void fp_mul_comba7(fp_int *a, fp_int *b, fp_int *c); -#endif -#ifdef TFM_MUL8 void fp_mul_comba8(fp_int *a, fp_int *b, fp_int *c); -#endif -#ifdef TFM_MUL9 void fp_mul_comba9(fp_int *a, fp_int *b, fp_int *c); -#endif -#ifdef TFM_MUL12 void fp_mul_comba12(fp_int *a, fp_int *b, fp_int *c); -#endif -#ifdef TFM_MUL17 void fp_mul_comba17(fp_int *a, fp_int *b, fp_int *c); -#endif - -#ifdef TFM_MUL20 void fp_mul_comba20(fp_int *a, fp_int *b, fp_int *c); -#endif -#ifdef TFM_MUL24 void fp_mul_comba24(fp_int *a, fp_int *b, fp_int *c); -#endif -#ifdef TFM_MUL28 void fp_mul_comba28(fp_int *a, fp_int *b, fp_int *c); -#endif -#ifdef TFM_MUL32 
void fp_mul_comba32(fp_int *a, fp_int *b, fp_int *c); -#endif -#ifdef TFM_MUL48 void fp_mul_comba48(fp_int *a, fp_int *b, fp_int *c); -#endif -#ifdef TFM_MUL64 void fp_mul_comba64(fp_int *a, fp_int *b, fp_int *c); -#endif - void fp_sqr_comba(fp_int *a, fp_int *b); - -#ifdef TFM_SMALL_SET void fp_sqr_comba_small(fp_int *a, fp_int *b); -#endif - -#ifdef TFM_SQR3 void fp_sqr_comba3(fp_int *a, fp_int *b); -#endif -#ifdef TFM_SQR4 void fp_sqr_comba4(fp_int *a, fp_int *b); -#endif -#ifdef TFM_SQR6 void fp_sqr_comba6(fp_int *a, fp_int *b); -#endif -#ifdef TFM_SQR7 void fp_sqr_comba7(fp_int *a, fp_int *b); -#endif -#ifdef TFM_SQR8 void fp_sqr_comba8(fp_int *a, fp_int *b); -#endif -#ifdef TFM_SQR9 void fp_sqr_comba9(fp_int *a, fp_int *b); -#endif -#ifdef TFM_SQR12 void fp_sqr_comba12(fp_int *a, fp_int *b); -#endif -#ifdef TFM_SQR17 void fp_sqr_comba17(fp_int *a, fp_int *b); -#endif - -#ifdef TFM_SQR20 void fp_sqr_comba20(fp_int *a, fp_int *b); -#endif -#ifdef TFM_SQR24 void fp_sqr_comba24(fp_int *a, fp_int *b); -#endif -#ifdef TFM_SQR28 void fp_sqr_comba28(fp_int *a, fp_int *b); -#endif -#ifdef TFM_SQR32 void fp_sqr_comba32(fp_int *a, fp_int *b); -#endif -#ifdef TFM_SQR48 void fp_sqr_comba48(fp_int *a, fp_int *b); -#endif -#ifdef TFM_SQR64 void fp_sqr_comba64(fp_int *a, fp_int *b); -#endif + /*extern const char *fp_s_rmap;*/ From fa5dd0100146222a43d7562fdb2c600f481eaecf Mon Sep 17 00:00:00 2001 From: David Garske Date: Wed, 4 May 2016 23:20:03 -0700 Subject: [PATCH 05/16] Fixes/improvements to the wolfCrypt ECC tests. Fixed bug with sharedA/sharedB being too small when BENCH_EMBEDDED is used and curve size over 256 bit. Added error message for ECC test failures, to show the curve size used. Fix to wc_ecc_verify_hash test to use digest that is not all zeros as that doesn't work correctly for non-Shamir ECC math. Changed return code for wc_ecc_check_key so it's unique.
--- wolfcrypt/test/test.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/wolfcrypt/test/test.c b/wolfcrypt/test/test.c index 32da38dc7..d5d114c18 100644 --- a/wolfcrypt/test/test.c +++ b/wolfcrypt/test/test.c @@ -6560,7 +6560,7 @@ static int ecc_test_key_gen(WC_RNG* rng, int keySize) ret = wc_ecc_check_key(&userA); if (ret != 0) - return -1024; + return -1023; derSz = wc_EccKeyToDer(&userA, der, FOURK_BUF); if (derSz < 0) { @@ -6621,8 +6621,8 @@ static int ecc_test_curve_size(WC_RNG* rng, int keySize, int testVerifyCount, int testCompressedKey) { #ifdef BENCH_EMBEDDED - byte sharedA[32]; - byte sharedB[32]; + byte sharedA[128]; /* Needs to be at least keySize */ + byte sharedB[128]; /* Needs to be at least keySize */ #else byte sharedA[1024]; byte sharedB[1024]; @@ -6652,7 +6652,7 @@ static int ecc_test_curve_size(WC_RNG* rng, int keySize, int testVerifyCount, ret = wc_ecc_check_key(&userA); if (ret != 0) - return -1024; + return -1023; ret = wc_ecc_make_key(rng, keySize, &userB); if (ret != 0) @@ -6732,6 +6732,7 @@ static int ecc_test_curve_size(WC_RNG* rng, int keySize, int testVerifyCount, for (i = 0; i < (int)sizeof(digest); i++) { digest[i] = 0; } + digest[i-1] = 1; /* Set last digit to non-zero value */ x = sizeof(sig); ret = wc_ecc_sign_hash(digest, sizeof(digest), sig, &x, rng, &userA); @@ -6801,12 +6802,14 @@ static int ecc_test_curve(WC_RNG* rng, int keySize) ret = ecc_test_curve_size(rng, keySize, ECC_TEST_VERIFY_COUNT, testCompressedKey); if (ret < 0) { + printf("ecc_test_curve_size %d failed!: %d\n", keySize, ret); return ret; } #ifdef HAVE_ECC_VECTOR_TEST ret = ecc_test_vector(keySize); if (ret < 0) { + printf("ecc_test_vector %d failed!: %d\n", keySize, ret); return ret; } #endif @@ -6814,6 +6817,7 @@ static int ecc_test_curve(WC_RNG* rng, int keySize) #ifdef WOLFSSL_KEY_GEN ret = ecc_test_key_gen(rng, keySize); if (ret < 0) { + printf("ecc_test_key_gen %d failed!: %d\n", keySize, ret); return ret; } #endif From 
0ddbe0e60e19e7752108e59b58ad3c70c404e6dc Mon Sep 17 00:00:00 2001 From: David Garske Date: Wed, 4 May 2016 23:20:55 -0700 Subject: [PATCH 06/16] Enhancement to RSA math function "_fp_exptmod" (non timing resistant version) to support WOLFSSL_SMALL_STACK, which moves the allocation of the 64 fp_int's from the stack to the heap. --- wolfcrypt/src/tfm.c | 32 +++++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/wolfcrypt/src/tfm.c b/wolfcrypt/src/tfm.c index dd1aa14b4..c3a72c9e2 100644 --- a/wolfcrypt/src/tfm.c +++ b/wolfcrypt/src/tfm.c @@ -1059,9 +1059,14 @@ static int _fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y) */ static int _fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y) { - fp_int M[64], res; + fp_int res; fp_digit buf, mp; int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize; +#ifdef WOLFSSL_SMALL_STACK + fp_int *M; +#else + fp_int M[64]; +#endif /* find window size */ x = fp_count_bits (X); @@ -1077,15 +1082,23 @@ static int _fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y) winsize = 6; } - /* init M array */ - for(x = 0; x < (int)(sizeof(M)/sizeof(fp_int)); x++) - fp_init(&M[x]); - /* now setup montgomery */ if ((err = fp_montgomery_setup (P, &mp)) != FP_OKAY) { return err; } +#ifdef WOLFSSL_SMALL_STACK + /* only allocate space for what's needed */ + M = (fp_int*)XMALLOC(sizeof(fp_int)*(1 << winsize), NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (M == NULL) { + return FP_MEM; + } +#endif + + /* init M array */ + for(x = 0; x < (1 << winsize); x++) + fp_init(&M[x]); + /* setup result */ fp_init(&res); @@ -1093,7 +1106,7 @@ static int _fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y) * * The M table contains powers of the input base, e.g. 
M[x] = G^x mod P * - * The first half of the table is not computed though accept for M[0] and M[1] + * The first half of the table is not computed though except for M[0] and M[1] */ /* now we need R mod m */ @@ -1212,10 +1225,15 @@ static int _fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y) /* swap res with Y */ fp_copy (&res, Y); + +#ifdef WOLFSSL_SMALL_STACK + XFREE(M, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return FP_OKAY; } -#endif +#endif /* TFM_TIMING_RESISTANT */ int fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y) { From a4782fcf0105b323c58f53c112d52d90f1773728 Mon Sep 17 00:00:00 2001 From: David Garske Date: Wed, 4 May 2016 23:22:14 -0700 Subject: [PATCH 07/16] Fix in fast math version of ecc_projective_dbl_point to use a local for x,y,z since ecc_point fp_int's are reduced size and cause math issues with ALT_ECC_SIZE enabled. Added local stack variable cleanups for ecc_projective_add_point. --- wolfcrypt/src/ecc.c | 112 +++++++++++++++++++++++++++----------------- 1 file changed, 69 insertions(+), 43 deletions(-) diff --git a/wolfcrypt/src/ecc.c b/wolfcrypt/src/ecc.c index f3a47793b..1465a8752 100644 --- a/wolfcrypt/src/ecc.c +++ b/wolfcrypt/src/ecc.c @@ -298,7 +298,14 @@ int ecc_projective_add_point(ecc_point *P, ecc_point *Q, ecc_point *R, fp_sub(modulus, Q->y, &t1); if ( (fp_cmp(P->x, Q->x) == FP_EQ) && (get_digit_count(Q->z) && fp_cmp(P->z, Q->z) == FP_EQ) && - (fp_cmp(P->y, Q->y) == FP_EQ || fp_cmp(P->y, &t1) == FP_EQ)) { + (fp_cmp(P->y, Q->y) == FP_EQ || fp_cmp(P->y, &t1) == FP_EQ)) + { + fp_clear(&x); + fp_clear(&y); + fp_clear(&z); + fp_clear(&t1); + fp_clear(&t2); + return ecc_projective_dbl_point(P, R, modulus, mp); } @@ -423,10 +430,18 @@ int ecc_projective_add_point(ecc_point *P, ecc_point *Q, ecc_point *R, } fp_div_2(&y, &y); + /* return result */ fp_copy(&x, R->x); fp_copy(&y, R->y); fp_copy(&z, R->z); + /* clear stack variables */ + fp_clear(&x); + fp_clear(&y); + fp_clear(&z); + fp_clear(&t1); + fp_clear(&t2); + 
return MP_OKAY; } @@ -442,41 +457,40 @@ int ecc_projective_add_point(ecc_point *P, ecc_point *Q, ecc_point *R, int ecc_projective_dbl_point(ecc_point *P, ecc_point *R, mp_int* modulus, mp_digit* mp) { - fp_int t1, t2; + fp_int x, y, z, t1, t2; int err; if (P == NULL || R == NULL || modulus == NULL || mp == NULL) return ECC_BAD_ARG_E; - if (P != R) { - fp_copy(P->x, R->x); - fp_copy(P->y, R->y); - fp_copy(P->z, R->z); - } - - if ((err = mp_init_multi(&t1, &t2, NULL, NULL, NULL, NULL)) != MP_OKAY) { + if ((err = mp_init_multi(&x, &y, &z, &t1, &t2, NULL)) != MP_OKAY) { return err; } - /* t1 = Z * Z */ - fp_sqr(R->z, &t1); + /* Use local due to possible insufficient size of alt_ecc_size in ecc_point x,y,z */ + fp_copy(P->x, &x); + fp_copy(P->y, &y); + fp_copy(P->z, &z); + + /* T1 = Z * Z */ + fp_sqr(&z, &t1); fp_montgomery_reduce(&t1, modulus, *mp); /* Z = Y * Z */ - fp_mul(R->z, R->y, R->z); - fp_montgomery_reduce(R->z, modulus, *mp); + fp_mul(&z, &y, &z); + fp_montgomery_reduce(&z, modulus, *mp); /* Z = 2Z */ - fp_add(R->z, R->z, R->z); - if (fp_cmp(R->z, modulus) != FP_LT) { - fp_sub(R->z, modulus, R->z); + fp_add(&z, &z, &z); + if (fp_cmp(&z, modulus) != FP_LT) { + fp_sub(&z, modulus, &z); } - /* &t2 = X - T1 */ - fp_sub(R->x, &t1, &t2); + /* T2 = X - T1 */ + fp_sub(&x, &t1, &t2); if (fp_cmp_d(&t2, 0) == FP_LT) { fp_add(&t2, modulus, &t2); } /* T1 = X + T1 */ - fp_add(&t1, R->x, &t1); + fp_add(&t1, &x, &t1); if (fp_cmp(&t1, modulus) != FP_LT) { fp_sub(&t1, modulus, &t1); } @@ -495,15 +509,15 @@ int ecc_projective_dbl_point(ecc_point *P, ecc_point *R, mp_int* modulus, } /* Y = 2Y */ - fp_add(R->y, R->y, R->y); - if (fp_cmp(R->y, modulus) != FP_LT) { - fp_sub(R->y, modulus, R->y); + fp_add(&y, &y, &y); + if (fp_cmp(&y, modulus) != FP_LT) { + fp_sub(&y, modulus, &y); } /* Y = Y * Y */ - fp_sqr(R->y, R->y); - fp_montgomery_reduce(R->y, modulus, *mp); + fp_sqr(&y, &y); + fp_montgomery_reduce(&y, modulus, *mp); /* T2 = Y * Y */ - fp_sqr(R->y, &t2); + fp_sqr(&y, &t2); 
fp_montgomery_reduce(&t2, modulus, *mp); /* T2 = T2/2 */ if (fp_isodd(&t2)) { @@ -511,37 +525,49 @@ int ecc_projective_dbl_point(ecc_point *P, ecc_point *R, mp_int* modulus, } fp_div_2(&t2, &t2); /* Y = Y * X */ - fp_mul(R->y, R->x, R->y); - fp_montgomery_reduce(R->y, modulus, *mp); + fp_mul(&y, &x, &y); + fp_montgomery_reduce(&y, modulus, *mp); /* X = T1 * T1 */ - fp_sqr(&t1, R->x); - fp_montgomery_reduce(R->x, modulus, *mp); + fp_sqr(&t1, &x); + fp_montgomery_reduce(&x, modulus, *mp); /* X = X - Y */ - fp_sub(R->x, R->y, R->x); - if (fp_cmp_d(R->x, 0) == FP_LT) { - fp_add(R->x, modulus, R->x); + fp_sub(&x, &y, &x); + if (fp_cmp_d(&x, 0) == FP_LT) { + fp_add(&x, modulus, &x); } /* X = X - Y */ - fp_sub(R->x, R->y, R->x); - if (fp_cmp_d(R->x, 0) == FP_LT) { - fp_add(R->x, modulus, R->x); + fp_sub(&x, &y, &x); + if (fp_cmp_d(&x, 0) == FP_LT) { + fp_add(&x, modulus, &x); } /* Y = Y - X */ - fp_sub(R->y, R->x, R->y); - if (fp_cmp_d(R->y, 0) == FP_LT) { - fp_add(R->y, modulus, R->y); + fp_sub(&y, &x, &y); + if (fp_cmp_d(&y, 0) == FP_LT) { + fp_add(&y, modulus, &y); } /* Y = Y * T1 */ - fp_mul(R->y, &t1, R->y); - fp_montgomery_reduce(R->y, modulus, *mp); + fp_mul(&y, &t1, &y); + fp_montgomery_reduce(&y, modulus, *mp); /* Y = Y - T2 */ - fp_sub(R->y, &t2, R->y); - if (fp_cmp_d(R->y, 0) == FP_LT) { - fp_add(R->y, modulus, R->y); + fp_sub(&y, &t2, &y); + if (fp_cmp_d(&y, 0) == FP_LT) { + fp_add(&y, modulus, &y); } + /* Return x, y, and z */ + fp_copy(&x, R->x); + fp_copy(&y, R->y); + fp_copy(&z, R->z); + + /* Clear used locals */ + fp_clear(&x); + fp_clear(&y); + fp_clear(&z); + fp_clear(&t1); + fp_clear(&t2); + return MP_OKAY; } From 5cbc4bdf2967692ac971d52dad50a4bc228c517c Mon Sep 17 00:00:00 2001 From: David Garske Date: Wed, 4 May 2016 23:23:04 -0700 Subject: [PATCH 08/16] Added new "WOLFSSL_DEBUG_MATH", which enables use of "mp_dump" to display information about an mp_int. 
--- wolfcrypt/src/integer.c | 38 ++++++++++++++++++++++++++++++++-- wolfcrypt/src/tfm.c | 41 ++++++++++++++++++++++++++++++++----- wolfssl/wolfcrypt/integer.h | 6 ++++++ wolfssl/wolfcrypt/tfm.h | 6 ++++++ 4 files changed, 84 insertions(+), 7 deletions(-) diff --git a/wolfcrypt/src/integer.c b/wolfcrypt/src/integer.c index 9e9b3d01e..045effb9f 100644 --- a/wolfcrypt/src/integer.c +++ b/wolfcrypt/src/integer.c @@ -40,6 +40,10 @@ #include <wolfssl/wolfcrypt/integer.h> +#ifdef WOLFSSL_DEBUG_MATH + #include <stdio.h> +#endif + #ifndef NO_WOLFSSL_SMALL_STACK #ifndef WOLFSSL_SMALL_STACK #define WOLFSSL_SMALL_STACK @@ -4628,7 +4632,8 @@ int mp_read_radix (mp_int * a, const char *str, int radix) } #endif /* HAVE_ECC */ -#if defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) +#if defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || \ + defined(WOLFSSL_DEBUG_MATH) /* returns size of ASCII representation */ int mp_radix_size (mp_int *a, int radix, int *size) @@ -4739,7 +4744,36 @@ int mp_toradix (mp_int *a, char *str, int radix) return MP_OKAY; } -#endif /* defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) */ +#ifdef WOLFSSL_DEBUG_MATH +void mp_dump(const char* desc, mp_int* a, byte verbose) +{ + char *buffer; + int size = a->alloc; + + buffer = (char*)XMALLOC(size * 2, NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (buffer == NULL) { + return; + } + + printf("%s: ptr=%p, used=%d, sign=%d, size=%d, mpd=%d\n", + desc, a, a->used, a->sign, size, (int)sizeof(mp_digit)); + + mp_toradix(a, buffer, 16); + printf(" %s\n ", buffer); + + if (verbose) { + int i; + for(i=0; i<a->alloc * (int)sizeof(mp_digit); i++) { + printf("%02x ", *(((byte*)a->dp) + i)); + } + printf("\n"); + } + + XFREE(buffer, NULL, DYNAMIC_TYPE_TMP_BUFFER); +} +#endif /* WOLFSSL_DEBUG_MATH */ + +#endif /* defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || defined(WOLFSSL_DEBUG_MATH) */ #endif /* USE_FAST_MATH */ diff --git a/wolfcrypt/src/tfm.c b/wolfcrypt/src/tfm.c index c3a72c9e2..5f8e7df2d 100644 --- a/wolfcrypt/src/tfm.c +++ b/wolfcrypt/src/tfm.c @@ -50,6
+50,10 @@ #include <wolfssl/wolfcrypt/tfm.h> #include <wolfcrypt/src/asm.c> /* will define asm MACROS or C ones */ +#ifdef WOLFSSL_DEBUG_MATH + #include <stdio.h> +#endif + /* math settings check */ word32 CheckRunTimeSettings(void) @@ -2328,7 +2332,8 @@ int mp_montgomery_calc_normalization(mp_int *a, mp_int *b) #endif /* WOLFSSL_KEYGEN || HAVE_ECC */ -#if defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) +#if defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || \ + defined(WOLFSSL_DEBUG_MATH) static const int lnz[16] = { 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0 @@ -2475,7 +2480,7 @@ int mp_mod_d(fp_int *a, fp_digit b, fp_digit *c) return fp_mod_d(a, b, c); } -#endif /* defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) */ +#endif /* defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || defined(WOLFSSL_DEBUG_MATH) */ #ifdef WOLFSSL_KEY_GEN @@ -2918,7 +2923,8 @@ int mp_cnt_lsb(fp_int* a) #endif /* HAVE_ECC */ -#if defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) +#if defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || \ + defined(WOLFSSL_DEBUG_MATH) /* returns size of ASCII representation */ int mp_radix_size (mp_int *a, int radix, int *size) @@ -3026,7 +3032,32 @@ int mp_toradix (mp_int *a, char *str, int radix) return FP_OKAY; } -#endif /* defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) */ +#ifdef WOLFSSL_DEBUG_MATH +void mp_dump(const char* desc, mp_int* a, byte verbose) +{ + char buffer[FP_SIZE * sizeof(fp_digit) * 2]; + int size = FP_SIZE; + +#ifdef ALT_ECC_SIZE + size = a->size; +#endif + + printf("%s: ptr=%p, used=%d, sign=%d, size=%d, fpd=%d\n", + desc, a, a->used, a->sign, size, (int)sizeof(fp_digit)); + + mp_toradix(a, buffer, 16); + printf(" %s\n ", buffer); + + if (verbose) { + int i; + for(i=0; i<size * (int)sizeof(fp_digit); i++) { + printf("%x ", *(((byte*)a->dp) + i)); + } + printf("\n"); + } +} +#endif /* WOLFSSL_DEBUG_MATH */ + +#endif /* defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || defined(WOLFSSL_DEBUG_MATH) */ #endif /* USE_FAST_MATH */ - diff --git a/wolfssl/wolfcrypt/integer.h b/wolfssl/wolfcrypt/integer.h index a0ca3c15e..2b38601cb 100644
--- a/wolfssl/wolfcrypt/integer.h +++ b/wolfssl/wolfcrypt/integer.h @@ -310,6 +310,12 @@ int mp_init_multi(mp_int* a, mp_int* b, mp_int* c, mp_int* d, mp_int* e, int mp_toradix (mp_int *a, char *str, int radix); int mp_radix_size (mp_int * a, int radix, int *size); +#ifdef WOLFSSL_DEBUG_MATH + void mp_dump(const char* desc, mp_int* a, byte verbose); +#else + #define mp_dump(desc, a, verbose) +#endif + #if defined(HAVE_ECC) || defined(WOLFSSL_KEY_GEN) int mp_sqrmod(mp_int* a, mp_int* b, mp_int* c); #endif diff --git a/wolfssl/wolfcrypt/tfm.h b/wolfssl/wolfcrypt/tfm.h index ce633b43d..f86a7e52f 100644 --- a/wolfssl/wolfcrypt/tfm.h +++ b/wolfssl/wolfcrypt/tfm.h @@ -645,6 +645,12 @@ void mp_rshb(mp_int *a, int x); int mp_toradix (mp_int *a, char *str, int radix); int mp_radix_size (mp_int * a, int radix, int *size); +#ifdef WOLFSSL_DEBUG_MATH + void mp_dump(const char* desc, mp_int* a, byte verbose); +#else + #define mp_dump(desc, a, verbose) +#endif + #ifdef HAVE_ECC int mp_read_radix(mp_int* a, const char* str, int radix); void mp_set(fp_int *a, fp_digit b); From 1b602d783c4b46c88be364a13afe707339413265 Mon Sep 17 00:00:00 2001 From: David Garske Date: Wed, 4 May 2016 23:24:33 -0700 Subject: [PATCH 09/16] Fast math correction of "sizeof" to use (). Updates to tfm and ecc comments. --- wolfcrypt/src/ecc.c | 2 +- wolfcrypt/src/tfm.c | 20 +++++--------------- 2 files changed, 6 insertions(+), 16 deletions(-) diff --git a/wolfcrypt/src/ecc.c b/wolfcrypt/src/ecc.c index 1465a8752..2952e5c2d 100644 --- a/wolfcrypt/src/ecc.c +++ b/wolfcrypt/src/ecc.c @@ -2788,7 +2788,7 @@ int wc_ecc_export_x963_ex(ecc_key* key, byte* out, word32* outLen, } #endif /* HAVE_ECC_KEY_EXPORT */ -/* is ec point on curve described by dp ? */ +/* is ecc point on curve described by dp ? 
*/ static int ecc_is_point(const ecc_set_type* dp, ecc_point* ecp, mp_int* prime) { mp_int b, t1, t2; diff --git a/wolfcrypt/src/tfm.c b/wolfcrypt/src/tfm.c index 5f8e7df2d..258e31e7d 100644 --- a/wolfcrypt/src/tfm.c +++ b/wolfcrypt/src/tfm.c @@ -1554,7 +1554,7 @@ int fp_cmp_mag(fp_int *a, fp_int *b) return FP_EQ; } -/* setups the montgomery reduction */ +/* sets up the montgomery reduction */ int fp_montgomery_setup(fp_int *a, fp_digit *rho) { fp_digit x, b; @@ -1653,7 +1653,7 @@ static void fp_montgomery_reduce_mulx(fp_int *a, fp_int *m, fp_digit mp) /* now zero the buff */ - XMEMSET(c, 0, sizeof c); + XMEMSET(c, 0, sizeof(c)); pa = m->used; /* copy the input */ @@ -1733,7 +1733,7 @@ void fp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp) /* now zero the buff */ - XMEMSET(c, 0, sizeof c); + XMEMSET(c, 0, sizeof(c)); pa = m->used; /* copy the input */ @@ -1872,7 +1872,7 @@ void fp_set(fp_int *a, fp_digit b) a->used = a->dp[0] ? 1 : 0; } -/* chek if a bit is set */ +/* check if a bit is set */ int fp_is_bit_set (fp_int *a, fp_digit b) { fp_digit i; @@ -2246,49 +2246,39 @@ void fp_init_copy(fp_int *a, fp_int* b) } #endif -/* fast math conversion */ +/* fast math wrappers */ int mp_copy(fp_int* a, fp_int* b) { fp_copy(a, b); return MP_OKAY; } - -/* fast math conversion */ int mp_isodd(mp_int* a) { return fp_isodd(a); } - -/* fast math conversion */ int mp_iszero(mp_int* a) { return fp_iszero(a); } -/* fast math conversion */ int mp_count_bits (mp_int* a) { return fp_count_bits(a); } - int mp_leading_bit (mp_int* a) { return fp_leading_bit(a); } - -/* fast math conversion */ void mp_rshb (mp_int* a, int x) { fp_rshb(a, x); } - -/* fast math wrappers */ int mp_set_int(mp_int *a, mp_digit b) { fp_set(a, b); From fe58db2a07a23641a8c2c183095c044784b53ffa Mon Sep 17 00:00:00 2001 From: David Garske Date: Thu, 5 May 2016 12:24:08 -0700 Subject: [PATCH 10/16] Fixed typo with new "eccshamir" configure option. 
--- configure.ac | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure.ac b/configure.ac index 64c4231c8..3f496a7d7 100644 --- a/configure.ac +++ b/configure.ac @@ -732,7 +732,7 @@ ECC_DEFAULT=yes fi # ECC Shamir -AC_ARG_ENABLE([ecc], +AC_ARG_ENABLE([eccshamir], [AS_HELP_STRING([--enable-eccshamir],[Enable ECC Shamir (default: enabled on x86_64)])], [ ENABLED_ECC_SHAMIR=$enableval ], [ ENABLED_ECC_SHAMIR=$ECC_DEFAULT ] From 880b2e454bbbbb67ddab921d3fd908e124c4bffb Mon Sep 17 00:00:00 2001 From: David Garske Date: Thu, 5 May 2016 19:00:50 -0700 Subject: [PATCH 11/16] Refactor of the ecc_projective_add_point and ecc_projective_dbl_point functions to eliminate duplicate versions. Modified new single functions to work with normal, fast and alt_ecc_size math options. Careful use of mp_clear to retain original performance. --- wolfcrypt/src/ecc.c | 554 ++++++++++++-------------------------------- 1 file changed, 150 insertions(+), 404 deletions(-) diff --git a/wolfcrypt/src/ecc.c b/wolfcrypt/src/ecc.c index 2952e5c2d..cf5989344 100644 --- a/wolfcrypt/src/ecc.c +++ b/wolfcrypt/src/ecc.c @@ -268,311 +268,6 @@ static mp_digit get_digit(mp_int* a, int n) } -#if defined(USE_FAST_MATH) - -/* fast math accelerated version, but not for fp ecc yet */ - -/** - Add two ECC points - P The point to add - Q The point to add - R [out] The destination of the double - modulus The modulus of the field the ECC curve is in - mp The "b" value from montgomery_setup() - return MP_OKAY on success -*/ -int ecc_projective_add_point(ecc_point *P, ecc_point *Q, ecc_point *R, - mp_int* modulus, mp_digit* mp) -{ - fp_int t1, t2, x, y, z; - int err; - - if (P == NULL || Q == NULL || R == NULL || modulus == NULL || mp == NULL) - return ECC_BAD_ARG_E; - - if ((err = mp_init_multi(&t1, &t2, &x, &y, &z, NULL)) != MP_OKAY) { - return err; - } - - /* should we dbl instead? 
*/ - fp_sub(modulus, Q->y, &t1); - if ( (fp_cmp(P->x, Q->x) == FP_EQ) && - (get_digit_count(Q->z) && fp_cmp(P->z, Q->z) == FP_EQ) && - (fp_cmp(P->y, Q->y) == FP_EQ || fp_cmp(P->y, &t1) == FP_EQ)) - { - fp_clear(&x); - fp_clear(&y); - fp_clear(&z); - fp_clear(&t1); - fp_clear(&t2); - - return ecc_projective_dbl_point(P, R, modulus, mp); - } - - fp_copy(P->x, &x); - fp_copy(P->y, &y); - fp_copy(P->z, &z); - - /* if Z is one then these are no-operations */ - if (get_digit_count(Q->z)) { - /* T1 = Z' * Z' */ - fp_sqr(Q->z, &t1); - fp_montgomery_reduce(&t1, modulus, *mp); - /* X = X * T1 */ - fp_mul(&t1, &x, &x); - fp_montgomery_reduce(&x, modulus, *mp); - /* T1 = Z' * T1 */ - fp_mul(Q->z, &t1, &t1); - fp_montgomery_reduce(&t1, modulus, *mp); - /* Y = Y * T1 */ - fp_mul(&t1, &y, &y); - fp_montgomery_reduce(&y, modulus, *mp); - } - - /* T1 = Z*Z */ - fp_sqr(&z, &t1); - fp_montgomery_reduce(&t1, modulus, *mp); - /* T2 = X' * T1 */ - fp_mul(Q->x, &t1, &t2); - fp_montgomery_reduce(&t2, modulus, *mp); - /* T1 = Z * T1 */ - fp_mul(&z, &t1, &t1); - fp_montgomery_reduce(&t1, modulus, *mp); - /* T1 = Y' * T1 */ - fp_mul(Q->y, &t1, &t1); - fp_montgomery_reduce(&t1, modulus, *mp); - - /* Y = Y - T1 */ - fp_sub(&y, &t1, &y); - if (fp_cmp_d(&y, 0) == FP_LT) { - fp_add(&y, modulus, &y); - } - /* T1 = 2T1 */ - fp_add(&t1, &t1, &t1); - if (fp_cmp(&t1, modulus) != FP_LT) { - fp_sub(&t1, modulus, &t1); - } - /* T1 = Y + T1 */ - fp_add(&t1, &y, &t1); - if (fp_cmp(&t1, modulus) != FP_LT) { - fp_sub(&t1, modulus, &t1); - } - /* X = X - T2 */ - fp_sub(&x, &t2, &x); - if (fp_cmp_d(&x, 0) == FP_LT) { - fp_add(&x, modulus, &x); - } - /* T2 = 2T2 */ - fp_add(&t2, &t2, &t2); - if (fp_cmp(&t2, modulus) != FP_LT) { - fp_sub(&t2, modulus, &t2); - } - /* T2 = X + T2 */ - fp_add(&t2, &x, &t2); - if (fp_cmp(&t2, modulus) != FP_LT) { - fp_sub(&t2, modulus, &t2); - } - - /* if Z' != 1 */ - if (get_digit_count(Q->z)) { - /* Z = Z * Z' */ - fp_mul(&z, Q->z, &z); - fp_montgomery_reduce(&z, modulus, *mp); - 
} - - /* Z = Z * X */ - fp_mul(&z, &x, &z); - fp_montgomery_reduce(&z, modulus, *mp); - - /* T1 = T1 * X */ - fp_mul(&t1, &x, &t1); - fp_montgomery_reduce(&t1, modulus, *mp); - /* X = X * X */ - fp_sqr(&x, &x); - fp_montgomery_reduce(&x, modulus, *mp); - /* T2 = T2 * x */ - fp_mul(&t2, &x, &t2); - fp_montgomery_reduce(&t2, modulus, *mp); - /* T1 = T1 * X */ - fp_mul(&t1, &x, &t1); - fp_montgomery_reduce(&t1, modulus, *mp); - - /* X = Y*Y */ - fp_sqr(&y, &x); - fp_montgomery_reduce(&x, modulus, *mp); - /* X = X - T2 */ - fp_sub(&x, &t2, &x); - if (fp_cmp_d(&x, 0) == FP_LT) { - fp_add(&x, modulus, &x); - } - - /* T2 = T2 - X */ - fp_sub(&t2, &x, &t2); - if (fp_cmp_d(&t2, 0) == FP_LT) { - fp_add(&t2, modulus, &t2); - } - /* T2 = T2 - X */ - fp_sub(&t2, &x, &t2); - if (fp_cmp_d(&t2, 0) == FP_LT) { - fp_add(&t2, modulus, &t2); - } - /* T2 = T2 * Y */ - fp_mul(&t2, &y, &t2); - fp_montgomery_reduce(&t2, modulus, *mp); - /* Y = T2 - T1 */ - fp_sub(&t2, &t1, &y); - if (fp_cmp_d(&y, 0) == FP_LT) { - fp_add(&y, modulus, &y); - } - /* Y = Y/2 */ - if (fp_isodd(&y)) { - fp_add(&y, modulus, &y); - } - fp_div_2(&y, &y); - - /* return result */ - fp_copy(&x, R->x); - fp_copy(&y, R->y); - fp_copy(&z, R->z); - - /* clear stack variables */ - fp_clear(&x); - fp_clear(&y); - fp_clear(&z); - fp_clear(&t1); - fp_clear(&t2); - - return MP_OKAY; -} - - -/** - Double an ECC point - P The point to double - R [out] The destination of the double - modulus The modulus of the field the ECC curve is in - mp The "b" value from montgomery_setup() - return MP_OKAY on success -*/ -int ecc_projective_dbl_point(ecc_point *P, ecc_point *R, mp_int* modulus, - mp_digit* mp) -{ - fp_int x, y, z, t1, t2; - int err; - - if (P == NULL || R == NULL || modulus == NULL || mp == NULL) - return ECC_BAD_ARG_E; - - if ((err = mp_init_multi(&x, &y, &z, &t1, &t2, NULL)) != MP_OKAY) { - return err; - } - - /* Use local due to possible insufficient size of alt_ecc_size in ecc_point x,y,z */ - fp_copy(P->x, &x); - 
fp_copy(P->y, &y); - fp_copy(P->z, &z); - - /* T1 = Z * Z */ - fp_sqr(&z, &t1); - fp_montgomery_reduce(&t1, modulus, *mp); - /* Z = Y * Z */ - fp_mul(&z, &y, &z); - fp_montgomery_reduce(&z, modulus, *mp); - /* Z = 2Z */ - fp_add(&z, &z, &z); - if (fp_cmp(&z, modulus) != FP_LT) { - fp_sub(&z, modulus, &z); - } - - /* T2 = X - T1 */ - fp_sub(&x, &t1, &t2); - if (fp_cmp_d(&t2, 0) == FP_LT) { - fp_add(&t2, modulus, &t2); - } - /* T1 = X + T1 */ - fp_add(&t1, &x, &t1); - if (fp_cmp(&t1, modulus) != FP_LT) { - fp_sub(&t1, modulus, &t1); - } - /* T2 = T1 * T2 */ - fp_mul(&t1, &t2, &t2); - fp_montgomery_reduce(&t2, modulus, *mp); - /* T1 = 2T2 */ - fp_add(&t2, &t2, &t1); - if (fp_cmp(&t1, modulus) != FP_LT) { - fp_sub(&t1, modulus, &t1); - } - /* T1 = T1 + T2 */ - fp_add(&t1, &t2, &t1); - if (fp_cmp(&t1, modulus) != FP_LT) { - fp_sub(&t1, modulus, &t1); - } - - /* Y = 2Y */ - fp_add(&y, &y, &y); - if (fp_cmp(&y, modulus) != FP_LT) { - fp_sub(&y, modulus, &y); - } - /* Y = Y * Y */ - fp_sqr(&y, &y); - fp_montgomery_reduce(&y, modulus, *mp); - /* T2 = Y * Y */ - fp_sqr(&y, &t2); - fp_montgomery_reduce(&t2, modulus, *mp); - /* T2 = T2/2 */ - if (fp_isodd(&t2)) { - fp_add(&t2, modulus, &t2); - } - fp_div_2(&t2, &t2); - /* Y = Y * X */ - fp_mul(&y, &x, &y); - fp_montgomery_reduce(&y, modulus, *mp); - - /* X = T1 * T1 */ - fp_sqr(&t1, &x); - fp_montgomery_reduce(&x, modulus, *mp); - /* X = X - Y */ - fp_sub(&x, &y, &x); - if (fp_cmp_d(&x, 0) == FP_LT) { - fp_add(&x, modulus, &x); - } - /* X = X - Y */ - fp_sub(&x, &y, &x); - if (fp_cmp_d(&x, 0) == FP_LT) { - fp_add(&x, modulus, &x); - } - - /* Y = Y - X */ - fp_sub(&y, &x, &y); - if (fp_cmp_d(&y, 0) == FP_LT) { - fp_add(&y, modulus, &y); - } - /* Y = Y * T1 */ - fp_mul(&y, &t1, &y); - fp_montgomery_reduce(&y, modulus, *mp); - /* Y = Y - T2 */ - fp_sub(&y, &t2, &y); - if (fp_cmp_d(&y, 0) == FP_LT) { - fp_add(&y, modulus, &y); - } - - /* Return x, y, and z */ - fp_copy(&x, R->x); - fp_copy(&y, R->y); - fp_copy(&z, R->z); - - /* 
Clear used locals */ - fp_clear(&x); - fp_clear(&y); - fp_clear(&z); - fp_clear(&t1); - fp_clear(&t2); - - return MP_OKAY; -} - -#else /* USE_FAST_MATH */ - /** Add two ECC points P The point to add @@ -585,43 +280,63 @@ int ecc_projective_dbl_point(ecc_point *P, ecc_point *R, mp_int* modulus, int ecc_projective_add_point(ecc_point* P, ecc_point* Q, ecc_point* R, mp_int* modulus, mp_digit* mp) { - mp_int t1; - mp_int t2; - mp_int x; - mp_int y; - mp_int z; + mp_int t1, t2; +#ifdef ALT_ECC_SIZE + mp_int rx, ry, rz; +#endif + mp_int *x, *y, *z; int err; if (P == NULL || Q == NULL || R == NULL || modulus == NULL || mp == NULL) return ECC_BAD_ARG_E; - if ((err = mp_init_multi(&t1, &t2, &x, &y, &z, NULL)) != MP_OKAY) { + if ((err = mp_init_multi(&t1, &t2, NULL, NULL, NULL, NULL)) != MP_OKAY) { return err; } /* should we dbl instead? */ - err = mp_sub(modulus, Q->y, &t1); - + if (err == MP_OKAY) + err = mp_sub(modulus, Q->y, &t1); if (err == MP_OKAY) { if ( (mp_cmp(P->x, Q->x) == MP_EQ) && (get_digit_count(Q->z) && mp_cmp(P->z, Q->z) == MP_EQ) && (mp_cmp(P->y, Q->y) == MP_EQ || mp_cmp(P->y, &t1) == MP_EQ)) { mp_clear(&t1); mp_clear(&t2); - mp_clear(&x); - mp_clear(&y); - mp_clear(&z); - return ecc_projective_dbl_point(P, R, modulus, mp); } } + + if (err != MP_OKAY) { + mp_clear(&t1); + mp_clear(&t2); + return err; + } + +#ifdef ALT_ECC_SIZE + /* Use local stack variable */ + x = &rx; + y = &ry; + z = &rz; + + if ((err = mp_init_multi(x, y, z, NULL, NULL, NULL)) != MP_OKAY) { + mp_clear(&t1); + mp_clear(&t2); + return err; + } +#else + /* Use destination directly */ + x = R->x; + y = R->y; + z = R->z; +#endif if (err == MP_OKAY) - err = mp_copy(P->x, &x); + err = mp_copy(P->x, x); if (err == MP_OKAY) - err = mp_copy(P->y, &y); + err = mp_copy(P->y, y); if (err == MP_OKAY) - err = mp_copy(P->z, &z); + err = mp_copy(P->z, z); /* if Z is one then these are no-operations */ if (err == MP_OKAY) { @@ -633,9 +348,9 @@ int ecc_projective_add_point(ecc_point* P, ecc_point* Q, 
ecc_point* R, /* X = X * T1 */ if (err == MP_OKAY) - err = mp_mul(&t1, &x, &x); + err = mp_mul(&t1, x, x); if (err == MP_OKAY) - err = mp_montgomery_reduce(&x, modulus, *mp); + err = mp_montgomery_reduce(x, modulus, *mp); /* T1 = Z' * T1 */ if (err == MP_OKAY) @@ -645,15 +360,15 @@ int ecc_projective_add_point(ecc_point* P, ecc_point* Q, ecc_point* R, /* Y = Y * T1 */ if (err == MP_OKAY) - err = mp_mul(&t1, &y, &y); + err = mp_mul(&t1, y, y); if (err == MP_OKAY) - err = mp_montgomery_reduce(&y, modulus, *mp); + err = mp_montgomery_reduce(y, modulus, *mp); } } /* T1 = Z*Z */ if (err == MP_OKAY) - err = mp_sqr(&z, &t1); + err = mp_sqr(z, &t1); if (err == MP_OKAY) err = mp_montgomery_reduce(&t1, modulus, *mp); @@ -665,7 +380,7 @@ int ecc_projective_add_point(ecc_point* P, ecc_point* Q, ecc_point* R, /* T1 = Z * T1 */ if (err == MP_OKAY) - err = mp_mul(&z, &t1, &t1); + err = mp_mul(z, &t1, &t1); if (err == MP_OKAY) err = mp_montgomery_reduce(&t1, modulus, *mp); @@ -677,10 +392,10 @@ int ecc_projective_add_point(ecc_point* P, ecc_point* Q, ecc_point* R, /* Y = Y - T1 */ if (err == MP_OKAY) - err = mp_sub(&y, &t1, &y); + err = mp_sub(y, &t1, y); if (err == MP_OKAY) { - if (mp_cmp_d(&y, 0) == MP_LT) - err = mp_add(&y, modulus, &y); + if (mp_cmp_d(y, 0) == MP_LT) + err = mp_add(y, modulus, y); } /* T1 = 2T1 */ if (err == MP_OKAY) @@ -691,17 +406,17 @@ int ecc_projective_add_point(ecc_point* P, ecc_point* Q, ecc_point* R, } /* T1 = Y + T1 */ if (err == MP_OKAY) - err = mp_add(&t1, &y, &t1); + err = mp_add(&t1, y, &t1); if (err == MP_OKAY) { if (mp_cmp(&t1, modulus) != MP_LT) err = mp_sub(&t1, modulus, &t1); } /* X = X - T2 */ if (err == MP_OKAY) - err = mp_sub(&x, &t2, &x); + err = mp_sub(x, &t2, x); if (err == MP_OKAY) { - if (mp_cmp_d(&x, 0) == MP_LT) - err = mp_add(&x, modulus, &x); + if (mp_cmp_d(x, 0) == MP_LT) + err = mp_add(x, modulus, x); } /* T2 = 2T2 */ if (err == MP_OKAY) @@ -712,7 +427,7 @@ int ecc_projective_add_point(ecc_point* P, ecc_point* Q, ecc_point* R, } 
/* T2 = X + T2 */ if (err == MP_OKAY) - err = mp_add(&t2, &x, &t2); + err = mp_add(&t2, x, &t2); if (err == MP_OKAY) { if (mp_cmp(&t2, modulus) != MP_LT) err = mp_sub(&t2, modulus, &t2); @@ -721,103 +436,104 @@ int ecc_projective_add_point(ecc_point* P, ecc_point* Q, ecc_point* R, if (err == MP_OKAY) { if (get_digit_count(Q->z)) { /* Z = Z * Z' */ - err = mp_mul(&z, Q->z, &z); + err = mp_mul(z, Q->z, z); if (err == MP_OKAY) - err = mp_montgomery_reduce(&z, modulus, *mp); + err = mp_montgomery_reduce(z, modulus, *mp); } } /* Z = Z * X */ if (err == MP_OKAY) - err = mp_mul(&z, &x, &z); + err = mp_mul(z, x, z); if (err == MP_OKAY) - err = mp_montgomery_reduce(&z, modulus, *mp); + err = mp_montgomery_reduce(z, modulus, *mp); /* T1 = T1 * X */ if (err == MP_OKAY) - err = mp_mul(&t1, &x, &t1); + err = mp_mul(&t1, x, &t1); if (err == MP_OKAY) err = mp_montgomery_reduce(&t1, modulus, *mp); /* X = X * X */ if (err == MP_OKAY) - err = mp_sqr(&x, &x); + err = mp_sqr(x, x); if (err == MP_OKAY) - err = mp_montgomery_reduce(&x, modulus, *mp); + err = mp_montgomery_reduce(x, modulus, *mp); /* T2 = T2 * x */ if (err == MP_OKAY) - err = mp_mul(&t2, &x, &t2); + err = mp_mul(&t2, x, &t2); if (err == MP_OKAY) err = mp_montgomery_reduce(&t2, modulus, *mp); /* T1 = T1 * X */ if (err == MP_OKAY) - err = mp_mul(&t1, &x, &t1); + err = mp_mul(&t1, x, &t1); if (err == MP_OKAY) err = mp_montgomery_reduce(&t1, modulus, *mp); /* X = Y*Y */ if (err == MP_OKAY) - err = mp_sqr(&y, &x); + err = mp_sqr(y, x); if (err == MP_OKAY) - err = mp_montgomery_reduce(&x, modulus, *mp); + err = mp_montgomery_reduce(x, modulus, *mp); /* X = X - T2 */ if (err == MP_OKAY) - err = mp_sub(&x, &t2, &x); + err = mp_sub(x, &t2, x); if (err == MP_OKAY) { - if (mp_cmp_d(&x, 0) == MP_LT) - err = mp_add(&x, modulus, &x); + if (mp_cmp_d(x, 0) == MP_LT) + err = mp_add(x, modulus, x); } /* T2 = T2 - X */ if (err == MP_OKAY) - err = mp_sub(&t2, &x, &t2); + err = mp_sub(&t2, x, &t2); if (err == MP_OKAY) { if (mp_cmp_d(&t2, 0) 
== MP_LT) err = mp_add(&t2, modulus, &t2); } /* T2 = T2 - X */ if (err == MP_OKAY) - err = mp_sub(&t2, &x, &t2); + err = mp_sub(&t2, x, &t2); if (err == MP_OKAY) { if (mp_cmp_d(&t2, 0) == MP_LT) err = mp_add(&t2, modulus, &t2); } /* T2 = T2 * Y */ if (err == MP_OKAY) - err = mp_mul(&t2, &y, &t2); + err = mp_mul(&t2, y, &t2); if (err == MP_OKAY) err = mp_montgomery_reduce(&t2, modulus, *mp); /* Y = T2 - T1 */ if (err == MP_OKAY) - err = mp_sub(&t2, &t1, &y); + err = mp_sub(&t2, &t1, y); if (err == MP_OKAY) { - if (mp_cmp_d(&y, 0) == MP_LT) - err = mp_add(&y, modulus, &y); + if (mp_cmp_d(y, 0) == MP_LT) + err = mp_add(y, modulus, y); } /* Y = Y/2 */ if (err == MP_OKAY) { - if (mp_isodd(&y)) - err = mp_add(&y, modulus, &y); + if (mp_isodd(y)) + err = mp_add(y, modulus, y); } if (err == MP_OKAY) - err = mp_div_2(&y, &y); + err = mp_div_2(y, y); +#ifdef ALT_ECC_SIZE if (err == MP_OKAY) - err = mp_copy(&x, R->x); + err = mp_copy(x, R->x); if (err == MP_OKAY) - err = mp_copy(&y, R->y); + err = mp_copy(y, R->y); if (err == MP_OKAY) - err = mp_copy(&z, R->z); + err = mp_copy(z, R->z); +#endif +#ifndef USE_FAST_MATH /* clean up */ mp_clear(&t1); mp_clear(&t2); - mp_clear(&x); - mp_clear(&y); - mp_clear(&z); +#endif return err; } @@ -834,8 +550,11 @@ int ecc_projective_add_point(ecc_point* P, ecc_point* Q, ecc_point* R, int ecc_projective_dbl_point(ecc_point *P, ecc_point *R, mp_int* modulus, mp_digit* mp) { - mp_int t1; - mp_int t2; + mp_int t1, t2; +#ifdef ALT_ECC_SIZE + mp_int rx, ry, rz; +#endif + mp_int *x, *y, *z; int err; if (P == NULL || R == NULL || modulus == NULL || mp == NULL) @@ -845,44 +564,61 @@ int ecc_projective_dbl_point(ecc_point *P, ecc_point *R, mp_int* modulus, return err; } - if (P != R) { - err = mp_copy(P->x, R->x); - if (err == MP_OKAY) - err = mp_copy(P->y, R->y); - if (err == MP_OKAY) - err = mp_copy(P->z, R->z); +#ifdef ALT_ECC_SIZE + /* Use local stack variable */ + x = &rx; + y = &ry; + z = &rz; + + if ((err = mp_init_multi(x, y, z, NULL, NULL, 
NULL)) != MP_OKAY) { + mp_clear(&t1); + mp_clear(&t2); + return err; } +#else + /* Use destination directly */ + x = R->x; + y = R->y; + z = R->z; +#endif + + if (err == MP_OKAY) + err = mp_copy(P->x, x); + if (err == MP_OKAY) + err = mp_copy(P->y, y); + if (err == MP_OKAY) + err = mp_copy(P->z, z); /* t1 = Z * Z */ if (err == MP_OKAY) - err = mp_sqr(R->z, &t1); + err = mp_sqr(z, &t1); if (err == MP_OKAY) err = mp_montgomery_reduce(&t1, modulus, *mp); /* Z = Y * Z */ if (err == MP_OKAY) - err = mp_mul(R->z, R->y, R->z); + err = mp_mul(z, y, z); if (err == MP_OKAY) - err = mp_montgomery_reduce(R->z, modulus, *mp); + err = mp_montgomery_reduce(z, modulus, *mp); /* Z = 2Z */ if (err == MP_OKAY) - err = mp_add(R->z, R->z, R->z); + err = mp_add(z, z, z); if (err == MP_OKAY) { - if (mp_cmp(R->z, modulus) != MP_LT) - err = mp_sub(R->z, modulus, R->z); + if (mp_cmp(z, modulus) != MP_LT) + err = mp_sub(z, modulus, z); } /* T2 = X - T1 */ if (err == MP_OKAY) - err = mp_sub(R->x, &t1, &t2); + err = mp_sub(x, &t1, &t2); if (err == MP_OKAY) { if (mp_cmp_d(&t2, 0) == MP_LT) err = mp_add(&t2, modulus, &t2); } /* T1 = X + T1 */ if (err == MP_OKAY) - err = mp_add(&t1, R->x, &t1); + err = mp_add(&t1, x, &t1); if (err == MP_OKAY) { if (mp_cmp(&t1, modulus) != MP_LT) err = mp_sub(&t1, modulus, &t1); @@ -909,20 +645,20 @@ int ecc_projective_dbl_point(ecc_point *P, ecc_point *R, mp_int* modulus, } /* Y = 2Y */ if (err == MP_OKAY) - err = mp_add(R->y, R->y, R->y); + err = mp_add(y, y, y); if (err == MP_OKAY) { - if (mp_cmp(R->y, modulus) != MP_LT) - err = mp_sub(R->y, modulus, R->y); + if (mp_cmp(y, modulus) != MP_LT) + err = mp_sub(y, modulus, y); } /* Y = Y * Y */ if (err == MP_OKAY) - err = mp_sqr(R->y, R->y); + err = mp_sqr(y, y); if (err == MP_OKAY) - err = mp_montgomery_reduce(R->y, modulus, *mp); + err = mp_montgomery_reduce(y, modulus, *mp); /* T2 = Y * Y */ if (err == MP_OKAY) - err = mp_sqr(R->y, &t2); + err = mp_sqr(y, &t2); if (err == MP_OKAY) err = mp_montgomery_reduce(&t2, 
modulus, *mp); @@ -936,59 +672,69 @@ int ecc_projective_dbl_point(ecc_point *P, ecc_point *R, mp_int* modulus, /* Y = Y * X */ if (err == MP_OKAY) - err = mp_mul(R->y, R->x, R->y); + err = mp_mul(y, x, y); if (err == MP_OKAY) - err = mp_montgomery_reduce(R->y, modulus, *mp); + err = mp_montgomery_reduce(y, modulus, *mp); /* X = T1 * T1 */ if (err == MP_OKAY) - err = mp_sqr(&t1, R->x); + err = mp_sqr(&t1, x); if (err == MP_OKAY) - err = mp_montgomery_reduce(R->x, modulus, *mp); + err = mp_montgomery_reduce(x, modulus, *mp); /* X = X - Y */ if (err == MP_OKAY) - err = mp_sub(R->x, R->y, R->x); + err = mp_sub(x, y, x); if (err == MP_OKAY) { - if (mp_cmp_d(R->x, 0) == MP_LT) - err = mp_add(R->x, modulus, R->x); + if (mp_cmp_d(x, 0) == MP_LT) + err = mp_add(x, modulus, x); } /* X = X - Y */ if (err == MP_OKAY) - err = mp_sub(R->x, R->y, R->x); + err = mp_sub(x, y, x); if (err == MP_OKAY) { - if (mp_cmp_d(R->x, 0) == MP_LT) - err = mp_add(R->x, modulus, R->x); + if (mp_cmp_d(x, 0) == MP_LT) + err = mp_add(x, modulus, x); } /* Y = Y - X */ if (err == MP_OKAY) - err = mp_sub(R->y, R->x, R->y); + err = mp_sub(y, x, y); if (err == MP_OKAY) { - if (mp_cmp_d(R->y, 0) == MP_LT) - err = mp_add(R->y, modulus, R->y); + if (mp_cmp_d(y, 0) == MP_LT) + err = mp_add(y, modulus, y); } /* Y = Y * T1 */ if (err == MP_OKAY) - err = mp_mul(R->y, &t1, R->y); + err = mp_mul(y, &t1, y); if (err == MP_OKAY) - err = mp_montgomery_reduce(R->y, modulus, *mp); + err = mp_montgomery_reduce(y, modulus, *mp); /* Y = Y - T2 */ if (err == MP_OKAY) - err = mp_sub(R->y, &t2, R->y); + err = mp_sub(y, &t2, y); if (err == MP_OKAY) { - if (mp_cmp_d(R->y, 0) == MP_LT) - err = mp_add(R->y, modulus, R->y); + if (mp_cmp_d(y, 0) == MP_LT) + err = mp_add(y, modulus, y); } +#ifdef ALT_ECC_SIZE + if (err == MP_OKAY) + err = mp_copy(x, R->x); + if (err == MP_OKAY) + err = mp_copy(y, R->y); + if (err == MP_OKAY) + err = mp_copy(z, R->z); +#endif + +#ifndef USE_FAST_MATH /* clean up */ mp_clear(&t1); mp_clear(&t2); 
+#endif return err; } -#endif /* USE_FAST_MATH */ /** Map a projective jacbobian point back to affine space From 44b1f98b39e242c4a845f8b4150cf32dcdeb31f1 Mon Sep 17 00:00:00 2001 From: David Garske Date: Fri, 6 May 2016 10:59:32 -0700 Subject: [PATCH 12/16] Fixed issue with ALT_ECC_SIZE and default value for FP_MAX_BITS_ECC so its based on max enabled ECC curve bits. --- wolfssl/wolfcrypt/ecc.h | 27 ++++++++++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/wolfssl/wolfcrypt/ecc.h b/wolfssl/wolfcrypt/ecc.h index b952e68b9..9f97b902c 100644 --- a/wolfssl/wolfcrypt/ecc.h +++ b/wolfssl/wolfcrypt/ecc.h @@ -61,6 +61,27 @@ typedef struct { } ecc_set_type; +/* Determine max ECC bits based on enabled curves */ +#if defined(HAVE_ECC521) || defined(HAVE_ALL_CURVES) + #define MAX_ECC_BITS 528 +#elif defined(HAVE_ECC384) + #define MAX_ECC_BITS 384 +#elif defined(HAVE_ECC224) + #define MAX_ECC_BITS 224 +#elif !defined(NO_ECC256) + #define MAX_ECC_BITS 256 +#elif defined(HAVE_ECC192) + #define MAX_ECC_BITS 192 +#elif defined(HAVE_ECC160) + #define MAX_ECC_BITS 160 +#elif defined(HAVE_ECC128) + #define MAX_ECC_BITS 128 +#elif defined(HAVE_ECC112) + #define MAX_ECC_BITS 112 +#endif + + + #ifdef ALT_ECC_SIZE /* Note on ALT_ECC_SIZE: @@ -91,12 +112,16 @@ typedef struct { #endif #ifndef FP_MAX_BITS_ECC - #define FP_MAX_BITS_ECC 528 + /* This value should be double the max ecc bit size */ + #define FP_MAX_BITS_ECC (MAX_ECC_BITS*2) #endif + #define FP_MAX_SIZE_ECC (FP_MAX_BITS_ECC+(8*DIGIT_BIT)) + #if FP_MAX_BITS_ECC % CHAR_BIT #error FP_MAX_BITS_ECC must be a multiple of CHAR_BIT #endif + #define FP_SIZE_ECC (FP_MAX_SIZE_ECC/DIGIT_BIT) /* This needs to match the size of the fp_int struct, except the From 8c9b8a596ad61a8c1f0d3780350240979e49b784 Mon Sep 17 00:00:00 2001 From: David Garske Date: Mon, 9 May 2016 09:50:51 -0700 Subject: [PATCH 13/16] Fixed calculation of max ECC bits with ALT_ECC_SIZE defined so it only allocates what is required. 
For 8-bit aligned curve sizes its double the max bits. For un-aligned curves sized, like ECC521, its 521 8-bit aligned, doubled, plus digit bit. --- wolfssl/wolfcrypt/ecc.h | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/wolfssl/wolfcrypt/ecc.h b/wolfssl/wolfcrypt/ecc.h index 9f97b902c..f9ca3b41a 100644 --- a/wolfssl/wolfcrypt/ecc.h +++ b/wolfssl/wolfcrypt/ecc.h @@ -63,7 +63,7 @@ typedef struct { /* Determine max ECC bits based on enabled curves */ #if defined(HAVE_ECC521) || defined(HAVE_ALL_CURVES) - #define MAX_ECC_BITS 528 + #define MAX_ECC_BITS 521 #elif defined(HAVE_ECC384) #define MAX_ECC_BITS 384 #elif defined(HAVE_ECC224) @@ -111,18 +111,26 @@ typedef struct { #error USE_FAST_MATH must be defined to use ALT_ECC_SIZE #endif +/* determine max bits required for ECC math */ #ifndef FP_MAX_BITS_ECC - /* This value should be double the max ecc bit size */ - #define FP_MAX_BITS_ECC (MAX_ECC_BITS*2) + /* check alignment */ + #if ((MAX_ECC_BITS & CHAR_BIT) == 0) + /* max bits is double */ + #define FP_MAX_BITS_ECC (MAX_ECC_BITS * 2) + #else + /* max bits is rounded up to 8-bit alignment, doubled, plus one digit of fudge */ + #define FP_MAX_BITS_ECC ((((MAX_ECC_BITS + CHAR_BIT) & ~CHAR_BIT) * 2) + DIGIT_BIT) + #endif #endif -#define FP_MAX_SIZE_ECC (FP_MAX_BITS_ECC+(8*DIGIT_BIT)) - +/* verify alignment */ #if FP_MAX_BITS_ECC % CHAR_BIT #error FP_MAX_BITS_ECC must be a multiple of CHAR_BIT #endif -#define FP_SIZE_ECC (FP_MAX_SIZE_ECC/DIGIT_BIT) +/* determine buffer size */ +#define FP_SIZE_ECC (FP_MAX_BITS_ECC/DIGIT_BIT) + /* This needs to match the size of the fp_int struct, except the * fp_digit array will be shorter. */ From 8f6352725a8312c2a88d80132c7a8b250f6680cb Mon Sep 17 00:00:00 2001 From: David Garske Date: Mon, 9 May 2016 10:34:37 -0700 Subject: [PATCH 14/16] Fixed math for FP_MAX_BITS_ECC calculations. Error in alignment check. Altered non-aligned formula to be (max bits * 2) + digit, then 8-bit aligned. 
Cleanup of the example user_settings.h. --- IDE/ROWLEY-CROSSWORKS-ARM/user_settings.h | 7 ++++--- wolfssl/wolfcrypt/ecc.h | 6 +++--- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/IDE/ROWLEY-CROSSWORKS-ARM/user_settings.h b/IDE/ROWLEY-CROSSWORKS-ARM/user_settings.h index 7947ef34e..f8d751ff0 100644 --- a/IDE/ROWLEY-CROSSWORKS-ARM/user_settings.h +++ b/IDE/ROWLEY-CROSSWORKS-ARM/user_settings.h @@ -79,12 +79,13 @@ extern "C" { #define ECC_TIMING_RESISTANT #ifdef USE_FAST_MATH - /* Max ECC bits (curve size * 8). ECC256 is (32*8) = 256 */ - /* Note: ECC521 requires (curve size * 16): (66*16) = 1056 */ + /* use reduced size math buffers for ecc points */ #undef ALT_ECC_SIZE #define ALT_ECC_SIZE + + /* optionally override the default max ecc bits */ #undef FP_MAX_BITS_ECC - #define FP_MAX_BITS_ECC 1056 + //#define FP_MAX_BITS_ECC 512 /* Enable TFM optimizations for ECC */ #define TFM_ECC192 diff --git a/wolfssl/wolfcrypt/ecc.h b/wolfssl/wolfcrypt/ecc.h index f9ca3b41a..a1bc1f61a 100644 --- a/wolfssl/wolfcrypt/ecc.h +++ b/wolfssl/wolfcrypt/ecc.h @@ -114,12 +114,12 @@ typedef struct { /* determine max bits required for ECC math */ #ifndef FP_MAX_BITS_ECC /* check alignment */ - #if ((MAX_ECC_BITS & CHAR_BIT) == 0) + #if (MAX_ECC_BITS % CHAR_BIT) == 0 /* max bits is double */ #define FP_MAX_BITS_ECC (MAX_ECC_BITS * 2) #else - /* max bits is rounded up to 8-bit alignment, doubled, plus one digit of fudge */ - #define FP_MAX_BITS_ECC ((((MAX_ECC_BITS + CHAR_BIT) & ~CHAR_BIT) * 2) + DIGIT_BIT) + /* max bits is doubled, plus one digit of fudge then 8-bit aligned */ + #define FP_MAX_BITS_ECC (((MAX_ECC_BITS * 2) + DIGIT_BIT) & ~(CHAR_BIT-1)) #endif #endif From d71d0f2cb42df5bd8798f0c18ca84ce7dfb371c4 Mon Sep 17 00:00:00 2001 From: David Garske Date: Mon, 9 May 2016 13:29:25 -0700 Subject: [PATCH 15/16] Fix with fast math disabled so ecc_projective_add_point uses temp local variable for x,y,z result. 
--- wolfcrypt/src/ecc.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/wolfcrypt/src/ecc.c b/wolfcrypt/src/ecc.c index cf5989344..9167eac36 100644 --- a/wolfcrypt/src/ecc.c +++ b/wolfcrypt/src/ecc.c @@ -281,7 +281,7 @@ int ecc_projective_add_point(ecc_point* P, ecc_point* Q, ecc_point* R, mp_int* modulus, mp_digit* mp) { mp_int t1, t2; -#ifdef ALT_ECC_SIZE +#if (defined(USE_FAST_MATH) && defined(ALT_ECC_SIZE)) || !defined(USE_FAST_MATH) mp_int rx, ry, rz; #endif mp_int *x, *y, *z; @@ -313,7 +313,7 @@ int ecc_projective_add_point(ecc_point* P, ecc_point* Q, ecc_point* R, return err; } -#ifdef ALT_ECC_SIZE +#if (defined(USE_FAST_MATH) && defined(ALT_ECC_SIZE)) || !defined(USE_FAST_MATH) /* Use local stack variable */ x = &rx; y = &ry; z = &rz; @@ -520,7 +520,7 @@ int ecc_projective_add_point(ecc_point* P, ecc_point* Q, ecc_point* R, if (err == MP_OKAY) err = mp_div_2(y, y); -#ifdef ALT_ECC_SIZE +#if (defined(USE_FAST_MATH) && defined(ALT_ECC_SIZE)) || !defined(USE_FAST_MATH) if (err == MP_OKAY) err = mp_copy(x, R->x); if (err == MP_OKAY) From 2fb45069225330d9ecce95a2795d4e6d50bd0f57 Mon Sep 17 00:00:00 2001 From: David Garske Date: Tue, 10 May 2016 12:20:39 -0700 Subject: [PATCH 16/16] Fixes to FP_MAX_BITS_ECC calculation. Alignment check against digit_bits is based on max ecc bits times two. If alignment check fails we add a digit_bit to make sure we have enough room. 
--- wolfssl/wolfcrypt/ecc.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/wolfssl/wolfcrypt/ecc.h b/wolfssl/wolfcrypt/ecc.h index a1bc1f61a..e67a49720 100644 --- a/wolfssl/wolfcrypt/ecc.h +++ b/wolfssl/wolfcrypt/ecc.h @@ -114,18 +114,18 @@ typedef struct { /* determine max bits required for ECC math */ #ifndef FP_MAX_BITS_ECC /* check alignment */ - #if (MAX_ECC_BITS % CHAR_BIT) == 0 + #if ((MAX_ECC_BITS * 2) % DIGIT_BIT) == 0 /* max bits is double */ #define FP_MAX_BITS_ECC (MAX_ECC_BITS * 2) #else - /* max bits is doubled, plus one digit of fudge then 8-bit aligned */ - #define FP_MAX_BITS_ECC (((MAX_ECC_BITS * 2) + DIGIT_BIT) & ~(CHAR_BIT-1)) + /* max bits is doubled, plus one digit of fudge */ + #define FP_MAX_BITS_ECC ((MAX_ECC_BITS * 2) + DIGIT_BIT) + #endif +#else + /* verify alignment */ + #if FP_MAX_BITS_ECC % CHAR_BIT + #error FP_MAX_BITS_ECC must be a multiple of CHAR_BIT #endif -#endif - -/* verify alignment */ -#if FP_MAX_BITS_ECC % CHAR_BIT - #error FP_MAX_BITS_ECC must be a multiple of CHAR_BIT #endif /* determine buffer size */