diff --git a/IDE/ROWLEY-CROSSWORKS-ARM/user_settings.h b/IDE/ROWLEY-CROSSWORKS-ARM/user_settings.h index 7947ef34e..f8d751ff0 100644 --- a/IDE/ROWLEY-CROSSWORKS-ARM/user_settings.h +++ b/IDE/ROWLEY-CROSSWORKS-ARM/user_settings.h @@ -79,12 +79,13 @@ extern "C" { #define ECC_TIMING_RESISTANT #ifdef USE_FAST_MATH - /* Max ECC bits (curve size * 8). ECC256 is (32*8) = 256 */ - /* Note: ECC521 requires (curve size * 16): (66*16) = 1056 */ + /* use reduced size math buffers for ecc points */ #undef ALT_ECC_SIZE #define ALT_ECC_SIZE + + /* optionally override the default max ecc bits */ #undef FP_MAX_BITS_ECC - #define FP_MAX_BITS_ECC 1056 + //#define FP_MAX_BITS_ECC 512 /* Enable TFM optimizations for ECC */ #define TFM_ECC192 diff --git a/configure.ac b/configure.ac index a46fcc17b..3f496a7d7 100644 --- a/configure.ac +++ b/configure.ac @@ -731,6 +731,14 @@ then ECC_DEFAULT=yes fi +# ECC Shamir +AC_ARG_ENABLE([eccshamir], + [AS_HELP_STRING([--enable-eccshamir],[Enable ECC Shamir (default: enabled on x86_64)])], + [ ENABLED_ECC_SHAMIR=$enableval ], + [ ENABLED_ECC_SHAMIR=$ECC_DEFAULT ] + ) + + # ECC AC_ARG_ENABLE([ecc], [AS_HELP_STRING([--enable-ecc],[Enable ECC (default: enabled on x86_64)])], @@ -751,7 +759,11 @@ fi if test "$ENABLED_ECC" = "yes" then - AM_CFLAGS="$AM_CFLAGS -DHAVE_ECC -DTFM_ECC256 -DECC_SHAMIR" + AM_CFLAGS="$AM_CFLAGS -DHAVE_ECC -DTFM_ECC256" + if test "$ENABLED_ECC_SHAMIR" = "yes" + then + AM_CFLAGS="$AM_CFLAGS -DECC_SHAMIR" + fi fi AM_CONDITIONAL([BUILD_ECC], [test "x$ENABLED_ECC" = "xyes"]) @@ -1961,8 +1973,13 @@ then if test "x$ENABLED_ECC" = "xno" then ENABLED_ECC="yes" - AM_CFLAGS="$AM_CFLAGS -DHAVE_ECC -DTFM_ECC256 -DECC_SHAMIR" + AM_CFLAGS="$AM_CFLAGS -DHAVE_ECC -DTFM_ECC256" AM_CONDITIONAL([BUILD_ECC], [test "x$ENABLED_ECC" = "xyes"]) + + if test "$ENABLED_ECC_SHAMIR" = "yes" + then + AM_CFLAGS="$AM_CFLAGS -DECC_SHAMIR" + fi fi if test "x$ENABLED_OPENSSLEXTRA" = "xno" then @@ -2029,8 +2046,13 @@ then if test "x$ENABLED_ECC" = "xno" then 
ENABLED_ECC="yes" - AM_CFLAGS="$AM_CFLAGS -DHAVE_ECC -DTFM_ECC256 -DECC_SHAMIR" + AM_CFLAGS="$AM_CFLAGS -DHAVE_ECC -DTFM_ECC256" AM_CONDITIONAL([BUILD_ECC], [test "x$ENABLED_ECC" = "xyes"]) + + if test "$ENABLED_ECC_SHAMIR" = "yes" + then + AM_CFLAGS="$AM_CFLAGS -DECC_SHAMIR" + fi fi if test "x$ENABLED_PKCALLBACKS" = "xno" then @@ -2122,8 +2144,13 @@ then then ENABLED_OPENSSLEXTRA="yes" ENABLED_ECC="yes" - AM_CFLAGS="$AM_CFLAGS -DHAVE_ECC -DTFM_ECC256 -DECC_SHAMIR" + AM_CFLAGS="$AM_CFLAGS -DHAVE_ECC -DTFM_ECC256" AM_CONDITIONAL([BUILD_ECC], [test "x$ENABLED_ECC" = "xyes"]) + + if test "$ENABLED_ECC_SHAMIR" = "yes" + then + AM_CFLAGS="$AM_CFLAGS -DECC_SHAMIR" + fi fi AM_CFLAGS="$AM_CFLAGS -DHAVE_STUNNEL -DWOLFSSL_ALWAYS_VERIFY_CB" diff --git a/wolfcrypt/src/ecc.c b/wolfcrypt/src/ecc.c index f3a47793b..9167eac36 100644 --- a/wolfcrypt/src/ecc.c +++ b/wolfcrypt/src/ecc.c @@ -268,285 +268,6 @@ static mp_digit get_digit(mp_int* a, int n) } -#if defined(USE_FAST_MATH) - -/* fast math accelerated version, but not for fp ecc yet */ - -/** - Add two ECC points - P The point to add - Q The point to add - R [out] The destination of the double - modulus The modulus of the field the ECC curve is in - mp The "b" value from montgomery_setup() - return MP_OKAY on success -*/ -int ecc_projective_add_point(ecc_point *P, ecc_point *Q, ecc_point *R, - mp_int* modulus, mp_digit* mp) -{ - fp_int t1, t2, x, y, z; - int err; - - if (P == NULL || Q == NULL || R == NULL || modulus == NULL || mp == NULL) - return ECC_BAD_ARG_E; - - if ((err = mp_init_multi(&t1, &t2, &x, &y, &z, NULL)) != MP_OKAY) { - return err; - } - - /* should we dbl instead? 
*/ - fp_sub(modulus, Q->y, &t1); - if ( (fp_cmp(P->x, Q->x) == FP_EQ) && - (get_digit_count(Q->z) && fp_cmp(P->z, Q->z) == FP_EQ) && - (fp_cmp(P->y, Q->y) == FP_EQ || fp_cmp(P->y, &t1) == FP_EQ)) { - return ecc_projective_dbl_point(P, R, modulus, mp); - } - - fp_copy(P->x, &x); - fp_copy(P->y, &y); - fp_copy(P->z, &z); - - /* if Z is one then these are no-operations */ - if (get_digit_count(Q->z)) { - /* T1 = Z' * Z' */ - fp_sqr(Q->z, &t1); - fp_montgomery_reduce(&t1, modulus, *mp); - /* X = X * T1 */ - fp_mul(&t1, &x, &x); - fp_montgomery_reduce(&x, modulus, *mp); - /* T1 = Z' * T1 */ - fp_mul(Q->z, &t1, &t1); - fp_montgomery_reduce(&t1, modulus, *mp); - /* Y = Y * T1 */ - fp_mul(&t1, &y, &y); - fp_montgomery_reduce(&y, modulus, *mp); - } - - /* T1 = Z*Z */ - fp_sqr(&z, &t1); - fp_montgomery_reduce(&t1, modulus, *mp); - /* T2 = X' * T1 */ - fp_mul(Q->x, &t1, &t2); - fp_montgomery_reduce(&t2, modulus, *mp); - /* T1 = Z * T1 */ - fp_mul(&z, &t1, &t1); - fp_montgomery_reduce(&t1, modulus, *mp); - /* T1 = Y' * T1 */ - fp_mul(Q->y, &t1, &t1); - fp_montgomery_reduce(&t1, modulus, *mp); - - /* Y = Y - T1 */ - fp_sub(&y, &t1, &y); - if (fp_cmp_d(&y, 0) == FP_LT) { - fp_add(&y, modulus, &y); - } - /* T1 = 2T1 */ - fp_add(&t1, &t1, &t1); - if (fp_cmp(&t1, modulus) != FP_LT) { - fp_sub(&t1, modulus, &t1); - } - /* T1 = Y + T1 */ - fp_add(&t1, &y, &t1); - if (fp_cmp(&t1, modulus) != FP_LT) { - fp_sub(&t1, modulus, &t1); - } - /* X = X - T2 */ - fp_sub(&x, &t2, &x); - if (fp_cmp_d(&x, 0) == FP_LT) { - fp_add(&x, modulus, &x); - } - /* T2 = 2T2 */ - fp_add(&t2, &t2, &t2); - if (fp_cmp(&t2, modulus) != FP_LT) { - fp_sub(&t2, modulus, &t2); - } - /* T2 = X + T2 */ - fp_add(&t2, &x, &t2); - if (fp_cmp(&t2, modulus) != FP_LT) { - fp_sub(&t2, modulus, &t2); - } - - /* if Z' != 1 */ - if (get_digit_count(Q->z)) { - /* Z = Z * Z' */ - fp_mul(&z, Q->z, &z); - fp_montgomery_reduce(&z, modulus, *mp); - } - - /* Z = Z * X */ - fp_mul(&z, &x, &z); - fp_montgomery_reduce(&z, modulus, *mp); 
- - /* T1 = T1 * X */ - fp_mul(&t1, &x, &t1); - fp_montgomery_reduce(&t1, modulus, *mp); - /* X = X * X */ - fp_sqr(&x, &x); - fp_montgomery_reduce(&x, modulus, *mp); - /* T2 = T2 * x */ - fp_mul(&t2, &x, &t2); - fp_montgomery_reduce(&t2, modulus, *mp); - /* T1 = T1 * X */ - fp_mul(&t1, &x, &t1); - fp_montgomery_reduce(&t1, modulus, *mp); - - /* X = Y*Y */ - fp_sqr(&y, &x); - fp_montgomery_reduce(&x, modulus, *mp); - /* X = X - T2 */ - fp_sub(&x, &t2, &x); - if (fp_cmp_d(&x, 0) == FP_LT) { - fp_add(&x, modulus, &x); - } - - /* T2 = T2 - X */ - fp_sub(&t2, &x, &t2); - if (fp_cmp_d(&t2, 0) == FP_LT) { - fp_add(&t2, modulus, &t2); - } - /* T2 = T2 - X */ - fp_sub(&t2, &x, &t2); - if (fp_cmp_d(&t2, 0) == FP_LT) { - fp_add(&t2, modulus, &t2); - } - /* T2 = T2 * Y */ - fp_mul(&t2, &y, &t2); - fp_montgomery_reduce(&t2, modulus, *mp); - /* Y = T2 - T1 */ - fp_sub(&t2, &t1, &y); - if (fp_cmp_d(&y, 0) == FP_LT) { - fp_add(&y, modulus, &y); - } - /* Y = Y/2 */ - if (fp_isodd(&y)) { - fp_add(&y, modulus, &y); - } - fp_div_2(&y, &y); - - fp_copy(&x, R->x); - fp_copy(&y, R->y); - fp_copy(&z, R->z); - - return MP_OKAY; -} - - -/** - Double an ECC point - P The point to double - R [out] The destination of the double - modulus The modulus of the field the ECC curve is in - mp The "b" value from montgomery_setup() - return MP_OKAY on success -*/ -int ecc_projective_dbl_point(ecc_point *P, ecc_point *R, mp_int* modulus, - mp_digit* mp) -{ - fp_int t1, t2; - int err; - - if (P == NULL || R == NULL || modulus == NULL || mp == NULL) - return ECC_BAD_ARG_E; - - if (P != R) { - fp_copy(P->x, R->x); - fp_copy(P->y, R->y); - fp_copy(P->z, R->z); - } - - if ((err = mp_init_multi(&t1, &t2, NULL, NULL, NULL, NULL)) != MP_OKAY) { - return err; - } - - /* t1 = Z * Z */ - fp_sqr(R->z, &t1); - fp_montgomery_reduce(&t1, modulus, *mp); - /* Z = Y * Z */ - fp_mul(R->z, R->y, R->z); - fp_montgomery_reduce(R->z, modulus, *mp); - /* Z = 2Z */ - fp_add(R->z, R->z, R->z); - if (fp_cmp(R->z, modulus) != 
FP_LT) { - fp_sub(R->z, modulus, R->z); - } - - /* &t2 = X - T1 */ - fp_sub(R->x, &t1, &t2); - if (fp_cmp_d(&t2, 0) == FP_LT) { - fp_add(&t2, modulus, &t2); - } - /* T1 = X + T1 */ - fp_add(&t1, R->x, &t1); - if (fp_cmp(&t1, modulus) != FP_LT) { - fp_sub(&t1, modulus, &t1); - } - /* T2 = T1 * T2 */ - fp_mul(&t1, &t2, &t2); - fp_montgomery_reduce(&t2, modulus, *mp); - /* T1 = 2T2 */ - fp_add(&t2, &t2, &t1); - if (fp_cmp(&t1, modulus) != FP_LT) { - fp_sub(&t1, modulus, &t1); - } - /* T1 = T1 + T2 */ - fp_add(&t1, &t2, &t1); - if (fp_cmp(&t1, modulus) != FP_LT) { - fp_sub(&t1, modulus, &t1); - } - - /* Y = 2Y */ - fp_add(R->y, R->y, R->y); - if (fp_cmp(R->y, modulus) != FP_LT) { - fp_sub(R->y, modulus, R->y); - } - /* Y = Y * Y */ - fp_sqr(R->y, R->y); - fp_montgomery_reduce(R->y, modulus, *mp); - /* T2 = Y * Y */ - fp_sqr(R->y, &t2); - fp_montgomery_reduce(&t2, modulus, *mp); - /* T2 = T2/2 */ - if (fp_isodd(&t2)) { - fp_add(&t2, modulus, &t2); - } - fp_div_2(&t2, &t2); - /* Y = Y * X */ - fp_mul(R->y, R->x, R->y); - fp_montgomery_reduce(R->y, modulus, *mp); - - /* X = T1 * T1 */ - fp_sqr(&t1, R->x); - fp_montgomery_reduce(R->x, modulus, *mp); - /* X = X - Y */ - fp_sub(R->x, R->y, R->x); - if (fp_cmp_d(R->x, 0) == FP_LT) { - fp_add(R->x, modulus, R->x); - } - /* X = X - Y */ - fp_sub(R->x, R->y, R->x); - if (fp_cmp_d(R->x, 0) == FP_LT) { - fp_add(R->x, modulus, R->x); - } - - /* Y = Y - X */ - fp_sub(R->y, R->x, R->y); - if (fp_cmp_d(R->y, 0) == FP_LT) { - fp_add(R->y, modulus, R->y); - } - /* Y = Y * T1 */ - fp_mul(R->y, &t1, R->y); - fp_montgomery_reduce(R->y, modulus, *mp); - /* Y = Y - T2 */ - fp_sub(R->y, &t2, R->y); - if (fp_cmp_d(R->y, 0) == FP_LT) { - fp_add(R->y, modulus, R->y); - } - - return MP_OKAY; -} - -#else /* USE_FAST_MATH */ - /** Add two ECC points P The point to add @@ -559,43 +280,63 @@ int ecc_projective_dbl_point(ecc_point *P, ecc_point *R, mp_int* modulus, int ecc_projective_add_point(ecc_point* P, ecc_point* Q, ecc_point* R, mp_int* modulus, 
mp_digit* mp) { - mp_int t1; - mp_int t2; - mp_int x; - mp_int y; - mp_int z; + mp_int t1, t2; +#if (defined(USE_FAST_MATH) && defined(ALT_ECC_SIZE)) || !defined(USE_FAST_MATH) + mp_int rx, ry, rz; +#endif + mp_int *x, *y, *z; int err; if (P == NULL || Q == NULL || R == NULL || modulus == NULL || mp == NULL) return ECC_BAD_ARG_E; - if ((err = mp_init_multi(&t1, &t2, &x, &y, &z, NULL)) != MP_OKAY) { + if ((err = mp_init_multi(&t1, &t2, NULL, NULL, NULL, NULL)) != MP_OKAY) { return err; } /* should we dbl instead? */ - err = mp_sub(modulus, Q->y, &t1); - + if (err == MP_OKAY) + err = mp_sub(modulus, Q->y, &t1); if (err == MP_OKAY) { if ( (mp_cmp(P->x, Q->x) == MP_EQ) && (get_digit_count(Q->z) && mp_cmp(P->z, Q->z) == MP_EQ) && (mp_cmp(P->y, Q->y) == MP_EQ || mp_cmp(P->y, &t1) == MP_EQ)) { mp_clear(&t1); mp_clear(&t2); - mp_clear(&x); - mp_clear(&y); - mp_clear(&z); - return ecc_projective_dbl_point(P, R, modulus, mp); } } + + if (err != MP_OKAY) { + mp_clear(&t1); + mp_clear(&t2); + return err; + } + +#if (defined(USE_FAST_MATH) && defined(ALT_ECC_SIZE)) || !defined(USE_FAST_MATH) + /* Use local stack variable */ + x = &rx; + y = &ry; + z = &rz; + + if ((err = mp_init_multi(x, y, z, NULL, NULL, NULL)) != MP_OKAY) { + mp_clear(&t1); + mp_clear(&t2); + return err; + } +#else + /* Use destination directly */ + x = R->x; + y = R->y; + z = R->z; +#endif if (err == MP_OKAY) - err = mp_copy(P->x, &x); + err = mp_copy(P->x, x); if (err == MP_OKAY) - err = mp_copy(P->y, &y); + err = mp_copy(P->y, y); if (err == MP_OKAY) - err = mp_copy(P->z, &z); + err = mp_copy(P->z, z); /* if Z is one then these are no-operations */ if (err == MP_OKAY) { @@ -607,9 +348,9 @@ int ecc_projective_add_point(ecc_point* P, ecc_point* Q, ecc_point* R, /* X = X * T1 */ if (err == MP_OKAY) - err = mp_mul(&t1, &x, &x); + err = mp_mul(&t1, x, x); if (err == MP_OKAY) - err = mp_montgomery_reduce(&x, modulus, *mp); + err = mp_montgomery_reduce(x, modulus, *mp); /* T1 = Z' * T1 */ if (err == MP_OKAY) @@ 
-619,15 +360,15 @@ int ecc_projective_add_point(ecc_point* P, ecc_point* Q, ecc_point* R, /* Y = Y * T1 */ if (err == MP_OKAY) - err = mp_mul(&t1, &y, &y); + err = mp_mul(&t1, y, y); if (err == MP_OKAY) - err = mp_montgomery_reduce(&y, modulus, *mp); + err = mp_montgomery_reduce(y, modulus, *mp); } } /* T1 = Z*Z */ if (err == MP_OKAY) - err = mp_sqr(&z, &t1); + err = mp_sqr(z, &t1); if (err == MP_OKAY) err = mp_montgomery_reduce(&t1, modulus, *mp); @@ -639,7 +380,7 @@ int ecc_projective_add_point(ecc_point* P, ecc_point* Q, ecc_point* R, /* T1 = Z * T1 */ if (err == MP_OKAY) - err = mp_mul(&z, &t1, &t1); + err = mp_mul(z, &t1, &t1); if (err == MP_OKAY) err = mp_montgomery_reduce(&t1, modulus, *mp); @@ -651,10 +392,10 @@ int ecc_projective_add_point(ecc_point* P, ecc_point* Q, ecc_point* R, /* Y = Y - T1 */ if (err == MP_OKAY) - err = mp_sub(&y, &t1, &y); + err = mp_sub(y, &t1, y); if (err == MP_OKAY) { - if (mp_cmp_d(&y, 0) == MP_LT) - err = mp_add(&y, modulus, &y); + if (mp_cmp_d(y, 0) == MP_LT) + err = mp_add(y, modulus, y); } /* T1 = 2T1 */ if (err == MP_OKAY) @@ -665,17 +406,17 @@ int ecc_projective_add_point(ecc_point* P, ecc_point* Q, ecc_point* R, } /* T1 = Y + T1 */ if (err == MP_OKAY) - err = mp_add(&t1, &y, &t1); + err = mp_add(&t1, y, &t1); if (err == MP_OKAY) { if (mp_cmp(&t1, modulus) != MP_LT) err = mp_sub(&t1, modulus, &t1); } /* X = X - T2 */ if (err == MP_OKAY) - err = mp_sub(&x, &t2, &x); + err = mp_sub(x, &t2, x); if (err == MP_OKAY) { - if (mp_cmp_d(&x, 0) == MP_LT) - err = mp_add(&x, modulus, &x); + if (mp_cmp_d(x, 0) == MP_LT) + err = mp_add(x, modulus, x); } /* T2 = 2T2 */ if (err == MP_OKAY) @@ -686,7 +427,7 @@ int ecc_projective_add_point(ecc_point* P, ecc_point* Q, ecc_point* R, } /* T2 = X + T2 */ if (err == MP_OKAY) - err = mp_add(&t2, &x, &t2); + err = mp_add(&t2, x, &t2); if (err == MP_OKAY) { if (mp_cmp(&t2, modulus) != MP_LT) err = mp_sub(&t2, modulus, &t2); @@ -695,103 +436,104 @@ int ecc_projective_add_point(ecc_point* P, 
ecc_point* Q, ecc_point* R, if (err == MP_OKAY) { if (get_digit_count(Q->z)) { /* Z = Z * Z' */ - err = mp_mul(&z, Q->z, &z); + err = mp_mul(z, Q->z, z); if (err == MP_OKAY) - err = mp_montgomery_reduce(&z, modulus, *mp); + err = mp_montgomery_reduce(z, modulus, *mp); } } /* Z = Z * X */ if (err == MP_OKAY) - err = mp_mul(&z, &x, &z); + err = mp_mul(z, x, z); if (err == MP_OKAY) - err = mp_montgomery_reduce(&z, modulus, *mp); + err = mp_montgomery_reduce(z, modulus, *mp); /* T1 = T1 * X */ if (err == MP_OKAY) - err = mp_mul(&t1, &x, &t1); + err = mp_mul(&t1, x, &t1); if (err == MP_OKAY) err = mp_montgomery_reduce(&t1, modulus, *mp); /* X = X * X */ if (err == MP_OKAY) - err = mp_sqr(&x, &x); + err = mp_sqr(x, x); if (err == MP_OKAY) - err = mp_montgomery_reduce(&x, modulus, *mp); + err = mp_montgomery_reduce(x, modulus, *mp); /* T2 = T2 * x */ if (err == MP_OKAY) - err = mp_mul(&t2, &x, &t2); + err = mp_mul(&t2, x, &t2); if (err == MP_OKAY) err = mp_montgomery_reduce(&t2, modulus, *mp); /* T1 = T1 * X */ if (err == MP_OKAY) - err = mp_mul(&t1, &x, &t1); + err = mp_mul(&t1, x, &t1); if (err == MP_OKAY) err = mp_montgomery_reduce(&t1, modulus, *mp); /* X = Y*Y */ if (err == MP_OKAY) - err = mp_sqr(&y, &x); + err = mp_sqr(y, x); if (err == MP_OKAY) - err = mp_montgomery_reduce(&x, modulus, *mp); + err = mp_montgomery_reduce(x, modulus, *mp); /* X = X - T2 */ if (err == MP_OKAY) - err = mp_sub(&x, &t2, &x); + err = mp_sub(x, &t2, x); if (err == MP_OKAY) { - if (mp_cmp_d(&x, 0) == MP_LT) - err = mp_add(&x, modulus, &x); + if (mp_cmp_d(x, 0) == MP_LT) + err = mp_add(x, modulus, x); } /* T2 = T2 - X */ if (err == MP_OKAY) - err = mp_sub(&t2, &x, &t2); + err = mp_sub(&t2, x, &t2); if (err == MP_OKAY) { if (mp_cmp_d(&t2, 0) == MP_LT) err = mp_add(&t2, modulus, &t2); } /* T2 = T2 - X */ if (err == MP_OKAY) - err = mp_sub(&t2, &x, &t2); + err = mp_sub(&t2, x, &t2); if (err == MP_OKAY) { if (mp_cmp_d(&t2, 0) == MP_LT) err = mp_add(&t2, modulus, &t2); } /* T2 = T2 * Y */ if 
(err == MP_OKAY) - err = mp_mul(&t2, &y, &t2); + err = mp_mul(&t2, y, &t2); if (err == MP_OKAY) err = mp_montgomery_reduce(&t2, modulus, *mp); /* Y = T2 - T1 */ if (err == MP_OKAY) - err = mp_sub(&t2, &t1, &y); + err = mp_sub(&t2, &t1, y); if (err == MP_OKAY) { - if (mp_cmp_d(&y, 0) == MP_LT) - err = mp_add(&y, modulus, &y); + if (mp_cmp_d(y, 0) == MP_LT) + err = mp_add(y, modulus, y); } /* Y = Y/2 */ if (err == MP_OKAY) { - if (mp_isodd(&y)) - err = mp_add(&y, modulus, &y); + if (mp_isodd(y)) + err = mp_add(y, modulus, y); } if (err == MP_OKAY) - err = mp_div_2(&y, &y); + err = mp_div_2(y, y); +#if (defined(USE_FAST_MATH) && defined(ALT_ECC_SIZE)) || !defined(USE_FAST_MATH) if (err == MP_OKAY) - err = mp_copy(&x, R->x); + err = mp_copy(x, R->x); if (err == MP_OKAY) - err = mp_copy(&y, R->y); + err = mp_copy(y, R->y); if (err == MP_OKAY) - err = mp_copy(&z, R->z); + err = mp_copy(z, R->z); +#endif +#ifndef USE_FAST_MATH /* clean up */ mp_clear(&t1); mp_clear(&t2); - mp_clear(&x); - mp_clear(&y); - mp_clear(&z); +#endif return err; } @@ -808,8 +550,11 @@ int ecc_projective_add_point(ecc_point* P, ecc_point* Q, ecc_point* R, int ecc_projective_dbl_point(ecc_point *P, ecc_point *R, mp_int* modulus, mp_digit* mp) { - mp_int t1; - mp_int t2; + mp_int t1, t2; +#ifdef ALT_ECC_SIZE + mp_int rx, ry, rz; +#endif + mp_int *x, *y, *z; int err; if (P == NULL || R == NULL || modulus == NULL || mp == NULL) @@ -819,44 +564,61 @@ int ecc_projective_dbl_point(ecc_point *P, ecc_point *R, mp_int* modulus, return err; } - if (P != R) { - err = mp_copy(P->x, R->x); - if (err == MP_OKAY) - err = mp_copy(P->y, R->y); - if (err == MP_OKAY) - err = mp_copy(P->z, R->z); +#ifdef ALT_ECC_SIZE + /* Use local stack variable */ + x = &rx; + y = &ry; + z = &rz; + + if ((err = mp_init_multi(x, y, z, NULL, NULL, NULL)) != MP_OKAY) { + mp_clear(&t1); + mp_clear(&t2); + return err; } +#else + /* Use destination directly */ + x = R->x; + y = R->y; + z = R->z; +#endif + + if (err == MP_OKAY) + err = 
mp_copy(P->x, x); + if (err == MP_OKAY) + err = mp_copy(P->y, y); + if (err == MP_OKAY) + err = mp_copy(P->z, z); /* t1 = Z * Z */ if (err == MP_OKAY) - err = mp_sqr(R->z, &t1); + err = mp_sqr(z, &t1); if (err == MP_OKAY) err = mp_montgomery_reduce(&t1, modulus, *mp); /* Z = Y * Z */ if (err == MP_OKAY) - err = mp_mul(R->z, R->y, R->z); + err = mp_mul(z, y, z); if (err == MP_OKAY) - err = mp_montgomery_reduce(R->z, modulus, *mp); + err = mp_montgomery_reduce(z, modulus, *mp); /* Z = 2Z */ if (err == MP_OKAY) - err = mp_add(R->z, R->z, R->z); + err = mp_add(z, z, z); if (err == MP_OKAY) { - if (mp_cmp(R->z, modulus) != MP_LT) - err = mp_sub(R->z, modulus, R->z); + if (mp_cmp(z, modulus) != MP_LT) + err = mp_sub(z, modulus, z); } /* T2 = X - T1 */ if (err == MP_OKAY) - err = mp_sub(R->x, &t1, &t2); + err = mp_sub(x, &t1, &t2); if (err == MP_OKAY) { if (mp_cmp_d(&t2, 0) == MP_LT) err = mp_add(&t2, modulus, &t2); } /* T1 = X + T1 */ if (err == MP_OKAY) - err = mp_add(&t1, R->x, &t1); + err = mp_add(&t1, x, &t1); if (err == MP_OKAY) { if (mp_cmp(&t1, modulus) != MP_LT) err = mp_sub(&t1, modulus, &t1); @@ -883,20 +645,20 @@ int ecc_projective_dbl_point(ecc_point *P, ecc_point *R, mp_int* modulus, } /* Y = 2Y */ if (err == MP_OKAY) - err = mp_add(R->y, R->y, R->y); + err = mp_add(y, y, y); if (err == MP_OKAY) { - if (mp_cmp(R->y, modulus) != MP_LT) - err = mp_sub(R->y, modulus, R->y); + if (mp_cmp(y, modulus) != MP_LT) + err = mp_sub(y, modulus, y); } /* Y = Y * Y */ if (err == MP_OKAY) - err = mp_sqr(R->y, R->y); + err = mp_sqr(y, y); if (err == MP_OKAY) - err = mp_montgomery_reduce(R->y, modulus, *mp); + err = mp_montgomery_reduce(y, modulus, *mp); /* T2 = Y * Y */ if (err == MP_OKAY) - err = mp_sqr(R->y, &t2); + err = mp_sqr(y, &t2); if (err == MP_OKAY) err = mp_montgomery_reduce(&t2, modulus, *mp); @@ -910,59 +672,69 @@ int ecc_projective_dbl_point(ecc_point *P, ecc_point *R, mp_int* modulus, /* Y = Y * X */ if (err == MP_OKAY) - err = mp_mul(R->y, R->x, R->y); + err 
= mp_mul(y, x, y); if (err == MP_OKAY) - err = mp_montgomery_reduce(R->y, modulus, *mp); + err = mp_montgomery_reduce(y, modulus, *mp); /* X = T1 * T1 */ if (err == MP_OKAY) - err = mp_sqr(&t1, R->x); + err = mp_sqr(&t1, x); if (err == MP_OKAY) - err = mp_montgomery_reduce(R->x, modulus, *mp); + err = mp_montgomery_reduce(x, modulus, *mp); /* X = X - Y */ if (err == MP_OKAY) - err = mp_sub(R->x, R->y, R->x); + err = mp_sub(x, y, x); if (err == MP_OKAY) { - if (mp_cmp_d(R->x, 0) == MP_LT) - err = mp_add(R->x, modulus, R->x); + if (mp_cmp_d(x, 0) == MP_LT) + err = mp_add(x, modulus, x); } /* X = X - Y */ if (err == MP_OKAY) - err = mp_sub(R->x, R->y, R->x); + err = mp_sub(x, y, x); if (err == MP_OKAY) { - if (mp_cmp_d(R->x, 0) == MP_LT) - err = mp_add(R->x, modulus, R->x); + if (mp_cmp_d(x, 0) == MP_LT) + err = mp_add(x, modulus, x); } /* Y = Y - X */ if (err == MP_OKAY) - err = mp_sub(R->y, R->x, R->y); + err = mp_sub(y, x, y); if (err == MP_OKAY) { - if (mp_cmp_d(R->y, 0) == MP_LT) - err = mp_add(R->y, modulus, R->y); + if (mp_cmp_d(y, 0) == MP_LT) + err = mp_add(y, modulus, y); } /* Y = Y * T1 */ if (err == MP_OKAY) - err = mp_mul(R->y, &t1, R->y); + err = mp_mul(y, &t1, y); if (err == MP_OKAY) - err = mp_montgomery_reduce(R->y, modulus, *mp); + err = mp_montgomery_reduce(y, modulus, *mp); /* Y = Y - T2 */ if (err == MP_OKAY) - err = mp_sub(R->y, &t2, R->y); + err = mp_sub(y, &t2, y); if (err == MP_OKAY) { - if (mp_cmp_d(R->y, 0) == MP_LT) - err = mp_add(R->y, modulus, R->y); + if (mp_cmp_d(y, 0) == MP_LT) + err = mp_add(y, modulus, y); } +#ifdef ALT_ECC_SIZE + if (err == MP_OKAY) + err = mp_copy(x, R->x); + if (err == MP_OKAY) + err = mp_copy(y, R->y); + if (err == MP_OKAY) + err = mp_copy(z, R->z); +#endif + +#ifndef USE_FAST_MATH /* clean up */ mp_clear(&t1); mp_clear(&t2); +#endif return err; } -#endif /* USE_FAST_MATH */ /** Map a projective jacbobian point back to affine space @@ -2762,7 +2534,7 @@ int wc_ecc_export_x963_ex(ecc_key* key, byte* out, word32* 
outLen, } #endif /* HAVE_ECC_KEY_EXPORT */ -/* is ec point on curve described by dp ? */ +/* is ecc point on curve described by dp ? */ static int ecc_is_point(const ecc_set_type* dp, ecc_point* ecp, mp_int* prime) { mp_int b, t1, t2; diff --git a/wolfcrypt/src/integer.c b/wolfcrypt/src/integer.c index 9e9b3d01e..045effb9f 100644 --- a/wolfcrypt/src/integer.c +++ b/wolfcrypt/src/integer.c @@ -40,6 +40,10 @@ #include +#ifdef WOLFSSL_DEBUG_MATH + #include <stdio.h> +#endif + #ifndef NO_WOLFSSL_SMALL_STACK #ifndef WOLFSSL_SMALL_STACK #define WOLFSSL_SMALL_STACK @@ -4628,7 +4632,8 @@ int mp_read_radix (mp_int * a, const char *str, int radix) } #endif /* HAVE_ECC */ -#if defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) +#if defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || \ + defined(WOLFSSL_DEBUG_MATH) /* returns size of ASCII representation */ int mp_radix_size (mp_int *a, int radix, int *size) @@ -4739,7 +4744,36 @@ int mp_toradix (mp_int *a, char *str, int radix) return MP_OKAY; } -#endif /* defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) */ +#ifdef WOLFSSL_DEBUG_MATH +void mp_dump(const char* desc, mp_int* a, byte verbose) +{ + char *buffer; + int size = a->alloc; + + buffer = (char*)XMALLOC(size * 2, NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (buffer == NULL) { + return; + } + + printf("%s: ptr=%p, used=%d, sign=%d, size=%d, mpd=%d\n", + desc, a, a->used, a->sign, size, (int)sizeof(mp_digit)); + + mp_toradix(a, buffer, 16); + printf(" %s\n ", buffer); + + if (verbose) { + int i; + for(i=0; i<a->alloc * (int)sizeof(mp_digit); i++) { + printf("%02x ", *(((byte*)a->dp) + i)); + } + printf("\n"); + } + + XFREE(buffer, NULL, DYNAMIC_TYPE_TMP_BUFFER); +} +#endif /* WOLFSSL_DEBUG_MATH */ + +#endif /* defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || defined(WOLFSSL_DEBUG_MATH) */ #endif /* USE_FAST_MATH */ diff --git a/wolfcrypt/src/rsa.c b/wolfcrypt/src/rsa.c index 690a7c804..7dd775809 100644 --- a/wolfcrypt/src/rsa.c +++ b/wolfcrypt/src/rsa.c @@ -845,7 +845,9 @@ static int 
wc_RsaFunction(const byte* in, word32 inLen, byte* out, mp_clear(&tmpa); mp_clear(&tmpb); - if (ret != 0) return ret; + if (ret != 0) { + goto done; + } #endif /* RSA_LOW_MEM */ } diff --git a/wolfcrypt/src/tfm.c b/wolfcrypt/src/tfm.c index 81372ab8c..258e31e7d 100644 --- a/wolfcrypt/src/tfm.c +++ b/wolfcrypt/src/tfm.c @@ -50,6 +50,10 @@ #include #include /* will define asm MACROS or C ones */ +#ifdef WOLFSSL_DEBUG_MATH + #include +#endif + /* math settings check */ word32 CheckRunTimeSettings(void) @@ -118,6 +122,8 @@ void s_fp_add(fp_int *a, fp_int *b, fp_int *c) } c->used = x; + + /* zero any excess digits on the destination that we didn't write to */ for (; x < oldused; x++) { c->dp[x] = 0; } @@ -179,6 +185,8 @@ void s_fp_sub(fp_int *a, fp_int *b, fp_int *c) c->dp[x] = (fp_digit)t; t = (t >> DIGIT_BIT)&1; } + + /* zero any excess digits on the destination that we didn't write to */ for (; x < oldused; x++) { c->dp[x] = 0; } @@ -188,7 +196,9 @@ void s_fp_sub(fp_int *a, fp_int *b, fp_int *c) /* c = a * b */ void fp_mul(fp_int *A, fp_int *B, fp_int *C) { - int y, yy; + int y, yy, oldused; + + oldused = C->used; y = MAX(A->used, B->used); yy = MIN(A->used, B->used); @@ -196,7 +206,7 @@ void fp_mul(fp_int *A, fp_int *B, fp_int *C) /* call generic if we're out of range */ if (y + yy > FP_SIZE) { fp_mul_comba(A, B, C); - return ; + goto clean; } /* pick a comba (unrolled 4/8/16/32 x or rolled) based on the size @@ -205,98 +215,104 @@ void fp_mul(fp_int *A, fp_int *B, fp_int *C) if say y=17 then we would do (32-17)^2 = 225 unneeded multiplications */ -#ifdef TFM_MUL3 +#if defined(TFM_MUL3) && FP_SIZE >= 6 if (y <= 3) { fp_mul_comba3(A,B,C); - return; + goto clean; } #endif -#ifdef TFM_MUL4 +#if defined(TFM_MUL4) && FP_SIZE >= 8 if (y == 4) { fp_mul_comba4(A,B,C); - return; + goto clean; } #endif -#ifdef TFM_MUL6 +#if defined(TFM_MUL6) && FP_SIZE >= 12 if (y <= 6) { fp_mul_comba6(A,B,C); - return; + goto clean; } #endif -#ifdef TFM_MUL7 +#if defined(TFM_MUL7) && FP_SIZE 
>= 14 if (y == 7) { fp_mul_comba7(A,B,C); - return; + goto clean; } #endif -#ifdef TFM_MUL8 +#if defined(TFM_MUL8) && FP_SIZE >= 16 if (y == 8) { fp_mul_comba8(A,B,C); - return; + goto clean; } #endif -#ifdef TFM_MUL9 +#if defined(TFM_MUL9) && FP_SIZE >= 18 if (y == 9) { fp_mul_comba9(A,B,C); - return; + goto clean; } #endif -#ifdef TFM_MUL12 +#if defined(TFM_MUL12) && FP_SIZE >= 24 if (y <= 12) { fp_mul_comba12(A,B,C); - return; + goto clean; } #endif -#ifdef TFM_MUL17 +#if defined(TFM_MUL17) && FP_SIZE >= 34 if (y <= 17) { fp_mul_comba17(A,B,C); - return; + goto clean; } #endif -#ifdef TFM_SMALL_SET +#if defined(TFM_SMALL_SET) && FP_SIZE >= 32 if (y <= 16) { fp_mul_comba_small(A,B,C); - return; + goto clean; } #endif -#if defined(TFM_MUL20) +#if defined(TFM_MUL20) && FP_SIZE >= 40 if (y <= 20) { fp_mul_comba20(A,B,C); - return; + goto clean; } #endif -#if defined(TFM_MUL24) +#if defined(TFM_MUL24) && FP_SIZE >= 48 if (yy >= 16 && y <= 24) { fp_mul_comba24(A,B,C); - return; + goto clean; } #endif -#if defined(TFM_MUL28) +#if defined(TFM_MUL28) && FP_SIZE >= 56 if (yy >= 20 && y <= 28) { fp_mul_comba28(A,B,C); - return; + goto clean; } #endif -#if defined(TFM_MUL32) +#if defined(TFM_MUL32) && FP_SIZE >= 64 if (yy >= 24 && y <= 32) { fp_mul_comba32(A,B,C); - return; + goto clean; } #endif -#if defined(TFM_MUL48) +#if defined(TFM_MUL48) && FP_SIZE >= 96 if (yy >= 40 && y <= 48) { - fp_mul_comba48(A,B,C); - return; + fp_mul_comba48(A,B,C); + goto clean; } #endif -#if defined(TFM_MUL64) +#if defined(TFM_MUL64) && FP_SIZE >= 128 if (yy >= 56 && y <= 64) { fp_mul_comba64(A,B,C); - return; + goto clean; } #endif fp_mul_comba(A,B,C); + +clean: + /* zero any excess digits on the destination that we didn't write to */ + for (y = C->used; y < oldused; y++) { + C->dp[y] = 0; + } } void fp_mul_2(fp_int * a, fp_int * b) @@ -340,9 +356,7 @@ void fp_mul_2(fp_int * a, fp_int * b) ++(b->used); } - /* now zero any excess digits on the destination - * that we didn't write to - */ + /* 
zero any excess digits on the destination that we didn't write to */ tmpb = b->dp + b->used; for (x = b->used; x < oldused; x++) { *tmpb++ = 0; @@ -370,6 +384,8 @@ void fp_mul_d(fp_int *a, fp_digit b, fp_int *c) c->dp[c->used++] = (fp_digit) w; ++x; } + + /* zero any excess digits on the destination that we didn't write to */ for (; x < oldused; x++) { c->dp[x] = 0; } @@ -627,9 +643,7 @@ int fp_div(fp_int *a, fp_int *b, fp_int *c, fp_int *d) if (d != NULL) { fp_div_2d (&x, norm, &x, NULL); -/* the following is a kludge, essentially we were seeing the right remainder but - with excess digits that should have been zero - */ + /* zero any excess digits on the destination that we didn't write to */ for (i = b->used; i < x.used; i++) { x.dp[i] = 0; } @@ -669,7 +683,7 @@ void fp_div_2(fp_int * a, fp_int * b) r = rr; } - /* zero excess digits */ + /* zero any excess digits on the destination that we didn't write to */ tmpb = b->dp + b->used; for (x = b->used; x < oldused; x++) { *tmpb++ = 0; @@ -1049,9 +1063,14 @@ static int _fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y) */ static int _fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y) { - fp_int M[64], res; + fp_int res; fp_digit buf, mp; int err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize; +#ifdef WOLFSSL_SMALL_STACK + fp_int *M; +#else + fp_int M[64]; +#endif /* find window size */ x = fp_count_bits (X); @@ -1067,15 +1086,23 @@ static int _fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y) winsize = 6; } - /* init M array */ - for(x = 0; x < (int)(sizeof(M)/sizeof(fp_int)); x++) - fp_init(&M[x]); - /* now setup montgomery */ if ((err = fp_montgomery_setup (P, &mp)) != FP_OKAY) { return err; } +#ifdef WOLFSSL_SMALL_STACK + /* only allocate space for what's needed */ + M = (fp_int*)XMALLOC(sizeof(fp_int)*(1 << winsize), NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (M == NULL) { + return FP_MEM; + } +#endif + + /* init M array */ + for(x = 0; x < (1 << winsize); x++) + fp_init(&M[x]); + /* 
setup result */ fp_init(&res); @@ -1083,7 +1110,7 @@ static int _fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y) * * The M table contains powers of the input base, e.g. M[x] = G^x mod P * - * The first half of the table is not computed though accept for M[0] and M[1] + * The first half of the table is not computed though except for M[0] and M[1] */ /* now we need R mod m */ @@ -1202,10 +1229,15 @@ static int _fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y) /* swap res with Y */ fp_copy (&res, Y); + +#ifdef WOLFSSL_SMALL_STACK + XFREE(M, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#endif + return FP_OKAY; } -#endif +#endif /* TFM_TIMING_RESISTANT */ int fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y) { @@ -1267,105 +1299,114 @@ void fp_2expt(fp_int *a, int b) /* b = a*a */ void fp_sqr(fp_int *A, fp_int *B) { - int y = A->used; + int y, oldused; + + oldused = B->used; + y = A->used; /* call generic if we're out of range */ if (y + y > FP_SIZE) { fp_sqr_comba(A, B); - return ; + goto clean; } -#if defined(TFM_SQR3) +#if defined(TFM_SQR3) && FP_SIZE >= 6 if (y <= 3) { fp_sqr_comba3(A,B); - return; + goto clean; } #endif -#if defined(TFM_SQR4) +#if defined(TFM_SQR4) && FP_SIZE >= 8 if (y == 4) { fp_sqr_comba4(A,B); - return; + goto clean; } #endif -#if defined(TFM_SQR6) +#if defined(TFM_SQR6) && FP_SIZE >= 12 if (y <= 6) { fp_sqr_comba6(A,B); - return; + goto clean; } #endif -#if defined(TFM_SQR7) +#if defined(TFM_SQR7) && FP_SIZE >= 14 if (y == 7) { fp_sqr_comba7(A,B); - return; + goto clean; } #endif -#if defined(TFM_SQR8) +#if defined(TFM_SQR8) && FP_SIZE >= 16 if (y == 8) { fp_sqr_comba8(A,B); - return; + goto clean; } #endif -#if defined(TFM_SQR9) +#if defined(TFM_SQR9) && FP_SIZE >= 18 if (y == 9) { fp_sqr_comba9(A,B); - return; + goto clean; } #endif -#if defined(TFM_SQR12) +#if defined(TFM_SQR12) && FP_SIZE >= 24 if (y <= 12) { fp_sqr_comba12(A,B); - return; + goto clean; } #endif -#if defined(TFM_SQR17) +#if defined(TFM_SQR17) && FP_SIZE 
>= 34 if (y <= 17) { fp_sqr_comba17(A,B); - return; + goto clean; } #endif #if defined(TFM_SMALL_SET) if (y <= 16) { fp_sqr_comba_small(A,B); - return; + goto clean; } #endif -#if defined(TFM_SQR20) +#if defined(TFM_SQR20) && FP_SIZE >= 40 if (y <= 20) { fp_sqr_comba20(A,B); - return; + goto clean; } #endif -#if defined(TFM_SQR24) +#if defined(TFM_SQR24) && FP_SIZE >= 48 if (y <= 24) { fp_sqr_comba24(A,B); - return; + goto clean; } #endif -#if defined(TFM_SQR28) +#if defined(TFM_SQR28) && FP_SIZE >= 56 if (y <= 28) { fp_sqr_comba28(A,B); - return; + goto clean; } #endif -#if defined(TFM_SQR32) +#if defined(TFM_SQR32) && FP_SIZE >= 64 if (y <= 32) { fp_sqr_comba32(A,B); - return; + goto clean; } #endif -#if defined(TFM_SQR48) +#if defined(TFM_SQR48) && FP_SIZE >= 96 if (y <= 48) { fp_sqr_comba48(A,B); - return; + goto clean; } #endif -#if defined(TFM_SQR64) +#if defined(TFM_SQR64) && FP_SIZE >= 128 if (y <= 64) { fp_sqr_comba64(A,B); - return; + goto clean; } #endif fp_sqr_comba(A, B); + +clean: + /* zero any excess digits on the destination that we didn't write to */ + for (y = B->used; y < oldused; y++) { + B->dp[y] = 0; + } } /* generic comba squarer */ @@ -1513,7 +1554,7 @@ int fp_cmp_mag(fp_int *a, fp_int *b) return FP_EQ; } -/* setups the montgomery reduction */ +/* sets up the montgomery reduction */ int fp_montgomery_setup(fp_int *a, fp_digit *rho) { fp_digit x, b; @@ -1612,7 +1653,7 @@ static void fp_montgomery_reduce_mulx(fp_int *a, fp_int *m, fp_digit mp) /* now zero the buff */ - XMEMSET(c, 0, sizeof c); + XMEMSET(c, 0, sizeof(c)); pa = m->used; /* copy the input */ @@ -1652,7 +1693,8 @@ static void fp_montgomery_reduce_mulx(fp_int *a, fp_int *m, fp_digit mp) *tmpm++ = *_c++; } - for (; x < oldused; x++) { + /* zero any excess digits on the destination that we didn't write to */ + for (; x < oldused; x++) { *tmpm++ = 0; } @@ -1691,7 +1733,7 @@ void fp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp) /* now zero the buff */ - XMEMSET(c, 0, sizeof c); 
+ XMEMSET(c, 0, sizeof(c)); pa = m->used; /* copy the input */ @@ -1733,7 +1775,8 @@ void fp_montgomery_reduce(fp_int *a, fp_int *m, fp_digit mp) *tmpm++ = *_c++; } - for (; x < oldused; x++) { + /* zero any excess digits on the destination that we didn't write to */ + for (; x < oldused; x++) { *tmpm++ = 0; } @@ -1829,7 +1872,7 @@ void fp_set(fp_int *a, fp_digit b) a->used = a->dp[0] ? 1 : 0; } -/* chek if a bit is set */ +/* check if a bit is set */ int fp_is_bit_set (fp_int *a, fp_digit b) { fp_digit i; @@ -2177,13 +2220,20 @@ int mp_div_2d(fp_int* a, int b, fp_int* c, fp_int* d) } #ifdef ALT_ECC_SIZE -void fp_copy(fp_int *a, fp_int* b) +void fp_copy(fp_int *a, fp_int *b) { if (a != b && b->size >= a->used) { + int x, oldused; + oldused = b->used; b->used = a->used; b->sign = a->sign; XMEMCPY(b->dp, a->dp, a->used * sizeof(fp_digit)); + + /* zero any excess digits on the destination that we didn't write to */ + for (x = b->used; x < oldused; x++) { + b->dp[x] = 0; + } } } @@ -2196,49 +2246,39 @@ void fp_init_copy(fp_int *a, fp_int* b) } #endif -/* fast math conversion */ +/* fast math wrappers */ int mp_copy(fp_int* a, fp_int* b) { fp_copy(a, b); return MP_OKAY; } - -/* fast math conversion */ int mp_isodd(mp_int* a) { return fp_isodd(a); } - -/* fast math conversion */ int mp_iszero(mp_int* a) { return fp_iszero(a); } -/* fast math conversion */ int mp_count_bits (mp_int* a) { return fp_count_bits(a); } - int mp_leading_bit (mp_int* a) { return fp_leading_bit(a); } - -/* fast math conversion */ void mp_rshb (mp_int* a, int x) { fp_rshb(a, x); } - -/* fast math wrappers */ int mp_set_int(mp_int *a, mp_digit b) { fp_set(a, b); @@ -2282,7 +2322,8 @@ int mp_montgomery_calc_normalization(mp_int *a, mp_int *b) #endif /* WOLFSSL_KEYGEN || HAVE_ECC */ -#if defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) +#if defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || \ + defined(WOLFSSL_DEBUG_MATH) static const int lnz[16] = { 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 
0 @@ -2429,7 +2470,7 @@ int mp_mod_d(fp_int *a, fp_digit b, fp_digit *c) return fp_mod_d(a, b, c); } -#endif /* defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) */ +#endif /* defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || defined(WOLFSSL_DEBUG_MATH) */ #ifdef WOLFSSL_KEY_GEN @@ -2872,7 +2913,8 @@ int mp_cnt_lsb(fp_int* a) #endif /* HAVE_ECC */ -#if defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) +#if defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || \ + defined(WOLFSSL_DEBUG_MATH) /* returns size of ASCII representation */ int mp_radix_size (mp_int *a, int radix, int *size) @@ -2980,7 +3022,32 @@ int mp_toradix (mp_int *a, char *str, int radix) return FP_OKAY; } -#endif /* defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) */ +#ifdef WOLFSSL_DEBUG_MATH +void mp_dump(const char* desc, mp_int* a, byte verbose) +{ + char buffer[FP_SIZE * sizeof(fp_digit) * 2]; + int size = FP_SIZE; + +#ifdef ALT_ECC_SIZE + size = a->size; +#endif + + printf("%s: ptr=%p, used=%d, sign=%d, size=%d, fpd=%d\n", + desc, a, a->used, a->sign, size, (int)sizeof(fp_digit)); + + mp_toradix(a, buffer, 16); + printf(" %s\n ", buffer); + + if (verbose) { + int i; + for(i=0; i < size * (int)sizeof(fp_digit); i++) { + printf("%x ", *(((byte*)a->dp) + i)); + } + printf("\n"); + } +} +#endif /* WOLFSSL_DEBUG_MATH */ + +#endif /* defined(WOLFSSL_KEY_GEN) || defined(HAVE_COMP_KEY) || defined(WOLFSSL_DEBUG_MATH) */ #endif /* USE_FAST_MATH */ - diff --git a/wolfcrypt/test/test.c b/wolfcrypt/test/test.c index 32da38dc7..d5d114c18 100644 --- a/wolfcrypt/test/test.c +++ b/wolfcrypt/test/test.c @@ -6560,7 +6560,7 @@ static int ecc_test_key_gen(WC_RNG* rng, int keySize) ret = wc_ecc_check_key(&userA); if (ret != 0) - return -1024; + return -1023; derSz = wc_EccKeyToDer(&userA, der, FOURK_BUF); if (derSz < 0) { @@ -6621,8 +6621,8 @@ static int ecc_test_curve_size(WC_RNG* rng, int keySize, int testVerifyCount, int testCompressedKey) { #ifdef BENCH_EMBEDDED - byte sharedA[32]; - byte sharedB[32]; + byte sharedA[128]; /* Needs to be at least keySize */ + byte 
sharedB[128]; /* Needs to be at least keySize */ #else byte sharedA[1024]; byte sharedB[1024]; @@ -6652,7 +6652,7 @@ static int ecc_test_curve_size(WC_RNG* rng, int keySize, int testVerifyCount, ret = wc_ecc_check_key(&userA); if (ret != 0) - return -1024; + return -1023; ret = wc_ecc_make_key(rng, keySize, &userB); if (ret != 0) @@ -6732,6 +6732,7 @@ static int ecc_test_curve_size(WC_RNG* rng, int keySize, int testVerifyCount, for (i = 0; i < (int)sizeof(digest); i++) { digest[i] = 0; } + digest[i-1] = 1; /* Set last digit to non-zero value */ x = sizeof(sig); ret = wc_ecc_sign_hash(digest, sizeof(digest), sig, &x, rng, &userA); @@ -6801,12 +6802,14 @@ static int ecc_test_curve(WC_RNG* rng, int keySize) ret = ecc_test_curve_size(rng, keySize, ECC_TEST_VERIFY_COUNT, testCompressedKey); if (ret < 0) { + printf("ecc_test_curve_size %d failed!: %d\n", keySize, ret); return ret; } #ifdef HAVE_ECC_VECTOR_TEST ret = ecc_test_vector(keySize); if (ret < 0) { + printf("ecc_test_vector %d failed!: %d\n", keySize, ret); return ret; } #endif @@ -6814,6 +6817,7 @@ static int ecc_test_curve(WC_RNG* rng, int keySize) #ifdef WOLFSSL_KEY_GEN ret = ecc_test_key_gen(rng, keySize); if (ret < 0) { + printf("ecc_test_key_gen %d failed!: %d\n", keySize, ret); return ret; } #endif diff --git a/wolfssl/wolfcrypt/ecc.h b/wolfssl/wolfcrypt/ecc.h index b952e68b9..e67a49720 100644 --- a/wolfssl/wolfcrypt/ecc.h +++ b/wolfssl/wolfcrypt/ecc.h @@ -61,6 +61,27 @@ typedef struct { } ecc_set_type; +/* Determine max ECC bits based on enabled curves */ +#if defined(HAVE_ECC521) || defined(HAVE_ALL_CURVES) + #define MAX_ECC_BITS 521 +#elif defined(HAVE_ECC384) + #define MAX_ECC_BITS 384 +#elif defined(HAVE_ECC224) + #define MAX_ECC_BITS 224 +#elif !defined(NO_ECC256) + #define MAX_ECC_BITS 256 +#elif defined(HAVE_ECC192) + #define MAX_ECC_BITS 192 +#elif defined(HAVE_ECC160) + #define MAX_ECC_BITS 160 +#elif defined(HAVE_ECC128) + #define MAX_ECC_BITS 128 +#elif defined(HAVE_ECC112) + #define 
MAX_ECC_BITS 112 +#endif + + + #ifdef ALT_ECC_SIZE /* Note on ALT_ECC_SIZE: @@ -90,14 +111,26 @@ typedef struct { #error USE_FAST_MATH must be defined to use ALT_ECC_SIZE #endif +/* determine max bits required for ECC math */ #ifndef FP_MAX_BITS_ECC - #define FP_MAX_BITS_ECC 528 + /* check alignment */ + #if ((MAX_ECC_BITS * 2) % DIGIT_BIT) == 0 + /* max bits is double */ + #define FP_MAX_BITS_ECC (MAX_ECC_BITS * 2) + #else + /* max bits is doubled, plus one digit of fudge */ + #define FP_MAX_BITS_ECC ((MAX_ECC_BITS * 2) + DIGIT_BIT) + #endif +#else + /* verify alignment */ + #if FP_MAX_BITS_ECC % CHAR_BIT + #error FP_MAX_BITS_ECC must be a multiple of CHAR_BIT + #endif #endif -#define FP_MAX_SIZE_ECC (FP_MAX_BITS_ECC+(8*DIGIT_BIT)) -#if FP_MAX_BITS_ECC % CHAR_BIT - #error FP_MAX_BITS_ECC must be a multiple of CHAR_BIT -#endif -#define FP_SIZE_ECC (FP_MAX_SIZE_ECC/DIGIT_BIT) + +/* determine buffer size */ +#define FP_SIZE_ECC (FP_MAX_BITS_ECC/DIGIT_BIT) + /* This needs to match the size of the fp_int struct, except the * fp_digit array will be shorter. 
*/ diff --git a/wolfssl/wolfcrypt/integer.h b/wolfssl/wolfcrypt/integer.h index a0ca3c15e..2b38601cb 100644 --- a/wolfssl/wolfcrypt/integer.h +++ b/wolfssl/wolfcrypt/integer.h @@ -310,6 +310,12 @@ int mp_init_multi(mp_int* a, mp_int* b, mp_int* c, mp_int* d, mp_int* e, int mp_toradix (mp_int *a, char *str, int radix); int mp_radix_size (mp_int * a, int radix, int *size); +#ifdef WOLFSSL_DEBUG_MATH + void mp_dump(const char* desc, mp_int* a, byte verbose); +#else + #define mp_dump(desc, a, verbose) +#endif + #if defined(HAVE_ECC) || defined(WOLFSSL_KEY_GEN) int mp_sqrmod(mp_int* a, mp_int* b, mp_int* c); #endif diff --git a/wolfssl/wolfcrypt/tfm.h b/wolfssl/wolfcrypt/tfm.h index c0e05e4ae..f86a7e52f 100644 --- a/wolfssl/wolfcrypt/tfm.h +++ b/wolfssl/wolfcrypt/tfm.h @@ -211,6 +211,7 @@ #if defined(FP_64BIT) /* for GCC only on supported platforms */ typedef unsigned long long fp_digit; /* 64bit, 128 uses mode(TI) below */ + #define SIZEOF_FP_DIGIT 8 typedef unsigned long fp_word __attribute__ ((mode(TI))); #else #if defined(_MSC_VER) || defined(__BORLANDC__) @@ -221,12 +222,14 @@ #ifndef NO_64BIT typedef unsigned int fp_digit; + #define SIZEOF_FP_DIGIT 4 typedef ulong64 fp_word; #define FP_32BIT #else /* some procs like coldfire prefer not to place multiply into 64bit type even though it exists */ typedef unsigned short fp_digit; + #define SIZEOF_FP_DIGIT 2 typedef unsigned int fp_word; #endif #endif @@ -234,7 +237,7 @@ #endif /* WOLFSSL_BIGINT_TYPES */ /* # of digits this is */ -#define DIGIT_BIT (int)((CHAR_BIT) * sizeof(fp_digit)) +#define DIGIT_BIT ((CHAR_BIT) * SIZEOF_FP_DIGIT) /* Max size of any number in bits. 
Basically the largest size you will be * multiplying should be half [or smaller] of FP_MAX_SIZE-four_digit @@ -548,103 +551,38 @@ void fp_reverse(unsigned char *s, int len); void fp_mul_comba(fp_int *a, fp_int *b, fp_int *c); -#ifdef TFM_SMALL_SET void fp_mul_comba_small(fp_int *a, fp_int *b, fp_int *c); -#endif - -#ifdef TFM_MUL3 void fp_mul_comba3(fp_int *a, fp_int *b, fp_int *c); -#endif -#ifdef TFM_MUL4 void fp_mul_comba4(fp_int *a, fp_int *b, fp_int *c); -#endif -#ifdef TFM_MUL6 void fp_mul_comba6(fp_int *a, fp_int *b, fp_int *c); -#endif -#ifdef TFM_MUL7 void fp_mul_comba7(fp_int *a, fp_int *b, fp_int *c); -#endif -#ifdef TFM_MUL8 void fp_mul_comba8(fp_int *a, fp_int *b, fp_int *c); -#endif -#ifdef TFM_MUL9 void fp_mul_comba9(fp_int *a, fp_int *b, fp_int *c); -#endif -#ifdef TFM_MUL12 void fp_mul_comba12(fp_int *a, fp_int *b, fp_int *c); -#endif -#ifdef TFM_MUL17 void fp_mul_comba17(fp_int *a, fp_int *b, fp_int *c); -#endif - -#ifdef TFM_MUL20 void fp_mul_comba20(fp_int *a, fp_int *b, fp_int *c); -#endif -#ifdef TFM_MUL24 void fp_mul_comba24(fp_int *a, fp_int *b, fp_int *c); -#endif -#ifdef TFM_MUL28 void fp_mul_comba28(fp_int *a, fp_int *b, fp_int *c); -#endif -#ifdef TFM_MUL32 void fp_mul_comba32(fp_int *a, fp_int *b, fp_int *c); -#endif -#ifdef TFM_MUL48 void fp_mul_comba48(fp_int *a, fp_int *b, fp_int *c); -#endif -#ifdef TFM_MUL64 void fp_mul_comba64(fp_int *a, fp_int *b, fp_int *c); -#endif - void fp_sqr_comba(fp_int *a, fp_int *b); - -#ifdef TFM_SMALL_SET void fp_sqr_comba_small(fp_int *a, fp_int *b); -#endif - -#ifdef TFM_SQR3 void fp_sqr_comba3(fp_int *a, fp_int *b); -#endif -#ifdef TFM_SQR4 void fp_sqr_comba4(fp_int *a, fp_int *b); -#endif -#ifdef TFM_SQR6 void fp_sqr_comba6(fp_int *a, fp_int *b); -#endif -#ifdef TFM_SQR7 void fp_sqr_comba7(fp_int *a, fp_int *b); -#endif -#ifdef TFM_SQR8 void fp_sqr_comba8(fp_int *a, fp_int *b); -#endif -#ifdef TFM_SQR9 void fp_sqr_comba9(fp_int *a, fp_int *b); -#endif -#ifdef TFM_SQR12 void fp_sqr_comba12(fp_int 
*a, fp_int *b); -#endif -#ifdef TFM_SQR17 void fp_sqr_comba17(fp_int *a, fp_int *b); -#endif - -#ifdef TFM_SQR20 void fp_sqr_comba20(fp_int *a, fp_int *b); -#endif -#ifdef TFM_SQR24 void fp_sqr_comba24(fp_int *a, fp_int *b); -#endif -#ifdef TFM_SQR28 void fp_sqr_comba28(fp_int *a, fp_int *b); -#endif -#ifdef TFM_SQR32 void fp_sqr_comba32(fp_int *a, fp_int *b); -#endif -#ifdef TFM_SQR48 void fp_sqr_comba48(fp_int *a, fp_int *b); -#endif -#ifdef TFM_SQR64 void fp_sqr_comba64(fp_int *a, fp_int *b); -#endif + /*extern const char *fp_s_rmap;*/ @@ -707,6 +645,12 @@ void mp_rshb(mp_int *a, int x); int mp_toradix (mp_int *a, char *str, int radix); int mp_radix_size (mp_int * a, int radix, int *size); +#ifdef WOLFSSL_DEBUG_MATH + void mp_dump(const char* desc, mp_int* a, byte verbose); +#else + #define mp_dump(desc, a, verbose) +#endif + #ifdef HAVE_ECC int mp_read_radix(mp_int* a, const char* str, int radix); void mp_set(fp_int *a, fp_digit b);