From 6ef9e79ff5ccd2b96fdfed404ada872fd29514be Mon Sep 17 00:00:00 2001 From: toddouska Date: Tue, 13 Sep 2016 09:13:39 -0700 Subject: [PATCH 1/2] switch timing resistant exptmod to use temp for square instead of leaking key bit to cache monitor --- wolfcrypt/src/tfm.c | 31 +++++++++++++++++++++++++++++-- wolfssl/wolfcrypt/types.h | 1 + 2 files changed, 30 insertions(+), 2 deletions(-) diff --git a/wolfcrypt/src/tfm.c b/wolfcrypt/src/tfm.c index 35364334a..06a4846e4 100644 --- a/wolfcrypt/src/tfm.c +++ b/wolfcrypt/src/tfm.c @@ -1035,13 +1035,29 @@ int fp_addmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d) #ifdef TFM_TIMING_RESISTANT +/* all off / all on pointer addresses for constant calculations */ +static const wolfssl_word off_on_addr[2] = +{ +#if defined(WC_64BIT_CPU) + W64LIT(0x0000000000000000), + W64LIT(0xffffffffffffffff) +#elif defined(WC_16BIT_CPU) + 0x0000U, + 0xffffU +#else + /* 32 bit */ + 0x00000000U, + 0xffffffffU +#endif +}; + /* timing resistant montgomery ladder based exptmod Based on work by Marc Joye, Sung-Ming Yen, "The Montgomery Powering Ladder", Cryptographic Hardware and Embedded Systems, CHES 2002 */ static int _fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y) { - fp_int R[2]; + fp_int R[3]; fp_digit buf, mp; int err, bitcnt, digidx, y; @@ -1052,6 +1068,7 @@ static int _fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y) fp_init(&R[0]); fp_init(&R[1]); + fp_init(&R[2]); /* now we need R mod m */ fp_montgomery_calc_normalization (&R[0], P); @@ -1092,7 +1109,17 @@ static int _fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y) /* do ops */ fp_mul(&R[0], &R[1], &R[y^1]); fp_montgomery_reduce(&R[y^1], P, mp); - fp_sqr(&R[y], &R[y]); fp_montgomery_reduce(&R[y], P, mp); + + /* instead of using R[y] for sqr, which leaks key bit to cache monitor, + * use R[2] as temp, make sure address calc is constant, keep + * &R[0] and &R[1] in cache */ + fp_copy((fp_int*) ( ((wolfssl_word)&R[0] & off_on_addr[(y^1)]) + + 
((wolfssl_word)&R[1] & off_on_addr[y]) ), + &R[2]); + fp_sqr(&R[2], &R[2]); fp_montgomery_reduce(&R[2], P, mp); + fp_copy(&R[2], + (fp_int*) ( ((wolfssl_word)&R[0] & off_on_addr[(y^1)]) + + ((wolfssl_word)&R[1] & off_on_addr[y]) ) ); } fp_montgomery_reduce(&R[0], P, mp); diff --git a/wolfssl/wolfcrypt/types.h b/wolfssl/wolfcrypt/types.h index d67453601..7612546c0 100644 --- a/wolfssl/wolfcrypt/types.h +++ b/wolfssl/wolfcrypt/types.h @@ -91,6 +91,7 @@ defined(__mips64) || defined(__x86_64__) || defined(_M_X64)) || \ defined(__aarch64__) typedef word64 wolfssl_word; + #define WC_64BIT_CPU #else typedef word32 wolfssl_word; #ifdef WORD64_AVAILABLE From 46a0ee8e690913a41a10f3c296cfab8345500218 Mon Sep 17 00:00:00 2001 From: toddouska Date: Tue, 13 Sep 2016 11:10:10 -0700 Subject: [PATCH 2/2] switch ecc timing resistant mulmod double to use temp instead of leaking key bit to cache monitor --- wolfcrypt/src/ecc.c | 57 ++++++++++++++++++++++++++++++++++++++++++++- wolfcrypt/src/tfm.c | 11 +++++---- 2 files changed, 62 insertions(+), 6 deletions(-) diff --git a/wolfcrypt/src/ecc.c b/wolfcrypt/src/ecc.c index ec324308b..f4cc1e831 100644 --- a/wolfcrypt/src/ecc.c +++ b/wolfcrypt/src/ecc.c @@ -1876,6 +1876,27 @@ int wc_ecc_mulmod(mp_int* k, ecc_point *G, ecc_point *R, mp_int* a, #else /* ECC_TIMING_RESISTANT */ + +#if defined(TFM_TIMINING_RESISTANT) && defined(USE_FAST_MATH) + /* let's use the one we already have */ + extern const wolfssl_word wc_off_on_addr[2]; +#else + static const wolfssl_word wc_off_on_addr[2] = + { + #if defined(WC_64BIT_CPU) + W64LIT(0x0000000000000000), + W64LIT(0xffffffffffffffff) + #elif defined(WC_16BIT_CPU) + 0x0000U, + 0xffffU + #else + /* 32 bit */ + 0x00000000U, + 0xffffffffU + #endif + }; +#endif + /** Perform a point multiplication (timing resistant) k The scalar to multiply by @@ -2013,8 +2034,42 @@ int wc_ecc_mulmod_ex(mp_int* k, ecc_point *G, ecc_point *R, if (err == MP_OKAY) err = ecc_projective_add_point(M[0], M[1], M[i^1], a, modulus,
mp); + /* instead of using M[i] for double, which leaks key bit to cache + * monitor, use M[2] as temp, make sure address calc is constant, + * keep &M[0] and &M[1] in cache */ if (err == MP_OKAY) - err = ecc_projective_dbl_point(M[i], M[i], a, modulus, mp); + err = mp_copy((mp_int*) + ( ((wolfssl_word)&M[0]->x & wc_off_on_addr[i^1]) + + ((wolfssl_word)&M[1]->x & wc_off_on_addr[i])), + M[2]->x); + if (err == MP_OKAY) + err = mp_copy((mp_int*) + ( ((wolfssl_word)&M[0]->y & wc_off_on_addr[i^1]) + + ((wolfssl_word)&M[1]->y & wc_off_on_addr[i])), + M[2]->y); + if (err == MP_OKAY) + err = mp_copy((mp_int*) + ( ((wolfssl_word)&M[0]->z & wc_off_on_addr[i^1]) + + ((wolfssl_word)&M[1]->z & wc_off_on_addr[i])), + M[2]->z); + if (err == MP_OKAY) + err = ecc_projective_dbl_point(M[2], M[2], a, modulus, mp); + /* copy M[2] back to M[i] */ + if (err == MP_OKAY) + err = mp_copy(M[2]->x, + (mp_int*) + ( ((wolfssl_word)&M[0]->x & wc_off_on_addr[i^1]) + + ((wolfssl_word)&M[1]->x & wc_off_on_addr[i])) ); + if (err == MP_OKAY) + err = mp_copy(M[2]->y, + (mp_int*) + ( ((wolfssl_word)&M[0]->y & wc_off_on_addr[i^1]) + + ((wolfssl_word)&M[1]->y & wc_off_on_addr[i])) ); + if (err == MP_OKAY) + err = mp_copy(M[2]->z, + (mp_int*) + ( ((wolfssl_word)&M[0]->z & wc_off_on_addr[i^1]) + + ((wolfssl_word)&M[1]->z & wc_off_on_addr[i])) ); if (err != MP_OKAY) break; } /* end for */ diff --git a/wolfcrypt/src/tfm.c b/wolfcrypt/src/tfm.c index 06a4846e4..7c6a55518 100644 --- a/wolfcrypt/src/tfm.c +++ b/wolfcrypt/src/tfm.c @@ -1036,7 +1036,8 @@ int fp_addmod(fp_int *a, fp_int *b, fp_int *c, fp_int *d) #ifdef TFM_TIMING_RESISTANT /* all off / all on pointer addresses for constant calculations */ -static const wolfssl_word off_on_addr[2] = +/* ecc.c uses same table */ +const wolfssl_word wc_off_on_addr[2] = { #if defined(WC_64BIT_CPU) W64LIT(0x0000000000000000), @@ -1113,13 +1114,13 @@ static int _fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y) /* instead of using R[y] for sqr, which leaks key 
bit to cache monitor, * use R[2] as temp, make sure address calc is constant, keep * &R[0] and &R[1] in cache */ - fp_copy((fp_int*) ( ((wolfssl_word)&R[0] & off_on_addr[(y^1)]) + - ((wolfssl_word)&R[1] & off_on_addr[y]) ), + fp_copy((fp_int*) ( ((wolfssl_word)&R[0] & wc_off_on_addr[y^1]) + + ((wolfssl_word)&R[1] & wc_off_on_addr[y]) ), &R[2]); fp_sqr(&R[2], &R[2]); fp_montgomery_reduce(&R[2], P, mp); fp_copy(&R[2], - (fp_int*) ( ((wolfssl_word)&R[0] & off_on_addr[(y^1)]) + - ((wolfssl_word)&R[1] & off_on_addr[y]) ) ); + (fp_int*) ( ((wolfssl_word)&R[0] & wc_off_on_addr[y^1]) + + ((wolfssl_word)&R[1] & wc_off_on_addr[y]) ) ); } fp_montgomery_reduce(&R[0], P, mp);