diff --git a/wolfcrypt/src/sp_int.c b/wolfcrypt/src/sp_int.c index 85fc455b7..37d554bf0 100644 --- a/wolfcrypt/src/sp_int.c +++ b/wolfcrypt/src/sp_int.c @@ -5104,6 +5104,51 @@ int sp_copy(const sp_int* a, sp_int* r) } #endif +#if ((defined(WOLFSSL_SP_MATH_ALL) && ((!defined(WOLFSSL_RSA_VERIFY_ONLY) && \ + !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || !defined(NO_DH))) || \ + defined(OPENSSL_ALL)) && defined(WC_PROTECT_ENCRYPTED_MEM) + +/* Copy 2 numbers into two results based on y. Copy a fixed number of digits. + * + * Constant time implementation. + * When y is 0, r1 = a2 and r2 = a1. + * When y is 1, r1 = a1 and r2 = a2. + * + * @param [in] a1 First number to copy. + * @param [in] a2 Second number to copy. + * @param [out] r1 First result number to copy into. + * @param [out] r2 Second result number to copy into. + * @param [in] y Indicates which number goes into which result number. + * @param [in] used Number of digits to copy. + */ +static void _sp_copy_2_ct(const sp_int* a1, const sp_int* a2, sp_int* r1, + sp_int* r2, int y, unsigned int used) +{ + unsigned int i; + + /* Copy data - constant time. */ + for (i = 0; i < used; i++) { + r1->dp[i] = (a1->dp[i] & ((sp_digit)wc_off_on_addr[y ])) + + (a2->dp[i] & ((sp_digit)wc_off_on_addr[y^1])); + r2->dp[i] = (a1->dp[i] & ((sp_digit)wc_off_on_addr[y^1])) + + (a2->dp[i] & ((sp_digit)wc_off_on_addr[y ])); + } + /* Copy used. */ + r1->used = (a1->used & ((int)wc_off_on_addr[y ])) + + (a2->used & ((int)wc_off_on_addr[y^1])); + r2->used = (a1->used & ((int)wc_off_on_addr[y^1])) + + (a2->used & ((int)wc_off_on_addr[y ])); +#ifdef WOLFSSL_SP_INT_NEGATIVE + /* Copy sign. */ + r1->sign = (a1->sign & ((int)wc_off_on_addr[y ])) + + (a2->sign & ((int)wc_off_on_addr[y^1])); + r2->sign = (a1->sign & ((int)wc_off_on_addr[y^1])) + + (a2->sign & ((int)wc_off_on_addr[y ])); +#endif +} + +#endif + #if defined(WOLFSSL_SP_MATH_ALL) || (defined(HAVE_ECC) && defined(FP_ECC)) /* Initializes r and copies in value from a. * @@ -12483,6 +12528,9 @@ int sp_invmod_mont_ct(const sp_int* a, const sp_int* m, sp_int* r, #if (defined(WOLFSSL_SP_MATH_ALL) && !defined(WOLFSSL_RSA_VERIFY_ONLY) && \ !defined(WOLFSSL_RSA_PUBLIC_ONLY)) || !defined(NO_DH) || \ defined(OPENSSL_ALL) + +#ifndef WC_PROTECT_ENCRYPTED_MEM + /* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m * Process the exponent one bit at a time. * Is constant time and can be cache attack resistant. @@ -12614,6 +12662,105 @@ static int _sp_exptmod_ex(const sp_int* b, const sp_int* e, int bits, FREE_SP_INT_ARRAY(t, NULL); return err; } + +#else + +/* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m + * Process the exponent one bit at a time with base in Montgomery form. + * Is constant time and cache attack resistant. + * + * Based on work by Marc Joye, Sung-Ming Yen, "The Montgomery Powering Ladder", + * Cryptographic Hardware and Embedded Systems, CHES 2002 + * + * Algorithm: + * b: base, e: exponent, m: modulus, r: result, bits: #bits to use + * 1. t[1] = b mod m. + * 2. t[0] = 1 + * 3. For i in (bits-1)...0 + * 3.1. y = e[i] + * 3.2. t[2] = t[0] * t[1] + * 3.3. t[3] = t[y] ^ 2 + * 3.4. t[y] = t[3], t[y^1] = t[2] + * 4. r = t[0] + * + * @param [in] b SP integer that is the base. + * @param [in] e SP integer that is the exponent. + * @param [in] bits Number of bits in exponent to use. May be greater than + * count of bits in e. + * @param [in] m SP integer that is the modulus. + * @param [out] r SP integer to hold result. + * + * @return MP_OKAY on success. + * @return MP_MEM when dynamic memory allocation fails. + */ +static int _sp_exptmod_ex(const sp_int* b, const sp_int* e, int bits, + const sp_int* m, sp_int* r) +{ + int err = MP_OKAY; + int done = 0; + DECL_SP_INT_ARRAY(t, m->used * 2 + 1, 4); + + /* Allocate temporaries. */ + ALLOC_SP_INT_ARRAY(t, m->used * 2 + 1, 4, err, NULL); + if (err == MP_OKAY) { + /* Initialize temporaries. */ + _sp_init_size(t[0], m->used * 2 + 1); + _sp_init_size(t[1], m->used * 2 + 1); + _sp_init_size(t[2], m->used * 2 + 1); + _sp_init_size(t[3], m->used * 2 + 1); + + /* 1. Ensure base is less than modulus. */ + if (_sp_cmp_abs(b, m) != MP_LT) { + err = sp_mod(b, m, t[1]); + /* Handle base == modulus. */ + if ((err == MP_OKAY) && sp_iszero(t[1])) { + _sp_set(r, 0); + done = 1; + } + } + else { + /* Copy base into working variable. */ + err = sp_copy(b, t[1]); + } + } + + if ((!done) && (err == MP_OKAY)) { + int i; + + /* 2. t[0] = 1 */ + _sp_set(t[0], 1); + + /* 3. For i in (bits-1)...0 */ + for (i = bits - 1; (err == MP_OKAY) && (i >= 0); i--) { + /* 3.1. y = e[i] */ + int y = (e->dp[i >> SP_WORD_SHIFT] >> (i & SP_WORD_MASK)) & 1; + + /* 3.2. t[2] = t[0] * t[1] */ + err = sp_mulmod(t[0], t[1], m, t[2]); + /* 3.3. t[3] = t[y] ^ 2 */ + if (err == MP_OKAY) { + _sp_copy((sp_int*)(((size_t)t[0] & sp_off_on_addr[y^1]) + + ((size_t)t[1] & sp_off_on_addr[y ])), + t[3]); + err = sp_sqrmod(t[3], m, t[3]); + } + /* 3.4. t[y] = t[3], t[y^1] = t[2] */ + if (err == MP_OKAY) { + _sp_copy_2_ct(t[2], t[3], t[0], t[1], y, m->used); + } + } + } + if ((!done) && (err == MP_OKAY)) { + /* 4. r = t[0] */ + err = sp_copy(t[0], r); + } + + FREE_SP_INT_ARRAY(t, NULL); + return err; +} + +#endif /* WC_PROTECT_ENCRYPTED_MEM */ + #endif #if (defined(WOLFSSL_SP_MATH_ALL) && ((!defined(WOLFSSL_RSA_VERIFY_ONLY) && \ @@ -12621,6 +12768,9 @@ static int _sp_exptmod_ex(const sp_int* b, const sp_int* e, int bits, defined(OPENSSL_ALL) #ifndef WC_NO_HARDEN #if !defined(WC_NO_CACHE_RESISTANT) + +#ifndef WC_PROTECT_ENCRYPTED_MEM + /* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m * Process the exponent one bit at a time with base in Montgomery form. * Is constant time and cache attack resistant. @@ -12759,6 +12909,130 @@ static int _sp_exptmod_mont_ex(const sp_int* b, const sp_int* e, int bits, FREE_SP_INT_ARRAY(t, NULL); return err; } + +#else + +/* Internal. Exponentiates b to the power of e modulo m into r: r = b ^ e mod m + * Process the exponent one bit at a time with base in Montgomery form. + * Is constant time and cache attack resistant. + * + * Based on work by Marc Joye, Sung-Ming Yen, "The Montgomery Powering Ladder", + * Cryptographic Hardware and Embedded Systems, CHES 2002 + * + * Algorithm: + * b: base, e: exponent, m: modulus, r: result, bits: #bits to use + * 1. t[1] = b mod m. + * 2. t[0] = ToMont(1) + * 3. t[1] = ToMont(t[1]) + * 4. For i in (bits-1)...0 + * 4.1. y = e[i] + * 4.2. t[2] = t[0] * t[1] + * 4.3. t[3] = t[y] ^ 2 + * 4.4. t[y] = t[3], t[y^1] = t[2] + * 5. t[0] = FromMont(t[0]) + * 6. r = t[0] + * + * @param [in] b SP integer that is the base. + * @param [in] e SP integer that is the exponent. + * @param [in] bits Number of bits in exponent to use. May be greater than + * count of bits in e. + * @param [in] m SP integer that is the modulus. + * @param [out] r SP integer to hold result. + * + * @return MP_OKAY on success. + * @return MP_MEM when dynamic memory allocation fails. + */ +static int _sp_exptmod_mont_ex(const sp_int* b, const sp_int* e, int bits, + const sp_int* m, sp_int* r) +{ + int err = MP_OKAY; + int done = 0; + DECL_SP_INT_ARRAY(t, m->used * 2 + 1, 4); + + /* Allocate temporaries. */ + ALLOC_SP_INT_ARRAY(t, m->used * 2 + 1, 4, err, NULL); + if (err == MP_OKAY) { + /* Initialize temporaries. */ + _sp_init_size(t[0], m->used * 2 + 1); + _sp_init_size(t[1], m->used * 2 + 1); + _sp_init_size(t[2], m->used * 2 + 1); + _sp_init_size(t[3], m->used * 2 + 1); + + /* 1. Ensure base is less than modulus. */ + if (_sp_cmp_abs(b, m) != MP_LT) { + err = sp_mod(b, m, t[1]); + /* Handle base == modulus. */ + if ((err == MP_OKAY) && sp_iszero(t[1])) { + _sp_set(r, 0); + done = 1; + } + } + else { + /* Copy base into working variable. */ + err = sp_copy(b, t[1]); + } + } + + if ((!done) && (err == MP_OKAY)) { + int i; + sp_int_digit mp; + + /* Calculate Montgomery multiplier for reduction. */ + _sp_mont_setup(m, &mp); + /* 2. t[0] = ToMont(1) + * Calculate 1 in Montgomery form. + */ + err = sp_mont_norm(t[0], m); + if (err == MP_OKAY) { + /* 3. t[1] = ToMont(t[1]) + * Convert base to Montgomery form. + */ + err = sp_mulmod(t[1], t[0], m, t[1]); + } + + /* 4. For i in (bits-1)...0 */ + for (i = bits - 1; (err == MP_OKAY) && (i >= 0); i--) { + /* 4.1. y = e[i] */ + int y = (e->dp[i >> SP_WORD_SHIFT] >> (i & SP_WORD_MASK)) & 1; + + /* 4.2. t[2] = t[0] * t[1] */ + err = sp_mul(t[0], t[1], t[2]); + if (err == MP_OKAY) { + err = _sp_mont_red(t[2], m, mp); + } + /* 4.3. t[3] = t[y] ^ 2 */ + if (err == MP_OKAY) { + _sp_copy((sp_int*)(((size_t)t[0] & sp_off_on_addr[y^1]) + + ((size_t)t[1] & sp_off_on_addr[y ])), + t[3]); + err = sp_sqr(t[3], t[3]); + } + if (err == MP_OKAY) { + err = _sp_mont_red(t[3], m, mp); + } + /* 4.4. t[y] = t[3], t[y^1] = t[2] */ + if (err == MP_OKAY) { + _sp_copy_2_ct(t[2], t[3], t[0], t[1], y, m->used); + } + } + + if (err == MP_OKAY) { + /* 5. t[0] = FromMont(t[0]) */ + err = _sp_mont_red(t[0], m, mp); + /* Reduction implementation returns number to range: 0..m-1. */ + } + } + if ((!done) && (err == MP_OKAY)) { + /* 6. r = t[0] */ + err = sp_copy(t[0], r); + } + + FREE_SP_INT_ARRAY(t, NULL); + return err; +} + +#endif /* WC_PROTECT_ENCRYPTED_MEM */ + #else #ifdef SP_ALLOC diff --git a/wolfcrypt/src/tfm.c b/wolfcrypt/src/tfm.c index 1ab156a24..48d6b3bcb 100644 --- a/wolfcrypt/src/tfm.c +++ b/wolfcrypt/src/tfm.c @@ -1977,6 +1977,8 @@ int fp_exptmod_nb(exptModNb_t* nb, fp_int* G, fp_int* X, fp_int* P, fp_int* Y) #endif /* WC_RSA_NONBLOCK */ +#ifndef WC_PROTECT_ENCRYPTED_MEM + /* timing resistant montgomery ladder based exptmod Based on work by Marc Joye, Sung-Ming Yen, "The Montgomery Powering Ladder", Cryptographic Hardware and Embedded Systems, CHES 2002 @@ -2159,6 +2161,171 @@ static int _fp_exptmod_ct(fp_int * G, fp_int * X, int digits, fp_int * P, return err; } +#else + +/* Copy from a1 and a2 into r1 and r2 based on y in constant time. + * When y is 1, r1 = a1 and r2 = a2. + * When y is 0, r1 = a2 and r2 = a1. + * Always copy size digits as that is the maximum size for a1 and a2. + */ +static void fp_copy_2_ct(fp_int* a1, fp_int* a2, fp_int* r1, fp_int* r2, int y, + int size) +{ + int i; + + /* Copy data - constant time. */ + for (i = 0; i < size; i++) { + r1->dp[i] = (a1->dp[i] & ((fp_digit)wc_off_on_addr[y ])) + + (a2->dp[i] & ((fp_digit)wc_off_on_addr[y^1])); + r2->dp[i] = (a1->dp[i] & ((fp_digit)wc_off_on_addr[y^1])) + + (a2->dp[i] & ((fp_digit)wc_off_on_addr[y ])); + } + /* Copy used. */ + r1->used = (a1->used & ((int)wc_off_on_addr[y ])) + + (a2->used & ((int)wc_off_on_addr[y^1])); + r2->used = (a1->used & ((int)wc_off_on_addr[y^1])) + + (a2->used & ((int)wc_off_on_addr[y ])); + /* Copy sign. */ + r1->sign = (a1->sign & ((int)wc_off_on_addr[y ])) + + (a2->sign & ((int)wc_off_on_addr[y^1])); + r2->sign = (a1->sign & ((int)wc_off_on_addr[y^1])) + + (a2->sign & ((int)wc_off_on_addr[y ])); +} + +/* timing resistant montgomery ladder based exptmod + Based on work by Marc Joye, Sung-Ming Yen, "The Montgomery Powering Ladder", + Cryptographic Hardware and Embedded Systems, CHES 2002 +*/ +static int _fp_exptmod_ct(fp_int * G, fp_int * X, int digits, fp_int * P, + fp_int * Y) +{ +#ifndef WOLFSSL_SMALL_STACK + fp_int R[4]; /* need a temp for cache resistance */ +#else + fp_int *R; +#endif + fp_digit buf, mp; + int err, bitcnt, digidx, y; + + /* now setup montgomery */ + if ((err = fp_montgomery_setup (P, &mp)) != FP_OKAY) { + return err; + } + +#ifdef WOLFSSL_SMALL_STACK + R = (fp_int*)XMALLOC(sizeof(fp_int) * 4, NULL, DYNAMIC_TYPE_BIGINT); + if (R == NULL) + return FP_MEM; +#endif + fp_init(&R[0]); + fp_init(&R[1]); + fp_init(&R[2]); + fp_init(&R[3]); + + /* now we need R mod m */ + err = fp_montgomery_calc_normalization (&R[0], P); + if (err != FP_OKAY) { + #ifdef WOLFSSL_SMALL_STACK + XFREE(R, NULL, DYNAMIC_TYPE_BIGINT); + #endif + return err; + } + + /* now set R[0][1] to G * R mod m */ + if (fp_cmp_mag(P, G) != FP_GT) { + /* G > P so we reduce it first */ + err = fp_mod(G, P, &R[1]); + if (err != FP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + XFREE(R, NULL, DYNAMIC_TYPE_BIGINT); +#endif + return err; + } + } else { + fp_copy(G, &R[1]); + } + err = fp_mulmod (&R[1], &R[0], P, &R[1]); + if (err != FP_OKAY) { +#ifdef WOLFSSL_SMALL_STACK + XFREE(R, NULL, DYNAMIC_TYPE_BIGINT); +#endif + return err; + } + + /* for j = t-1 downto 0 do + r_!k = R0*R1; r_k = r_k^2 + */ + + /* set initial mode and bit cnt */ + bitcnt = 1; + buf = 0; + digidx = digits - 1; + + for (;;) { + /* grab next digit as required */ + if (--bitcnt == 0) { + /* if digidx == -1 we are out of digits so break */ + if (digidx == -1) { + break; + } + /* read next digit and reset bitcnt */ + buf = X->dp[digidx--]; + bitcnt = (int)DIGIT_BIT; + } + + /* grab the next msb from the exponent */ + y = (int)(buf >> (DIGIT_BIT - 1)) & 1; + buf <<= (fp_digit)1; + + /* do ops */ + err = fp_mul(&R[0], &R[1], &R[2]); + if (err != FP_OKAY) { + #ifdef WOLFSSL_SMALL_STACK + XFREE(R, NULL, DYNAMIC_TYPE_BIGINT); + #endif + return err; + } + err = fp_montgomery_reduce(&R[2], P, mp); + if (err != FP_OKAY) { + #ifdef WOLFSSL_SMALL_STACK + XFREE(R, NULL, DYNAMIC_TYPE_BIGINT); + #endif + return err; + } + + /* instead of using R[y] for sqr, which leaks key bit to cache monitor, + * use R[3] as temp, make sure address calc is constant, keep + * &R[0] and &R[1] in cache */ + fp_copy((fp_int*) ( ((wc_ptr_t)&R[0] & wc_off_on_addr[y^1]) + + ((wc_ptr_t)&R[1] & wc_off_on_addr[y]) ), + &R[3]); + err = fp_sqr(&R[3], &R[3]); + if (err != FP_OKAY) { + #ifdef WOLFSSL_SMALL_STACK + XFREE(R, NULL, DYNAMIC_TYPE_BIGINT); + #endif + return err; + } + err = fp_montgomery_reduce(&R[3], P, mp); + if (err != FP_OKAY) { + #ifdef WOLFSSL_SMALL_STACK + XFREE(R, NULL, DYNAMIC_TYPE_BIGINT); + #endif + return err; + } + fp_copy_2_ct(&R[2], &R[3], &R[0], &R[1], y, P->used); + } + + err = fp_montgomery_reduce(&R[0], P, mp); + fp_copy(&R[0], Y); +#ifdef WOLFSSL_SMALL_STACK + XFREE(R, NULL, DYNAMIC_TYPE_BIGINT); +#endif + return err; +} + +#endif /* WC_PROTECT_ENCRYPTED_MEM */ + #endif /* TFM_TIMING_RESISTANT */ /* y = g**x (mod b)