From b716c88e01079e2876dd851fb64d1fe5fe60d789 Mon Sep 17 00:00:00 2001 From: Daniel Pouzzner Date: Wed, 22 Sep 2021 13:58:05 -0500 Subject: [PATCH] smallstack refactors for ge_double_scalarmult_vartime(), sp_ModExp_4096(), and sp_DhExp_4096(). --- wolfcrypt/src/ge_operations.c | 102 +++++++++++++++++++++++++--------- wolfcrypt/src/sp_x86_64.c | 62 +++++++++++++++++++++ 2 files changed, 137 insertions(+), 27 deletions(-) diff --git a/wolfcrypt/src/ge_operations.c b/wolfcrypt/src/ge_operations.c index f743ef87b..e079ed5ba 100644 --- a/wolfcrypt/src/ge_operations.c +++ b/wolfcrypt/src/ge_operations.c @@ -9192,6 +9192,8 @@ void ge_scalarmult_base(ge_p3 *h,const unsigned char *a) } +#define SLIDE_SIZE 256 + /* ge double scalar mult */ static void slide(signed char *r,const unsigned char *a) { @@ -9199,18 +9201,18 @@ static void slide(signed char *r,const unsigned char *a) int b; int k; - for (i = 0;i < 256;++i) + for (i = 0;i < SLIDE_SIZE;++i) r[i] = 1 & (a[i >> 3] >> (i & 7)); - for (i = 0;i < 256;++i) + for (i = 0;i < SLIDE_SIZE;++i) if (r[i]) { - for (b = 1;b <= 6 && i + b < 256;++b) { + for (b = 1;b <= 6 && i + b < SLIDE_SIZE;++b) { if (r[i + b]) { if (r[i] + (r[i + b] << b) <= 15) { r[i] += r[i + b] << b; r[i + b] = 0; } else if (r[i] - (r[i + b] << b) >= -15) { r[i] -= r[i + b] << b; - for (k = i + b;k < 256;++k) { + for (k = i + b;k < SLIDE_SIZE;++k) { if (!r[k]) { r[k] = 1; break; @@ -9408,26 +9410,53 @@ B is the Ed25519 base point (x,4/5) with x positive. int ge_double_scalarmult_vartime(ge_p2 *r, const unsigned char *a, const ge_p3 *A, const unsigned char *b) { - signed char aslide[256]; - signed char bslide[256]; +#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + signed char *aslide = NULL; + signed char *bslide = NULL; + ge_cached *Ai = NULL; /* A,3A,5A,7A,9A,11A,13A,15A */ + + ge_p1p1 *t = NULL; + ge_p3 *u = NULL; + ge_p3 *A2 = NULL; + + int ret; +#else + signed char aslide[SLIDE_SIZE]; + signed char bslide[SLIDE_SIZE]; ge_cached Ai[8]; /* A,3A,5A,7A,9A,11A,13A,15A */ - ge_p1p1 t; - ge_p3 u; - ge_p3 A2; + + ge_p1p1 t[1]; + ge_p3 u[1]; + ge_p3 A2[1]; +#endif int i; +#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + if (((aslide = (signed char *)XMALLOC(SLIDE_SIZE, NULL, DYNAMIC_TYPE_TMP_BUFFER))== NULL) || + ((bslide = (signed char *)XMALLOC(SLIDE_SIZE, NULL, DYNAMIC_TYPE_TMP_BUFFER))== NULL) || + ((Ai = (ge_cached *)XMALLOC(8 * sizeof(*Ai), NULL, DYNAMIC_TYPE_TMP_BUFFER))== NULL) || + ((t = (ge_p1p1 *)XMALLOC(sizeof(*t), NULL, DYNAMIC_TYPE_TMP_BUFFER))== NULL) || + ((u = (ge_p3 *)XMALLOC(sizeof(*u), NULL, DYNAMIC_TYPE_TMP_BUFFER))== NULL) || + ((A2 = (ge_p3 *)XMALLOC(sizeof(*A2), NULL, DYNAMIC_TYPE_TMP_BUFFER))== NULL)) + { + ret = MEMORY_E; + goto out; + } else + ret = 0; +#endif + slide(aslide,a); slide(bslide,b); ge_p3_to_cached(&Ai[0],A); - ge_p3_dbl(&t,A); ge_p1p1_to_p3(&A2,&t); - ge_add(&t,&A2,&Ai[0]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[1],&u); - ge_add(&t,&A2,&Ai[1]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[2],&u); - ge_add(&t,&A2,&Ai[2]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[3],&u); - ge_add(&t,&A2,&Ai[3]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[4],&u); - ge_add(&t,&A2,&Ai[4]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[5],&u); - ge_add(&t,&A2,&Ai[5]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[6],&u); - ge_add(&t,&A2,&Ai[6]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[7],&u); + ge_p3_dbl(t,A); ge_p1p1_to_p3(A2,t); + ge_add(t,A2,&Ai[0]); ge_p1p1_to_p3(u,t); ge_p3_to_cached(&Ai[1],u); + ge_add(t,A2,&Ai[1]); ge_p1p1_to_p3(u,t); ge_p3_to_cached(&Ai[2],u); + ge_add(t,A2,&Ai[2]); ge_p1p1_to_p3(u,t); ge_p3_to_cached(&Ai[3],u); + ge_add(t,A2,&Ai[3]); ge_p1p1_to_p3(u,t); ge_p3_to_cached(&Ai[4],u); + ge_add(t,A2,&Ai[4]); ge_p1p1_to_p3(u,t); ge_p3_to_cached(&Ai[5],u); + ge_add(t,A2,&Ai[5]); ge_p1p1_to_p3(u,t); ge_p3_to_cached(&Ai[6],u); + ge_add(t,A2,&Ai[6]); ge_p1p1_to_p3(u,t); ge_p3_to_cached(&Ai[7],u); ge_p2_0(r); @@ -9436,28 +9465,47 @@ int ge_double_scalarmult_vartime(ge_p2 *r, const unsigned char *a, } for (;i >= 0;--i) { - ge_p2_dbl(&t,r); + ge_p2_dbl(t,r); if (aslide[i] > 0) { - ge_p1p1_to_p3(&u,&t); - ge_add(&t,&u,&Ai[aslide[i]/2]); + ge_p1p1_to_p3(u,t); + ge_add(t,u,&Ai[aslide[i]/2]); } else if (aslide[i] < 0) { - ge_p1p1_to_p3(&u,&t); - ge_sub(&t,&u,&Ai[(-aslide[i])/2]); + ge_p1p1_to_p3(u,t); + ge_sub(t,u,&Ai[(-aslide[i])/2]); } if (bslide[i] > 0) { - ge_p1p1_to_p3(&u,&t); - ge_madd(&t,&u,&Bi[bslide[i]/2]); + ge_p1p1_to_p3(u,t); + ge_madd(t,u,&Bi[bslide[i]/2]); } else if (bslide[i] < 0) { - ge_p1p1_to_p3(&u,&t); - ge_msub(&t,&u,&Bi[(-bslide[i])/2]); + ge_p1p1_to_p3(u,t); + ge_msub(t,u,&Bi[(-bslide[i])/2]); } - ge_p1p1_to_p2(r,&t); + ge_p1p1_to_p2(r,t); } +#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + out: + + if (aslide != NULL) + XFREE(aslide, NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (bslide != NULL) + XFREE(bslide, NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (Ai != NULL) + XFREE(Ai, NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (t != NULL) + XFREE(t, NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (u != NULL) + XFREE(u, NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (A2 != NULL) + XFREE(A2, NULL, DYNAMIC_TYPE_TMP_BUFFER); + + return ret; +#else return 0; +#endif } #ifdef CURVED25519_ASM_64BIT diff --git a/wolfcrypt/src/sp_x86_64.c b/wolfcrypt/src/sp_x86_64.c index 85d323b96..9c08063ef 100644 --- a/wolfcrypt/src/sp_x86_64.c +++ b/wolfcrypt/src/sp_x86_64.c @@ -6016,10 +6016,17 @@ int sp_ModExp_4096(const mp_int* base, const mp_int* exp, const mp_int* mod, mp_int* res) { int err = MP_OKAY; +#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_digit *b = NULL; + sp_digit *e = NULL; + sp_digit *m = NULL; + sp_digit* r; +#else sp_digit b[128]; sp_digit e[64]; sp_digit m[64]; sp_digit* r = b; +#endif #ifdef HAVE_INTEL_AVX2 word32 cpuid_flags = cpuid_get_flags(); #endif @@ -6033,6 +6040,19 @@ int sp_ModExp_4096(const mp_int* base, const mp_int* exp, const mp_int* mod, err = MP_VAL; } +#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + if (((b = (sp_digit *)XMALLOC(128 * sizeof(*b), NULL, DYNAMIC_TYPE_TMP_BUFFER)) == NULL) || + ((e = (sp_digit *)XMALLOC(64 * sizeof(*e), NULL, DYNAMIC_TYPE_TMP_BUFFER)) == NULL) || + ((m = (sp_digit *)XMALLOC(64 * sizeof(*m), NULL, DYNAMIC_TYPE_TMP_BUFFER)) == NULL)) + { + err = MEMORY_E; + } else { + r = b; + } + } +#endif + if (err == MP_OKAY) { sp_4096_from_mp(b, 64, base); sp_4096_from_mp(e, 64, exp); @@ -6050,7 +6070,18 @@ int sp_ModExp_4096(const mp_int* base, const mp_int* exp, const mp_int* mod, err = sp_4096_to_mp(r, res); } +#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + if (b != NULL) + XFREE(b, NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (e != NULL) { + XMEMSET(e, 0, 64); + XFREE(e, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } + if (m != NULL) + XFREE(m, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#else XMEMSET(e, 0, sizeof(e)); +#endif return err; } @@ -6326,10 +6357,17 @@ int sp_DhExp_4096(const mp_int* base, const byte* exp, word32 expLen, const mp_int* mod, byte* out, word32* outLen) { int err = MP_OKAY; +#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + sp_digit *b = NULL; + sp_digit *e = NULL; + sp_digit *m = NULL; + sp_digit* r; +#else sp_digit b[128]; sp_digit e[64]; sp_digit m[64]; sp_digit* r = b; +#endif word32 i; #ifdef HAVE_INTEL_AVX2 word32 cpuid_flags = cpuid_get_flags(); @@ -6343,6 +6381,19 @@ int sp_DhExp_4096(const mp_int* base, const byte* exp, word32 expLen, err = MP_VAL; } +#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + if (err == MP_OKAY) { + if (((b = (sp_digit *)XMALLOC(128 * sizeof(*b), NULL, DYNAMIC_TYPE_TMP_BUFFER)) == NULL) || + ((e = (sp_digit *)XMALLOC(64 * sizeof(*e), NULL, DYNAMIC_TYPE_TMP_BUFFER)) == NULL) || + ((m = (sp_digit *)XMALLOC(64 * sizeof(*m), NULL, DYNAMIC_TYPE_TMP_BUFFER)) == NULL)) + { + err = MEMORY_E; + } else { + r = b; + } + } +#endif + if (err == MP_OKAY) { sp_4096_from_mp(b, 64, base); sp_4096_from_bin(e, 64, exp, expLen); @@ -6379,7 +6430,18 @@ int sp_DhExp_4096(const mp_int* base, const byte* exp, word32 expLen, XMEMMOVE(out, out + i, *outLen); } +#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) + if (b != NULL) + XFREE(b, NULL, DYNAMIC_TYPE_TMP_BUFFER); + if (e != NULL) { + XMEMSET(e, 0, 64); + XFREE(e, NULL, DYNAMIC_TYPE_TMP_BUFFER); + } + if (m != NULL) + XFREE(m, NULL, DYNAMIC_TYPE_TMP_BUFFER); +#else XMEMSET(e, 0, sizeof(e)); +#endif return err; }