Merge pull request #4420 from douzzer/smallstack-ge-sp

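SP, Ed25519: add small-stack support (heap-allocate large temporaries when WOLFSSL_SMALL_STACK is defined and WOLFSSL_SP_NO_MALLOC is not)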
This commit is contained in:
Sean Parkinson
2021-10-13 08:33:36 +10:00
committed by GitHub
2 changed files with 137 additions and 27 deletions

View File

@ -9192,6 +9192,8 @@ void ge_scalarmult_base(ge_p3 *h,const unsigned char *a)
} }
#define SLIDE_SIZE 256
/* ge double scalar mult */ /* ge double scalar mult */
static void slide(signed char *r,const unsigned char *a) static void slide(signed char *r,const unsigned char *a)
{ {
@ -9199,18 +9201,18 @@ static void slide(signed char *r,const unsigned char *a)
int b; int b;
int k; int k;
for (i = 0;i < 256;++i) for (i = 0;i < SLIDE_SIZE;++i)
r[i] = 1 & (a[i >> 3] >> (i & 7)); r[i] = 1 & (a[i >> 3] >> (i & 7));
for (i = 0;i < 256;++i) for (i = 0;i < SLIDE_SIZE;++i)
if (r[i]) { if (r[i]) {
for (b = 1;b <= 6 && i + b < 256;++b) { for (b = 1;b <= 6 && i + b < SLIDE_SIZE;++b) {
if (r[i + b]) { if (r[i + b]) {
if (r[i] + (r[i + b] << b) <= 15) { if (r[i] + (r[i + b] << b) <= 15) {
r[i] += r[i + b] << b; r[i + b] = 0; r[i] += r[i + b] << b; r[i + b] = 0;
} else if (r[i] - (r[i + b] << b) >= -15) { } else if (r[i] - (r[i + b] << b) >= -15) {
r[i] -= r[i + b] << b; r[i] -= r[i + b] << b;
for (k = i + b;k < 256;++k) { for (k = i + b;k < SLIDE_SIZE;++k) {
if (!r[k]) { if (!r[k]) {
r[k] = 1; r[k] = 1;
break; break;
@ -9408,26 +9410,53 @@ B is the Ed25519 base point (x,4/5) with x positive.
int ge_double_scalarmult_vartime(ge_p2 *r, const unsigned char *a, int ge_double_scalarmult_vartime(ge_p2 *r, const unsigned char *a,
const ge_p3 *A, const unsigned char *b) const ge_p3 *A, const unsigned char *b)
{ {
signed char aslide[256]; #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
signed char bslide[256]; signed char *aslide = NULL;
signed char *bslide = NULL;
ge_cached *Ai = NULL; /* A,3A,5A,7A,9A,11A,13A,15A */
ge_p1p1 *t = NULL;
ge_p3 *u = NULL;
ge_p3 *A2 = NULL;
int ret;
#else
signed char aslide[SLIDE_SIZE];
signed char bslide[SLIDE_SIZE];
ge_cached Ai[8]; /* A,3A,5A,7A,9A,11A,13A,15A */ ge_cached Ai[8]; /* A,3A,5A,7A,9A,11A,13A,15A */
ge_p1p1 t;
ge_p3 u; ge_p1p1 t[1];
ge_p3 A2; ge_p3 u[1];
ge_p3 A2[1];
#endif
int i; int i;
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
if (((aslide = (signed char *)XMALLOC(SLIDE_SIZE, NULL, DYNAMIC_TYPE_TMP_BUFFER))== NULL) ||
((bslide = (signed char *)XMALLOC(SLIDE_SIZE, NULL, DYNAMIC_TYPE_TMP_BUFFER))== NULL) ||
((Ai = (ge_cached *)XMALLOC(8 * sizeof(*Ai), NULL, DYNAMIC_TYPE_TMP_BUFFER))== NULL) ||
((t = (ge_p1p1 *)XMALLOC(sizeof(*t), NULL, DYNAMIC_TYPE_TMP_BUFFER))== NULL) ||
((u = (ge_p3 *)XMALLOC(sizeof(*u), NULL, DYNAMIC_TYPE_TMP_BUFFER))== NULL) ||
((A2 = (ge_p3 *)XMALLOC(sizeof(*A2), NULL, DYNAMIC_TYPE_TMP_BUFFER))== NULL))
{
ret = MEMORY_E;
goto out;
} else
ret = 0;
#endif
slide(aslide,a); slide(aslide,a);
slide(bslide,b); slide(bslide,b);
ge_p3_to_cached(&Ai[0],A); ge_p3_to_cached(&Ai[0],A);
ge_p3_dbl(&t,A); ge_p1p1_to_p3(&A2,&t); ge_p3_dbl(t,A); ge_p1p1_to_p3(A2,t);
ge_add(&t,&A2,&Ai[0]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[1],&u); ge_add(t,A2,&Ai[0]); ge_p1p1_to_p3(u,t); ge_p3_to_cached(&Ai[1],u);
ge_add(&t,&A2,&Ai[1]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[2],&u); ge_add(t,A2,&Ai[1]); ge_p1p1_to_p3(u,t); ge_p3_to_cached(&Ai[2],u);
ge_add(&t,&A2,&Ai[2]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[3],&u); ge_add(t,A2,&Ai[2]); ge_p1p1_to_p3(u,t); ge_p3_to_cached(&Ai[3],u);
ge_add(&t,&A2,&Ai[3]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[4],&u); ge_add(t,A2,&Ai[3]); ge_p1p1_to_p3(u,t); ge_p3_to_cached(&Ai[4],u);
ge_add(&t,&A2,&Ai[4]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[5],&u); ge_add(t,A2,&Ai[4]); ge_p1p1_to_p3(u,t); ge_p3_to_cached(&Ai[5],u);
ge_add(&t,&A2,&Ai[5]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[6],&u); ge_add(t,A2,&Ai[5]); ge_p1p1_to_p3(u,t); ge_p3_to_cached(&Ai[6],u);
ge_add(&t,&A2,&Ai[6]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[7],&u); ge_add(t,A2,&Ai[6]); ge_p1p1_to_p3(u,t); ge_p3_to_cached(&Ai[7],u);
ge_p2_0(r); ge_p2_0(r);
@ -9436,28 +9465,47 @@ int ge_double_scalarmult_vartime(ge_p2 *r, const unsigned char *a,
} }
for (;i >= 0;--i) { for (;i >= 0;--i) {
ge_p2_dbl(&t,r); ge_p2_dbl(t,r);
if (aslide[i] > 0) { if (aslide[i] > 0) {
ge_p1p1_to_p3(&u,&t); ge_p1p1_to_p3(u,t);
ge_add(&t,&u,&Ai[aslide[i]/2]); ge_add(t,u,&Ai[aslide[i]/2]);
} else if (aslide[i] < 0) { } else if (aslide[i] < 0) {
ge_p1p1_to_p3(&u,&t); ge_p1p1_to_p3(u,t);
ge_sub(&t,&u,&Ai[(-aslide[i])/2]); ge_sub(t,u,&Ai[(-aslide[i])/2]);
} }
if (bslide[i] > 0) { if (bslide[i] > 0) {
ge_p1p1_to_p3(&u,&t); ge_p1p1_to_p3(u,t);
ge_madd(&t,&u,&Bi[bslide[i]/2]); ge_madd(t,u,&Bi[bslide[i]/2]);
} else if (bslide[i] < 0) { } else if (bslide[i] < 0) {
ge_p1p1_to_p3(&u,&t); ge_p1p1_to_p3(u,t);
ge_msub(&t,&u,&Bi[(-bslide[i])/2]); ge_msub(t,u,&Bi[(-bslide[i])/2]);
} }
ge_p1p1_to_p2(r,&t); ge_p1p1_to_p2(r,t);
} }
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
out:
if (aslide != NULL)
XFREE(aslide, NULL, DYNAMIC_TYPE_TMP_BUFFER);
if (bslide != NULL)
XFREE(bslide, NULL, DYNAMIC_TYPE_TMP_BUFFER);
if (Ai != NULL)
XFREE(Ai, NULL, DYNAMIC_TYPE_TMP_BUFFER);
if (t != NULL)
XFREE(t, NULL, DYNAMIC_TYPE_TMP_BUFFER);
if (u != NULL)
XFREE(u, NULL, DYNAMIC_TYPE_TMP_BUFFER);
if (A2 != NULL)
XFREE(A2, NULL, DYNAMIC_TYPE_TMP_BUFFER);
return ret;
#else
return 0; return 0;
#endif
} }
#ifdef CURVED25519_ASM_64BIT #ifdef CURVED25519_ASM_64BIT

View File

@ -6016,10 +6016,17 @@ int sp_ModExp_4096(const mp_int* base, const mp_int* exp, const mp_int* mod,
mp_int* res) mp_int* res)
{ {
int err = MP_OKAY; int err = MP_OKAY;
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
sp_digit *b = NULL;
sp_digit *e = NULL;
sp_digit *m = NULL;
sp_digit* r;
#else
sp_digit b[128]; sp_digit b[128];
sp_digit e[64]; sp_digit e[64];
sp_digit m[64]; sp_digit m[64];
sp_digit* r = b; sp_digit* r = b;
#endif
#ifdef HAVE_INTEL_AVX2 #ifdef HAVE_INTEL_AVX2
word32 cpuid_flags = cpuid_get_flags(); word32 cpuid_flags = cpuid_get_flags();
#endif #endif
@ -6033,6 +6040,19 @@ int sp_ModExp_4096(const mp_int* base, const mp_int* exp, const mp_int* mod,
err = MP_VAL; err = MP_VAL;
} }
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
if (err == MP_OKAY) {
if (((b = (sp_digit *)XMALLOC(128 * sizeof(*b), NULL, DYNAMIC_TYPE_TMP_BUFFER)) == NULL) ||
((e = (sp_digit *)XMALLOC(64 * sizeof(*e), NULL, DYNAMIC_TYPE_TMP_BUFFER)) == NULL) ||
((m = (sp_digit *)XMALLOC(64 * sizeof(*m), NULL, DYNAMIC_TYPE_TMP_BUFFER)) == NULL))
{
err = MEMORY_E;
} else {
r = b;
}
}
#endif
if (err == MP_OKAY) { if (err == MP_OKAY) {
sp_4096_from_mp(b, 64, base); sp_4096_from_mp(b, 64, base);
sp_4096_from_mp(e, 64, exp); sp_4096_from_mp(e, 64, exp);
@ -6050,7 +6070,18 @@ int sp_ModExp_4096(const mp_int* base, const mp_int* exp, const mp_int* mod,
err = sp_4096_to_mp(r, res); err = sp_4096_to_mp(r, res);
} }
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
if (b != NULL)
XFREE(b, NULL, DYNAMIC_TYPE_TMP_BUFFER);
if (e != NULL) {
XMEMSET(e, 0, 64);
XFREE(e, NULL, DYNAMIC_TYPE_TMP_BUFFER);
}
if (m != NULL)
XFREE(m, NULL, DYNAMIC_TYPE_TMP_BUFFER);
#else
XMEMSET(e, 0, sizeof(e)); XMEMSET(e, 0, sizeof(e));
#endif
return err; return err;
} }
@ -6326,10 +6357,17 @@ int sp_DhExp_4096(const mp_int* base, const byte* exp, word32 expLen,
const mp_int* mod, byte* out, word32* outLen) const mp_int* mod, byte* out, word32* outLen)
{ {
int err = MP_OKAY; int err = MP_OKAY;
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
sp_digit *b = NULL;
sp_digit *e = NULL;
sp_digit *m = NULL;
sp_digit* r;
#else
sp_digit b[128]; sp_digit b[128];
sp_digit e[64]; sp_digit e[64];
sp_digit m[64]; sp_digit m[64];
sp_digit* r = b; sp_digit* r = b;
#endif
word32 i; word32 i;
#ifdef HAVE_INTEL_AVX2 #ifdef HAVE_INTEL_AVX2
word32 cpuid_flags = cpuid_get_flags(); word32 cpuid_flags = cpuid_get_flags();
@ -6343,6 +6381,19 @@ int sp_DhExp_4096(const mp_int* base, const byte* exp, word32 expLen,
err = MP_VAL; err = MP_VAL;
} }
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
if (err == MP_OKAY) {
if (((b = (sp_digit *)XMALLOC(128 * sizeof(*b), NULL, DYNAMIC_TYPE_TMP_BUFFER)) == NULL) ||
((e = (sp_digit *)XMALLOC(64 * sizeof(*e), NULL, DYNAMIC_TYPE_TMP_BUFFER)) == NULL) ||
((m = (sp_digit *)XMALLOC(64 * sizeof(*m), NULL, DYNAMIC_TYPE_TMP_BUFFER)) == NULL))
{
err = MEMORY_E;
} else {
r = b;
}
}
#endif
if (err == MP_OKAY) { if (err == MP_OKAY) {
sp_4096_from_mp(b, 64, base); sp_4096_from_mp(b, 64, base);
sp_4096_from_bin(e, 64, exp, expLen); sp_4096_from_bin(e, 64, exp, expLen);
@ -6379,7 +6430,18 @@ int sp_DhExp_4096(const mp_int* base, const byte* exp, word32 expLen,
XMEMMOVE(out, out + i, *outLen); XMEMMOVE(out, out + i, *outLen);
} }
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
if (b != NULL)
XFREE(b, NULL, DYNAMIC_TYPE_TMP_BUFFER);
if (e != NULL) {
XMEMSET(e, 0, 64);
XFREE(e, NULL, DYNAMIC_TYPE_TMP_BUFFER);
}
if (m != NULL)
XFREE(m, NULL, DYNAMIC_TYPE_TMP_BUFFER);
#else
XMEMSET(e, 0, sizeof(e)); XMEMSET(e, 0, sizeof(e));
#endif
return err; return err;
} }