Merge pull request #4420 from douzzer/smallstack-ge-sp

SP, Ed25519: smallstack
This commit is contained in:
Sean Parkinson
2021-10-13 08:33:36 +10:00
committed by GitHub
2 changed files with 137 additions and 27 deletions

View File

@ -9192,6 +9192,8 @@ void ge_scalarmult_base(ge_p3 *h,const unsigned char *a)
}
#define SLIDE_SIZE 256
/* ge double scalar mult */
static void slide(signed char *r,const unsigned char *a)
{
@ -9199,18 +9201,18 @@ static void slide(signed char *r,const unsigned char *a)
int b;
int k;
for (i = 0;i < 256;++i)
for (i = 0;i < SLIDE_SIZE;++i)
r[i] = 1 & (a[i >> 3] >> (i & 7));
for (i = 0;i < 256;++i)
for (i = 0;i < SLIDE_SIZE;++i)
if (r[i]) {
for (b = 1;b <= 6 && i + b < 256;++b) {
for (b = 1;b <= 6 && i + b < SLIDE_SIZE;++b) {
if (r[i + b]) {
if (r[i] + (r[i + b] << b) <= 15) {
r[i] += r[i + b] << b; r[i + b] = 0;
} else if (r[i] - (r[i + b] << b) >= -15) {
r[i] -= r[i + b] << b;
for (k = i + b;k < 256;++k) {
for (k = i + b;k < SLIDE_SIZE;++k) {
if (!r[k]) {
r[k] = 1;
break;
@ -9408,26 +9410,53 @@ B is the Ed25519 base point (x,4/5) with x positive.
int ge_double_scalarmult_vartime(ge_p2 *r, const unsigned char *a,
const ge_p3 *A, const unsigned char *b)
{
signed char aslide[256];
signed char bslide[256];
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
signed char *aslide = NULL;
signed char *bslide = NULL;
ge_cached *Ai = NULL; /* A,3A,5A,7A,9A,11A,13A,15A */
ge_p1p1 *t = NULL;
ge_p3 *u = NULL;
ge_p3 *A2 = NULL;
int ret;
#else
signed char aslide[SLIDE_SIZE];
signed char bslide[SLIDE_SIZE];
ge_cached Ai[8]; /* A,3A,5A,7A,9A,11A,13A,15A */
ge_p1p1 t;
ge_p3 u;
ge_p3 A2;
ge_p1p1 t[1];
ge_p3 u[1];
ge_p3 A2[1];
#endif
int i;
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
if (((aslide = (signed char *)XMALLOC(SLIDE_SIZE, NULL, DYNAMIC_TYPE_TMP_BUFFER))== NULL) ||
((bslide = (signed char *)XMALLOC(SLIDE_SIZE, NULL, DYNAMIC_TYPE_TMP_BUFFER))== NULL) ||
((Ai = (ge_cached *)XMALLOC(8 * sizeof(*Ai), NULL, DYNAMIC_TYPE_TMP_BUFFER))== NULL) ||
((t = (ge_p1p1 *)XMALLOC(sizeof(*t), NULL, DYNAMIC_TYPE_TMP_BUFFER))== NULL) ||
((u = (ge_p3 *)XMALLOC(sizeof(*u), NULL, DYNAMIC_TYPE_TMP_BUFFER))== NULL) ||
((A2 = (ge_p3 *)XMALLOC(sizeof(*A2), NULL, DYNAMIC_TYPE_TMP_BUFFER))== NULL))
{
ret = MEMORY_E;
goto out;
} else
ret = 0;
#endif
slide(aslide,a);
slide(bslide,b);
ge_p3_to_cached(&Ai[0],A);
ge_p3_dbl(&t,A); ge_p1p1_to_p3(&A2,&t);
ge_add(&t,&A2,&Ai[0]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[1],&u);
ge_add(&t,&A2,&Ai[1]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[2],&u);
ge_add(&t,&A2,&Ai[2]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[3],&u);
ge_add(&t,&A2,&Ai[3]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[4],&u);
ge_add(&t,&A2,&Ai[4]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[5],&u);
ge_add(&t,&A2,&Ai[5]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[6],&u);
ge_add(&t,&A2,&Ai[6]); ge_p1p1_to_p3(&u,&t); ge_p3_to_cached(&Ai[7],&u);
ge_p3_dbl(t,A); ge_p1p1_to_p3(A2,t);
ge_add(t,A2,&Ai[0]); ge_p1p1_to_p3(u,t); ge_p3_to_cached(&Ai[1],u);
ge_add(t,A2,&Ai[1]); ge_p1p1_to_p3(u,t); ge_p3_to_cached(&Ai[2],u);
ge_add(t,A2,&Ai[2]); ge_p1p1_to_p3(u,t); ge_p3_to_cached(&Ai[3],u);
ge_add(t,A2,&Ai[3]); ge_p1p1_to_p3(u,t); ge_p3_to_cached(&Ai[4],u);
ge_add(t,A2,&Ai[4]); ge_p1p1_to_p3(u,t); ge_p3_to_cached(&Ai[5],u);
ge_add(t,A2,&Ai[5]); ge_p1p1_to_p3(u,t); ge_p3_to_cached(&Ai[6],u);
ge_add(t,A2,&Ai[6]); ge_p1p1_to_p3(u,t); ge_p3_to_cached(&Ai[7],u);
ge_p2_0(r);
@ -9436,28 +9465,47 @@ int ge_double_scalarmult_vartime(ge_p2 *r, const unsigned char *a,
}
for (;i >= 0;--i) {
ge_p2_dbl(&t,r);
ge_p2_dbl(t,r);
if (aslide[i] > 0) {
ge_p1p1_to_p3(&u,&t);
ge_add(&t,&u,&Ai[aslide[i]/2]);
ge_p1p1_to_p3(u,t);
ge_add(t,u,&Ai[aslide[i]/2]);
} else if (aslide[i] < 0) {
ge_p1p1_to_p3(&u,&t);
ge_sub(&t,&u,&Ai[(-aslide[i])/2]);
ge_p1p1_to_p3(u,t);
ge_sub(t,u,&Ai[(-aslide[i])/2]);
}
if (bslide[i] > 0) {
ge_p1p1_to_p3(&u,&t);
ge_madd(&t,&u,&Bi[bslide[i]/2]);
ge_p1p1_to_p3(u,t);
ge_madd(t,u,&Bi[bslide[i]/2]);
} else if (bslide[i] < 0) {
ge_p1p1_to_p3(&u,&t);
ge_msub(&t,&u,&Bi[(-bslide[i])/2]);
ge_p1p1_to_p3(u,t);
ge_msub(t,u,&Bi[(-bslide[i])/2]);
}
ge_p1p1_to_p2(r,&t);
ge_p1p1_to_p2(r,t);
}
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
out:
if (aslide != NULL)
XFREE(aslide, NULL, DYNAMIC_TYPE_TMP_BUFFER);
if (bslide != NULL)
XFREE(bslide, NULL, DYNAMIC_TYPE_TMP_BUFFER);
if (Ai != NULL)
XFREE(Ai, NULL, DYNAMIC_TYPE_TMP_BUFFER);
if (t != NULL)
XFREE(t, NULL, DYNAMIC_TYPE_TMP_BUFFER);
if (u != NULL)
XFREE(u, NULL, DYNAMIC_TYPE_TMP_BUFFER);
if (A2 != NULL)
XFREE(A2, NULL, DYNAMIC_TYPE_TMP_BUFFER);
return ret;
#else
return 0;
#endif
}
#ifdef CURVED25519_ASM_64BIT

View File

@ -6016,10 +6016,17 @@ int sp_ModExp_4096(const mp_int* base, const mp_int* exp, const mp_int* mod,
mp_int* res)
{
int err = MP_OKAY;
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
sp_digit *b = NULL;
sp_digit *e = NULL;
sp_digit *m = NULL;
sp_digit* r;
#else
sp_digit b[128];
sp_digit e[64];
sp_digit m[64];
sp_digit* r = b;
#endif
#ifdef HAVE_INTEL_AVX2
word32 cpuid_flags = cpuid_get_flags();
#endif
@ -6033,6 +6040,19 @@ int sp_ModExp_4096(const mp_int* base, const mp_int* exp, const mp_int* mod,
err = MP_VAL;
}
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
if (err == MP_OKAY) {
if (((b = (sp_digit *)XMALLOC(128 * sizeof(*b), NULL, DYNAMIC_TYPE_TMP_BUFFER)) == NULL) ||
((e = (sp_digit *)XMALLOC(64 * sizeof(*e), NULL, DYNAMIC_TYPE_TMP_BUFFER)) == NULL) ||
((m = (sp_digit *)XMALLOC(64 * sizeof(*m), NULL, DYNAMIC_TYPE_TMP_BUFFER)) == NULL))
{
err = MEMORY_E;
} else {
r = b;
}
}
#endif
if (err == MP_OKAY) {
sp_4096_from_mp(b, 64, base);
sp_4096_from_mp(e, 64, exp);
@ -6050,7 +6070,18 @@ int sp_ModExp_4096(const mp_int* base, const mp_int* exp, const mp_int* mod,
err = sp_4096_to_mp(r, res);
}
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    /* Heap build: release the temporaries. Any of them may still be NULL if
     * validation or an earlier allocation failed. */
    if (b != NULL)
        XFREE(b, NULL, DYNAMIC_TYPE_TMP_BUFFER);
    if (e != NULL) {
        /* e was loaded from the caller's exponent. It was allocated as
         * 64 sp_digits, so wipe 64 * sizeof(*e) bytes -- a byte count of 64
         * would leave most of the exponent behind in freed memory.
         * NOTE(review): a plain memset just before free can be elided by the
         * optimizer; consider the project's secure-zero helper if one is
         * available in this translation unit. */
        XMEMSET(e, 0, 64 * sizeof(*e));
        XFREE(e, NULL, DYNAMIC_TYPE_TMP_BUFFER);
    }
    if (m != NULL)
        XFREE(m, NULL, DYNAMIC_TYPE_TMP_BUFFER);
#else
    /* Stack build: e is a true array here, so sizeof(e) covers every digit. */
    XMEMSET(e, 0, sizeof(e));
#endif
return err;
}
@ -6326,10 +6357,17 @@ int sp_DhExp_4096(const mp_int* base, const byte* exp, word32 expLen,
const mp_int* mod, byte* out, word32* outLen)
{
int err = MP_OKAY;
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
sp_digit *b = NULL;
sp_digit *e = NULL;
sp_digit *m = NULL;
sp_digit* r;
#else
sp_digit b[128];
sp_digit e[64];
sp_digit m[64];
sp_digit* r = b;
#endif
word32 i;
#ifdef HAVE_INTEL_AVX2
word32 cpuid_flags = cpuid_get_flags();
@ -6343,6 +6381,19 @@ int sp_DhExp_4096(const mp_int* base, const byte* exp, word32 expLen,
err = MP_VAL;
}
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
if (err == MP_OKAY) {
if (((b = (sp_digit *)XMALLOC(128 * sizeof(*b), NULL, DYNAMIC_TYPE_TMP_BUFFER)) == NULL) ||
((e = (sp_digit *)XMALLOC(64 * sizeof(*e), NULL, DYNAMIC_TYPE_TMP_BUFFER)) == NULL) ||
((m = (sp_digit *)XMALLOC(64 * sizeof(*m), NULL, DYNAMIC_TYPE_TMP_BUFFER)) == NULL))
{
err = MEMORY_E;
} else {
r = b;
}
}
#endif
if (err == MP_OKAY) {
sp_4096_from_mp(b, 64, base);
sp_4096_from_bin(e, 64, exp, expLen);
@ -6379,7 +6430,18 @@ int sp_DhExp_4096(const mp_int* base, const byte* exp, word32 expLen,
XMEMMOVE(out, out + i, *outLen);
}
#if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC)
    /* Heap build: release the temporaries. Any of them may still be NULL if
     * validation or an earlier allocation failed. */
    if (b != NULL)
        XFREE(b, NULL, DYNAMIC_TYPE_TMP_BUFFER);
    if (e != NULL) {
        /* e was loaded from the caller's exponent bytes. It was allocated as
         * 64 sp_digits, so wipe 64 * sizeof(*e) bytes -- a byte count of 64
         * would leave most of the exponent behind in freed memory.
         * NOTE(review): a plain memset just before free can be elided by the
         * optimizer; consider the project's secure-zero helper if one is
         * available in this translation unit. */
        XMEMSET(e, 0, 64 * sizeof(*e));
        XFREE(e, NULL, DYNAMIC_TYPE_TMP_BUFFER);
    }
    if (m != NULL)
        XFREE(m, NULL, DYNAMIC_TYPE_TMP_BUFFER);
#else
    /* Stack build: e is a true array here, so sizeof(e) covers every digit. */
    XMEMSET(e, 0, sizeof(e));
#endif
return err;
}