Merge pull request #2391 from SparkiDev/tfm_dh_2

Specialized mod exponentiation for base 2 in tfm.c and integer.c
This commit is contained in:
toddouska
2019-08-15 15:59:20 -07:00
committed by GitHub
5 changed files with 496 additions and 6 deletions

View File

@ -886,6 +886,12 @@ int mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
#endif
}
#ifdef BN_MP_EXPTMOD_BASE_2
if (G->used == 1 && G->dp[0] == 2) {
return mp_exptmod_base_2(X, P, Y);
}
#endif
/* modified diminished radix reduction */
#if defined(BN_MP_REDUCE_IS_2K_L_C) && defined(BN_MP_REDUCE_2K_L_C) && \
defined(BN_S_MP_EXPTMOD_C)
@ -2198,6 +2204,170 @@ LBL_M:
return err;
}
#ifdef BN_MP_EXPTMOD_BASE_2
/* Window size (in exponent bits) consumed per iteration. It is chosen so that
 * a window value, used as a shift count, always stays below DIGIT_BIT.
 */
#if DIGIT_BIT < 16
    #define WINSIZE    3
#elif DIGIT_BIT < 32
    #define WINSIZE    4
#elif DIGIT_BIT < 64
    #define WINSIZE    5
#elif DIGIT_BIT < 128
    #define WINSIZE    6
#endif

/* Compute Y = 2^X mod P.
 *
 * The exponent is consumed from the most-significant bit downwards in
 * windows of WINSIZE bits. For every window the running result is squared
 * WINSIZE times (each square followed by a Montgomery reduction) and is then
 * multiplied by 2^window. Because the base is 2, that multiplication is a
 * left shift (mp_mul_2d) followed by mp_mod instead of a general multiply.
 *
 * X  exponent
 * P  modulus (passed to mp_montgomery_setup, so it must be acceptable to it)
 * Y  receives the result
 *
 * Returns MP_OKAY on success, MP_MEM when the temporary cannot be allocated
 * (WOLFSSL_SMALL_STACK), MP_VAL when no Montgomery reduction routine is
 * compiled in, or the error code of the first helper that fails.
 */
int mp_exptmod_base_2(mp_int * X, mp_int * P, mp_int * Y)
{
  mp_digit buf, mp;
  int      err, bitbuf, bitcpy, bitcnt, digidx, x, y;
#ifdef WOLFSSL_SMALL_STACK
  mp_int  *res;
#else
  mp_int   res[1];
#endif
  int     (*redux)(mp_int*,mp_int*,mp_digit);

  /* automatically pick the comba one if available (saves quite a few
     calls/ifs) */
#ifdef BN_FAST_MP_MONTGOMERY_REDUCE_C
  if (((P->used * 2 + 1) < (int)MP_WARRAY) &&
      P->used < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
    redux = fast_mp_montgomery_reduce;
  } else
#endif
  {
#ifdef BN_MP_MONTGOMERY_REDUCE_C
    /* use slower baseline Montgomery method */
    redux = mp_montgomery_reduce;
#else
    /* nothing has been allocated yet, so a plain return is safe */
    return MP_VAL;
#endif
  }

#ifdef WOLFSSL_SMALL_STACK
  res = (mp_int*)XMALLOC(sizeof(mp_int), NULL, DYNAMIC_TYPE_TMP_BUFFER);
  if (res == NULL) {
    return MP_MEM;
  }
#endif

  /* now setup montgomery */
  if ((err = mp_montgomery_setup(P, &mp)) != MP_OKAY) {
    goto LBL_M;
  }

  /* setup result */
  if ((err = mp_init(res)) != MP_OKAY) {
    goto LBL_M;
  }

  /* now we need R mod m: the Montgomery representation of 1 */
  if ((err = mp_montgomery_calc_normalization(res, P)) != MP_OKAY) {
    goto LBL_RES;
  }

  /* Get the top bits left over after taking WINSIZE bits starting at the
   * least-significant.
   */
  digidx = X->used - 1;
  bitcpy = (X->used * DIGIT_BIT) % WINSIZE;
  if (bitcpy > 0) {
    bitcnt = (int)DIGIT_BIT - bitcpy;
    buf    = X->dp[digidx--];
    bitbuf = (int)(buf >> bitcnt);
    /* Multiply montgomery representation of 1 by 2 ^ top */
    err = mp_mul_2d(res, bitbuf, res);
    if (err != MP_OKAY) {
      goto LBL_RES;
    }
    err = mp_mod(res, P, res);
    if (err != MP_OKAY) {
      goto LBL_RES;
    }
    /* Move out bits used */
    buf <<= bitcpy;
    /* the loop below pre-decrements, so compensate here */
    bitcnt++;
  }
  else {
    /* forces the loop to load the first digit on its first pass */
    bitcnt = 1;
    buf    = 0;
  }

  /* empty window and reset */
  bitbuf = 0;
  bitcpy = 0;

  for (;;) {
    /* grab next digit as required */
    if (--bitcnt == 0) {
      /* if digidx == -1 we are out of digits so break */
      if (digidx == -1) {
        break;
      }
      /* read next digit and reset bitcnt */
      buf    = X->dp[digidx--];
      bitcnt = (int)DIGIT_BIT;
    }

    /* grab the next msb from the exponent */
    y     = (int)(buf >> (DIGIT_BIT - 1)) & 1;
    buf <<= (mp_digit)1;
    /* add bit to the window */
    bitbuf |= (y << (WINSIZE - ++bitcpy));

    if (bitcpy == WINSIZE) {
      /* ok window is filled so square as required and multiply */
      /* square first */
      for (x = 0; x < WINSIZE; x++) {
        err = mp_sqr(res, res);
        if (err != MP_OKAY) {
          goto LBL_RES;
        }
        err = (*redux)(res, P, mp);
        if (err != MP_OKAY) {
          goto LBL_RES;
        }
      }

      /* then multiply by 2^bitbuf */
      err = mp_mul_2d(res, bitbuf, res);
      if (err != MP_OKAY) {
        goto LBL_RES;
      }
      err = mp_mod(res, P, res);
      if (err != MP_OKAY) {
        goto LBL_RES;
      }

      /* empty window and reset */
      bitcpy = 0;
      bitbuf = 0;
    }
  }

  /* fixup result if Montgomery reduction is used
   * recall that any value in a Montgomery system is
   * actually multiplied by R mod n.  So we have
   * to reduce one more time to cancel out the factor
   * of R.
   */
  err = (*redux)(res, P, mp);
  if (err != MP_OKAY) {
    goto LBL_RES;
  }

  /* copy res into Y; propagate a failure instead of silently returning
   * MP_OKAY with an unset result */
  err = mp_copy(res, Y);

LBL_RES:mp_clear (res);
LBL_M:
#ifdef WOLFSSL_SMALL_STACK
  XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
#endif
  return err;
}

#undef WINSIZE
#endif /* BN_MP_EXPTMOD_BASE_2 */
/* sets up the montgomery reduction stuff */
int mp_montgomery_setup (mp_int * n, mp_digit * rho)

View File

@ -1857,6 +1857,316 @@ static int _fp_exptmod(fp_int * G, fp_int * X, int digits, fp_int * P,
#endif /* TFM_TIMING_RESISTANT */
#ifdef TFM_TIMING_RESISTANT
/* Window size (exponent bits consumed per iteration) for the timing-resistant
 * base-2 exponentiation below, selected from DIGIT_BIT.
 */
#if DIGIT_BIT <= 16
#define WINSIZE 2
#elif DIGIT_BIT <= 32
#define WINSIZE 3
#elif DIGIT_BIT <= 64
#define WINSIZE 4
#elif DIGIT_BIT <= 128
#define WINSIZE 5
#endif
/* Timing-resistant computation of Y = 2**X (mod P).
 *
 * X       exponent; its digits are read from index digits-1 down to 0
 * digits  number of digits of X to process (the visible caller passes X->used)
 * P       modulus
 * Y       receives the result
 *
 * The exponent is consumed most-significant bit first in windows of WINSIZE
 * bits. Each full window costs WINSIZE square+Montgomery-reduce steps, then a
 * shift by the window value (multiplication by a power of two), an addition
 * of tmp and a reduction mod P.
 *
 * Returns FP_MEM if the temporaries cannot be allocated
 * (WOLFSSL_SMALL_STACK), otherwise the error code of the first failing helper
 * or the result of the final fp_montgomery_reduce.
 *
 * Original restriction note: x must be positive and < b.
 */
static int _fp_exptmod_base_2(fp_int * X, int digits, fp_int * P,
fp_int * Y)
{
fp_digit buf, mp;
int err, bitbuf, bitcpy, bitcnt, digidx, x, y;
#ifdef WOLFSSL_SMALL_STACK
fp_int *res;
fp_int *tmp;
#else
fp_int res[1];
fp_int tmp[1];
#endif
#ifdef WOLFSSL_SMALL_STACK
/* one allocation holds both temporaries; tmp aliases the second element, so
 * freeing res releases both */
res = (fp_int*)XMALLOC(2*sizeof(fp_int), NULL, DYNAMIC_TYPE_TMP_BUFFER);
if (res == NULL) {
return FP_MEM;
}
tmp = &res[1];
#endif
/* now setup montgomery */
if ((err = fp_montgomery_setup(P, &mp)) != FP_OKAY) {
#ifdef WOLFSSL_SMALL_STACK
XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
#endif
return err;
}
/* setup result */
fp_init(res);
fp_init(tmp);
/* tmp = P shifted left by (1 << WINSIZE) bits. It is added before every
 * fp_mod below (see the "same time" comment in the loop); presumably, being a
 * multiple of P, it leaves the residue unchanged -- NOTE(review): confirm it
 * always fits in an fp_int for the largest supported modulus. */
fp_mul_2d(P, 1 << WINSIZE, tmp);
/* now we need R mod m */
fp_montgomery_calc_normalization(res, P);
/* Get the top bits left over after taking WINSIZE bits starting at the
* least-significant.
*/
digidx = digits - 1;
bitcpy = (digits * DIGIT_BIT) % WINSIZE;
if (bitcpy > 0) {
bitcnt = (int)DIGIT_BIT - bitcpy;
buf = X->dp[digidx--];
bitbuf = (int)(buf >> bitcnt);
/* Multiply montgomery representation of 1 by 2 ^ top */
fp_mul_2d(res, bitbuf, res);
/* same add-then-reduce sequence as used for every full window */
fp_add(res, tmp, res);
err = fp_mod(res, P, res);
if (err != FP_OKAY) {
#ifdef WOLFSSL_SMALL_STACK
XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
#endif
return err;
}
/* Move out bits used */
buf <<= bitcpy;
/* the loop pre-decrements bitcnt, so compensate here */
bitcnt++;
}
else {
/* bitcnt of 1 makes the loop load the first digit on its first pass */
bitcnt = 1;
buf = 0;
}
/* empty window and reset */
bitbuf = 0;
bitcpy = 0;
for (;;) {
/* grab next digit as required */
if (--bitcnt == 0) {
/* if digidx == -1 we are out of digits so break */
if (digidx == -1) {
break;
}
/* read next digit and reset bitcnt */
buf = X->dp[digidx--];
bitcnt = (int)DIGIT_BIT;
}
/* grab the next msb from the exponent */
y = (int)(buf >> (DIGIT_BIT - 1)) & 1;
buf <<= (fp_digit)1;
/* add bit to the window (most-significant window bit is filled first) */
bitbuf |= (y << (WINSIZE - ++bitcpy));
if (bitcpy == WINSIZE) {
/* ok window is filled so square as required and multiply */
/* square first */
for (x = 0; x < WINSIZE; x++) {
err = fp_sqr(res, res);
if (err != FP_OKAY) {
#ifdef WOLFSSL_SMALL_STACK
XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
#endif
return err;
}
err = fp_montgomery_reduce(res, P, mp);
if (err != FP_OKAY) {
#ifdef WOLFSSL_SMALL_STACK
XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
#endif
return err;
}
}
/* then multiply by 2^bitbuf */
fp_mul_2d(res, bitbuf, res);
/* Add in value to make mod operation take same time */
fp_add(res, tmp, res);
err = fp_mod(res, P, res);
if (err != FP_OKAY) {
#ifdef WOLFSSL_SMALL_STACK
XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
#endif
return err;
}
/* empty window and reset */
bitcpy = 0;
bitbuf = 0;
}
}
/* fixup result if Montgomery reduction is used
* recall that any value in a Montgomery system is
* actually multiplied by R mod n. So we have
* to reduce one more time to cancel out the factor
* of R.
*/
err = fp_montgomery_reduce(res, P, mp);
/* copy res into Y; note this happens even when the reduction above failed,
 * and err from that reduction is what gets returned */
fp_copy(res, Y);
#ifdef WOLFSSL_SMALL_STACK
XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
#endif
return err;
}
#undef WINSIZE
#else
/* Window size (exponent bits consumed per iteration), selected from
 * DIGIT_BIT.
 */
#if DIGIT_BIT < 16
    #define WINSIZE    3
#elif DIGIT_BIT < 32
    #define WINSIZE    4
#elif DIGIT_BIT < 64
    #define WINSIZE    5
#elif DIGIT_BIT < 128
    #define WINSIZE    6
#elif DIGIT_BIT == 128
    #define WINSIZE    7
#endif

/* Compute Y = 2**X (mod P).
 *
 * X       exponent; its digits are read from index digits-1 down to 0
 * digits  number of digits of X to process
 * P       modulus
 * Y       receives the result
 *
 * The exponent is consumed most-significant bit first in windows of WINSIZE
 * bits. Each full window costs WINSIZE square+Montgomery-reduce steps, then a
 * shift by the window value (multiplication by a power of two) and a
 * reduction mod P.
 *
 * Returns FP_MEM if the temporary cannot be allocated (WOLFSSL_SMALL_STACK),
 * otherwise the error code of the first failing helper or the result of the
 * final fp_montgomery_reduce.
 *
 * Some restrictions... x must be positive and < b
 */
static int _fp_exptmod_base_2(fp_int * X, int digits, fp_int * P,
                              fp_int * Y)
{
  fp_digit buf, mp;
  int      err, bitbuf, bitcpy, bitcnt, digidx, x, y;
#ifdef WOLFSSL_SMALL_STACK
  fp_int  *res;
#else
  fp_int   res[1];
#endif

#ifdef WOLFSSL_SMALL_STACK
  res = (fp_int*)XMALLOC(sizeof(fp_int), NULL, DYNAMIC_TYPE_TMP_BUFFER);
  if (res == NULL) {
    return FP_MEM;
  }
#endif

  /* now setup montgomery */
  if ((err = fp_montgomery_setup(P, &mp)) != FP_OKAY) {
    /* res is already allocated here; release it so this path does not leak */
#ifdef WOLFSSL_SMALL_STACK
    XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
#endif
    return err;
  }

  /* setup result */
  fp_init(res);

  /* now we need R mod m */
  fp_montgomery_calc_normalization(res, P);

  /* Get the top bits left over after taking WINSIZE bits starting at the
   * least-significant.
   */
  digidx = digits - 1;
  bitcpy = (digits * DIGIT_BIT) % WINSIZE;
  if (bitcpy > 0) {
    bitcnt = (int)DIGIT_BIT - bitcpy;
    buf    = X->dp[digidx--];
    bitbuf = (int)(buf >> bitcnt);
    /* Multiply montgomery representation of 1 by 2 ^ top */
    fp_mul_2d(res, bitbuf, res);
    err = fp_mod(res, P, res);
    if (err != FP_OKAY) {
#ifdef WOLFSSL_SMALL_STACK
      XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
#endif
      return err;
    }
    /* Move out bits used */
    buf <<= bitcpy;
    /* the loop below pre-decrements, so compensate here */
    bitcnt++;
  }
  else {
    /* forces the loop to load the first digit on its first pass */
    bitcnt = 1;
    buf    = 0;
  }

  /* empty window and reset  */
  bitbuf = 0;
  bitcpy = 0;

  for (;;) {
    /* grab next digit as required */
    if (--bitcnt == 0) {
      /* if digidx == -1 we are out of digits so break */
      if (digidx == -1) {
        break;
      }
      /* read next digit and reset bitcnt */
      buf    = X->dp[digidx--];
      bitcnt = (int)DIGIT_BIT;
    }

    /* grab the next msb from the exponent */
    y     = (int)(buf >> (DIGIT_BIT - 1)) & 1;
    buf <<= (fp_digit)1;
    /* add bit to the window */
    bitbuf |= (y << (WINSIZE - ++bitcpy));

    if (bitcpy == WINSIZE) {
      /* ok window is filled so square as required and multiply */
      /* square first */
      for (x = 0; x < WINSIZE; x++) {
        err = fp_sqr(res, res);
        if (err != FP_OKAY) {
#ifdef WOLFSSL_SMALL_STACK
          XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
#endif
          return err;
        }
        err = fp_montgomery_reduce(res, P, mp);
        if (err != FP_OKAY) {
#ifdef WOLFSSL_SMALL_STACK
          XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
#endif
          return err;
        }
      }

      /* then multiply by 2^bitbuf */
      fp_mul_2d(res, bitbuf, res);
      err = fp_mod(res, P, res);
      if (err != FP_OKAY) {
#ifdef WOLFSSL_SMALL_STACK
        XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
#endif
        return err;
      }

      /* empty window and reset */
      bitcpy = 0;
      bitbuf = 0;
    }
  }

  /* fixup result if Montgomery reduction is used
   * recall that any value in a Montgomery system is
   * actually multiplied by R mod n.  So we have
   * to reduce one more time to cancel out the factor
   * of R.
   */
  err = fp_montgomery_reduce(res, P, mp);

  /* copy res into Y */
  fp_copy(res, Y);

#ifdef WOLFSSL_SMALL_STACK
  XFREE(res, NULL, DYNAMIC_TYPE_TMP_BUFFER);
#endif
  return err;
}

#undef WINSIZE
#endif
int fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
{
@ -1910,6 +2220,9 @@ int fp_exptmod(fp_int * G, fp_int * X, fp_int * P, fp_int * Y)
return FP_VAL;
#endif
}
else if (G->used == 1 && G->dp[0] == 2) {
return _fp_exptmod_base_2(X, X->used, P, Y);
}
else {
/* Positive exponent so just exptmod */
return _fp_exptmod(G, X, X->used, P, Y);

View File

@ -12910,7 +12910,7 @@ static int dh_test_check_pubvalue(void)
}
#endif
#if defined(WOLFSSL_HAVE_SP_DH) && defined(HAVE_FFDHE)
#if defined(HAVE_FFDHE)
#ifdef HAVE_FFDHE_3072
#define FFDHE_KEY_SIZE (3072/8)
@ -12981,7 +12981,7 @@ done:
return ret;
}
#endif /* WOLFSSL_HAVE_SP_DH && HAVE_FFDHE */
#endif /* HAVE_FFDHE */
int dh_test(void)
{
@ -13135,17 +13135,21 @@ int dh_test(void)
ret = dh_test_check_pubvalue();
#endif
#ifdef WOLFSSL_HAVE_SP_DH
/* Specialized code for key gen when using FFDHE-2048 and FFDHE-3072. */
#ifdef HAVE_FFDHE_2048
if (ret == 0)
if (ret == 0) {
ret = dh_test_ffdhe(&rng, wc_Dh_ffdhe2048_Get());
if (ret != 0)
printf("error with FFDHE 2048\n");
}
#endif
#ifdef HAVE_FFDHE_3072
if (ret == 0)
if (ret == 0) {
ret = dh_test_ffdhe(&rng, wc_Dh_ffdhe3072_Get());
if (ret != 0)
printf("error with FFDHE 3072\n");
}
#endif
#endif /* WOLFSSL_HAVE_SP_DH */
wc_FreeDhKey(&key);
keyInit = 0;

View File

@ -324,6 +324,7 @@ MP_API int mp_reduce_is_2k(mp_int *a);
MP_API int mp_dr_is_modulus(mp_int *a);
MP_API int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y,
int);
MP_API int mp_exptmod_base_2 (mp_int * X, mp_int * P, mp_int * Y);
MP_API int mp_montgomery_setup (mp_int * n, mp_digit * rho);
int fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho);
MP_API int mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho);

View File

@ -62,6 +62,7 @@
#define BN_MP_DR_SETUP_C
#define BN_MP_EXCH_C
#define BN_MP_EXPT_D_C
#define BN_MP_EXPTMOD_BASE_2
#define BN_MP_EXPTMOD_C
#define BN_MP_EXPTMOD_FAST_C
#define BN_MP_EXTEUCLID_C
@ -358,6 +359,7 @@
#define BN_MP_REDUCE_IS_2K_C
#define BN_MP_ISODD_C
#define BN_MP_EXPTMOD_FAST_C
#define BN_MP_EXPTMOD_BASE_2
#endif
#if defined(BN_MP_EXPTMOD_FAST_C)