From dffb59ea52f6df2ec5a069a2bca368af362d31e0 Mon Sep 17 00:00:00 2001 From: Sean Parkinson Date: Thu, 5 Dec 2019 10:09:45 +1000 Subject: [PATCH] sp_int: support for more values in sp_exptmod and fix SP C - fix mont reduce with fast mul_add --- wolfcrypt/src/sp_c32.c | 184 +++++++++++++++++++++------------------- wolfcrypt/src/sp_c64.c | 186 ++++++++++++++++++++++------------------- wolfcrypt/src/sp_int.c | 95 ++++++++++++++------- 3 files changed, 264 insertions(+), 201 deletions(-) diff --git a/wolfcrypt/src/sp_c32.c b/wolfcrypt/src/sp_c32.c index 4694ebf5e..cfdde5821 100644 --- a/wolfcrypt/src/sp_c32.c +++ b/wolfcrypt/src/sp_c32.c @@ -1424,30 +1424,30 @@ SP_NOINLINE static void sp_2048_mul_add_45(sp_digit* r, const sp_digit* a, int64_t t[8]; int i; - t[0] = tb * a[0]; r[0] += t[0] & 0x7fffff; + t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x7fffff); for (i = 0; i < 40; i += 8) { t[1] = tb * a[i+1]; - r[i+1] += (t[0] >> 23) + (t[1] & 0x7fffff); + r[i+1] += (sp_digit)((t[0] >> 23) + (t[1] & 0x7fffff)); t[2] = tb * a[i+2]; - r[i+2] += (t[1] >> 23) + (t[2] & 0x7fffff); + r[i+2] += (sp_digit)((t[1] >> 23) + (t[2] & 0x7fffff)); t[3] = tb * a[i+3]; - r[i+3] += (t[2] >> 23) + (t[3] & 0x7fffff); + r[i+3] += (sp_digit)((t[2] >> 23) + (t[3] & 0x7fffff)); t[4] = tb * a[i+4]; - r[i+4] += (t[3] >> 23) + (t[4] & 0x7fffff); + r[i+4] += (sp_digit)((t[3] >> 23) + (t[4] & 0x7fffff)); t[5] = tb * a[i+5]; - r[i+5] += (t[4] >> 23) + (t[5] & 0x7fffff); + r[i+5] += (sp_digit)((t[4] >> 23) + (t[5] & 0x7fffff)); t[6] = tb * a[i+6]; - r[i+6] += (t[5] >> 23) + (t[6] & 0x7fffff); + r[i+6] += (sp_digit)((t[5] >> 23) + (t[6] & 0x7fffff)); t[7] = tb * a[i+7]; - r[i+7] += (t[6] >> 23) + (t[7] & 0x7fffff); + r[i+7] += (sp_digit)((t[6] >> 23) + (t[7] & 0x7fffff)); t[0] = tb * a[i+8]; - r[i+8] += (t[7] >> 23) + (t[0] & 0x7fffff); + r[i+8] += (sp_digit)((t[7] >> 23) + (t[0] & 0x7fffff)); } - t[1] = tb * a[41]; r[41] += (t[0] >> 23) + (t[1] & 0x7fffff); - t[2] = tb * a[42]; r[42] += (t[1] >> 23) + (t[2] & 0x7fffff); - t[3] = tb * a[43]; r[43] += (t[2] >> 23) + (t[3] & 0x7fffff); - t[4] = tb * a[44]; r[44] += (t[3] >> 23) + (t[4] & 0x7fffff); - r[45] += t[4] >> 23; + t[1] = tb * a[41]; r[41] += (sp_digit)((t[0] >> 23) + (t[1] & 0x7fffff)); + t[2] = tb * a[42]; r[42] += (sp_digit)((t[1] >> 23) + (t[2] & 0x7fffff)); + t[3] = tb * a[43]; r[43] += (sp_digit)((t[2] >> 23) + (t[3] & 0x7fffff)); + t[4] = tb * a[44]; r[44] += (sp_digit)((t[3] >> 23) + (t[4] & 0x7fffff)); + r[45] += (sp_digit)(t[4] >> 23); #endif /* WOLFSSL_SP_SMALL */ } @@ -1547,6 +1547,8 @@ static void sp_2048_mont_reduce_45(sp_digit* a, const sp_digit* m, sp_digit mp) int i; sp_digit mu; + sp_2048_norm_45(a + 45); + for (i=0; i<44; i++) { mu = (a[i] * mp) & 0x7fffff; sp_2048_mul_add_45(a+i, m, mu); @@ -2328,27 +2330,27 @@ SP_NOINLINE static void sp_2048_mul_add_90(sp_digit* r, const sp_digit* a, int64_t t[8]; int i; - t[0] = tb * a[0]; r[0] += t[0] & 0x7fffff; + t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x7fffff); for (i = 0; i < 88; i += 8) { t[1] = tb * a[i+1]; - r[i+1] += (t[0] >> 23) + (t[1] & 0x7fffff); + r[i+1] += (sp_digit)((t[0] >> 23) + (t[1] & 0x7fffff)); t[2] = tb * a[i+2]; - r[i+2] += (t[1] >> 23) + (t[2] & 0x7fffff); + r[i+2] += (sp_digit)((t[1] >> 23) + (t[2] & 0x7fffff)); t[3] = tb * a[i+3]; - r[i+3] += (t[2] >> 23) + (t[3] & 0x7fffff); + r[i+3] += (sp_digit)((t[2] >> 23) + (t[3] & 0x7fffff)); t[4] = tb * a[i+4]; - r[i+4] += (t[3] >> 23) + (t[4] & 0x7fffff); + r[i+4] += (sp_digit)((t[3] >> 23) + (t[4] & 0x7fffff)); t[5] = tb * a[i+5]; - r[i+5] += (t[4] >> 23) + (t[5] & 0x7fffff); + r[i+5] += (sp_digit)((t[4] >> 23) + (t[5] & 0x7fffff)); t[6] = tb * a[i+6]; - r[i+6] += (t[5] >> 23) + (t[6] & 0x7fffff); + r[i+6] += (sp_digit)((t[5] >> 23) + (t[6] & 0x7fffff)); t[7] = tb * a[i+7]; - r[i+7] += (t[6] >> 23) + (t[7] & 0x7fffff); + r[i+7] += (sp_digit)((t[6] >> 23) + (t[7] & 0x7fffff)); t[0] = tb * a[i+8]; - r[i+8] += (t[7] >> 23) + (t[0] & 0x7fffff); + r[i+8] += (sp_digit)((t[7] >> 23) + (t[0] & 0x7fffff)); } - t[1] = tb * a[89]; r[89] += (t[0] >> 23) + (t[1] & 0x7fffff); - r[90] += t[1] >> 23; + t[1] = tb * a[89]; r[89] += (sp_digit)((t[0] >> 23) + (t[1] & 0x7fffff)); + r[90] += (sp_digit)(t[1] >> 23); #endif /* WOLFSSL_SP_SMALL */ } @@ -2439,6 +2441,8 @@ static void sp_2048_mont_reduce_90(sp_digit* a, const sp_digit* m, sp_digit mp) int i; sp_digit mu; + sp_2048_norm_90(a + 90); + #ifdef WOLFSSL_SP_DH if (mp != 1) { for (i=0; i<89; i++) { @@ -5294,28 +5298,28 @@ SP_NOINLINE static void sp_3072_mul_add_67(sp_digit* r, const sp_digit* a, int64_t t[8]; int i; - t[0] = tb * a[0]; r[0] += t[0] & 0x7fffff; + t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x7fffff); for (i = 0; i < 64; i += 8) { t[1] = tb * a[i+1]; - r[i+1] += (t[0] >> 23) + (t[1] & 0x7fffff); + r[i+1] += (sp_digit)((t[0] >> 23) + (t[1] & 0x7fffff)); t[2] = tb * a[i+2]; - r[i+2] += (t[1] >> 23) + (t[2] & 0x7fffff); + r[i+2] += (sp_digit)((t[1] >> 23) + (t[2] & 0x7fffff)); t[3] = tb * a[i+3]; - r[i+3] += (t[2] >> 23) + (t[3] & 0x7fffff); + r[i+3] += (sp_digit)((t[2] >> 23) + (t[3] & 0x7fffff)); t[4] = tb * a[i+4]; - r[i+4] += (t[3] >> 23) + (t[4] & 0x7fffff); + r[i+4] += (sp_digit)((t[3] >> 23) + (t[4] & 0x7fffff)); t[5] = tb * a[i+5]; - r[i+5] += (t[4] >> 23) + (t[5] & 0x7fffff); + r[i+5] += (sp_digit)((t[4] >> 23) + (t[5] & 0x7fffff)); t[6] = tb * a[i+6]; - r[i+6] += (t[5] >> 23) + (t[6] & 0x7fffff); + r[i+6] += (sp_digit)((t[5] >> 23) + (t[6] & 0x7fffff)); t[7] = tb * a[i+7]; - r[i+7] += (t[6] >> 23) + (t[7] & 0x7fffff); + r[i+7] += (sp_digit)((t[6] >> 23) + (t[7] & 0x7fffff)); t[0] = tb * a[i+8]; - r[i+8] += (t[7] >> 23) + (t[0] & 0x7fffff); + r[i+8] += (sp_digit)((t[7] >> 23) + (t[0] & 0x7fffff)); } - t[1] = tb * a[65]; r[65] += (t[0] >> 23) + (t[1] & 0x7fffff); - t[2] = tb * a[66]; r[66] += (t[1] >> 23) + (t[2] & 0x7fffff); - r[67] += t[2] >> 23; + t[1] = tb * a[65]; r[65] += (sp_digit)((t[0] >> 23) + (t[1] & 0x7fffff)); + t[2] = tb * a[66]; r[66] += (sp_digit)((t[1] >> 23) + (t[2] & 0x7fffff)); + r[67] += (sp_digit)(t[2] >> 23); #endif /* WOLFSSL_SP_SMALL */ } @@ -5415,6 +5419,8 @@ static void sp_3072_mont_reduce_67(sp_digit* a, const sp_digit* m, sp_digit mp) int i; sp_digit mu; + sp_3072_norm_67(a + 67); + for (i=0; i<66; i++) { mu = (a[i] * mp) & 0x7fffff; sp_3072_mul_add_67(a+i, m, mu); @@ -6166,31 +6172,31 @@ SP_NOINLINE static void sp_3072_mul_add_134(sp_digit* r, const sp_digit* a, int64_t t[8]; int i; - t[0] = tb * a[0]; r[0] += t[0] & 0x7fffff; + t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x7fffff); for (i = 0; i < 128; i += 8) { t[1] = tb * a[i+1]; - r[i+1] += (t[0] >> 23) + (t[1] & 0x7fffff); + r[i+1] += (sp_digit)((t[0] >> 23) + (t[1] & 0x7fffff)); t[2] = tb * a[i+2]; - r[i+2] += (t[1] >> 23) + (t[2] & 0x7fffff); + r[i+2] += (sp_digit)((t[1] >> 23) + (t[2] & 0x7fffff)); t[3] = tb * a[i+3]; - r[i+3] += (t[2] >> 23) + (t[3] & 0x7fffff); + r[i+3] += (sp_digit)((t[2] >> 23) + (t[3] & 0x7fffff)); t[4] = tb * a[i+4]; - r[i+4] += (t[3] >> 23) + (t[4] & 0x7fffff); + r[i+4] += (sp_digit)((t[3] >> 23) + (t[4] & 0x7fffff)); t[5] = tb * a[i+5]; - r[i+5] += (t[4] >> 23) + (t[5] & 0x7fffff); + r[i+5] += (sp_digit)((t[4] >> 23) + (t[5] & 0x7fffff)); t[6] = tb * a[i+6]; - r[i+6] += (t[5] >> 23) + (t[6] & 0x7fffff); + r[i+6] += (sp_digit)((t[5] >> 23) + (t[6] & 0x7fffff)); t[7] = tb * a[i+7]; - r[i+7] += (t[6] >> 23) + (t[7] & 0x7fffff); + r[i+7] += (sp_digit)((t[6] >> 23) + (t[7] & 0x7fffff)); t[0] = tb * a[i+8]; - r[i+8] += (t[7] >> 23) + (t[0] & 0x7fffff); + r[i+8] += (sp_digit)((t[7] >> 23) + (t[0] & 0x7fffff)); } - t[1] = tb * a[129]; r[129] += (t[0] >> 23) + (t[1] & 0x7fffff); - t[2] = tb * a[130]; r[130] += (t[1] >> 23) + (t[2] & 0x7fffff); - t[3] = tb * a[131]; r[131] += (t[2] >> 23) + (t[3] & 0x7fffff); - t[4] = tb * a[132]; r[132] += (t[3] >> 23) + (t[4] & 0x7fffff); - t[5] = tb * a[133]; r[133] += (t[4] >> 23) + (t[5] & 0x7fffff); - r[134] += t[5] >> 23; + t[1] = tb * a[129]; r[129] += (sp_digit)((t[0] >> 23) + (t[1] & 0x7fffff)); + t[2] = tb * a[130]; r[130] += (sp_digit)((t[1] >> 23) + (t[2] & 0x7fffff)); + t[3] = tb * a[131]; r[131] += (sp_digit)((t[2] >> 23) + (t[3] & 0x7fffff)); + t[4] = tb * a[132]; r[132] += (sp_digit)((t[3] >> 23) + (t[4] & 0x7fffff)); + t[5] = tb * a[133]; r[133] += (sp_digit)((t[4] >> 23) + (t[5] & 0x7fffff)); + r[134] += (sp_digit)(t[5] >> 23); #endif /* WOLFSSL_SP_SMALL */ } @@ -6293,6 +6299,8 @@ static void sp_3072_mont_reduce_134(sp_digit* a, const sp_digit* m, sp_digit mp) int i; sp_digit mu; + sp_3072_norm_134(a + 134); + #ifdef WOLFSSL_SP_DH if (mp != 1) { for (i=0; i<133; i++) { @@ -9298,27 +9306,27 @@ SP_NOINLINE static void sp_4096_mul_add_98(sp_digit* r, const sp_digit* a, int64_t t[8]; int i; - t[0] = tb * a[0]; r[0] += t[0] & 0x1fffff; + t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x1fffff); for (i = 0; i < 96; i += 8) { t[1] = tb * a[i+1]; - r[i+1] += (t[0] >> 21) + (t[1] & 0x1fffff); + r[i+1] += (sp_digit)((t[0] >> 21) + (t[1] & 0x1fffff)); t[2] = tb * a[i+2]; - r[i+2] += (t[1] >> 21) + (t[2] & 0x1fffff); + r[i+2] += (sp_digit)((t[1] >> 21) + (t[2] & 0x1fffff)); t[3] = tb * a[i+3]; - r[i+3] += (t[2] >> 21) + (t[3] & 0x1fffff); + r[i+3] += (sp_digit)((t[2] >> 21) + (t[3] & 0x1fffff)); t[4] = tb * a[i+4]; - r[i+4] += (t[3] >> 21) + (t[4] & 0x1fffff); + r[i+4] += (sp_digit)((t[3] >> 21) + (t[4] & 0x1fffff)); t[5] = tb * a[i+5]; - r[i+5] += (t[4] >> 21) + (t[5] & 0x1fffff); + r[i+5] += (sp_digit)((t[4] >> 21) + (t[5] & 0x1fffff)); t[6] = tb * a[i+6]; - r[i+6] += (t[5] >> 21) + (t[6] & 0x1fffff); + r[i+6] += (sp_digit)((t[5] >> 21) + (t[6] & 0x1fffff)); t[7] = tb * a[i+7]; - r[i+7] += (t[6] >> 21) + (t[7] & 0x1fffff); + r[i+7] += (sp_digit)((t[6] >> 21) + (t[7] & 0x1fffff)); t[0] = tb * a[i+8]; - r[i+8] += (t[7] >> 21) + (t[0] & 0x1fffff); + r[i+8] += (sp_digit)((t[7] >> 21) + (t[0] & 0x1fffff)); } - t[1] = tb * a[97]; r[97] += (t[0] >> 21) + (t[1] & 0x1fffff); - r[98] += t[1] >> 21; + t[1] = tb * a[97]; r[97] += (sp_digit)((t[0] >> 21) + (t[1] & 0x1fffff)); + r[98] += (sp_digit)(t[1] >> 21); #endif /* WOLFSSL_SP_SMALL */ } @@ -9409,6 +9417,8 @@ static void sp_4096_mont_reduce_98(sp_digit* a, const sp_digit* m, sp_digit mp) int i; sp_digit mu; + sp_4096_norm_98(a + 98); + for (i=0; i<97; i++) { mu = (a[i] * mp) & 0x1fffff; sp_4096_mul_add_98(a+i, m, mu); @@ -10204,29 +10214,29 @@ SP_NOINLINE static void sp_4096_mul_add_196(sp_digit* r, const sp_digit* a, int64_t t[8]; int i; - t[0] = tb * a[0]; r[0] += t[0] & 0x1fffff; + t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x1fffff); for (i = 0; i < 192; i += 8) { t[1] = tb * a[i+1]; - r[i+1] += (t[0] >> 21) + (t[1] & 0x1fffff); + r[i+1] += (sp_digit)((t[0] >> 21) + (t[1] & 0x1fffff)); t[2] = tb * a[i+2]; - r[i+2] += (t[1] >> 21) + (t[2] & 0x1fffff); + r[i+2] += (sp_digit)((t[1] >> 21) + (t[2] & 0x1fffff)); t[3] = tb * a[i+3]; - r[i+3] += (t[2] >> 21) + (t[3] & 0x1fffff); + r[i+3] += (sp_digit)((t[2] >> 21) + (t[3] & 0x1fffff)); t[4] = tb * a[i+4]; - r[i+4] += (t[3] >> 21) + (t[4] & 0x1fffff); + r[i+4] += (sp_digit)((t[3] >> 21) + (t[4] & 0x1fffff)); t[5] = tb * a[i+5]; - r[i+5] += (t[4] >> 21) + (t[5] & 0x1fffff); + r[i+5] += (sp_digit)((t[4] >> 21) + (t[5] & 0x1fffff)); t[6] = tb * a[i+6]; - r[i+6] += (t[5] >> 21) + (t[6] & 0x1fffff); + r[i+6] += (sp_digit)((t[5] >> 21) + (t[6] & 0x1fffff)); t[7] = tb * a[i+7]; - r[i+7] += (t[6] >> 21) + (t[7] & 0x1fffff); + r[i+7] += (sp_digit)((t[6] >> 21) + (t[7] & 0x1fffff)); t[0] = tb * a[i+8]; - r[i+8] += (t[7] >> 21) + (t[0] & 0x1fffff); + r[i+8] += (sp_digit)((t[7] >> 21) + (t[0] & 0x1fffff)); } - t[1] = tb * a[193]; r[193] += (t[0] >> 21) + (t[1] & 0x1fffff); - t[2] = tb * a[194]; r[194] += (t[1] >> 21) + (t[2] & 0x1fffff); - t[3] = tb * a[195]; r[195] += (t[2] >> 21) + (t[3] & 0x1fffff); - r[196] += t[3] >> 21; + t[1] = tb * a[193]; r[193] += (sp_digit)((t[0] >> 21) + (t[1] & 0x1fffff)); + t[2] = tb * a[194]; r[194] += (sp_digit)((t[1] >> 21) + (t[2] & 0x1fffff)); + t[3] = tb * a[195]; r[195] += (sp_digit)((t[2] >> 21) + (t[3] & 0x1fffff)); + r[196] += (sp_digit)(t[3] >> 21); #endif /* WOLFSSL_SP_SMALL */ } @@ -10323,6 +10333,8 @@ static void sp_4096_mont_reduce_196(sp_digit* a, const sp_digit* m, sp_digit mp) int i; sp_digit mu; + sp_4096_norm_196(a + 196); + #ifdef WOLFSSL_SP_DH if (mp != 1) { for (i=0; i<195; i++) { @@ -12998,16 +13010,16 @@ SP_NOINLINE static void sp_256_mul_add_10(sp_digit* r, const sp_digit* a, t[ 7] = tb * a[ 7]; t[ 8] = tb * a[ 8]; t[ 9] = tb * a[ 9]; - r[ 0] += (t[ 0] & 0x3ffffff); - r[ 1] += (t[ 0] >> 26) + (t[ 1] & 0x3ffffff); - r[ 2] += (t[ 1] >> 26) + (t[ 2] & 0x3ffffff); - r[ 3] += (t[ 2] >> 26) + (t[ 3] & 0x3ffffff); - r[ 4] += (t[ 3] >> 26) + (t[ 4] & 0x3ffffff); - r[ 5] += (t[ 4] >> 26) + (t[ 5] & 0x3ffffff); - r[ 6] += (t[ 5] >> 26) + (t[ 6] & 0x3ffffff); - r[ 7] += (t[ 6] >> 26) + (t[ 7] & 0x3ffffff); - r[ 8] += (t[ 7] >> 26) + (t[ 8] & 0x3ffffff); - r[ 9] += (t[ 8] >> 26) + (t[ 9] & 0x3ffffff); + r[ 0] += (sp_digit)(t[ 0] & 0x3ffffff); + r[ 1] += (sp_digit)((t[ 0] >> 26) + (t[ 1] & 0x3ffffff)); + r[ 2] += (sp_digit)((t[ 1] >> 26) + (t[ 2] & 0x3ffffff)); + r[ 3] += (sp_digit)((t[ 2] >> 26) + (t[ 3] & 0x3ffffff)); + r[ 4] += (sp_digit)((t[ 3] >> 26) + (t[ 4] & 0x3ffffff)); + r[ 5] += (sp_digit)((t[ 4] >> 26) + (t[ 5] & 0x3ffffff)); + r[ 6] += (sp_digit)((t[ 5] >> 26) + (t[ 6] & 0x3ffffff)); + r[ 7] += (sp_digit)((t[ 6] >> 26) + (t[ 7] & 0x3ffffff)); + r[ 8] += (sp_digit)((t[ 7] >> 26) + (t[ 8] & 0x3ffffff)); + r[ 9] += (sp_digit)((t[ 8] >> 26) + (t[ 9] & 0x3ffffff)); r[10] += t[ 9] >> 26; #endif /* WOLFSSL_SP_SMALL */ } diff --git a/wolfcrypt/src/sp_c64.c b/wolfcrypt/src/sp_c64.c index 717c46e65..69c009848 100644 --- a/wolfcrypt/src/sp_c64.c +++ b/wolfcrypt/src/sp_c64.c @@ -1058,27 +1058,27 @@ SP_NOINLINE static void sp_2048_mul_add_18(sp_digit* r, const sp_digit* a, int128_t t[8]; int i; - t[0] = tb * a[0]; r[0] += t[0] & 0x1ffffffffffffffL; + t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x1ffffffffffffffL); for (i = 0; i < 16; i += 8) { t[1] = tb * a[i+1]; - r[i+1] += (t[0] >> 57) + (t[1] & 0x1ffffffffffffffL); + r[i+1] += (sp_digit)((t[0] >> 57) + (t[1] & 0x1ffffffffffffffL)); t[2] = tb * a[i+2]; - r[i+2] += (t[1] >> 57) + (t[2] & 0x1ffffffffffffffL); + r[i+2] += (sp_digit)((t[1] >> 57) + (t[2] & 0x1ffffffffffffffL)); t[3] = tb * a[i+3]; - r[i+3] += (t[2] >> 57) + (t[3] & 0x1ffffffffffffffL); + r[i+3] += (sp_digit)((t[2] >> 57) + (t[3] & 0x1ffffffffffffffL)); t[4] = tb * a[i+4]; - r[i+4] += (t[3] >> 57) + (t[4] & 0x1ffffffffffffffL); + r[i+4] += (sp_digit)((t[3] >> 57) + (t[4] & 0x1ffffffffffffffL)); t[5] = tb * a[i+5]; - r[i+5] += (t[4] >> 57) + (t[5] & 0x1ffffffffffffffL); + r[i+5] += (sp_digit)((t[4] >> 57) + (t[5] & 0x1ffffffffffffffL)); t[6] = tb * a[i+6]; - r[i+6] += (t[5] >> 57) + (t[6] & 0x1ffffffffffffffL); + r[i+6] += (sp_digit)((t[5] >> 57) + (t[6] & 0x1ffffffffffffffL)); t[7] = tb * a[i+7]; - r[i+7] += (t[6] >> 57) + (t[7] & 0x1ffffffffffffffL); + r[i+7] += (sp_digit)((t[6] >> 57) + (t[7] & 0x1ffffffffffffffL)); t[0] = tb * a[i+8]; - r[i+8] += (t[7] >> 57) + (t[0] & 0x1ffffffffffffffL); + r[i+8] += (sp_digit)((t[7] >> 57) + (t[0] & 0x1ffffffffffffffL)); } - t[1] = tb * a[17]; r[17] += (t[0] >> 57) + (t[1] & 0x1ffffffffffffffL); - r[18] += t[1] >> 57; + t[1] = tb * a[17]; r[17] += (sp_digit)((t[0] >> 57) + (t[1] & 0x1ffffffffffffffL)); + r[18] += (sp_digit)(t[1] >> 57); #endif /* WOLFSSL_SP_SMALL */ } @@ -1164,6 +1164,8 @@ static void sp_2048_mont_reduce_18(sp_digit* a, const sp_digit* m, sp_digit mp) int i; sp_digit mu; + sp_2048_norm_18(a + 18); + for (i=0; i<17; i++) { mu = (a[i] * mp) & 0x1ffffffffffffffL; sp_2048_mul_add_18(a+i, m, mu); @@ -1935,29 +1937,29 @@ SP_NOINLINE static void sp_2048_mul_add_36(sp_digit* r, const sp_digit* a, int128_t t[8]; int i; - t[0] = tb * a[0]; r[0] += t[0] & 0x1ffffffffffffffL; + t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x1ffffffffffffffL); for (i = 0; i < 32; i += 8) { t[1] = tb * a[i+1]; - r[i+1] += (t[0] >> 57) + (t[1] & 0x1ffffffffffffffL); + r[i+1] += (sp_digit)((t[0] >> 57) + (t[1] & 0x1ffffffffffffffL)); t[2] = tb * a[i+2]; - r[i+2] += (t[1] >> 57) + (t[2] & 0x1ffffffffffffffL); + r[i+2] += (sp_digit)((t[1] >> 57) + (t[2] & 0x1ffffffffffffffL)); t[3] = tb * a[i+3]; - r[i+3] += (t[2] >> 57) + (t[3] & 0x1ffffffffffffffL); + r[i+3] += (sp_digit)((t[2] >> 57) + (t[3] & 0x1ffffffffffffffL)); t[4] = tb * a[i+4]; - r[i+4] += (t[3] >> 57) + (t[4] & 0x1ffffffffffffffL); + r[i+4] += (sp_digit)((t[3] >> 57) + (t[4] & 0x1ffffffffffffffL)); t[5] = tb * a[i+5]; - r[i+5] += (t[4] >> 57) + (t[5] & 0x1ffffffffffffffL); + r[i+5] += (sp_digit)((t[4] >> 57) + (t[5] & 0x1ffffffffffffffL)); t[6] = tb * a[i+6]; - r[i+6] += (t[5] >> 57) + (t[6] & 0x1ffffffffffffffL); + r[i+6] += (sp_digit)((t[5] >> 57) + (t[6] & 0x1ffffffffffffffL)); t[7] = tb * a[i+7]; - r[i+7] += (t[6] >> 57) + (t[7] & 0x1ffffffffffffffL); + r[i+7] += (sp_digit)((t[6] >> 57) + (t[7] & 0x1ffffffffffffffL)); t[0] = tb * a[i+8]; - r[i+8] += (t[7] >> 57) + (t[0] & 0x1ffffffffffffffL); + r[i+8] += (sp_digit)((t[7] >> 57) + (t[0] & 0x1ffffffffffffffL)); } - t[1] = tb * a[33]; r[33] += (t[0] >> 57) + (t[1] & 0x1ffffffffffffffL); - t[2] = tb * a[34]; r[34] += (t[1] >> 57) + (t[2] & 0x1ffffffffffffffL); - t[3] = tb * a[35]; r[35] += (t[2] >> 57) + (t[3] & 0x1ffffffffffffffL); - r[36] += t[3] >> 57; + t[1] = tb * a[33]; r[33] += (sp_digit)((t[0] >> 57) + (t[1] & 0x1ffffffffffffffL)); + t[2] = tb * a[34]; r[34] += (sp_digit)((t[1] >> 57) + (t[2] & 0x1ffffffffffffffL)); + t[3] = tb * a[35]; r[35] += (sp_digit)((t[2] >> 57) + (t[3] & 0x1ffffffffffffffL)); + r[36] += (sp_digit)(t[3] >> 57); #endif /* WOLFSSL_SP_SMALL */ } @@ -2061,6 +2063,8 @@ static void sp_2048_mont_reduce_36(sp_digit* a, const sp_digit* m, sp_digit mp) int i; sp_digit mu; + sp_2048_norm_36(a + 36); + #ifdef WOLFSSL_SP_DH if (mp != 1) { for (i=0; i<35; i++) { @@ -5148,28 +5152,28 @@ SP_NOINLINE static void sp_3072_mul_add_27(sp_digit* r, const sp_digit* a, int128_t t[8]; int i; - t[0] = tb * a[0]; r[0] += t[0] & 0x1ffffffffffffffL; + t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x1ffffffffffffffL); for (i = 0; i < 24; i += 8) { t[1] = tb * a[i+1]; - r[i+1] += (t[0] >> 57) + (t[1] & 0x1ffffffffffffffL); + r[i+1] += (sp_digit)((t[0] >> 57) + (t[1] & 0x1ffffffffffffffL)); t[2] = tb * a[i+2]; - r[i+2] += (t[1] >> 57) + (t[2] & 0x1ffffffffffffffL); + r[i+2] += (sp_digit)((t[1] >> 57) + (t[2] & 0x1ffffffffffffffL)); t[3] = tb * a[i+3]; - r[i+3] += (t[2] >> 57) + (t[3] & 0x1ffffffffffffffL); + r[i+3] += (sp_digit)((t[2] >> 57) + (t[3] & 0x1ffffffffffffffL)); t[4] = tb * a[i+4]; - r[i+4] += (t[3] >> 57) + (t[4] & 0x1ffffffffffffffL); + r[i+4] += (sp_digit)((t[3] >> 57) + (t[4] & 0x1ffffffffffffffL)); t[5] = tb * a[i+5]; - r[i+5] += (t[4] >> 57) + (t[5] & 0x1ffffffffffffffL); + r[i+5] += (sp_digit)((t[4] >> 57) + (t[5] & 0x1ffffffffffffffL)); t[6] = tb * a[i+6]; - r[i+6] += (t[5] >> 57) + (t[6] & 0x1ffffffffffffffL); + r[i+6] += (sp_digit)((t[5] >> 57) + (t[6] & 0x1ffffffffffffffL)); t[7] = tb * a[i+7]; - r[i+7] += (t[6] >> 57) + (t[7] & 0x1ffffffffffffffL); + r[i+7] += (sp_digit)((t[6] >> 57) + (t[7] & 0x1ffffffffffffffL)); t[0] = tb * a[i+8]; - r[i+8] += (t[7] >> 57) + (t[0] & 0x1ffffffffffffffL); + r[i+8] += (sp_digit)((t[7] >> 57) + (t[0] & 0x1ffffffffffffffL)); } - t[1] = tb * a[25]; r[25] += (t[0] >> 57) + (t[1] & 0x1ffffffffffffffL); - t[2] = tb * a[26]; r[26] += (t[1] >> 57) + (t[2] & 0x1ffffffffffffffL); - r[27] += t[2] >> 57; + t[1] = tb * a[25]; r[25] += (sp_digit)((t[0] >> 57) + (t[1] & 0x1ffffffffffffffL)); + t[2] = tb * a[26]; r[26] += (sp_digit)((t[1] >> 57) + (t[2] & 0x1ffffffffffffffL)); + r[27] += (sp_digit)(t[2] >> 57); #endif /* WOLFSSL_SP_SMALL */ } @@ -5269,6 +5273,8 @@ static void sp_3072_mont_reduce_27(sp_digit* a, const sp_digit* m, sp_digit mp) int i; sp_digit mu; + sp_3072_norm_27(a + 27); + for (i=0; i<26; i++) { mu = (a[i] * mp) & 0x1ffffffffffffffL; sp_3072_mul_add_27(a+i, m, mu); @@ -6010,31 +6016,31 @@ SP_NOINLINE static void sp_3072_mul_add_54(sp_digit* r, const sp_digit* a, int128_t t[8]; int i; - t[0] = tb * a[0]; r[0] += t[0] & 0x1ffffffffffffffL; + t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x1ffffffffffffffL); for (i = 0; i < 48; i += 8) { t[1] = tb * a[i+1]; - r[i+1] += (t[0] >> 57) + (t[1] & 0x1ffffffffffffffL); + r[i+1] += (sp_digit)((t[0] >> 57) + (t[1] & 0x1ffffffffffffffL)); t[2] = tb * a[i+2]; - r[i+2] += (t[1] >> 57) + (t[2] & 0x1ffffffffffffffL); + r[i+2] += (sp_digit)((t[1] >> 57) + (t[2] & 0x1ffffffffffffffL)); t[3] = tb * a[i+3]; - r[i+3] += (t[2] >> 57) + (t[3] & 0x1ffffffffffffffL); + r[i+3] += (sp_digit)((t[2] >> 57) + (t[3] & 0x1ffffffffffffffL)); t[4] = tb * a[i+4]; - r[i+4] += (t[3] >> 57) + (t[4] & 0x1ffffffffffffffL); + r[i+4] += (sp_digit)((t[3] >> 57) + (t[4] & 0x1ffffffffffffffL)); t[5] = tb * a[i+5]; - r[i+5] += (t[4] >> 57) + (t[5] & 0x1ffffffffffffffL); + r[i+5] += (sp_digit)((t[4] >> 57) + (t[5] & 0x1ffffffffffffffL)); t[6] = tb * a[i+6]; - r[i+6] += (t[5] >> 57) + (t[6] & 0x1ffffffffffffffL); + r[i+6] += (sp_digit)((t[5] >> 57) + (t[6] & 0x1ffffffffffffffL)); t[7] = tb * a[i+7]; - r[i+7] += (t[6] >> 57) + (t[7] & 0x1ffffffffffffffL); + r[i+7] += (sp_digit)((t[6] >> 57) + (t[7] & 0x1ffffffffffffffL)); t[0] = tb * a[i+8]; - r[i+8] += (t[7] >> 57) + (t[0] & 0x1ffffffffffffffL); + r[i+8] += (sp_digit)((t[7] >> 57) + (t[0] & 0x1ffffffffffffffL)); } - t[1] = tb * a[49]; r[49] += (t[0] >> 57) + (t[1] & 0x1ffffffffffffffL); - t[2] = tb * a[50]; r[50] += (t[1] >> 57) + (t[2] & 0x1ffffffffffffffL); - t[3] = tb * a[51]; r[51] += (t[2] >> 57) + (t[3] & 0x1ffffffffffffffL); - t[4] = tb * a[52]; r[52] += (t[3] >> 57) + (t[4] & 0x1ffffffffffffffL); - t[5] = tb * a[53]; r[53] += (t[4] >> 57) + (t[5] & 0x1ffffffffffffffL); - r[54] += t[5] >> 57; + t[1] = tb * a[49]; r[49] += (sp_digit)((t[0] >> 57) + (t[1] & 0x1ffffffffffffffL)); + t[2] = tb * a[50]; r[50] += (sp_digit)((t[1] >> 57) + (t[2] & 0x1ffffffffffffffL)); + t[3] = tb * a[51]; r[51] += (sp_digit)((t[2] >> 57) + (t[3] & 0x1ffffffffffffffL)); + t[4] = tb * a[52]; r[52] += (sp_digit)((t[3] >> 57) + (t[4] & 0x1ffffffffffffffL)); + t[5] = tb * a[53]; r[53] += (sp_digit)((t[4] >> 57) + (t[5] & 0x1ffffffffffffffL)); + r[54] += (sp_digit)(t[5] >> 57); #endif /* WOLFSSL_SP_SMALL */ } @@ -6137,6 +6143,8 @@ static void sp_3072_mont_reduce_54(sp_digit* a, const sp_digit* m, sp_digit mp) int i; sp_digit mu; + sp_3072_norm_54(a + 54); + #ifdef WOLFSSL_SP_DH if (mp != 1) { for (i=0; i<53; i++) { @@ -9284,32 +9292,32 @@ SP_NOINLINE static void sp_4096_mul_add_39(sp_digit* r, const sp_digit* a, int128_t t[8]; int i; - t[0] = tb * a[0]; r[0] += t[0] & 0x1fffffffffffffL; + t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x1fffffffffffffL); for (i = 0; i < 32; i += 8) { t[1] = tb * a[i+1]; - r[i+1] += (t[0] >> 53) + (t[1] & 0x1fffffffffffffL); + r[i+1] += (sp_digit)((t[0] >> 53) + (t[1] & 0x1fffffffffffffL)); t[2] = tb * a[i+2]; - r[i+2] += (t[1] >> 53) + (t[2] & 0x1fffffffffffffL); + r[i+2] += (sp_digit)((t[1] >> 53) + (t[2] & 0x1fffffffffffffL)); t[3] = tb * a[i+3]; - r[i+3] += (t[2] >> 53) + (t[3] & 0x1fffffffffffffL); + r[i+3] += (sp_digit)((t[2] >> 53) + (t[3] & 0x1fffffffffffffL)); t[4] = tb * a[i+4]; - r[i+4] += (t[3] >> 53) + (t[4] & 0x1fffffffffffffL); + r[i+4] += (sp_digit)((t[3] >> 53) + (t[4] & 0x1fffffffffffffL)); t[5] = tb * a[i+5]; - r[i+5] += (t[4] >> 53) + (t[5] & 0x1fffffffffffffL); + r[i+5] += (sp_digit)((t[4] >> 53) + (t[5] & 0x1fffffffffffffL)); t[6] = tb * a[i+6]; - r[i+6] += (t[5] >> 53) + (t[6] & 0x1fffffffffffffL); + r[i+6] += (sp_digit)((t[5] >> 53) + (t[6] & 0x1fffffffffffffL)); t[7] = tb * a[i+7]; - r[i+7] += (t[6] >> 53) + (t[7] & 0x1fffffffffffffL); + r[i+7] += (sp_digit)((t[6] >> 53) + (t[7] & 0x1fffffffffffffL)); t[0] = tb * a[i+8]; - r[i+8] += (t[7] >> 53) + (t[0] & 0x1fffffffffffffL); + r[i+8] += (sp_digit)((t[7] >> 53) + (t[0] & 0x1fffffffffffffL)); } - t[1] = tb * a[33]; r[33] += (t[0] >> 53) + (t[1] & 0x1fffffffffffffL); - t[2] = tb * a[34]; r[34] += (t[1] >> 53) + (t[2] & 0x1fffffffffffffL); - t[3] = tb * a[35]; r[35] += (t[2] >> 53) + (t[3] & 0x1fffffffffffffL); - t[4] = tb * a[36]; r[36] += (t[3] >> 53) + (t[4] & 0x1fffffffffffffL); - t[5] = tb * a[37]; r[37] += (t[4] >> 53) + (t[5] & 0x1fffffffffffffL); - t[6] = tb * a[38]; r[38] += (t[5] >> 53) + (t[6] & 0x1fffffffffffffL); - r[39] += t[6] >> 53; + t[1] = tb * a[33]; r[33] += (sp_digit)((t[0] >> 53) + (t[1] & 0x1fffffffffffffL)); + t[2] = tb * a[34]; r[34] += (sp_digit)((t[1] >> 53) + (t[2] & 0x1fffffffffffffL)); + t[3] = tb * a[35]; r[35] += (sp_digit)((t[2] >> 53) + (t[3] & 0x1fffffffffffffL)); + t[4] = tb * a[36]; r[36] += (sp_digit)((t[3] >> 53) + (t[4] & 0x1fffffffffffffL)); + t[5] = tb * a[37]; r[37] += (sp_digit)((t[4] >> 53) + (t[5] & 0x1fffffffffffffL)); + t[6] = tb * a[38]; r[38] += (sp_digit)((t[5] >> 53) + (t[6] & 0x1fffffffffffffL)); + r[39] += (sp_digit)(t[6] >> 53); #endif /* WOLFSSL_SP_SMALL */ } @@ -9415,6 +9423,8 @@ static void sp_4096_mont_reduce_39(sp_digit* a, const sp_digit* m, sp_digit mp) int i; sp_digit mu; + sp_4096_norm_39(a + 39); + for (i=0; i<38; i++) { mu = (a[i] * mp) & 0x1fffffffffffffL; sp_4096_mul_add_39(a+i, m, mu); @@ -10225,31 +10235,31 @@ SP_NOINLINE static void sp_4096_mul_add_78(sp_digit* r, const sp_digit* a, int128_t t[8]; int i; - t[0] = tb * a[0]; r[0] += t[0] & 0x1fffffffffffffL; + t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x1fffffffffffffL); for (i = 0; i < 72; i += 8) { t[1] = tb * a[i+1]; - r[i+1] += (t[0] >> 53) + (t[1] & 0x1fffffffffffffL); + r[i+1] += (sp_digit)((t[0] >> 53) + (t[1] & 0x1fffffffffffffL)); t[2] = tb * a[i+2]; - r[i+2] += (t[1] >> 53) + (t[2] & 0x1fffffffffffffL); + r[i+2] += (sp_digit)((t[1] >> 53) + (t[2] & 0x1fffffffffffffL)); t[3] = tb * a[i+3]; - r[i+3] += (t[2] >> 53) + (t[3] & 0x1fffffffffffffL); + r[i+3] += (sp_digit)((t[2] >> 53) + (t[3] & 0x1fffffffffffffL)); t[4] = tb * a[i+4]; - r[i+4] += (t[3] >> 53) + (t[4] & 0x1fffffffffffffL); + r[i+4] += (sp_digit)((t[3] >> 53) + (t[4] & 0x1fffffffffffffL)); t[5] = tb * a[i+5]; - r[i+5] += (t[4] >> 53) + (t[5] & 0x1fffffffffffffL); + r[i+5] += (sp_digit)((t[4] >> 53) + (t[5] & 0x1fffffffffffffL)); t[6] = tb * a[i+6]; - r[i+6] += (t[5] >> 53) + (t[6] & 0x1fffffffffffffL); + r[i+6] += (sp_digit)((t[5] >> 53) + (t[6] & 0x1fffffffffffffL)); t[7] = tb * a[i+7]; - r[i+7] += (t[6] >> 53) + (t[7] & 0x1fffffffffffffL); + r[i+7] += (sp_digit)((t[6] >> 53) + (t[7] & 0x1fffffffffffffL)); t[0] = tb * a[i+8]; - r[i+8] += (t[7] >> 53) + (t[0] & 0x1fffffffffffffL); + r[i+8] += (sp_digit)((t[7] >> 53) + (t[0] & 0x1fffffffffffffL)); } - t[1] = tb * a[73]; r[73] += (t[0] >> 53) + (t[1] & 0x1fffffffffffffL); - t[2] = tb * a[74]; r[74] += (t[1] >> 53) + (t[2] & 0x1fffffffffffffL); - t[3] = tb * a[75]; r[75] += (t[2] >> 53) + (t[3] & 0x1fffffffffffffL); - t[4] = tb * a[76]; r[76] += (t[3] >> 53) + (t[4] & 0x1fffffffffffffL); - t[5] = tb * a[77]; r[77] += (t[4] >> 53) + (t[5] & 0x1fffffffffffffL); - r[78] += t[5] >> 53; + t[1] = tb * a[73]; r[73] += (sp_digit)((t[0] >> 53) + (t[1] & 0x1fffffffffffffL)); + t[2] = tb * a[74]; r[74] += (sp_digit)((t[1] >> 53) + (t[2] & 0x1fffffffffffffL)); + t[3] = tb * a[75]; r[75] += (sp_digit)((t[2] >> 53) + (t[3] & 0x1fffffffffffffL)); + t[4] = tb * a[76]; r[76] += (sp_digit)((t[3] >> 53) + (t[4] & 0x1fffffffffffffL)); + t[5] = tb * a[77]; r[77] += (sp_digit)((t[4] >> 53) + (t[5] & 0x1fffffffffffffL)); + r[78] += (sp_digit)(t[5] >> 53); #endif /* WOLFSSL_SP_SMALL */ } @@ -10352,6 +10362,8 @@ static void sp_4096_mont_reduce_78(sp_digit* a, const sp_digit* m, sp_digit mp) int i; sp_digit mu; + sp_4096_norm_78(a + 78); + #ifdef WOLFSSL_SP_DH if (mp != 1) { for (i=0; i<77; i++) { @@ -12779,11 +12791,11 @@ SP_NOINLINE static void sp_256_mul_add_5(sp_digit* r, const sp_digit* a, t[ 2] = tb * a[ 2]; t[ 3] = tb * a[ 3]; t[ 4] = tb * a[ 4]; - r[ 0] += (t[ 0] & 0xfffffffffffffL); - r[ 1] += (t[ 0] >> 52) + (t[ 1] & 0xfffffffffffffL); - r[ 2] += (t[ 1] >> 52) + (t[ 2] & 0xfffffffffffffL); - r[ 3] += (t[ 2] >> 52) + (t[ 3] & 0xfffffffffffffL); - r[ 4] += (t[ 3] >> 52) + (t[ 4] & 0xfffffffffffffL); + r[ 0] += (sp_digit)(t[ 0] & 0xfffffffffffffL); + r[ 1] += (sp_digit)((t[ 0] >> 52) + (t[ 1] & 0xfffffffffffffL)); + r[ 2] += (sp_digit)((t[ 1] >> 52) + (t[ 2] & 0xfffffffffffffL)); + r[ 3] += (sp_digit)((t[ 2] >> 52) + (t[ 3] & 0xfffffffffffffL)); + r[ 4] += (sp_digit)((t[ 3] >> 52) + (t[ 4] & 0xfffffffffffffL)); r[ 5] += t[ 4] >> 52; #endif /* WOLFSSL_SP_SMALL */ } diff --git a/wolfcrypt/src/sp_int.c b/wolfcrypt/src/sp_int.c index 2ba625294..b12198955 100644 --- a/wolfcrypt/src/sp_int.c +++ b/wolfcrypt/src/sp_int.c @@ -1489,24 +1489,56 @@ int sp_lcm(sp_int* a, sp_int* b, sp_int* r) int sp_exptmod(sp_int* b, sp_int* e, sp_int* m, sp_int* r) { int err = MP_OKAY; - int bits = sp_count_bits(m); + int done = 0; + int mBits = sp_count_bits(m); + int bBits = sp_count_bits(b); + int eBits = sp_count_bits(e); + if (sp_iszero(m)) { + err = MP_VAL; + } + else if (sp_isone(m)) { + sp_set(r, 0); + done = 1; + } + else if (sp_iszero(e)) { + sp_set(r, 1); + done = 1; + } + else if (sp_iszero(b)) { + sp_set(r, 0); + done = 1; + } + + if (!done && (err == MP_OKAY)) { #ifndef WOLFSSL_SP_NO_2048 - if (bits == 1024) - sp_ModExp_1024(b, e, m, r); - else if (bits == 2048) - sp_ModExp_2048(b, e, m, r); - else + if ((mBits == 1024) && sp_isodd(m) && (bBits <= 1024) && + (eBits <= 1024)) { + err = sp_ModExp_1024(b, e, m, r); + done = 1; + } + else if ((mBits == 1024) && sp_isodd(m) && (bBits <= 1024) && + (eBits <= 1024)) { + err = sp_ModExp_2048(b, e, m, r); + done = 1; + } + else #endif #ifndef WOLFSSL_SP_NO_3072 - if (bits == 1536) - sp_ModExp_1536(b, e, m, r); - else if (bits == 3072) - sp_ModExp_3072(b, e, m, r); - else + if ((mBits == 1536) && sp_isodd(m) && (bBits <= 1536) && + (eBits <= 1536)) { + err = sp_ModExp_1536(b, e, m, r); + done = 1; + } + else if ((mBits == 3072) && sp_isodd(m) && (bBits <= 3072) && + (eBits <= 3072)) { + err = sp_ModExp_3072(b, e, m, r); + done = 1; + } #endif + } #if defined(WOLFSSL_HAVE_SP_DH) && defined(WOLFSSL_KEY_GEN) - if (bits == 256) { + if (!done && (err == MP_OKAY)) { int i; #ifdef WOLFSSL_SMALL_STACK @@ -1516,37 +1548,44 @@ int sp_exptmod(sp_int* b, sp_int* e, sp_int* m, sp_int* r) #endif #ifdef WOLFSSL_SMALL_STACK - t = (sp_int*)XMALLOC(sizeof(sp_int) * 2, NULL, DYNAMIC_TYPE_BIGINT); - if (t == NULL) { - err = MP_MEM; + if (!done && (err == MP_OKAY)) { + t = (sp_int*)XMALLOC(sizeof(sp_int) * 2, NULL, DYNAMIC_TYPE_BIGINT); + if (t == NULL) { + err = MP_MEM; + } } #endif - if (err == MP_OKAY) { + if (!done && (err == MP_OKAY)) { sp_init(t); sp_copy(b, t); - bits = sp_count_bits(e); - } - for (i = bits-2; err == MP_OKAY && i >= 0; i--) { - err = sp_sqrmod(t, m, t); - if (err == MP_OKAY && + for (i = eBits-2; err == MP_OKAY && i >= 0; i--) { + err = sp_sqrmod(t, m, t); + if (err == MP_OKAY && (e->dp[i / SP_WORD_SIZE] >> (i % SP_WORD_SIZE)) & 1) { - err = sp_mulmod(t, b, m, t); - } + err = sp_mulmod(t, b, m, t); + } + } } - if (err == MP_OKAY) + if (!done && (err == MP_OKAY)) { sp_copy(t, r); + } #ifdef WOLFSSL_SMALL_STACK - if (t != NULL) + if (t != NULL) { XFREE(t, NULL, DYNAMIC_TYPE_BIGINT); + } #endif } - else -#endif +#else + { err = MP_VAL; + } +#endif - (void)bits; + (void)mBits; + (void)bBits; + (void)eBits; return err; }