From dffb59ea52f6df2ec5a069a2bca368af362d31e0 Mon Sep 17 00:00:00 2001 From: Sean Parkinson Date: Thu, 5 Dec 2019 10:09:45 +1000 Subject: [PATCH 1/3] sp_int: support for more values in sp_exptmod and fix SP C - fix mont reduce with fast mul_add --- wolfcrypt/src/sp_c32.c | 184 +++++++++++++++++++++------------------- wolfcrypt/src/sp_c64.c | 186 ++++++++++++++++++++++------------------- wolfcrypt/src/sp_int.c | 95 ++++++++++++++------- 3 files changed, 264 insertions(+), 201 deletions(-) diff --git a/wolfcrypt/src/sp_c32.c b/wolfcrypt/src/sp_c32.c index 4694ebf5e..cfdde5821 100644 --- a/wolfcrypt/src/sp_c32.c +++ b/wolfcrypt/src/sp_c32.c @@ -1424,30 +1424,30 @@ SP_NOINLINE static void sp_2048_mul_add_45(sp_digit* r, const sp_digit* a, int64_t t[8]; int i; - t[0] = tb * a[0]; r[0] += t[0] & 0x7fffff; + t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x7fffff); for (i = 0; i < 40; i += 8) { t[1] = tb * a[i+1]; - r[i+1] += (t[0] >> 23) + (t[1] & 0x7fffff); + r[i+1] += (sp_digit)((t[0] >> 23) + (t[1] & 0x7fffff)); t[2] = tb * a[i+2]; - r[i+2] += (t[1] >> 23) + (t[2] & 0x7fffff); + r[i+2] += (sp_digit)((t[1] >> 23) + (t[2] & 0x7fffff)); t[3] = tb * a[i+3]; - r[i+3] += (t[2] >> 23) + (t[3] & 0x7fffff); + r[i+3] += (sp_digit)((t[2] >> 23) + (t[3] & 0x7fffff)); t[4] = tb * a[i+4]; - r[i+4] += (t[3] >> 23) + (t[4] & 0x7fffff); + r[i+4] += (sp_digit)((t[3] >> 23) + (t[4] & 0x7fffff)); t[5] = tb * a[i+5]; - r[i+5] += (t[4] >> 23) + (t[5] & 0x7fffff); + r[i+5] += (sp_digit)((t[4] >> 23) + (t[5] & 0x7fffff)); t[6] = tb * a[i+6]; - r[i+6] += (t[5] >> 23) + (t[6] & 0x7fffff); + r[i+6] += (sp_digit)((t[5] >> 23) + (t[6] & 0x7fffff)); t[7] = tb * a[i+7]; - r[i+7] += (t[6] >> 23) + (t[7] & 0x7fffff); + r[i+7] += (sp_digit)((t[6] >> 23) + (t[7] & 0x7fffff)); t[0] = tb * a[i+8]; - r[i+8] += (t[7] >> 23) + (t[0] & 0x7fffff); + r[i+8] += (sp_digit)((t[7] >> 23) + (t[0] & 0x7fffff)); } - t[1] = tb * a[41]; r[41] += (t[0] >> 23) + (t[1] & 0x7fffff); - t[2] = tb * a[42]; r[42] += (t[1] >> 23) 
+ (t[2] & 0x7fffff); - t[3] = tb * a[43]; r[43] += (t[2] >> 23) + (t[3] & 0x7fffff); - t[4] = tb * a[44]; r[44] += (t[3] >> 23) + (t[4] & 0x7fffff); - r[45] += t[4] >> 23; + t[1] = tb * a[41]; r[41] += (sp_digit)((t[0] >> 23) + (t[1] & 0x7fffff)); + t[2] = tb * a[42]; r[42] += (sp_digit)((t[1] >> 23) + (t[2] & 0x7fffff)); + t[3] = tb * a[43]; r[43] += (sp_digit)((t[2] >> 23) + (t[3] & 0x7fffff)); + t[4] = tb * a[44]; r[44] += (sp_digit)((t[3] >> 23) + (t[4] & 0x7fffff)); + r[45] += (sp_digit)(t[4] >> 23); #endif /* WOLFSSL_SP_SMALL */ } @@ -1547,6 +1547,8 @@ static void sp_2048_mont_reduce_45(sp_digit* a, const sp_digit* m, sp_digit mp) int i; sp_digit mu; + sp_2048_norm_45(a + 45); + for (i=0; i<44; i++) { mu = (a[i] * mp) & 0x7fffff; sp_2048_mul_add_45(a+i, m, mu); @@ -2328,27 +2330,27 @@ SP_NOINLINE static void sp_2048_mul_add_90(sp_digit* r, const sp_digit* a, int64_t t[8]; int i; - t[0] = tb * a[0]; r[0] += t[0] & 0x7fffff; + t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x7fffff); for (i = 0; i < 88; i += 8) { t[1] = tb * a[i+1]; - r[i+1] += (t[0] >> 23) + (t[1] & 0x7fffff); + r[i+1] += (sp_digit)((t[0] >> 23) + (t[1] & 0x7fffff)); t[2] = tb * a[i+2]; - r[i+2] += (t[1] >> 23) + (t[2] & 0x7fffff); + r[i+2] += (sp_digit)((t[1] >> 23) + (t[2] & 0x7fffff)); t[3] = tb * a[i+3]; - r[i+3] += (t[2] >> 23) + (t[3] & 0x7fffff); + r[i+3] += (sp_digit)((t[2] >> 23) + (t[3] & 0x7fffff)); t[4] = tb * a[i+4]; - r[i+4] += (t[3] >> 23) + (t[4] & 0x7fffff); + r[i+4] += (sp_digit)((t[3] >> 23) + (t[4] & 0x7fffff)); t[5] = tb * a[i+5]; - r[i+5] += (t[4] >> 23) + (t[5] & 0x7fffff); + r[i+5] += (sp_digit)((t[4] >> 23) + (t[5] & 0x7fffff)); t[6] = tb * a[i+6]; - r[i+6] += (t[5] >> 23) + (t[6] & 0x7fffff); + r[i+6] += (sp_digit)((t[5] >> 23) + (t[6] & 0x7fffff)); t[7] = tb * a[i+7]; - r[i+7] += (t[6] >> 23) + (t[7] & 0x7fffff); + r[i+7] += (sp_digit)((t[6] >> 23) + (t[7] & 0x7fffff)); t[0] = tb * a[i+8]; - r[i+8] += (t[7] >> 23) + (t[0] & 0x7fffff); + r[i+8] += (sp_digit)((t[7] >> 23) 
+ (t[0] & 0x7fffff)); } - t[1] = tb * a[89]; r[89] += (t[0] >> 23) + (t[1] & 0x7fffff); - r[90] += t[1] >> 23; + t[1] = tb * a[89]; r[89] += (sp_digit)((t[0] >> 23) + (t[1] & 0x7fffff)); + r[90] += (sp_digit)(t[1] >> 23); #endif /* WOLFSSL_SP_SMALL */ } @@ -2439,6 +2441,8 @@ static void sp_2048_mont_reduce_90(sp_digit* a, const sp_digit* m, sp_digit mp) int i; sp_digit mu; + sp_2048_norm_90(a + 90); + #ifdef WOLFSSL_SP_DH if (mp != 1) { for (i=0; i<89; i++) { @@ -5294,28 +5298,28 @@ SP_NOINLINE static void sp_3072_mul_add_67(sp_digit* r, const sp_digit* a, int64_t t[8]; int i; - t[0] = tb * a[0]; r[0] += t[0] & 0x7fffff; + t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x7fffff); for (i = 0; i < 64; i += 8) { t[1] = tb * a[i+1]; - r[i+1] += (t[0] >> 23) + (t[1] & 0x7fffff); + r[i+1] += (sp_digit)((t[0] >> 23) + (t[1] & 0x7fffff)); t[2] = tb * a[i+2]; - r[i+2] += (t[1] >> 23) + (t[2] & 0x7fffff); + r[i+2] += (sp_digit)((t[1] >> 23) + (t[2] & 0x7fffff)); t[3] = tb * a[i+3]; - r[i+3] += (t[2] >> 23) + (t[3] & 0x7fffff); + r[i+3] += (sp_digit)((t[2] >> 23) + (t[3] & 0x7fffff)); t[4] = tb * a[i+4]; - r[i+4] += (t[3] >> 23) + (t[4] & 0x7fffff); + r[i+4] += (sp_digit)((t[3] >> 23) + (t[4] & 0x7fffff)); t[5] = tb * a[i+5]; - r[i+5] += (t[4] >> 23) + (t[5] & 0x7fffff); + r[i+5] += (sp_digit)((t[4] >> 23) + (t[5] & 0x7fffff)); t[6] = tb * a[i+6]; - r[i+6] += (t[5] >> 23) + (t[6] & 0x7fffff); + r[i+6] += (sp_digit)((t[5] >> 23) + (t[6] & 0x7fffff)); t[7] = tb * a[i+7]; - r[i+7] += (t[6] >> 23) + (t[7] & 0x7fffff); + r[i+7] += (sp_digit)((t[6] >> 23) + (t[7] & 0x7fffff)); t[0] = tb * a[i+8]; - r[i+8] += (t[7] >> 23) + (t[0] & 0x7fffff); + r[i+8] += (sp_digit)((t[7] >> 23) + (t[0] & 0x7fffff)); } - t[1] = tb * a[65]; r[65] += (t[0] >> 23) + (t[1] & 0x7fffff); - t[2] = tb * a[66]; r[66] += (t[1] >> 23) + (t[2] & 0x7fffff); - r[67] += t[2] >> 23; + t[1] = tb * a[65]; r[65] += (sp_digit)((t[0] >> 23) + (t[1] & 0x7fffff)); + t[2] = tb * a[66]; r[66] += (sp_digit)((t[1] >> 23) + (t[2] 
& 0x7fffff)); + r[67] += (sp_digit)(t[2] >> 23); #endif /* WOLFSSL_SP_SMALL */ } @@ -5415,6 +5419,8 @@ static void sp_3072_mont_reduce_67(sp_digit* a, const sp_digit* m, sp_digit mp) int i; sp_digit mu; + sp_3072_norm_67(a + 67); + for (i=0; i<66; i++) { mu = (a[i] * mp) & 0x7fffff; sp_3072_mul_add_67(a+i, m, mu); @@ -6166,31 +6172,31 @@ SP_NOINLINE static void sp_3072_mul_add_134(sp_digit* r, const sp_digit* a, int64_t t[8]; int i; - t[0] = tb * a[0]; r[0] += t[0] & 0x7fffff; + t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x7fffff); for (i = 0; i < 128; i += 8) { t[1] = tb * a[i+1]; - r[i+1] += (t[0] >> 23) + (t[1] & 0x7fffff); + r[i+1] += (sp_digit)((t[0] >> 23) + (t[1] & 0x7fffff)); t[2] = tb * a[i+2]; - r[i+2] += (t[1] >> 23) + (t[2] & 0x7fffff); + r[i+2] += (sp_digit)((t[1] >> 23) + (t[2] & 0x7fffff)); t[3] = tb * a[i+3]; - r[i+3] += (t[2] >> 23) + (t[3] & 0x7fffff); + r[i+3] += (sp_digit)((t[2] >> 23) + (t[3] & 0x7fffff)); t[4] = tb * a[i+4]; - r[i+4] += (t[3] >> 23) + (t[4] & 0x7fffff); + r[i+4] += (sp_digit)((t[3] >> 23) + (t[4] & 0x7fffff)); t[5] = tb * a[i+5]; - r[i+5] += (t[4] >> 23) + (t[5] & 0x7fffff); + r[i+5] += (sp_digit)((t[4] >> 23) + (t[5] & 0x7fffff)); t[6] = tb * a[i+6]; - r[i+6] += (t[5] >> 23) + (t[6] & 0x7fffff); + r[i+6] += (sp_digit)((t[5] >> 23) + (t[6] & 0x7fffff)); t[7] = tb * a[i+7]; - r[i+7] += (t[6] >> 23) + (t[7] & 0x7fffff); + r[i+7] += (sp_digit)((t[6] >> 23) + (t[7] & 0x7fffff)); t[0] = tb * a[i+8]; - r[i+8] += (t[7] >> 23) + (t[0] & 0x7fffff); + r[i+8] += (sp_digit)((t[7] >> 23) + (t[0] & 0x7fffff)); } - t[1] = tb * a[129]; r[129] += (t[0] >> 23) + (t[1] & 0x7fffff); - t[2] = tb * a[130]; r[130] += (t[1] >> 23) + (t[2] & 0x7fffff); - t[3] = tb * a[131]; r[131] += (t[2] >> 23) + (t[3] & 0x7fffff); - t[4] = tb * a[132]; r[132] += (t[3] >> 23) + (t[4] & 0x7fffff); - t[5] = tb * a[133]; r[133] += (t[4] >> 23) + (t[5] & 0x7fffff); - r[134] += t[5] >> 23; + t[1] = tb * a[129]; r[129] += (sp_digit)((t[0] >> 23) + (t[1] & 0x7fffff)); + 
t[2] = tb * a[130]; r[130] += (sp_digit)((t[1] >> 23) + (t[2] & 0x7fffff)); + t[3] = tb * a[131]; r[131] += (sp_digit)((t[2] >> 23) + (t[3] & 0x7fffff)); + t[4] = tb * a[132]; r[132] += (sp_digit)((t[3] >> 23) + (t[4] & 0x7fffff)); + t[5] = tb * a[133]; r[133] += (sp_digit)((t[4] >> 23) + (t[5] & 0x7fffff)); + r[134] += (sp_digit)(t[5] >> 23); #endif /* WOLFSSL_SP_SMALL */ } @@ -6293,6 +6299,8 @@ static void sp_3072_mont_reduce_134(sp_digit* a, const sp_digit* m, sp_digit mp) int i; sp_digit mu; + sp_3072_norm_134(a + 134); + #ifdef WOLFSSL_SP_DH if (mp != 1) { for (i=0; i<133; i++) { @@ -9298,27 +9306,27 @@ SP_NOINLINE static void sp_4096_mul_add_98(sp_digit* r, const sp_digit* a, int64_t t[8]; int i; - t[0] = tb * a[0]; r[0] += t[0] & 0x1fffff; + t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x1fffff); for (i = 0; i < 96; i += 8) { t[1] = tb * a[i+1]; - r[i+1] += (t[0] >> 21) + (t[1] & 0x1fffff); + r[i+1] += (sp_digit)((t[0] >> 21) + (t[1] & 0x1fffff)); t[2] = tb * a[i+2]; - r[i+2] += (t[1] >> 21) + (t[2] & 0x1fffff); + r[i+2] += (sp_digit)((t[1] >> 21) + (t[2] & 0x1fffff)); t[3] = tb * a[i+3]; - r[i+3] += (t[2] >> 21) + (t[3] & 0x1fffff); + r[i+3] += (sp_digit)((t[2] >> 21) + (t[3] & 0x1fffff)); t[4] = tb * a[i+4]; - r[i+4] += (t[3] >> 21) + (t[4] & 0x1fffff); + r[i+4] += (sp_digit)((t[3] >> 21) + (t[4] & 0x1fffff)); t[5] = tb * a[i+5]; - r[i+5] += (t[4] >> 21) + (t[5] & 0x1fffff); + r[i+5] += (sp_digit)((t[4] >> 21) + (t[5] & 0x1fffff)); t[6] = tb * a[i+6]; - r[i+6] += (t[5] >> 21) + (t[6] & 0x1fffff); + r[i+6] += (sp_digit)((t[5] >> 21) + (t[6] & 0x1fffff)); t[7] = tb * a[i+7]; - r[i+7] += (t[6] >> 21) + (t[7] & 0x1fffff); + r[i+7] += (sp_digit)((t[6] >> 21) + (t[7] & 0x1fffff)); t[0] = tb * a[i+8]; - r[i+8] += (t[7] >> 21) + (t[0] & 0x1fffff); + r[i+8] += (sp_digit)((t[7] >> 21) + (t[0] & 0x1fffff)); } - t[1] = tb * a[97]; r[97] += (t[0] >> 21) + (t[1] & 0x1fffff); - r[98] += t[1] >> 21; + t[1] = tb * a[97]; r[97] += (sp_digit)((t[0] >> 21) + (t[1] & 
0x1fffff)); + r[98] += (sp_digit)(t[1] >> 21); #endif /* WOLFSSL_SP_SMALL */ } @@ -9409,6 +9417,8 @@ static void sp_4096_mont_reduce_98(sp_digit* a, const sp_digit* m, sp_digit mp) int i; sp_digit mu; + sp_4096_norm_98(a + 98); + for (i=0; i<97; i++) { mu = (a[i] * mp) & 0x1fffff; sp_4096_mul_add_98(a+i, m, mu); @@ -10204,29 +10214,29 @@ SP_NOINLINE static void sp_4096_mul_add_196(sp_digit* r, const sp_digit* a, int64_t t[8]; int i; - t[0] = tb * a[0]; r[0] += t[0] & 0x1fffff; + t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x1fffff); for (i = 0; i < 192; i += 8) { t[1] = tb * a[i+1]; - r[i+1] += (t[0] >> 21) + (t[1] & 0x1fffff); + r[i+1] += (sp_digit)((t[0] >> 21) + (t[1] & 0x1fffff)); t[2] = tb * a[i+2]; - r[i+2] += (t[1] >> 21) + (t[2] & 0x1fffff); + r[i+2] += (sp_digit)((t[1] >> 21) + (t[2] & 0x1fffff)); t[3] = tb * a[i+3]; - r[i+3] += (t[2] >> 21) + (t[3] & 0x1fffff); + r[i+3] += (sp_digit)((t[2] >> 21) + (t[3] & 0x1fffff)); t[4] = tb * a[i+4]; - r[i+4] += (t[3] >> 21) + (t[4] & 0x1fffff); + r[i+4] += (sp_digit)((t[3] >> 21) + (t[4] & 0x1fffff)); t[5] = tb * a[i+5]; - r[i+5] += (t[4] >> 21) + (t[5] & 0x1fffff); + r[i+5] += (sp_digit)((t[4] >> 21) + (t[5] & 0x1fffff)); t[6] = tb * a[i+6]; - r[i+6] += (t[5] >> 21) + (t[6] & 0x1fffff); + r[i+6] += (sp_digit)((t[5] >> 21) + (t[6] & 0x1fffff)); t[7] = tb * a[i+7]; - r[i+7] += (t[6] >> 21) + (t[7] & 0x1fffff); + r[i+7] += (sp_digit)((t[6] >> 21) + (t[7] & 0x1fffff)); t[0] = tb * a[i+8]; - r[i+8] += (t[7] >> 21) + (t[0] & 0x1fffff); + r[i+8] += (sp_digit)((t[7] >> 21) + (t[0] & 0x1fffff)); } - t[1] = tb * a[193]; r[193] += (t[0] >> 21) + (t[1] & 0x1fffff); - t[2] = tb * a[194]; r[194] += (t[1] >> 21) + (t[2] & 0x1fffff); - t[3] = tb * a[195]; r[195] += (t[2] >> 21) + (t[3] & 0x1fffff); - r[196] += t[3] >> 21; + t[1] = tb * a[193]; r[193] += (sp_digit)((t[0] >> 21) + (t[1] & 0x1fffff)); + t[2] = tb * a[194]; r[194] += (sp_digit)((t[1] >> 21) + (t[2] & 0x1fffff)); + t[3] = tb * a[195]; r[195] += (sp_digit)((t[2] >> 21) + 
(t[3] & 0x1fffff)); + r[196] += (sp_digit)(t[3] >> 21); #endif /* WOLFSSL_SP_SMALL */ } @@ -10323,6 +10333,8 @@ static void sp_4096_mont_reduce_196(sp_digit* a, const sp_digit* m, sp_digit mp) int i; sp_digit mu; + sp_4096_norm_196(a + 196); + #ifdef WOLFSSL_SP_DH if (mp != 1) { for (i=0; i<195; i++) { @@ -12998,16 +13010,16 @@ SP_NOINLINE static void sp_256_mul_add_10(sp_digit* r, const sp_digit* a, t[ 7] = tb * a[ 7]; t[ 8] = tb * a[ 8]; t[ 9] = tb * a[ 9]; - r[ 0] += (t[ 0] & 0x3ffffff); - r[ 1] += (t[ 0] >> 26) + (t[ 1] & 0x3ffffff); - r[ 2] += (t[ 1] >> 26) + (t[ 2] & 0x3ffffff); - r[ 3] += (t[ 2] >> 26) + (t[ 3] & 0x3ffffff); - r[ 4] += (t[ 3] >> 26) + (t[ 4] & 0x3ffffff); - r[ 5] += (t[ 4] >> 26) + (t[ 5] & 0x3ffffff); - r[ 6] += (t[ 5] >> 26) + (t[ 6] & 0x3ffffff); - r[ 7] += (t[ 6] >> 26) + (t[ 7] & 0x3ffffff); - r[ 8] += (t[ 7] >> 26) + (t[ 8] & 0x3ffffff); - r[ 9] += (t[ 8] >> 26) + (t[ 9] & 0x3ffffff); + r[ 0] += (sp_digit)(t[ 0] & 0x3ffffff); + r[ 1] += (sp_digit)((t[ 0] >> 26) + (t[ 1] & 0x3ffffff)); + r[ 2] += (sp_digit)((t[ 1] >> 26) + (t[ 2] & 0x3ffffff)); + r[ 3] += (sp_digit)((t[ 2] >> 26) + (t[ 3] & 0x3ffffff)); + r[ 4] += (sp_digit)((t[ 3] >> 26) + (t[ 4] & 0x3ffffff)); + r[ 5] += (sp_digit)((t[ 4] >> 26) + (t[ 5] & 0x3ffffff)); + r[ 6] += (sp_digit)((t[ 5] >> 26) + (t[ 6] & 0x3ffffff)); + r[ 7] += (sp_digit)((t[ 6] >> 26) + (t[ 7] & 0x3ffffff)); + r[ 8] += (sp_digit)((t[ 7] >> 26) + (t[ 8] & 0x3ffffff)); + r[ 9] += (sp_digit)((t[ 8] >> 26) + (t[ 9] & 0x3ffffff)); r[10] += t[ 9] >> 26; #endif /* WOLFSSL_SP_SMALL */ } diff --git a/wolfcrypt/src/sp_c64.c b/wolfcrypt/src/sp_c64.c index 717c46e65..69c009848 100644 --- a/wolfcrypt/src/sp_c64.c +++ b/wolfcrypt/src/sp_c64.c @@ -1058,27 +1058,27 @@ SP_NOINLINE static void sp_2048_mul_add_18(sp_digit* r, const sp_digit* a, int128_t t[8]; int i; - t[0] = tb * a[0]; r[0] += t[0] & 0x1ffffffffffffffL; + t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x1ffffffffffffffL); for (i = 0; i < 16; i += 8) { t[1] = tb 
* a[i+1]; - r[i+1] += (t[0] >> 57) + (t[1] & 0x1ffffffffffffffL); + r[i+1] += (sp_digit)((t[0] >> 57) + (t[1] & 0x1ffffffffffffffL)); t[2] = tb * a[i+2]; - r[i+2] += (t[1] >> 57) + (t[2] & 0x1ffffffffffffffL); + r[i+2] += (sp_digit)((t[1] >> 57) + (t[2] & 0x1ffffffffffffffL)); t[3] = tb * a[i+3]; - r[i+3] += (t[2] >> 57) + (t[3] & 0x1ffffffffffffffL); + r[i+3] += (sp_digit)((t[2] >> 57) + (t[3] & 0x1ffffffffffffffL)); t[4] = tb * a[i+4]; - r[i+4] += (t[3] >> 57) + (t[4] & 0x1ffffffffffffffL); + r[i+4] += (sp_digit)((t[3] >> 57) + (t[4] & 0x1ffffffffffffffL)); t[5] = tb * a[i+5]; - r[i+5] += (t[4] >> 57) + (t[5] & 0x1ffffffffffffffL); + r[i+5] += (sp_digit)((t[4] >> 57) + (t[5] & 0x1ffffffffffffffL)); t[6] = tb * a[i+6]; - r[i+6] += (t[5] >> 57) + (t[6] & 0x1ffffffffffffffL); + r[i+6] += (sp_digit)((t[5] >> 57) + (t[6] & 0x1ffffffffffffffL)); t[7] = tb * a[i+7]; - r[i+7] += (t[6] >> 57) + (t[7] & 0x1ffffffffffffffL); + r[i+7] += (sp_digit)((t[6] >> 57) + (t[7] & 0x1ffffffffffffffL)); t[0] = tb * a[i+8]; - r[i+8] += (t[7] >> 57) + (t[0] & 0x1ffffffffffffffL); + r[i+8] += (sp_digit)((t[7] >> 57) + (t[0] & 0x1ffffffffffffffL)); } - t[1] = tb * a[17]; r[17] += (t[0] >> 57) + (t[1] & 0x1ffffffffffffffL); - r[18] += t[1] >> 57; + t[1] = tb * a[17]; r[17] += (sp_digit)((t[0] >> 57) + (t[1] & 0x1ffffffffffffffL)); + r[18] += (sp_digit)(t[1] >> 57); #endif /* WOLFSSL_SP_SMALL */ } @@ -1164,6 +1164,8 @@ static void sp_2048_mont_reduce_18(sp_digit* a, const sp_digit* m, sp_digit mp) int i; sp_digit mu; + sp_2048_norm_18(a + 18); + for (i=0; i<17; i++) { mu = (a[i] * mp) & 0x1ffffffffffffffL; sp_2048_mul_add_18(a+i, m, mu); @@ -1935,29 +1937,29 @@ SP_NOINLINE static void sp_2048_mul_add_36(sp_digit* r, const sp_digit* a, int128_t t[8]; int i; - t[0] = tb * a[0]; r[0] += t[0] & 0x1ffffffffffffffL; + t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x1ffffffffffffffL); for (i = 0; i < 32; i += 8) { t[1] = tb * a[i+1]; - r[i+1] += (t[0] >> 57) + (t[1] & 0x1ffffffffffffffL); + r[i+1] += 
(sp_digit)((t[0] >> 57) + (t[1] & 0x1ffffffffffffffL)); t[2] = tb * a[i+2]; - r[i+2] += (t[1] >> 57) + (t[2] & 0x1ffffffffffffffL); + r[i+2] += (sp_digit)((t[1] >> 57) + (t[2] & 0x1ffffffffffffffL)); t[3] = tb * a[i+3]; - r[i+3] += (t[2] >> 57) + (t[3] & 0x1ffffffffffffffL); + r[i+3] += (sp_digit)((t[2] >> 57) + (t[3] & 0x1ffffffffffffffL)); t[4] = tb * a[i+4]; - r[i+4] += (t[3] >> 57) + (t[4] & 0x1ffffffffffffffL); + r[i+4] += (sp_digit)((t[3] >> 57) + (t[4] & 0x1ffffffffffffffL)); t[5] = tb * a[i+5]; - r[i+5] += (t[4] >> 57) + (t[5] & 0x1ffffffffffffffL); + r[i+5] += (sp_digit)((t[4] >> 57) + (t[5] & 0x1ffffffffffffffL)); t[6] = tb * a[i+6]; - r[i+6] += (t[5] >> 57) + (t[6] & 0x1ffffffffffffffL); + r[i+6] += (sp_digit)((t[5] >> 57) + (t[6] & 0x1ffffffffffffffL)); t[7] = tb * a[i+7]; - r[i+7] += (t[6] >> 57) + (t[7] & 0x1ffffffffffffffL); + r[i+7] += (sp_digit)((t[6] >> 57) + (t[7] & 0x1ffffffffffffffL)); t[0] = tb * a[i+8]; - r[i+8] += (t[7] >> 57) + (t[0] & 0x1ffffffffffffffL); + r[i+8] += (sp_digit)((t[7] >> 57) + (t[0] & 0x1ffffffffffffffL)); } - t[1] = tb * a[33]; r[33] += (t[0] >> 57) + (t[1] & 0x1ffffffffffffffL); - t[2] = tb * a[34]; r[34] += (t[1] >> 57) + (t[2] & 0x1ffffffffffffffL); - t[3] = tb * a[35]; r[35] += (t[2] >> 57) + (t[3] & 0x1ffffffffffffffL); - r[36] += t[3] >> 57; + t[1] = tb * a[33]; r[33] += (sp_digit)((t[0] >> 57) + (t[1] & 0x1ffffffffffffffL)); + t[2] = tb * a[34]; r[34] += (sp_digit)((t[1] >> 57) + (t[2] & 0x1ffffffffffffffL)); + t[3] = tb * a[35]; r[35] += (sp_digit)((t[2] >> 57) + (t[3] & 0x1ffffffffffffffL)); + r[36] += (sp_digit)(t[3] >> 57); #endif /* WOLFSSL_SP_SMALL */ } @@ -2061,6 +2063,8 @@ static void sp_2048_mont_reduce_36(sp_digit* a, const sp_digit* m, sp_digit mp) int i; sp_digit mu; + sp_2048_norm_36(a + 36); + #ifdef WOLFSSL_SP_DH if (mp != 1) { for (i=0; i<35; i++) { @@ -5148,28 +5152,28 @@ SP_NOINLINE static void sp_3072_mul_add_27(sp_digit* r, const sp_digit* a, int128_t t[8]; int i; - t[0] = tb * a[0]; r[0] += t[0] 
& 0x1ffffffffffffffL; + t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x1ffffffffffffffL); for (i = 0; i < 24; i += 8) { t[1] = tb * a[i+1]; - r[i+1] += (t[0] >> 57) + (t[1] & 0x1ffffffffffffffL); + r[i+1] += (sp_digit)((t[0] >> 57) + (t[1] & 0x1ffffffffffffffL)); t[2] = tb * a[i+2]; - r[i+2] += (t[1] >> 57) + (t[2] & 0x1ffffffffffffffL); + r[i+2] += (sp_digit)((t[1] >> 57) + (t[2] & 0x1ffffffffffffffL)); t[3] = tb * a[i+3]; - r[i+3] += (t[2] >> 57) + (t[3] & 0x1ffffffffffffffL); + r[i+3] += (sp_digit)((t[2] >> 57) + (t[3] & 0x1ffffffffffffffL)); t[4] = tb * a[i+4]; - r[i+4] += (t[3] >> 57) + (t[4] & 0x1ffffffffffffffL); + r[i+4] += (sp_digit)((t[3] >> 57) + (t[4] & 0x1ffffffffffffffL)); t[5] = tb * a[i+5]; - r[i+5] += (t[4] >> 57) + (t[5] & 0x1ffffffffffffffL); + r[i+5] += (sp_digit)((t[4] >> 57) + (t[5] & 0x1ffffffffffffffL)); t[6] = tb * a[i+6]; - r[i+6] += (t[5] >> 57) + (t[6] & 0x1ffffffffffffffL); + r[i+6] += (sp_digit)((t[5] >> 57) + (t[6] & 0x1ffffffffffffffL)); t[7] = tb * a[i+7]; - r[i+7] += (t[6] >> 57) + (t[7] & 0x1ffffffffffffffL); + r[i+7] += (sp_digit)((t[6] >> 57) + (t[7] & 0x1ffffffffffffffL)); t[0] = tb * a[i+8]; - r[i+8] += (t[7] >> 57) + (t[0] & 0x1ffffffffffffffL); + r[i+8] += (sp_digit)((t[7] >> 57) + (t[0] & 0x1ffffffffffffffL)); } - t[1] = tb * a[25]; r[25] += (t[0] >> 57) + (t[1] & 0x1ffffffffffffffL); - t[2] = tb * a[26]; r[26] += (t[1] >> 57) + (t[2] & 0x1ffffffffffffffL); - r[27] += t[2] >> 57; + t[1] = tb * a[25]; r[25] += (sp_digit)((t[0] >> 57) + (t[1] & 0x1ffffffffffffffL)); + t[2] = tb * a[26]; r[26] += (sp_digit)((t[1] >> 57) + (t[2] & 0x1ffffffffffffffL)); + r[27] += (sp_digit)(t[2] >> 57); #endif /* WOLFSSL_SP_SMALL */ } @@ -5269,6 +5273,8 @@ static void sp_3072_mont_reduce_27(sp_digit* a, const sp_digit* m, sp_digit mp) int i; sp_digit mu; + sp_3072_norm_27(a + 27); + for (i=0; i<26; i++) { mu = (a[i] * mp) & 0x1ffffffffffffffL; sp_3072_mul_add_27(a+i, m, mu); @@ -6010,31 +6016,31 @@ SP_NOINLINE static void 
sp_3072_mul_add_54(sp_digit* r, const sp_digit* a, int128_t t[8]; int i; - t[0] = tb * a[0]; r[0] += t[0] & 0x1ffffffffffffffL; + t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x1ffffffffffffffL); for (i = 0; i < 48; i += 8) { t[1] = tb * a[i+1]; - r[i+1] += (t[0] >> 57) + (t[1] & 0x1ffffffffffffffL); + r[i+1] += (sp_digit)((t[0] >> 57) + (t[1] & 0x1ffffffffffffffL)); t[2] = tb * a[i+2]; - r[i+2] += (t[1] >> 57) + (t[2] & 0x1ffffffffffffffL); + r[i+2] += (sp_digit)((t[1] >> 57) + (t[2] & 0x1ffffffffffffffL)); t[3] = tb * a[i+3]; - r[i+3] += (t[2] >> 57) + (t[3] & 0x1ffffffffffffffL); + r[i+3] += (sp_digit)((t[2] >> 57) + (t[3] & 0x1ffffffffffffffL)); t[4] = tb * a[i+4]; - r[i+4] += (t[3] >> 57) + (t[4] & 0x1ffffffffffffffL); + r[i+4] += (sp_digit)((t[3] >> 57) + (t[4] & 0x1ffffffffffffffL)); t[5] = tb * a[i+5]; - r[i+5] += (t[4] >> 57) + (t[5] & 0x1ffffffffffffffL); + r[i+5] += (sp_digit)((t[4] >> 57) + (t[5] & 0x1ffffffffffffffL)); t[6] = tb * a[i+6]; - r[i+6] += (t[5] >> 57) + (t[6] & 0x1ffffffffffffffL); + r[i+6] += (sp_digit)((t[5] >> 57) + (t[6] & 0x1ffffffffffffffL)); t[7] = tb * a[i+7]; - r[i+7] += (t[6] >> 57) + (t[7] & 0x1ffffffffffffffL); + r[i+7] += (sp_digit)((t[6] >> 57) + (t[7] & 0x1ffffffffffffffL)); t[0] = tb * a[i+8]; - r[i+8] += (t[7] >> 57) + (t[0] & 0x1ffffffffffffffL); + r[i+8] += (sp_digit)((t[7] >> 57) + (t[0] & 0x1ffffffffffffffL)); } - t[1] = tb * a[49]; r[49] += (t[0] >> 57) + (t[1] & 0x1ffffffffffffffL); - t[2] = tb * a[50]; r[50] += (t[1] >> 57) + (t[2] & 0x1ffffffffffffffL); - t[3] = tb * a[51]; r[51] += (t[2] >> 57) + (t[3] & 0x1ffffffffffffffL); - t[4] = tb * a[52]; r[52] += (t[3] >> 57) + (t[4] & 0x1ffffffffffffffL); - t[5] = tb * a[53]; r[53] += (t[4] >> 57) + (t[5] & 0x1ffffffffffffffL); - r[54] += t[5] >> 57; + t[1] = tb * a[49]; r[49] += (sp_digit)((t[0] >> 57) + (t[1] & 0x1ffffffffffffffL)); + t[2] = tb * a[50]; r[50] += (sp_digit)((t[1] >> 57) + (t[2] & 0x1ffffffffffffffL)); + t[3] = tb * a[51]; r[51] += (sp_digit)((t[2] >> 57) + 
(t[3] & 0x1ffffffffffffffL)); + t[4] = tb * a[52]; r[52] += (sp_digit)((t[3] >> 57) + (t[4] & 0x1ffffffffffffffL)); + t[5] = tb * a[53]; r[53] += (sp_digit)((t[4] >> 57) + (t[5] & 0x1ffffffffffffffL)); + r[54] += (sp_digit)(t[5] >> 57); #endif /* WOLFSSL_SP_SMALL */ } @@ -6137,6 +6143,8 @@ static void sp_3072_mont_reduce_54(sp_digit* a, const sp_digit* m, sp_digit mp) int i; sp_digit mu; + sp_3072_norm_54(a + 54); + #ifdef WOLFSSL_SP_DH if (mp != 1) { for (i=0; i<53; i++) { @@ -9284,32 +9292,32 @@ SP_NOINLINE static void sp_4096_mul_add_39(sp_digit* r, const sp_digit* a, int128_t t[8]; int i; - t[0] = tb * a[0]; r[0] += t[0] & 0x1fffffffffffffL; + t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x1fffffffffffffL); for (i = 0; i < 32; i += 8) { t[1] = tb * a[i+1]; - r[i+1] += (t[0] >> 53) + (t[1] & 0x1fffffffffffffL); + r[i+1] += (sp_digit)((t[0] >> 53) + (t[1] & 0x1fffffffffffffL)); t[2] = tb * a[i+2]; - r[i+2] += (t[1] >> 53) + (t[2] & 0x1fffffffffffffL); + r[i+2] += (sp_digit)((t[1] >> 53) + (t[2] & 0x1fffffffffffffL)); t[3] = tb * a[i+3]; - r[i+3] += (t[2] >> 53) + (t[3] & 0x1fffffffffffffL); + r[i+3] += (sp_digit)((t[2] >> 53) + (t[3] & 0x1fffffffffffffL)); t[4] = tb * a[i+4]; - r[i+4] += (t[3] >> 53) + (t[4] & 0x1fffffffffffffL); + r[i+4] += (sp_digit)((t[3] >> 53) + (t[4] & 0x1fffffffffffffL)); t[5] = tb * a[i+5]; - r[i+5] += (t[4] >> 53) + (t[5] & 0x1fffffffffffffL); + r[i+5] += (sp_digit)((t[4] >> 53) + (t[5] & 0x1fffffffffffffL)); t[6] = tb * a[i+6]; - r[i+6] += (t[5] >> 53) + (t[6] & 0x1fffffffffffffL); + r[i+6] += (sp_digit)((t[5] >> 53) + (t[6] & 0x1fffffffffffffL)); t[7] = tb * a[i+7]; - r[i+7] += (t[6] >> 53) + (t[7] & 0x1fffffffffffffL); + r[i+7] += (sp_digit)((t[6] >> 53) + (t[7] & 0x1fffffffffffffL)); t[0] = tb * a[i+8]; - r[i+8] += (t[7] >> 53) + (t[0] & 0x1fffffffffffffL); + r[i+8] += (sp_digit)((t[7] >> 53) + (t[0] & 0x1fffffffffffffL)); } - t[1] = tb * a[33]; r[33] += (t[0] >> 53) + (t[1] & 0x1fffffffffffffL); - t[2] = tb * a[34]; r[34] += (t[1] >> 
53) + (t[2] & 0x1fffffffffffffL); - t[3] = tb * a[35]; r[35] += (t[2] >> 53) + (t[3] & 0x1fffffffffffffL); - t[4] = tb * a[36]; r[36] += (t[3] >> 53) + (t[4] & 0x1fffffffffffffL); - t[5] = tb * a[37]; r[37] += (t[4] >> 53) + (t[5] & 0x1fffffffffffffL); - t[6] = tb * a[38]; r[38] += (t[5] >> 53) + (t[6] & 0x1fffffffffffffL); - r[39] += t[6] >> 53; + t[1] = tb * a[33]; r[33] += (sp_digit)((t[0] >> 53) + (t[1] & 0x1fffffffffffffL)); + t[2] = tb * a[34]; r[34] += (sp_digit)((t[1] >> 53) + (t[2] & 0x1fffffffffffffL)); + t[3] = tb * a[35]; r[35] += (sp_digit)((t[2] >> 53) + (t[3] & 0x1fffffffffffffL)); + t[4] = tb * a[36]; r[36] += (sp_digit)((t[3] >> 53) + (t[4] & 0x1fffffffffffffL)); + t[5] = tb * a[37]; r[37] += (sp_digit)((t[4] >> 53) + (t[5] & 0x1fffffffffffffL)); + t[6] = tb * a[38]; r[38] += (sp_digit)((t[5] >> 53) + (t[6] & 0x1fffffffffffffL)); + r[39] += (sp_digit)(t[6] >> 53); #endif /* WOLFSSL_SP_SMALL */ } @@ -9415,6 +9423,8 @@ static void sp_4096_mont_reduce_39(sp_digit* a, const sp_digit* m, sp_digit mp) int i; sp_digit mu; + sp_4096_norm_39(a + 39); + for (i=0; i<38; i++) { mu = (a[i] * mp) & 0x1fffffffffffffL; sp_4096_mul_add_39(a+i, m, mu); @@ -10225,31 +10235,31 @@ SP_NOINLINE static void sp_4096_mul_add_78(sp_digit* r, const sp_digit* a, int128_t t[8]; int i; - t[0] = tb * a[0]; r[0] += t[0] & 0x1fffffffffffffL; + t[0] = tb * a[0]; r[0] += (sp_digit)(t[0] & 0x1fffffffffffffL); for (i = 0; i < 72; i += 8) { t[1] = tb * a[i+1]; - r[i+1] += (t[0] >> 53) + (t[1] & 0x1fffffffffffffL); + r[i+1] += (sp_digit)((t[0] >> 53) + (t[1] & 0x1fffffffffffffL)); t[2] = tb * a[i+2]; - r[i+2] += (t[1] >> 53) + (t[2] & 0x1fffffffffffffL); + r[i+2] += (sp_digit)((t[1] >> 53) + (t[2] & 0x1fffffffffffffL)); t[3] = tb * a[i+3]; - r[i+3] += (t[2] >> 53) + (t[3] & 0x1fffffffffffffL); + r[i+3] += (sp_digit)((t[2] >> 53) + (t[3] & 0x1fffffffffffffL)); t[4] = tb * a[i+4]; - r[i+4] += (t[3] >> 53) + (t[4] & 0x1fffffffffffffL); + r[i+4] += (sp_digit)((t[3] >> 53) + (t[4] & 
0x1fffffffffffffL)); t[5] = tb * a[i+5]; - r[i+5] += (t[4] >> 53) + (t[5] & 0x1fffffffffffffL); + r[i+5] += (sp_digit)((t[4] >> 53) + (t[5] & 0x1fffffffffffffL)); t[6] = tb * a[i+6]; - r[i+6] += (t[5] >> 53) + (t[6] & 0x1fffffffffffffL); + r[i+6] += (sp_digit)((t[5] >> 53) + (t[6] & 0x1fffffffffffffL)); t[7] = tb * a[i+7]; - r[i+7] += (t[6] >> 53) + (t[7] & 0x1fffffffffffffL); + r[i+7] += (sp_digit)((t[6] >> 53) + (t[7] & 0x1fffffffffffffL)); t[0] = tb * a[i+8]; - r[i+8] += (t[7] >> 53) + (t[0] & 0x1fffffffffffffL); + r[i+8] += (sp_digit)((t[7] >> 53) + (t[0] & 0x1fffffffffffffL)); } - t[1] = tb * a[73]; r[73] += (t[0] >> 53) + (t[1] & 0x1fffffffffffffL); - t[2] = tb * a[74]; r[74] += (t[1] >> 53) + (t[2] & 0x1fffffffffffffL); - t[3] = tb * a[75]; r[75] += (t[2] >> 53) + (t[3] & 0x1fffffffffffffL); - t[4] = tb * a[76]; r[76] += (t[3] >> 53) + (t[4] & 0x1fffffffffffffL); - t[5] = tb * a[77]; r[77] += (t[4] >> 53) + (t[5] & 0x1fffffffffffffL); - r[78] += t[5] >> 53; + t[1] = tb * a[73]; r[73] += (sp_digit)((t[0] >> 53) + (t[1] & 0x1fffffffffffffL)); + t[2] = tb * a[74]; r[74] += (sp_digit)((t[1] >> 53) + (t[2] & 0x1fffffffffffffL)); + t[3] = tb * a[75]; r[75] += (sp_digit)((t[2] >> 53) + (t[3] & 0x1fffffffffffffL)); + t[4] = tb * a[76]; r[76] += (sp_digit)((t[3] >> 53) + (t[4] & 0x1fffffffffffffL)); + t[5] = tb * a[77]; r[77] += (sp_digit)((t[4] >> 53) + (t[5] & 0x1fffffffffffffL)); + r[78] += (sp_digit)(t[5] >> 53); #endif /* WOLFSSL_SP_SMALL */ } @@ -10352,6 +10362,8 @@ static void sp_4096_mont_reduce_78(sp_digit* a, const sp_digit* m, sp_digit mp) int i; sp_digit mu; + sp_4096_norm_78(a + 78); + #ifdef WOLFSSL_SP_DH if (mp != 1) { for (i=0; i<77; i++) { @@ -12779,11 +12791,11 @@ SP_NOINLINE static void sp_256_mul_add_5(sp_digit* r, const sp_digit* a, t[ 2] = tb * a[ 2]; t[ 3] = tb * a[ 3]; t[ 4] = tb * a[ 4]; - r[ 0] += (t[ 0] & 0xfffffffffffffL); - r[ 1] += (t[ 0] >> 52) + (t[ 1] & 0xfffffffffffffL); - r[ 2] += (t[ 1] >> 52) + (t[ 2] & 0xfffffffffffffL); - r[ 3] 
+= (t[ 2] >> 52) + (t[ 3] & 0xfffffffffffffL); - r[ 4] += (t[ 3] >> 52) + (t[ 4] & 0xfffffffffffffL); + r[ 0] += (sp_digit)(t[ 0] & 0xfffffffffffffL); + r[ 1] += (sp_digit)((t[ 0] >> 52) + (t[ 1] & 0xfffffffffffffL)); + r[ 2] += (sp_digit)((t[ 1] >> 52) + (t[ 2] & 0xfffffffffffffL)); + r[ 3] += (sp_digit)((t[ 2] >> 52) + (t[ 3] & 0xfffffffffffffL)); + r[ 4] += (sp_digit)((t[ 3] >> 52) + (t[ 4] & 0xfffffffffffffL)); r[ 5] += t[ 4] >> 52; #endif /* WOLFSSL_SP_SMALL */ } diff --git a/wolfcrypt/src/sp_int.c b/wolfcrypt/src/sp_int.c index 2ba625294..b12198955 100644 --- a/wolfcrypt/src/sp_int.c +++ b/wolfcrypt/src/sp_int.c @@ -1489,24 +1489,56 @@ int sp_lcm(sp_int* a, sp_int* b, sp_int* r) int sp_exptmod(sp_int* b, sp_int* e, sp_int* m, sp_int* r) { int err = MP_OKAY; - int bits = sp_count_bits(m); + int done = 0; + int mBits = sp_count_bits(m); + int bBits = sp_count_bits(b); + int eBits = sp_count_bits(e); + if (sp_iszero(m)) { + err = MP_VAL; + } + else if (sp_isone(m)) { + sp_set(r, 0); + done = 1; + } + else if (sp_iszero(e)) { + sp_set(r, 1); + done = 1; + } + else if (sp_iszero(b)) { + sp_set(r, 0); + done = 1; + } + + if (!done && (err == MP_OKAY)) { #ifndef WOLFSSL_SP_NO_2048 - if (bits == 1024) - sp_ModExp_1024(b, e, m, r); - else if (bits == 2048) - sp_ModExp_2048(b, e, m, r); - else + if ((mBits == 1024) && sp_isodd(m) && (bBits <= 1024) && + (eBits <= 1024)) { + err = sp_ModExp_1024(b, e, m, r); + done = 1; + } + else if ((mBits == 1024) && sp_isodd(m) && (bBits <= 1024) && + (eBits <= 1024)) { + err = sp_ModExp_2048(b, e, m, r); + done = 1; + } + else #endif #ifndef WOLFSSL_SP_NO_3072 - if (bits == 1536) - sp_ModExp_1536(b, e, m, r); - else if (bits == 3072) - sp_ModExp_3072(b, e, m, r); - else + if ((mBits == 1536) && sp_isodd(m) && (bBits <= 1536) && + (eBits <= 1536)) { + err = sp_ModExp_1536(b, e, m, r); + done = 1; + } + else if ((mBits == 3072) && sp_isodd(m) && (bBits <= 3072) && + (eBits <= 3072)) { + err = sp_ModExp_3072(b, e, m, r); + done = 1; + 
} #endif + } #if defined(WOLFSSL_HAVE_SP_DH) && defined(WOLFSSL_KEY_GEN) - if (bits == 256) { + if (!done && (err == MP_OKAY)) { int i; #ifdef WOLFSSL_SMALL_STACK @@ -1516,37 +1548,44 @@ int sp_exptmod(sp_int* b, sp_int* e, sp_int* m, sp_int* r) #endif #ifdef WOLFSSL_SMALL_STACK - t = (sp_int*)XMALLOC(sizeof(sp_int) * 2, NULL, DYNAMIC_TYPE_BIGINT); - if (t == NULL) { - err = MP_MEM; + if (!done && (err == MP_OKAY)) { + t = (sp_int*)XMALLOC(sizeof(sp_int) * 2, NULL, DYNAMIC_TYPE_BIGINT); + if (t == NULL) { + err = MP_MEM; + } } #endif - if (err == MP_OKAY) { + if (!done && (err == MP_OKAY)) { sp_init(t); sp_copy(b, t); - bits = sp_count_bits(e); - } - for (i = bits-2; err == MP_OKAY && i >= 0; i--) { - err = sp_sqrmod(t, m, t); - if (err == MP_OKAY && + for (i = eBits-2; err == MP_OKAY && i >= 0; i--) { + err = sp_sqrmod(t, m, t); + if (err == MP_OKAY && (e->dp[i / SP_WORD_SIZE] >> (i % SP_WORD_SIZE)) & 1) { - err = sp_mulmod(t, b, m, t); - } + err = sp_mulmod(t, b, m, t); + } + } } - if (err == MP_OKAY) + if (!done && (err == MP_OKAY)) { sp_copy(t, r); + } #ifdef WOLFSSL_SMALL_STACK - if (t != NULL) + if (t != NULL) { XFREE(t, NULL, DYNAMIC_TYPE_BIGINT); + } #endif } - else -#endif +#else + { err = MP_VAL; + } +#endif - (void)bits; + (void)mBits; + (void)bBits; + (void)eBits; return err; } From adc14f75528cb81d9b19b90cdfc0262142b69f7e Mon Sep 17 00:00:00 2001 From: Sean Parkinson Date: Wed, 11 Dec 2019 10:57:09 +1000 Subject: [PATCH 2/3] sp_int: Check size of numbers for overflow --- wolfcrypt/src/sp_int.c | 139 ++++++++++++++++++++++++------------- wolfssl/wolfcrypt/sp_int.h | 2 +- 2 files changed, 91 insertions(+), 50 deletions(-) diff --git a/wolfcrypt/src/sp_int.c b/wolfcrypt/src/sp_int.c index b12198955..f14420f3b 100644 --- a/wolfcrypt/src/sp_int.c +++ b/wolfcrypt/src/sp_int.c @@ -149,39 +149,44 @@ int sp_unsigned_bin_size(sp_int* a) * a SP integer. * in Array of bytes. * inSz Number of data bytes in array. - * returns MP_OKAY always. 
+ * returns MP_VAL when the number is too big to fit in an SP and + * MP_OKAY otherwise. */ -int sp_read_unsigned_bin(sp_int* a, const byte* in, word32 inSz) +int sp_read_unsigned_bin(sp_int* a, const byte* in, int inSz) { + int err = MP_OKAY; int i, j = 0, s = 0; - a->dp[0] = 0; - for (i = inSz-1; i >= 0; i--) { - a->dp[j] |= ((sp_int_digit)in[i]) << s; - if (s == DIGIT_BIT - 8) { - a->dp[++j] = 0; - s = 0; - } - else if (s > DIGIT_BIT - 8) { - s = DIGIT_BIT - s; - if (j + 1 >= a->size) - break; - a->dp[++j] = in[i] >> s; - s = 8 - s; - } - else - s += 8; + if (inSz > SP_INT_DIGITS * (int)sizeof(a->dp[0])) { + err = MP_VAL; } - a->used = j + 1; - if (a->dp[j] == 0) - a->used--; + if (err == MP_OKAY) { + a->dp[0] = 0; + for (i = inSz-1; i >= 0; i--) { + a->dp[j] |= ((sp_int_digit)in[i]) << s; + if (s == DIGIT_BIT - 8) { + a->dp[++j] = 0; + s = 0; + } + else if (s > DIGIT_BIT - 8) { + s = DIGIT_BIT - s; + if (j + 1 >= a->size) + break; + a->dp[++j] = in[i] >> s; + s = 8 - s; + } + else + s += 8; + } - for (j++; j < a->size; j++) - a->dp[j] = 0; - sp_clamp(a); + a->used = j + 1; + sp_clamp(a); + for (j++; j < a->size; j++) + a->dp[j] = 0; + } - return MP_OKAY; + return err; } #ifdef HAVE_ECC @@ -201,8 +206,9 @@ int sp_read_radix(sp_int* a, const char* in, int radix) int i, j = 0, k = 0; char ch; - if ((radix != 16) || (*in == '-')) + if ((radix != 16) || (*in == '-')) { err = BAD_FUNC_ARG; + } if (err == MP_OKAY) { a->dp[0] = 0; @@ -221,7 +227,11 @@ int sp_read_radix(sp_int* a, const char* in, int radix) a->dp[k] |= ((sp_int_digit)ch) << j; j += 4; - if (j == DIGIT_BIT && k < SP_INT_DIGITS) + if (k >= SP_INT_DIGITS - 1) { + err = MP_VAL; + break; + } + if (j == DIGIT_BIT) a->dp[++k] = 0; j &= DIGIT_BIT - 1; } @@ -1082,12 +1092,17 @@ int sp_mul(sp_int* a, sp_int* b, sp_int* r) sp_int tr[1]; #endif + if (a->used + b->used > SP_INT_DIGITS) + err = MP_VAL; + #ifdef WOLFSSL_SMALL_STACK - t = (sp_int*)XMALLOC(sizeof(sp_int) * 2, NULL, DYNAMIC_TYPE_BIGINT); - if (t == 
NULL) - err = MP_MEM; - else - tr = &t[1]; + if (err == MP_OKAY) { + t = (sp_int*)XMALLOC(sizeof(sp_int) * 2, NULL, DYNAMIC_TYPE_BIGINT); + if (t == NULL) + err = MP_MEM; + else + tr = &t[1]; + } #endif if (err == MP_OKAY) { @@ -1114,14 +1129,18 @@ int sp_mul(sp_int* a, sp_int* b, sp_int* r) * a SP integer to square. * m SP integer modulus. * r SP integer result. - * returns MP_VAL when m is 0, MP_MEM when dynamic memory allocation fails and - * MP_OKAY otherwise. + * returns MP_VAL when m is 0 or a is too big, MP_MEM when dynamic memory + * allocation fails and MP_OKAY otherwise. */ static int sp_sqrmod(sp_int* a, sp_int* m, sp_int* r) { - int err; + int err = MP_OKAY; - err = sp_mul(a, a, r); + if (a->used * 2 > SP_INT_DIGITS) + err = MP_VAL; + + if (err == MP_OKAY) + err = sp_mul(a, a, r); if (err == MP_OKAY) err = sp_mod(r, m, r); @@ -1147,10 +1166,15 @@ int sp_mulmod(sp_int* a, sp_int* b, sp_int* m, sp_int* r) sp_int t[1]; #endif + if (a->used + b->used > SP_INT_DIGITS) + err = MP_VAL; + #ifdef WOLFSSL_SMALL_STACK - t = (sp_int*)XMALLOC(sizeof(sp_int), NULL, DYNAMIC_TYPE_BIGINT); - if (t == NULL) { - err = MP_MEM; + if (err == MP_OKAY) { + t = (sp_int*)XMALLOC(sizeof(sp_int), NULL, DYNAMIC_TYPE_BIGINT); + if (t == NULL) { + err = MP_MEM; + } } #endif if (err == MP_OKAY) { @@ -1364,7 +1388,9 @@ int sp_invmod(sp_int* a, sp_int* m, sp_int* r) */ err = sp_invmod(m, a, r); if (err == MP_OKAY) { - sp_mul(r, m, r); + err = sp_mul(r, m, r); + } + if (err == MP_OKAY) { sp_sub_d(r, 1, r); sp_div(r, a, r, NULL); sp_sub(m, r, r); @@ -1509,6 +1535,9 @@ int sp_exptmod(sp_int* b, sp_int* e, sp_int* m, sp_int* r) sp_set(r, 0); done = 1; } + else if (m->used * 2 > SP_INT_DIGITS) { + err = BAD_FUNC_ARG; + } if (!done && (err == MP_OKAY)) { #ifndef WOLFSSL_SP_NO_2048 @@ -1517,8 +1546,8 @@ int sp_exptmod(sp_int* b, sp_int* e, sp_int* m, sp_int* r) err = sp_ModExp_1024(b, e, m, r); done = 1; } - else if ((mBits == 1024) && sp_isodd(m) && (bBits <= 1024) && - (eBits 
<= 1024)) { + else if ((mBits == 2048) && sp_isodd(m) && (bBits <= 2048) && + (eBits <= 2048)) { err = sp_ModExp_2048(b, e, m, r); done = 1; } @@ -1549,7 +1578,7 @@ int sp_exptmod(sp_int* b, sp_int* e, sp_int* m, sp_int* r) #ifdef WOLFSSL_SMALL_STACK if (!done && (err == MP_OKAY)) { - t = (sp_int*)XMALLOC(sizeof(sp_int) * 2, NULL, DYNAMIC_TYPE_BIGINT); + t = (sp_int*)XMALLOC(sizeof(sp_int), NULL, DYNAMIC_TYPE_BIGINT); if (t == NULL) { err = MP_MEM; } @@ -1557,13 +1586,25 @@ int sp_exptmod(sp_int* b, sp_int* e, sp_int* m, sp_int* r) #endif if (!done && (err == MP_OKAY)) { sp_init(t); - sp_copy(b, t); - for (i = eBits-2; err == MP_OKAY && i >= 0; i--) { - err = sp_sqrmod(t, m, t); - if (err == MP_OKAY && - (e->dp[i / SP_WORD_SIZE] >> (i % SP_WORD_SIZE)) & 1) { - err = sp_mulmod(t, b, m, t); + if (sp_cmp(b, m) != MP_LT) { + err = sp_mod(b, m, t); + if (err == MP_OKAY && sp_iszero(t)) { + sp_set(r, 0); + done = 1; + } + } + else { + sp_copy(b, t); + } + + if (!done && (err == MP_OKAY)) { + for (i = eBits-2; err == MP_OKAY && i >= 0; i--) { + err = sp_sqrmod(t, m, t); + if (err == MP_OKAY && (e->dp[i / SP_WORD_SIZE] >> + (i % SP_WORD_SIZE)) & 1) { + err = sp_mulmod(t, b, m, t); + } } } } diff --git a/wolfssl/wolfcrypt/sp_int.h b/wolfssl/wolfcrypt/sp_int.h index 6215c072f..77622d043 100644 --- a/wolfssl/wolfcrypt/sp_int.h +++ b/wolfssl/wolfcrypt/sp_int.h @@ -161,7 +161,7 @@ MP_API int sp_init_multi(sp_int* a, sp_int* b, sp_int* c, sp_int* d, sp_int* e, sp_int* f); MP_API void sp_clear(sp_int* a); MP_API int sp_unsigned_bin_size(sp_int* a); -MP_API int sp_read_unsigned_bin(sp_int* a, const byte* in, word32 inSz); +MP_API int sp_read_unsigned_bin(sp_int* a, const byte* in, int inSz); MP_API int sp_read_radix(sp_int* a, const char* in, int radix); MP_API int sp_cmp(sp_int* a, sp_int* b); MP_API int sp_count_bits(sp_int* a); From e063fb1631f1d117a633a07844f7af94c834d7ac Mon Sep 17 00:00:00 2001 From: Sean Parkinson Date: Fri, 13 Dec 2019 09:08:55 +1000 Subject: [PATCH 3/3] 
sp_int.c: Strip leading zeros in sp_read_radix --- wolfcrypt/src/sp_int.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/wolfcrypt/src/sp_int.c b/wolfcrypt/src/sp_int.c index f14420f3b..b835ae5d3 100644 --- a/wolfcrypt/src/sp_int.c +++ b/wolfcrypt/src/sp_int.c @@ -210,6 +210,10 @@ int sp_read_radix(sp_int* a, const char* in, int radix) err = BAD_FUNC_ARG; } + while (*in == '0') { + in++; + } + if (err == MP_OKAY) { a->dp[0] = 0; for (i = (int)(XSTRLEN(in) - 1); i >= 0; i--) { @@ -244,8 +248,9 @@ int sp_read_radix(sp_int* a, const char* in, int radix) for (k++; k < a->size; k++) a->dp[k] = 0; + + sp_clamp(a); } - sp_clamp(a); return err; }