diff --git a/wolfcrypt/benchmark/benchmark.c b/wolfcrypt/benchmark/benchmark.c index 6fd063d4a..a11f9d0c0 100644 --- a/wolfcrypt/benchmark/benchmark.c +++ b/wolfcrypt/benchmark/benchmark.c @@ -458,8 +458,7 @@ static const char* bench_result_words1[][4] = { #endif }; -#if (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) ||\ - defined(WOLFSSL_KEY_GEN) || defined(HAVE_NTRU) || \ +#if !defined(NO_RSA) || defined(WOLFSSL_KEY_GEN) || defined(HAVE_NTRU) || \ defined(HAVE_ECC) || !defined(NO_DH) || defined(HAVE_ECC_ENCRYPT) || \ defined(HAVE_CURVE25519) || defined(HAVE_CURVE25519_SHARED_SECRET) || \ defined(HAVE_ED25519) @@ -578,9 +577,7 @@ static const char* bench_desc_words[][9] = { #if defined(HAVE_ED25519) || defined(HAVE_CURVE25519) || defined(HAVE_ECC) || \ defined(HAVE_ECC) || defined(HAVE_NTRU) || !defined(NO_DH) || \ - (!defined(NO_RSA) && !defined(WOLFSSL_RSA_VERIFY_ONLY)) || \ - defined(HAVE_SCRYPT) - + !defined(NO_RSA) || defined(HAVE_SCRYPT) #define BENCH_ASYM #endif @@ -3862,43 +3859,136 @@ void bench_rsaKeyGen_size(int doAsync, int keySz) #define RSA_BUF_SIZE 384 /* for up to 3072 bit */ +#ifndef WOLFSSL_RSA_PUBLIC_ONLY +#elif defined(USE_CERT_BUFFERS_2048) +static unsigned char rsa_2048_sig[] = { + 0x8c, 0x9e, 0x37, 0xbf, 0xc3, 0xa6, 0xba, 0x1c, + 0x53, 0x22, 0x40, 0x4b, 0x8b, 0x0d, 0x3c, 0x0e, + 0x2e, 0x8c, 0x31, 0x2c, 0x47, 0xbf, 0x03, 0x48, + 0x18, 0x46, 0x73, 0x8d, 0xd7, 0xdd, 0x17, 0x64, + 0x0d, 0x7f, 0xdc, 0x74, 0xed, 0x80, 0xc3, 0xe8, + 0x9a, 0x18, 0x33, 0xd4, 0xe6, 0xc5, 0xe1, 0x54, + 0x75, 0xd1, 0xbb, 0x40, 0xde, 0xa8, 0xb9, 0x1b, + 0x14, 0xe8, 0xc1, 0x39, 0xeb, 0xa0, 0x69, 0x8a, + 0xc6, 0x9b, 0xef, 0x53, 0xb5, 0x23, 0x2b, 0x78, + 0x06, 0x43, 0x37, 0x11, 0x81, 0x84, 0x73, 0x33, + 0x33, 0xfe, 0xf7, 0x5d, 0x2b, 0x84, 0xd6, 0x83, + 0xd6, 0xdd, 0x55, 0x33, 0xef, 0xd1, 0xf7, 0x12, + 0xb0, 0xc2, 0x0e, 0xb1, 0x78, 0xd4, 0xa8, 0xa3, + 0x25, 0xeb, 0xed, 0x9a, 0xb3, 0xee, 0xc3, 0x7e, + 0xce, 0x13, 0x18, 0x86, 0x31, 0xe1, 0xef, 0x01, + 0x0f, 0x6e, 0x67, 0x24, 0x74, 0xbd, 0x0b, 0x7f, + 0xa9, 0xca, 0x6f, 0xaa, 0x83, 0x28, 0x90, 0x40, + 0xf1, 0xb5, 0x10, 0x0e, 0x26, 0x03, 0x05, 0x5d, + 0x87, 0xb4, 0xe0, 0x4c, 0x98, 0xd8, 0xc6, 0x42, + 0x89, 0x77, 0xeb, 0xb6, 0xd4, 0xe6, 0x26, 0xf3, + 0x31, 0x25, 0xde, 0x28, 0x38, 0x58, 0xe8, 0x2c, + 0xf4, 0x56, 0x7c, 0xb6, 0xfd, 0x99, 0xb0, 0xb0, + 0xf4, 0x83, 0xb6, 0x74, 0xa9, 0x5b, 0x9f, 0xe8, + 0xe9, 0xf1, 0xa1, 0x2a, 0xbd, 0xf6, 0x83, 0x28, + 0x09, 0xda, 0xa6, 0xd6, 0xcd, 0x61, 0x60, 0xf7, + 0x13, 0x4e, 0x46, 0x57, 0x38, 0x1e, 0x11, 0x92, + 0x6b, 0x6b, 0xcf, 0xd3, 0xf4, 0x8b, 0x66, 0x03, + 0x25, 0xa3, 0x7a, 0x2f, 0xce, 0xc1, 0x85, 0xa5, + 0x48, 0x91, 0x8a, 0xb3, 0x4f, 0x5d, 0x98, 0xb1, + 0x69, 0x58, 0x47, 0x69, 0x0c, 0x52, 0xdc, 0x42, + 0x4c, 0xef, 0xe8, 0xd4, 0x4d, 0x6a, 0x33, 0x7d, + 0x9e, 0xd2, 0x51, 0xe6, 0x41, 0xbf, 0x4f, 0xa2 +}; +#elif defined(USE_CERT_BUFFERS_3072) +static unsigned char rsa_3072_sig[] = { + 0x1a, 0xd6, 0x0d, 0xfd, 0xe3, 0x41, 0x95, 0x76, + 0x27, 0x16, 0x7d, 0xc7, 0x94, 0x16, 0xca, 0xa8, + 0x26, 0x08, 0xbe, 0x78, 0x87, 0x72, 0x4c, 0xd9, + 0xa7, 0xfc, 0x33, 0x77, 0x2d, 0x53, 0x07, 0xb5, + 0x8c, 0xce, 0x48, 0x17, 0x9b, 0xff, 0x9f, 0x9b, + 0x17, 0xc4, 0xbb, 0x72, 0xed, 0xdb, 0xa0, 0x34, + 0x69, 0x5b, 0xc7, 0x4e, 0xbf, 0xec, 0x13, 0xc5, + 0x98, 0x71, 0x9a, 0x4e, 0x18, 0x0e, 0xcb, 0xe7, + 0xc6, 0xd5, 0x21, 0x31, 0x7c, 0x0d, 0xae, 0x14, + 0x2b, 0x87, 0x4f, 0x77, 0x95, 0x2e, 0x26, 0xe2, + 0x83, 0xfe, 0x49, 0x1e, 0x87, 0x19, 0x4a, 0x63, + 0x73, 0x75, 0xf1, 0xf5, 0x71, 0xd2, 0xce, 0xd4, + 0x39, 0x2b, 0xd9, 0xe0, 0x76, 0x70, 0xc8, 0xf8, + 0xed, 0xdf, 0x90, 0x57, 0x17, 0xb9, 0x16, 0xf6, + 0xe9, 0x49, 0x48, 0xce, 0x5a, 0x8b, 0xe4, 0x84, + 0x7c, 0xf3, 0x31, 0x68, 0x97, 0x45, 0x68, 0x38, + 0x50, 0x3a, 0x70, 0xbd, 0xb3, 0xd3, 0xd2, 0xe0, + 0x56, 0x5b, 0xc2, 0x0c, 0x2c, 0x10, 0x70, 0x7b, + 0xd4, 0x99, 0xf9, 0x38, 0x31, 0xb1, 0x86, 0xa0, + 0x07, 0xf1, 0xf6, 0x53, 0xb0, 0x44, 0x82, 0x40, + 0xd2, 0xab, 0x0e, 0x71, 0x5d, 0xe1, 0xea, 0x3a, + 0x77, 0xc9, 0xef, 0xfe, 0x54, 0x65, 0xa3, 0x49, + 0xfd, 0xa5, 0x33, 0xaa, 0x16, 0x1a, 0x38, 0xe7, + 0xaa, 0xb7, 0x13, 0xb2, 0x3b, 0xc7, 0x00, 0x87, + 0x12, 0xfe, 0xfd, 0xf4, 0x55, 0x6d, 0x1d, 0x4a, + 0x0e, 0xad, 0xd0, 0x4c, 0x55, 0x91, 0x60, 0xd9, + 0xef, 0x74, 0x69, 0x22, 0x8c, 0x51, 0x65, 0xc2, + 0x04, 0xac, 0xd3, 0x8d, 0xf7, 0x35, 0x29, 0x13, + 0x6d, 0x61, 0x7c, 0x39, 0x2f, 0x41, 0x4c, 0xdf, + 0x38, 0xfd, 0x1a, 0x7d, 0x42, 0xa7, 0x6f, 0x3f, + 0x3d, 0x9b, 0xd1, 0x97, 0xab, 0xc0, 0xa7, 0x28, + 0x1c, 0xc0, 0x02, 0x26, 0xeb, 0xce, 0xf9, 0xe1, + 0x34, 0x45, 0xaf, 0xbf, 0x8d, 0xb8, 0xe0, 0xff, + 0xd9, 0x6f, 0x77, 0xf3, 0xf7, 0xed, 0x6a, 0xbb, + 0x03, 0x52, 0xfb, 0x38, 0xfc, 0xea, 0x9f, 0xc9, + 0x98, 0xed, 0x21, 0x45, 0xaf, 0x43, 0x2b, 0x64, + 0x96, 0x82, 0x30, 0xe9, 0xb4, 0x36, 0x89, 0x77, + 0x07, 0x4a, 0xc6, 0x1f, 0x38, 0x7a, 0xee, 0xb6, + 0x86, 0xf6, 0x2f, 0x03, 0xec, 0xa2, 0xe5, 0x48, + 0xe5, 0x5a, 0xf5, 0x1c, 0xd2, 0xd9, 0xd8, 0x2d, + 0x9d, 0x06, 0x07, 0xc9, 0x8b, 0x5d, 0xe0, 0x0f, + 0x5e, 0x0c, 0x53, 0x27, 0xff, 0x23, 0xee, 0xca, + 0x5e, 0x4d, 0xf1, 0x95, 0x77, 0x78, 0x1f, 0xf2, + 0x44, 0x5b, 0x7d, 0x01, 0x49, 0x61, 0x6f, 0x6d, + 0xbf, 0xf5, 0x19, 0x06, 0x39, 0xe9, 0xe9, 0x29, + 0xde, 0x47, 0x5e, 0x2e, 0x1f, 0x68, 0xf4, 0x32, + 0x5e, 0xe9, 0xd0, 0xa7, 0xb4, 0x2a, 0x45, 0xdf, + 0x15, 0x7d, 0x0d, 0x5b, 0xef, 0xc6, 0x23, 0xac +}; +#else + #error Not Supported Yet! +#endif + static void bench_rsa_helper(int doAsync, RsaKey rsaKey[BENCH_MAX_PENDING], int rsaKeySz) { -#ifndef WOLFSSL_RSA_VERIFY_ONLY int ret = 0, i, times, count = 0, pending = 0; -#ifndef WOLFSSL_RSA_PUBLIC_ONLY word32 idx = 0; -#endif +#ifndef WOLFSSL_RSA_PUBLIC_ONLY const char* messageStr = "Everyone gets Friday off."; const int len = (int)XSTRLEN((char*)messageStr); +#endif double start = 0.0f; const char**desc = bench_desc_words[lng_index]; +#ifndef WOLFSSL_RSA_PUBLIC_ONLY DECLARE_VAR_INIT(message, byte, len, messageStr, HEAP_HINT); -#else - (void)doAsync; - (void)rsaKey; - (void)rsaKeySz; #endif -#ifndef WOLFSSL_RSA_VERIFY_ONLY #ifdef USE_CERT_BUFFERS_1024 DECLARE_ARRAY(enc, byte, BENCH_MAX_PENDING, 128, HEAP_HINT); - DECLARE_ARRAY(out, byte, BENCH_MAX_PENDING, 128, HEAP_HINT); + #ifndef WOLFSSL_RSA_VERIFY_INLINE + DECLARE_ARRAY(out, byte, BENCH_MAX_PENDING, 128, HEAP_HINT); + #else + byte* out[BENCH_MAX_PENDING]; + #endif #elif defined(USE_CERT_BUFFERS_2048) DECLARE_ARRAY(enc, byte, BENCH_MAX_PENDING, 256, HEAP_HINT); - #ifndef WOLFSSL_RSA_PUBLIC_ONLY + #ifndef WOLFSSL_RSA_VERIFY_INLINE DECLARE_ARRAY(out, byte, BENCH_MAX_PENDING, 256, HEAP_HINT); + #else + byte* out[BENCH_MAX_PENDING]; #endif #elif defined(USE_CERT_BUFFERS_3072) DECLARE_ARRAY(enc, byte, BENCH_MAX_PENDING, 384, HEAP_HINT); - #ifndef WOLFSSL_RSA_PUBLIC_ONLY + #ifndef WOLFSSL_RSA_VERIFY_INLINE DECLARE_ARRAY(out, byte, BENCH_MAX_PENDING, 384, HEAP_HINT); + #else + byte* out[BENCH_MAX_PENDING]; #endif #else #error "need a cert buffer size" #endif /* USE_CERT_BUFFERS */ -#endif if (!rsa_sign_verify) { #ifndef WOLFSSL_RSA_VERIFY_ONLY @@ -3995,6 +4085,7 @@ exit_rsa_sign: if (ret < 0) { goto exit; } +#endif /* capture resulting encrypt length */ idx = rsaKeySz/8; @@ -4009,8 +4100,26 @@ exit_rsa_sign: for (i = 0; i < BENCH_MAX_PENDING; i++) { if (bench_async_check(&ret, BENCH_ASYNC_GET_DEV(&rsaKey[i]), 1, ×, ntimes, &pending)) { + #ifndef WOLFSSL_RSA_VERIFY_INLINE ret = wc_RsaSSL_Verify(enc[i], idx, out[i], rsaKeySz/8, &rsaKey[i]); + #elif defined(USE_CERT_BUFFERS_2048) + XMEMCPY(enc[i], rsa_2048_sig, sizeof(rsa_2048_sig)); + idx = sizeof(rsa_2048_sig); + out[i] = NULL; + ret = wc_RsaSSL_VerifyInline(enc[i], idx, &out[i], + &rsaKey[i]); + if (ret > 0) + ret = 0; + #elif defined(USE_CERT_BUFFERS_3072) + XMEMCPY(enc[i], rsa_3072_sig, sizeof(rsa_3072_sig)); + idx = sizeof(rsa_3072_sig); + out[i] = NULL; + ret = wc_RsaSSL_VerifyInline(enc[i], idx, &out[i], + &rsaKey[i]); + if (ret > 0) + ret = 0; + #endif if (!bench_async_handle(&ret, BENCH_ASYNC_GET_DEV(&rsaKey[i]), 1, ×, &pending)) { @@ -4024,7 +4133,6 @@ exit_rsa_sign: exit_rsa_verify: bench_stats_asym_finish("RSA", rsaKeySz, desc[5], doAsync, count, start, ret); -#endif } FREE_ARRAY(enc, BENCH_MAX_PENDING, HEAP_HINT); @@ -4085,7 +4193,18 @@ void bench_rsa(int doAsync) } #else #ifdef USE_CERT_BUFFERS_2048 - ret = mp_read_unsigned_bin(&rsaKey[i].n, &tmp[13], 256); + ret = mp_read_unsigned_bin(&rsaKey[i].n, &tmp[12], 256); + if (ret != 0) { + printf("Setting modulus failed! %d\n", ret); + goto exit_bench_rsa; + } + ret = mp_set_int(&rsaKey[i].e, WC_RSA_EXPONENT); + if (ret != 0) { + printf("Setting public exponent failed! %d\n", ret); + goto exit_bench_rsa; + } + #elif defined(USE_CERT_BUFFERS_3072) + ret = mp_read_unsigned_bin(&rsaKey[i].n, &tmp[12], 384); if (ret != 0) { printf("Setting modulus failed! %d\n", ret); goto exit_bench_rsa; diff --git a/wolfcrypt/src/rsa.c b/wolfcrypt/src/rsa.c index 896d09e2c..c5c50c7e3 100644 --- a/wolfcrypt/src/rsa.c +++ b/wolfcrypt/src/rsa.c @@ -1253,7 +1253,9 @@ static int RsaUnPad(const byte *pkcsBlock, unsigned int pkcsBlockLen, { int ret; word32 i; +#ifndef WOLFSSL_RSA_VERIFY_ONLY byte invalid = 0; +#endif if (output == NULL || pkcsBlockLen == 0) { return BAD_FUNC_ARG; @@ -1278,6 +1280,7 @@ static int RsaUnPad(const byte *pkcsBlock, unsigned int pkcsBlockLen, *output = (byte *)(pkcsBlock + i); ret = pkcsBlockLen - i; } +#ifndef WOLFSSL_RSA_VERIFY_ONLY else { word32 j; byte pastSep = 0; @@ -1301,6 +1304,7 @@ static int RsaUnPad(const byte *pkcsBlock, unsigned int pkcsBlockLen, *output = (byte *)(pkcsBlock + i); ret = ((int)~invalid) & (pkcsBlockLen - i); } +#endif return ret; } diff --git a/wolfcrypt/src/sp_c32.c b/wolfcrypt/src/sp_c32.c index 37f023068..cc20b81d7 100644 --- a/wolfcrypt/src/sp_c32.c +++ b/wolfcrypt/src/sp_c32.c @@ -3753,9 +3753,9 @@ static void sp_3072_from_bin(sp_digit* r, int max, const byte* a, int n) r[0] = 0; for (i = n-1; i >= 0; i--) { r[j] |= ((sp_digit)a[i]) << s; - if (s >= 15) { - r[j] &= 0x7fffff; - s = 23 - s; + if (s >= 14) { + r[j] &= 0x3fffff; + s = 22 - s; if (j + 1 >= max) break; r[++j] = a[i] >> s; @@ -3776,27 +3776,27 @@ static void sp_3072_from_bin(sp_digit* r, int max, const byte* a, int n) */ static void sp_3072_from_mp(sp_digit* r, int max, mp_int* a) { -#if DIGIT_BIT == 23 +#if DIGIT_BIT == 22 int j; XMEMCPY(r, a->dp, sizeof(sp_digit) * a->used); for (j = a->used; j < max; j++) r[j] = 0; -#elif DIGIT_BIT > 23 +#elif DIGIT_BIT > 22 int i, j = 0, s = 0; r[0] = 0; for (i = 0; i < a->used && j < max; i++) { r[j] |= a->dp[i] << s; - r[j] &= 0x7fffff; - s = 23 - s; + r[j] &= 0x3fffff; + s = 22 - s; if (j + 1 >= max) break; r[++j] = a->dp[i] >> s; - while (s + 23 <= DIGIT_BIT) { - s += 23; - r[j] &= 0x7fffff; + while (s + 22 <= DIGIT_BIT) { + s += 22; + r[j] &= 0x3fffff; if (j + 1 >= max) break; if (s < DIGIT_BIT) @@ -3815,11 +3815,11 @@ static void sp_3072_from_mp(sp_digit* r, int max, mp_int* a) r[0] = 0; for (i = 0; i < a->used && j < max; i++) { r[j] |= ((sp_digit)a->dp[i]) << s; - if (s + DIGIT_BIT >= 23) { - r[j] &= 0x7fffff; + if (s + DIGIT_BIT >= 22) { + r[j] &= 0x3fffff; if (j + 1 >= max) break; - s = 23 - s; + s = 22 - s; if (s == DIGIT_BIT) { r[++j] = 0; s = 0; @@ -3848,23 +3848,23 @@ static void sp_3072_to_bin(sp_digit* r, byte* a) { int i, j, s = 0, b; - for (i=0; i<135; i++) { - r[i+1] += r[i] >> 23; - r[i] &= 0x7fffff; + for (i=0; i<139; i++) { + r[i+1] += r[i] >> 22; + r[i] &= 0x3fffff; } j = 3072 / 8 - 1; a[j] = 0; - for (i=0; i<136 && j>=0; i++) { + for (i=0; i<140 && j>=0; i++) { b = 0; a[j--] |= r[i] << s; b += 8 - s; if (j < 0) break; - while (b < 23) { + while (b < 22) { a[j--] = r[i] >> b; b += 8; if (j < 0) break; } - s = 8 - (b - 23); + s = 8 - (b - 22); if (j >= 0) a[j] = 0; if (s != 0) @@ -3879,22 +3879,22 @@ static void sp_3072_to_bin(sp_digit* r, byte* a) * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static void sp_3072_mul_17(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_3072_mul_35(sp_digit* r, const sp_digit* a, const sp_digit* b) { int i, j; - int64_t t[34]; + int64_t t[70]; XMEMSET(t, 0, sizeof(t)); - for (i=0; i<17; i++) { - for (j=0; j<17; j++) + for (i=0; i<35; i++) { + for (j=0; j<35; j++) t[i+j] += ((int64_t)a[i]) * b[j]; } - for (i=0; i<33; i++) { - r[i] = t[i] & 0x7fffff; - t[i+1] += t[i] >> 23; + for (i=0; i<69; i++) { + r[i] = t[i] & 0x3fffff; + t[i+1] += t[i] >> 22; } - r[33] = (sp_digit)t[33]; + r[69] = (sp_digit)t[69]; } /* Square a and put result in r. (r = a * a) @@ -3902,22 +3902,22 @@ SP_NOINLINE static void sp_3072_mul_17(sp_digit* r, const sp_digit* a, * r A single precision integer. * a A single precision integer. */ -SP_NOINLINE static void sp_3072_sqr_17(sp_digit* r, const sp_digit* a) +SP_NOINLINE static void sp_3072_sqr_35(sp_digit* r, const sp_digit* a) { int i, j; - int64_t t[34]; + int64_t t[70]; XMEMSET(t, 0, sizeof(t)); - for (i=0; i<17; i++) { + for (i=0; i<35; i++) { for (j=0; j> 23; + for (i=0; i<69; i++) { + r[i] = t[i] & 0x3fffff; + t[i+1] += t[i] >> 22; } - r[33] = (sp_digit)t[33]; + r[69] = (sp_digit)t[69]; } /* Add b to a into r. (r = a + b) @@ -3926,33 +3926,7 @@ SP_NOINLINE static void sp_3072_sqr_17(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static int sp_3072_add_17(sp_digit* r, const sp_digit* a, - const sp_digit* b) -{ - int i; - - for (i = 0; i < 16; i += 8) { - r[i + 0] = a[i + 0] + b[i + 0]; - r[i + 1] = a[i + 1] + b[i + 1]; - r[i + 2] = a[i + 2] + b[i + 2]; - r[i + 3] = a[i + 3] + b[i + 3]; - r[i + 4] = a[i + 4] + b[i + 4]; - r[i + 5] = a[i + 5] + b[i + 5]; - r[i + 6] = a[i + 6] + b[i + 6]; - r[i + 7] = a[i + 7] + b[i + 7]; - } - r[16] = a[16] + b[16]; - - return 0; -} - -/* Add b to a into r. (r = a + b) - * - * r A single precision integer. - * a A single precision integer. - * b A single precision integer. - */ -SP_NOINLINE static int sp_3072_add_34(sp_digit* r, const sp_digit* a, +SP_NOINLINE static int sp_3072_add_35(sp_digit* r, const sp_digit* a, const sp_digit* b) { int i; @@ -3969,88 +3943,18 @@ SP_NOINLINE static int sp_3072_add_34(sp_digit* r, const sp_digit* a, } r[32] = a[32] + b[32]; r[33] = a[33] + b[33]; + r[34] = a[34] + b[34]; return 0; } -/* Sub b from a into r. (r = a - b) - * - * r A single precision integer. - * a A single precision integer. - * b A single precision integer. - */ -SP_NOINLINE static int sp_3072_sub_34(sp_digit* r, const sp_digit* a, - const sp_digit* b) -{ - int i; - - for (i = 0; i < 32; i += 8) { - r[i + 0] = a[i + 0] - b[i + 0]; - r[i + 1] = a[i + 1] - b[i + 1]; - r[i + 2] = a[i + 2] - b[i + 2]; - r[i + 3] = a[i + 3] - b[i + 3]; - r[i + 4] = a[i + 4] - b[i + 4]; - r[i + 5] = a[i + 5] - b[i + 5]; - r[i + 6] = a[i + 6] - b[i + 6]; - r[i + 7] = a[i + 7] - b[i + 7]; - } - r[32] = a[32] - b[32]; - r[33] = a[33] - b[33]; - - return 0; -} - -/* Multiply a and b into r. (r = a * b) - * - * r A single precision integer. - * a A single precision integer. - * b A single precision integer. - */ -SP_NOINLINE static void sp_3072_mul_34(sp_digit* r, const sp_digit* a, - const sp_digit* b) -{ - sp_digit* z0 = r; - sp_digit z1[34]; - sp_digit* a1 = z1; - sp_digit b1[17]; - sp_digit* z2 = r + 34; - sp_3072_add_17(a1, a, &a[17]); - sp_3072_add_17(b1, b, &b[17]); - sp_3072_mul_17(z2, &a[17], &b[17]); - sp_3072_mul_17(z0, a, b); - sp_3072_mul_17(z1, a1, b1); - sp_3072_sub_34(z1, z1, z2); - sp_3072_sub_34(z1, z1, z0); - sp_3072_add_34(r + 17, r + 17, z1); -} - -/* Square a and put result in r. (r = a * a) - * - * r A single precision integer. - * a A single precision integer. - */ -SP_NOINLINE static void sp_3072_sqr_34(sp_digit* r, const sp_digit* a) -{ - sp_digit* z0 = r; - sp_digit z1[34]; - sp_digit* a1 = z1; - sp_digit* z2 = r + 34; - sp_3072_add_17(a1, a, &a[17]); - sp_3072_sqr_17(z2, &a[17]); - sp_3072_sqr_17(z0, a); - sp_3072_sqr_17(z1, a1); - sp_3072_sub_34(z1, z1, z2); - sp_3072_sub_34(z1, z1, z0); - sp_3072_add_34(r + 17, r + 17, z1); -} - /* Add b to a into r. (r = a + b) * * r A single precision integer. * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static int sp_3072_add_68(sp_digit* r, const sp_digit* a, +SP_NOINLINE static int sp_3072_add_70(sp_digit* r, const sp_digit* a, const sp_digit* b) { int i; @@ -4069,6 +3973,8 @@ SP_NOINLINE static int sp_3072_add_68(sp_digit* r, const sp_digit* a, r[65] = a[65] + b[65]; r[66] = a[66] + b[66]; r[67] = a[67] + b[67]; + r[68] = a[68] + b[68]; + r[69] = a[69] + b[69]; return 0; } @@ -4079,7 +3985,7 @@ SP_NOINLINE static int sp_3072_add_68(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static int sp_3072_sub_68(sp_digit* r, const sp_digit* a, +SP_NOINLINE static int sp_3072_sub_70(sp_digit* r, const sp_digit* a, const sp_digit* b) { int i; @@ -4098,6 +4004,8 @@ SP_NOINLINE static int sp_3072_sub_68(sp_digit* r, const sp_digit* a, r[65] = a[65] - b[65]; r[66] = a[66] - b[66]; r[67] = a[67] - b[67]; + r[68] = a[68] - b[68]; + r[69] = a[69] - b[69]; return 0; } @@ -4108,22 +4016,22 @@ SP_NOINLINE static int sp_3072_sub_68(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static void sp_3072_mul_68(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_3072_mul_70(sp_digit* r, const sp_digit* a, const sp_digit* b) { sp_digit* z0 = r; - sp_digit z1[68]; + sp_digit z1[70]; sp_digit* a1 = z1; - sp_digit b1[34]; - sp_digit* z2 = r + 68; - sp_3072_add_34(a1, a, &a[34]); - sp_3072_add_34(b1, b, &b[34]); - sp_3072_mul_34(z2, &a[34], &b[34]); - sp_3072_mul_34(z0, a, b); - sp_3072_mul_34(z1, a1, b1); - sp_3072_sub_68(z1, z1, z2); - sp_3072_sub_68(z1, z1, z0); - sp_3072_add_68(r + 34, r + 34, z1); + sp_digit b1[35]; + sp_digit* z2 = r + 70; + sp_3072_add_35(a1, a, &a[35]); + sp_3072_add_35(b1, b, &b[35]); + sp_3072_mul_35(z2, &a[35], &b[35]); + sp_3072_mul_35(z0, a, b); + sp_3072_mul_35(z1, a1, b1); + sp_3072_sub_70(z1, z1, z2); + sp_3072_sub_70(z1, z1, z0); + sp_3072_add_70(r + 35, r + 35, z1); } /* Square a and put result in r. (r = a * a) @@ -4131,19 +4039,19 @@ SP_NOINLINE static void sp_3072_mul_68(sp_digit* r, const sp_digit* a, * r A single precision integer. * a A single precision integer. */ -SP_NOINLINE static void sp_3072_sqr_68(sp_digit* r, const sp_digit* a) +SP_NOINLINE static void sp_3072_sqr_70(sp_digit* r, const sp_digit* a) { sp_digit* z0 = r; - sp_digit z1[68]; + sp_digit z1[70]; sp_digit* a1 = z1; - sp_digit* z2 = r + 68; - sp_3072_add_34(a1, a, &a[34]); - sp_3072_sqr_34(z2, &a[34]); - sp_3072_sqr_34(z0, a); - sp_3072_sqr_34(z1, a1); - sp_3072_sub_68(z1, z1, z2); - sp_3072_sub_68(z1, z1, z0); - sp_3072_add_68(r + 34, r + 34, z1); + sp_digit* z2 = r + 70; + sp_3072_add_35(a1, a, &a[35]); + sp_3072_sqr_35(z2, &a[35]); + sp_3072_sqr_35(z0, a); + sp_3072_sqr_35(z1, a1); + sp_3072_sub_70(z1, z1, z2); + sp_3072_sub_70(z1, z1, z0); + sp_3072_add_70(r + 35, r + 35, z1); } /* Add b to a into r. (r = a + b) @@ -4152,7 +4060,7 @@ SP_NOINLINE static void sp_3072_sqr_68(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static int sp_3072_add_136(sp_digit* r, const sp_digit* a, +SP_NOINLINE static int sp_3072_add_140(sp_digit* r, const sp_digit* a, const sp_digit* b) { int i; @@ -4167,6 +4075,10 @@ SP_NOINLINE static int sp_3072_add_136(sp_digit* r, const sp_digit* a, r[i + 6] = a[i + 6] + b[i + 6]; r[i + 7] = a[i + 7] + b[i + 7]; } + r[136] = a[136] + b[136]; + r[137] = a[137] + b[137]; + r[138] = a[138] + b[138]; + r[139] = a[139] + b[139]; return 0; } @@ -4177,7 +4089,7 @@ SP_NOINLINE static int sp_3072_add_136(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static int sp_3072_sub_136(sp_digit* r, const sp_digit* a, +SP_NOINLINE static int sp_3072_sub_140(sp_digit* r, const sp_digit* a, const sp_digit* b) { int i; @@ -4192,6 +4104,10 @@ SP_NOINLINE static int sp_3072_sub_136(sp_digit* r, const sp_digit* a, r[i + 6] = a[i + 6] - b[i + 6]; r[i + 7] = a[i + 7] - b[i + 7]; } + r[136] = a[136] - b[136]; + r[137] = a[137] - b[137]; + r[138] = a[138] - b[138]; + r[139] = a[139] - b[139]; return 0; } @@ -4202,22 +4118,22 @@ SP_NOINLINE static int sp_3072_sub_136(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static void sp_3072_mul_136(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_3072_mul_140(sp_digit* r, const sp_digit* a, const sp_digit* b) { sp_digit* z0 = r; - sp_digit z1[136]; + sp_digit z1[140]; sp_digit* a1 = z1; - sp_digit b1[68]; - sp_digit* z2 = r + 136; - sp_3072_add_68(a1, a, &a[68]); - sp_3072_add_68(b1, b, &b[68]); - sp_3072_mul_68(z2, &a[68], &b[68]); - sp_3072_mul_68(z0, a, b); - sp_3072_mul_68(z1, a1, b1); - sp_3072_sub_136(z1, z1, z2); - sp_3072_sub_136(z1, z1, z0); - sp_3072_add_136(r + 68, r + 68, z1); + sp_digit b1[70]; + sp_digit* z2 = r + 140; + sp_3072_add_70(a1, a, &a[70]); + sp_3072_add_70(b1, b, &b[70]); + sp_3072_mul_70(z2, &a[70], &b[70]); + sp_3072_mul_70(z0, a, b); + sp_3072_mul_70(z1, a1, b1); + sp_3072_sub_140(z1, z1, z2); + sp_3072_sub_140(z1, z1, z0); + sp_3072_add_140(r + 70, r + 70, z1); } /* Square a and put result in r. (r = a * a) @@ -4225,19 +4141,19 @@ SP_NOINLINE static void sp_3072_mul_136(sp_digit* r, const sp_digit* a, * r A single precision integer. * a A single precision integer. */ -SP_NOINLINE static void sp_3072_sqr_136(sp_digit* r, const sp_digit* a) +SP_NOINLINE static void sp_3072_sqr_140(sp_digit* r, const sp_digit* a) { sp_digit* z0 = r; - sp_digit z1[136]; + sp_digit z1[140]; sp_digit* a1 = z1; - sp_digit* z2 = r + 136; - sp_3072_add_68(a1, a, &a[68]); - sp_3072_sqr_68(z2, &a[68]); - sp_3072_sqr_68(z0, a); - sp_3072_sqr_68(z1, a1); - sp_3072_sub_136(z1, z1, z2); - sp_3072_sub_136(z1, z1, z0); - sp_3072_add_136(r + 68, r + 68, z1); + sp_digit* z2 = r + 140; + sp_3072_add_70(a1, a, &a[70]); + sp_3072_sqr_70(z2, &a[70]); + sp_3072_sqr_70(z0, a); + sp_3072_sqr_70(z1, a1); + sp_3072_sub_140(z1, z1, z2); + sp_3072_sub_140(z1, z1, z0); + sp_3072_add_140(r + 70, r + 70, z1); } #endif /* WOLFSSL_SP_SMALL */ @@ -4248,12 +4164,12 @@ SP_NOINLINE static void sp_3072_sqr_136(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static int sp_3072_add_136(sp_digit* r, const sp_digit* a, +SP_NOINLINE static int sp_3072_add_140(sp_digit* r, const sp_digit* a, const sp_digit* b) { int i; - for (i = 0; i < 136; i++) + for (i = 0; i < 140; i++) r[i] = a[i] + b[i]; return 0; @@ -4266,12 +4182,12 @@ SP_NOINLINE static int sp_3072_add_136(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static int sp_3072_sub_136(sp_digit* r, const sp_digit* a, +SP_NOINLINE static int sp_3072_sub_140(sp_digit* r, const sp_digit* a, const sp_digit* b) { int i; - for (i = 0; i < 136; i++) + for (i = 0; i < 140; i++) r[i] = a[i] - b[i]; return 0; @@ -4285,30 +4201,30 @@ SP_NOINLINE static int sp_3072_sub_136(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static void sp_3072_mul_136(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_3072_mul_140(sp_digit* r, const sp_digit* a, const sp_digit* b) { int i, j, k; int64_t c; - c = ((int64_t)a[135]) * b[135]; - r[271] = (sp_digit)(c >> 23); - c = (c & 0x7fffff) << 23; - for (k = 269; k >= 0; k--) { - for (i = 135; i >= 0; i--) { + c = ((int64_t)a[139]) * b[139]; + r[279] = (sp_digit)(c >> 22); + c = (c & 0x3fffff) << 22; + for (k = 277; k >= 0; k--) { + for (i = 139; i >= 0; i--) { j = k - i; - if (j >= 136) + if (j >= 140) break; if (j < 0) continue; c += ((int64_t)a[i]) * b[j]; } - r[k + 2] += c >> 46; - r[k + 1] = (c >> 23) & 0x7fffff; - c = (c & 0x7fffff) << 23; + r[k + 2] += c >> 44; + r[k + 1] = (c >> 22) & 0x3fffff; + c = (c & 0x3fffff) << 22; } - r[0] = (sp_digit)(c >> 23); + r[0] = (sp_digit)(c >> 22); } /* Square a and put result in r. (r = a * a) @@ -4316,18 +4232,18 @@ SP_NOINLINE static void sp_3072_mul_136(sp_digit* r, const sp_digit* a, * r A single precision integer. * a A single precision integer. */ -SP_NOINLINE static void sp_3072_sqr_136(sp_digit* r, const sp_digit* a) +SP_NOINLINE static void sp_3072_sqr_140(sp_digit* r, const sp_digit* a) { int i, j, k; int64_t c; - c = ((int64_t)a[135]) * a[135]; - r[271] = (sp_digit)(c >> 23); - c = (c & 0x7fffff) << 23; - for (k = 269; k >= 0; k--) { - for (i = 135; i >= 0; i--) { + c = ((int64_t)a[139]) * a[139]; + r[279] = (sp_digit)(c >> 22); + c = (c & 0x3fffff) << 22; + for (k = 277; k >= 0; k--) { + for (i = 139; i >= 0; i--) { j = k - i; - if (j >= 136 || i <= j) + if (j >= 140 || i <= j) break; if (j < 0) continue; @@ -4337,11 +4253,11 @@ SP_NOINLINE static void sp_3072_sqr_136(sp_digit* r, const sp_digit* a) if (i == j) c += ((int64_t)a[i]) * a[i]; - r[k + 2] += c >> 46; - r[k + 1] = (c >> 23) & 0x7fffff; - c = (c & 0x7fffff) << 23; + r[k + 2] += c >> 44; + r[k + 1] = (c >> 22) & 0x3fffff; + c = (c & 0x3fffff) << 22; } - r[0] = (sp_digit)(c >> 23); + r[0] = (sp_digit)(c >> 22); } #endif /* WOLFSSL_SP_SMALL */ @@ -4354,12 +4270,12 @@ SP_NOINLINE static void sp_3072_sqr_136(sp_digit* r, const sp_digit* a) * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static int sp_3072_add_68(sp_digit* r, const sp_digit* a, +SP_NOINLINE static int sp_3072_add_70(sp_digit* r, const sp_digit* a, const sp_digit* b) { int i; - for (i = 0; i < 68; i++) + for (i = 0; i < 70; i++) r[i] = a[i] + b[i]; return 0; @@ -4372,12 +4288,12 @@ SP_NOINLINE static int sp_3072_add_68(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static int sp_3072_sub_68(sp_digit* r, const sp_digit* a, +SP_NOINLINE static int sp_3072_sub_70(sp_digit* r, const sp_digit* a, const sp_digit* b) { int i; - for (i = 0; i < 68; i++) + for (i = 0; i < 70; i++) r[i] = a[i] - b[i]; return 0; @@ -4391,30 +4307,30 @@ SP_NOINLINE static int sp_3072_sub_68(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static void sp_3072_mul_68(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_3072_mul_70(sp_digit* r, const sp_digit* a, const sp_digit* b) { int i, j, k; int64_t c; - c = ((int64_t)a[67]) * b[67]; - r[135] = (sp_digit)(c >> 23); - c = (c & 0x7fffff) << 23; - for (k = 133; k >= 0; k--) { - for (i = 67; i >= 0; i--) { + c = ((int64_t)a[69]) * b[69]; + r[139] = (sp_digit)(c >> 22); + c = (c & 0x3fffff) << 22; + for (k = 137; k >= 0; k--) { + for (i = 69; i >= 0; i--) { j = k - i; - if (j >= 68) + if (j >= 70) break; if (j < 0) continue; c += ((int64_t)a[i]) * b[j]; } - r[k + 2] += c >> 46; - r[k + 1] = (c >> 23) & 0x7fffff; - c = (c & 0x7fffff) << 23; + r[k + 2] += c >> 44; + r[k + 1] = (c >> 22) & 0x3fffff; + c = (c & 0x3fffff) << 22; } - r[0] = (sp_digit)(c >> 23); + r[0] = (sp_digit)(c >> 22); } /* Square a and put result in r. (r = a * a) @@ -4422,18 +4338,18 @@ SP_NOINLINE static void sp_3072_mul_68(sp_digit* r, const sp_digit* a, * r A single precision integer. * a A single precision integer. */ -SP_NOINLINE static void sp_3072_sqr_68(sp_digit* r, const sp_digit* a) +SP_NOINLINE static void sp_3072_sqr_70(sp_digit* r, const sp_digit* a) { int i, j, k; int64_t c; - c = ((int64_t)a[67]) * a[67]; - r[135] = (sp_digit)(c >> 23); - c = (c & 0x7fffff) << 23; - for (k = 133; k >= 0; k--) { - for (i = 67; i >= 0; i--) { + c = ((int64_t)a[69]) * a[69]; + r[139] = (sp_digit)(c >> 22); + c = (c & 0x3fffff) << 22; + for (k = 137; k >= 0; k--) { + for (i = 69; i >= 0; i--) { j = k - i; - if (j >= 68 || i <= j) + if (j >= 70 || i <= j) break; if (j < 0) continue; @@ -4443,11 +4359,11 @@ SP_NOINLINE static void sp_3072_sqr_68(sp_digit* r, const sp_digit* a) if (i == j) c += ((int64_t)a[i]) * a[i]; - r[k + 2] += c >> 46; - r[k + 1] = (c >> 23) & 0x7fffff; - c = (c & 0x7fffff) << 23; + r[k + 2] += c >> 44; + r[k + 1] = (c >> 22) & 0x3fffff; + c = (c & 0x3fffff) << 22; } - r[0] = (sp_digit)(c >> 23); + r[0] = (sp_digit)(c >> 22); } #endif /* WOLFSSL_SP_SMALL */ @@ -4467,10 +4383,10 @@ static void sp_3072_mont_setup(sp_digit* a, sp_digit* rho) x *= 2 - b * x; /* here x*a==1 mod 2**8 */ x *= 2 - b * x; /* here x*a==1 mod 2**16 */ x *= 2 - b * x; /* here x*a==1 mod 2**32 */ - x &= 0x7fffff; + x &= 0x3fffff; /* rho = -1/m mod b */ - *rho = (1L << 23) - x; + *rho = (1L << 22) - x; } /* Multiply a by scalar b into r. (r = a * b) @@ -4479,7 +4395,7 @@ static void sp_3072_mont_setup(sp_digit* a, sp_digit* rho) * a A single precision integer. * b A scalar. */ -SP_NOINLINE static void sp_3072_mul_d_136(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_3072_mul_d_140(sp_digit* r, const sp_digit* a, const sp_digit b) { #ifdef WOLFSSL_SP_SMALL @@ -4487,37 +4403,43 @@ SP_NOINLINE static void sp_3072_mul_d_136(sp_digit* r, const sp_digit* a, int64_t t = 0; int i; - for (i = 0; i < 136; i++) { + for (i = 0; i < 140; i++) { t += tb * a[i]; - r[i] = t & 0x7fffff; - t >>= 23; + r[i] = t & 0x3fffff; + t >>= 22; } - r[136] = (sp_digit)t; + r[140] = (sp_digit)t; #else int64_t tb = b; int64_t t[8]; int i; - t[0] = tb * a[0]; r[0] = t[0] & 0x7fffff; + t[0] = tb * a[0]; r[0] = t[0] & 0x3fffff; for (i = 0; i < 136; i += 8) { t[1] = tb * a[i+1]; - r[i+1] = (sp_digit)(t[0] >> 23) + (t[1] & 0x7fffff); + r[i+1] = (sp_digit)(t[0] >> 22) + (t[1] & 0x3fffff); t[2] = tb * a[i+2]; - r[i+2] = (sp_digit)(t[1] >> 23) + (t[2] & 0x7fffff); + r[i+2] = (sp_digit)(t[1] >> 22) + (t[2] & 0x3fffff); t[3] = tb * a[i+3]; - r[i+3] = (sp_digit)(t[2] >> 23) + (t[3] & 0x7fffff); + r[i+3] = (sp_digit)(t[2] >> 22) + (t[3] & 0x3fffff); t[4] = tb * a[i+4]; - r[i+4] = (sp_digit)(t[3] >> 23) + (t[4] & 0x7fffff); + r[i+4] = (sp_digit)(t[3] >> 22) + (t[4] & 0x3fffff); t[5] = tb * a[i+5]; - r[i+5] = (sp_digit)(t[4] >> 23) + (t[5] & 0x7fffff); + r[i+5] = (sp_digit)(t[4] >> 22) + (t[5] & 0x3fffff); t[6] = tb * a[i+6]; - r[i+6] = (sp_digit)(t[5] >> 23) + (t[6] & 0x7fffff); + r[i+6] = (sp_digit)(t[5] >> 22) + (t[6] & 0x3fffff); t[7] = tb * a[i+7]; - r[i+7] = (sp_digit)(t[6] >> 23) + (t[7] & 0x7fffff); + r[i+7] = (sp_digit)(t[6] >> 22) + (t[7] & 0x3fffff); t[0] = tb * a[i+8]; - r[i+8] = (sp_digit)(t[7] >> 23) + (t[0] & 0x7fffff); + r[i+8] = (sp_digit)(t[7] >> 22) + (t[0] & 0x3fffff); } - r[136] = (sp_digit)(t[7] >> 23); + t[1] = tb * a[137]; + r[137] = (sp_digit)(t[0] >> 22) + (t[1] & 0x3fffff); + t[2] = tb * a[138]; + r[138] = (sp_digit)(t[1] >> 22) + (t[2] & 0x3fffff); + t[3] = tb * a[139]; + r[139] = (sp_digit)(t[2] >> 22) + (t[3] & 0x3fffff); + r[140] = (sp_digit)(t[3] >> 22); #endif /* WOLFSSL_SP_SMALL */ } @@ -4529,35 +4451,37 @@ SP_NOINLINE static void sp_3072_mul_d_136(sp_digit* r, const sp_digit* a, * r A single precision number. * m A signle precision number. */ -static void sp_3072_mont_norm_68(sp_digit* r, sp_digit* m) +static void sp_3072_mont_norm_70(sp_digit* r, sp_digit* m) { /* Set r = 2^n - 1. */ #ifdef WOLFSSL_SP_SMALL int i; - for (i=0; i<67; i++) - r[i] = 0x7fffff; + for (i=0; i<69; i++) + r[i] = 0x3fffff; #else int i; for (i = 0; i < 64; i += 8) { - r[i + 0] = 0x7fffff; - r[i + 1] = 0x7fffff; - r[i + 2] = 0x7fffff; - r[i + 3] = 0x7fffff; - r[i + 4] = 0x7fffff; - r[i + 5] = 0x7fffff; - r[i + 6] = 0x7fffff; - r[i + 7] = 0x7fffff; + r[i + 0] = 0x3fffff; + r[i + 1] = 0x3fffff; + r[i + 2] = 0x3fffff; + r[i + 3] = 0x3fffff; + r[i + 4] = 0x3fffff; + r[i + 5] = 0x3fffff; + r[i + 6] = 0x3fffff; + r[i + 7] = 0x3fffff; } - r[64] = 0x7fffff; - r[65] = 0x7fffff; - r[66] = 0x7fffff; + r[64] = 0x3fffff; + r[65] = 0x3fffff; + r[66] = 0x3fffff; + r[67] = 0x3fffff; + r[68] = 0x3fffff; #endif - r[67] = 0x3ffffl; + r[69] = 0x3ffffl; /* r = (2^n - 1) mod n */ - sp_3072_sub_68(r, r, m); + sp_3072_sub_70(r, r, m); /* Add one so r = 2^n mod m */ r[0] += 1; @@ -4570,17 +4494,19 @@ static void sp_3072_mont_norm_68(sp_digit* r, sp_digit* m) * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. */ -static sp_digit sp_3072_cmp_68(const sp_digit* a, const sp_digit* b) +static sp_digit sp_3072_cmp_70(const sp_digit* a, const sp_digit* b) { sp_digit r = 0; #ifdef WOLFSSL_SP_SMALL int i; - for (i=67; i>=0; i--) + for (i=69; i>=0; i--) r |= (a[i] - b[i]) & (0 - !r); #else int i; + r |= (a[69] - b[69]) & (0 - !r); + r |= (a[68] - b[68]) & (0 - !r); r |= (a[67] - b[67]) & (0 - !r); r |= (a[66] - b[66]) & (0 - !r); r |= (a[65] - b[65]) & (0 - !r); @@ -4608,13 +4534,13 @@ static sp_digit sp_3072_cmp_68(const sp_digit* a, const sp_digit* b) * b A single precision number to subtract. * m Mask value to apply. */ -static void sp_3072_cond_sub_68(sp_digit* r, const sp_digit* a, +static void sp_3072_cond_sub_70(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit m) { #ifdef WOLFSSL_SP_SMALL int i; - for (i = 0; i < 68; i++) + for (i = 0; i < 70; i++) r[i] = a[i] - (b[i] & m); #else int i; @@ -4633,6 +4559,8 @@ static void sp_3072_cond_sub_68(sp_digit* r, const sp_digit* a, r[65] = a[65] - (b[65] & m); r[66] = a[66] - (b[66] & m); r[67] = a[67] - (b[67] & m); + r[68] = a[68] - (b[68] & m); + r[69] = a[69] - (b[69] & m); #endif /* WOLFSSL_SP_SMALL */ } @@ -4642,7 +4570,7 @@ static void sp_3072_cond_sub_68(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A scalar. */ -SP_NOINLINE static void sp_3072_mul_add_68(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_3072_mul_add_70(sp_digit* r, const sp_digit* a, const sp_digit b) { #ifdef WOLFSSL_SP_SMALL @@ -4650,74 +4578,80 @@ SP_NOINLINE static void sp_3072_mul_add_68(sp_digit* r, const sp_digit* a, int64_t t = 0; int i; - for (i = 0; i < 68; i++) { + for (i = 0; i < 70; i++) { t += (tb * a[i]) + r[i]; - r[i] = t & 0x7fffff; - t >>= 23; + r[i] = t & 0x3fffff; + t >>= 22; } - r[68] += t; + r[70] += t; #else int64_t tb = b; int64_t t[8]; int i; - t[0] = tb * a[0]; r[0] += t[0] & 0x7fffff; + t[0] = tb * a[0]; r[0] += t[0] & 0x3fffff; for (i = 0; i < 64; i += 8) { t[1] = tb * a[i+1]; - r[i+1] += (t[0] >> 23) + (t[1] & 0x7fffff); + r[i+1] += (t[0] >> 22) + (t[1] & 0x3fffff); t[2] = tb * a[i+2]; - r[i+2] += (t[1] >> 23) + (t[2] & 0x7fffff); + r[i+2] += (t[1] >> 22) + (t[2] & 0x3fffff); t[3] = tb * a[i+3]; - r[i+3] += (t[2] >> 23) + (t[3] & 0x7fffff); + r[i+3] += (t[2] >> 22) + (t[3] & 0x3fffff); t[4] = tb * a[i+4]; - r[i+4] += (t[3] >> 23) + (t[4] & 0x7fffff); + r[i+4] += (t[3] >> 22) + (t[4] & 0x3fffff); t[5] = tb * a[i+5]; - r[i+5] += (t[4] >> 23) + (t[5] & 0x7fffff); + r[i+5] += (t[4] >> 22) + (t[5] & 0x3fffff); t[6] = tb * a[i+6]; - r[i+6] += (t[5] >> 23) + (t[6] & 0x7fffff); + r[i+6] += (t[5] >> 22) + (t[6] & 0x3fffff); t[7] = tb * a[i+7]; - r[i+7] += (t[6] >> 23) + (t[7] & 0x7fffff); + r[i+7] += (t[6] >> 22) + (t[7] & 0x3fffff); t[0] = tb * a[i+8]; - r[i+8] += (t[7] >> 23) + (t[0] & 0x7fffff); + r[i+8] += (t[7] >> 22) + (t[0] & 0x3fffff); } - t[1] = tb * a[65]; r[65] += (t[0] >> 23) + (t[1] & 0x7fffff); - t[2] = tb * a[66]; r[66] += (t[1] >> 23) + (t[2] & 0x7fffff); - t[3] = tb * a[67]; r[67] += (t[2] >> 23) + (t[3] & 0x7fffff); - r[68] += t[3] >> 23; + t[1] = tb * a[65]; r[65] += (t[0] >> 22) + (t[1] & 0x3fffff); + t[2] = tb * a[66]; r[66] += (t[1] >> 22) + (t[2] & 0x3fffff); + t[3] = tb * a[67]; r[67] += (t[2] >> 22) + (t[3] & 0x3fffff); + t[4] = tb * a[68]; r[68] += (t[3] >> 22) + (t[4] & 0x3fffff); + t[5] = tb * a[69]; r[69] += (t[4] >> 22) + (t[5] & 0x3fffff); + r[70] += t[5] >> 22; #endif /* WOLFSSL_SP_SMALL */ } -/* Normalize the values in each word to 23. +/* Normalize the values in each word to 22. * * a Array of sp_digit to normalize. */ -static void sp_3072_norm_68(sp_digit* a) +static void sp_3072_norm_70(sp_digit* a) { #ifdef WOLFSSL_SP_SMALL int i; - for (i = 0; i < 67; i++) { - a[i+1] += a[i] >> 23; - a[i] &= 0x7fffff; + for (i = 0; i < 69; i++) { + a[i+1] += a[i] >> 22; + a[i] &= 0x3fffff; } #else int i; for (i = 0; i < 64; i += 8) { - a[i+1] += a[i+0] >> 23; a[i+0] &= 0x7fffff; - a[i+2] += a[i+1] >> 23; a[i+1] &= 0x7fffff; - a[i+3] += a[i+2] >> 23; a[i+2] &= 0x7fffff; - a[i+4] += a[i+3] >> 23; a[i+3] &= 0x7fffff; - a[i+5] += a[i+4] >> 23; a[i+4] &= 0x7fffff; - a[i+6] += a[i+5] >> 23; a[i+5] &= 0x7fffff; - a[i+7] += a[i+6] >> 23; a[i+6] &= 0x7fffff; - a[i+8] += a[i+7] >> 23; a[i+7] &= 0x7fffff; - a[i+9] += a[i+8] >> 23; a[i+8] &= 0x7fffff; + a[i+1] += a[i+0] >> 22; a[i+0] &= 0x3fffff; + a[i+2] += a[i+1] >> 22; a[i+1] &= 0x3fffff; + a[i+3] += a[i+2] >> 22; a[i+2] &= 0x3fffff; + a[i+4] += a[i+3] >> 22; a[i+3] &= 0x3fffff; + a[i+5] += a[i+4] >> 22; a[i+4] &= 0x3fffff; + a[i+6] += a[i+5] >> 22; a[i+5] &= 0x3fffff; + a[i+7] += a[i+6] >> 22; a[i+6] &= 0x3fffff; + a[i+8] += a[i+7] >> 22; a[i+7] &= 0x3fffff; + a[i+9] += a[i+8] >> 22; a[i+8] &= 0x3fffff; } - a[64+1] += a[64] >> 23; - a[64] &= 0x7fffff; - a[65+1] += a[65] >> 23; - a[65] &= 0x7fffff; - a[66+1] += a[66] >> 23; - a[66] &= 0x7fffff; + a[64+1] += a[64] >> 22; + a[64] &= 0x3fffff; + a[65+1] += a[65] >> 22; + a[65] &= 0x3fffff; + a[66+1] += a[66] >> 22; + a[66] &= 0x3fffff; + a[67+1] += a[67] >> 22; + a[67] &= 0x3fffff; + a[68+1] += a[68] >> 22; + a[68] &= 0x3fffff; #endif } @@ -4726,54 +4660,58 @@ static void sp_3072_norm_68(sp_digit* a) * r A single precision number. * a A single precision number. */ -static void sp_3072_mont_shift_68(sp_digit* r, const sp_digit* a) +static void sp_3072_mont_shift_70(sp_digit* r, const sp_digit* a) { #ifdef WOLFSSL_SP_SMALL int i; sp_digit n, s; - s = a[68]; - n = a[67] >> 18; - for (i = 0; i < 67; i++) { - n += (s & 0x7fffff) << 5; - r[i] = n & 0x7fffff; - n >>= 23; - s = a[69 + i] + (s >> 23); + s = a[70]; + n = a[69] >> 18; + for (i = 0; i < 69; i++) { + n += (s & 0x3fffff) << 4; + r[i] = n & 0x3fffff; + n >>= 22; + s = a[71 + i] + (s >> 22); } - n += s << 5; - r[67] = n; + n += s << 4; + r[69] = n; #else sp_digit n, s; int i; - s = a[68]; n = a[67] >> 18; + s = a[70]; n = a[69] >> 18; for (i = 0; i < 64; i += 8) { - n += (s & 0x7fffff) << 5; r[i+0] = n & 0x7fffff; - n >>= 23; s = a[i+69] + (s >> 23); - n += (s & 0x7fffff) << 5; r[i+1] = n & 0x7fffff; - n >>= 23; s = a[i+70] + (s >> 23); - n += (s & 0x7fffff) << 5; r[i+2] = n & 0x7fffff; - n >>= 23; s = a[i+71] + (s >> 23); - n += (s & 0x7fffff) << 5; r[i+3] = n & 0x7fffff; - n >>= 23; s = a[i+72] + (s >> 23); - n += (s & 0x7fffff) << 5; r[i+4] = n & 0x7fffff; - n >>= 23; s = a[i+73] + (s >> 23); - n += (s & 0x7fffff) << 5; r[i+5] = n & 0x7fffff; - n >>= 23; s = a[i+74] + (s >> 23); - n += (s & 0x7fffff) << 5; r[i+6] = n & 0x7fffff; - n >>= 23; s = a[i+75] + (s >> 23); - n += (s & 0x7fffff) << 5; r[i+7] = n & 0x7fffff; - n >>= 23; s = a[i+76] + (s >> 23); + n += (s & 0x3fffff) << 4; r[i+0] = n & 0x3fffff; + n >>= 22; s = a[i+71] + (s >> 22); + n += (s & 0x3fffff) << 4; r[i+1] = n & 0x3fffff; + n >>= 22; s = a[i+72] + (s >> 22); + n += (s & 0x3fffff) << 4; r[i+2] = n & 0x3fffff; + n >>= 22; s = a[i+73] + (s >> 22); + n += (s & 0x3fffff) << 4; r[i+3] = n & 0x3fffff; + n >>= 22; s = a[i+74] + (s >> 22); + n += (s & 0x3fffff) << 4; r[i+4] = n & 0x3fffff; + n >>= 22; s = a[i+75] + (s >> 22); + n += (s & 0x3fffff) << 4; r[i+5] = n & 0x3fffff; + n >>= 22; s = a[i+76] + (s >> 22); + n += (s & 0x3fffff) << 4; r[i+6] = n & 0x3fffff; + n >>= 22; s = a[i+77] + (s >> 22); + n += (s & 0x3fffff) << 4; r[i+7] = n & 0x3fffff; + n >>= 22; s = a[i+78] + (s >> 22); } - n += (s & 0x7fffff) << 5; r[64] = n & 0x7fffff; - n >>= 23; s = a[133] + (s >> 23); - n += (s & 0x7fffff) << 5; r[65] = n & 0x7fffff; - n >>= 23; s = a[134] + (s >> 23); - n += (s & 0x7fffff) << 5; r[66] = n & 0x7fffff; - n >>= 23; s = a[135] + (s >> 23); - n += s << 5; r[67] = n; + n += (s & 0x3fffff) << 4; r[64] = n & 0x3fffff; + n >>= 22; s = a[135] + (s >> 22); + n += (s & 0x3fffff) << 4; r[65] = n & 0x3fffff; + n >>= 22; s = a[136] + (s >> 22); + n += (s & 0x3fffff) << 4; r[66] = n & 0x3fffff; + n >>= 22; s = a[137] + (s >> 22); + n += (s & 0x3fffff) << 4; r[67] = n & 0x3fffff; + n >>= 22; s = a[138] + (s >> 22); + n += (s & 0x3fffff) << 4; r[68] = n & 0x3fffff; + n >>= 22; s = a[139] + (s >> 22); + n += s << 4; r[69] = n; #endif /* WOLFSSL_SP_SMALL */ - XMEMSET(&r[68], 0, sizeof(*r) * 68); + XMEMSET(&r[70], 0, sizeof(*r) * 70); } /* Reduce the number back to 3072 bits using Montgomery reduction. @@ -4782,24 +4720,24 @@ static void sp_3072_mont_shift_68(sp_digit* r, const sp_digit* a) * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ -static void sp_3072_mont_reduce_68(sp_digit* a, sp_digit* m, sp_digit mp) +static void sp_3072_mont_reduce_70(sp_digit* a, sp_digit* m, sp_digit mp) { int i; sp_digit mu; - for (i=0; i<67; i++) { - mu = (a[i] * mp) & 0x7fffff; - sp_3072_mul_add_68(a+i, m, mu); - a[i+1] += a[i] >> 23; + for (i=0; i<69; i++) { + mu = (a[i] * mp) & 0x3fffff; + sp_3072_mul_add_70(a+i, m, mu); + a[i+1] += a[i] >> 22; } mu = (a[i] * mp) & 0x3ffffl; - sp_3072_mul_add_68(a+i, m, mu); - a[i+1] += a[i] >> 23; - a[i] &= 0x7fffff; + sp_3072_mul_add_70(a+i, m, mu); + a[i+1] += a[i] >> 22; + a[i] &= 0x3fffff; - sp_3072_mont_shift_68(a, a); - sp_3072_cond_sub_68(a, a, m, 0 - ((a[67] >> 18) > 0)); - sp_3072_norm_68(a); + sp_3072_mont_shift_70(a, a); + sp_3072_cond_sub_70(a, a, m, 0 - ((a[69] >> 18) > 0)); + sp_3072_norm_70(a); } /* Multiply two Montogmery form numbers mod the modulus (prime). @@ -4811,11 +4749,11 @@ static void sp_3072_mont_reduce_68(sp_digit* a, sp_digit* m, sp_digit mp) * m Modulus (prime). * mp Montogmery mulitplier. */ -static void sp_3072_mont_mul_68(sp_digit* r, sp_digit* a, sp_digit* b, +static void sp_3072_mont_mul_70(sp_digit* r, sp_digit* a, sp_digit* b, sp_digit* m, sp_digit mp) { - sp_3072_mul_68(r, a, b); - sp_3072_mont_reduce_68(r, m, mp); + sp_3072_mul_70(r, a, b); + sp_3072_mont_reduce_70(r, m, mp); } /* Square the Montgomery form number. (r = a * a mod m) @@ -4825,11 +4763,11 @@ static void sp_3072_mont_mul_68(sp_digit* r, sp_digit* a, sp_digit* b, * m Modulus (prime). * mp Montogmery mulitplier. */ -static void sp_3072_mont_sqr_68(sp_digit* r, sp_digit* a, sp_digit* m, +static void sp_3072_mont_sqr_70(sp_digit* r, sp_digit* a, sp_digit* m, sp_digit mp) { - sp_3072_sqr_68(r, a); - sp_3072_mont_reduce_68(r, m, mp); + sp_3072_sqr_70(r, a); + sp_3072_mont_reduce_70(r, m, mp); } /* Multiply a by scalar b into r. (r = a * b) @@ -4838,7 +4776,7 @@ static void sp_3072_mont_sqr_68(sp_digit* r, sp_digit* a, sp_digit* m, * a A single precision integer. * b A scalar. */ -SP_NOINLINE static void sp_3072_mul_d_68(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_3072_mul_d_70(sp_digit* r, const sp_digit* a, const sp_digit b) { #ifdef WOLFSSL_SP_SMALL @@ -4846,43 +4784,47 @@ SP_NOINLINE static void sp_3072_mul_d_68(sp_digit* r, const sp_digit* a, int64_t t = 0; int i; - for (i = 0; i < 68; i++) { + for (i = 0; i < 70; i++) { t += tb * a[i]; - r[i] = t & 0x7fffff; - t >>= 23; + r[i] = t & 0x3fffff; + t >>= 22; } - r[68] = (sp_digit)t; + r[70] = (sp_digit)t; #else int64_t tb = b; int64_t t[8]; int i; - t[0] = tb * a[0]; r[0] = t[0] & 0x7fffff; + t[0] = tb * a[0]; r[0] = t[0] & 0x3fffff; for (i = 0; i < 64; i += 8) { t[1] = tb * a[i+1]; - r[i+1] = (sp_digit)(t[0] >> 23) + (t[1] & 0x7fffff); + r[i+1] = (sp_digit)(t[0] >> 22) + (t[1] & 0x3fffff); t[2] = tb * a[i+2]; - r[i+2] = (sp_digit)(t[1] >> 23) + (t[2] & 0x7fffff); + r[i+2] = (sp_digit)(t[1] >> 22) + (t[2] & 0x3fffff); t[3] = tb * a[i+3]; - r[i+3] = (sp_digit)(t[2] >> 23) + (t[3] & 0x7fffff); + r[i+3] = (sp_digit)(t[2] >> 22) + (t[3] & 0x3fffff); t[4] = tb * a[i+4]; - r[i+4] = (sp_digit)(t[3] >> 23) + (t[4] & 0x7fffff); + r[i+4] = (sp_digit)(t[3] >> 22) + (t[4] & 0x3fffff); t[5] = tb * a[i+5]; - r[i+5] = (sp_digit)(t[4] >> 23) + (t[5] & 0x7fffff); + r[i+5] = (sp_digit)(t[4] >> 22) + (t[5] & 0x3fffff); t[6] = tb * a[i+6]; - r[i+6] = (sp_digit)(t[5] >> 23) + (t[6] & 0x7fffff); + r[i+6] = (sp_digit)(t[5] >> 22) + (t[6] & 0x3fffff); t[7] = tb * a[i+7]; - r[i+7] = (sp_digit)(t[6] >> 23) + (t[7] & 0x7fffff); + r[i+7] = (sp_digit)(t[6] >> 22) + (t[7] & 0x3fffff); t[0] = tb * a[i+8]; - r[i+8] = (sp_digit)(t[7] >> 23) + (t[0] & 0x7fffff); + r[i+8] = (sp_digit)(t[7] >> 22) + (t[0] & 0x3fffff); } t[1] = tb * a[65]; - r[65] = (sp_digit)(t[0] >> 23) + (t[1] & 0x7fffff); + r[65] = (sp_digit)(t[0] >> 22) + (t[1] & 0x3fffff); t[2] = tb * a[66]; - r[66] = (sp_digit)(t[1] >> 23) + (t[2] & 0x7fffff); + r[66] = (sp_digit)(t[1] >> 22) + (t[2] & 0x3fffff); t[3] = tb * a[67]; - r[67] = (sp_digit)(t[2] >> 23) + (t[3] & 0x7fffff); - r[68] = (sp_digit)(t[3] >> 23); + r[67] = (sp_digit)(t[2] >> 22) + (t[3] & 0x3fffff); + t[4] = tb * a[68]; + r[68] = (sp_digit)(t[3] >> 22) + (t[4] & 0x3fffff); + t[5] = tb * a[69]; + r[69] = (sp_digit)(t[4] >> 22) + (t[5] & 0x3fffff); + r[70] = (sp_digit)(t[5] >> 22); #endif /* WOLFSSL_SP_SMALL */ } @@ -4894,13 +4836,13 @@ SP_NOINLINE static void sp_3072_mul_d_68(sp_digit* r, const sp_digit* a, * b A single precision number to add. * m Mask value to apply. */ -static void sp_3072_cond_add_68(sp_digit* r, const sp_digit* a, +static void sp_3072_cond_add_70(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit m) { #ifdef WOLFSSL_SP_SMALL int i; - for (i = 0; i < 68; i++) + for (i = 0; i < 70; i++) r[i] = a[i] + (b[i] & m); #else int i; @@ -4919,6 +4861,8 @@ static void sp_3072_cond_add_68(sp_digit* r, const sp_digit* a, r[65] = a[65] + (b[65] & m); r[66] = a[66] + (b[66] & m); r[67] = a[67] + (b[67] & m); + r[68] = a[68] + (b[68] & m); + r[69] = a[69] + (b[69] & m); #endif /* WOLFSSL_SP_SMALL */ } @@ -4929,12 +4873,12 @@ static void sp_3072_cond_add_68(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static int sp_3072_sub_68(sp_digit* r, const sp_digit* a, +SP_NOINLINE static int sp_3072_sub_70(sp_digit* r, const sp_digit* a, const sp_digit* b) { int i; - for (i = 0; i < 68; i++) + for (i = 0; i < 70; i++) r[i] = a[i] - b[i]; return 0; @@ -4948,12 +4892,12 @@ SP_NOINLINE static int sp_3072_sub_68(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static int sp_3072_add_68(sp_digit* r, const sp_digit* a, +SP_NOINLINE static int sp_3072_add_70(sp_digit* r, const sp_digit* a, const sp_digit* b) { int i; - for (i = 0; i < 68; i++) + for (i = 0; i < 70; i++) r[i] = a[i] + b[i]; return 0; @@ -4968,7 +4912,7 @@ SP_NOINLINE static int sp_3072_add_68(sp_digit* r, const sp_digit* a, * r Remainder from the division. * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise. */ -static int sp_3072_div_68(sp_digit* a, sp_digit* d, sp_digit* m, +static int sp_3072_div_70(sp_digit* a, sp_digit* d, sp_digit* m, sp_digit* r) { int i; @@ -4977,18 +4921,18 @@ static int sp_3072_div_68(sp_digit* a, sp_digit* d, sp_digit* m, #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) sp_digit* td; #else - sp_digit t1d[136], t2d[68 + 1]; + sp_digit t1d[140], t2d[70 + 1]; #endif sp_digit* t1; sp_digit* t2; int err = MP_OKAY; #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) - td = XMALLOC(sizeof(sp_digit) * (3 * 68 + 1), NULL, + td = XMALLOC(sizeof(sp_digit) * (3 * 70 + 1), NULL, DYNAMIC_TYPE_TMP_BUFFER); if (td != NULL) { t1 = td; - t2 = td + 2 * 68; + t2 = td + 2 * 70; } else err = MEMORY_E; @@ -5000,41 +4944,41 @@ static int sp_3072_div_68(sp_digit* a, sp_digit* d, sp_digit* m, (void)m; if (err == MP_OKAY) { - div = d[67]; - XMEMCPY(t1, a, sizeof(*t1) * 2 * 68); - for (i=67; i>=0; i--) { - t1[68 + i] += t1[68 + i - 1] >> 23; - t1[68 + i - 1] &= 0x7fffff; - d1 = t1[68 + i]; - d1 <<= 23; - d1 += t1[68 + i - 1]; + div = d[69]; + XMEMCPY(t1, a, sizeof(*t1) * 2 * 70); + for (i=69; i>=0; i--) { + t1[70 + i] += t1[70 + i - 1] >> 22; + t1[70 + i - 1] &= 0x3fffff; + d1 = t1[70 + i]; + d1 <<= 22; + d1 += t1[70 + i - 1]; r1 = (sp_digit)(d1 / div); - sp_3072_mul_d_68(t2, d, r1); - sp_3072_sub_68(&t1[i], &t1[i], t2); - t1[68 + i] -= t2[68]; - t1[68 + i] += t1[68 + i - 1] >> 23; - t1[68 + i - 1] &= 0x7fffff; - r1 = (((-t1[68 + i]) << 23) - t1[68 + i - 1]) / div; + sp_3072_mul_d_70(t2, d, r1); + sp_3072_sub_70(&t1[i], &t1[i], t2); + t1[70 + i] -= t2[70]; + t1[70 + i] += t1[70 + i - 1] >> 22; + t1[70 + i - 1] &= 0x3fffff; + r1 = (((-t1[70 + i]) << 22) - t1[70 + i - 1]) / div; r1++; - sp_3072_mul_d_68(t2, d, r1); - sp_3072_add_68(&t1[i], &t1[i], t2); - t1[68 + i] += t1[68 + i - 1] >> 23; - t1[68 + i - 1] &= 0x7fffff; + sp_3072_mul_d_70(t2, d, r1); + sp_3072_add_70(&t1[i], &t1[i], t2); + t1[70 + i] += t1[70 + i - 1] >> 22; + t1[70 + i - 1] &= 0x3fffff; } - t1[68 - 1] += t1[68 - 2] >> 23; - t1[68 - 2] &= 0x7fffff; - d1 = t1[68 - 1]; + t1[70 - 1] += t1[70 - 2] >> 22; + t1[70 - 2] &= 0x3fffff; + d1 = t1[70 - 1]; r1 = (sp_digit)(d1 / div); - sp_3072_mul_d_68(t2, d, r1); - sp_3072_sub_68(t1, t1, t2); - XMEMCPY(r, t1, sizeof(*r) * 2 * 68); - for (i=0; i<66; i++) { - r[i+1] += r[i] >> 23; - r[i] &= 0x7fffff; + sp_3072_mul_d_70(t2, d, r1); + sp_3072_sub_70(t1, t1, t2); + XMEMCPY(r, t1, sizeof(*r) * 2 * 70); + for (i=0; i<68; i++) { + r[i+1] += r[i] >> 22; + r[i] &= 0x3fffff; } - sp_3072_cond_add_68(r, r, d, 0 - (r[67] < 0)); + sp_3072_cond_add_70(r, r, d, 0 - (r[69] < 0)); } #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) @@ -5052,9 +4996,9 @@ static int sp_3072_div_68(sp_digit* a, sp_digit* d, sp_digit* m, * m A single precision number that is the modulus to reduce with. * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise. */ -static int sp_3072_mod_68(sp_digit* r, sp_digit* a, sp_digit* m) +static int sp_3072_mod_70(sp_digit* r, sp_digit* a, sp_digit* m) { - return sp_3072_div_68(a, m, NULL, r); + return sp_3072_div_70(a, m, NULL, r); } /* Modular exponentiate a to the e mod m. (r = a^e mod m) @@ -5066,7 +5010,7 @@ static int sp_3072_mod_68(sp_digit* r, sp_digit* a, sp_digit* m) * m A single precision number that is the modulus. * returns 0 on success and MEMORY_E on dynamic memory allocation failure. */ -static int sp_3072_mod_exp_68(sp_digit* r, sp_digit* a, sp_digit* e, int bits, +static int sp_3072_mod_exp_70(sp_digit* r, sp_digit* a, sp_digit* e, int bits, sp_digit* m, int reduceA) { #ifdef WOLFSSL_SP_SMALL @@ -5079,62 +5023,62 @@ static int sp_3072_mod_exp_68(sp_digit* r, sp_digit* a, sp_digit* e, int bits, int c, y; int err = MP_OKAY; - td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 68 * 2, NULL, + td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 70 * 2, NULL, DYNAMIC_TYPE_TMP_BUFFER); if (td == NULL) err = MEMORY_E; if (err == MP_OKAY) { - XMEMSET(td, 0, sizeof(*td) * 3 * 68 * 2); + XMEMSET(td, 0, sizeof(*td) * 3 * 70 * 2); norm = t[0] = td; - t[1] = &td[68 * 2]; - t[2] = &td[2 * 68 * 2]; + t[1] = &td[70 * 2]; + t[2] = &td[2 * 70 * 2]; sp_3072_mont_setup(m, &mp); - sp_3072_mont_norm_68(norm, m); + sp_3072_mont_norm_70(norm, m); if (reduceA) - err = sp_3072_mod_68(t[1], a, m); + err = sp_3072_mod_70(t[1], a, m); else - XMEMCPY(t[1], a, sizeof(sp_digit) * 68); + XMEMCPY(t[1], a, sizeof(sp_digit) * 70); } if (err == MP_OKAY) { - sp_3072_mul_68(t[1], t[1], norm); - err = sp_3072_mod_68(t[1], t[1], m); + sp_3072_mul_70(t[1], t[1], norm); + err = sp_3072_mod_70(t[1], t[1], m); } if (err == MP_OKAY) { - i = bits / 23; - c = bits % 23; - n = e[i--] << (23 - c); + i = bits / 22; + c = bits % 22; + n = e[i--] << (22 - c); for (; ; c--) { if (c == 0) { if (i == -1) break; n = e[i--]; - c = 23; + c = 22; } - y = (n >> 22) & 1; + y = (n >> 21) & 1; n <<= 1; - sp_3072_mont_mul_68(t[y^1], t[0], t[1], m, mp); + sp_3072_mont_mul_70(t[y^1], t[0], t[1], m, mp); XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) + ((size_t)t[1] & addr_mask[y])), - sizeof(*t[2]) * 68 * 2); - sp_3072_mont_sqr_68(t[2], t[2], m, mp); + sizeof(*t[2]) * 70 * 2); + sp_3072_mont_sqr_70(t[2], t[2], m, mp); XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) + ((size_t)t[1] & addr_mask[y])), t[2], - sizeof(*t[2]) * 68 * 2); + sizeof(*t[2]) * 70 * 2); } - sp_3072_mont_reduce_68(t[0], m, mp); - n = sp_3072_cmp_68(t[0], m); - sp_3072_cond_sub_68(t[0], t[0], m, (n < 0) - 1); - XMEMCPY(r, t[0], sizeof(*r) * 68 * 2); + sp_3072_mont_reduce_70(t[0], m, mp); + n = sp_3072_cmp_70(t[0], m); + sp_3072_cond_sub_70(t[0], t[0], m, (n < 0) - 1); + XMEMCPY(r, t[0], sizeof(*r) * 70 * 2); } @@ -5144,7 +5088,7 @@ static int sp_3072_mod_exp_68(sp_digit* r, sp_digit* a, sp_digit* e, int bits, return err; #elif defined(WOLFSSL_SP_CACHE_RESISTANT) #ifndef WOLFSSL_SMALL_STACK - sp_digit t[3][136]; + sp_digit t[3][140]; #else sp_digit* td; sp_digit* t[3]; @@ -5157,15 +5101,15 @@ static int sp_3072_mod_exp_68(sp_digit* r, sp_digit* a, sp_digit* e, int bits, int err = MP_OKAY; #ifdef WOLFSSL_SMALL_STACK - td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 68 * 2, NULL, + td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 70 * 2, NULL, DYNAMIC_TYPE_TMP_BUFFER); if (td == NULL) err = MEMORY_E; if (err == MP_OKAY) { t[0] = td; - t[1] = &td[68 * 2]; - t[2] = &td[2 * 68 * 2]; + t[1] = &td[70 * 2]; + t[2] = &td[2 * 70 * 2]; norm = t[0]; } #else @@ -5174,49 +5118,49 @@ static int sp_3072_mod_exp_68(sp_digit* r, sp_digit* a, sp_digit* e, int bits, if (err == MP_OKAY) { sp_3072_mont_setup(m, &mp); - sp_3072_mont_norm_68(norm, m); + sp_3072_mont_norm_70(norm, m); if (reduceA) { - err = sp_3072_mod_68(t[1], a, m); + err = sp_3072_mod_70(t[1], a, m); if (err == MP_OKAY) { - sp_3072_mul_68(t[1], t[1], norm); - err = sp_3072_mod_68(t[1], t[1], m); + sp_3072_mul_70(t[1], t[1], norm); + err = sp_3072_mod_70(t[1], t[1], m); } } else { - sp_3072_mul_68(t[1], a, norm); - err = sp_3072_mod_68(t[1], t[1], m); + sp_3072_mul_70(t[1], a, norm); + err = sp_3072_mod_70(t[1], t[1], m); } } if (err == MP_OKAY) { - i = bits / 23; - c = bits % 23; - n = e[i--] << (23 - c); + i = bits / 22; + c = bits % 22; + n = e[i--] << (22 - c); for (; ; c--) { if (c == 0) { if (i == -1) break; n = e[i--]; - c = 23; + c = 22; } - y = (n >> 22) & 1; + y = (n >> 21) & 1; n <<= 1; - sp_3072_mont_mul_68(t[y^1], t[0], t[1], m, mp); + sp_3072_mont_mul_70(t[y^1], t[0], t[1], m, mp); XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) + ((size_t)t[1] & addr_mask[y])), sizeof(t[2])); - sp_3072_mont_sqr_68(t[2], t[2], m, mp); + sp_3072_mont_sqr_70(t[2], t[2], m, mp); XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) + ((size_t)t[1] & addr_mask[y])), t[2], sizeof(t[2])); } - sp_3072_mont_reduce_68(t[0], m, mp); - n = sp_3072_cmp_68(t[0], m); - sp_3072_cond_sub_68(t[0], t[0], m, (n < 0) - 1); + sp_3072_mont_reduce_70(t[0], m, mp); + n = sp_3072_cmp_70(t[0], m); + sp_3072_cond_sub_70(t[0], t[0], m, (n < 0) - 1); XMEMCPY(r, t[0], sizeof(t[0])); } @@ -5228,13 +5172,13 @@ static int sp_3072_mod_exp_68(sp_digit* r, sp_digit* a, sp_digit* e, int bits, return err; #else #ifndef WOLFSSL_SMALL_STACK - sp_digit t[32][136]; + sp_digit t[32][140]; #else sp_digit* t[32]; sp_digit* td; #endif sp_digit* norm; - sp_digit rt[136]; + sp_digit rt[140]; sp_digit mp = 1; sp_digit n; int i; @@ -5242,14 +5186,14 @@ static int sp_3072_mod_exp_68(sp_digit* r, sp_digit* a, sp_digit* e, int bits, int err = MP_OKAY; #ifdef WOLFSSL_SMALL_STACK - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 136, NULL, + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 140, NULL, DYNAMIC_TYPE_TMP_BUFFER); if (td == NULL) err = MEMORY_E; if (err == MP_OKAY) { for (i=0; i<32; i++) - t[i] = td + i * 136; + t[i] = td + i * 140; norm = t[0]; } #else @@ -5258,67 +5202,67 @@ static int sp_3072_mod_exp_68(sp_digit* r, sp_digit* a, sp_digit* e, int bits, if (err == MP_OKAY) { sp_3072_mont_setup(m, &mp); - sp_3072_mont_norm_68(norm, m); + sp_3072_mont_norm_70(norm, m); if (reduceA) { - err = sp_3072_mod_68(t[1], a, m); + err = sp_3072_mod_70(t[1], a, m); if (err == MP_OKAY) { - sp_3072_mul_68(t[1], t[1], norm); - err = sp_3072_mod_68(t[1], t[1], m); + sp_3072_mul_70(t[1], t[1], norm); + err = sp_3072_mod_70(t[1], t[1], m); } } else { - sp_3072_mul_68(t[1], a, norm); - err = sp_3072_mod_68(t[1], t[1], m); + sp_3072_mul_70(t[1], a, norm); + err = sp_3072_mod_70(t[1], t[1], m); } } if (err == MP_OKAY) { - sp_3072_mont_sqr_68(t[ 2], t[ 1], m, mp); - sp_3072_mont_mul_68(t[ 3], t[ 2], t[ 1], m, mp); - sp_3072_mont_sqr_68(t[ 4], t[ 2], m, mp); - sp_3072_mont_mul_68(t[ 5], t[ 3], t[ 2], m, mp); - sp_3072_mont_sqr_68(t[ 6], t[ 3], m, mp); - sp_3072_mont_mul_68(t[ 7], t[ 4], t[ 3], m, mp); - sp_3072_mont_sqr_68(t[ 8], t[ 4], m, mp); - sp_3072_mont_mul_68(t[ 9], t[ 5], t[ 4], m, mp); - sp_3072_mont_sqr_68(t[10], t[ 5], m, mp); - sp_3072_mont_mul_68(t[11], t[ 6], t[ 5], m, mp); - sp_3072_mont_sqr_68(t[12], t[ 6], m, mp); - sp_3072_mont_mul_68(t[13], t[ 7], t[ 6], m, mp); - sp_3072_mont_sqr_68(t[14], t[ 7], m, mp); - sp_3072_mont_mul_68(t[15], t[ 8], t[ 7], m, mp); - sp_3072_mont_sqr_68(t[16], t[ 8], m, mp); - sp_3072_mont_mul_68(t[17], t[ 9], t[ 8], m, mp); - sp_3072_mont_sqr_68(t[18], t[ 9], m, mp); - sp_3072_mont_mul_68(t[19], t[10], t[ 9], m, mp); - sp_3072_mont_sqr_68(t[20], t[10], m, mp); - sp_3072_mont_mul_68(t[21], t[11], t[10], m, mp); - sp_3072_mont_sqr_68(t[22], t[11], m, mp); - sp_3072_mont_mul_68(t[23], t[12], t[11], m, mp); - sp_3072_mont_sqr_68(t[24], t[12], m, mp); - sp_3072_mont_mul_68(t[25], t[13], t[12], m, mp); - sp_3072_mont_sqr_68(t[26], t[13], m, mp); - sp_3072_mont_mul_68(t[27], t[14], t[13], m, mp); - sp_3072_mont_sqr_68(t[28], t[14], m, mp); - sp_3072_mont_mul_68(t[29], t[15], t[14], m, mp); - sp_3072_mont_sqr_68(t[30], t[15], m, mp); - sp_3072_mont_mul_68(t[31], t[16], t[15], m, mp); + sp_3072_mont_sqr_70(t[ 2], t[ 1], m, mp); + sp_3072_mont_mul_70(t[ 3], t[ 2], t[ 1], m, mp); + sp_3072_mont_sqr_70(t[ 4], t[ 2], m, mp); + sp_3072_mont_mul_70(t[ 5], t[ 3], t[ 2], m, mp); + sp_3072_mont_sqr_70(t[ 6], t[ 3], m, mp); + sp_3072_mont_mul_70(t[ 7], t[ 4], t[ 3], m, mp); + sp_3072_mont_sqr_70(t[ 8], t[ 4], m, mp); + sp_3072_mont_mul_70(t[ 9], t[ 5], t[ 4], m, mp); + sp_3072_mont_sqr_70(t[10], t[ 5], m, mp); + sp_3072_mont_mul_70(t[11], t[ 6], t[ 5], m, mp); + sp_3072_mont_sqr_70(t[12], t[ 6], m, mp); + sp_3072_mont_mul_70(t[13], t[ 7], t[ 6], m, mp); + sp_3072_mont_sqr_70(t[14], t[ 7], m, mp); + sp_3072_mont_mul_70(t[15], t[ 8], t[ 7], m, mp); + sp_3072_mont_sqr_70(t[16], t[ 8], m, mp); + sp_3072_mont_mul_70(t[17], t[ 9], t[ 8], m, mp); + sp_3072_mont_sqr_70(t[18], t[ 9], m, mp); + sp_3072_mont_mul_70(t[19], t[10], t[ 9], m, mp); + sp_3072_mont_sqr_70(t[20], t[10], m, mp); + sp_3072_mont_mul_70(t[21], t[11], t[10], m, mp); + sp_3072_mont_sqr_70(t[22], t[11], m, mp); + sp_3072_mont_mul_70(t[23], t[12], t[11], m, mp); + sp_3072_mont_sqr_70(t[24], t[12], m, mp); + sp_3072_mont_mul_70(t[25], t[13], t[12], m, mp); + sp_3072_mont_sqr_70(t[26], t[13], m, mp); + sp_3072_mont_mul_70(t[27], t[14], t[13], m, mp); + sp_3072_mont_sqr_70(t[28], t[14], m, mp); + sp_3072_mont_mul_70(t[29], t[15], t[14], m, mp); + sp_3072_mont_sqr_70(t[30], t[15], m, mp); + sp_3072_mont_mul_70(t[31], t[16], t[15], m, mp); bits = ((bits + 4) / 5) * 5; - i = ((bits + 22) / 23) - 1; - c = bits % 23; + i = ((bits + 21) / 22) - 1; + c = bits % 22; if (c == 0) - c = 23; - if (i < 68) + c = 22; + if (i < 70) n = e[i--] << (32 - c); else { n = 0; i--; } if (c < 5) { - n |= e[i--] << (9 - c); - c += 23; + n |= e[i--] << (10 - c); + c += 22; } y = (n >> 27) & 0x1f; n <<= 5; @@ -5326,25 +5270,25 @@ static int sp_3072_mod_exp_68(sp_digit* r, sp_digit* a, sp_digit* e, int bits, XMEMCPY(rt, t[y], sizeof(rt)); for (; i>=0 || c>=5; ) { if (c < 5) { - n |= e[i--] << (9 - c); - c += 23; + n |= e[i--] << (10 - c); + c += 22; } y = (n >> 27) & 0x1f; n <<= 5; c -= 5; - sp_3072_mont_sqr_68(rt, rt, m, mp); - sp_3072_mont_sqr_68(rt, rt, m, mp); - sp_3072_mont_sqr_68(rt, rt, m, mp); - sp_3072_mont_sqr_68(rt, rt, m, mp); - sp_3072_mont_sqr_68(rt, rt, m, mp); + sp_3072_mont_sqr_70(rt, rt, m, mp); + sp_3072_mont_sqr_70(rt, rt, m, mp); + sp_3072_mont_sqr_70(rt, rt, m, mp); + sp_3072_mont_sqr_70(rt, rt, m, mp); + sp_3072_mont_sqr_70(rt, rt, m, mp); - sp_3072_mont_mul_68(rt, rt, t[y], m, mp); + sp_3072_mont_mul_70(rt, rt, t[y], m, mp); } - sp_3072_mont_reduce_68(rt, m, mp); - n = sp_3072_cmp_68(rt, m); - sp_3072_cond_sub_68(rt, rt, m, (n < 0) - 1); + sp_3072_mont_reduce_70(rt, m, mp); + n = sp_3072_cmp_70(rt, m); + sp_3072_cond_sub_70(rt, rt, m, (n < 0) - 1); XMEMCPY(r, rt, sizeof(rt)); } @@ -5365,32 +5309,35 @@ static int sp_3072_mod_exp_68(sp_digit* r, sp_digit* a, sp_digit* e, int bits, * r A single precision number. * m A signle precision number. */ -static void sp_3072_mont_norm_136(sp_digit* r, sp_digit* m) +static void sp_3072_mont_norm_140(sp_digit* r, sp_digit* m) { /* Set r = 2^n - 1. */ #ifdef WOLFSSL_SP_SMALL int i; - for (i=0; i<135; i++) - r[i] = 0x7fffff; + for (i=0; i<139; i++) + r[i] = 0x3fffff; #else int i; for (i = 0; i < 136; i += 8) { - r[i + 0] = 0x7fffff; - r[i + 1] = 0x7fffff; - r[i + 2] = 0x7fffff; - r[i + 3] = 0x7fffff; - r[i + 4] = 0x7fffff; - r[i + 5] = 0x7fffff; - r[i + 6] = 0x7fffff; - r[i + 7] = 0x7fffff; + r[i + 0] = 0x3fffff; + r[i + 1] = 0x3fffff; + r[i + 2] = 0x3fffff; + r[i + 3] = 0x3fffff; + r[i + 4] = 0x3fffff; + r[i + 5] = 0x3fffff; + r[i + 6] = 0x3fffff; + r[i + 7] = 0x3fffff; } + r[136] = 0x3fffff; + r[137] = 0x3fffff; + r[138] = 0x3fffff; #endif - r[135] = 0x1fffl; + r[139] = 0x3fffl; /* r = (2^n - 1) mod n */ - sp_3072_sub_136(r, r, m); + sp_3072_sub_140(r, r, m); /* Add one so r = 2^n mod m */ r[0] += 1; @@ -5403,17 +5350,21 @@ static void sp_3072_mont_norm_136(sp_digit* r, sp_digit* m) * return -ve, 0 or +ve if a is less than, equal to or greater than b * respectively. */ -static sp_digit sp_3072_cmp_136(const sp_digit* a, const sp_digit* b) +static sp_digit sp_3072_cmp_140(const sp_digit* a, const sp_digit* b) { sp_digit r = 0; #ifdef WOLFSSL_SP_SMALL int i; - for (i=135; i>=0; i--) + for (i=139; i>=0; i--) r |= (a[i] - b[i]) & (0 - !r); #else int i; + r |= (a[139] - b[139]) & (0 - !r); + r |= (a[138] - b[138]) & (0 - !r); + r |= (a[137] - b[137]) & (0 - !r); + r |= (a[136] - b[136]) & (0 - !r); for (i = 128; i >= 0; i -= 8) { r |= (a[i + 7] - b[i + 7]) & (0 - !r); r |= (a[i + 6] - b[i + 6]) & (0 - !r); @@ -5437,13 +5388,13 @@ static sp_digit sp_3072_cmp_136(const sp_digit* a, const sp_digit* b) * b A single precision number to subtract. * m Mask value to apply. */ -static void sp_3072_cond_sub_136(sp_digit* r, const sp_digit* a, +static void sp_3072_cond_sub_140(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit m) { #ifdef WOLFSSL_SP_SMALL int i; - for (i = 0; i < 136; i++) + for (i = 0; i < 140; i++) r[i] = a[i] - (b[i] & m); #else int i; @@ -5458,6 +5409,10 @@ static void sp_3072_cond_sub_136(sp_digit* r, const sp_digit* a, r[i + 6] = a[i + 6] - (b[i + 6] & m); r[i + 7] = a[i + 7] - (b[i + 7] & m); } + r[136] = a[136] - (b[136] & m); + r[137] = a[137] - (b[137] & m); + r[138] = a[138] - (b[138] & m); + r[139] = a[139] - (b[139] & m); #endif /* WOLFSSL_SP_SMALL */ } @@ -5467,7 +5422,7 @@ static void sp_3072_cond_sub_136(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A scalar. */ -SP_NOINLINE static void sp_3072_mul_add_136(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_3072_mul_add_140(sp_digit* r, const sp_digit* a, const sp_digit b) { #ifdef WOLFSSL_SP_SMALL @@ -5475,79 +5430,74 @@ SP_NOINLINE static void sp_3072_mul_add_136(sp_digit* r, const sp_digit* a, int64_t t = 0; int i; - for (i = 0; i < 136; i++) { + for (i = 0; i < 140; i++) { t += (tb * a[i]) + r[i]; - r[i] = t & 0x7fffff; - t >>= 23; + r[i] = t & 0x3fffff; + t >>= 22; } - r[136] += t; + r[140] += t; #else int64_t tb = b; int64_t t[8]; int i; - t[0] = tb * a[0]; r[0] += t[0] & 0x7fffff; + t[0] = tb * a[0]; r[0] += t[0] & 0x3fffff; for (i = 0; i < 136; i += 8) { t[1] = tb * a[i+1]; - r[i+1] += (t[0] >> 23) + (t[1] & 0x7fffff); + r[i+1] += (t[0] >> 22) + (t[1] & 0x3fffff); t[2] = tb * a[i+2]; - r[i+2] += (t[1] >> 23) + (t[2] & 0x7fffff); + r[i+2] += (t[1] >> 22) + (t[2] & 0x3fffff); t[3] = tb * a[i+3]; - r[i+3] += (t[2] >> 23) + (t[3] & 0x7fffff); + r[i+3] += (t[2] >> 22) + (t[3] & 0x3fffff); t[4] = tb * a[i+4]; - r[i+4] += (t[3] >> 23) + (t[4] & 0x7fffff); + r[i+4] += (t[3] >> 22) + (t[4] & 0x3fffff); t[5] = tb * a[i+5]; - r[i+5] += (t[4] >> 23) + (t[5] & 0x7fffff); + r[i+5] += (t[4] >> 22) + (t[5] & 0x3fffff); t[6] = tb * a[i+6]; - r[i+6] += (t[5] >> 23) + (t[6] & 0x7fffff); + r[i+6] += (t[5] >> 22) + (t[6] & 0x3fffff); t[7] = tb * a[i+7]; - r[i+7] += (t[6] >> 23) + (t[7] & 0x7fffff); + r[i+7] += (t[6] >> 22) + (t[7] & 0x3fffff); t[0] = tb * a[i+8]; - r[i+8] += (t[7] >> 23) + (t[0] & 0x7fffff); + r[i+8] += (t[7] >> 22) + (t[0] & 0x3fffff); } - r[136] += t[7] >> 23; + t[1] = tb * a[137]; r[137] += (t[0] >> 22) + (t[1] & 0x3fffff); + t[2] = tb * a[138]; r[138] += (t[1] >> 22) + (t[2] & 0x3fffff); + t[3] = tb * a[139]; r[139] += (t[2] >> 22) + (t[3] & 0x3fffff); + r[140] += t[3] >> 22; #endif /* WOLFSSL_SP_SMALL */ } -/* Normalize the values in each word to 23. +/* Normalize the values in each word to 22. * * a Array of sp_digit to normalize. */ -static void sp_3072_norm_136(sp_digit* a) +static void sp_3072_norm_140(sp_digit* a) { #ifdef WOLFSSL_SP_SMALL int i; - for (i = 0; i < 135; i++) { - a[i+1] += a[i] >> 23; - a[i] &= 0x7fffff; + for (i = 0; i < 139; i++) { + a[i+1] += a[i] >> 22; + a[i] &= 0x3fffff; } #else int i; - for (i = 0; i < 128; i += 8) { - a[i+1] += a[i+0] >> 23; a[i+0] &= 0x7fffff; - a[i+2] += a[i+1] >> 23; a[i+1] &= 0x7fffff; - a[i+3] += a[i+2] >> 23; a[i+2] &= 0x7fffff; - a[i+4] += a[i+3] >> 23; a[i+3] &= 0x7fffff; - a[i+5] += a[i+4] >> 23; a[i+4] &= 0x7fffff; - a[i+6] += a[i+5] >> 23; a[i+5] &= 0x7fffff; - a[i+7] += a[i+6] >> 23; a[i+6] &= 0x7fffff; - a[i+8] += a[i+7] >> 23; a[i+7] &= 0x7fffff; - a[i+9] += a[i+8] >> 23; a[i+8] &= 0x7fffff; + for (i = 0; i < 136; i += 8) { + a[i+1] += a[i+0] >> 22; a[i+0] &= 0x3fffff; + a[i+2] += a[i+1] >> 22; a[i+1] &= 0x3fffff; + a[i+3] += a[i+2] >> 22; a[i+2] &= 0x3fffff; + a[i+4] += a[i+3] >> 22; a[i+3] &= 0x3fffff; + a[i+5] += a[i+4] >> 22; a[i+4] &= 0x3fffff; + a[i+6] += a[i+5] >> 22; a[i+5] &= 0x3fffff; + a[i+7] += a[i+6] >> 22; a[i+6] &= 0x3fffff; + a[i+8] += a[i+7] >> 22; a[i+7] &= 0x3fffff; + a[i+9] += a[i+8] >> 22; a[i+8] &= 0x3fffff; } - a[128+1] += a[128] >> 23; - a[128] &= 0x7fffff; - a[129+1] += a[129] >> 23; - a[129] &= 0x7fffff; - a[130+1] += a[130] >> 23; - a[130] &= 0x7fffff; - a[131+1] += a[131] >> 23; - a[131] &= 0x7fffff; - a[132+1] += a[132] >> 23; - a[132] &= 0x7fffff; - a[133+1] += a[133] >> 23; - a[133] &= 0x7fffff; - a[134+1] += a[134] >> 23; - a[134] &= 0x7fffff; + a[136+1] += a[136] >> 22; + a[136] &= 0x3fffff; + a[137+1] += a[137] >> 22; + a[137] &= 0x3fffff; + a[138+1] += a[138] >> 22; + a[138] &= 0x3fffff; #endif } @@ -5556,44 +5506,54 @@ static void sp_3072_norm_136(sp_digit* a) * r A single precision number. * a A single precision number. */ -static void sp_3072_mont_shift_136(sp_digit* r, const sp_digit* a) +static void sp_3072_mont_shift_140(sp_digit* r, const sp_digit* a) { #ifdef WOLFSSL_SP_SMALL int i; - int64_t n = a[135] >> 13; - n += ((int64_t)a[136]) << 10; + sp_digit n, s; - for (i = 0; i < 135; i++) { - r[i] = n & 0x7fffff; - n >>= 23; - n += ((int64_t)a[137 + i]) << 10; + s = a[140]; + n = a[139] >> 14; + for (i = 0; i < 139; i++) { + n += (s & 0x3fffff) << 8; + r[i] = n & 0x3fffff; + n >>= 22; + s = a[141 + i] + (s >> 22); } - r[135] = (sp_digit)n; + n += s << 8; + r[139] = n; #else + sp_digit n, s; int i; - int64_t n = a[135] >> 13; - n += ((int64_t)a[136]) << 10; + + s = a[140]; n = a[139] >> 14; for (i = 0; i < 136; i += 8) { - r[i + 0] = n & 0x7fffff; - n >>= 23; n += ((int64_t)a[i + 137]) << 10; - r[i + 1] = n & 0x7fffff; - n >>= 23; n += ((int64_t)a[i + 138]) << 10; - r[i + 2] = n & 0x7fffff; - n >>= 23; n += ((int64_t)a[i + 139]) << 10; - r[i + 3] = n & 0x7fffff; - n >>= 23; n += ((int64_t)a[i + 140]) << 10; - r[i + 4] = n & 0x7fffff; - n >>= 23; n += ((int64_t)a[i + 141]) << 10; - r[i + 5] = n & 0x7fffff; - n >>= 23; n += ((int64_t)a[i + 142]) << 10; - r[i + 6] = n & 0x7fffff; - n >>= 23; n += ((int64_t)a[i + 143]) << 10; - r[i + 7] = n & 0x7fffff; - n >>= 23; n += ((int64_t)a[i + 144]) << 10; + n += (s & 0x3fffff) << 8; r[i+0] = n & 0x3fffff; + n >>= 22; s = a[i+141] + (s >> 22); + n += (s & 0x3fffff) << 8; r[i+1] = n & 0x3fffff; + n >>= 22; s = a[i+142] + (s >> 22); + n += (s & 0x3fffff) << 8; r[i+2] = n & 0x3fffff; + n >>= 22; s = a[i+143] + (s >> 22); + n += (s & 0x3fffff) << 8; r[i+3] = n & 0x3fffff; + n >>= 22; s = a[i+144] + (s >> 22); + n += (s & 0x3fffff) << 8; r[i+4] = n & 0x3fffff; + n >>= 22; s = a[i+145] + (s >> 22); + n += (s & 0x3fffff) << 8; r[i+5] = n & 0x3fffff; + n >>= 22; s = a[i+146] + (s >> 22); + n += (s & 0x3fffff) << 8; r[i+6] = n & 0x3fffff; + n >>= 22; s = a[i+147] + (s >> 22); + n += (s & 0x3fffff) << 8; r[i+7] = n & 0x3fffff; + n >>= 22; s = a[i+148] + (s >> 22); } - r[135] = (sp_digit)n; + n += (s & 0x3fffff) << 8; r[136] = n & 0x3fffff; + n >>= 22; s = a[277] + (s >> 22); + n += (s & 0x3fffff) << 8; r[137] = n & 0x3fffff; + n >>= 22; s = a[278] + (s >> 22); + n += (s & 0x3fffff) << 8; r[138] = n & 0x3fffff; + n >>= 22; s = a[279] + (s >> 22); + n += s << 8; r[139] = n; #endif /* WOLFSSL_SP_SMALL */ - XMEMSET(&r[136], 0, sizeof(*r) * 136); + XMEMSET(&r[140], 0, sizeof(*r) * 140); } /* Reduce the number back to 3072 bits using Montgomery reduction. @@ -5602,49 +5562,49 @@ static void sp_3072_mont_shift_136(sp_digit* r, const sp_digit* a) * m The single precision number representing the modulus. * mp The digit representing the negative inverse of m mod 2^n. */ -static void sp_3072_mont_reduce_136(sp_digit* a, sp_digit* m, sp_digit mp) +static void sp_3072_mont_reduce_140(sp_digit* a, sp_digit* m, sp_digit mp) { int i; sp_digit mu; #ifdef WOLFSSL_SP_DH if (mp != 1) { - for (i=0; i<135; i++) { - mu = (a[i] * mp) & 0x7fffff; - sp_3072_mul_add_136(a+i, m, mu); - a[i+1] += a[i] >> 23; + for (i=0; i<139; i++) { + mu = (a[i] * mp) & 0x3fffff; + sp_3072_mul_add_140(a+i, m, mu); + a[i+1] += a[i] >> 22; } - mu = (a[i] * mp) & 0x1fffl; - sp_3072_mul_add_136(a+i, m, mu); - a[i+1] += a[i] >> 23; - a[i] &= 0x7fffff; + mu = (a[i] * mp) & 0x3fffl; + sp_3072_mul_add_140(a+i, m, mu); + a[i+1] += a[i] >> 22; + a[i] &= 0x3fffff; } else { - for (i=0; i<135; i++) { - mu = a[i] & 0x7fffff; - sp_3072_mul_add_136(a+i, m, mu); - a[i+1] += a[i] >> 23; + for (i=0; i<139; i++) { + mu = a[i] & 0x3fffff; + sp_3072_mul_add_140(a+i, m, mu); + a[i+1] += a[i] >> 22; } - mu = a[i] & 0x1fffl; - sp_3072_mul_add_136(a+i, m, mu); - a[i+1] += a[i] >> 23; - a[i] &= 0x7fffff; + mu = a[i] & 0x3fffl; + sp_3072_mul_add_140(a+i, m, mu); + a[i+1] += a[i] >> 22; + a[i] &= 0x3fffff; } #else - for (i=0; i<135; i++) { - mu = (a[i] * mp) & 0x7fffff; - sp_3072_mul_add_136(a+i, m, mu); - a[i+1] += a[i] >> 23; + for (i=0; i<139; i++) { + mu = (a[i] * mp) & 0x3fffff; + sp_3072_mul_add_140(a+i, m, mu); + a[i+1] += a[i] >> 22; } - mu = (a[i] * mp) & 0x1fffl; - sp_3072_mul_add_136(a+i, m, mu); - a[i+1] += a[i] >> 23; - a[i] &= 0x7fffff; + mu = (a[i] * mp) & 0x3fffl; + sp_3072_mul_add_140(a+i, m, mu); + a[i+1] += a[i] >> 22; + a[i] &= 0x3fffff; #endif - sp_3072_mont_shift_136(a, a); - sp_3072_cond_sub_136(a, a, m, 0 - ((a[135] >> 13) > 0)); - sp_3072_norm_136(a); + sp_3072_mont_shift_140(a, a); + sp_3072_cond_sub_140(a, a, m, 0 - ((a[139] >> 14) > 0)); + sp_3072_norm_140(a); } /* Multiply two Montogmery form numbers mod the modulus (prime). @@ -5656,11 +5616,11 @@ static void sp_3072_mont_reduce_136(sp_digit* a, sp_digit* m, sp_digit mp) * m Modulus (prime). * mp Montogmery mulitplier. */ -static void sp_3072_mont_mul_136(sp_digit* r, sp_digit* a, sp_digit* b, +static void sp_3072_mont_mul_140(sp_digit* r, sp_digit* a, sp_digit* b, sp_digit* m, sp_digit mp) { - sp_3072_mul_136(r, a, b); - sp_3072_mont_reduce_136(r, m, mp); + sp_3072_mul_140(r, a, b); + sp_3072_mont_reduce_140(r, m, mp); } /* Square the Montgomery form number. (r = a * a mod m) @@ -5670,11 +5630,11 @@ static void sp_3072_mont_mul_136(sp_digit* r, sp_digit* a, sp_digit* b, * m Modulus (prime). * mp Montogmery mulitplier. */ -static void sp_3072_mont_sqr_136(sp_digit* r, sp_digit* a, sp_digit* m, +static void sp_3072_mont_sqr_140(sp_digit* r, sp_digit* a, sp_digit* m, sp_digit mp) { - sp_3072_sqr_136(r, a); - sp_3072_mont_reduce_136(r, m, mp); + sp_3072_sqr_140(r, a); + sp_3072_mont_reduce_140(r, m, mp); } /* Multiply a by scalar b into r. (r = a * b) @@ -5683,7 +5643,7 @@ static void sp_3072_mont_sqr_136(sp_digit* r, sp_digit* a, sp_digit* m, * a A single precision integer. * b A scalar. */ -SP_NOINLINE static void sp_3072_mul_d_272(sp_digit* r, const sp_digit* a, +SP_NOINLINE static void sp_3072_mul_d_280(sp_digit* r, const sp_digit* a, const sp_digit b) { #ifdef WOLFSSL_SP_SMALL @@ -5691,37 +5651,37 @@ SP_NOINLINE static void sp_3072_mul_d_272(sp_digit* r, const sp_digit* a, int64_t t = 0; int i; - for (i = 0; i < 272; i++) { + for (i = 0; i < 280; i++) { t += tb * a[i]; - r[i] = t & 0x7fffff; - t >>= 23; + r[i] = t & 0x3fffff; + t >>= 22; } - r[272] = (sp_digit)t; + r[280] = (sp_digit)t; #else int64_t tb = b; int64_t t[8]; int i; - t[0] = tb * a[0]; r[0] = t[0] & 0x7fffff; - for (i = 0; i < 272; i += 8) { + t[0] = tb * a[0]; r[0] = t[0] & 0x3fffff; + for (i = 0; i < 280; i += 8) { t[1] = tb * a[i+1]; - r[i+1] = (sp_digit)(t[0] >> 23) + (t[1] & 0x7fffff); + r[i+1] = (sp_digit)(t[0] >> 22) + (t[1] & 0x3fffff); t[2] = tb * a[i+2]; - r[i+2] = (sp_digit)(t[1] >> 23) + (t[2] & 0x7fffff); + r[i+2] = (sp_digit)(t[1] >> 22) + (t[2] & 0x3fffff); t[3] = tb * a[i+3]; - r[i+3] = (sp_digit)(t[2] >> 23) + (t[3] & 0x7fffff); + r[i+3] = (sp_digit)(t[2] >> 22) + (t[3] & 0x3fffff); t[4] = tb * a[i+4]; - r[i+4] = (sp_digit)(t[3] >> 23) + (t[4] & 0x7fffff); + r[i+4] = (sp_digit)(t[3] >> 22) + (t[4] & 0x3fffff); t[5] = tb * a[i+5]; - r[i+5] = (sp_digit)(t[4] >> 23) + (t[5] & 0x7fffff); + r[i+5] = (sp_digit)(t[4] >> 22) + (t[5] & 0x3fffff); t[6] = tb * a[i+6]; - r[i+6] = (sp_digit)(t[5] >> 23) + (t[6] & 0x7fffff); + r[i+6] = (sp_digit)(t[5] >> 22) + (t[6] & 0x3fffff); t[7] = tb * a[i+7]; - r[i+7] = (sp_digit)(t[6] >> 23) + (t[7] & 0x7fffff); + r[i+7] = (sp_digit)(t[6] >> 22) + (t[7] & 0x3fffff); t[0] = tb * a[i+8]; - r[i+8] = (sp_digit)(t[7] >> 23) + (t[0] & 0x7fffff); + r[i+8] = (sp_digit)(t[7] >> 22) + (t[0] & 0x3fffff); } - r[272] = (sp_digit)(t[7] >> 23); + r[280] = (sp_digit)(t[7] >> 22); #endif /* WOLFSSL_SP_SMALL */ } @@ -5733,13 +5693,13 @@ SP_NOINLINE static void sp_3072_mul_d_272(sp_digit* r, const sp_digit* a, * b A single precision number to add. * m Mask value to apply. */ -static void sp_3072_cond_add_136(sp_digit* r, const sp_digit* a, +static void sp_3072_cond_add_140(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit m) { #ifdef WOLFSSL_SP_SMALL int i; - for (i = 0; i < 136; i++) + for (i = 0; i < 140; i++) r[i] = a[i] + (b[i] & m); #else int i; @@ -5754,6 +5714,10 @@ static void sp_3072_cond_add_136(sp_digit* r, const sp_digit* a, r[i + 6] = a[i + 6] + (b[i + 6] & m); r[i + 7] = a[i + 7] + (b[i + 7] & m); } + r[136] = a[136] + (b[136] & m); + r[137] = a[137] + (b[137] & m); + r[138] = a[138] + (b[138] & m); + r[139] = a[139] + (b[139] & m); #endif /* WOLFSSL_SP_SMALL */ } @@ -5764,12 +5728,12 @@ static void sp_3072_cond_add_136(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static int sp_3072_sub_136(sp_digit* r, const sp_digit* a, +SP_NOINLINE static int sp_3072_sub_140(sp_digit* r, const sp_digit* a, const sp_digit* b) { int i; - for (i = 0; i < 136; i++) + for (i = 0; i < 140; i++) r[i] = a[i] - b[i]; return 0; @@ -5783,37 +5747,40 @@ SP_NOINLINE static int sp_3072_sub_136(sp_digit* r, const sp_digit* a, * a A single precision integer. * b A single precision integer. */ -SP_NOINLINE static int sp_3072_add_136(sp_digit* r, const sp_digit* a, +SP_NOINLINE static int sp_3072_add_140(sp_digit* r, const sp_digit* a, const sp_digit* b) { int i; - for (i = 0; i < 136; i++) + for (i = 0; i < 140; i++) r[i] = a[i] + b[i]; return 0; } #endif -SP_NOINLINE static void sp_3072_rshift_136(sp_digit* r, sp_digit* a, byte n) +SP_NOINLINE static void sp_3072_rshift_140(sp_digit* r, sp_digit* a, byte n) { int i; #ifdef WOLFSSL_SP_SMALL - for (i=0; i<135; i++) - r[i] = ((a[i] >> n) | (a[i + 1] << (23 - n))) & 0x7fffff; + for (i=0; i<139; i++) + r[i] = ((a[i] >> n) | (a[i + 1] << (22 - n))) & 0x3fffff; #else - for (i=0; i<128; i += 8) { - r[i+0] = ((a[i+0] >> n) | (a[i+1] << (23 - n))) & 0x7fffff; - r[i+1] = ((a[i+1] >> n) | (a[i+2] << (23 - n))) & 0x7fffff; - r[i+2] = ((a[i+2] >> n) | (a[i+3] << (23 - n))) & 0x7fffff; - r[i+3] = ((a[i+3] >> n) | (a[i+4] << (23 - n))) & 0x7fffff; - r[i+4] = ((a[i+4] >> n) | (a[i+5] << (23 - n))) & 0x7fffff; - r[i+5] = ((a[i+5] >> n) | (a[i+6] << (23 - n))) & 0x7fffff; - r[i+6] = ((a[i+6] >> n) | (a[i+7] << (23 - n))) & 0x7fffff; - r[i+7] = ((a[i+7] >> n) | (a[i+8] << (23 - n))) & 0x7fffff; + for (i=0; i<136; i += 8) { + r[i+0] = ((a[i+0] >> n) | (a[i+1] << (22 - n))) & 0x3fffff; + r[i+1] = ((a[i+1] >> n) | (a[i+2] << (22 - n))) & 0x3fffff; + r[i+2] = ((a[i+2] >> n) | (a[i+3] << (22 - n))) & 0x3fffff; + r[i+3] = ((a[i+3] >> n) | (a[i+4] << (22 - n))) & 0x3fffff; + r[i+4] = ((a[i+4] >> n) | (a[i+5] << (22 - n))) & 0x3fffff; + r[i+5] = ((a[i+5] >> n) | (a[i+6] << (22 - n))) & 0x3fffff; + r[i+6] = ((a[i+6] >> n) | (a[i+7] << (22 - n))) & 0x3fffff; + r[i+7] = ((a[i+7] >> n) | (a[i+8] << (22 - n))) & 0x3fffff; } + r[136] = ((a[136] >> n) | (a[137] << (22 - n))) & 0x3fffff; + r[137] = ((a[137] >> n) | (a[138] << (22 - n))) & 0x3fffff; + r[138] = ((a[138] >> n) | (a[139] << (22 - n))) & 0x3fffff; #endif - r[135] = a[135] >> n; + r[139] = a[139] >> n; } /* Divide d in a and put remainder into r (m*d + r = a) @@ -5825,7 +5792,7 @@ SP_NOINLINE static void sp_3072_rshift_136(sp_digit* r, sp_digit* a, byte n) * r Remainder from the division. * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise. */ -static int sp_3072_div_136(sp_digit* a, sp_digit* d, sp_digit* m, +static int sp_3072_div_140(sp_digit* a, sp_digit* d, sp_digit* m, sp_digit* r) { int i; @@ -5834,7 +5801,7 @@ static int sp_3072_div_136(sp_digit* a, sp_digit* d, sp_digit* m, #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) sp_digit* td; #else - sp_digit t1d[272 + 1], t2d[136 + 1], sdd[136 + 1]; + sp_digit t1d[280 + 1], t2d[140 + 1], sdd[140 + 1]; #endif sp_digit* t1; sp_digit* t2; @@ -5842,12 +5809,12 @@ static int sp_3072_div_136(sp_digit* a, sp_digit* d, sp_digit* m, int err = MP_OKAY; #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) - td = XMALLOC(sizeof(sp_digit) * (4 * 136 + 3), NULL, + td = XMALLOC(sizeof(sp_digit) * (4 * 140 + 3), NULL, DYNAMIC_TYPE_TMP_BUFFER); if (td != NULL) { t1 = td; - t2 = td + 272 + 1; - sd = t2 + 136 + 1; + t2 = td + 280 + 1; + sd = t2 + 140 + 1; } else err = MEMORY_E; @@ -5860,45 +5827,45 @@ static int sp_3072_div_136(sp_digit* a, sp_digit* d, sp_digit* m, (void)m; if (err == MP_OKAY) { - sp_3072_mul_d_136(sd, d, 1 << 10); - sp_3072_mul_d_272(t1, a, 1 << 10); - div = sd[135]; - for (i=136; i>=0; i--) { - t1[136 + i] += t1[136 + i - 1] >> 23; - t1[136 + i - 1] &= 0x7fffff; - d1 = t1[136 + i]; - d1 <<= 23; - d1 += t1[136 + i - 1]; + sp_3072_mul_d_140(sd, d, 1 << 8); + sp_3072_mul_d_280(t1, a, 1 << 8); + div = sd[139]; + for (i=140; i>=0; i--) { + t1[140 + i] += t1[140 + i - 1] >> 22; + t1[140 + i - 1] &= 0x3fffff; + d1 = t1[140 + i]; + d1 <<= 22; + d1 += t1[140 + i - 1]; r1 = (sp_digit)(d1 / div); - sp_3072_mul_d_136(t2, sd, r1); - sp_3072_sub_136(&t1[i], &t1[i], t2); - t1[136 + i] -= t2[136]; - t1[136 + i] += t1[136 + i - 1] >> 23; - t1[136 + i - 1] &= 0x7fffff; - r1 = (((-t1[136 + i]) << 23) - t1[136 + i - 1]) / div; - r1 -= t1[136 + i]; - sp_3072_mul_d_136(t2, sd, r1); - sp_3072_add_136(&t1[i], &t1[i], t2); - t1[136 + i] += t1[136 + i - 1] >> 23; - t1[136 + i - 1] &= 0x7fffff; + sp_3072_mul_d_140(t2, sd, r1); + sp_3072_sub_140(&t1[i], &t1[i], t2); + t1[140 + i] -= t2[140]; + t1[140 + i] += t1[140 + i - 1] >> 22; + t1[140 + i - 1] &= 0x3fffff; + r1 = (((-t1[140 + i]) << 22) - t1[140 + i - 1]) / div; + r1 -= t1[140 + i]; + sp_3072_mul_d_140(t2, sd, r1); + sp_3072_add_140(&t1[i], &t1[i], t2); + t1[140 + i] += t1[140 + i - 1] >> 22; + t1[140 + i - 1] &= 0x3fffff; } - t1[136 - 1] += t1[136 - 2] >> 23; - t1[136 - 2] &= 0x7fffff; - d1 = t1[136 - 1]; + t1[140 - 1] += t1[140 - 2] >> 22; + t1[140 - 2] &= 0x3fffff; + d1 = t1[140 - 1]; r1 = (sp_digit)(d1 / div); - sp_3072_mul_d_136(t2, sd, r1); - sp_3072_sub_136(t1, t1, t2); - XMEMCPY(r, t1, sizeof(*r) * 2 * 136); - for (i=0; i<134; i++) { - r[i+1] += r[i] >> 23; - r[i] &= 0x7fffff; + sp_3072_mul_d_140(t2, sd, r1); + sp_3072_sub_140(t1, t1, t2); + XMEMCPY(r, t1, sizeof(*r) * 2 * 140); + for (i=0; i<138; i++) { + r[i+1] += r[i] >> 22; + r[i] &= 0x3fffff; } - sp_3072_cond_add_136(r, r, sd, 0 - (r[135] < 0)); + sp_3072_cond_add_140(r, r, sd, 0 - (r[139] < 0)); } - sp_3072_rshift_136(r, r, 10); + sp_3072_rshift_140(r, r, 8); #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) if (td != NULL) @@ -5915,9 +5882,9 @@ static int sp_3072_div_136(sp_digit* a, sp_digit* d, sp_digit* m, * m A single precision number that is the modulus to reduce with. * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise. */ -static int sp_3072_mod_136(sp_digit* r, sp_digit* a, sp_digit* m) +static int sp_3072_mod_140(sp_digit* r, sp_digit* a, sp_digit* m) { - return sp_3072_div_136(a, m, NULL, r); + return sp_3072_div_140(a, m, NULL, r); } #if defined(SP_RSA_PRIVATE_EXP_D) || defined(WOLFSSL_HAVE_SP_DH) @@ -5930,7 +5897,7 @@ static int sp_3072_mod_136(sp_digit* r, sp_digit* a, sp_digit* m) * m A single precision number that is the modulus. * returns 0 on success and MEMORY_E on dynamic memory allocation failure. */ -static int sp_3072_mod_exp_136(sp_digit* r, sp_digit* a, sp_digit* e, int bits, +static int sp_3072_mod_exp_140(sp_digit* r, sp_digit* a, sp_digit* e, int bits, sp_digit* m, int reduceA) { #ifdef WOLFSSL_SP_SMALL @@ -5943,62 +5910,62 @@ static int sp_3072_mod_exp_136(sp_digit* r, sp_digit* a, sp_digit* e, int bits, int c, y; int err = MP_OKAY; - td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 136 * 2, NULL, + td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 140 * 2, NULL, DYNAMIC_TYPE_TMP_BUFFER); if (td == NULL) err = MEMORY_E; if (err == MP_OKAY) { - XMEMSET(td, 0, sizeof(*td) * 3 * 136 * 2); + XMEMSET(td, 0, sizeof(*td) * 3 * 140 * 2); norm = t[0] = td; - t[1] = &td[136 * 2]; - t[2] = &td[2 * 136 * 2]; + t[1] = &td[140 * 2]; + t[2] = &td[2 * 140 * 2]; sp_3072_mont_setup(m, &mp); - sp_3072_mont_norm_136(norm, m); + sp_3072_mont_norm_140(norm, m); if (reduceA) - err = sp_3072_mod_136(t[1], a, m); + err = sp_3072_mod_140(t[1], a, m); else - XMEMCPY(t[1], a, sizeof(sp_digit) * 136); + XMEMCPY(t[1], a, sizeof(sp_digit) * 140); } if (err == MP_OKAY) { - sp_3072_mul_136(t[1], t[1], norm); - err = sp_3072_mod_136(t[1], t[1], m); + sp_3072_mul_140(t[1], t[1], norm); + err = sp_3072_mod_140(t[1], t[1], m); } if (err == MP_OKAY) { - i = bits / 23; - c = bits % 23; - n = e[i--] << (23 - c); + i = bits / 22; + c = bits % 22; + n = e[i--] << (22 - c); for (; ; c--) { if (c == 0) { if (i == -1) break; n = e[i--]; - c = 23; + c = 22; } - y = (n >> 22) & 1; + y = (n >> 21) & 1; n <<= 1; - sp_3072_mont_mul_136(t[y^1], t[0], t[1], m, mp); + sp_3072_mont_mul_140(t[y^1], t[0], t[1], m, mp); XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) + ((size_t)t[1] & addr_mask[y])), - sizeof(*t[2]) * 136 * 2); - sp_3072_mont_sqr_136(t[2], t[2], m, mp); + sizeof(*t[2]) * 140 * 2); + sp_3072_mont_sqr_140(t[2], t[2], m, mp); XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) + ((size_t)t[1] & addr_mask[y])), t[2], - sizeof(*t[2]) * 136 * 2); + sizeof(*t[2]) * 140 * 2); } - sp_3072_mont_reduce_136(t[0], m, mp); - n = sp_3072_cmp_136(t[0], m); - sp_3072_cond_sub_136(t[0], t[0], m, (n < 0) - 1); - XMEMCPY(r, t[0], sizeof(*r) * 136 * 2); + sp_3072_mont_reduce_140(t[0], m, mp); + n = sp_3072_cmp_140(t[0], m); + sp_3072_cond_sub_140(t[0], t[0], m, (n < 0) - 1); + XMEMCPY(r, t[0], sizeof(*r) * 140 * 2); } @@ -6008,7 +5975,7 @@ static int sp_3072_mod_exp_136(sp_digit* r, sp_digit* a, sp_digit* e, int bits, return err; #elif defined(WOLFSSL_SP_CACHE_RESISTANT) #ifndef WOLFSSL_SMALL_STACK - sp_digit t[3][272]; + sp_digit t[3][280]; #else sp_digit* td; sp_digit* t[3]; @@ -6021,15 +5988,15 @@ static int sp_3072_mod_exp_136(sp_digit* r, sp_digit* a, sp_digit* e, int bits, int err = MP_OKAY; #ifdef WOLFSSL_SMALL_STACK - td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 136 * 2, NULL, + td = (sp_digit*)XMALLOC(sizeof(*td) * 3 * 140 * 2, NULL, DYNAMIC_TYPE_TMP_BUFFER); if (td == NULL) err = MEMORY_E; if (err == MP_OKAY) { t[0] = td; - t[1] = &td[136 * 2]; - t[2] = &td[2 * 136 * 2]; + t[1] = &td[140 * 2]; + t[2] = &td[2 * 140 * 2]; norm = t[0]; } #else @@ -6038,49 +6005,49 @@ static int sp_3072_mod_exp_136(sp_digit* r, sp_digit* a, sp_digit* e, int bits, if (err == MP_OKAY) { sp_3072_mont_setup(m, &mp); - sp_3072_mont_norm_136(norm, m); + sp_3072_mont_norm_140(norm, m); if (reduceA) { - err = sp_3072_mod_136(t[1], a, m); + err = sp_3072_mod_140(t[1], a, m); if (err == MP_OKAY) { - sp_3072_mul_136(t[1], t[1], norm); - err = sp_3072_mod_136(t[1], t[1], m); + sp_3072_mul_140(t[1], t[1], norm); + err = sp_3072_mod_140(t[1], t[1], m); } } else { - sp_3072_mul_136(t[1], a, norm); - err = sp_3072_mod_136(t[1], t[1], m); + sp_3072_mul_140(t[1], a, norm); + err = sp_3072_mod_140(t[1], t[1], m); } } if (err == MP_OKAY) { - i = bits / 23; - c = bits % 23; - n = e[i--] << (23 - c); + i = bits / 22; + c = bits % 22; + n = e[i--] << (22 - c); for (; ; c--) { if (c == 0) { if (i == -1) break; n = e[i--]; - c = 23; + c = 22; } - y = (n >> 22) & 1; + y = (n >> 21) & 1; n <<= 1; - sp_3072_mont_mul_136(t[y^1], t[0], t[1], m, mp); + sp_3072_mont_mul_140(t[y^1], t[0], t[1], m, mp); XMEMCPY(t[2], (void*)(((size_t)t[0] & addr_mask[y^1]) + ((size_t)t[1] & addr_mask[y])), sizeof(t[2])); - sp_3072_mont_sqr_136(t[2], t[2], m, mp); + sp_3072_mont_sqr_140(t[2], t[2], m, mp); XMEMCPY((void*)(((size_t)t[0] & addr_mask[y^1]) + ((size_t)t[1] & addr_mask[y])), t[2], sizeof(t[2])); } - sp_3072_mont_reduce_136(t[0], m, mp); - n = sp_3072_cmp_136(t[0], m); - sp_3072_cond_sub_136(t[0], t[0], m, (n < 0) - 1); + sp_3072_mont_reduce_140(t[0], m, mp); + n = sp_3072_cmp_140(t[0], m); + sp_3072_cond_sub_140(t[0], t[0], m, (n < 0) - 1); XMEMCPY(r, t[0], sizeof(t[0])); } @@ -6092,13 +6059,13 @@ static int sp_3072_mod_exp_136(sp_digit* r, sp_digit* a, sp_digit* e, int bits, return err; #else #ifndef WOLFSSL_SMALL_STACK - sp_digit t[32][272]; + sp_digit t[32][280]; #else sp_digit* t[32]; sp_digit* td; #endif sp_digit* norm; - sp_digit rt[272]; + sp_digit rt[280]; sp_digit mp = 1; sp_digit n; int i; @@ -6106,14 +6073,14 @@ static int sp_3072_mod_exp_136(sp_digit* r, sp_digit* a, sp_digit* e, int bits, int err = MP_OKAY; #ifdef WOLFSSL_SMALL_STACK - td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 272, NULL, + td = (sp_digit*)XMALLOC(sizeof(sp_digit) * 32 * 280, NULL, DYNAMIC_TYPE_TMP_BUFFER); if (td == NULL) err = MEMORY_E; if (err == MP_OKAY) { for (i=0; i<32; i++) - t[i] = td + i * 272; + t[i] = td + i * 280; norm = t[0]; } #else @@ -6122,67 +6089,67 @@ static int sp_3072_mod_exp_136(sp_digit* r, sp_digit* a, sp_digit* e, int bits, if (err == MP_OKAY) { sp_3072_mont_setup(m, &mp); - sp_3072_mont_norm_136(norm, m); + sp_3072_mont_norm_140(norm, m); if (reduceA) { - err = sp_3072_mod_136(t[1], a, m); + err = sp_3072_mod_140(t[1], a, m); if (err == MP_OKAY) { - sp_3072_mul_136(t[1], t[1], norm); - err = sp_3072_mod_136(t[1], t[1], m); + sp_3072_mul_140(t[1], t[1], norm); + err = sp_3072_mod_140(t[1], t[1], m); } } else { - sp_3072_mul_136(t[1], a, norm); - err = sp_3072_mod_136(t[1], t[1], m); + sp_3072_mul_140(t[1], a, norm); + err = sp_3072_mod_140(t[1], t[1], m); } } if (err == MP_OKAY) { - sp_3072_mont_sqr_136(t[ 2], t[ 1], m, mp); - sp_3072_mont_mul_136(t[ 3], t[ 2], t[ 1], m, mp); - sp_3072_mont_sqr_136(t[ 4], t[ 2], m, mp); - sp_3072_mont_mul_136(t[ 5], t[ 3], t[ 2], m, mp); - sp_3072_mont_sqr_136(t[ 6], t[ 3], m, mp); - sp_3072_mont_mul_136(t[ 7], t[ 4], t[ 3], m, mp); - sp_3072_mont_sqr_136(t[ 8], t[ 4], m, mp); - sp_3072_mont_mul_136(t[ 9], t[ 5], t[ 4], m, mp); - sp_3072_mont_sqr_136(t[10], t[ 5], m, mp); - sp_3072_mont_mul_136(t[11], t[ 6], t[ 5], m, mp); - sp_3072_mont_sqr_136(t[12], t[ 6], m, mp); - sp_3072_mont_mul_136(t[13], t[ 7], t[ 6], m, mp); - sp_3072_mont_sqr_136(t[14], t[ 7], m, mp); - sp_3072_mont_mul_136(t[15], t[ 8], t[ 7], m, mp); - sp_3072_mont_sqr_136(t[16], t[ 8], m, mp); - sp_3072_mont_mul_136(t[17], t[ 9], t[ 8], m, mp); - sp_3072_mont_sqr_136(t[18], t[ 9], m, mp); - sp_3072_mont_mul_136(t[19], t[10], t[ 9], m, mp); - sp_3072_mont_sqr_136(t[20], t[10], m, mp); - sp_3072_mont_mul_136(t[21], t[11], t[10], m, mp); - sp_3072_mont_sqr_136(t[22], t[11], m, mp); - sp_3072_mont_mul_136(t[23], t[12], t[11], m, mp); - sp_3072_mont_sqr_136(t[24], t[12], m, mp); - sp_3072_mont_mul_136(t[25], t[13], t[12], m, mp); - sp_3072_mont_sqr_136(t[26], t[13], m, mp); - sp_3072_mont_mul_136(t[27], t[14], t[13], m, mp); - sp_3072_mont_sqr_136(t[28], t[14], m, mp); - sp_3072_mont_mul_136(t[29], t[15], t[14], m, mp); - sp_3072_mont_sqr_136(t[30], t[15], m, mp); - sp_3072_mont_mul_136(t[31], t[16], t[15], m, mp); + sp_3072_mont_sqr_140(t[ 2], t[ 1], m, mp); + sp_3072_mont_mul_140(t[ 3], t[ 2], t[ 1], m, mp); + sp_3072_mont_sqr_140(t[ 4], t[ 2], m, mp); + sp_3072_mont_mul_140(t[ 5], t[ 3], t[ 2], m, mp); + sp_3072_mont_sqr_140(t[ 6], t[ 3], m, mp); + sp_3072_mont_mul_140(t[ 7], t[ 4], t[ 3], m, mp); + sp_3072_mont_sqr_140(t[ 8], t[ 4], m, mp); + sp_3072_mont_mul_140(t[ 9], t[ 5], t[ 4], m, mp); + sp_3072_mont_sqr_140(t[10], t[ 5], m, mp); + sp_3072_mont_mul_140(t[11], t[ 6], t[ 5], m, mp); + sp_3072_mont_sqr_140(t[12], t[ 6], m, mp); + sp_3072_mont_mul_140(t[13], t[ 7], t[ 6], m, mp); + sp_3072_mont_sqr_140(t[14], t[ 7], m, mp); + sp_3072_mont_mul_140(t[15], t[ 8], t[ 7], m, mp); + sp_3072_mont_sqr_140(t[16], t[ 8], m, mp); + sp_3072_mont_mul_140(t[17], t[ 9], t[ 8], m, mp); + sp_3072_mont_sqr_140(t[18], t[ 9], m, mp); + sp_3072_mont_mul_140(t[19], t[10], t[ 9], m, mp); + sp_3072_mont_sqr_140(t[20], t[10], m, mp); + sp_3072_mont_mul_140(t[21], t[11], t[10], m, mp); + sp_3072_mont_sqr_140(t[22], t[11], m, mp); + sp_3072_mont_mul_140(t[23], t[12], t[11], m, mp); + sp_3072_mont_sqr_140(t[24], t[12], m, mp); + sp_3072_mont_mul_140(t[25], t[13], t[12], m, mp); + sp_3072_mont_sqr_140(t[26], t[13], m, mp); + sp_3072_mont_mul_140(t[27], t[14], t[13], m, mp); + sp_3072_mont_sqr_140(t[28], t[14], m, mp); + sp_3072_mont_mul_140(t[29], t[15], t[14], m, mp); + sp_3072_mont_sqr_140(t[30], t[15], m, mp); + sp_3072_mont_mul_140(t[31], t[16], t[15], m, mp); bits = ((bits + 4) / 5) * 5; - i = ((bits + 22) / 23) - 1; - c = bits % 23; + i = ((bits + 21) / 22) - 1; + c = bits % 22; if (c == 0) - c = 23; - if (i < 136) + c = 22; + if (i < 140) n = e[i--] << (32 - c); else { n = 0; i--; } if (c < 5) { - n |= e[i--] << (9 - c); - c += 23; + n |= e[i--] << (10 - c); + c += 22; } y = (n >> 27) & 0x1f; n <<= 5; @@ -6190,25 +6157,25 @@ static int sp_3072_mod_exp_136(sp_digit* r, sp_digit* a, sp_digit* e, int bits, XMEMCPY(rt, t[y], sizeof(rt)); for (; i>=0 || c>=5; ) { if (c < 5) { - n |= e[i--] << (9 - c); - c += 23; + n |= e[i--] << (10 - c); + c += 22; } y = (n >> 27) & 0x1f; n <<= 5; c -= 5; - sp_3072_mont_sqr_136(rt, rt, m, mp); - sp_3072_mont_sqr_136(rt, rt, m, mp); - sp_3072_mont_sqr_136(rt, rt, m, mp); - sp_3072_mont_sqr_136(rt, rt, m, mp); - sp_3072_mont_sqr_136(rt, rt, m, mp); + sp_3072_mont_sqr_140(rt, rt, m, mp); + sp_3072_mont_sqr_140(rt, rt, m, mp); + sp_3072_mont_sqr_140(rt, rt, m, mp); + sp_3072_mont_sqr_140(rt, rt, m, mp); + sp_3072_mont_sqr_140(rt, rt, m, mp); - sp_3072_mont_mul_136(rt, rt, t[y], m, mp); + sp_3072_mont_mul_140(rt, rt, t[y], m, mp); } - sp_3072_mont_reduce_136(rt, m, mp); - n = sp_3072_cmp_136(rt, m); - sp_3072_cond_sub_136(rt, rt, m, (n < 0) - 1); + sp_3072_mont_reduce_140(rt, m, mp); + n = sp_3072_cmp_140(rt, m); + sp_3072_cond_sub_140(rt, rt, m, (n < 0) - 1); XMEMCPY(r, rt, sizeof(rt)); } @@ -6230,12 +6197,12 @@ static int sp_3072_mod_exp_136(sp_digit* r, sp_digit* a, sp_digit* e, int bits, * a A single precision integer. * m Mask to AND against each digit. */ -static void sp_3072_mask_68(sp_digit* r, sp_digit* a, sp_digit m) +static void sp_3072_mask_70(sp_digit* r, sp_digit* a, sp_digit m) { #ifdef WOLFSSL_SP_SMALL int i; - for (i=0; i<68; i++) + for (i=0; i<70; i++) r[i] = a[i] & m; #else int i; @@ -6254,6 +6221,8 @@ static void sp_3072_mask_68(sp_digit* r, sp_digit* a, sp_digit m) r[65] = a[65] & m; r[66] = a[66] & m; r[67] = a[67] & m; + r[68] = a[68] & m; + r[69] = a[69] & m; #endif } @@ -6287,12 +6256,12 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, mp_int* em, mp_int* mm, if (*outLen < 384) err = MP_TO_E; - if (err == MP_OKAY && (mp_count_bits(em) > 23 || inLen > 384 || + if (err == MP_OKAY && (mp_count_bits(em) > 22 || inLen > 384 || mp_count_bits(mm) != 3072)) err = MP_READ_E; if (err == MP_OKAY) { - d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 136 * 5, NULL, + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 140 * 5, NULL, DYNAMIC_TYPE_RSA); if (d == NULL) err = MEMORY_E; @@ -6300,12 +6269,12 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, mp_int* em, mp_int* mm, if (err == MP_OKAY) { a = d; - r = a + 136 * 2; - m = r + 136 * 2; + r = a + 140 * 2; + m = r + 140 * 2; norm = r; - sp_3072_from_bin(a, 136, in, inLen); -#if DIGIT_BIT >= 23 + sp_3072_from_bin(a, 140, in, inLen); +#if DIGIT_BIT >= 22 e[0] = em->dp[0]; #else e[0] = em->dp[0]; @@ -6317,30 +6286,30 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, mp_int* em, mp_int* mm, } if (err == MP_OKAY) { - sp_3072_from_mp(m, 136, mm); + sp_3072_from_mp(m, 140, mm); sp_3072_mont_setup(m, &mp); - sp_3072_mont_norm_136(norm, m); + sp_3072_mont_norm_140(norm, m); } if (err == MP_OKAY) { - sp_3072_mul_136(a, a, norm); - err = sp_3072_mod_136(a, a, m); + sp_3072_mul_140(a, a, norm); + err = sp_3072_mod_140(a, a, m); } if (err == MP_OKAY) { - for (i=22; i>=0; i--) + for (i=21; i>=0; i--) if (e[0] >> i) break; - XMEMCPY(r, a, sizeof(sp_digit) * 136 * 2); + XMEMCPY(r, a, sizeof(sp_digit) * 140 * 2); for (i--; i>=0; i--) { - sp_3072_mont_sqr_136(r, r, m, mp); + sp_3072_mont_sqr_140(r, r, m, mp); if (((e[0] >> i) & 1) == 1) - sp_3072_mont_mul_136(r, r, a, m, mp); + sp_3072_mont_mul_140(r, r, a, m, mp); } - sp_3072_mont_reduce_136(r, m, mp); - mp = sp_3072_cmp_136(r, m); - sp_3072_cond_sub_136(r, r, m, (mp < 0) - 1); + sp_3072_mont_reduce_140(r, m, mp); + mp = sp_3072_cmp_140(r, m); + sp_3072_cond_sub_140(r, r, m, (mp < 0) - 1); sp_3072_to_bin(r, out); *outLen = 384; @@ -6352,7 +6321,7 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, mp_int* em, mp_int* mm, return err; #else #if !defined(WOLFSSL_SP_SMALL) && !defined(WOLFSSL_SMALL_STACK) - sp_digit ad[272], md[136], rd[272]; + sp_digit ad[280], md[140], rd[280]; #else sp_digit* d = NULL; #endif @@ -6364,13 +6333,13 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, mp_int* em, mp_int* mm, if (*outLen < 384) err = MP_TO_E; - if (err == MP_OKAY && (mp_count_bits(em) > 23 || inLen > 384 || + if (err == MP_OKAY && (mp_count_bits(em) > 22 || inLen > 384 || mp_count_bits(mm) != 3072)) err = MP_READ_E; #if defined(WOLFSSL_SP_SMALL) || defined(WOLFSSL_SMALL_STACK) if (err == MP_OKAY) { - d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 136 * 5, NULL, + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 140 * 5, NULL, DYNAMIC_TYPE_RSA); if (d == NULL) err = MEMORY_E; @@ -6378,8 +6347,8 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, mp_int* em, mp_int* mm, if (err == MP_OKAY) { a = d; - r = a + 136 * 2; - m = r + 136 * 2; + r = a + 140 * 2; + m = r + 140 * 2; } #else a = ad; @@ -6388,8 +6357,8 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, mp_int* em, mp_int* mm, #endif if (err == MP_OKAY) { - sp_3072_from_bin(a, 136, in, inLen); -#if DIGIT_BIT >= 23 + sp_3072_from_bin(a, 140, in, inLen); +#if DIGIT_BIT >= 22 e[0] = em->dp[0]; #else e[0] = em->dp[0]; @@ -6400,16 +6369,16 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, mp_int* em, mp_int* mm, err = MP_EXPTMOD_E; } if (err == MP_OKAY) { - sp_3072_from_mp(m, 136, mm); + sp_3072_from_mp(m, 140, mm); if (e[0] == 0x3) { if (err == MP_OKAY) { - sp_3072_sqr_136(r, a); - err = sp_3072_mod_136(r, r, m); + sp_3072_sqr_140(r, a); + err = sp_3072_mod_140(r, r, m); } if (err == MP_OKAY) { - sp_3072_mul_136(r, a, r); - err = sp_3072_mod_136(r, r, m); + sp_3072_mul_140(r, a, r); + err = sp_3072_mod_140(r, r, m); } } else { @@ -6418,28 +6387,28 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, mp_int* em, mp_int* mm, sp_digit mp; sp_3072_mont_setup(m, &mp); - sp_3072_mont_norm_136(norm, m); + sp_3072_mont_norm_140(norm, m); if (err == MP_OKAY) { - sp_3072_mul_136(a, a, norm); - err = sp_3072_mod_136(a, a, m); + sp_3072_mul_140(a, a, norm); + err = sp_3072_mod_140(a, a, m); } if (err == MP_OKAY) { - for (i=22; i>=0; i--) + for (i=21; i>=0; i--) if (e[0] >> i) break; - XMEMCPY(r, a, sizeof(sp_digit) * 272); + XMEMCPY(r, a, sizeof(sp_digit) * 280); for (i--; i>=0; i--) { - sp_3072_mont_sqr_136(r, r, m, mp); + sp_3072_mont_sqr_140(r, r, m, mp); if (((e[0] >> i) & 1) == 1) - sp_3072_mont_mul_136(r, r, a, m, mp); + sp_3072_mont_mul_140(r, r, a, m, mp); } - sp_3072_mont_reduce_136(r, m, mp); - mp = sp_3072_cmp_136(r, m); - sp_3072_cond_sub_136(r, r, m, (mp < 0) - 1); + sp_3072_mont_reduce_140(r, m, mp); + mp = sp_3072_cmp_140(r, m); + sp_3072_cond_sub_140(r, r, m, (mp < 0) - 1); } } } @@ -6501,20 +6470,20 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm, err = MP_READ_E; if (err == MP_OKAY) { - d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 136 * 4, NULL, + d = (sp_digit*)XMALLOC(sizeof(sp_digit) * 140 * 4, NULL, DYNAMIC_TYPE_RSA); if (d == NULL) err = MEMORY_E; } if (err == MP_OKAY) { - a = d + 136; - m = a + 136; + a = d + 140; + m = a + 140; r = a; - sp_3072_from_bin(a, 136, in, inLen); - sp_3072_from_mp(d, 136, dm); - sp_3072_from_mp(m, 136, mm); - err = sp_3072_mod_exp_136(r, a, d, 3072, m, 0); + sp_3072_from_bin(a, 140, in, inLen); + sp_3072_from_mp(d, 140, dm); + sp_3072_from_mp(m, 140, mm); + err = sp_3072_mod_exp_140(r, a, d, 3072, m, 0); } if (err == MP_OKAY) { sp_3072_to_bin(r, out); @@ -6522,13 +6491,13 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm, } if (d != NULL) { - XMEMSET(d, 0, sizeof(sp_digit) * 136); + XMEMSET(d, 0, sizeof(sp_digit) * 140); XFREE(d, NULL, DYNAMIC_TYPE_RSA); } return err; #else - sp_digit a[272], d[136], m[136]; + sp_digit a[280], d[140], m[140]; sp_digit* r = a; int err = MP_OKAY; @@ -6545,10 +6514,10 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm, err = MP_READ_E; if (err == MP_OKAY) { - sp_3072_from_bin(a, 136, in, inLen); - sp_3072_from_mp(d, 136, dm); - sp_3072_from_mp(m, 136, mm); - err = sp_3072_mod_exp_136(r, a, d, 3072, m, 0); + sp_3072_from_bin(a, 140, in, inLen); + sp_3072_from_mp(d, 140, dm); + sp_3072_from_mp(m, 140, mm); + err = sp_3072_mod_exp_140(r, a, d, 3072, m, 0); } if (err == MP_OKAY) { @@ -6556,7 +6525,7 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm, *outLen = 384; } - XMEMSET(d, 0, sizeof(sp_digit) * 136); + XMEMSET(d, 0, sizeof(sp_digit) * 140); return err; #endif /* WOLFSSL_SP_SMALL || defined(WOLFSSL_SMALL_STACK) */ @@ -6584,61 +6553,61 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm, err = MP_READ_E; if (err == MP_OKAY) { - t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 68 * 11, NULL, + t = (sp_digit*)XMALLOC(sizeof(sp_digit) * 70 * 11, NULL, DYNAMIC_TYPE_RSA); if (t == NULL) err = MEMORY_E; } if (err == MP_OKAY) { a = t; - p = a + 136 * 2; - q = p + 68; - qi = dq = dp = q + 68; - tmpa = qi + 68; - tmpb = tmpa + 136; + p = a + 140 * 2; + q = p + 70; + qi = dq = dp = q + 70; + tmpa = qi + 70; + tmpb = tmpa + 140; tmp = t; - r = tmp + 136; + r = tmp + 140; - sp_3072_from_bin(a, 136, in, inLen); - sp_3072_from_mp(p, 68, pm); - sp_3072_from_mp(q, 68, qm); - sp_3072_from_mp(dp, 68, dpm); - err = sp_3072_mod_exp_68(tmpa, a, dp, 1536, p, 1); + sp_3072_from_bin(a, 140, in, inLen); + sp_3072_from_mp(p, 70, pm); + sp_3072_from_mp(q, 70, qm); + sp_3072_from_mp(dp, 70, dpm); + err = sp_3072_mod_exp_70(tmpa, a, dp, 1536, p, 1); } if (err == MP_OKAY) { - sp_3072_from_mp(dq, 68, dqm); - err = sp_3072_mod_exp_68(tmpb, a, dq, 1536, q, 1); + sp_3072_from_mp(dq, 70, dqm); + err = sp_3072_mod_exp_70(tmpb, a, dq, 1536, q, 1); } if (err == MP_OKAY) { - sp_3072_sub_68(tmpa, tmpa, tmpb); - sp_3072_mask_68(tmp, p, tmpa[67] >> 31); - sp_3072_add_68(tmpa, tmpa, tmp); + sp_3072_sub_70(tmpa, tmpa, tmpb); + sp_3072_mask_70(tmp, p, tmpa[69] >> 31); + sp_3072_add_70(tmpa, tmpa, tmp); - sp_3072_from_mp(qi, 68, qim); - sp_3072_mul_68(tmpa, tmpa, qi); - err = sp_3072_mod_68(tmpa, tmpa, p); + sp_3072_from_mp(qi, 70, qim); + sp_3072_mul_70(tmpa, tmpa, qi); + err = sp_3072_mod_70(tmpa, tmpa, p); } if (err == MP_OKAY) { - sp_3072_mul_68(tmpa, q, tmpa); - sp_3072_add_136(r, tmpb, tmpa); - sp_3072_norm_136(r); + sp_3072_mul_70(tmpa, q, tmpa); + sp_3072_add_140(r, tmpb, tmpa); + sp_3072_norm_140(r); sp_3072_to_bin(r, out); *outLen = 384; } if (t != NULL) { - XMEMSET(t, 0, sizeof(sp_digit) * 68 * 11); + XMEMSET(t, 0, sizeof(sp_digit) * 70 * 11); XFREE(t, NULL, DYNAMIC_TYPE_RSA); } return err; #else - sp_digit a[136 * 2]; - sp_digit p[68], q[68], dp[68], dq[68], qi[68]; - sp_digit tmp[136], tmpa[136], tmpb[136]; + sp_digit a[140 * 2]; + sp_digit p[70], q[70], dp[70], dq[70], qi[70]; + sp_digit tmp[140], tmpa[140], tmpb[140]; sp_digit* r = a; int err = MP_OKAY; @@ -6651,30 +6620,30 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, mp_int* dm, err = MP_READ_E; if (err == MP_OKAY) { - sp_3072_from_bin(a, 136, in, inLen); - sp_3072_from_mp(p, 68, pm); - sp_3072_from_mp(q, 68, qm); - sp_3072_from_mp(dp, 68, dpm); - sp_3072_from_mp(dq, 68, dqm); - sp_3072_from_mp(qi, 68, qim); + sp_3072_from_bin(a, 140, in, inLen); + sp_3072_from_mp(p, 70, pm); + sp_3072_from_mp(q, 70, qm); + sp_3072_from_mp(dp, 70, dpm); + sp_3072_from_mp(dq, 70, dqm); + sp_3072_from_mp(qi, 70, qim); - err = sp_3072_mod_exp_68(tmpa, a, dp, 1536, p, 1); + err = sp_3072_mod_exp_70(tmpa, a, dp, 1536, p, 1); } if (err == MP_OKAY) - err = sp_3072_mod_exp_68(tmpb, a, dq, 1536, q, 1); + err = sp_3072_mod_exp_70(tmpb, a, dq, 1536, q, 1); if (err == MP_OKAY) { - sp_3072_sub_68(tmpa, tmpa, tmpb); - sp_3072_mask_68(tmp, p, tmpa[67] >> 31); - sp_3072_add_68(tmpa, tmpa, tmp); - sp_3072_mul_68(tmpa, tmpa, qi); - err = sp_3072_mod_68(tmpa, tmpa, p); + sp_3072_sub_70(tmpa, tmpa, tmpb); + sp_3072_mask_70(tmp, p, tmpa[69] >> 31); + sp_3072_add_70(tmpa, tmpa, tmp); + sp_3072_mul_70(tmpa, tmpa, qi); + err = sp_3072_mod_70(tmpa, tmpa, p); } if (err == MP_OKAY) { - sp_3072_mul_68(tmpa, tmpa, q); - sp_3072_add_136(r, tmpb, tmpa); - sp_3072_norm_136(r); + sp_3072_mul_70(tmpa, tmpa, q); + sp_3072_add_140(r, tmpb, tmpa); + sp_3072_norm_140(r); sp_3072_to_bin(r, out); *outLen = 384; @@ -6707,25 +6676,25 @@ static int sp_3072_to_mp(sp_digit* a, mp_int* r) err = mp_grow(r, (3072 + DIGIT_BIT - 1) / DIGIT_BIT); if (err == MP_OKAY) { -#if DIGIT_BIT == 23 - XMEMCPY(r->dp, a, sizeof(sp_digit) * 136); - r->used = 136; +#if DIGIT_BIT == 22 + XMEMCPY(r->dp, a, sizeof(sp_digit) * 140); + r->used = 140; mp_clamp(r); -#elif DIGIT_BIT < 23 +#elif DIGIT_BIT < 22 int i, j = 0, s = 0; r->dp[0] = 0; - for (i = 0; i < 136; i++) { + for (i = 0; i < 140; i++) { r->dp[j] |= a[i] << s; r->dp[j] &= (1l << DIGIT_BIT) - 1; s = DIGIT_BIT - s; r->dp[++j] = a[i] >> s; - while (s + DIGIT_BIT <= 23) { + while (s + DIGIT_BIT <= 22) { s += DIGIT_BIT; r->dp[j] &= (1l << DIGIT_BIT) - 1; r->dp[++j] = a[i] >> s; } - s = 23 - s; + s = 22 - s; } r->used = (3072 + DIGIT_BIT - 1) / DIGIT_BIT; mp_clamp(r); @@ -6733,18 +6702,18 @@ static int sp_3072_to_mp(sp_digit* a, mp_int* r) int i, j = 0, s = 0; r->dp[0] = 0; - for (i = 0; i < 136; i++) { + for (i = 0; i < 140; i++) { r->dp[j] |= ((mp_digit)a[i]) << s; - if (s + 23 >= DIGIT_BIT) { + if (s + 22 >= DIGIT_BIT) { #if DIGIT_BIT < 32 r->dp[j] &= (1l << DIGIT_BIT) - 1; #endif s = DIGIT_BIT - s; r->dp[++j] = a[i] >> s; - s = 23 - s; + s = 22 - s; } else - s += 23; + s += 22; } r->used = (3072 + DIGIT_BIT - 1) / DIGIT_BIT; mp_clamp(r); @@ -6780,22 +6749,22 @@ int sp_ModExp_3072(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res) } if (err == MP_OKAY) { - d = (sp_digit*)XMALLOC(sizeof(*d) * 136 * 4, NULL, DYNAMIC_TYPE_DH); + d = (sp_digit*)XMALLOC(sizeof(*d) * 140 * 4, NULL, DYNAMIC_TYPE_DH); if (d == NULL) err = MEMORY_E; } if (err == MP_OKAY) { b = d; - e = b + 136 * 2; - m = e + 136; + e = b + 140 * 2; + m = e + 140; r = b; - sp_3072_from_mp(b, 136, base); - sp_3072_from_mp(e, 136, exp); - sp_3072_from_mp(m, 136, mod); + sp_3072_from_mp(b, 140, base); + sp_3072_from_mp(e, 140, exp); + sp_3072_from_mp(m, 140, mod); - err = sp_3072_mod_exp_136(r, b, e, mp_count_bits(exp), m, 0); + err = sp_3072_mod_exp_140(r, b, e, mp_count_bits(exp), m, 0); } if (err == MP_OKAY) { @@ -6803,13 +6772,13 @@ int sp_ModExp_3072(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res) } if (d != NULL) { - XMEMSET(e, 0, sizeof(sp_digit) * 136); + XMEMSET(e, 0, sizeof(sp_digit) * 140); XFREE(d, NULL, DYNAMIC_TYPE_DH); } return err; #else #ifndef WOLFSSL_SMALL_STACK - sp_digit bd[272], ed[136], md[136]; + sp_digit bd[280], ed[140], md[140]; #else sp_digit* d = NULL; #endif @@ -6827,15 +6796,15 @@ int sp_ModExp_3072(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res) #ifdef WOLFSSL_SMALL_STACK if (err == MP_OKAY) { - d = (sp_digit*)XMALLOC(sizeof(*d) * 136 * 4, NULL, DYNAMIC_TYPE_DH); + d = (sp_digit*)XMALLOC(sizeof(*d) * 140 * 4, NULL, DYNAMIC_TYPE_DH); if (d == NULL) err = MEMORY_E; } if (err == MP_OKAY) { b = d; - e = b + 136 * 2; - m = e + 136; + e = b + 140 * 2; + m = e + 140; r = b; } #else @@ -6845,18 +6814,18 @@ int sp_ModExp_3072(mp_int* base, mp_int* exp, mp_int* mod, mp_int* res) #endif if (err == MP_OKAY) { - sp_3072_from_mp(b, 136, base); - sp_3072_from_mp(e, 136, exp); - sp_3072_from_mp(m, 136, mod); + sp_3072_from_mp(b, 140, base); + sp_3072_from_mp(e, 140, exp); + sp_3072_from_mp(m, 140, mod); - err = sp_3072_mod_exp_136(r, b, e, expBits, m, 0); + err = sp_3072_mod_exp_140(r, b, e, expBits, m, 0); } if (err == MP_OKAY) { err = sp_3072_to_mp(r, res); } - XMEMSET(e, 0, sizeof(sp_digit) * 136); + XMEMSET(e, 0, sizeof(sp_digit) * 140); #ifdef WOLFSSL_SMALL_STACK if (d != NULL) @@ -6897,22 +6866,22 @@ int sp_DhExp_3072(mp_int* base, const byte* exp, word32 expLen, } if (err == MP_OKAY) { - d = (sp_digit*)XMALLOC(sizeof(*d) * 136 * 4, NULL, DYNAMIC_TYPE_DH); + d = (sp_digit*)XMALLOC(sizeof(*d) * 140 * 4, NULL, DYNAMIC_TYPE_DH); if (d == NULL) err = MEMORY_E; } if (err == MP_OKAY) { b = d; - e = b + 136 * 2; - m = e + 136; + e = b + 140 * 2; + m = e + 140; r = b; - sp_3072_from_mp(b, 136, base); - sp_3072_from_bin(e, 136, exp, expLen); - sp_3072_from_mp(m, 136, mod); + sp_3072_from_mp(b, 140, base); + sp_3072_from_bin(e, 140, exp, expLen); + sp_3072_from_mp(m, 140, mod); - err = sp_3072_mod_exp_136(r, b, e, expLen * 8, m, 0); + err = sp_3072_mod_exp_140(r, b, e, expLen * 8, m, 0); } if (err == MP_OKAY) { @@ -6925,13 +6894,13 @@ int sp_DhExp_3072(mp_int* base, const byte* exp, word32 expLen, } if (d != NULL) { - XMEMSET(e, 0, sizeof(sp_digit) * 136); + XMEMSET(e, 0, sizeof(sp_digit) * 140); XFREE(d, NULL, DYNAMIC_TYPE_DH); } return err; #else #ifndef WOLFSSL_SMALL_STACK - sp_digit bd[272], ed[136], md[136]; + sp_digit bd[280], ed[140], md[140]; #else sp_digit* d = NULL; #endif @@ -6949,15 +6918,15 @@ int sp_DhExp_3072(mp_int* base, const byte* exp, word32 expLen, #ifdef WOLFSSL_SMALL_STACK if (err == MP_OKAY) { - d = (sp_digit*)XMALLOC(sizeof(*d) * 136 * 4, NULL, DYNAMIC_TYPE_DH); + d = (sp_digit*)XMALLOC(sizeof(*d) * 140 * 4, NULL, DYNAMIC_TYPE_DH); if (d == NULL) err = MEMORY_E; } if (err == MP_OKAY) { b = d; - e = b + 136 * 2; - m = e + 136; + e = b + 140 * 2; + m = e + 140; r = b; } #else @@ -6967,11 +6936,11 @@ int sp_DhExp_3072(mp_int* base, const byte* exp, word32 expLen, #endif if (err == MP_OKAY) { - sp_3072_from_mp(b, 136, base); - sp_3072_from_bin(e, 136, exp, expLen); - sp_3072_from_mp(m, 136, mod); + sp_3072_from_mp(b, 140, base); + sp_3072_from_bin(e, 140, exp, expLen); + sp_3072_from_mp(m, 140, mod); - err = sp_3072_mod_exp_136(r, b, e, expLen * 8, m, 0); + err = sp_3072_mod_exp_140(r, b, e, expLen * 8, m, 0); } if (err == MP_OKAY) { @@ -6983,7 +6952,7 @@ int sp_DhExp_3072(mp_int* base, const byte* exp, word32 expLen, XMEMMOVE(out, out + i, *outLen); } - XMEMSET(e, 0, sizeof(sp_digit) * 136); + XMEMSET(e, 0, sizeof(sp_digit) * 140); #ifdef WOLFSSL_SMALL_STACK if (d != NULL) diff --git a/wolfcrypt/src/tfm.c b/wolfcrypt/src/tfm.c index 599e57240..a5af9d63c 100644 --- a/wolfcrypt/src/tfm.c +++ b/wolfcrypt/src/tfm.c @@ -2399,10 +2399,10 @@ void fp_read_unsigned_bin(fp_int *a, const unsigned char *b, int c) /* Use Duff's device to unroll the loop. */ int idx = (c - 1) & ~3; switch (c % 4) { - case 0: do { pd[idx+0] = *b++; - case 3: pd[idx+1] = *b++; - case 2: pd[idx+2] = *b++; - case 1: pd[idx+3] = *b++; + case 0: do { pd[idx+0] = *b++; // fallthrough + case 3: pd[idx+1] = *b++; // fallthrough + case 2: pd[idx+2] = *b++; // fallthrough + case 1: pd[idx+3] = *b++; // fallthrough idx -= 4; } while ((c -= 4) > 0); }