From 189c9ab234b4dacce39f4fcdda0cb03a19093360 Mon Sep 17 00:00:00 2001 From: Sean Parkinson Date: Wed, 4 May 2022 17:02:45 +1000 Subject: [PATCH] Constant time changes GCM: make borrow constant time. AES-GCM decrypt: compare at end and constant time. Random: array_add touches all elements every time. RSA-OAEP: look for padding byte in constant time (look at every byte in array). SP - reduce conditional use (make them bit ops) - Fix point adds to not use double when adding infinity to infinity - Implement signed div as __divi3 is not constant time. - Move checks of input variables to API. --- wolfcrypt/src/aes.c | 17 +- wolfcrypt/src/random.c | 2 +- wolfcrypt/src/rsa.c | 10 +- wolfcrypt/src/sp_arm32.c | 149 +- wolfcrypt/src/sp_arm64.c | 149 +- wolfcrypt/src/sp_armthumb.c | 149 +- wolfcrypt/src/sp_c32.c | 2469 +++++++++----------------- wolfcrypt/src/sp_c64.c | 3249 ++++++++++------------------------- wolfcrypt/src/sp_cortexm.c | 149 +- wolfcrypt/src/sp_x86_64.c | 227 +-- 10 files changed, 2080 insertions(+), 4490 deletions(-) diff --git a/wolfcrypt/src/aes.c b/wolfcrypt/src/aes.c index 6a1af1f72..34fc62d56 100644 --- a/wolfcrypt/src/aes.c +++ b/wolfcrypt/src/aes.c @@ -4578,14 +4578,14 @@ static WC_INLINE void RIGHTSHIFTX(byte* x) { int i; int carryIn = 0; - int borrow = x[15] & 0x01; + byte borrow = (0x00 - (x[15] & 0x01)) & 0xE1; for (i = 0; i < AES_BLOCK_SIZE; i++) { - int carryOut = x[i] & 0x01; - x[i] = (x[i] >> 1) | (carryIn ? 0x80 : 0); + int carryOut = (x[i] & 0x01) << 7; + x[i] = (x[i] >> 1) | carryIn; carryIn = carryOut; } - if (borrow) x[0] ^= 0xE1; + x[0] ^= borrow; } #endif /* defined(GCM_SMALL) || defined(GCM_TABLE) || defined(GCM_TABLE_4BIT) */ @@ -8153,6 +8153,7 @@ int WARN_UNUSED_RESULT AES_GCM_decrypt_C( ALIGN32 byte scratch[AES_BLOCK_SIZE]; ALIGN32 byte Tprime[AES_BLOCK_SIZE]; ALIGN32 byte EKY0[AES_BLOCK_SIZE]; + sword32 res; if (ivSz == GCM_NONCE_MID_SZ) { /* Counter is IV with bottom 4 bytes set to: 0x00,0x00,0x00,0x01. 
*/ @@ -8187,9 +8188,6 @@ int WARN_UNUSED_RESULT AES_GCM_decrypt_C( aes->aadLen = authInSz; } #endif - if (ConstantCompare(authTag, Tprime, authTagSz) != 0) { - return AES_GCM_AUTH_E; - } #if defined(WOLFSSL_PIC32MZ_CRYPT) if (blocks) { @@ -8248,6 +8246,11 @@ int WARN_UNUSED_RESULT AES_GCM_decrypt_C( XMEMCPY(p, scratch, partial); } + /* ConstantCompare returns XOR of bytes. */ + res = ConstantCompare(authTag, Tprime, authTagSz); + res = (0 - res) >> 31; + ret = (ret & ~res) | (res & AES_GCM_AUTH_E); + return ret; } diff --git a/wolfcrypt/src/random.c b/wolfcrypt/src/random.c index fbd91d468..331c8e6f3 100644 --- a/wolfcrypt/src/random.c +++ b/wolfcrypt/src/random.c @@ -598,7 +598,7 @@ static WC_INLINE void array_add(byte* d, word32 dLen, const byte* s, word32 sLen dIdx--; } - for (; carry != 0 && dIdx >= 0; dIdx--) { + for (; dIdx >= 0; dIdx--) { carry += (word16)d[dIdx]; d[dIdx] = (byte)carry; carry >>= 8; diff --git a/wolfcrypt/src/rsa.c b/wolfcrypt/src/rsa.c index 320cb8030..13016c8c4 100644 --- a/wolfcrypt/src/rsa.c +++ b/wolfcrypt/src/rsa.c @@ -1515,6 +1515,8 @@ static int RsaUnPad_OAEP(byte *pkcsBlock, unsigned int pkcsBlockLen, byte h[WC_MAX_DIGEST_SIZE]; /* max digest size */ byte* tmp; word32 idx; + word32 i; + word32 inc; /* no label is allowed, but catch if no label provided and length > 0 */ if (optLabel == NULL && labelLen > 0) { @@ -1561,7 +1563,13 @@ static int RsaUnPad_OAEP(byte *pkcsBlock, unsigned int pkcsBlockLen, /* advance idx to index of PS and msg separator, account for PS size of 0*/ idx = hLen + 1 + hLen; - while (idx < pkcsBlockLen-1 && pkcsBlock[idx] == 0) {idx++;} + /* Don't reveal length of message: look at every byte. */ + inc = 1; + for (i = hLen + 1 + hLen; i < pkcsBlockLen - 1; i++) { + /* Looking for non-zero byte. 
*/ + inc &= 1 - (((word32)0 - pkcsBlock[i]) >> 31); + idx += inc; + } /* create hash of label for comparison with hash sent */ if ((ret = wc_Hash(hType, optLabel, labelLen, h, hLen)) != 0) { diff --git a/wolfcrypt/src/sp_arm32.c b/wolfcrypt/src/sp_arm32.c index 25f60ec6e..179f569d0 100644 --- a/wolfcrypt/src/sp_arm32.c +++ b/wolfcrypt/src/sp_arm32.c @@ -4542,10 +4542,7 @@ static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -4697,10 +4694,7 @@ static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -6986,10 +6980,7 @@ static int sp_2048_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -7132,10 +7123,7 @@ static int sp_2048_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -7722,6 +7710,12 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err = MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if (mp_iseven(qm)) { + err = MP_VAL; + } #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) if (err == MP_OKAY) { @@ -8329,7 +8323,7 @@ static int sp_2048_mod_exp_2_64(sp_digit* r, const sp_digit* e, int bits, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { + if (bits == 0) { err = MP_VAL; } @@ -14993,10 +14987,7 @@ static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + 
if (bits == 0) { err = MP_VAL; } @@ -15148,10 +15139,7 @@ static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -18269,10 +18257,7 @@ static int sp_3072_mod_exp_96(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -18415,10 +18400,7 @@ static int sp_3072_mod_exp_96(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -19061,6 +19043,12 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err = MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if (mp_iseven(qm)) { + err = MP_VAL; + } #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) if (err == MP_OKAY) { @@ -19860,7 +19848,7 @@ static int sp_3072_mod_exp_2_96(sp_digit* r, const sp_digit* e, int bits, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { + if (bits == 0) { err = MP_VAL; } @@ -25972,10 +25960,7 @@ static int sp_4096_mod_exp_128(sp_digit* r, const sp_digit* a, const sp_digit* e byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -26118,10 +26103,7 @@ static int sp_4096_mod_exp_128(sp_digit* r, const sp_digit* a, const sp_digit* e byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -26820,6 +26802,12 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err = MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if (mp_iseven(qm)) { + err = MP_VAL; + } #if defined(WOLFSSL_SMALL_STACK) && 
!defined(WOLFSSL_SP_NO_MALLOC) if (err == MP_OKAY) { @@ -27811,7 +27799,7 @@ static int sp_4096_mod_exp_2_128(sp_digit* r, const sp_digit* e, int bits, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { + if (bits == 0) { err = MP_VAL; } @@ -31201,8 +31189,7 @@ static void sp_256_map_8(sp_point_256* r, const sp_point_256* p, sp_256_mont_reduce_8(r->x, p256_mod, p256_mp_mod); /* Reduce x to less than modulus */ n = sp_256_cmp_8(r->x, p256_mod); - sp_256_cond_sub_8(r->x, r->x, p256_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_256_cond_sub_8(r->x, r->x, p256_mod, ~(n >> 31)); sp_256_norm_8(r->x); /* y /= z^3 */ @@ -31211,8 +31198,7 @@ static void sp_256_map_8(sp_point_256* r, const sp_point_256* p, sp_256_mont_reduce_8(r->y, p256_mod, p256_mp_mod); /* Reduce y to less than modulus */ n = sp_256_cmp_8(r->y, p256_mod); - sp_256_cond_sub_8(r->y, r->y, p256_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_256_cond_sub_8(r->y, r->y, p256_mod, ~(n >> 31)); sp_256_norm_8(r->y); XMEMSET(r->z, 0, sizeof(r->z) / 2); @@ -31850,7 +31836,8 @@ static int sp_256_proj_point_add_8_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, /* Check double */ (void)sp_256_sub_8(ctx->t1, p256_mod, q->y); sp_256_norm_8(ctx->t1); - if ((sp_256_cmp_equal_8(p->x, q->x) & sp_256_cmp_equal_8(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_256_cmp_equal_8(p->x, q->x) & sp_256_cmp_equal_8(p->z, q->z) & (sp_256_cmp_equal_8(p->y, q->y) | sp_256_cmp_equal_8(p->y, ctx->t1))) != 0) { XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); @@ -32018,7 +32005,8 @@ static void sp_256_proj_point_add_8(sp_point_256* r, /* Check double */ (void)sp_256_sub_8(t1, p256_mod, q->y); sp_256_norm_8(t1); - if ((sp_256_cmp_equal_8(p->x, q->x) & sp_256_cmp_equal_8(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_256_cmp_equal_8(p->x, q->x) & sp_256_cmp_equal_8(p->z, q->z) & (sp_256_cmp_equal_8(p->y, q->y) | sp_256_cmp_equal_8(p->y, t1))) != 0) { sp_256_proj_point_dbl_8(r, p, t); } @@ -32051,7 +32039,8 @@ 
static void sp_256_proj_point_add_8(sp_point_256* r, sp_256_mont_sub_8(t2, t2, t1, p256_mod); /* R = S2 - S1 */ sp_256_mont_sub_8(t4, t4, t3, p256_mod); - if (sp_256_iszero_8(t2) & sp_256_iszero_8(t4) & maskt) { + if (~p->infinity & ~q->infinity & + sp_256_iszero_8(t2) & sp_256_iszero_8(t4) & maskt) { sp_256_proj_point_dbl_8(r, p, t); } else { @@ -32493,7 +32482,8 @@ static void sp_256_proj_point_add_qz1_8(sp_point_256* r, const sp_point_256* p, /* Check double */ (void)sp_256_sub_8(t1, p256_mod, q->y); sp_256_norm_8(t1); - if ((sp_256_cmp_equal_8(p->x, q->x) & sp_256_cmp_equal_8(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_256_cmp_equal_8(p->x, q->x) & sp_256_cmp_equal_8(p->z, q->z) & (sp_256_cmp_equal_8(p->y, q->y) | sp_256_cmp_equal_8(p->y, t1))) != 0) { sp_256_proj_point_dbl_8(r, p, t); } @@ -40347,8 +40337,7 @@ static void sp_384_map_12(sp_point_384* r, const sp_point_384* p, sp_384_mont_reduce_12(r->x, p384_mod, p384_mp_mod); /* Reduce x to less than modulus */ n = sp_384_cmp_12(r->x, p384_mod); - sp_384_cond_sub_12(r->x, r->x, p384_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_384_cond_sub_12(r->x, r->x, p384_mod, ~(n >> 31)); sp_384_norm_12(r->x); /* y /= z^3 */ @@ -40357,8 +40346,7 @@ static void sp_384_map_12(sp_point_384* r, const sp_point_384* p, sp_384_mont_reduce_12(r->y, p384_mod, p384_mp_mod); /* Reduce y to less than modulus */ n = sp_384_cmp_12(r->y, p384_mod); - sp_384_cond_sub_12(r->y, r->y, p384_mod, 0 - ((n >= 0) ? 
- (sp_digit)1 : (sp_digit)0)); + sp_384_cond_sub_12(r->y, r->y, p384_mod, ~(n >> 31)); sp_384_norm_12(r->y); XMEMSET(r->z, 0, sizeof(r->z) / 2); @@ -40886,7 +40874,8 @@ static int sp_384_proj_point_add_12_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, /* Check double */ (void)sp_384_sub_12(ctx->t1, p384_mod, q->y); sp_384_norm_12(ctx->t1); - if ((sp_384_cmp_equal_12(p->x, q->x) & sp_384_cmp_equal_12(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_384_cmp_equal_12(p->x, q->x) & sp_384_cmp_equal_12(p->z, q->z) & (sp_384_cmp_equal_12(p->y, q->y) | sp_384_cmp_equal_12(p->y, ctx->t1))) != 0) { XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); @@ -41054,7 +41043,8 @@ static void sp_384_proj_point_add_12(sp_point_384* r, /* Check double */ (void)sp_384_sub_12(t1, p384_mod, q->y); sp_384_norm_12(t1); - if ((sp_384_cmp_equal_12(p->x, q->x) & sp_384_cmp_equal_12(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_384_cmp_equal_12(p->x, q->x) & sp_384_cmp_equal_12(p->z, q->z) & (sp_384_cmp_equal_12(p->y, q->y) | sp_384_cmp_equal_12(p->y, t1))) != 0) { sp_384_proj_point_dbl_12(r, p, t); } @@ -41087,7 +41077,8 @@ static void sp_384_proj_point_add_12(sp_point_384* r, sp_384_mont_sub_12(t2, t2, t1, p384_mod); /* R = S2 - S1 */ sp_384_mont_sub_12(t4, t4, t3, p384_mod); - if (sp_384_iszero_12(t2) & sp_384_iszero_12(t4) & maskt) { + if (~p->infinity & ~q->infinity & + sp_384_iszero_12(t2) & sp_384_iszero_12(t4) & maskt) { sp_384_proj_point_dbl_12(r, p, t); } else { @@ -41553,7 +41544,8 @@ static void sp_384_proj_point_add_qz1_12(sp_point_384* r, const sp_point_384* p, /* Check double */ (void)sp_384_sub_12(t1, p384_mod, q->y); sp_384_norm_12(t1); - if ((sp_384_cmp_equal_12(p->x, q->x) & sp_384_cmp_equal_12(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_384_cmp_equal_12(p->x, q->x) & sp_384_cmp_equal_12(p->z, q->z) & (sp_384_cmp_equal_12(p->y, q->y) | sp_384_cmp_equal_12(p->y, t1))) != 0) { sp_384_proj_point_dbl_12(r, p, t); } @@ -51424,8 +51416,7 @@ static void 
sp_521_map_17(sp_point_521* r, const sp_point_521* p, sp_521_mont_reduce_17(r->x, p521_mod, p521_mp_mod); /* Reduce x to less than modulus */ n = sp_521_cmp_17(r->x, p521_mod); - sp_521_cond_sub_17(r->x, r->x, p521_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_521_cond_sub_17(r->x, r->x, p521_mod, ~(n >> 31)); sp_521_norm_17(r->x); /* y /= z^3 */ @@ -51434,8 +51425,7 @@ static void sp_521_map_17(sp_point_521* r, const sp_point_521* p, sp_521_mont_reduce_17(r->y, p521_mod, p521_mp_mod); /* Reduce y to less than modulus */ n = sp_521_cmp_17(r->y, p521_mod); - sp_521_cond_sub_17(r->y, r->y, p521_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_521_cond_sub_17(r->y, r->y, p521_mod, ~(n >> 31)); sp_521_norm_17(r->y); XMEMSET(r->z, 0, sizeof(r->z) / 2); @@ -52307,7 +52297,8 @@ static int sp_521_proj_point_add_17_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, /* Check double */ (void)sp_521_sub_17(ctx->t1, p521_mod, q->y); sp_521_norm_17(ctx->t1); - if ((sp_521_cmp_equal_17(p->x, q->x) & sp_521_cmp_equal_17(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_521_cmp_equal_17(p->x, q->x) & sp_521_cmp_equal_17(p->z, q->z) & (sp_521_cmp_equal_17(p->y, q->y) | sp_521_cmp_equal_17(p->y, ctx->t1))) != 0) { XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); @@ -52475,7 +52466,8 @@ static void sp_521_proj_point_add_17(sp_point_521* r, /* Check double */ (void)sp_521_sub_17(t1, p521_mod, q->y); sp_521_norm_17(t1); - if ((sp_521_cmp_equal_17(p->x, q->x) & sp_521_cmp_equal_17(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_521_cmp_equal_17(p->x, q->x) & sp_521_cmp_equal_17(p->z, q->z) & (sp_521_cmp_equal_17(p->y, q->y) | sp_521_cmp_equal_17(p->y, t1))) != 0) { sp_521_proj_point_dbl_17(r, p, t); } @@ -52508,7 +52500,8 @@ static void sp_521_proj_point_add_17(sp_point_521* r, sp_521_mont_sub_17(t2, t2, t1, p521_mod); /* R = S2 - S1 */ sp_521_mont_sub_17(t4, t4, t3, p521_mod); - if (sp_521_iszero_17(t2) & sp_521_iszero_17(t4) & maskt) { + if (~p->infinity & 
~q->infinity & + sp_521_iszero_17(t2) & sp_521_iszero_17(t4) & maskt) { sp_521_proj_point_dbl_17(r, p, t); } else { @@ -53008,7 +53001,8 @@ static void sp_521_proj_point_add_qz1_17(sp_point_521* r, const sp_point_521* p, /* Check double */ (void)sp_521_sub_17(t1, p521_mod, q->y); sp_521_norm_17(t1); - if ((sp_521_cmp_equal_17(p->x, q->x) & sp_521_cmp_equal_17(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_521_cmp_equal_17(p->x, q->x) & sp_521_cmp_equal_17(p->z, q->z) & (sp_521_cmp_equal_17(p->y, q->y) | sp_521_cmp_equal_17(p->y, t1))) != 0) { sp_521_proj_point_dbl_17(r, p, t); } @@ -64519,8 +64513,7 @@ static void sp_1024_map_32(sp_point_1024* r, const sp_point_1024* p, sp_1024_mont_reduce_32(r->x, p1024_mod, p1024_mp_mod); /* Reduce x to less than modulus */ n = sp_1024_cmp_32(r->x, p1024_mod); - sp_1024_cond_sub_32(r->x, r->x, p1024_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_1024_cond_sub_32(r->x, r->x, p1024_mod, ~(n >> 31)); sp_1024_norm_32(r->x); /* y /= z^3 */ @@ -64529,8 +64522,7 @@ static void sp_1024_map_32(sp_point_1024* r, const sp_point_1024* p, sp_1024_mont_reduce_32(r->y, p1024_mod, p1024_mp_mod); /* Reduce y to less than modulus */ n = sp_1024_cmp_32(r->y, p1024_mod); - sp_1024_cond_sub_32(r->y, r->y, p1024_mod, 0 - ((n >= 0) ? 
- (sp_digit)1 : (sp_digit)0)); + sp_1024_cond_sub_32(r->y, r->y, p1024_mod, ~(n >> 31)); sp_1024_norm_32(r->y); XMEMSET(r->z, 0, sizeof(r->z) / 2); @@ -66089,7 +66081,8 @@ static int sp_1024_proj_point_add_32_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, /* Check double */ (void)sp_1024_sub_32(ctx->t1, p1024_mod, q->y); sp_1024_norm_32(ctx->t1); - if ((sp_1024_cmp_equal_32(p->x, q->x) & sp_1024_cmp_equal_32(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_1024_cmp_equal_32(p->x, q->x) & sp_1024_cmp_equal_32(p->z, q->z) & (sp_1024_cmp_equal_32(p->y, q->y) | sp_1024_cmp_equal_32(p->y, ctx->t1))) != 0) { XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); @@ -66257,7 +66250,8 @@ static void sp_1024_proj_point_add_32(sp_point_1024* r, /* Check double */ (void)sp_1024_mont_sub_32(t1, p1024_mod, q->y, p1024_mod); sp_1024_norm_32(t1); - if ((sp_1024_cmp_equal_32(p->x, q->x) & sp_1024_cmp_equal_32(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_1024_cmp_equal_32(p->x, q->x) & sp_1024_cmp_equal_32(p->z, q->z) & (sp_1024_cmp_equal_32(p->y, q->y) | sp_1024_cmp_equal_32(p->y, t1))) != 0) { sp_1024_proj_point_dbl_32(r, p, t); } @@ -66290,7 +66284,8 @@ static void sp_1024_proj_point_add_32(sp_point_1024* r, sp_1024_mont_sub_32(t2, t2, t1, p1024_mod); /* R = S2 - S1 */ sp_1024_mont_sub_32(t4, t4, t3, p1024_mod); - if (sp_1024_iszero_32(t2) & sp_1024_iszero_32(t4) & maskt) { + if (~p->infinity & ~q->infinity & + sp_1024_iszero_32(t2) & sp_1024_iszero_32(t4) & maskt) { sp_1024_proj_point_dbl_32(r, p, t); } else { @@ -66620,7 +66615,8 @@ static void sp_1024_proj_point_add_qz1_32(sp_point_1024* r, const sp_point_1024* /* Check double */ (void)sp_1024_mont_sub_32(t1, p1024_mod, q->y, p1024_mod); sp_1024_norm_32(t1); - if ((sp_1024_cmp_equal_32(p->x, q->x) & sp_1024_cmp_equal_32(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_1024_cmp_equal_32(p->x, q->x) & sp_1024_cmp_equal_32(p->z, q->z) & (sp_1024_cmp_equal_32(p->y, q->y) | sp_1024_cmp_equal_32(p->y, t1))) != 0) { 
sp_1024_proj_point_dbl_32(r, p, t); } @@ -74782,8 +74778,7 @@ static int sp_1024_ecc_is_point_32(const sp_point_1024* point, sp_1024_mont_add_32(t1, t1, point->x, p1024_mod); n = sp_1024_cmp_32(t1, p1024_mod); - sp_1024_cond_sub_32(t1, t1, p1024_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_1024_cond_sub_32(t1, t1, p1024_mod, ~(n >> 31)); sp_1024_norm_32(t1); if (!sp_1024_iszero_32(t1)) { err = MP_VAL; diff --git a/wolfcrypt/src/sp_arm64.c b/wolfcrypt/src/sp_arm64.c index 49eb1f630..7e288aa8e 100644 --- a/wolfcrypt/src/sp_arm64.c +++ b/wolfcrypt/src/sp_arm64.c @@ -3995,10 +3995,7 @@ static int sp_2048_mod_exp_16(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -4150,10 +4147,7 @@ static int sp_2048_mod_exp_16(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -5599,10 +5593,7 @@ static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -5771,10 +5762,7 @@ static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -6271,6 +6259,12 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err = MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if (mp_iseven(qm)) { + err = MP_VAL; + } #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) if (err == MP_OKAY) { @@ -6688,7 +6682,7 @@ static int sp_2048_mod_exp_2_32(sp_digit* r, const sp_digit* e, int bits, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { + if (bits 
== 0) { err = MP_VAL; } @@ -13138,10 +13132,7 @@ static int sp_3072_mod_exp_24(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -13293,10 +13284,7 @@ static int sp_3072_mod_exp_24(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -15134,10 +15122,7 @@ static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -15280,10 +15265,7 @@ static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -15730,6 +15712,12 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err = MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if (mp_iseven(qm)) { + err = MP_VAL; + } #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) if (err == MP_OKAY) { @@ -16243,7 +16231,7 @@ static int sp_3072_mod_exp_2_48(sp_digit* r, const sp_digit* e, int bits, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { + if (bits == 0) { err = MP_VAL; } @@ -20195,10 +20183,7 @@ static int sp_4096_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -20341,10 +20326,7 @@ static int sp_4096_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -20791,6 +20773,12 @@ int 
sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err = MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if (mp_iseven(qm)) { + err = MP_VAL; + } #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) if (err == MP_OKAY) { @@ -21400,7 +21388,7 @@ static int sp_4096_mod_exp_2_64(sp_digit* r, const sp_digit* e, int bits, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { + if (bits == 0) { err = MP_VAL; } @@ -23067,8 +23055,7 @@ static void sp_256_map_4(sp_point_256* r, const sp_point_256* p, sp_256_mont_reduce_4(r->x, p256_mod, p256_mp_mod); /* Reduce x to less than modulus */ n = sp_256_cmp_4(r->x, p256_mod); - sp_256_cond_sub_4(r->x, r->x, p256_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_256_cond_sub_4(r->x, r->x, p256_mod, ~(n >> 63)); sp_256_norm_4(r->x); /* y /= z^3 */ @@ -23077,8 +23064,7 @@ static void sp_256_map_4(sp_point_256* r, const sp_point_256* p, sp_256_mont_reduce_4(r->y, p256_mod, p256_mp_mod); /* Reduce y to less than modulus */ n = sp_256_cmp_4(r->y, p256_mod); - sp_256_cond_sub_4(r->y, r->y, p256_mod, 0 - ((n >= 0) ? 
- (sp_digit)1 : (sp_digit)0)); + sp_256_cond_sub_4(r->y, r->y, p256_mod, ~(n >> 63)); sp_256_norm_4(r->y); XMEMSET(r->z, 0, sizeof(r->z) / 2); @@ -23730,7 +23716,8 @@ static int sp_256_proj_point_add_4_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, /* Check double */ (void)sp_256_sub_4(ctx->t1, p256_mod, q->y); sp_256_norm_4(ctx->t1); - if ((sp_256_cmp_equal_4(p->x, q->x) & sp_256_cmp_equal_4(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_256_cmp_equal_4(p->x, q->x) & sp_256_cmp_equal_4(p->z, q->z) & (sp_256_cmp_equal_4(p->y, q->y) | sp_256_cmp_equal_4(p->y, ctx->t1))) != 0) { XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); @@ -23898,7 +23885,8 @@ static void sp_256_proj_point_add_4(sp_point_256* r, /* Check double */ (void)sp_256_sub_4(t1, p256_mod, q->y); sp_256_norm_4(t1); - if ((sp_256_cmp_equal_4(p->x, q->x) & sp_256_cmp_equal_4(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_256_cmp_equal_4(p->x, q->x) & sp_256_cmp_equal_4(p->z, q->z) & (sp_256_cmp_equal_4(p->y, q->y) | sp_256_cmp_equal_4(p->y, t1))) != 0) { sp_256_proj_point_dbl_4(r, p, t); } @@ -23931,7 +23919,8 @@ static void sp_256_proj_point_add_4(sp_point_256* r, sp_256_mont_sub_4(t2, t2, t1, p256_mod); /* R = S2 - S1 */ sp_256_mont_sub_4(t4, t4, t3, p256_mod); - if (sp_256_iszero_4(t2) & sp_256_iszero_4(t4) & maskt) { + if (~p->infinity & ~q->infinity & + sp_256_iszero_4(t2) & sp_256_iszero_4(t4) & maskt) { sp_256_proj_point_dbl_4(r, p, t); } else { @@ -24408,7 +24397,8 @@ static void sp_256_proj_point_add_qz1_4(sp_point_256* r, const sp_point_256* p, /* Check double */ (void)sp_256_sub_4(t1, p256_mod, q->y); sp_256_norm_4(t1); - if ((sp_256_cmp_equal_4(p->x, q->x) & sp_256_cmp_equal_4(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_256_cmp_equal_4(p->x, q->x) & sp_256_cmp_equal_4(p->z, q->z) & (sp_256_cmp_equal_4(p->y, q->y) | sp_256_cmp_equal_4(p->y, t1))) != 0) { sp_256_proj_point_dbl_4(r, p, t); } @@ -43180,8 +43170,7 @@ static void sp_384_map_6(sp_point_384* r, const 
sp_point_384* p, sp_384_mont_reduce_6(r->x, p384_mod, p384_mp_mod); /* Reduce x to less than modulus */ n = sp_384_cmp_6(r->x, p384_mod); - sp_384_cond_sub_6(r->x, r->x, p384_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_384_cond_sub_6(r->x, r->x, p384_mod, ~(n >> 63)); sp_384_norm_6(r->x); /* y /= z^3 */ @@ -43190,8 +43179,7 @@ static void sp_384_map_6(sp_point_384* r, const sp_point_384* p, sp_384_mont_reduce_6(r->y, p384_mod, p384_mp_mod); /* Reduce y to less than modulus */ n = sp_384_cmp_6(r->y, p384_mod); - sp_384_cond_sub_6(r->y, r->y, p384_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_384_cond_sub_6(r->y, r->y, p384_mod, ~(n >> 63)); sp_384_norm_6(r->y); XMEMSET(r->z, 0, sizeof(r->z) / 2); @@ -43752,7 +43740,8 @@ static int sp_384_proj_point_add_6_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, /* Check double */ (void)sp_384_sub_6(ctx->t1, p384_mod, q->y); sp_384_norm_6(ctx->t1); - if ((sp_384_cmp_equal_6(p->x, q->x) & sp_384_cmp_equal_6(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_384_cmp_equal_6(p->x, q->x) & sp_384_cmp_equal_6(p->z, q->z) & (sp_384_cmp_equal_6(p->y, q->y) | sp_384_cmp_equal_6(p->y, ctx->t1))) != 0) { XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); @@ -43920,7 +43909,8 @@ static void sp_384_proj_point_add_6(sp_point_384* r, /* Check double */ (void)sp_384_sub_6(t1, p384_mod, q->y); sp_384_norm_6(t1); - if ((sp_384_cmp_equal_6(p->x, q->x) & sp_384_cmp_equal_6(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_384_cmp_equal_6(p->x, q->x) & sp_384_cmp_equal_6(p->z, q->z) & (sp_384_cmp_equal_6(p->y, q->y) | sp_384_cmp_equal_6(p->y, t1))) != 0) { sp_384_proj_point_dbl_6(r, p, t); } @@ -43953,7 +43943,8 @@ static void sp_384_proj_point_add_6(sp_point_384* r, sp_384_mont_sub_6(t2, t2, t1, p384_mod); /* R = S2 - S1 */ sp_384_mont_sub_6(t4, t4, t3, p384_mod); - if (sp_384_iszero_6(t2) & sp_384_iszero_6(t4) & maskt) { + if (~p->infinity & ~q->infinity & + sp_384_iszero_6(t2) & sp_384_iszero_6(t4) & maskt) { 
sp_384_proj_point_dbl_6(r, p, t); } else { @@ -44446,7 +44437,8 @@ static void sp_384_proj_point_add_qz1_6(sp_point_384* r, const sp_point_384* p, /* Check double */ (void)sp_384_sub_6(t1, p384_mod, q->y); sp_384_norm_6(t1); - if ((sp_384_cmp_equal_6(p->x, q->x) & sp_384_cmp_equal_6(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_384_cmp_equal_6(p->x, q->x) & sp_384_cmp_equal_6(p->z, q->z) & (sp_384_cmp_equal_6(p->y, q->y) | sp_384_cmp_equal_6(p->y, t1))) != 0) { sp_384_proj_point_dbl_6(r, p, t); } @@ -71120,8 +71112,7 @@ static void sp_521_map_9(sp_point_521* r, const sp_point_521* p, sp_521_mont_reduce_9(r->x, p521_mod, p521_mp_mod); /* Reduce x to less than modulus */ n = sp_521_cmp_9(r->x, p521_mod); - sp_521_cond_sub_9(r->x, r->x, p521_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_521_cond_sub_9(r->x, r->x, p521_mod, ~(n >> 63)); sp_521_norm_9(r->x); /* y /= z^3 */ @@ -71130,8 +71121,7 @@ static void sp_521_map_9(sp_point_521* r, const sp_point_521* p, sp_521_mont_reduce_9(r->y, p521_mod, p521_mp_mod); /* Reduce y to less than modulus */ n = sp_521_cmp_9(r->y, p521_mod); - sp_521_cond_sub_9(r->y, r->y, p521_mod, 0 - ((n >= 0) ? 
- (sp_digit)1 : (sp_digit)0)); + sp_521_cond_sub_9(r->y, r->y, p521_mod, ~(n >> 63)); sp_521_norm_9(r->y); XMEMSET(r->z, 0, sizeof(r->z) / 2); @@ -71862,7 +71852,8 @@ static int sp_521_proj_point_add_9_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, /* Check double */ (void)sp_521_sub_9(ctx->t1, p521_mod, q->y); sp_521_norm_9(ctx->t1); - if ((sp_521_cmp_equal_9(p->x, q->x) & sp_521_cmp_equal_9(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_521_cmp_equal_9(p->x, q->x) & sp_521_cmp_equal_9(p->z, q->z) & (sp_521_cmp_equal_9(p->y, q->y) | sp_521_cmp_equal_9(p->y, ctx->t1))) != 0) { XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); @@ -72030,7 +72021,8 @@ static void sp_521_proj_point_add_9(sp_point_521* r, /* Check double */ (void)sp_521_sub_9(t1, p521_mod, q->y); sp_521_norm_9(t1); - if ((sp_521_cmp_equal_9(p->x, q->x) & sp_521_cmp_equal_9(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_521_cmp_equal_9(p->x, q->x) & sp_521_cmp_equal_9(p->z, q->z) & (sp_521_cmp_equal_9(p->y, q->y) | sp_521_cmp_equal_9(p->y, t1))) != 0) { sp_521_proj_point_dbl_9(r, p, t); } @@ -72063,7 +72055,8 @@ static void sp_521_proj_point_add_9(sp_point_521* r, sp_521_mont_sub_9(t2, t2, t1, p521_mod); /* R = S2 - S1 */ sp_521_mont_sub_9(t4, t4, t3, p521_mod); - if (sp_521_iszero_9(t2) & sp_521_iszero_9(t4) & maskt) { + if (~p->infinity & ~q->infinity & + sp_521_iszero_9(t2) & sp_521_iszero_9(t4) & maskt) { sp_521_proj_point_dbl_9(r, p, t); } else { @@ -72574,7 +72567,8 @@ static void sp_521_proj_point_add_qz1_9(sp_point_521* r, const sp_point_521* p, /* Check double */ (void)sp_521_sub_9(t1, p521_mod, q->y); sp_521_norm_9(t1); - if ((sp_521_cmp_equal_9(p->x, q->x) & sp_521_cmp_equal_9(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_521_cmp_equal_9(p->x, q->x) & sp_521_cmp_equal_9(p->z, q->z) & (sp_521_cmp_equal_9(p->y, q->y) | sp_521_cmp_equal_9(p->y, t1))) != 0) { sp_521_proj_point_dbl_9(r, p, t); } @@ -114629,8 +114623,7 @@ static void sp_1024_map_16(sp_point_1024* r, const 
sp_point_1024* p, sp_1024_mont_reduce_16(r->x, p1024_mod, p1024_mp_mod); /* Reduce x to less than modulus */ n = sp_1024_cmp_16(r->x, p1024_mod); - sp_1024_cond_sub_16(r->x, r->x, p1024_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_1024_cond_sub_16(r->x, r->x, p1024_mod, ~(n >> 63)); sp_1024_norm_16(r->x); /* y /= z^3 */ @@ -114639,8 +114632,7 @@ static void sp_1024_map_16(sp_point_1024* r, const sp_point_1024* p, sp_1024_mont_reduce_16(r->y, p1024_mod, p1024_mp_mod); /* Reduce y to less than modulus */ n = sp_1024_cmp_16(r->y, p1024_mod); - sp_1024_cond_sub_16(r->y, r->y, p1024_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_1024_cond_sub_16(r->y, r->y, p1024_mod, ~(n >> 63)); sp_1024_norm_16(r->y); XMEMSET(r->z, 0, sizeof(r->z) / 2); @@ -115765,7 +115757,8 @@ static int sp_1024_proj_point_add_16_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, /* Check double */ (void)sp_1024_sub_16(ctx->t1, p1024_mod, q->y); sp_1024_norm_16(ctx->t1); - if ((sp_1024_cmp_equal_16(p->x, q->x) & sp_1024_cmp_equal_16(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_1024_cmp_equal_16(p->x, q->x) & sp_1024_cmp_equal_16(p->z, q->z) & (sp_1024_cmp_equal_16(p->y, q->y) | sp_1024_cmp_equal_16(p->y, ctx->t1))) != 0) { XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); @@ -115933,7 +115926,8 @@ static void sp_1024_proj_point_add_16(sp_point_1024* r, /* Check double */ (void)sp_1024_mont_sub_16(t1, p1024_mod, q->y, p1024_mod); sp_1024_norm_16(t1); - if ((sp_1024_cmp_equal_16(p->x, q->x) & sp_1024_cmp_equal_16(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_1024_cmp_equal_16(p->x, q->x) & sp_1024_cmp_equal_16(p->z, q->z) & (sp_1024_cmp_equal_16(p->y, q->y) | sp_1024_cmp_equal_16(p->y, t1))) != 0) { sp_1024_proj_point_dbl_16(r, p, t); } @@ -115966,7 +115960,8 @@ static void sp_1024_proj_point_add_16(sp_point_1024* r, sp_1024_mont_sub_16(t2, t2, t1, p1024_mod); /* R = S2 - S1 */ sp_1024_mont_sub_16(t4, t4, t3, p1024_mod); - if (sp_1024_iszero_16(t2) & 
sp_1024_iszero_16(t4) & maskt) { + if (~p->infinity & ~q->infinity & + sp_1024_iszero_16(t2) & sp_1024_iszero_16(t4) & maskt) { sp_1024_proj_point_dbl_16(r, p, t); } else { @@ -116416,7 +116411,8 @@ static void sp_1024_proj_point_add_qz1_16(sp_point_1024* r, const sp_point_1024* /* Check double */ (void)sp_1024_mont_sub_16(t1, p1024_mod, q->y, p1024_mod); sp_1024_norm_16(t1); - if ((sp_1024_cmp_equal_16(p->x, q->x) & sp_1024_cmp_equal_16(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_1024_cmp_equal_16(p->x, q->x) & sp_1024_cmp_equal_16(p->z, q->z) & (sp_1024_cmp_equal_16(p->y, q->y) | sp_1024_cmp_equal_16(p->y, t1))) != 0) { sp_1024_proj_point_dbl_16(r, p, t); } @@ -123807,8 +123803,7 @@ static int sp_1024_ecc_is_point_16(const sp_point_1024* point, sp_1024_mont_add_16(t1, t1, point->x, p1024_mod); n = sp_1024_cmp_16(t1, p1024_mod); - sp_1024_cond_sub_16(t1, t1, p1024_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_1024_cond_sub_16(t1, t1, p1024_mod, ~(n >> 63)); sp_1024_norm_16(t1); if (!sp_1024_iszero_16(t1)) { err = MP_VAL; diff --git a/wolfcrypt/src/sp_armthumb.c b/wolfcrypt/src/sp_armthumb.c index 0a038d321..feaa4e37d 100644 --- a/wolfcrypt/src/sp_armthumb.c +++ b/wolfcrypt/src/sp_armthumb.c @@ -23882,10 +23882,7 @@ static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -24037,10 +24034,7 @@ static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -27346,10 +27340,7 @@ static int sp_2048_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -27492,10 +27483,7 @@ static int 
sp_2048_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -27975,6 +27963,12 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err = MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if (mp_iseven(qm)) { + err = MP_VAL; + } #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) if (err == MP_OKAY) { @@ -29780,7 +29774,7 @@ static int sp_2048_mod_exp_2_64(sp_digit* r, const sp_digit* e, int bits, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { + if (bits == 0) { err = MP_VAL; } @@ -75092,10 +75086,7 @@ static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -75247,10 +75238,7 @@ static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -79387,10 +79375,7 @@ static int sp_3072_mod_exp_96(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -79533,10 +79518,7 @@ static int sp_3072_mod_exp_96(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -80016,6 +79998,12 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err = MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if (mp_iseven(qm)) { + err = MP_VAL; + } #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) if (err == MP_OKAY) { @@ -82619,7 
+82607,7 @@ static int sp_3072_mod_exp_2_96(sp_digit* r, const sp_digit* e, int bits, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { + if (bits == 0) { err = MP_VAL; } @@ -91801,10 +91789,7 @@ static int sp_4096_mod_exp_128(sp_digit* r, const sp_digit* a, const sp_digit* e byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -91947,10 +91932,7 @@ static int sp_4096_mod_exp_128(sp_digit* r, const sp_digit* a, const sp_digit* e byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -92435,6 +92417,12 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err = MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if (mp_iseven(qm)) { + err = MP_VAL; + } #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) if (err == MP_OKAY) { @@ -95826,7 +95814,7 @@ static int sp_4096_mod_exp_2_128(sp_digit* r, const sp_digit* e, int bits, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { + if (bits == 0) { err = MP_VAL; } @@ -98642,8 +98630,7 @@ static void sp_256_map_8(sp_point_256* r, const sp_point_256* p, sp_256_mont_reduce_8(r->x, p256_mod, p256_mp_mod); /* Reduce x to less than modulus */ n = sp_256_cmp_8(r->x, p256_mod); - sp_256_cond_sub_8(r->x, r->x, p256_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_256_cond_sub_8(r->x, r->x, p256_mod, ~(n >> 31)); sp_256_norm_8(r->x); /* y /= z^3 */ @@ -98652,8 +98639,7 @@ static void sp_256_map_8(sp_point_256* r, const sp_point_256* p, sp_256_mont_reduce_8(r->y, p256_mod, p256_mp_mod); /* Reduce y to less than modulus */ n = sp_256_cmp_8(r->y, p256_mod); - sp_256_cond_sub_8(r->y, r->y, p256_mod, 0 - ((n >= 0) ? 
- (sp_digit)1 : (sp_digit)0)); + sp_256_cond_sub_8(r->y, r->y, p256_mod, ~(n >> 31)); sp_256_norm_8(r->y); XMEMSET(r->z, 0, sizeof(r->z) / 2); @@ -100090,7 +100076,8 @@ static int sp_256_proj_point_add_8_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, /* Check double */ (void)sp_256_sub_8(ctx->t1, p256_mod, q->y); sp_256_norm_8(ctx->t1); - if ((sp_256_cmp_equal_8(p->x, q->x) & sp_256_cmp_equal_8(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_256_cmp_equal_8(p->x, q->x) & sp_256_cmp_equal_8(p->z, q->z) & (sp_256_cmp_equal_8(p->y, q->y) | sp_256_cmp_equal_8(p->y, ctx->t1))) != 0) { XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); @@ -100258,7 +100245,8 @@ static void sp_256_proj_point_add_8(sp_point_256* r, /* Check double */ (void)sp_256_sub_8(t1, p256_mod, q->y); sp_256_norm_8(t1); - if ((sp_256_cmp_equal_8(p->x, q->x) & sp_256_cmp_equal_8(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_256_cmp_equal_8(p->x, q->x) & sp_256_cmp_equal_8(p->z, q->z) & (sp_256_cmp_equal_8(p->y, q->y) | sp_256_cmp_equal_8(p->y, t1))) != 0) { sp_256_proj_point_dbl_8(r, p, t); } @@ -100291,7 +100279,8 @@ static void sp_256_proj_point_add_8(sp_point_256* r, sp_256_mont_sub_8(t2, t2, t1, p256_mod); /* R = S2 - S1 */ sp_256_mont_sub_8(t4, t4, t3, p256_mod); - if (sp_256_iszero_8(t2) & sp_256_iszero_8(t4) & maskt) { + if (~p->infinity & ~q->infinity & + sp_256_iszero_8(t2) & sp_256_iszero_8(t4) & maskt) { sp_256_proj_point_dbl_8(r, p, t); } else { @@ -100733,7 +100722,8 @@ static void sp_256_proj_point_add_qz1_8(sp_point_256* r, const sp_point_256* p, /* Check double */ (void)sp_256_sub_8(t1, p256_mod, q->y); sp_256_norm_8(t1); - if ((sp_256_cmp_equal_8(p->x, q->x) & sp_256_cmp_equal_8(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_256_cmp_equal_8(p->x, q->x) & sp_256_cmp_equal_8(p->z, q->z) & (sp_256_cmp_equal_8(p->y, q->y) | sp_256_cmp_equal_8(p->y, t1))) != 0) { sp_256_proj_point_dbl_8(r, p, t); } @@ -109790,8 +109780,7 @@ static void sp_384_map_12(sp_point_384* r, 
const sp_point_384* p, sp_384_mont_reduce_12(r->x, p384_mod, p384_mp_mod); /* Reduce x to less than modulus */ n = sp_384_cmp_12(r->x, p384_mod); - sp_384_cond_sub_12(r->x, r->x, p384_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_384_cond_sub_12(r->x, r->x, p384_mod, ~(n >> 31)); sp_384_norm_12(r->x); /* y /= z^3 */ @@ -109800,8 +109789,7 @@ static void sp_384_map_12(sp_point_384* r, const sp_point_384* p, sp_384_mont_reduce_12(r->y, p384_mod, p384_mp_mod); /* Reduce y to less than modulus */ n = sp_384_cmp_12(r->y, p384_mod); - sp_384_cond_sub_12(r->y, r->y, p384_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_384_cond_sub_12(r->y, r->y, p384_mod, ~(n >> 31)); sp_384_norm_12(r->y); XMEMSET(r->z, 0, sizeof(r->z) / 2); @@ -110471,7 +110459,8 @@ static int sp_384_proj_point_add_12_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, /* Check double */ (void)sp_384_sub_12(ctx->t1, p384_mod, q->y); sp_384_norm_12(ctx->t1); - if ((sp_384_cmp_equal_12(p->x, q->x) & sp_384_cmp_equal_12(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_384_cmp_equal_12(p->x, q->x) & sp_384_cmp_equal_12(p->z, q->z) & (sp_384_cmp_equal_12(p->y, q->y) | sp_384_cmp_equal_12(p->y, ctx->t1))) != 0) { XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); @@ -110639,7 +110628,8 @@ static void sp_384_proj_point_add_12(sp_point_384* r, /* Check double */ (void)sp_384_sub_12(t1, p384_mod, q->y); sp_384_norm_12(t1); - if ((sp_384_cmp_equal_12(p->x, q->x) & sp_384_cmp_equal_12(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_384_cmp_equal_12(p->x, q->x) & sp_384_cmp_equal_12(p->z, q->z) & (sp_384_cmp_equal_12(p->y, q->y) | sp_384_cmp_equal_12(p->y, t1))) != 0) { sp_384_proj_point_dbl_12(r, p, t); } @@ -110672,7 +110662,8 @@ static void sp_384_proj_point_add_12(sp_point_384* r, sp_384_mont_sub_12(t2, t2, t1, p384_mod); /* R = S2 - S1 */ sp_384_mont_sub_12(t4, t4, t3, p384_mod); - if (sp_384_iszero_12(t2) & sp_384_iszero_12(t4) & maskt) { + if (~p->infinity & ~q->infinity & + 
sp_384_iszero_12(t2) & sp_384_iszero_12(t4) & maskt) { sp_384_proj_point_dbl_12(r, p, t); } else { @@ -111138,7 +111129,8 @@ static void sp_384_proj_point_add_qz1_12(sp_point_384* r, const sp_point_384* p, /* Check double */ (void)sp_384_sub_12(t1, p384_mod, q->y); sp_384_norm_12(t1); - if ((sp_384_cmp_equal_12(p->x, q->x) & sp_384_cmp_equal_12(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_384_cmp_equal_12(p->x, q->x) & sp_384_cmp_equal_12(p->z, q->z) & (sp_384_cmp_equal_12(p->y, q->y) | sp_384_cmp_equal_12(p->y, t1))) != 0) { sp_384_proj_point_dbl_12(r, p, t); } @@ -121832,8 +121824,7 @@ static void sp_521_map_17(sp_point_521* r, const sp_point_521* p, sp_521_mont_reduce_17(r->x, p521_mod, p521_mp_mod); /* Reduce x to less than modulus */ n = sp_521_cmp_17(r->x, p521_mod); - sp_521_cond_sub_17(r->x, r->x, p521_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_521_cond_sub_17(r->x, r->x, p521_mod, ~(n >> 31)); sp_521_norm_17(r->x); /* y /= z^3 */ @@ -121842,8 +121833,7 @@ static void sp_521_map_17(sp_point_521* r, const sp_point_521* p, sp_521_mont_reduce_17(r->y, p521_mod, p521_mp_mod); /* Reduce y to less than modulus */ n = sp_521_cmp_17(r->y, p521_mod); - sp_521_cond_sub_17(r->y, r->y, p521_mod, 0 - ((n >= 0) ? 
- (sp_digit)1 : (sp_digit)0)); + sp_521_cond_sub_17(r->y, r->y, p521_mod, ~(n >> 31)); sp_521_norm_17(r->y); XMEMSET(r->z, 0, sizeof(r->z) / 2); @@ -124015,7 +124005,8 @@ static int sp_521_proj_point_add_17_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, /* Check double */ (void)sp_521_sub_17(ctx->t1, p521_mod, q->y); sp_521_norm_17(ctx->t1); - if ((sp_521_cmp_equal_17(p->x, q->x) & sp_521_cmp_equal_17(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_521_cmp_equal_17(p->x, q->x) & sp_521_cmp_equal_17(p->z, q->z) & (sp_521_cmp_equal_17(p->y, q->y) | sp_521_cmp_equal_17(p->y, ctx->t1))) != 0) { XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); @@ -124183,7 +124174,8 @@ static void sp_521_proj_point_add_17(sp_point_521* r, /* Check double */ (void)sp_521_sub_17(t1, p521_mod, q->y); sp_521_norm_17(t1); - if ((sp_521_cmp_equal_17(p->x, q->x) & sp_521_cmp_equal_17(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_521_cmp_equal_17(p->x, q->x) & sp_521_cmp_equal_17(p->z, q->z) & (sp_521_cmp_equal_17(p->y, q->y) | sp_521_cmp_equal_17(p->y, t1))) != 0) { sp_521_proj_point_dbl_17(r, p, t); } @@ -124216,7 +124208,8 @@ static void sp_521_proj_point_add_17(sp_point_521* r, sp_521_mont_sub_17(t2, t2, t1, p521_mod); /* R = S2 - S1 */ sp_521_mont_sub_17(t4, t4, t3, p521_mod); - if (sp_521_iszero_17(t2) & sp_521_iszero_17(t4) & maskt) { + if (~p->infinity & ~q->infinity & + sp_521_iszero_17(t2) & sp_521_iszero_17(t4) & maskt) { sp_521_proj_point_dbl_17(r, p, t); } else { @@ -124716,7 +124709,8 @@ static void sp_521_proj_point_add_qz1_17(sp_point_521* r, const sp_point_521* p, /* Check double */ (void)sp_521_sub_17(t1, p521_mod, q->y); sp_521_norm_17(t1); - if ((sp_521_cmp_equal_17(p->x, q->x) & sp_521_cmp_equal_17(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_521_cmp_equal_17(p->x, q->x) & sp_521_cmp_equal_17(p->z, q->z) & (sp_521_cmp_equal_17(p->y, q->y) | sp_521_cmp_equal_17(p->y, t1))) != 0) { sp_521_proj_point_dbl_17(r, p, t); } @@ -202259,8 +202253,7 @@ static 
void sp_1024_map_32(sp_point_1024* r, const sp_point_1024* p, sp_1024_mont_reduce_32(r->x, p1024_mod, p1024_mp_mod); /* Reduce x to less than modulus */ n = sp_1024_cmp_32(r->x, p1024_mod); - sp_1024_cond_sub_32(r->x, r->x, p1024_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_1024_cond_sub_32(r->x, r->x, p1024_mod, ~(n >> 31)); sp_1024_norm_32(r->x); /* y /= z^3 */ @@ -202269,8 +202262,7 @@ static void sp_1024_map_32(sp_point_1024* r, const sp_point_1024* p, sp_1024_mont_reduce_32(r->y, p1024_mod, p1024_mp_mod); /* Reduce y to less than modulus */ n = sp_1024_cmp_32(r->y, p1024_mod); - sp_1024_cond_sub_32(r->y, r->y, p1024_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_1024_cond_sub_32(r->y, r->y, p1024_mod, ~(n >> 31)); sp_1024_norm_32(r->y); XMEMSET(r->z, 0, sizeof(r->z) / 2); @@ -208620,7 +208612,8 @@ static int sp_1024_proj_point_add_32_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, /* Check double */ (void)sp_1024_sub_32(ctx->t1, p1024_mod, q->y); sp_1024_norm_32(ctx->t1); - if ((sp_1024_cmp_equal_32(p->x, q->x) & sp_1024_cmp_equal_32(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_1024_cmp_equal_32(p->x, q->x) & sp_1024_cmp_equal_32(p->z, q->z) & (sp_1024_cmp_equal_32(p->y, q->y) | sp_1024_cmp_equal_32(p->y, ctx->t1))) != 0) { XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); @@ -208788,7 +208781,8 @@ static void sp_1024_proj_point_add_32(sp_point_1024* r, /* Check double */ (void)sp_1024_mont_sub_32(t1, p1024_mod, q->y, p1024_mod); sp_1024_norm_32(t1); - if ((sp_1024_cmp_equal_32(p->x, q->x) & sp_1024_cmp_equal_32(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_1024_cmp_equal_32(p->x, q->x) & sp_1024_cmp_equal_32(p->z, q->z) & (sp_1024_cmp_equal_32(p->y, q->y) | sp_1024_cmp_equal_32(p->y, t1))) != 0) { sp_1024_proj_point_dbl_32(r, p, t); } @@ -208821,7 +208815,8 @@ static void sp_1024_proj_point_add_32(sp_point_1024* r, sp_1024_mont_sub_32(t2, t2, t1, p1024_mod); /* R = S2 - S1 */ sp_1024_mont_sub_32(t4, t4, t3, p1024_mod); - 
if (sp_1024_iszero_32(t2) & sp_1024_iszero_32(t4) & maskt) { + if (~p->infinity & ~q->infinity & + sp_1024_iszero_32(t2) & sp_1024_iszero_32(t4) & maskt) { sp_1024_proj_point_dbl_32(r, p, t); } else { @@ -209151,7 +209146,8 @@ static void sp_1024_proj_point_add_qz1_32(sp_point_1024* r, const sp_point_1024* /* Check double */ (void)sp_1024_mont_sub_32(t1, p1024_mod, q->y, p1024_mod); sp_1024_norm_32(t1); - if ((sp_1024_cmp_equal_32(p->x, q->x) & sp_1024_cmp_equal_32(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_1024_cmp_equal_32(p->x, q->x) & sp_1024_cmp_equal_32(p->z, q->z) & (sp_1024_cmp_equal_32(p->y, q->y) | sp_1024_cmp_equal_32(p->y, t1))) != 0) { sp_1024_proj_point_dbl_32(r, p, t); } @@ -217313,8 +217309,7 @@ static int sp_1024_ecc_is_point_32(const sp_point_1024* point, sp_1024_mont_add_32(t1, t1, point->x, p1024_mod); n = sp_1024_cmp_32(t1, p1024_mod); - sp_1024_cond_sub_32(t1, t1, p1024_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_1024_cond_sub_32(t1, t1, p1024_mod, ~(n >> 31)); sp_1024_norm_32(t1); if (!sp_1024_iszero_32(t1)) { err = MP_VAL; diff --git a/wolfcrypt/src/sp_c32.c b/wolfcrypt/src/sp_c32.c index 2b7e71f43..9e6ffe405 100644 --- a/wolfcrypt/src/sp_c32.c +++ b/wolfcrypt/src/sp_c32.c @@ -64,7 +64,7 @@ fprintf(stderr, name "=0x"); \ for (ii=0; ii<(bits + 7) / 8; ii++) \ fprintf(stderr, "%02x", nb[ii]); \ - fprintf(stderr, "\n"); \ + fprintf(stderr, "\n"); \ } while (0) #define SP_PRINT_VAL(var, name) \ @@ -1502,24 +1502,24 @@ static sp_digit sp_2048_cmp_36(const sp_digit* a, const sp_digit* b) int i; for (i=35; i>=0; i--) { - r |= (a[i] - b[i]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[i] - b[i]) & ~(((sp_digit)0 - r) >> 28); } #else int i; r |= (a[35] - b[35]) & (0 - (sp_digit)1); - r |= (a[34] - b[34]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[33] - b[33]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[32] - b[32]) & (0 - (sp_digit)((r == 0) ? 
1 : 0)); + r |= (a[34] - b[34]) & ~(((sp_digit)0 - r) >> 28); + r |= (a[33] - b[33]) & ~(((sp_digit)0 - r) >> 28); + r |= (a[32] - b[32]) & ~(((sp_digit)0 - r) >> 28); for (i = 24; i >= 0; i -= 8) { - r |= (a[i + 7] - b[i + 7]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 6] - b[i + 6]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 5] - b[i + 5]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 4] - b[i + 4]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 3] - b[i + 3]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 2] - b[i + 2]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 1] - b[i + 1]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 0] - b[i + 0]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[i + 7] - b[i + 7]) & ~(((sp_digit)0 - r) >> 28); + r |= (a[i + 6] - b[i + 6]) & ~(((sp_digit)0 - r) >> 28); + r |= (a[i + 5] - b[i + 5]) & ~(((sp_digit)0 - r) >> 28); + r |= (a[i + 4] - b[i + 4]) & ~(((sp_digit)0 - r) >> 28); + r |= (a[i + 3] - b[i + 3]) & ~(((sp_digit)0 - r) >> 28); + r |= (a[i + 2] - b[i + 2]) & ~(((sp_digit)0 - r) >> 28); + r |= (a[i + 1] - b[i + 1]) & ~(((sp_digit)0 - r) >> 28); + r |= (a[i + 0] - b[i + 0]) & ~(((sp_digit)0 - r) >> 28); } #endif /* WOLFSSL_SP_SMALL */ @@ -1723,6 +1723,7 @@ static void sp_2048_mont_reduce_36(sp_digit* a, const sp_digit* m, sp_digit mp) { int i; sp_digit mu; + sp_digit over; sp_2048_norm_36(a + 36); @@ -1736,8 +1737,8 @@ static void sp_2048_mont_reduce_36(sp_digit* a, const sp_digit* m, sp_digit mp) a[i+1] += a[i] >> 29; a[i] &= 0x1fffffff; sp_2048_mont_shift_36(a, a); - sp_2048_cond_sub_36(a, a, m, 0 - (((a[35] - m[35]) > 0) ? 
- (sp_digit)1 : (sp_digit)0)); + over = a[35] - m[35]; + sp_2048_cond_sub_36(a, a, m, ~((over - 1) >> 31)); sp_2048_norm_36(a); } @@ -1902,135 +1903,47 @@ SP_NOINLINE static void sp_2048_rshift_36(sp_digit* r, const sp_digit* a, r[35] = a[35] >> n; } -#ifdef WOLFSSL_SP_DIV_32 static WC_INLINE sp_digit sp_2048_div_word_36(sp_digit d1, sp_digit d0, - sp_digit dv) + sp_digit div) { - sp_digit d; - sp_digit r; +#ifdef SP_USE_DIVTI3 + sp_int64 d = ((sp_int64)d1 << 29) + d0; + + return d / div; +#elif defined(__x86_64__) || defined(__i386__) + sp_int64 d = ((sp_int64)d1 << 29) + d0; + sp_uint32 lo = (sp_uint32)d; + sp_digit hi = (sp_digit)(d >> 32); + + __asm__ __volatile__ ( + "idiv %2" + : "+a" (lo) + : "d" (hi), "r" (div) + : "cc" + ); + + return (sp_digit)lo; +#else + sp_int64 d = ((sp_int64)d1 << 29) + d0; + sp_digit r = 0; sp_digit t; + sp_digit dv = (div >> 14) + 1; - /* All 29 bits from d1 and top 2 bits from d0. */ - d = (d1 << 2) + (d0 >> 27); - r = d / dv; - d -= r * dv; - /* Up to 3 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 25) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; + t = (sp_digit)(d >> 28); + t = (t / dv) << 14; r += t; - /* Up to 5 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 23) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int64)t * div; + t = (sp_digit)(d >> 13); + t = t / (dv << 1); r += t; - /* Up to 7 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 21) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int64)t * div; + t = (sp_digit)d; + t = t / div; r += t; - /* Up to 9 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 19) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 11 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 17) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 13 bits in r */ - /* Next 2 bits from d0. 
*/ - r <<= 2; - d <<= 2; - d += (d0 >> 15) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 15 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 13) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 17 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 11) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 19 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 9) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 21 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 7) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 23 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 5) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 25 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 3) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 27 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 1) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 29 bits in r */ - /* Remaining 1 bits from d0. */ - r <<= 1; - d <<= 1; - d += d0 & ((1 << 1) - 1); - t = d / dv; - r += t; - - /* All 29 bits from d1 and top 2 bits from d0. */ + d -= (sp_int64)t * div; return r; +#endif } -#endif /* WOLFSSL_SP_DIV_32 */ - /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. 
* @@ -2047,7 +1960,6 @@ static int sp_2048_div_36(const sp_digit* a, const sp_digit* d, { int i; #ifndef WOLFSSL_SP_DIV_32 - sp_int64 d1; #endif sp_digit dv; sp_digit r1; @@ -2081,14 +1993,7 @@ static int sp_2048_div_36(const sp_digit* a, const sp_digit* d, t1[36 + 36] += t1[36 + 36 - 1] >> 29; t1[36 + 36 - 1] &= 0x1fffffff; for (i=36; i>=0; i--) { -#ifndef WOLFSSL_SP_DIV_32 - d1 = t1[36 + i]; - d1 <<= 29; - d1 += t1[36 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_2048_div_word_36(t1[36 + i], t1[36 + i - 1], dv); -#endif sp_2048_mul_d_36(t2, sd, r1); (void)sp_2048_sub_36(&t1[i], &t1[i], t2); @@ -2096,14 +2001,7 @@ static int sp_2048_div_36(const sp_digit* a, const sp_digit* d, t1[36 + i] -= t2[36]; t1[36 + i] += t1[36 + i - 1] >> 29; t1[36 + i - 1] &= 0x1fffffff; -#ifndef WOLFSSL_SP_DIV_32 - d1 = -t1[36 + i]; - d1 <<= 29; - d1 -= t1[36 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_2048_div_word_36(-t1[36 + i], -t1[36 + i - 1], dv); -#endif r1 -= t1[36 + i]; sp_2048_mul_d_36(t2, sd, r1); (void)sp_2048_add_36(&t1[i], &t1[i], t2); @@ -2121,8 +2019,7 @@ static int sp_2048_div_36(const sp_digit* a, const sp_digit* d, r[i+1] += r[i] >> 29; r[i] &= 0x1fffffff; } - sp_2048_cond_add_36(r, r, sd, 0 - ((r[35] < 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_2048_cond_add_36(r, r, sd, r[35] >> 31); sp_2048_norm_36(r); sp_2048_rshift_36(r, r, 20); @@ -2177,10 +2074,7 @@ static int sp_2048_mod_exp_36(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -2245,8 +2139,7 @@ static int sp_2048_mod_exp_36(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_2048_mont_reduce_36(t[0], m, mp); n = sp_2048_cmp_36(t[0], m); - sp_2048_cond_sub_36(t[0], t[0], m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_2048_cond_sub_36(t[0], t[0], m, ~(n >> 31)); XMEMCPY(r, t[0], sizeof(*r) * 36 * 2); } @@ -2272,10 +2165,7 @@ static int sp_2048_mod_exp_36(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -2340,8 +2230,7 @@ static int sp_2048_mod_exp_36(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_2048_mont_reduce_36(t[0], m, mp); n = sp_2048_cmp_36(t[0], m); - sp_2048_cond_sub_36(t[0], t[0], m, ((n < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_2048_cond_sub_36(t[0], t[0], m, ~(n >> 31)); XMEMCPY(r, t[0], sizeof(*r) * 36 * 2); } @@ -2367,10 +2256,7 @@ static int sp_2048_mod_exp_36(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -2490,8 +2376,7 @@ static int sp_2048_mod_exp_36(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_2048_mont_reduce_36(rt, m, mp); n = sp_2048_cmp_36(rt, m); - sp_2048_cond_sub_36(rt, rt, m, ((n < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_2048_cond_sub_36(rt, rt, m, ~(n >> 31)); XMEMCPY(r, rt, sizeof(sp_digit) * 72); } @@ -2565,20 +2450,20 @@ static sp_digit sp_2048_cmp_72(const sp_digit* a, const sp_digit* b) int i; for (i=71; i>=0; i--) { - r |= (a[i] - b[i]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[i] - b[i]) & ~(((sp_digit)0 - r) >> 28); } #else int i; for (i = 64; i >= 0; i -= 8) { - r |= (a[i + 7] - b[i + 7]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 6] - b[i + 6]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 5] - b[i + 5]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 4] - b[i + 4]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 3] - b[i + 3]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 2] - b[i + 2]) & (0 - (sp_digit)((r == 0) ? 
1 : 0)); - r |= (a[i + 1] - b[i + 1]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 0] - b[i + 0]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[i + 7] - b[i + 7]) & ~(((sp_digit)0 - r) >> 28); + r |= (a[i + 6] - b[i + 6]) & ~(((sp_digit)0 - r) >> 28); + r |= (a[i + 5] - b[i + 5]) & ~(((sp_digit)0 - r) >> 28); + r |= (a[i + 4] - b[i + 4]) & ~(((sp_digit)0 - r) >> 28); + r |= (a[i + 3] - b[i + 3]) & ~(((sp_digit)0 - r) >> 28); + r |= (a[i + 2] - b[i + 2]) & ~(((sp_digit)0 - r) >> 28); + r |= (a[i + 1] - b[i + 1]) & ~(((sp_digit)0 - r) >> 28); + r |= (a[i + 0] - b[i + 0]) & ~(((sp_digit)0 - r) >> 28); } #endif /* WOLFSSL_SP_SMALL */ @@ -2793,6 +2678,7 @@ static void sp_2048_mont_reduce_72(sp_digit* a, const sp_digit* m, sp_digit mp) { int i; sp_digit mu; + sp_digit over; sp_2048_norm_72(a + 71); @@ -2831,8 +2717,8 @@ static void sp_2048_mont_reduce_72(sp_digit* a, const sp_digit* m, sp_digit mp) a[i] &= 0x1fffffff; #endif sp_2048_mont_shift_72(a, a); - sp_2048_cond_sub_72(a, a, m, 0 - (((a[70] - m[70]) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = a[70] - m[70]; + sp_2048_cond_sub_72(a, a, m, ~((over - 1) >> 31)); sp_2048_norm_72(a); } @@ -3030,135 +2916,47 @@ SP_NOINLINE static void sp_2048_rshift_72(sp_digit* r, const sp_digit* a, r[71] = a[71] >> n; } -#ifdef WOLFSSL_SP_DIV_32 static WC_INLINE sp_digit sp_2048_div_word_72(sp_digit d1, sp_digit d0, - sp_digit dv) + sp_digit div) { - sp_digit d; - sp_digit r; +#ifdef SP_USE_DIVTI3 + sp_int64 d = ((sp_int64)d1 << 29) + d0; + + return d / div; +#elif defined(__x86_64__) || defined(__i386__) + sp_int64 d = ((sp_int64)d1 << 29) + d0; + sp_uint32 lo = (sp_uint32)d; + sp_digit hi = (sp_digit)(d >> 32); + + __asm__ __volatile__ ( + "idiv %2" + : "+a" (lo) + : "d" (hi), "r" (div) + : "cc" + ); + + return (sp_digit)lo; +#else + sp_int64 d = ((sp_int64)d1 << 29) + d0; + sp_digit r = 0; sp_digit t; + sp_digit dv = (div >> 14) + 1; - /* All 29 bits from d1 and top 2 bits from d0. 
*/ - d = (d1 << 2) + (d0 >> 27); - r = d / dv; - d -= r * dv; - /* Up to 3 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 25) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; + t = (sp_digit)(d >> 28); + t = (t / dv) << 14; r += t; - /* Up to 5 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 23) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int64)t * div; + t = (sp_digit)(d >> 13); + t = t / (dv << 1); r += t; - /* Up to 7 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 21) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int64)t * div; + t = (sp_digit)d; + t = t / div; r += t; - /* Up to 9 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 19) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 11 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 17) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 13 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 15) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 15 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 13) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 17 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 11) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 19 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 9) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 21 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 7) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 23 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 5) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 25 bits in r */ - /* Next 2 bits from d0. 
*/ - r <<= 2; - d <<= 2; - d += (d0 >> 3) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 27 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 1) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 29 bits in r */ - /* Remaining 1 bits from d0. */ - r <<= 1; - d <<= 1; - d += d0 & ((1 << 1) - 1); - t = d / dv; - r += t; - - /* All 29 bits from d1 and top 2 bits from d0. */ + d -= (sp_int64)t * div; return r; +#endif } -#endif /* WOLFSSL_SP_DIV_32 */ - /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. * @@ -3175,7 +2973,6 @@ static int sp_2048_div_72(const sp_digit* a, const sp_digit* d, { int i; #ifndef WOLFSSL_SP_DIV_32 - sp_int64 d1; #endif sp_digit dv; sp_digit r1; @@ -3209,28 +3006,14 @@ static int sp_2048_div_72(const sp_digit* a, const sp_digit* d, t1[71 + 71] += t1[71 + 71 - 1] >> 29; t1[71 + 71 - 1] &= 0x1fffffff; for (i=71; i>=0; i--) { -#ifndef WOLFSSL_SP_DIV_32 - d1 = t1[71 + i]; - d1 <<= 29; - d1 += t1[71 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_2048_div_word_72(t1[71 + i], t1[71 + i - 1], dv); -#endif sp_2048_mul_d_72(t2, sd, r1); (void)sp_2048_sub_72(&t1[i], &t1[i], t2); sp_2048_norm_71(&t1[i]); t1[71 + i] += t1[71 + i - 1] >> 29; t1[71 + i - 1] &= 0x1fffffff; -#ifndef WOLFSSL_SP_DIV_32 - d1 = -t1[71 + i]; - d1 <<= 29; - d1 -= t1[71 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_2048_div_word_72(-t1[71 + i], -t1[71 + i - 1], dv); -#endif r1 -= t1[71 + i]; sp_2048_mul_d_72(t2, sd, r1); (void)sp_2048_add_72(&t1[i], &t1[i], t2); @@ -3248,8 +3031,7 @@ static int sp_2048_div_72(const sp_digit* a, const sp_digit* d, r[i+1] += r[i] >> 29; r[i] &= 0x1fffffff; } - sp_2048_cond_add_72(r, r, sd, 0 - ((r[70] < 0) ? 
- (sp_digit)1 : (sp_digit)0)); + sp_2048_cond_add_72(r, r, sd, r[70] >> 31); sp_2048_norm_71(r); sp_2048_rshift_72(r, r, 11); @@ -3308,10 +3090,7 @@ static int sp_2048_mod_exp_72(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -3376,8 +3155,7 @@ static int sp_2048_mod_exp_72(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_2048_mont_reduce_72(t[0], m, mp); n = sp_2048_cmp_72(t[0], m); - sp_2048_cond_sub_72(t[0], t[0], m, ((n < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_2048_cond_sub_72(t[0], t[0], m, ~(n >> 31)); XMEMCPY(r, t[0], sizeof(*r) * 72 * 2); } @@ -3403,10 +3181,7 @@ static int sp_2048_mod_exp_72(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -3471,8 +3246,7 @@ static int sp_2048_mod_exp_72(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_2048_mont_reduce_72(t[0], m, mp); n = sp_2048_cmp_72(t[0], m); - sp_2048_cond_sub_72(t[0], t[0], m, ((n < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_2048_cond_sub_72(t[0], t[0], m, ~(n >> 31)); XMEMCPY(r, t[0], sizeof(*r) * 72 * 2); } @@ -3498,10 +3272,7 @@ static int sp_2048_mod_exp_72(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -3604,8 +3375,7 @@ static int sp_2048_mod_exp_72(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_2048_mont_reduce_72(rt, m, mp); n = sp_2048_cmp_72(rt, m); - sp_2048_cond_sub_72(rt, rt, m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_2048_cond_sub_72(rt, rt, m, ~(n >> 31)); XMEMCPY(r, rt, sizeof(sp_digit) * 144); } @@ -3725,8 +3495,7 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, } sp_2048_mont_reduce_72(r, m, mp); mp = sp_2048_cmp_72(r, m); - sp_2048_cond_sub_72(r, r, m, ((mp < 0) ? - (sp_digit)1 : (sp_digit)0)- 1); + sp_2048_cond_sub_72(r, r, m, ~(mp >> 31)); sp_2048_to_bin_72(r, out); *outLen = 256; @@ -3834,8 +3603,7 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, } sp_2048_mont_reduce_72(r, m, mp); mp = sp_2048_cmp_72(r, m); - sp_2048_cond_sub_72(r, r, m, ((mp < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_2048_cond_sub_72(r, r, m, ~(mp >> 31)); } } } @@ -4058,6 +3826,12 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err = MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if (mp_iseven(qm)) { + err = MP_VAL; + } } #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) @@ -4151,6 +3925,12 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err = MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if (mp_iseven(qm)) { + err = MP_VAL; + } } #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) @@ -4623,7 +4403,7 @@ static int sp_2048_mod_exp_2_72(sp_digit* r, const sp_digit* e, int bits, const byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { + if (bits == 0) { err = MP_VAL; } @@ -4698,14 +4478,12 @@ static int sp_2048_mod_exp_2_72(sp_digit* r, const sp_digit* e, int bits, const (void)sp_2048_add_72(r, r, tmp); sp_2048_norm_72(r); o = sp_2048_cmp_72(r, m); - sp_2048_cond_sub_72(r, r, m, ((o < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_2048_cond_sub_72(r, r, m, ~(o >> 31)); } sp_2048_mont_reduce_72(r, m, mp); n = sp_2048_cmp_72(r, m); - sp_2048_cond_sub_72(r, r, m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_2048_cond_sub_72(r, r, m, ~(n >> 31)); } #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) @@ -5370,7 +5148,7 @@ static sp_digit sp_3072_cmp_53(const sp_digit* a, const sp_digit* b) int i; for (i=52; i>=0; i--) { - r |= (a[i] - b[i]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[i] - b[i]) & ~(((sp_digit)0 - r) >> 28); } return r; @@ -5471,6 +5249,7 @@ static void sp_3072_mont_reduce_53(sp_digit* a, const sp_digit* m, sp_digit mp) { int i; sp_digit mu; + sp_digit over; sp_3072_norm_53(a + 53); @@ -5484,8 +5263,8 @@ static void sp_3072_mont_reduce_53(sp_digit* a, const sp_digit* m, sp_digit mp) a[i+1] += a[i] >> 29; a[i] &= 0x1fffffff; sp_3072_mont_shift_53(a, a); - sp_3072_cond_sub_53(a, a, m, 0 - (((a[52] - m[52]) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = a[52] - m[52]; + sp_3072_cond_sub_53(a, a, m, ~((over - 1) >> 31)); sp_3072_norm_53(a); } @@ -5708,135 +5487,47 @@ SP_NOINLINE static void sp_3072_rshift_53(sp_digit* r, const sp_digit* a, r[52] = a[52] >> n; } -#ifdef WOLFSSL_SP_DIV_32 static WC_INLINE sp_digit sp_3072_div_word_53(sp_digit d1, sp_digit d0, - sp_digit dv) + sp_digit div) { - sp_digit d; - sp_digit r; +#ifdef SP_USE_DIVTI3 + sp_int64 d = ((sp_int64)d1 << 29) + d0; + + return d / div; +#elif defined(__x86_64__) || defined(__i386__) + sp_int64 d = ((sp_int64)d1 << 29) + d0; + sp_uint32 lo = (sp_uint32)d; + sp_digit hi = (sp_digit)(d >> 32); + + __asm__ __volatile__ ( + "idiv %2" + : "+a" (lo) + : "d" (hi), "r" (div) + : "cc" + ); + + return (sp_digit)lo; +#else + sp_int64 d = ((sp_int64)d1 << 29) + d0; + sp_digit r = 0; sp_digit t; + sp_digit dv = (div >> 14) + 1; - /* All 29 bits from d1 and top 2 bits from d0. */ - d = (d1 << 2) + (d0 >> 27); - r = d / dv; - d -= r * dv; - /* Up to 3 bits in r */ - /* Next 2 bits from d0. 
*/ - r <<= 2; - d <<= 2; - d += (d0 >> 25) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; + t = (sp_digit)(d >> 28); + t = (t / dv) << 14; r += t; - /* Up to 5 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 23) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int64)t * div; + t = (sp_digit)(d >> 13); + t = t / (dv << 1); r += t; - /* Up to 7 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 21) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int64)t * div; + t = (sp_digit)d; + t = t / div; r += t; - /* Up to 9 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 19) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 11 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 17) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 13 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 15) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 15 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 13) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 17 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 11) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 19 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 9) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 21 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 7) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 23 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 5) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 25 bits in r */ - /* Next 2 bits from d0. 
*/ - r <<= 2; - d <<= 2; - d += (d0 >> 3) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 27 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 1) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 29 bits in r */ - /* Remaining 1 bits from d0. */ - r <<= 1; - d <<= 1; - d += d0 & ((1 << 1) - 1); - t = d / dv; - r += t; - - /* All 29 bits from d1 and top 2 bits from d0. */ + d -= (sp_int64)t * div; return r; +#endif } -#endif /* WOLFSSL_SP_DIV_32 */ - /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. * @@ -5853,7 +5544,6 @@ static int sp_3072_div_53(const sp_digit* a, const sp_digit* d, { int i; #ifndef WOLFSSL_SP_DIV_32 - sp_int64 d1; #endif sp_digit dv; sp_digit r1; @@ -5887,14 +5577,7 @@ static int sp_3072_div_53(const sp_digit* a, const sp_digit* d, t1[53 + 53] += t1[53 + 53 - 1] >> 29; t1[53 + 53 - 1] &= 0x1fffffff; for (i=53; i>=0; i--) { -#ifndef WOLFSSL_SP_DIV_32 - d1 = t1[53 + i]; - d1 <<= 29; - d1 += t1[53 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_3072_div_word_53(t1[53 + i], t1[53 + i - 1], dv); -#endif sp_3072_mul_d_53(t2, sd, r1); (void)sp_3072_sub_53(&t1[i], &t1[i], t2); @@ -5902,14 +5585,7 @@ static int sp_3072_div_53(const sp_digit* a, const sp_digit* d, t1[53 + i] -= t2[53]; t1[53 + i] += t1[53 + i - 1] >> 29; t1[53 + i - 1] &= 0x1fffffff; -#ifndef WOLFSSL_SP_DIV_32 - d1 = -t1[53 + i]; - d1 <<= 29; - d1 -= t1[53 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_3072_div_word_53(-t1[53 + i], -t1[53 + i - 1], dv); -#endif r1 -= t1[53 + i]; sp_3072_mul_d_53(t2, sd, r1); (void)sp_3072_add_53(&t1[i], &t1[i], t2); @@ -5927,8 +5603,7 @@ static int sp_3072_div_53(const sp_digit* a, const sp_digit* d, r[i+1] += r[i] >> 29; r[i] &= 0x1fffffff; } - sp_3072_cond_add_53(r, r, sd, 0 - ((r[52] < 0) ? 
- (sp_digit)1 : (sp_digit)0)); + sp_3072_cond_add_53(r, r, sd, r[52] >> 31); sp_3072_norm_53(r); sp_3072_rshift_53(r, r, 1); @@ -5983,10 +5658,7 @@ static int sp_3072_mod_exp_53(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -6051,8 +5723,7 @@ static int sp_3072_mod_exp_53(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_3072_mont_reduce_53(t[0], m, mp); n = sp_3072_cmp_53(t[0], m); - sp_3072_cond_sub_53(t[0], t[0], m, ((n < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_53(t[0], t[0], m, ~(n >> 31)); XMEMCPY(r, t[0], sizeof(*r) * 53 * 2); } @@ -6078,10 +5749,7 @@ static int sp_3072_mod_exp_53(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -6146,8 +5814,7 @@ static int sp_3072_mod_exp_53(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_3072_mont_reduce_53(t[0], m, mp); n = sp_3072_cmp_53(t[0], m); - sp_3072_cond_sub_53(t[0], t[0], m, ((n < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_53(t[0], t[0], m, ~(n >> 31)); XMEMCPY(r, t[0], sizeof(*r) * 53 * 2); } @@ -6173,10 +5840,7 @@ static int sp_3072_mod_exp_53(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -6296,8 +5960,7 @@ static int sp_3072_mod_exp_53(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_3072_mont_reduce_53(rt, m, mp); n = sp_3072_cmp_53(rt, m); - sp_3072_cond_sub_53(rt, rt, m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_53(rt, rt, m, ~(n >> 31)); XMEMCPY(r, rt, sizeof(sp_digit) * 106); } @@ -6366,7 +6029,7 @@ static sp_digit sp_3072_cmp_106(const sp_digit* a, const sp_digit* b) int i; for (i=105; i>=0; i--) { - r |= (a[i] - b[i]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[i] - b[i]) & ~(((sp_digit)0 - r) >> 28); } return r; @@ -6470,6 +6133,7 @@ static void sp_3072_mont_reduce_106(sp_digit* a, const sp_digit* m, sp_digit mp) { int i; sp_digit mu; + sp_digit over; sp_3072_norm_106(a + 106); @@ -6508,8 +6172,8 @@ static void sp_3072_mont_reduce_106(sp_digit* a, const sp_digit* m, sp_digit mp) a[i] &= 0x1fffffff; #endif sp_3072_mont_shift_106(a, a); - sp_3072_cond_sub_106(a, a, m, 0 - (((a[105] - m[105]) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = a[105] - m[105]; + sp_3072_cond_sub_106(a, a, m, ~((over - 1) >> 31)); sp_3072_norm_106(a); } @@ -6613,135 +6277,47 @@ SP_NOINLINE static void sp_3072_rshift_106(sp_digit* r, const sp_digit* a, r[105] = a[105] >> n; } -#ifdef WOLFSSL_SP_DIV_32 static WC_INLINE sp_digit sp_3072_div_word_106(sp_digit d1, sp_digit d0, - sp_digit dv) + sp_digit div) { - sp_digit d; - sp_digit r; +#ifdef SP_USE_DIVTI3 + sp_int64 d = ((sp_int64)d1 << 29) + d0; + + return d / div; +#elif defined(__x86_64__) || defined(__i386__) + sp_int64 d = ((sp_int64)d1 << 29) + d0; + sp_uint32 lo = (sp_uint32)d; + sp_digit hi = (sp_digit)(d >> 32); + + __asm__ __volatile__ ( + "idiv %2" + : "+a" (lo) + : "d" (hi), "r" (div) + : "cc" + ); + + return (sp_digit)lo; +#else + sp_int64 d = ((sp_int64)d1 << 29) + d0; + sp_digit r = 0; sp_digit t; + sp_digit dv = (div >> 14) + 1; - /* All 29 bits from d1 and top 2 bits from d0. */ - d = (d1 << 2) + (d0 >> 27); - r = d / dv; - d -= r * dv; - /* Up to 3 bits in r */ - /* Next 2 bits from d0. 
*/ - r <<= 2; - d <<= 2; - d += (d0 >> 25) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; + t = (sp_digit)(d >> 28); + t = (t / dv) << 14; r += t; - /* Up to 5 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 23) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int64)t * div; + t = (sp_digit)(d >> 13); + t = t / (dv << 1); r += t; - /* Up to 7 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 21) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int64)t * div; + t = (sp_digit)d; + t = t / div; r += t; - /* Up to 9 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 19) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 11 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 17) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 13 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 15) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 15 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 13) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 17 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 11) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 19 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 9) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 21 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 7) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 23 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 5) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 25 bits in r */ - /* Next 2 bits from d0. 
*/ - r <<= 2; - d <<= 2; - d += (d0 >> 3) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 27 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 1) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 29 bits in r */ - /* Remaining 1 bits from d0. */ - r <<= 1; - d <<= 1; - d += d0 & ((1 << 1) - 1); - t = d / dv; - r += t; - - /* All 29 bits from d1 and top 2 bits from d0. */ + d -= (sp_int64)t * div; return r; +#endif } -#endif /* WOLFSSL_SP_DIV_32 */ - /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. * @@ -6758,7 +6334,6 @@ static int sp_3072_div_106(const sp_digit* a, const sp_digit* d, { int i; #ifndef WOLFSSL_SP_DIV_32 - sp_int64 d1; #endif sp_digit dv; sp_digit r1; @@ -6792,14 +6367,7 @@ static int sp_3072_div_106(const sp_digit* a, const sp_digit* d, t1[106 + 106] += t1[106 + 106 - 1] >> 29; t1[106 + 106 - 1] &= 0x1fffffff; for (i=106; i>=0; i--) { -#ifndef WOLFSSL_SP_DIV_32 - d1 = t1[106 + i]; - d1 <<= 29; - d1 += t1[106 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_3072_div_word_106(t1[106 + i], t1[106 + i - 1], dv); -#endif sp_3072_mul_d_106(t2, sd, r1); (void)sp_3072_sub_106(&t1[i], &t1[i], t2); @@ -6807,14 +6375,7 @@ static int sp_3072_div_106(const sp_digit* a, const sp_digit* d, t1[106 + i] -= t2[106]; t1[106 + i] += t1[106 + i - 1] >> 29; t1[106 + i - 1] &= 0x1fffffff; -#ifndef WOLFSSL_SP_DIV_32 - d1 = -t1[106 + i]; - d1 <<= 29; - d1 -= t1[106 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_3072_div_word_106(-t1[106 + i], -t1[106 + i - 1], dv); -#endif r1 -= t1[106 + i]; sp_3072_mul_d_106(t2, sd, r1); (void)sp_3072_add_106(&t1[i], &t1[i], t2); @@ -6832,8 +6393,7 @@ static int sp_3072_div_106(const sp_digit* a, const sp_digit* d, r[i+1] += r[i] >> 29; r[i] &= 0x1fffffff; } - sp_3072_cond_add_106(r, r, sd, 0 - ((r[105] < 0) ? 
- (sp_digit)1 : (sp_digit)0)); + sp_3072_cond_add_106(r, r, sd, r[105] >> 31); sp_3072_norm_106(r); sp_3072_rshift_106(r, r, 2); @@ -6889,10 +6449,7 @@ static int sp_3072_mod_exp_106(sp_digit* r, const sp_digit* a, const sp_digit* e byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -6957,8 +6514,7 @@ static int sp_3072_mod_exp_106(sp_digit* r, const sp_digit* a, const sp_digit* e sp_3072_mont_reduce_106(t[0], m, mp); n = sp_3072_cmp_106(t[0], m); - sp_3072_cond_sub_106(t[0], t[0], m, ((n < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_106(t[0], t[0], m, ~(n >> 31)); XMEMCPY(r, t[0], sizeof(*r) * 106 * 2); } @@ -6984,10 +6540,7 @@ static int sp_3072_mod_exp_106(sp_digit* r, const sp_digit* a, const sp_digit* e byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -7052,8 +6605,7 @@ static int sp_3072_mod_exp_106(sp_digit* r, const sp_digit* a, const sp_digit* e sp_3072_mont_reduce_106(t[0], m, mp); n = sp_3072_cmp_106(t[0], m); - sp_3072_cond_sub_106(t[0], t[0], m, ((n < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_106(t[0], t[0], m, ~(n >> 31)); XMEMCPY(r, t[0], sizeof(*r) * 106 * 2); } @@ -7079,10 +6631,7 @@ static int sp_3072_mod_exp_106(sp_digit* r, const sp_digit* a, const sp_digit* e byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -7185,8 +6734,7 @@ static int sp_3072_mod_exp_106(sp_digit* r, const sp_digit* a, const sp_digit* e sp_3072_mont_reduce_106(rt, m, mp); n = sp_3072_cmp_106(rt, m); - sp_3072_cond_sub_106(rt, rt, m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_106(rt, rt, m, ~(n >> 31)); XMEMCPY(r, rt, sizeof(sp_digit) * 212); } @@ -7304,8 +6852,7 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, } sp_3072_mont_reduce_106(r, m, mp); mp = sp_3072_cmp_106(r, m); - sp_3072_cond_sub_106(r, r, m, ((mp < 0) ? - (sp_digit)1 : (sp_digit)0)- 1); + sp_3072_cond_sub_106(r, r, m, ~(mp >> 31)); sp_3072_to_bin_106(r, out); *outLen = 384; @@ -7413,8 +6960,7 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, } sp_3072_mont_reduce_106(r, m, mp); mp = sp_3072_cmp_106(r, m); - sp_3072_cond_sub_106(r, r, m, ((mp < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_106(r, r, m, ~(mp >> 31)); } } } @@ -7637,6 +7183,12 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err = MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if (mp_iseven(qm)) { + err = MP_VAL; + } } #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) @@ -7730,6 +7282,12 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err = MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if (mp_iseven(qm)) { + err = MP_VAL; + } } #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) @@ -8052,7 +7610,7 @@ static int sp_3072_mod_exp_2_106(sp_digit* r, const sp_digit* e, int bits, const byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { + if (bits == 0) { err = MP_VAL; } @@ -8127,14 +7685,12 @@ static int sp_3072_mod_exp_2_106(sp_digit* r, const sp_digit* e, int bits, const (void)sp_3072_add_106(r, r, tmp); sp_3072_norm_106(r); o = sp_3072_cmp_106(r, m); - sp_3072_cond_sub_106(r, r, m, ((o < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_106(r, r, m, ~(o >> 31)); } sp_3072_mont_reduce_106(r, m, mp); n = sp_3072_cmp_106(r, m); - sp_3072_cond_sub_106(r, r, m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_106(r, r, m, ~(n >> 31)); } #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) @@ -9549,14 +9105,14 @@ static sp_digit sp_3072_cmp_56(const sp_digit* a, const sp_digit* b) int i; for (i = 48; i >= 0; i -= 8) { - r |= (a[i + 7] - b[i + 7]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 6] - b[i + 6]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 5] - b[i + 5]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 4] - b[i + 4]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 3] - b[i + 3]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 2] - b[i + 2]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 1] - b[i + 1]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 0] - b[i + 0]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[i + 7] - b[i + 7]) & ~(((sp_digit)0 - r) >> 27); + r |= (a[i + 6] - b[i + 6]) & ~(((sp_digit)0 - r) >> 27); + r |= (a[i + 5] - b[i + 5]) & ~(((sp_digit)0 - r) >> 27); + r |= (a[i + 4] - b[i + 4]) & ~(((sp_digit)0 - r) >> 27); + r |= (a[i + 3] - b[i + 3]) & ~(((sp_digit)0 - r) >> 27); + r |= (a[i + 2] - b[i + 2]) & ~(((sp_digit)0 - r) >> 27); + r |= (a[i + 1] - b[i + 1]) & ~(((sp_digit)0 - r) >> 27); + r |= (a[i + 0] - b[i + 0]) & ~(((sp_digit)0 - r) >> 27); } return r; @@ -9715,6 +9271,7 @@ static void sp_3072_mont_reduce_56(sp_digit* a, const sp_digit* m, sp_digit mp) { int i; sp_digit mu; + sp_digit over; sp_3072_norm_56(a + 55); @@ -9728,8 +9285,8 @@ static void sp_3072_mont_reduce_56(sp_digit* a, const sp_digit* m, sp_digit mp) a[i+1] += a[i] >> 28; a[i] &= 0xfffffff; sp_3072_mont_shift_56(a, a); - sp_3072_cond_sub_56(a, a, m, 0 - (((a[54] - m[54]) > 0) ? 
- (sp_digit)1 : (sp_digit)0)); + over = a[54] - m[54]; + sp_3072_cond_sub_56(a, a, m, ~((over - 1) >> 31)); sp_3072_norm_56(a); } @@ -9855,95 +9412,47 @@ SP_NOINLINE static void sp_3072_rshift_56(sp_digit* r, const sp_digit* a, r[55] = a[55] >> n; } -#ifdef WOLFSSL_SP_DIV_32 static WC_INLINE sp_digit sp_3072_div_word_56(sp_digit d1, sp_digit d0, - sp_digit dv) + sp_digit div) { - sp_digit d; - sp_digit r; +#ifdef SP_USE_DIVTI3 + sp_int64 d = ((sp_int64)d1 << 28) + d0; + + return d / div; +#elif defined(__x86_64__) || defined(__i386__) + sp_int64 d = ((sp_int64)d1 << 28) + d0; + sp_uint32 lo = (sp_uint32)d; + sp_digit hi = (sp_digit)(d >> 32); + + __asm__ __volatile__ ( + "idiv %2" + : "+a" (lo) + : "d" (hi), "r" (div) + : "cc" + ); + + return (sp_digit)lo; +#else + sp_int64 d = ((sp_int64)d1 << 28) + d0; + sp_digit r = 0; sp_digit t; + sp_digit dv = (div >> 13) + 1; - /* All 28 bits from d1 and top 3 bits from d0. */ - d = (d1 << 3) + (d0 >> 25); - r = d / dv; - d -= r * dv; - /* Up to 4 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 22) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; + t = (sp_digit)(d >> 26); + t = (t / dv) << 13; r += t; - /* Up to 7 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 19) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int64)t * div; + t = (sp_digit)(d >> 11); + t = t / (dv << 2); r += t; - /* Up to 10 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 16) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int64)t * div; + t = (sp_digit)d; + t = t / div; r += t; - /* Up to 13 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 13) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 16 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 10) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 19 bits in r */ - /* Next 3 bits from d0. 
*/ - r <<= 3; - d <<= 3; - d += (d0 >> 7) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 22 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 4) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 25 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 1) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 28 bits in r */ - /* Remaining 1 bits from d0. */ - r <<= 1; - d <<= 1; - d += d0 & ((1 << 1) - 1); - t = d / dv; - r += t; - - /* All 28 bits from d1 and top 3 bits from d0. */ + d -= (sp_int64)t * div; return r; +#endif } -#endif /* WOLFSSL_SP_DIV_32 */ - /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. * @@ -9960,7 +9469,6 @@ static int sp_3072_div_56(const sp_digit* a, const sp_digit* d, { int i; #ifndef WOLFSSL_SP_DIV_32 - sp_int64 d1; #endif sp_digit dv; sp_digit r1; @@ -9994,28 +9502,14 @@ static int sp_3072_div_56(const sp_digit* a, const sp_digit* d, t1[55 + 55] += t1[55 + 55 - 1] >> 28; t1[55 + 55 - 1] &= 0xfffffff; for (i=55; i>=0; i--) { -#ifndef WOLFSSL_SP_DIV_32 - d1 = t1[55 + i]; - d1 <<= 28; - d1 += t1[55 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_3072_div_word_56(t1[55 + i], t1[55 + i - 1], dv); -#endif sp_3072_mul_d_56(t2, sd, r1); (void)sp_3072_sub_56(&t1[i], &t1[i], t2); sp_3072_norm_55(&t1[i]); t1[55 + i] += t1[55 + i - 1] >> 28; t1[55 + i - 1] &= 0xfffffff; -#ifndef WOLFSSL_SP_DIV_32 - d1 = -t1[55 + i]; - d1 <<= 28; - d1 -= t1[55 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_3072_div_word_56(-t1[55 + i], -t1[55 + i - 1], dv); -#endif r1 -= t1[55 + i]; sp_3072_mul_d_56(t2, sd, r1); (void)sp_3072_add_56(&t1[i], &t1[i], t2); @@ -10033,8 +9527,7 @@ static int sp_3072_div_56(const sp_digit* a, const sp_digit* d, r[i+1] += r[i] >> 28; r[i] &= 0xfffffff; } - sp_3072_cond_add_56(r, r, sd, 0 - ((r[54] < 0) ? 
- (sp_digit)1 : (sp_digit)0)); + sp_3072_cond_add_56(r, r, sd, r[54] >> 31); sp_3072_norm_55(r); sp_3072_rshift_56(r, r, 4); @@ -10090,10 +9583,7 @@ static int sp_3072_mod_exp_56(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -10158,8 +9648,7 @@ static int sp_3072_mod_exp_56(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_3072_mont_reduce_56(t[0], m, mp); n = sp_3072_cmp_56(t[0], m); - sp_3072_cond_sub_56(t[0], t[0], m, ((n < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_56(t[0], t[0], m, ~(n >> 31)); XMEMCPY(r, t[0], sizeof(*r) * 56 * 2); } @@ -10185,10 +9674,7 @@ static int sp_3072_mod_exp_56(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -10253,8 +9739,7 @@ static int sp_3072_mod_exp_56(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_3072_mont_reduce_56(t[0], m, mp); n = sp_3072_cmp_56(t[0], m); - sp_3072_cond_sub_56(t[0], t[0], m, ((n < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_56(t[0], t[0], m, ~(n >> 31)); XMEMCPY(r, t[0], sizeof(*r) * 56 * 2); } @@ -10280,10 +9765,7 @@ static int sp_3072_mod_exp_56(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -10403,8 +9885,7 @@ static int sp_3072_mod_exp_56(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_3072_mont_reduce_56(rt, m, mp); n = sp_3072_cmp_56(rt, m); - sp_3072_cond_sub_56(rt, rt, m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_56(rt, rt, m, ~(n >> 31)); XMEMCPY(r, rt, sizeof(sp_digit) * 112); } @@ -10469,14 +9950,14 @@ static sp_digit sp_3072_cmp_112(const sp_digit* a, const sp_digit* b) int i; for (i = 104; i >= 0; i -= 8) { - r |= (a[i + 7] - b[i + 7]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 6] - b[i + 6]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 5] - b[i + 5]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 4] - b[i + 4]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 3] - b[i + 3]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 2] - b[i + 2]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 1] - b[i + 1]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 0] - b[i + 0]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[i + 7] - b[i + 7]) & ~(((sp_digit)0 - r) >> 27); + r |= (a[i + 6] - b[i + 6]) & ~(((sp_digit)0 - r) >> 27); + r |= (a[i + 5] - b[i + 5]) & ~(((sp_digit)0 - r) >> 27); + r |= (a[i + 4] - b[i + 4]) & ~(((sp_digit)0 - r) >> 27); + r |= (a[i + 3] - b[i + 3]) & ~(((sp_digit)0 - r) >> 27); + r |= (a[i + 2] - b[i + 2]) & ~(((sp_digit)0 - r) >> 27); + r |= (a[i + 1] - b[i + 1]) & ~(((sp_digit)0 - r) >> 27); + r |= (a[i + 0] - b[i + 0]) & ~(((sp_digit)0 - r) >> 27); } return r; @@ -10634,6 +10115,7 @@ static void sp_3072_mont_reduce_112(sp_digit* a, const sp_digit* m, sp_digit mp) { int i; sp_digit mu; + sp_digit over; sp_3072_norm_112(a + 110); @@ -10672,8 +10154,8 @@ static void sp_3072_mont_reduce_112(sp_digit* a, const sp_digit* m, sp_digit mp) a[i] &= 0xfffffff; #endif sp_3072_mont_shift_112(a, a); - sp_3072_cond_sub_112(a, a, m, 0 - (((a[109] - m[109]) > 0) ? 
- (sp_digit)1 : (sp_digit)0)); + over = a[109] - m[109]; + sp_3072_cond_sub_112(a, a, m, ~((over - 1) >> 31)); sp_3072_norm_112(a); } @@ -10799,95 +10281,47 @@ SP_NOINLINE static void sp_3072_rshift_112(sp_digit* r, const sp_digit* a, r[111] = a[111] >> n; } -#ifdef WOLFSSL_SP_DIV_32 static WC_INLINE sp_digit sp_3072_div_word_112(sp_digit d1, sp_digit d0, - sp_digit dv) + sp_digit div) { - sp_digit d; - sp_digit r; +#ifdef SP_USE_DIVTI3 + sp_int64 d = ((sp_int64)d1 << 28) + d0; + + return d / div; +#elif defined(__x86_64__) || defined(__i386__) + sp_int64 d = ((sp_int64)d1 << 28) + d0; + sp_uint32 lo = (sp_uint32)d; + sp_digit hi = (sp_digit)(d >> 32); + + __asm__ __volatile__ ( + "idiv %2" + : "+a" (lo) + : "d" (hi), "r" (div) + : "cc" + ); + + return (sp_digit)lo; +#else + sp_int64 d = ((sp_int64)d1 << 28) + d0; + sp_digit r = 0; sp_digit t; + sp_digit dv = (div >> 13) + 1; - /* All 28 bits from d1 and top 3 bits from d0. */ - d = (d1 << 3) + (d0 >> 25); - r = d / dv; - d -= r * dv; - /* Up to 4 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 22) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; + t = (sp_digit)(d >> 26); + t = (t / dv) << 13; r += t; - /* Up to 7 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 19) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int64)t * div; + t = (sp_digit)(d >> 11); + t = t / (dv << 2); r += t; - /* Up to 10 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 16) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int64)t * div; + t = (sp_digit)d; + t = t / div; r += t; - /* Up to 13 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 13) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 16 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 10) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 19 bits in r */ - /* Next 3 bits from d0. 
*/ - r <<= 3; - d <<= 3; - d += (d0 >> 7) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 22 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 4) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 25 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 1) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 28 bits in r */ - /* Remaining 1 bits from d0. */ - r <<= 1; - d <<= 1; - d += d0 & ((1 << 1) - 1); - t = d / dv; - r += t; - - /* All 28 bits from d1 and top 3 bits from d0. */ + d -= (sp_int64)t * div; return r; +#endif } -#endif /* WOLFSSL_SP_DIV_32 */ - /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. * @@ -10904,7 +10338,6 @@ static int sp_3072_div_112(const sp_digit* a, const sp_digit* d, { int i; #ifndef WOLFSSL_SP_DIV_32 - sp_int64 d1; #endif sp_digit dv; sp_digit r1; @@ -10938,28 +10371,14 @@ static int sp_3072_div_112(const sp_digit* a, const sp_digit* d, t1[110 + 110] += t1[110 + 110 - 1] >> 28; t1[110 + 110 - 1] &= 0xfffffff; for (i=110; i>=0; i--) { -#ifndef WOLFSSL_SP_DIV_32 - d1 = t1[110 + i]; - d1 <<= 28; - d1 += t1[110 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_3072_div_word_112(t1[110 + i], t1[110 + i - 1], dv); -#endif sp_3072_mul_d_112(t2, sd, r1); (void)sp_3072_sub_112(&t1[i], &t1[i], t2); sp_3072_norm_110(&t1[i]); t1[110 + i] += t1[110 + i - 1] >> 28; t1[110 + i - 1] &= 0xfffffff; -#ifndef WOLFSSL_SP_DIV_32 - d1 = -t1[110 + i]; - d1 <<= 28; - d1 -= t1[110 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_3072_div_word_112(-t1[110 + i], -t1[110 + i - 1], dv); -#endif r1 -= t1[110 + i]; sp_3072_mul_d_112(t2, sd, r1); (void)sp_3072_add_112(&t1[i], &t1[i], t2); @@ -10977,8 +10396,7 @@ static int sp_3072_div_112(const sp_digit* a, const sp_digit* d, r[i+1] += r[i] >> 28; r[i] &= 0xfffffff; } - sp_3072_cond_add_112(r, r, sd, 0 - ((r[109] < 0) ? 
- (sp_digit)1 : (sp_digit)0)); + sp_3072_cond_add_112(r, r, sd, r[109] >> 31); sp_3072_norm_110(r); sp_3072_rshift_112(r, r, 8); @@ -11038,10 +10456,7 @@ static int sp_3072_mod_exp_112(sp_digit* r, const sp_digit* a, const sp_digit* e byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -11106,8 +10521,7 @@ static int sp_3072_mod_exp_112(sp_digit* r, const sp_digit* a, const sp_digit* e sp_3072_mont_reduce_112(t[0], m, mp); n = sp_3072_cmp_112(t[0], m); - sp_3072_cond_sub_112(t[0], t[0], m, ((n < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_112(t[0], t[0], m, ~(n >> 31)); XMEMCPY(r, t[0], sizeof(*r) * 112 * 2); } @@ -11133,10 +10547,7 @@ static int sp_3072_mod_exp_112(sp_digit* r, const sp_digit* a, const sp_digit* e byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -11201,8 +10612,7 @@ static int sp_3072_mod_exp_112(sp_digit* r, const sp_digit* a, const sp_digit* e sp_3072_mont_reduce_112(t[0], m, mp); n = sp_3072_cmp_112(t[0], m); - sp_3072_cond_sub_112(t[0], t[0], m, ((n < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_112(t[0], t[0], m, ~(n >> 31)); XMEMCPY(r, t[0], sizeof(*r) * 112 * 2); } @@ -11228,10 +10638,7 @@ static int sp_3072_mod_exp_112(sp_digit* r, const sp_digit* a, const sp_digit* e byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -11334,8 +10741,7 @@ static int sp_3072_mod_exp_112(sp_digit* r, const sp_digit* a, const sp_digit* e sp_3072_mont_reduce_112(rt, m, mp); n = sp_3072_cmp_112(rt, m); - sp_3072_cond_sub_112(rt, rt, m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_112(rt, rt, m, ~(n >> 31)); XMEMCPY(r, rt, sizeof(sp_digit) * 224); } @@ -11455,8 +10861,7 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, } sp_3072_mont_reduce_112(r, m, mp); mp = sp_3072_cmp_112(r, m); - sp_3072_cond_sub_112(r, r, m, ((mp < 0) ? - (sp_digit)1 : (sp_digit)0)- 1); + sp_3072_cond_sub_112(r, r, m, ~(mp >> 31)); sp_3072_to_bin_112(r, out); *outLen = 384; @@ -11564,8 +10969,7 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, } sp_3072_mont_reduce_112(r, m, mp); mp = sp_3072_cmp_112(r, m); - sp_3072_cond_sub_112(r, r, m, ((mp < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_112(r, r, m, ~(mp >> 31)); } } } @@ -11788,6 +11192,12 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err = MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if (mp_iseven(qm)) { + err = MP_VAL; + } } #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) @@ -11881,6 +11291,12 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err = MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if (mp_iseven(qm)) { + err = MP_VAL; + } } #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) @@ -12424,7 +11840,7 @@ static int sp_3072_mod_exp_2_112(sp_digit* r, const sp_digit* e, int bits, const byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { + if (bits == 0) { err = MP_VAL; } @@ -12499,14 +11915,12 @@ static int sp_3072_mod_exp_2_112(sp_digit* r, const sp_digit* e, int bits, const (void)sp_3072_add_112(r, r, tmp); sp_3072_norm_112(r); o = sp_3072_cmp_112(r, m); - sp_3072_cond_sub_112(r, r, m, ((o < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_112(r, r, m, ~(o >> 31)); } sp_3072_mont_reduce_112(r, m, mp); n = sp_3072_cmp_112(r, m); - sp_3072_cond_sub_112(r, r, m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_112(r, r, m, ~(n >> 31)); } #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) @@ -13175,7 +12589,7 @@ static sp_digit sp_4096_cmp_71(const sp_digit* a, const sp_digit* b) int i; for (i=70; i>=0; i--) { - r |= (a[i] - b[i]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[i] - b[i]) & ~(((sp_digit)0 - r) >> 28); } return r; @@ -13282,6 +12696,7 @@ static void sp_4096_mont_reduce_71(sp_digit* a, const sp_digit* m, sp_digit mp) { int i; sp_digit mu; + sp_digit over; sp_4096_norm_71(a + 71); @@ -13295,8 +12710,8 @@ static void sp_4096_mont_reduce_71(sp_digit* a, const sp_digit* m, sp_digit mp) a[i+1] += a[i] >> 29; a[i] &= 0x1fffffff; sp_4096_mont_shift_71(a, a); - sp_4096_cond_sub_71(a, a, m, 0 - (((a[70] - m[70]) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = a[70] - m[70]; + sp_4096_cond_sub_71(a, a, m, ~((over - 1) >> 31)); sp_4096_norm_71(a); } @@ -13519,135 +12934,47 @@ SP_NOINLINE static void sp_4096_rshift_71(sp_digit* r, const sp_digit* a, r[70] = a[70] >> n; } -#ifdef WOLFSSL_SP_DIV_32 static WC_INLINE sp_digit sp_4096_div_word_71(sp_digit d1, sp_digit d0, - sp_digit dv) + sp_digit div) { - sp_digit d; - sp_digit r; +#ifdef SP_USE_DIVTI3 + sp_int64 d = ((sp_int64)d1 << 29) + d0; + + return d / div; +#elif defined(__x86_64__) || defined(__i386__) + sp_int64 d = ((sp_int64)d1 << 29) + d0; + sp_uint32 lo = (sp_uint32)d; + sp_digit hi = (sp_digit)(d >> 32); + + __asm__ __volatile__ ( + "idiv %2" + : "+a" (lo) + : "d" (hi), "r" (div) + : "cc" + ); + + return (sp_digit)lo; +#else + sp_int64 d = ((sp_int64)d1 << 29) + d0; + sp_digit r = 0; sp_digit t; + sp_digit dv = (div >> 14) + 1; - /* All 29 bits from d1 and top 2 bits from d0. */ - d = (d1 << 2) + (d0 >> 27); - r = d / dv; - d -= r * dv; - /* Up to 3 bits in r */ - /* Next 2 bits from d0. 
*/ - r <<= 2; - d <<= 2; - d += (d0 >> 25) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; + t = (sp_digit)(d >> 28); + t = (t / dv) << 14; r += t; - /* Up to 5 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 23) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int64)t * div; + t = (sp_digit)(d >> 13); + t = t / (dv << 1); r += t; - /* Up to 7 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 21) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int64)t * div; + t = (sp_digit)d; + t = t / div; r += t; - /* Up to 9 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 19) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 11 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 17) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 13 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 15) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 15 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 13) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 17 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 11) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 19 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 9) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 21 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 7) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 23 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 5) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 25 bits in r */ - /* Next 2 bits from d0. 
*/ - r <<= 2; - d <<= 2; - d += (d0 >> 3) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 27 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 1) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 29 bits in r */ - /* Remaining 1 bits from d0. */ - r <<= 1; - d <<= 1; - d += d0 & ((1 << 1) - 1); - t = d / dv; - r += t; - - /* All 29 bits from d1 and top 2 bits from d0. */ + d -= (sp_int64)t * div; return r; +#endif } -#endif /* WOLFSSL_SP_DIV_32 */ - /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. * @@ -13664,7 +12991,6 @@ static int sp_4096_div_71(const sp_digit* a, const sp_digit* d, { int i; #ifndef WOLFSSL_SP_DIV_32 - sp_int64 d1; #endif sp_digit dv; sp_digit r1; @@ -13698,14 +13024,7 @@ static int sp_4096_div_71(const sp_digit* a, const sp_digit* d, t1[71 + 71] += t1[71 + 71 - 1] >> 29; t1[71 + 71 - 1] &= 0x1fffffff; for (i=71; i>=0; i--) { -#ifndef WOLFSSL_SP_DIV_32 - d1 = t1[71 + i]; - d1 <<= 29; - d1 += t1[71 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_4096_div_word_71(t1[71 + i], t1[71 + i - 1], dv); -#endif sp_4096_mul_d_71(t2, sd, r1); (void)sp_4096_sub_71(&t1[i], &t1[i], t2); @@ -13713,14 +13032,7 @@ static int sp_4096_div_71(const sp_digit* a, const sp_digit* d, t1[71 + i] -= t2[71]; t1[71 + i] += t1[71 + i - 1] >> 29; t1[71 + i - 1] &= 0x1fffffff; -#ifndef WOLFSSL_SP_DIV_32 - d1 = -t1[71 + i]; - d1 <<= 29; - d1 -= t1[71 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_4096_div_word_71(-t1[71 + i], -t1[71 + i - 1], dv); -#endif r1 -= t1[71 + i]; sp_4096_mul_d_71(t2, sd, r1); (void)sp_4096_add_71(&t1[i], &t1[i], t2); @@ -13738,8 +13050,7 @@ static int sp_4096_div_71(const sp_digit* a, const sp_digit* d, r[i+1] += r[i] >> 29; r[i] &= 0x1fffffff; } - sp_4096_cond_add_71(r, r, sd, 0 - ((r[70] < 0) ? 
- (sp_digit)1 : (sp_digit)0)); + sp_4096_cond_add_71(r, r, sd, r[70] >> 31); sp_4096_norm_71(r); sp_4096_rshift_71(r, r, 11); @@ -13794,10 +13105,7 @@ static int sp_4096_mod_exp_71(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -13862,8 +13170,7 @@ static int sp_4096_mod_exp_71(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_4096_mont_reduce_71(t[0], m, mp); n = sp_4096_cmp_71(t[0], m); - sp_4096_cond_sub_71(t[0], t[0], m, ((n < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_71(t[0], t[0], m, ~(n >> 31)); XMEMCPY(r, t[0], sizeof(*r) * 71 * 2); } @@ -13889,10 +13196,7 @@ static int sp_4096_mod_exp_71(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -13957,8 +13261,7 @@ static int sp_4096_mod_exp_71(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_4096_mont_reduce_71(t[0], m, mp); n = sp_4096_cmp_71(t[0], m); - sp_4096_cond_sub_71(t[0], t[0], m, ((n < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_71(t[0], t[0], m, ~(n >> 31)); XMEMCPY(r, t[0], sizeof(*r) * 71 * 2); } @@ -13984,10 +13287,7 @@ static int sp_4096_mod_exp_71(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -14107,8 +13407,7 @@ static int sp_4096_mod_exp_71(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_4096_mont_reduce_71(rt, m, mp); n = sp_4096_cmp_71(rt, m); - sp_4096_cond_sub_71(rt, rt, m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_71(rt, rt, m, ~(n >> 31)); XMEMCPY(r, rt, sizeof(sp_digit) * 142); } @@ -14178,7 +13477,7 @@ static sp_digit sp_4096_cmp_142(const sp_digit* a, const sp_digit* b) int i; for (i=141; i>=0; i--) { - r |= (a[i] - b[i]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[i] - b[i]) & ~(((sp_digit)0 - r) >> 28); } return r; @@ -14282,6 +13581,7 @@ static void sp_4096_mont_reduce_142(sp_digit* a, const sp_digit* m, sp_digit mp) { int i; sp_digit mu; + sp_digit over; sp_4096_norm_142(a + 142); @@ -14320,8 +13620,8 @@ static void sp_4096_mont_reduce_142(sp_digit* a, const sp_digit* m, sp_digit mp) a[i] &= 0x1fffffff; #endif sp_4096_mont_shift_142(a, a); - sp_4096_cond_sub_142(a, a, m, 0 - (((a[141] - m[141]) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = a[141] - m[141]; + sp_4096_cond_sub_142(a, a, m, ~((over - 1) >> 31)); sp_4096_norm_142(a); } @@ -14425,135 +13725,47 @@ SP_NOINLINE static void sp_4096_rshift_142(sp_digit* r, const sp_digit* a, r[141] = a[141] >> n; } -#ifdef WOLFSSL_SP_DIV_32 static WC_INLINE sp_digit sp_4096_div_word_142(sp_digit d1, sp_digit d0, - sp_digit dv) + sp_digit div) { - sp_digit d; - sp_digit r; +#ifdef SP_USE_DIVTI3 + sp_int64 d = ((sp_int64)d1 << 29) + d0; + + return d / div; +#elif defined(__x86_64__) || defined(__i386__) + sp_int64 d = ((sp_int64)d1 << 29) + d0; + sp_uint32 lo = (sp_uint32)d; + sp_digit hi = (sp_digit)(d >> 32); + + __asm__ __volatile__ ( + "idiv %2" + : "+a" (lo) + : "d" (hi), "r" (div) + : "cc" + ); + + return (sp_digit)lo; +#else + sp_int64 d = ((sp_int64)d1 << 29) + d0; + sp_digit r = 0; sp_digit t; + sp_digit dv = (div >> 14) + 1; - /* All 29 bits from d1 and top 2 bits from d0. */ - d = (d1 << 2) + (d0 >> 27); - r = d / dv; - d -= r * dv; - /* Up to 3 bits in r */ - /* Next 2 bits from d0. 
*/ - r <<= 2; - d <<= 2; - d += (d0 >> 25) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; + t = (sp_digit)(d >> 28); + t = (t / dv) << 14; r += t; - /* Up to 5 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 23) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int64)t * div; + t = (sp_digit)(d >> 13); + t = t / (dv << 1); r += t; - /* Up to 7 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 21) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int64)t * div; + t = (sp_digit)d; + t = t / div; r += t; - /* Up to 9 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 19) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 11 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 17) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 13 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 15) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 15 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 13) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 17 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 11) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 19 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 9) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 21 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 7) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 23 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 5) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 25 bits in r */ - /* Next 2 bits from d0. 
*/ - r <<= 2; - d <<= 2; - d += (d0 >> 3) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 27 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 1) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 29 bits in r */ - /* Remaining 1 bits from d0. */ - r <<= 1; - d <<= 1; - d += d0 & ((1 << 1) - 1); - t = d / dv; - r += t; - - /* All 29 bits from d1 and top 2 bits from d0. */ + d -= (sp_int64)t * div; return r; +#endif } -#endif /* WOLFSSL_SP_DIV_32 */ - /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. * @@ -14570,7 +13782,6 @@ static int sp_4096_div_142(const sp_digit* a, const sp_digit* d, { int i; #ifndef WOLFSSL_SP_DIV_32 - sp_int64 d1; #endif sp_digit dv; sp_digit r1; @@ -14604,14 +13815,7 @@ static int sp_4096_div_142(const sp_digit* a, const sp_digit* d, t1[142 + 142] += t1[142 + 142 - 1] >> 29; t1[142 + 142 - 1] &= 0x1fffffff; for (i=142; i>=0; i--) { -#ifndef WOLFSSL_SP_DIV_32 - d1 = t1[142 + i]; - d1 <<= 29; - d1 += t1[142 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_4096_div_word_142(t1[142 + i], t1[142 + i - 1], dv); -#endif sp_4096_mul_d_142(t2, sd, r1); (void)sp_4096_sub_142(&t1[i], &t1[i], t2); @@ -14619,14 +13823,7 @@ static int sp_4096_div_142(const sp_digit* a, const sp_digit* d, t1[142 + i] -= t2[142]; t1[142 + i] += t1[142 + i - 1] >> 29; t1[142 + i - 1] &= 0x1fffffff; -#ifndef WOLFSSL_SP_DIV_32 - d1 = -t1[142 + i]; - d1 <<= 29; - d1 -= t1[142 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_4096_div_word_142(-t1[142 + i], -t1[142 + i - 1], dv); -#endif r1 -= t1[142 + i]; sp_4096_mul_d_142(t2, sd, r1); (void)sp_4096_add_142(&t1[i], &t1[i], t2); @@ -14644,8 +13841,7 @@ static int sp_4096_div_142(const sp_digit* a, const sp_digit* d, r[i+1] += r[i] >> 29; r[i] &= 0x1fffffff; } - sp_4096_cond_add_142(r, r, sd, 0 - ((r[141] < 0) ? 
- (sp_digit)1 : (sp_digit)0)); + sp_4096_cond_add_142(r, r, sd, r[141] >> 31); sp_4096_norm_142(r); sp_4096_rshift_142(r, r, 22); @@ -14701,10 +13897,7 @@ static int sp_4096_mod_exp_142(sp_digit* r, const sp_digit* a, const sp_digit* e byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -14769,8 +13962,7 @@ static int sp_4096_mod_exp_142(sp_digit* r, const sp_digit* a, const sp_digit* e sp_4096_mont_reduce_142(t[0], m, mp); n = sp_4096_cmp_142(t[0], m); - sp_4096_cond_sub_142(t[0], t[0], m, ((n < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_142(t[0], t[0], m, ~(n >> 31)); XMEMCPY(r, t[0], sizeof(*r) * 142 * 2); } @@ -14796,10 +13988,7 @@ static int sp_4096_mod_exp_142(sp_digit* r, const sp_digit* a, const sp_digit* e byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -14864,8 +14053,7 @@ static int sp_4096_mod_exp_142(sp_digit* r, const sp_digit* a, const sp_digit* e sp_4096_mont_reduce_142(t[0], m, mp); n = sp_4096_cmp_142(t[0], m); - sp_4096_cond_sub_142(t[0], t[0], m, ((n < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_142(t[0], t[0], m, ~(n >> 31)); XMEMCPY(r, t[0], sizeof(*r) * 142 * 2); } @@ -14891,10 +14079,7 @@ static int sp_4096_mod_exp_142(sp_digit* r, const sp_digit* a, const sp_digit* e byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -14997,8 +14182,7 @@ static int sp_4096_mod_exp_142(sp_digit* r, const sp_digit* a, const sp_digit* e sp_4096_mont_reduce_142(rt, m, mp); n = sp_4096_cmp_142(rt, m); - sp_4096_cond_sub_142(rt, rt, m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_142(rt, rt, m, ~(n >> 31)); XMEMCPY(r, rt, sizeof(sp_digit) * 284); } @@ -15116,8 +14300,7 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, } sp_4096_mont_reduce_142(r, m, mp); mp = sp_4096_cmp_142(r, m); - sp_4096_cond_sub_142(r, r, m, ((mp < 0) ? - (sp_digit)1 : (sp_digit)0)- 1); + sp_4096_cond_sub_142(r, r, m, ~(mp >> 31)); sp_4096_to_bin_142(r, out); *outLen = 512; @@ -15225,8 +14408,7 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, } sp_4096_mont_reduce_142(r, m, mp); mp = sp_4096_cmp_142(r, m); - sp_4096_cond_sub_142(r, r, m, ((mp < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_142(r, r, m, ~(mp >> 31)); } } } @@ -15449,6 +14631,12 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err = MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if (mp_iseven(qm)) { + err = MP_VAL; + } } #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) @@ -15542,6 +14730,12 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err = MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if (mp_iseven(qm)) { + err = MP_VAL; + } } #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) @@ -15864,7 +15058,7 @@ static int sp_4096_mod_exp_2_142(sp_digit* r, const sp_digit* e, int bits, const byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { + if (bits == 0) { err = MP_VAL; } @@ -15939,14 +15133,12 @@ static int sp_4096_mod_exp_2_142(sp_digit* r, const sp_digit* e, int bits, const (void)sp_4096_add_142(r, r, tmp); sp_4096_norm_142(r); o = sp_4096_cmp_142(r, m); - sp_4096_cond_sub_142(r, r, m, ((o < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_142(r, r, m, ~(o >> 31)); } sp_4096_mont_reduce_142(r, m, mp); n = sp_4096_cmp_142(r, m); - sp_4096_cond_sub_142(r, r, m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_142(r, r, m, ~(n >> 31)); } #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) @@ -17263,14 +16455,14 @@ static sp_digit sp_4096_cmp_81(const sp_digit* a, const sp_digit* b) r |= (a[80] - b[80]) & (0 - (sp_digit)1); for (i = 72; i >= 0; i -= 8) { - r |= (a[i + 7] - b[i + 7]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 6] - b[i + 6]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 5] - b[i + 5]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 4] - b[i + 4]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 3] - b[i + 3]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 2] - b[i + 2]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 1] - b[i + 1]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 0] - b[i + 0]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[i + 7] - b[i + 7]) & ~(((sp_digit)0 - r) >> 25); + r |= (a[i + 6] - b[i + 6]) & ~(((sp_digit)0 - r) >> 25); + r |= (a[i + 5] - b[i + 5]) & ~(((sp_digit)0 - r) >> 25); + r |= (a[i + 4] - b[i + 4]) & ~(((sp_digit)0 - r) >> 25); + r |= (a[i + 3] - b[i + 3]) & ~(((sp_digit)0 - r) >> 25); + r |= (a[i + 2] - b[i + 2]) & ~(((sp_digit)0 - r) >> 25); + r |= (a[i + 1] - b[i + 1]) & ~(((sp_digit)0 - r) >> 25); + r |= (a[i + 0] - b[i + 0]) & ~(((sp_digit)0 - r) >> 25); } return r; @@ -17409,6 +16601,7 @@ static void sp_4096_mont_reduce_81(sp_digit* a, const sp_digit* m, sp_digit mp) { int i; sp_digit mu; + sp_digit over; sp_4096_norm_81(a + 79); @@ -17422,8 +16615,8 @@ static void sp_4096_mont_reduce_81(sp_digit* a, const sp_digit* m, sp_digit mp) a[i+1] += a[i] >> 26; a[i] &= 0x3ffffff; sp_4096_mont_shift_81(a, a); - sp_4096_cond_sub_81(a, a, m, 0 - (((a[78] - m[78]) > 0) ? 
- (sp_digit)1 : (sp_digit)0)); + over = a[78] - m[78]; + sp_4096_cond_sub_81(a, a, m, ~((over - 1) >> 31)); sp_4096_norm_81(a); } @@ -17546,63 +16739,47 @@ SP_NOINLINE static void sp_4096_rshift_81(sp_digit* r, const sp_digit* a, r[80] = a[80] >> n; } -#ifdef WOLFSSL_SP_DIV_32 static WC_INLINE sp_digit sp_4096_div_word_81(sp_digit d1, sp_digit d0, - sp_digit dv) + sp_digit div) { - sp_digit d; - sp_digit r; +#ifdef SP_USE_DIVTI3 + sp_int64 d = ((sp_int64)d1 << 26) + d0; + + return d / div; +#elif defined(__x86_64__) || defined(__i386__) + sp_int64 d = ((sp_int64)d1 << 26) + d0; + sp_uint32 lo = (sp_uint32)d; + sp_digit hi = (sp_digit)(d >> 32); + + __asm__ __volatile__ ( + "idiv %2" + : "+a" (lo) + : "d" (hi), "r" (div) + : "cc" + ); + + return (sp_digit)lo; +#else + sp_int64 d = ((sp_int64)d1 << 26) + d0; + sp_digit r = 0; sp_digit t; + sp_digit dv = (div >> 11) + 1; - /* All 26 bits from d1 and top 5 bits from d0. */ - d = (d1 << 5) + (d0 >> 21); - r = d / dv; - d -= r * dv; - /* Up to 6 bits in r */ - /* Next 5 bits from d0. */ - r <<= 5; - d <<= 5; - d += (d0 >> 16) & ((1 << 5) - 1); - t = d / dv; - d -= t * dv; + t = (sp_digit)(d >> 22); + t = (t / dv) << 11; r += t; - /* Up to 11 bits in r */ - /* Next 5 bits from d0. */ - r <<= 5; - d <<= 5; - d += (d0 >> 11) & ((1 << 5) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int64)t * div; + t = (sp_digit)(d >> 7); + t = t / (dv << 4); r += t; - /* Up to 16 bits in r */ - /* Next 5 bits from d0. */ - r <<= 5; - d <<= 5; - d += (d0 >> 6) & ((1 << 5) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int64)t * div; + t = (sp_digit)d; + t = t / div; r += t; - /* Up to 21 bits in r */ - /* Next 5 bits from d0. */ - r <<= 5; - d <<= 5; - d += (d0 >> 1) & ((1 << 5) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 26 bits in r */ - /* Remaining 1 bits from d0. */ - r <<= 1; - d <<= 1; - d += d0 & ((1 << 1) - 1); - t = d / dv; - r += t; - - /* All 26 bits from d1 and top 5 bits from d0. 
*/ + d -= (sp_int64)t * div; return r; +#endif } -#endif /* WOLFSSL_SP_DIV_32 */ - /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. * @@ -17619,7 +16796,6 @@ static int sp_4096_div_81(const sp_digit* a, const sp_digit* d, { int i; #ifndef WOLFSSL_SP_DIV_32 - sp_int64 d1; #endif sp_digit dv; sp_digit r1; @@ -17653,28 +16829,14 @@ static int sp_4096_div_81(const sp_digit* a, const sp_digit* d, t1[79 + 79] += t1[79 + 79 - 1] >> 26; t1[79 + 79 - 1] &= 0x3ffffff; for (i=79; i>=0; i--) { -#ifndef WOLFSSL_SP_DIV_32 - d1 = t1[79 + i]; - d1 <<= 26; - d1 += t1[79 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_4096_div_word_81(t1[79 + i], t1[79 + i - 1], dv); -#endif sp_4096_mul_d_81(t2, sd, r1); (void)sp_4096_sub_81(&t1[i], &t1[i], t2); sp_4096_norm_79(&t1[i]); t1[79 + i] += t1[79 + i - 1] >> 26; t1[79 + i - 1] &= 0x3ffffff; -#ifndef WOLFSSL_SP_DIV_32 - d1 = -t1[79 + i]; - d1 <<= 26; - d1 -= t1[79 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_4096_div_word_81(-t1[79 + i], -t1[79 + i - 1], dv); -#endif r1 -= t1[79 + i]; sp_4096_mul_d_81(t2, sd, r1); (void)sp_4096_add_81(&t1[i], &t1[i], t2); @@ -17692,8 +16854,7 @@ static int sp_4096_div_81(const sp_digit* a, const sp_digit* d, r[i+1] += r[i] >> 26; r[i] &= 0x3ffffff; } - sp_4096_cond_add_81(r, r, sd, 0 - ((r[78] < 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_4096_cond_add_81(r, r, sd, r[78] >> 31); sp_4096_norm_79(r); sp_4096_rshift_81(r, r, 6); @@ -17750,10 +16911,7 @@ static int sp_4096_mod_exp_81(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -17818,8 +16976,7 @@ static int sp_4096_mod_exp_81(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_4096_mont_reduce_81(t[0], m, mp); n = sp_4096_cmp_81(t[0], m); - sp_4096_cond_sub_81(t[0], t[0], m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_81(t[0], t[0], m, ~(n >> 31)); XMEMCPY(r, t[0], sizeof(*r) * 81 * 2); } @@ -17845,10 +17002,7 @@ static int sp_4096_mod_exp_81(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -17913,8 +17067,7 @@ static int sp_4096_mod_exp_81(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_4096_mont_reduce_81(t[0], m, mp); n = sp_4096_cmp_81(t[0], m); - sp_4096_cond_sub_81(t[0], t[0], m, ((n < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_81(t[0], t[0], m, ~(n >> 31)); XMEMCPY(r, t[0], sizeof(*r) * 81 * 2); } @@ -17940,10 +17093,7 @@ static int sp_4096_mod_exp_81(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -18063,8 +17213,7 @@ static int sp_4096_mod_exp_81(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_4096_mont_reduce_81(rt, m, mp); n = sp_4096_cmp_81(rt, m); - sp_4096_cond_sub_81(rt, rt, m, ((n < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_81(rt, rt, m, ~(n >> 31)); XMEMCPY(r, rt, sizeof(sp_digit) * 162); } @@ -18132,16 +17281,16 @@ static sp_digit sp_4096_cmp_162(const sp_digit* a, const sp_digit* b) int i; r |= (a[161] - b[161]) & (0 - (sp_digit)1); - r |= (a[160] - b[160]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[160] - b[160]) & ~(((sp_digit)0 - r) >> 25); for (i = 152; i >= 0; i -= 8) { - r |= (a[i + 7] - b[i + 7]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 6] - b[i + 6]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 5] - b[i + 5]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 4] - b[i + 4]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 3] - b[i + 3]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 2] - b[i + 2]) & (0 - (sp_digit)((r == 0) ? 
1 : 0)); - r |= (a[i + 1] - b[i + 1]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 0] - b[i + 0]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[i + 7] - b[i + 7]) & ~(((sp_digit)0 - r) >> 25); + r |= (a[i + 6] - b[i + 6]) & ~(((sp_digit)0 - r) >> 25); + r |= (a[i + 5] - b[i + 5]) & ~(((sp_digit)0 - r) >> 25); + r |= (a[i + 4] - b[i + 4]) & ~(((sp_digit)0 - r) >> 25); + r |= (a[i + 3] - b[i + 3]) & ~(((sp_digit)0 - r) >> 25); + r |= (a[i + 2] - b[i + 2]) & ~(((sp_digit)0 - r) >> 25); + r |= (a[i + 1] - b[i + 1]) & ~(((sp_digit)0 - r) >> 25); + r |= (a[i + 0] - b[i + 0]) & ~(((sp_digit)0 - r) >> 25); } return r; @@ -18283,6 +17432,7 @@ static void sp_4096_mont_reduce_162(sp_digit* a, const sp_digit* m, sp_digit mp) { int i; sp_digit mu; + sp_digit over; sp_4096_norm_162(a + 158); @@ -18321,8 +17471,8 @@ static void sp_4096_mont_reduce_162(sp_digit* a, const sp_digit* m, sp_digit mp) a[i] &= 0x3ffffff; #endif sp_4096_mont_shift_162(a, a); - sp_4096_cond_sub_162(a, a, m, 0 - (((a[157] - m[157]) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = a[157] - m[157]; + sp_4096_cond_sub_162(a, a, m, ~((over - 1) >> 31)); sp_4096_norm_162(a); } @@ -18444,63 +17594,47 @@ SP_NOINLINE static void sp_4096_rshift_162(sp_digit* r, const sp_digit* a, r[161] = a[161] >> n; } -#ifdef WOLFSSL_SP_DIV_32 static WC_INLINE sp_digit sp_4096_div_word_162(sp_digit d1, sp_digit d0, - sp_digit dv) + sp_digit div) { - sp_digit d; - sp_digit r; +#ifdef SP_USE_DIVTI3 + sp_int64 d = ((sp_int64)d1 << 26) + d0; + + return d / div; +#elif defined(__x86_64__) || defined(__i386__) + sp_int64 d = ((sp_int64)d1 << 26) + d0; + sp_uint32 lo = (sp_uint32)d; + sp_digit hi = (sp_digit)(d >> 32); + + __asm__ __volatile__ ( + "idiv %2" + : "+a" (lo) + : "d" (hi), "r" (div) + : "cc" + ); + + return (sp_digit)lo; +#else + sp_int64 d = ((sp_int64)d1 << 26) + d0; + sp_digit r = 0; sp_digit t; + sp_digit dv = (div >> 11) + 1; - /* All 26 bits from d1 and top 5 bits from d0. 
*/ - d = (d1 << 5) + (d0 >> 21); - r = d / dv; - d -= r * dv; - /* Up to 6 bits in r */ - /* Next 5 bits from d0. */ - r <<= 5; - d <<= 5; - d += (d0 >> 16) & ((1 << 5) - 1); - t = d / dv; - d -= t * dv; + t = (sp_digit)(d >> 22); + t = (t / dv) << 11; r += t; - /* Up to 11 bits in r */ - /* Next 5 bits from d0. */ - r <<= 5; - d <<= 5; - d += (d0 >> 11) & ((1 << 5) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int64)t * div; + t = (sp_digit)(d >> 7); + t = t / (dv << 4); r += t; - /* Up to 16 bits in r */ - /* Next 5 bits from d0. */ - r <<= 5; - d <<= 5; - d += (d0 >> 6) & ((1 << 5) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int64)t * div; + t = (sp_digit)d; + t = t / div; r += t; - /* Up to 21 bits in r */ - /* Next 5 bits from d0. */ - r <<= 5; - d <<= 5; - d += (d0 >> 1) & ((1 << 5) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 26 bits in r */ - /* Remaining 1 bits from d0. */ - r <<= 1; - d <<= 1; - d += d0 & ((1 << 1) - 1); - t = d / dv; - r += t; - - /* All 26 bits from d1 and top 5 bits from d0. */ + d -= (sp_int64)t * div; return r; +#endif } -#endif /* WOLFSSL_SP_DIV_32 */ - /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. 
* @@ -18517,7 +17651,6 @@ static int sp_4096_div_162(const sp_digit* a, const sp_digit* d, { int i; #ifndef WOLFSSL_SP_DIV_32 - sp_int64 d1; #endif sp_digit dv; sp_digit r1; @@ -18551,28 +17684,14 @@ static int sp_4096_div_162(const sp_digit* a, const sp_digit* d, t1[158 + 158] += t1[158 + 158 - 1] >> 26; t1[158 + 158 - 1] &= 0x3ffffff; for (i=158; i>=0; i--) { -#ifndef WOLFSSL_SP_DIV_32 - d1 = t1[158 + i]; - d1 <<= 26; - d1 += t1[158 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_4096_div_word_162(t1[158 + i], t1[158 + i - 1], dv); -#endif sp_4096_mul_d_162(t2, sd, r1); (void)sp_4096_sub_162(&t1[i], &t1[i], t2); sp_4096_norm_158(&t1[i]); t1[158 + i] += t1[158 + i - 1] >> 26; t1[158 + i - 1] &= 0x3ffffff; -#ifndef WOLFSSL_SP_DIV_32 - d1 = -t1[158 + i]; - d1 <<= 26; - d1 -= t1[158 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_4096_div_word_162(-t1[158 + i], -t1[158 + i - 1], dv); -#endif r1 -= t1[158 + i]; sp_4096_mul_d_162(t2, sd, r1); (void)sp_4096_add_162(&t1[i], &t1[i], t2); @@ -18590,8 +17709,7 @@ static int sp_4096_div_162(const sp_digit* a, const sp_digit* d, r[i+1] += r[i] >> 26; r[i] &= 0x3ffffff; } - sp_4096_cond_add_162(r, r, sd, 0 - ((r[157] < 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_4096_cond_add_162(r, r, sd, r[157] >> 31); sp_4096_norm_158(r); sp_4096_rshift_162(r, r, 12); @@ -18653,10 +17771,7 @@ static int sp_4096_mod_exp_162(sp_digit* r, const sp_digit* a, const sp_digit* e byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -18721,8 +17836,7 @@ static int sp_4096_mod_exp_162(sp_digit* r, const sp_digit* a, const sp_digit* e sp_4096_mont_reduce_162(t[0], m, mp); n = sp_4096_cmp_162(t[0], m); - sp_4096_cond_sub_162(t[0], t[0], m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_162(t[0], t[0], m, ~(n >> 31)); XMEMCPY(r, t[0], sizeof(*r) * 162 * 2); } @@ -18748,10 +17862,7 @@ static int sp_4096_mod_exp_162(sp_digit* r, const sp_digit* a, const sp_digit* e byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -18816,8 +17927,7 @@ static int sp_4096_mod_exp_162(sp_digit* r, const sp_digit* a, const sp_digit* e sp_4096_mont_reduce_162(t[0], m, mp); n = sp_4096_cmp_162(t[0], m); - sp_4096_cond_sub_162(t[0], t[0], m, ((n < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_162(t[0], t[0], m, ~(n >> 31)); XMEMCPY(r, t[0], sizeof(*r) * 162 * 2); } @@ -18843,10 +17953,7 @@ static int sp_4096_mod_exp_162(sp_digit* r, const sp_digit* a, const sp_digit* e byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -18949,8 +18056,7 @@ static int sp_4096_mod_exp_162(sp_digit* r, const sp_digit* a, const sp_digit* e sp_4096_mont_reduce_162(rt, m, mp); n = sp_4096_cmp_162(rt, m); - sp_4096_cond_sub_162(rt, rt, m, ((n < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_162(rt, rt, m, ~(n >> 31)); XMEMCPY(r, rt, sizeof(sp_digit) * 324); } @@ -19070,8 +18176,7 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, } sp_4096_mont_reduce_162(r, m, mp); mp = sp_4096_cmp_162(r, m); - sp_4096_cond_sub_162(r, r, m, ((mp < 0) ? - (sp_digit)1 : (sp_digit)0)- 1); + sp_4096_cond_sub_162(r, r, m, ~(mp >> 31)); sp_4096_to_bin_162(r, out); *outLen = 512; @@ -19179,8 +18284,7 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, } sp_4096_mont_reduce_162(r, m, mp); mp = sp_4096_cmp_162(r, m); - sp_4096_cond_sub_162(r, r, m, ((mp < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_162(r, r, m, ~(mp >> 31)); } } } @@ -19403,6 +18507,12 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err = MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if (mp_iseven(qm)) { + err = MP_VAL; + } } #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) @@ -19496,6 +18606,12 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err = MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if (mp_iseven(qm)) { + err = MP_VAL; + } } #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) @@ -20139,7 +19255,7 @@ static int sp_4096_mod_exp_2_162(sp_digit* r, const sp_digit* e, int bits, const byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { + if (bits == 0) { err = MP_VAL; } @@ -20214,14 +19330,12 @@ static int sp_4096_mod_exp_2_162(sp_digit* r, const sp_digit* e, int bits, const (void)sp_4096_add_162(r, r, tmp); sp_4096_norm_162(r); o = sp_4096_cmp_162(r, m); - sp_4096_cond_sub_162(r, r, m, ((o < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_162(r, r, m, ~(o >> 31)); } sp_4096_mont_reduce_162(r, m, mp); n = sp_4096_cmp_162(r, m); - sp_4096_cond_sub_162(r, r, m, ((n < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_162(r, r, m, ~(n >> 31)); } #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) @@ -20981,18 +20095,18 @@ static sp_digit sp_256_cmp_9(const sp_digit* a, const sp_digit* b) int i; for (i=8; i>=0; i--) { - r |= (a[i] - b[i]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[i] - b[i]) & ~(((sp_digit)0 - r) >> 28); } #else r |= (a[ 8] - b[ 8]) & (0 - (sp_digit)1); - r |= (a[ 7] - b[ 7]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[ 6] - b[ 6]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[ 5] - b[ 5]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[ 4] - b[ 4]) & (0 - (sp_digit)((r == 0) ? 
1 : 0)); - r |= (a[ 3] - b[ 3]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[ 2] - b[ 2]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[ 1] - b[ 1]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[ 0] - b[ 0]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[ 7] - b[ 7]) & ~(((sp_digit)0 - r) >> 28); + r |= (a[ 6] - b[ 6]) & ~(((sp_digit)0 - r) >> 28); + r |= (a[ 5] - b[ 5]) & ~(((sp_digit)0 - r) >> 28); + r |= (a[ 4] - b[ 4]) & ~(((sp_digit)0 - r) >> 28); + r |= (a[ 3] - b[ 3]) & ~(((sp_digit)0 - r) >> 28); + r |= (a[ 2] - b[ 2]) & ~(((sp_digit)0 - r) >> 28); + r |= (a[ 1] - b[ 1]) & ~(((sp_digit)0 - r) >> 28); + r |= (a[ 0] - b[ 0]) & ~(((sp_digit)0 - r) >> 28); #endif /* WOLFSSL_SP_SMALL */ return r; @@ -21180,6 +20294,7 @@ static void sp_256_mont_reduce_order_9(sp_digit* a, const sp_digit* m, sp_digit { int i; sp_digit mu; + sp_digit over; sp_256_norm_9(a + 9); @@ -21193,8 +20308,8 @@ static void sp_256_mont_reduce_order_9(sp_digit* a, const sp_digit* m, sp_digit a[i+1] += a[i] >> 29; a[i] &= 0x1fffffff; sp_256_mont_shift_9(a, a); - sp_256_cond_sub_9(a, a, m, 0 - (((a[8] >> 24) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = a[8] >> 24; + sp_256_cond_sub_9(a, a, m, ~((over - 1) >> 31)); sp_256_norm_9(a); } @@ -21425,8 +20540,7 @@ static void sp_256_map_9(sp_point_256* r, const sp_point_256* p, sp_256_mont_reduce_9(r->x, p256_mod, p256_mp_mod); /* Reduce x to less than modulus */ n = sp_256_cmp_9(r->x, p256_mod); - sp_256_cond_sub_9(r->x, r->x, p256_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_256_cond_sub_9(r->x, r->x, p256_mod, ~(n >> 28)); sp_256_norm_9(r->x); /* y /= z^3 */ @@ -21435,8 +20549,7 @@ static void sp_256_map_9(sp_point_256* r, const sp_point_256* p, sp_256_mont_reduce_9(r->y, p256_mod, p256_mp_mod); /* Reduce y to less than modulus */ n = sp_256_cmp_9(r->y, p256_mod); - sp_256_cond_sub_9(r->y, r->y, p256_mod, 0 - ((n >= 0) ? 
- (sp_digit)1 : (sp_digit)0)); + sp_256_cond_sub_9(r->y, r->y, p256_mod, ~(n >> 28)); sp_256_norm_9(r->y); XMEMSET(r->z, 0, sizeof(r->z) / 2); @@ -21454,10 +20567,11 @@ static void sp_256_map_9(sp_point_256* r, const sp_point_256* p, static void sp_256_mont_add_9(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) { + sp_digit over; (void)sp_256_add_9(r, a, b); sp_256_norm_9(r); - sp_256_cond_sub_9(r, r, m, 0 - (((r[8] >> 24) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = r[8] >> 24; + sp_256_cond_sub_9(r, r, m, ~((over - 1) >> 31)); sp_256_norm_9(r); } @@ -21469,10 +20583,11 @@ static void sp_256_mont_add_9(sp_digit* r, const sp_digit* a, const sp_digit* b, */ static void sp_256_mont_dbl_9(sp_digit* r, const sp_digit* a, const sp_digit* m) { + sp_digit over; (void)sp_256_add_9(r, a, a); sp_256_norm_9(r); - sp_256_cond_sub_9(r, r, m, 0 - (((r[8] >> 24) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = r[8] >> 24; + sp_256_cond_sub_9(r, r, m, ~((over - 1) >> 31)); sp_256_norm_9(r); } @@ -21484,15 +20599,16 @@ static void sp_256_mont_dbl_9(sp_digit* r, const sp_digit* a, const sp_digit* m) */ static void sp_256_mont_tpl_9(sp_digit* r, const sp_digit* a, const sp_digit* m) { + sp_digit over; (void)sp_256_add_9(r, a, a); sp_256_norm_9(r); - sp_256_cond_sub_9(r, r, m, 0 - (((r[8] >> 24) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = r[8] >> 24; + sp_256_cond_sub_9(r, r, m, ~((over - 1) >> 31)); sp_256_norm_9(r); (void)sp_256_add_9(r, r, a); sp_256_norm_9(r); - sp_256_cond_sub_9(r, r, m, 0 - (((r[8] >> 24) > 0) ? 
- (sp_digit)1 : (sp_digit)0)); + over = r[8] >> 24; + sp_256_cond_sub_9(r, r, m, ~((over - 1) >> 31)); sp_256_norm_9(r); } @@ -21878,7 +20994,8 @@ static int sp_256_proj_point_add_9_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, /* Check double */ (void)sp_256_sub_9(ctx->t1, p256_mod, q->y); sp_256_norm_9(ctx->t1); - if ((sp_256_cmp_equal_9(p->x, q->x) & sp_256_cmp_equal_9(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_256_cmp_equal_9(p->x, q->x) & sp_256_cmp_equal_9(p->z, q->z) & (sp_256_cmp_equal_9(p->y, q->y) | sp_256_cmp_equal_9(p->y, ctx->t1))) != 0) { XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); @@ -22046,7 +21163,8 @@ static void sp_256_proj_point_add_9(sp_point_256* r, /* Check double */ (void)sp_256_sub_9(t1, p256_mod, q->y); sp_256_norm_9(t1); - if ((sp_256_cmp_equal_9(p->x, q->x) & sp_256_cmp_equal_9(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_256_cmp_equal_9(p->x, q->x) & sp_256_cmp_equal_9(p->z, q->z) & (sp_256_cmp_equal_9(p->y, q->y) | sp_256_cmp_equal_9(p->y, t1))) != 0) { sp_256_proj_point_dbl_9(r, p, t); } @@ -22079,7 +21197,8 @@ static void sp_256_proj_point_add_9(sp_point_256* r, sp_256_mont_sub_9(t2, t2, t1, p256_mod); /* R = S2 - S1 */ sp_256_mont_sub_9(t4, t4, t3, p256_mod); - if (sp_256_iszero_9(t2) & sp_256_iszero_9(t4) & maskt) { + if (~p->infinity & ~q->infinity & + sp_256_iszero_9(t2) & sp_256_iszero_9(t4) & maskt) { sp_256_proj_point_dbl_9(r, p, t); } else { @@ -23083,7 +22202,8 @@ static void sp_256_proj_point_add_qz1_9(sp_point_256* r, const sp_point_256* p, /* Check double */ (void)sp_256_sub_9(t1, p256_mod, q->y); sp_256_norm_9(t1); - if ((sp_256_cmp_equal_9(p->x, q->x) & sp_256_cmp_equal_9(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_256_cmp_equal_9(p->x, q->x) & sp_256_cmp_equal_9(p->z, q->z) & (sp_256_cmp_equal_9(p->y, q->y) | sp_256_cmp_equal_9(p->y, t1))) != 0) { sp_256_proj_point_dbl_9(r, p, t); } @@ -25612,8 +24732,7 @@ static int sp_256_div_9(const sp_digit* a, const sp_digit* d, t1[9 + i] -= 
t2[9]; sp_256_norm_9(&t1[i + 1]); - mask = (sp_digit)0 - ((t1[9 + i] > 0) ? - (sp_digit)1 : (sp_digit)0); + mask = ~((t1[9 + i] - 1) >> 31); sp_256_cond_sub_9(t1 + i, t1 + i, sd, mask); sp_256_norm_9(&t1[i + 1]); } @@ -28176,24 +27295,24 @@ static sp_digit sp_384_cmp_15(const sp_digit* a, const sp_digit* b) int i; for (i=14; i>=0; i--) { - r |= (a[i] - b[i]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[i] - b[i]) & ~(((sp_digit)0 - r) >> 25); } #else r |= (a[14] - b[14]) & (0 - (sp_digit)1); - r |= (a[13] - b[13]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[12] - b[12]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[11] - b[11]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[10] - b[10]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[ 9] - b[ 9]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[ 8] - b[ 8]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[ 7] - b[ 7]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[ 6] - b[ 6]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[ 5] - b[ 5]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[ 4] - b[ 4]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[ 3] - b[ 3]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[ 2] - b[ 2]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[ 1] - b[ 1]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[ 0] - b[ 0]) & (0 - (sp_digit)((r == 0) ? 
1 : 0)); + r |= (a[13] - b[13]) & ~(((sp_digit)0 - r) >> 25); + r |= (a[12] - b[12]) & ~(((sp_digit)0 - r) >> 25); + r |= (a[11] - b[11]) & ~(((sp_digit)0 - r) >> 25); + r |= (a[10] - b[10]) & ~(((sp_digit)0 - r) >> 25); + r |= (a[ 9] - b[ 9]) & ~(((sp_digit)0 - r) >> 25); + r |= (a[ 8] - b[ 8]) & ~(((sp_digit)0 - r) >> 25); + r |= (a[ 7] - b[ 7]) & ~(((sp_digit)0 - r) >> 25); + r |= (a[ 6] - b[ 6]) & ~(((sp_digit)0 - r) >> 25); + r |= (a[ 5] - b[ 5]) & ~(((sp_digit)0 - r) >> 25); + r |= (a[ 4] - b[ 4]) & ~(((sp_digit)0 - r) >> 25); + r |= (a[ 3] - b[ 3]) & ~(((sp_digit)0 - r) >> 25); + r |= (a[ 2] - b[ 2]) & ~(((sp_digit)0 - r) >> 25); + r |= (a[ 1] - b[ 1]) & ~(((sp_digit)0 - r) >> 25); + r |= (a[ 0] - b[ 0]) & ~(((sp_digit)0 - r) >> 25); #endif /* WOLFSSL_SP_SMALL */ return r; @@ -28391,6 +27510,7 @@ static void sp_384_mont_reduce_order_15(sp_digit* a, const sp_digit* m, sp_digit { int i; sp_digit mu; + sp_digit over; sp_384_norm_15(a + 15); @@ -28404,8 +27524,8 @@ static void sp_384_mont_reduce_order_15(sp_digit* a, const sp_digit* m, sp_digit a[i+1] += a[i] >> 26; a[i] &= 0x3ffffff; sp_384_mont_shift_15(a, a); - sp_384_cond_sub_15(a, a, m, 0 - (((a[14] >> 20) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = a[14] >> 20; + sp_384_cond_sub_15(a, a, m, ~((over - 1) >> 31)); sp_384_norm_15(a); } @@ -28680,8 +27800,7 @@ static void sp_384_map_15(sp_point_384* r, const sp_point_384* p, sp_384_mont_reduce_15(r->x, p384_mod, p384_mp_mod); /* Reduce x to less than modulus */ n = sp_384_cmp_15(r->x, p384_mod); - sp_384_cond_sub_15(r->x, r->x, p384_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_384_cond_sub_15(r->x, r->x, p384_mod, ~(n >> 25)); sp_384_norm_15(r->x); /* y /= z^3 */ @@ -28690,8 +27809,7 @@ static void sp_384_map_15(sp_point_384* r, const sp_point_384* p, sp_384_mont_reduce_15(r->y, p384_mod, p384_mp_mod); /* Reduce y to less than modulus */ n = sp_384_cmp_15(r->y, p384_mod); - sp_384_cond_sub_15(r->y, r->y, p384_mod, 0 - ((n >= 0) ? 
- (sp_digit)1 : (sp_digit)0)); + sp_384_cond_sub_15(r->y, r->y, p384_mod, ~(n >> 25)); sp_384_norm_15(r->y); XMEMSET(r->z, 0, sizeof(r->z) / 2); @@ -28709,10 +27827,11 @@ static void sp_384_map_15(sp_point_384* r, const sp_point_384* p, static void sp_384_mont_add_15(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) { + sp_digit over; (void)sp_384_add_15(r, a, b); sp_384_norm_15(r); - sp_384_cond_sub_15(r, r, m, 0 - (((r[14] >> 20) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = r[14] >> 20; + sp_384_cond_sub_15(r, r, m, ~((over - 1) >> 31)); sp_384_norm_15(r); } @@ -28724,10 +27843,11 @@ static void sp_384_mont_add_15(sp_digit* r, const sp_digit* a, const sp_digit* b */ static void sp_384_mont_dbl_15(sp_digit* r, const sp_digit* a, const sp_digit* m) { + sp_digit over; (void)sp_384_add_15(r, a, a); sp_384_norm_15(r); - sp_384_cond_sub_15(r, r, m, 0 - (((r[14] >> 20) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = r[14] >> 20; + sp_384_cond_sub_15(r, r, m, ~((over - 1) >> 31)); sp_384_norm_15(r); } @@ -28739,15 +27859,16 @@ static void sp_384_mont_dbl_15(sp_digit* r, const sp_digit* a, const sp_digit* m */ static void sp_384_mont_tpl_15(sp_digit* r, const sp_digit* a, const sp_digit* m) { + sp_digit over; (void)sp_384_add_15(r, a, a); sp_384_norm_15(r); - sp_384_cond_sub_15(r, r, m, 0 - (((r[14] >> 20) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = r[14] >> 20; + sp_384_cond_sub_15(r, r, m, ~((over - 1) >> 31)); sp_384_norm_15(r); (void)sp_384_add_15(r, r, a); sp_384_norm_15(r); - sp_384_cond_sub_15(r, r, m, 0 - (((r[14] >> 20) > 0) ? 
- (sp_digit)1 : (sp_digit)0)); + over = r[14] >> 20; + sp_384_cond_sub_15(r, r, m, ~((over - 1) >> 31)); sp_384_norm_15(r); } @@ -29147,7 +28268,8 @@ static int sp_384_proj_point_add_15_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, /* Check double */ (void)sp_384_sub_15(ctx->t1, p384_mod, q->y); sp_384_norm_15(ctx->t1); - if ((sp_384_cmp_equal_15(p->x, q->x) & sp_384_cmp_equal_15(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_384_cmp_equal_15(p->x, q->x) & sp_384_cmp_equal_15(p->z, q->z) & (sp_384_cmp_equal_15(p->y, q->y) | sp_384_cmp_equal_15(p->y, ctx->t1))) != 0) { XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); @@ -29315,7 +28437,8 @@ static void sp_384_proj_point_add_15(sp_point_384* r, /* Check double */ (void)sp_384_sub_15(t1, p384_mod, q->y); sp_384_norm_15(t1); - if ((sp_384_cmp_equal_15(p->x, q->x) & sp_384_cmp_equal_15(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_384_cmp_equal_15(p->x, q->x) & sp_384_cmp_equal_15(p->z, q->z) & (sp_384_cmp_equal_15(p->y, q->y) | sp_384_cmp_equal_15(p->y, t1))) != 0) { sp_384_proj_point_dbl_15(r, p, t); } @@ -29348,7 +28471,8 @@ static void sp_384_proj_point_add_15(sp_point_384* r, sp_384_mont_sub_15(t2, t2, t1, p384_mod); /* R = S2 - S1 */ sp_384_mont_sub_15(t4, t4, t3, p384_mod); - if (sp_384_iszero_15(t2) & sp_384_iszero_15(t4) & maskt) { + if (~p->infinity & ~q->infinity & + sp_384_iszero_15(t2) & sp_384_iszero_15(t4) & maskt) { sp_384_proj_point_dbl_15(r, p, t); } else { @@ -30444,7 +29568,8 @@ static void sp_384_proj_point_add_qz1_15(sp_point_384* r, const sp_point_384* p, /* Check double */ (void)sp_384_sub_15(t1, p384_mod, q->y); sp_384_norm_15(t1); - if ((sp_384_cmp_equal_15(p->x, q->x) & sp_384_cmp_equal_15(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_384_cmp_equal_15(p->x, q->x) & sp_384_cmp_equal_15(p->z, q->z) & (sp_384_cmp_equal_15(p->y, q->y) | sp_384_cmp_equal_15(p->y, t1))) != 0) { sp_384_proj_point_dbl_15(r, p, t); } @@ -33551,8 +32676,7 @@ static int sp_384_div_15(const 
sp_digit* a, const sp_digit* d, t1[15 + i] -= t2[15]; sp_384_norm_15(&t1[i + 1]); - mask = (sp_digit)0 - ((t1[15 + i] > 0) ? - (sp_digit)1 : (sp_digit)0); + mask = ~((t1[15 + i] - 1) >> 31); sp_384_cond_sub_15(t1 + i, t1 + i, sd, mask); sp_384_norm_15(&t1[i + 1]); } @@ -35812,25 +34936,25 @@ static sp_digit sp_521_cmp_21(const sp_digit* a, const sp_digit* b) int i; for (i=20; i>=0; i--) { - r |= (a[i] - b[i]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[i] - b[i]) & ~(((sp_digit)0 - r) >> 24); } #else int i; r |= (a[20] - b[20]) & (0 - (sp_digit)1); - r |= (a[19] - b[19]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[18] - b[18]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[17] - b[17]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[16] - b[16]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[19] - b[19]) & ~(((sp_digit)0 - r) >> 24); + r |= (a[18] - b[18]) & ~(((sp_digit)0 - r) >> 24); + r |= (a[17] - b[17]) & ~(((sp_digit)0 - r) >> 24); + r |= (a[16] - b[16]) & ~(((sp_digit)0 - r) >> 24); for (i = 8; i >= 0; i -= 8) { - r |= (a[i + 7] - b[i + 7]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 6] - b[i + 6]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 5] - b[i + 5]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 4] - b[i + 4]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 3] - b[i + 3]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 2] - b[i + 2]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 1] - b[i + 1]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 0] - b[i + 0]) & (0 - (sp_digit)((r == 0) ? 
1 : 0)); + r |= (a[i + 7] - b[i + 7]) & ~(((sp_digit)0 - r) >> 24); + r |= (a[i + 6] - b[i + 6]) & ~(((sp_digit)0 - r) >> 24); + r |= (a[i + 5] - b[i + 5]) & ~(((sp_digit)0 - r) >> 24); + r |= (a[i + 4] - b[i + 4]) & ~(((sp_digit)0 - r) >> 24); + r |= (a[i + 3] - b[i + 3]) & ~(((sp_digit)0 - r) >> 24); + r |= (a[i + 2] - b[i + 2]) & ~(((sp_digit)0 - r) >> 24); + r |= (a[i + 1] - b[i + 1]) & ~(((sp_digit)0 - r) >> 24); + r |= (a[i + 0] - b[i + 0]) & ~(((sp_digit)0 - r) >> 24); } #endif /* WOLFSSL_SP_SMALL */ @@ -36012,6 +35136,7 @@ static void sp_521_mont_reduce_order_21(sp_digit* a, const sp_digit* m, sp_digit { int i; sp_digit mu; + sp_digit over; sp_521_norm_21(a + 21); @@ -36025,8 +35150,8 @@ static void sp_521_mont_reduce_order_21(sp_digit* a, const sp_digit* m, sp_digit a[i+1] += a[i] >> 25; a[i] &= 0x1ffffff; sp_521_mont_shift_21(a, a); - sp_521_cond_sub_21(a, a, m, 0 - (((a[20] >> 21) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = a[20] >> 21; + sp_521_cond_sub_21(a, a, m, ~((over - 1) >> 31)); sp_521_norm_21(a); } @@ -36193,8 +35318,7 @@ static void sp_521_map_21(sp_point_521* r, const sp_point_521* p, sp_521_mont_reduce_21(r->x, p521_mod, p521_mp_mod); /* Reduce x to less than modulus */ n = sp_521_cmp_21(r->x, p521_mod); - sp_521_cond_sub_21(r->x, r->x, p521_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_521_cond_sub_21(r->x, r->x, p521_mod, ~(n >> 24)); sp_521_norm_21(r->x); /* y /= z^3 */ @@ -36203,8 +35327,7 @@ static void sp_521_map_21(sp_point_521* r, const sp_point_521* p, sp_521_mont_reduce_21(r->y, p521_mod, p521_mp_mod); /* Reduce y to less than modulus */ n = sp_521_cmp_21(r->y, p521_mod); - sp_521_cond_sub_21(r->y, r->y, p521_mod, 0 - ((n >= 0) ? 
- (sp_digit)1 : (sp_digit)0)); + sp_521_cond_sub_21(r->y, r->y, p521_mod, ~(n >> 24)); sp_521_norm_21(r->y); XMEMSET(r->z, 0, sizeof(r->z) / 2); @@ -36222,10 +35345,11 @@ static void sp_521_map_21(sp_point_521* r, const sp_point_521* p, static void sp_521_mont_add_21(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) { + sp_digit over; (void)sp_521_add_21(r, a, b); sp_521_norm_21(r); - sp_521_cond_sub_21(r, r, m, 0 - (((r[20] >> 21) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = r[20] >> 21; + sp_521_cond_sub_21(r, r, m, ~((over - 1) >> 31)); sp_521_norm_21(r); } @@ -36237,10 +35361,11 @@ static void sp_521_mont_add_21(sp_digit* r, const sp_digit* a, const sp_digit* b */ static void sp_521_mont_dbl_21(sp_digit* r, const sp_digit* a, const sp_digit* m) { + sp_digit over; (void)sp_521_add_21(r, a, a); sp_521_norm_21(r); - sp_521_cond_sub_21(r, r, m, 0 - (((r[20] >> 21) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = r[20] >> 21; + sp_521_cond_sub_21(r, r, m, ~((over - 1) >> 31)); sp_521_norm_21(r); } @@ -36252,15 +35377,16 @@ static void sp_521_mont_dbl_21(sp_digit* r, const sp_digit* a, const sp_digit* m */ static void sp_521_mont_tpl_21(sp_digit* r, const sp_digit* a, const sp_digit* m) { + sp_digit over; (void)sp_521_add_21(r, a, a); sp_521_norm_21(r); - sp_521_cond_sub_21(r, r, m, 0 - (((r[20] >> 21) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = r[20] >> 21; + sp_521_cond_sub_21(r, r, m, ~((over - 1) >> 31)); sp_521_norm_21(r); (void)sp_521_add_21(r, r, a); sp_521_norm_21(r); - sp_521_cond_sub_21(r, r, m, 0 - (((r[20] >> 21) > 0) ? 
- (sp_digit)1 : (sp_digit)0)); + over = r[20] >> 21; + sp_521_cond_sub_21(r, r, m, ~((over - 1) >> 31)); sp_521_norm_21(r); } @@ -36671,7 +35797,8 @@ static int sp_521_proj_point_add_21_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, /* Check double */ (void)sp_521_sub_21(ctx->t1, p521_mod, q->y); sp_521_norm_21(ctx->t1); - if ((sp_521_cmp_equal_21(p->x, q->x) & sp_521_cmp_equal_21(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_521_cmp_equal_21(p->x, q->x) & sp_521_cmp_equal_21(p->z, q->z) & (sp_521_cmp_equal_21(p->y, q->y) | sp_521_cmp_equal_21(p->y, ctx->t1))) != 0) { XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); @@ -36839,7 +35966,8 @@ static void sp_521_proj_point_add_21(sp_point_521* r, /* Check double */ (void)sp_521_sub_21(t1, p521_mod, q->y); sp_521_norm_21(t1); - if ((sp_521_cmp_equal_21(p->x, q->x) & sp_521_cmp_equal_21(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_521_cmp_equal_21(p->x, q->x) & sp_521_cmp_equal_21(p->z, q->z) & (sp_521_cmp_equal_21(p->y, q->y) | sp_521_cmp_equal_21(p->y, t1))) != 0) { sp_521_proj_point_dbl_21(r, p, t); } @@ -36872,7 +36000,8 @@ static void sp_521_proj_point_add_21(sp_point_521* r, sp_521_mont_sub_21(t2, t2, t1, p521_mod); /* R = S2 - S1 */ sp_521_mont_sub_21(t4, t4, t3, p521_mod); - if (sp_521_iszero_21(t2) & sp_521_iszero_21(t4) & maskt) { + if (~p->infinity & ~q->infinity & + sp_521_iszero_21(t2) & sp_521_iszero_21(t4) & maskt) { sp_521_proj_point_dbl_21(r, p, t); } else { @@ -37866,7 +36995,8 @@ static void sp_521_proj_point_add_qz1_21(sp_point_521* r, const sp_point_521* p, /* Check double */ (void)sp_521_sub_21(t1, p521_mod, q->y); sp_521_norm_21(t1); - if ((sp_521_cmp_equal_21(p->x, q->x) & sp_521_cmp_equal_21(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_521_cmp_equal_21(p->x, q->x) & sp_521_cmp_equal_21(p->z, q->z) & (sp_521_cmp_equal_21(p->y, q->y) | sp_521_cmp_equal_21(p->y, t1))) != 0) { sp_521_proj_point_dbl_21(r, p, t); } @@ -41374,55 +40504,47 @@ SP_NOINLINE static void 
sp_521_mul_d_21(sp_digit* r, const sp_digit* a, #endif /* WOLFSSL_SP_SMALL */ } -#ifdef WOLFSSL_SP_DIV_32 static WC_INLINE sp_digit sp_521_div_word_21(sp_digit d1, sp_digit d0, - sp_digit dv) + sp_digit div) { - sp_digit d; - sp_digit r; +#ifdef SP_USE_DIVTI3 + sp_int64 d = ((sp_int64)d1 << 25) + d0; + + return d / div; +#elif defined(__x86_64__) || defined(__i386__) + sp_int64 d = ((sp_int64)d1 << 25) + d0; + sp_uint32 lo = (sp_uint32)d; + sp_digit hi = (sp_digit)(d >> 32); + + __asm__ __volatile__ ( + "idiv %2" + : "+a" (lo) + : "d" (hi), "r" (div) + : "cc" + ); + + return (sp_digit)lo; +#else + sp_int64 d = ((sp_int64)d1 << 25) + d0; + sp_digit r = 0; sp_digit t; + sp_digit dv = (div >> 10) + 1; - /* All 25 bits from d1 and top 6 bits from d0. */ - d = (d1 << 6) + (d0 >> 19); - r = d / dv; - d -= r * dv; - /* Up to 7 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 13) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; + t = (sp_digit)(d >> 20); + t = (t / dv) << 10; r += t; - /* Up to 13 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 7) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int64)t * div; + t = (sp_digit)(d >> 5); + t = t / (dv << 5); r += t; - /* Up to 19 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 1) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int64)t * div; + t = (sp_digit)d; + t = t / div; r += t; - /* Up to 25 bits in r */ - /* Remaining 1 bits from d0. */ - r <<= 1; - d <<= 1; - d += d0 & ((1 << 1) - 1); - t = d / dv; - r += t; - - /* All 25 bits from d1 and top 6 bits from d0. */ + d -= (sp_int64)t * div; return r; +#endif } -#endif /* WOLFSSL_SP_DIV_32 */ - /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. * @@ -41434,13 +40556,10 @@ static WC_INLINE sp_digit sp_521_div_word_21(sp_digit d1, sp_digit d0, * r Remainder from the division. 
* returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise. */ -static int sp_521_div_21(const sp_digit* a, const sp_digit* d, +static int sp_521_div_21(const sp_digit* a, const sp_digit* d, const sp_digit* m, sp_digit* r) { int i; -#ifndef WOLFSSL_SP_DIV_32 - sp_int64 d1; -#endif sp_digit dv; sp_digit r1; #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) @@ -41468,14 +40587,7 @@ static int sp_521_div_21(const sp_digit* a, const sp_digit* d, for (i=20; i>=0; i--) { t1[21 + i] += t1[21 + i - 1] >> 25; t1[21 + i - 1] &= 0x1ffffff; -#ifndef WOLFSSL_SP_DIV_32 - d1 = t1[21 + i]; - d1 <<= 25; - d1 += t1[21 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_521_div_word_21(t1[21 + i], t1[21 + i - 1], dv); -#endif sp_521_mul_d_21(t2, d, r1); (void)sp_521_sub_21(&t1[i], &t1[i], t2); @@ -41483,14 +40595,7 @@ static int sp_521_div_21(const sp_digit* a, const sp_digit* d, t1[21 + i] -= t2[21]; t1[21 + i] += t1[21 + i - 1] >> 25; t1[21 + i - 1] &= 0x1ffffff; -#ifndef WOLFSSL_SP_DIV_32 - d1 = -t1[21 + i]; - d1 <<= 25; - d1 -= t1[21 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_521_div_word_21(-t1[21 + i], -t1[21 + i - 1], dv); -#endif r1++; sp_521_mul_d_21(t2, d, r1); (void)sp_521_add_21(&t1[i], &t1[i], t2); @@ -41508,8 +40613,7 @@ static int sp_521_div_21(const sp_digit* a, const sp_digit* d, r[i+1] += r[i] >> 25; r[i] &= 0x1ffffff; } - sp_521_cond_add_21(r, r, d, 0 - ((r[20] < 0) ? 
- (sp_digit)1 : (sp_digit)0)); + sp_521_cond_add_21(r, r, d, r[20] >> 31); } #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) @@ -44018,55 +43122,47 @@ SP_NOINLINE static void sp_1024_rshift_42(sp_digit* r, const sp_digit* a, r[41] = a[41] >> n; } -#ifdef WOLFSSL_SP_DIV_32 static WC_INLINE sp_digit sp_1024_div_word_42(sp_digit d1, sp_digit d0, - sp_digit dv) + sp_digit div) { - sp_digit d; - sp_digit r; +#ifdef SP_USE_DIVTI3 + sp_int64 d = ((sp_int64)d1 << 25) + d0; + + return d / div; +#elif defined(__x86_64__) || defined(__i386__) + sp_int64 d = ((sp_int64)d1 << 25) + d0; + sp_uint32 lo = (sp_uint32)d; + sp_digit hi = (sp_digit)(d >> 32); + + __asm__ __volatile__ ( + "idiv %2" + : "+a" (lo) + : "d" (hi), "r" (div) + : "cc" + ); + + return (sp_digit)lo; +#else + sp_int64 d = ((sp_int64)d1 << 25) + d0; + sp_digit r = 0; sp_digit t; + sp_digit dv = (div >> 10) + 1; - /* All 25 bits from d1 and top 6 bits from d0. */ - d = (d1 << 6) + (d0 >> 19); - r = d / dv; - d -= r * dv; - /* Up to 7 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 13) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; + t = (sp_digit)(d >> 20); + t = (t / dv) << 10; r += t; - /* Up to 13 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 7) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int64)t * div; + t = (sp_digit)(d >> 5); + t = t / (dv << 5); r += t; - /* Up to 19 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 1) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int64)t * div; + t = (sp_digit)d; + t = t / div; r += t; - /* Up to 25 bits in r */ - /* Remaining 1 bits from d0. */ - r <<= 1; - d <<= 1; - d += d0 & ((1 << 1) - 1); - t = d / dv; - r += t; - - /* All 25 bits from d1 and top 6 bits from d0. 
*/ + d -= (sp_int64)t * div; return r; +#endif } -#endif /* WOLFSSL_SP_DIV_32 */ - /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. * @@ -44083,7 +43179,6 @@ static int sp_1024_div_42(const sp_digit* a, const sp_digit* d, { int i; #ifndef WOLFSSL_SP_DIV_32 - sp_int64 d1; #endif sp_digit dv; sp_digit r1; @@ -44117,28 +43212,14 @@ static int sp_1024_div_42(const sp_digit* a, const sp_digit* d, t1[41 + 41] += t1[41 + 41 - 1] >> 25; t1[41 + 41 - 1] &= 0x1ffffff; for (i=41; i>=0; i--) { -#ifndef WOLFSSL_SP_DIV_32 - d1 = t1[41 + i]; - d1 <<= 25; - d1 += t1[41 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_1024_div_word_42(t1[41 + i], t1[41 + i - 1], dv); -#endif sp_1024_mul_d_42(t2, sd, r1); (void)sp_1024_sub_42(&t1[i], &t1[i], t2); sp_1024_norm_41(&t1[i]); t1[41 + i] += t1[41 + i - 1] >> 25; t1[41 + i - 1] &= 0x1ffffff; -#ifndef WOLFSSL_SP_DIV_32 - d1 = -t1[41 + i]; - d1 <<= 25; - d1 -= t1[41 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_1024_div_word_42(-t1[41 + i], -t1[41 + i - 1], dv); -#endif r1 -= t1[41 + i]; sp_1024_mul_d_42(t2, sd, r1); (void)sp_1024_add_42(&t1[i], &t1[i], t2); @@ -44156,8 +43237,7 @@ static int sp_1024_div_42(const sp_digit* a, const sp_digit* d, r[i+1] += r[i] >> 25; r[i] &= 0x1ffffff; } - sp_1024_cond_add_42(r, r, sd, 0 - ((r[40] < 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_1024_cond_add_42(r, r, sd, r[40] >> 31); sp_1024_norm_41(r); sp_1024_rshift_42(r, r, 1); @@ -44465,22 +43545,22 @@ static sp_digit sp_1024_cmp_42(const sp_digit* a, const sp_digit* b) int i; for (i=41; i>=0; i--) { - r |= (a[i] - b[i]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[i] - b[i]) & ~(((sp_digit)0 - r) >> 24); } #else int i; r |= (a[41] - b[41]) & (0 - (sp_digit)1); - r |= (a[40] - b[40]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[40] - b[40]) & ~(((sp_digit)0 - r) >> 24); for (i = 32; i >= 0; i -= 8) { - r |= (a[i + 7] - b[i + 7]) & (0 - (sp_digit)((r == 0) ? 
1 : 0)); - r |= (a[i + 6] - b[i + 6]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 5] - b[i + 5]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 4] - b[i + 4]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 3] - b[i + 3]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 2] - b[i + 2]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 1] - b[i + 1]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 0] - b[i + 0]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[i + 7] - b[i + 7]) & ~(((sp_digit)0 - r) >> 24); + r |= (a[i + 6] - b[i + 6]) & ~(((sp_digit)0 - r) >> 24); + r |= (a[i + 5] - b[i + 5]) & ~(((sp_digit)0 - r) >> 24); + r |= (a[i + 4] - b[i + 4]) & ~(((sp_digit)0 - r) >> 24); + r |= (a[i + 3] - b[i + 3]) & ~(((sp_digit)0 - r) >> 24); + r |= (a[i + 2] - b[i + 2]) & ~(((sp_digit)0 - r) >> 24); + r |= (a[i + 1] - b[i + 1]) & ~(((sp_digit)0 - r) >> 24); + r |= (a[i + 0] - b[i + 0]) & ~(((sp_digit)0 - r) >> 24); } #endif /* WOLFSSL_SP_SMALL */ @@ -44665,6 +43745,7 @@ static void sp_1024_mont_reduce_42(sp_digit* a, const sp_digit* m, sp_digit mp) { int i; sp_digit mu; + sp_digit over; sp_1024_norm_42(a + 41); @@ -44692,8 +43773,8 @@ static void sp_1024_mont_reduce_42(sp_digit* a, const sp_digit* m, sp_digit mp) } sp_1024_norm_42(a + 41); sp_1024_mont_shift_42(a, a); - sp_1024_cond_sub_42(a, a, m, 0 - (((a[40] - m[40]) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = a[40] - m[40]; + sp_1024_cond_sub_42(a, a, m, ~((over - 1) >> 31)); sp_1024_norm_42(a); } @@ -44809,8 +43890,7 @@ static void sp_1024_map_42(sp_point_1024* r, const sp_point_1024* p, sp_1024_mont_reduce_42(r->x, p1024_mod, p1024_mp_mod); /* Reduce x to less than modulus */ n = sp_1024_cmp_42(r->x, p1024_mod); - sp_1024_cond_sub_42(r->x, r->x, p1024_mod, 0 - ((n >= 0) ? 
- (sp_digit)1 : (sp_digit)0)); + sp_1024_cond_sub_42(r->x, r->x, p1024_mod, ~(n >> 24)); sp_1024_norm_42(r->x); /* y /= z^3 */ @@ -44819,8 +43899,7 @@ static void sp_1024_map_42(sp_point_1024* r, const sp_point_1024* p, sp_1024_mont_reduce_42(r->y, p1024_mod, p1024_mp_mod); /* Reduce y to less than modulus */ n = sp_1024_cmp_42(r->y, p1024_mod); - sp_1024_cond_sub_42(r->y, r->y, p1024_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_1024_cond_sub_42(r->y, r->y, p1024_mod, ~(n >> 24)); sp_1024_norm_42(r->y); XMEMSET(r->z, 0, sizeof(r->z) / 2); @@ -44838,10 +43917,11 @@ static void sp_1024_map_42(sp_point_1024* r, const sp_point_1024* p, static void sp_1024_mont_add_42(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) { + sp_digit over; (void)sp_1024_add_42(r, a, b); sp_1024_norm_42(r); - sp_1024_cond_sub_42(r, r, m, 0 - (((r[40] - m[40]) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = r[40] - m[40]; + sp_1024_cond_sub_42(r, r, m, ~((over - 1) >> 31)); sp_1024_norm_42(r); } @@ -44853,10 +43933,11 @@ static void sp_1024_mont_add_42(sp_digit* r, const sp_digit* a, const sp_digit* */ static void sp_1024_mont_dbl_42(sp_digit* r, const sp_digit* a, const sp_digit* m) { + sp_digit over; (void)sp_1024_add_42(r, a, a); sp_1024_norm_42(r); - sp_1024_cond_sub_42(r, r, m, 0 - (((r[40] - m[40]) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = r[40] - m[40]; + sp_1024_cond_sub_42(r, r, m, ~((over - 1) >> 31)); sp_1024_norm_42(r); } @@ -44868,15 +43949,16 @@ static void sp_1024_mont_dbl_42(sp_digit* r, const sp_digit* a, const sp_digit* */ static void sp_1024_mont_tpl_42(sp_digit* r, const sp_digit* a, const sp_digit* m) { + sp_digit over; (void)sp_1024_add_42(r, a, a); sp_1024_norm_42(r); - sp_1024_cond_sub_42(r, r, m, 0 - (((r[40] - m[40]) > 0) ? 
- (sp_digit)1 : (sp_digit)0)); + over = r[40] - m[40]; + sp_1024_cond_sub_42(r, r, m, ~((over - 1) >> 31)); sp_1024_norm_42(r); (void)sp_1024_add_42(r, r, a); sp_1024_norm_42(r); - sp_1024_cond_sub_42(r, r, m, 0 - (((r[40] - m[40]) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = r[40] - m[40]; + sp_1024_cond_sub_42(r, r, m, ~((over - 1) >> 31)); sp_1024_norm_42(r); } @@ -45266,7 +44348,8 @@ static int sp_1024_proj_point_add_42_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, /* Check double */ (void)sp_1024_sub_42(ctx->t1, p1024_mod, q->y); sp_1024_norm_42(ctx->t1); - if ((sp_1024_cmp_equal_42(p->x, q->x) & sp_1024_cmp_equal_42(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_1024_cmp_equal_42(p->x, q->x) & sp_1024_cmp_equal_42(p->z, q->z) & (sp_1024_cmp_equal_42(p->y, q->y) | sp_1024_cmp_equal_42(p->y, ctx->t1))) != 0) { XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); @@ -45434,7 +44517,8 @@ static void sp_1024_proj_point_add_42(sp_point_1024* r, /* Check double */ (void)sp_1024_mont_sub_42(t1, p1024_mod, q->y, p1024_mod); sp_1024_norm_42(t1); - if ((sp_1024_cmp_equal_42(p->x, q->x) & sp_1024_cmp_equal_42(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_1024_cmp_equal_42(p->x, q->x) & sp_1024_cmp_equal_42(p->z, q->z) & (sp_1024_cmp_equal_42(p->y, q->y) | sp_1024_cmp_equal_42(p->y, t1))) != 0) { sp_1024_proj_point_dbl_42(r, p, t); } @@ -45467,7 +44551,8 @@ static void sp_1024_proj_point_add_42(sp_point_1024* r, sp_1024_mont_sub_42(t2, t2, t1, p1024_mod); /* R = S2 - S1 */ sp_1024_mont_sub_42(t4, t4, t3, p1024_mod); - if (sp_1024_iszero_42(t2) & sp_1024_iszero_42(t4) & maskt) { + if (~p->infinity & ~q->infinity & + sp_1024_iszero_42(t2) & sp_1024_iszero_42(t4) & maskt) { sp_1024_proj_point_dbl_42(r, p, t); } else { @@ -46353,7 +45438,8 @@ static void sp_1024_proj_point_add_qz1_42(sp_point_1024* r, const sp_point_1024* /* Check double */ (void)sp_1024_mont_sub_42(t1, p1024_mod, q->y, p1024_mod); sp_1024_norm_42(t1); - if ((sp_1024_cmp_equal_42(p->x, q->x) & 
sp_1024_cmp_equal_42(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_1024_cmp_equal_42(p->x, q->x) & sp_1024_cmp_equal_42(p->z, q->z) & (sp_1024_cmp_equal_42(p->y, q->y) | sp_1024_cmp_equal_42(p->y, t1))) != 0) { sp_1024_proj_point_dbl_42(r, p, t); } @@ -54491,8 +53577,7 @@ static int sp_1024_ecc_is_point_42(const sp_point_1024* point, sp_1024_mont_add_42(t1, t1, point->x, p1024_mod); n = sp_1024_cmp_42(t1, p1024_mod); - sp_1024_cond_sub_42(t1, t1, p1024_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_1024_cond_sub_42(t1, t1, p1024_mod, ~(n >> 24)); sp_1024_norm_42(t1); if (!sp_1024_iszero_42(t1)) { err = MP_VAL; diff --git a/wolfcrypt/src/sp_c64.c b/wolfcrypt/src/sp_c64.c index b167a28ad..141b9e6d8 100644 --- a/wolfcrypt/src/sp_c64.c +++ b/wolfcrypt/src/sp_c64.c @@ -64,7 +64,7 @@ fprintf(stderr, name "=0x"); \ for (ii=0; ii<(bits + 7) / 8; ii++) \ fprintf(stderr, "%02x", nb[ii]); \ - fprintf(stderr, "\n"); \ + fprintf(stderr, "\n"); \ } while (0) #define SP_PRINT_VAL(var, name) \ @@ -498,7 +498,7 @@ static sp_digit sp_2048_cmp_17(const sp_digit* a, const sp_digit* b) int i; for (i=16; i>=0; i--) { - r |= (a[i] - b[i]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[i] - b[i]) & ~(((sp_digit)0 - r) >> 60); } return r; @@ -585,6 +585,7 @@ static void sp_2048_mont_reduce_17(sp_digit* a, const sp_digit* m, sp_digit mp) { int i; sp_digit mu; + sp_digit over; sp_2048_norm_17(a + 17); @@ -598,8 +599,8 @@ static void sp_2048_mont_reduce_17(sp_digit* a, const sp_digit* m, sp_digit mp) a[i+1] += a[i] >> 61; a[i] &= 0x1fffffffffffffffL; sp_2048_mont_shift_17(a, a); - sp_2048_cond_sub_17(a, a, m, 0 - (((a[16] - m[16]) > 0) ? 
- (sp_digit)1 : (sp_digit)0)); + over = a[16] - m[16]; + sp_2048_cond_sub_17(a, a, m, ~((over - 1) >> 63)); sp_2048_norm_17(a); } @@ -822,263 +823,47 @@ SP_NOINLINE static void sp_2048_rshift_17(sp_digit* r, const sp_digit* a, r[16] = a[16] >> n; } -#ifdef WOLFSSL_SP_DIV_64 static WC_INLINE sp_digit sp_2048_div_word_17(sp_digit d1, sp_digit d0, - sp_digit dv) + sp_digit div) { - sp_digit d; - sp_digit r; +#ifdef SP_USE_DIVTI3 + sp_int128 d = ((sp_int128)d1 << 61) + d0; + + return d / div; +#elif defined(__x86_64__) || defined(__i386__) + sp_int128 d = ((sp_int128)d1 << 61) + d0; + sp_uint64 lo = (sp_uint64)d; + sp_digit hi = (sp_digit)(d >> 64); + + __asm__ __volatile__ ( + "idiv %2" + : "+a" (lo) + : "d" (hi), "r" (div) + : "cc" + ); + + return (sp_digit)lo; +#else + sp_int128 d = ((sp_int128)d1 << 61) + d0; + sp_digit r = 0; sp_digit t; + sp_digit dv = (div >> 30) + 1; - /* All 61 bits from d1 and top 2 bits from d0. */ - d = (d1 << 2) + (d0 >> 59); - r = d / dv; - d -= r * dv; - /* Up to 3 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 57) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; + t = (sp_digit)(d >> 60); + t = (t / dv) << 30; r += t; - /* Up to 5 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 55) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int128)t * div; + t = (sp_digit)(d >> 29); + t = t / (dv << 1); r += t; - /* Up to 7 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 53) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int128)t * div; + t = (sp_digit)d; + t = t / div; r += t; - /* Up to 9 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 51) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 11 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 49) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 13 bits in r */ - /* Next 2 bits from d0. 
*/ - r <<= 2; - d <<= 2; - d += (d0 >> 47) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 15 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 45) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 17 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 43) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 19 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 41) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 21 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 39) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 23 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 37) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 25 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 35) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 27 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 33) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 29 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 31) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 31 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 29) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 33 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 27) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 35 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 25) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 37 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 23) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 39 bits in r */ - /* Next 2 bits from d0. 
*/ - r <<= 2; - d <<= 2; - d += (d0 >> 21) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 41 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 19) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 43 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 17) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 45 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 15) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 47 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 13) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 49 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 11) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 51 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 9) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 53 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 7) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 55 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 5) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 57 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 3) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 59 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 1) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 61 bits in r */ - /* Remaining 1 bits from d0. */ - r <<= 1; - d <<= 1; - d += d0 & ((1 << 1) - 1); - t = d / dv; - r += t; - - /* All 61 bits from d1 and top 2 bits from d0. 
*/ + d -= (sp_int128)t * div; return r; +#endif } -#endif /* WOLFSSL_SP_DIV_64 */ - /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. * @@ -1095,7 +880,6 @@ static int sp_2048_div_17(const sp_digit* a, const sp_digit* d, { int i; #ifndef WOLFSSL_SP_DIV_64 - sp_int128 d1; #endif sp_digit dv; sp_digit r1; @@ -1129,14 +913,7 @@ static int sp_2048_div_17(const sp_digit* a, const sp_digit* d, t1[17 + 17] += t1[17 + 17 - 1] >> 61; t1[17 + 17 - 1] &= 0x1fffffffffffffffL; for (i=17; i>=0; i--) { -#ifndef WOLFSSL_SP_DIV_64 - d1 = t1[17 + i]; - d1 <<= 61; - d1 += t1[17 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_2048_div_word_17(t1[17 + i], t1[17 + i - 1], dv); -#endif sp_2048_mul_d_17(t2, sd, r1); (void)sp_2048_sub_17(&t1[i], &t1[i], t2); @@ -1144,14 +921,7 @@ static int sp_2048_div_17(const sp_digit* a, const sp_digit* d, t1[17 + i] -= t2[17]; t1[17 + i] += t1[17 + i - 1] >> 61; t1[17 + i - 1] &= 0x1fffffffffffffffL; -#ifndef WOLFSSL_SP_DIV_64 - d1 = -t1[17 + i]; - d1 <<= 61; - d1 -= t1[17 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_2048_div_word_17(-t1[17 + i], -t1[17 + i - 1], dv); -#endif r1 -= t1[17 + i]; sp_2048_mul_d_17(t2, sd, r1); (void)sp_2048_add_17(&t1[i], &t1[i], t2); @@ -1169,8 +939,7 @@ static int sp_2048_div_17(const sp_digit* a, const sp_digit* d, r[i+1] += r[i] >> 61; r[i] &= 0x1fffffffffffffffL; } - sp_2048_cond_add_17(r, r, sd, 0 - ((r[16] < 0) ? 
- (sp_digit)1 : (sp_digit)0)); + sp_2048_cond_add_17(r, r, sd, r[16] >> 63); sp_2048_norm_17(r); sp_2048_rshift_17(r, r, 13); @@ -1225,10 +994,7 @@ static int sp_2048_mod_exp_17(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -1293,8 +1059,7 @@ static int sp_2048_mod_exp_17(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_2048_mont_reduce_17(t[0], m, mp); n = sp_2048_cmp_17(t[0], m); - sp_2048_cond_sub_17(t[0], t[0], m, ((n < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_2048_cond_sub_17(t[0], t[0], m, ~(n >> 63)); XMEMCPY(r, t[0], sizeof(*r) * 17 * 2); } @@ -1320,10 +1085,7 @@ static int sp_2048_mod_exp_17(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -1388,8 +1150,7 @@ static int sp_2048_mod_exp_17(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_2048_mont_reduce_17(t[0], m, mp); n = sp_2048_cmp_17(t[0], m); - sp_2048_cond_sub_17(t[0], t[0], m, ((n < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_2048_cond_sub_17(t[0], t[0], m, ~(n >> 63)); XMEMCPY(r, t[0], sizeof(*r) * 17 * 2); } @@ -1415,10 +1176,7 @@ static int sp_2048_mod_exp_17(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -1538,8 +1296,7 @@ static int sp_2048_mod_exp_17(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_2048_mont_reduce_17(rt, m, mp); n = sp_2048_cmp_17(rt, m); - sp_2048_cond_sub_17(rt, rt, m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_2048_cond_sub_17(rt, rt, m, ~(n >> 63)); XMEMCPY(r, rt, sizeof(sp_digit) * 34); } @@ -1608,7 +1365,7 @@ static sp_digit sp_2048_cmp_34(const sp_digit* a, const sp_digit* b) int i; for (i=33; i>=0; i--) { - r |= (a[i] - b[i]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[i] - b[i]) & ~(((sp_digit)0 - r) >> 60); } return r; @@ -1698,6 +1455,7 @@ static void sp_2048_mont_reduce_34(sp_digit* a, const sp_digit* m, sp_digit mp) { int i; sp_digit mu; + sp_digit over; sp_2048_norm_34(a + 34); @@ -1736,8 +1494,8 @@ static void sp_2048_mont_reduce_34(sp_digit* a, const sp_digit* m, sp_digit mp) a[i] &= 0x1fffffffffffffffL; #endif sp_2048_mont_shift_34(a, a); - sp_2048_cond_sub_34(a, a, m, 0 - (((a[33] - m[33]) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = a[33] - m[33]; + sp_2048_cond_sub_34(a, a, m, ~((over - 1) >> 63)); sp_2048_norm_34(a); } @@ -1841,263 +1599,47 @@ SP_NOINLINE static void sp_2048_rshift_34(sp_digit* r, const sp_digit* a, r[33] = a[33] >> n; } -#ifdef WOLFSSL_SP_DIV_64 static WC_INLINE sp_digit sp_2048_div_word_34(sp_digit d1, sp_digit d0, - sp_digit dv) + sp_digit div) { - sp_digit d; - sp_digit r; +#ifdef SP_USE_DIVTI3 + sp_int128 d = ((sp_int128)d1 << 61) + d0; + + return d / div; +#elif defined(__x86_64__) || defined(__i386__) + sp_int128 d = ((sp_int128)d1 << 61) + d0; + sp_uint64 lo = (sp_uint64)d; + sp_digit hi = (sp_digit)(d >> 64); + + __asm__ __volatile__ ( + "idiv %2" + : "+a" (lo) + : "d" (hi), "r" (div) + : "cc" + ); + + return (sp_digit)lo; +#else + sp_int128 d = ((sp_int128)d1 << 61) + d0; + sp_digit r = 0; sp_digit t; + sp_digit dv = (div >> 30) + 1; - /* All 61 bits from d1 and top 2 bits from d0. */ - d = (d1 << 2) + (d0 >> 59); - r = d / dv; - d -= r * dv; - /* Up to 3 bits in r */ - /* Next 2 bits from d0. 
*/ - r <<= 2; - d <<= 2; - d += (d0 >> 57) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; + t = (sp_digit)(d >> 60); + t = (t / dv) << 30; r += t; - /* Up to 5 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 55) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int128)t * div; + t = (sp_digit)(d >> 29); + t = t / (dv << 1); r += t; - /* Up to 7 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 53) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int128)t * div; + t = (sp_digit)d; + t = t / div; r += t; - /* Up to 9 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 51) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 11 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 49) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 13 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 47) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 15 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 45) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 17 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 43) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 19 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 41) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 21 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 39) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 23 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 37) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 25 bits in r */ - /* Next 2 bits from d0. 
*/ - r <<= 2; - d <<= 2; - d += (d0 >> 35) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 27 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 33) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 29 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 31) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 31 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 29) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 33 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 27) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 35 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 25) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 37 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 23) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 39 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 21) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 41 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 19) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 43 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 17) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 45 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 15) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 47 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 13) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 49 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 11) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 51 bits in r */ - /* Next 2 bits from d0. 
*/ - r <<= 2; - d <<= 2; - d += (d0 >> 9) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 53 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 7) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 55 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 5) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 57 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 3) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 59 bits in r */ - /* Next 2 bits from d0. */ - r <<= 2; - d <<= 2; - d += (d0 >> 1) & ((1 << 2) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 61 bits in r */ - /* Remaining 1 bits from d0. */ - r <<= 1; - d <<= 1; - d += d0 & ((1 << 1) - 1); - t = d / dv; - r += t; - - /* All 61 bits from d1 and top 2 bits from d0. */ + d -= (sp_int128)t * div; return r; +#endif } -#endif /* WOLFSSL_SP_DIV_64 */ - /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. 
* @@ -2114,7 +1656,6 @@ static int sp_2048_div_34(const sp_digit* a, const sp_digit* d, { int i; #ifndef WOLFSSL_SP_DIV_64 - sp_int128 d1; #endif sp_digit dv; sp_digit r1; @@ -2148,14 +1689,7 @@ static int sp_2048_div_34(const sp_digit* a, const sp_digit* d, t1[34 + 34] += t1[34 + 34 - 1] >> 61; t1[34 + 34 - 1] &= 0x1fffffffffffffffL; for (i=34; i>=0; i--) { -#ifndef WOLFSSL_SP_DIV_64 - d1 = t1[34 + i]; - d1 <<= 61; - d1 += t1[34 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_2048_div_word_34(t1[34 + i], t1[34 + i - 1], dv); -#endif sp_2048_mul_d_34(t2, sd, r1); (void)sp_2048_sub_34(&t1[i], &t1[i], t2); @@ -2163,14 +1697,7 @@ static int sp_2048_div_34(const sp_digit* a, const sp_digit* d, t1[34 + i] -= t2[34]; t1[34 + i] += t1[34 + i - 1] >> 61; t1[34 + i - 1] &= 0x1fffffffffffffffL; -#ifndef WOLFSSL_SP_DIV_64 - d1 = -t1[34 + i]; - d1 <<= 61; - d1 -= t1[34 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_2048_div_word_34(-t1[34 + i], -t1[34 + i - 1], dv); -#endif r1 -= t1[34 + i]; sp_2048_mul_d_34(t2, sd, r1); (void)sp_2048_add_34(&t1[i], &t1[i], t2); @@ -2188,8 +1715,7 @@ static int sp_2048_div_34(const sp_digit* a, const sp_digit* d, r[i+1] += r[i] >> 61; r[i] &= 0x1fffffffffffffffL; } - sp_2048_cond_add_34(r, r, sd, 0 - ((r[33] < 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_2048_cond_add_34(r, r, sd, r[33] >> 63); sp_2048_norm_34(r); sp_2048_rshift_34(r, r, 26); @@ -2245,10 +1771,7 @@ static int sp_2048_mod_exp_34(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -2313,8 +1836,7 @@ static int sp_2048_mod_exp_34(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_2048_mont_reduce_34(t[0], m, mp); n = sp_2048_cmp_34(t[0], m); - sp_2048_cond_sub_34(t[0], t[0], m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_2048_cond_sub_34(t[0], t[0], m, ~(n >> 63)); XMEMCPY(r, t[0], sizeof(*r) * 34 * 2); } @@ -2340,10 +1862,7 @@ static int sp_2048_mod_exp_34(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -2408,8 +1927,7 @@ static int sp_2048_mod_exp_34(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_2048_mont_reduce_34(t[0], m, mp); n = sp_2048_cmp_34(t[0], m); - sp_2048_cond_sub_34(t[0], t[0], m, ((n < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_2048_cond_sub_34(t[0], t[0], m, ~(n >> 63)); XMEMCPY(r, t[0], sizeof(*r) * 34 * 2); } @@ -2435,10 +1953,7 @@ static int sp_2048_mod_exp_34(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -2541,8 +2056,7 @@ static int sp_2048_mod_exp_34(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_2048_mont_reduce_34(rt, m, mp); n = sp_2048_cmp_34(rt, m); - sp_2048_cond_sub_34(rt, rt, m, ((n < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_2048_cond_sub_34(rt, rt, m, ~(n >> 63)); XMEMCPY(r, rt, sizeof(sp_digit) * 68); } @@ -2660,8 +2174,7 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, } sp_2048_mont_reduce_34(r, m, mp); mp = sp_2048_cmp_34(r, m); - sp_2048_cond_sub_34(r, r, m, ((mp < 0) ? - (sp_digit)1 : (sp_digit)0)- 1); + sp_2048_cond_sub_34(r, r, m, ~(mp >> 63)); sp_2048_to_bin_34(r, out); *outLen = 256; @@ -2769,8 +2282,7 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, } sp_2048_mont_reduce_34(r, m, mp); mp = sp_2048_cmp_34(r, m); - sp_2048_cond_sub_34(r, r, m, ((mp < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_2048_cond_sub_34(r, r, m, ~(mp >> 63)); } } } @@ -2993,6 +2505,12 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err = MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if (mp_iseven(qm)) { + err = MP_VAL; + } } #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) @@ -3086,6 +2604,12 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err = MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if (mp_iseven(qm)) { + err = MP_VAL; + } } #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) @@ -3408,7 +2932,7 @@ static int sp_2048_mod_exp_2_34(sp_digit* r, const sp_digit* e, int bits, const byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { + if (bits == 0) { err = MP_VAL; } @@ -3484,14 +3008,12 @@ static int sp_2048_mod_exp_2_34(sp_digit* r, const sp_digit* e, int bits, const (void)sp_2048_add_34(r, r, tmp); sp_2048_norm_34(r); o = sp_2048_cmp_34(r, m); - sp_2048_cond_sub_34(r, r, m, ((o < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_2048_cond_sub_34(r, r, m, ~(o >> 63)); } sp_2048_mont_reduce_34(r, m, mp); n = sp_2048_cmp_34(r, m); - sp_2048_cond_sub_34(r, r, m, ((n < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_2048_cond_sub_34(r, r, m, ~(n >> 63)); } #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) @@ -4468,16 +3990,16 @@ static sp_digit sp_2048_cmp_18(const sp_digit* a, const sp_digit* b) int i; r |= (a[17] - b[17]) & (0 - (sp_digit)1); - r |= (a[16] - b[16]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[16] - b[16]) & ~(((sp_digit)0 - r) >> 56); for (i = 8; i >= 0; i -= 8) { - r |= (a[i + 7] - b[i + 7]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 6] - b[i + 6]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 5] - b[i + 5]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 4] - b[i + 4]) & (0 - (sp_digit)((r == 0) ? 
1 : 0)); - r |= (a[i + 3] - b[i + 3]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 2] - b[i + 2]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 1] - b[i + 1]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 0] - b[i + 0]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[i + 7] - b[i + 7]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 6] - b[i + 6]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 5] - b[i + 5]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 4] - b[i + 4]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 3] - b[i + 3]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 2] - b[i + 2]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 1] - b[i + 1]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 0] - b[i + 0]) & ~(((sp_digit)0 - r) >> 56); } return r; @@ -4584,6 +4106,7 @@ static void sp_2048_mont_reduce_18(sp_digit* a, const sp_digit* m, sp_digit mp) { int i; sp_digit mu; + sp_digit over; sp_2048_norm_18(a + 18); @@ -4597,8 +4120,8 @@ static void sp_2048_mont_reduce_18(sp_digit* a, const sp_digit* m, sp_digit mp) a[i+1] += a[i] >> 57; a[i] &= 0x1ffffffffffffffL; sp_2048_mont_shift_18(a, a); - sp_2048_cond_sub_18(a, a, m, 0 - (((a[17] - m[17]) > 0) ? 
- (sp_digit)1 : (sp_digit)0)); + over = a[17] - m[17]; + sp_2048_cond_sub_18(a, a, m, ~((over - 1) >> 63)); sp_2048_norm_18(a); } @@ -4726,95 +4249,47 @@ SP_NOINLINE static void sp_2048_rshift_18(sp_digit* r, const sp_digit* a, r[17] = a[17] >> n; } -#ifdef WOLFSSL_SP_DIV_64 static WC_INLINE sp_digit sp_2048_div_word_18(sp_digit d1, sp_digit d0, - sp_digit dv) + sp_digit div) { - sp_digit d; - sp_digit r; +#ifdef SP_USE_DIVTI3 + sp_int128 d = ((sp_int128)d1 << 57) + d0; + + return d / div; +#elif defined(__x86_64__) || defined(__i386__) + sp_int128 d = ((sp_int128)d1 << 57) + d0; + sp_uint64 lo = (sp_uint64)d; + sp_digit hi = (sp_digit)(d >> 64); + + __asm__ __volatile__ ( + "idiv %2" + : "+a" (lo) + : "d" (hi), "r" (div) + : "cc" + ); + + return (sp_digit)lo; +#else + sp_int128 d = ((sp_int128)d1 << 57) + d0; + sp_digit r = 0; sp_digit t; + sp_digit dv = (div >> 26) + 1; - /* All 57 bits from d1 and top 6 bits from d0. */ - d = (d1 << 6) + (d0 >> 51); - r = d / dv; - d -= r * dv; - /* Up to 7 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 45) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; + t = (sp_digit)(d >> 52); + t = (t / dv) << 26; r += t; - /* Up to 13 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 39) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int128)t * div; + t = (sp_digit)(d >> 21); + t = t / (dv << 5); r += t; - /* Up to 19 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 33) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int128)t * div; + t = (sp_digit)d; + t = t / div; r += t; - /* Up to 25 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 27) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 31 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 21) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 37 bits in r */ - /* Next 6 bits from d0. 
*/ - r <<= 6; - d <<= 6; - d += (d0 >> 15) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 43 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 9) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 49 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 3) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 55 bits in r */ - /* Remaining 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += d0 & ((1 << 3) - 1); - t = d / dv; - r += t; - - /* All 57 bits from d1 and top 6 bits from d0. */ + d -= (sp_int128)t * div; return r; +#endif } -#endif /* WOLFSSL_SP_DIV_64 */ - /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. * @@ -4831,7 +4306,6 @@ static int sp_2048_div_18(const sp_digit* a, const sp_digit* d, { int i; #ifndef WOLFSSL_SP_DIV_64 - sp_int128 d1; #endif sp_digit dv; sp_digit r1; @@ -4865,14 +4339,7 @@ static int sp_2048_div_18(const sp_digit* a, const sp_digit* d, t1[18 + 18] += t1[18 + 18 - 1] >> 57; t1[18 + 18 - 1] &= 0x1ffffffffffffffL; for (i=18; i>=0; i--) { -#ifndef WOLFSSL_SP_DIV_64 - d1 = t1[18 + i]; - d1 <<= 57; - d1 += t1[18 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_2048_div_word_18(t1[18 + i], t1[18 + i - 1], dv); -#endif sp_2048_mul_d_18(t2, sd, r1); (void)sp_2048_sub_18(&t1[i], &t1[i], t2); @@ -4880,14 +4347,7 @@ static int sp_2048_div_18(const sp_digit* a, const sp_digit* d, t1[18 + i] -= t2[18]; t1[18 + i] += t1[18 + i - 1] >> 57; t1[18 + i - 1] &= 0x1ffffffffffffffL; -#ifndef WOLFSSL_SP_DIV_64 - d1 = -t1[18 + i]; - d1 <<= 57; - d1 -= t1[18 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_2048_div_word_18(-t1[18 + i], -t1[18 + i - 1], dv); -#endif r1 -= t1[18 + i]; sp_2048_mul_d_18(t2, sd, r1); (void)sp_2048_add_18(&t1[i], &t1[i], t2); @@ -4905,8 +4365,7 @@ static int sp_2048_div_18(const sp_digit* a, const sp_digit* d, r[i+1] += r[i] >> 57; r[i] &= 0x1ffffffffffffffL; } 
- sp_2048_cond_add_18(r, r, sd, 0 - ((r[17] < 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_2048_cond_add_18(r, r, sd, r[17] >> 63); sp_2048_norm_18(r); sp_2048_rshift_18(r, r, 2); @@ -4961,10 +4420,7 @@ static int sp_2048_mod_exp_18(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -5029,8 +4485,7 @@ static int sp_2048_mod_exp_18(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_2048_mont_reduce_18(t[0], m, mp); n = sp_2048_cmp_18(t[0], m); - sp_2048_cond_sub_18(t[0], t[0], m, ((n < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_2048_cond_sub_18(t[0], t[0], m, ~(n >> 63)); XMEMCPY(r, t[0], sizeof(*r) * 18 * 2); } @@ -5056,10 +4511,7 @@ static int sp_2048_mod_exp_18(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -5124,8 +4576,7 @@ static int sp_2048_mod_exp_18(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_2048_mont_reduce_18(t[0], m, mp); n = sp_2048_cmp_18(t[0], m); - sp_2048_cond_sub_18(t[0], t[0], m, ((n < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_2048_cond_sub_18(t[0], t[0], m, ~(n >> 63)); XMEMCPY(r, t[0], sizeof(*r) * 18 * 2); } @@ -5151,10 +4602,7 @@ static int sp_2048_mod_exp_18(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -5274,8 +4722,7 @@ static int sp_2048_mod_exp_18(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_2048_mont_reduce_18(rt, m, mp); n = sp_2048_cmp_18(rt, m); - sp_2048_cond_sub_18(rt, rt, m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_2048_cond_sub_18(rt, rt, m, ~(n >> 63)); XMEMCPY(r, rt, sizeof(sp_digit) * 36); } @@ -5336,18 +4783,18 @@ static sp_digit sp_2048_cmp_36(const sp_digit* a, const sp_digit* b) int i; r |= (a[35] - b[35]) & (0 - (sp_digit)1); - r |= (a[34] - b[34]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[33] - b[33]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[32] - b[32]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[34] - b[34]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[33] - b[33]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[32] - b[32]) & ~(((sp_digit)0 - r) >> 56); for (i = 24; i >= 0; i -= 8) { - r |= (a[i + 7] - b[i + 7]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 6] - b[i + 6]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 5] - b[i + 5]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 4] - b[i + 4]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 3] - b[i + 3]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 2] - b[i + 2]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 1] - b[i + 1]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 0] - b[i + 0]) & (0 - (sp_digit)((r == 0) ? 
1 : 0)); + r |= (a[i + 7] - b[i + 7]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 6] - b[i + 6]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 5] - b[i + 5]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 4] - b[i + 4]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 3] - b[i + 3]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 2] - b[i + 2]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 1] - b[i + 1]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 0] - b[i + 0]) & ~(((sp_digit)0 - r) >> 56); } return r; @@ -5473,6 +4920,7 @@ static void sp_2048_mont_reduce_36(sp_digit* a, const sp_digit* m, sp_digit mp) { int i; sp_digit mu; + sp_digit over; sp_2048_norm_36(a + 36); @@ -5511,8 +4959,8 @@ static void sp_2048_mont_reduce_36(sp_digit* a, const sp_digit* m, sp_digit mp) a[i] &= 0x1ffffffffffffffL; #endif sp_2048_mont_shift_36(a, a); - sp_2048_cond_sub_36(a, a, m, 0 - (((a[35] - m[35]) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = a[35] - m[35]; + sp_2048_cond_sub_36(a, a, m, ~((over - 1) >> 63)); sp_2048_norm_36(a); } @@ -5638,95 +5086,47 @@ SP_NOINLINE static void sp_2048_rshift_36(sp_digit* r, const sp_digit* a, r[35] = a[35] >> n; } -#ifdef WOLFSSL_SP_DIV_64 static WC_INLINE sp_digit sp_2048_div_word_36(sp_digit d1, sp_digit d0, - sp_digit dv) + sp_digit div) { - sp_digit d; - sp_digit r; +#ifdef SP_USE_DIVTI3 + sp_int128 d = ((sp_int128)d1 << 57) + d0; + + return d / div; +#elif defined(__x86_64__) || defined(__i386__) + sp_int128 d = ((sp_int128)d1 << 57) + d0; + sp_uint64 lo = (sp_uint64)d; + sp_digit hi = (sp_digit)(d >> 64); + + __asm__ __volatile__ ( + "idiv %2" + : "+a" (lo) + : "d" (hi), "r" (div) + : "cc" + ); + + return (sp_digit)lo; +#else + sp_int128 d = ((sp_int128)d1 << 57) + d0; + sp_digit r = 0; sp_digit t; + sp_digit dv = (div >> 26) + 1; - /* All 57 bits from d1 and top 6 bits from d0. */ - d = (d1 << 6) + (d0 >> 51); - r = d / dv; - d -= r * dv; - /* Up to 7 bits in r */ - /* Next 6 bits from d0. 
*/ - r <<= 6; - d <<= 6; - d += (d0 >> 45) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; + t = (sp_digit)(d >> 52); + t = (t / dv) << 26; r += t; - /* Up to 13 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 39) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int128)t * div; + t = (sp_digit)(d >> 21); + t = t / (dv << 5); r += t; - /* Up to 19 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 33) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int128)t * div; + t = (sp_digit)d; + t = t / div; r += t; - /* Up to 25 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 27) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 31 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 21) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 37 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 15) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 43 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 9) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 49 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 3) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 55 bits in r */ - /* Remaining 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += d0 & ((1 << 3) - 1); - t = d / dv; - r += t; - - /* All 57 bits from d1 and top 6 bits from d0. */ + d -= (sp_int128)t * div; return r; +#endif } -#endif /* WOLFSSL_SP_DIV_64 */ - /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. 
* @@ -5743,7 +5143,6 @@ static int sp_2048_div_36(const sp_digit* a, const sp_digit* d, { int i; #ifndef WOLFSSL_SP_DIV_64 - sp_int128 d1; #endif sp_digit dv; sp_digit r1; @@ -5777,14 +5176,7 @@ static int sp_2048_div_36(const sp_digit* a, const sp_digit* d, t1[36 + 36] += t1[36 + 36 - 1] >> 57; t1[36 + 36 - 1] &= 0x1ffffffffffffffL; for (i=36; i>=0; i--) { -#ifndef WOLFSSL_SP_DIV_64 - d1 = t1[36 + i]; - d1 <<= 57; - d1 += t1[36 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_2048_div_word_36(t1[36 + i], t1[36 + i - 1], dv); -#endif sp_2048_mul_d_36(t2, sd, r1); (void)sp_2048_sub_36(&t1[i], &t1[i], t2); @@ -5792,14 +5184,7 @@ static int sp_2048_div_36(const sp_digit* a, const sp_digit* d, t1[36 + i] -= t2[36]; t1[36 + i] += t1[36 + i - 1] >> 57; t1[36 + i - 1] &= 0x1ffffffffffffffL; -#ifndef WOLFSSL_SP_DIV_64 - d1 = -t1[36 + i]; - d1 <<= 57; - d1 -= t1[36 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_2048_div_word_36(-t1[36 + i], -t1[36 + i - 1], dv); -#endif r1 -= t1[36 + i]; sp_2048_mul_d_36(t2, sd, r1); (void)sp_2048_add_36(&t1[i], &t1[i], t2); @@ -5817,8 +5202,7 @@ static int sp_2048_div_36(const sp_digit* a, const sp_digit* d, r[i+1] += r[i] >> 57; r[i] &= 0x1ffffffffffffffL; } - sp_2048_cond_add_36(r, r, sd, 0 - ((r[35] < 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_2048_cond_add_36(r, r, sd, r[35] >> 63); sp_2048_norm_36(r); sp_2048_rshift_36(r, r, 4); @@ -5876,10 +5260,7 @@ static int sp_2048_mod_exp_36(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -5944,8 +5325,7 @@ static int sp_2048_mod_exp_36(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_2048_mont_reduce_36(t[0], m, mp); n = sp_2048_cmp_36(t[0], m); - sp_2048_cond_sub_36(t[0], t[0], m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_2048_cond_sub_36(t[0], t[0], m, ~(n >> 63)); XMEMCPY(r, t[0], sizeof(*r) * 36 * 2); } @@ -5971,10 +5351,7 @@ static int sp_2048_mod_exp_36(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -6039,8 +5416,7 @@ static int sp_2048_mod_exp_36(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_2048_mont_reduce_36(t[0], m, mp); n = sp_2048_cmp_36(t[0], m); - sp_2048_cond_sub_36(t[0], t[0], m, ((n < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_2048_cond_sub_36(t[0], t[0], m, ~(n >> 63)); XMEMCPY(r, t[0], sizeof(*r) * 36 * 2); } @@ -6066,10 +5442,7 @@ static int sp_2048_mod_exp_36(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -6172,8 +5545,7 @@ static int sp_2048_mod_exp_36(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_2048_mont_reduce_36(rt, m, mp); n = sp_2048_cmp_36(rt, m); - sp_2048_cond_sub_36(rt, rt, m, ((n < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_2048_cond_sub_36(rt, rt, m, ~(n >> 63)); XMEMCPY(r, rt, sizeof(sp_digit) * 72); } @@ -6293,8 +5665,7 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, } sp_2048_mont_reduce_36(r, m, mp); mp = sp_2048_cmp_36(r, m); - sp_2048_cond_sub_36(r, r, m, ((mp < 0) ? - (sp_digit)1 : (sp_digit)0)- 1); + sp_2048_cond_sub_36(r, r, m, ~(mp >> 63)); sp_2048_to_bin_36(r, out); *outLen = 256; @@ -6402,8 +5773,7 @@ int sp_RsaPublic_2048(const byte* in, word32 inLen, const mp_int* em, } sp_2048_mont_reduce_36(r, m, mp); mp = sp_2048_cmp_36(r, m); - sp_2048_cond_sub_36(r, r, m, ((mp < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_2048_cond_sub_36(r, r, m, ~(mp >> 63)); } } } @@ -6626,6 +5996,12 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err = MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if (mp_iseven(qm)) { + err = MP_VAL; + } } #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) @@ -6719,6 +6095,12 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err = MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if (mp_iseven(qm)) { + err = MP_VAL; + } } #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) @@ -7110,7 +6492,7 @@ static int sp_2048_mod_exp_2_36(sp_digit* r, const sp_digit* e, int bits, const byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { + if (bits == 0) { err = MP_VAL; } @@ -7186,14 +6568,12 @@ static int sp_2048_mod_exp_2_36(sp_digit* r, const sp_digit* e, int bits, const (void)sp_2048_add_36(r, r, tmp); sp_2048_norm_36(r); o = sp_2048_cmp_36(r, m); - sp_2048_cond_sub_36(r, r, m, ((o < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_2048_cond_sub_36(r, r, m, ~(o >> 63)); } sp_2048_mont_reduce_36(r, m, mp); n = sp_2048_cmp_36(r, m); - sp_2048_cond_sub_36(r, r, m, ((n < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_2048_cond_sub_36(r, r, m, ~(n >> 63)); } #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) @@ -7821,7 +7201,7 @@ static sp_digit sp_3072_cmp_26(const sp_digit* a, const sp_digit* b) int i; for (i=25; i>=0; i--) { - r |= (a[i] - b[i]) & (0 - (sp_digit)((r == 0) ? 
1 : 0)); + r |= (a[i] - b[i]) & ~(((sp_digit)0 - r) >> 59); } return r; @@ -7911,6 +7291,7 @@ static void sp_3072_mont_reduce_26(sp_digit* a, const sp_digit* m, sp_digit mp) { int i; sp_digit mu; + sp_digit over; sp_3072_norm_26(a + 26); @@ -7924,8 +7305,8 @@ static void sp_3072_mont_reduce_26(sp_digit* a, const sp_digit* m, sp_digit mp) a[i+1] += a[i] >> 60; a[i] &= 0xfffffffffffffffL; sp_3072_mont_shift_26(a, a); - sp_3072_cond_sub_26(a, a, m, 0 - (((a[25] - m[25]) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = a[25] - m[25]; + sp_3072_cond_sub_26(a, a, m, ~((over - 1) >> 63)); sp_3072_norm_26(a); } @@ -8109,175 +7490,47 @@ SP_NOINLINE static void sp_3072_rshift_26(sp_digit* r, const sp_digit* a, r[25] = a[25] >> n; } -#ifdef WOLFSSL_SP_DIV_64 static WC_INLINE sp_digit sp_3072_div_word_26(sp_digit d1, sp_digit d0, - sp_digit dv) + sp_digit div) { - sp_digit d; - sp_digit r; +#ifdef SP_USE_DIVTI3 + sp_int128 d = ((sp_int128)d1 << 60) + d0; + + return d / div; +#elif defined(__x86_64__) || defined(__i386__) + sp_int128 d = ((sp_int128)d1 << 60) + d0; + sp_uint64 lo = (sp_uint64)d; + sp_digit hi = (sp_digit)(d >> 64); + + __asm__ __volatile__ ( + "idiv %2" + : "+a" (lo) + : "d" (hi), "r" (div) + : "cc" + ); + + return (sp_digit)lo; +#else + sp_int128 d = ((sp_int128)d1 << 60) + d0; + sp_digit r = 0; sp_digit t; + sp_digit dv = (div >> 29) + 1; - /* All 60 bits from d1 and top 3 bits from d0. */ - d = (d1 << 3) + (d0 >> 57); - r = d / dv; - d -= r * dv; - /* Up to 4 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 54) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; + t = (sp_digit)(d >> 58); + t = (t / dv) << 29; r += t; - /* Up to 7 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 51) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int128)t * div; + t = (sp_digit)(d >> 27); + t = t / (dv << 2); r += t; - /* Up to 10 bits in r */ - /* Next 3 bits from d0. 
*/ - r <<= 3; - d <<= 3; - d += (d0 >> 48) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int128)t * div; + t = (sp_digit)d; + t = t / div; r += t; - /* Up to 13 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 45) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 16 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 42) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 19 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 39) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 22 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 36) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 25 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 33) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 28 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 30) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 31 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 27) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 34 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 24) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 37 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 21) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 40 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 18) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 43 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 15) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 46 bits in r */ - /* Next 3 bits from d0. 
*/ - r <<= 3; - d <<= 3; - d += (d0 >> 12) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 49 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 9) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 52 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 6) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 55 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 3) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 58 bits in r */ - /* Remaining 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += d0 & ((1 << 3) - 1); - t = d / dv; - r += t; - - /* All 60 bits from d1 and top 3 bits from d0. */ + d -= (sp_int128)t * div; return r; +#endif } -#endif /* WOLFSSL_SP_DIV_64 */ - /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. * @@ -8294,7 +7547,6 @@ static int sp_3072_div_26(const sp_digit* a, const sp_digit* d, { int i; #ifndef WOLFSSL_SP_DIV_64 - sp_int128 d1; #endif sp_digit dv; sp_digit r1; @@ -8328,14 +7580,7 @@ static int sp_3072_div_26(const sp_digit* a, const sp_digit* d, t1[26 + 26] += t1[26 + 26 - 1] >> 60; t1[26 + 26 - 1] &= 0xfffffffffffffffL; for (i=26; i>=0; i--) { -#ifndef WOLFSSL_SP_DIV_64 - d1 = t1[26 + i]; - d1 <<= 60; - d1 += t1[26 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_3072_div_word_26(t1[26 + i], t1[26 + i - 1], dv); -#endif sp_3072_mul_d_26(t2, sd, r1); (void)sp_3072_sub_26(&t1[i], &t1[i], t2); @@ -8343,14 +7588,7 @@ static int sp_3072_div_26(const sp_digit* a, const sp_digit* d, t1[26 + i] -= t2[26]; t1[26 + i] += t1[26 + i - 1] >> 60; t1[26 + i - 1] &= 0xfffffffffffffffL; -#ifndef WOLFSSL_SP_DIV_64 - d1 = -t1[26 + i]; - d1 <<= 60; - d1 -= t1[26 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_3072_div_word_26(-t1[26 + i], -t1[26 + i - 1], dv); -#endif r1 -= t1[26 + i]; sp_3072_mul_d_26(t2, sd, r1); 
(void)sp_3072_add_26(&t1[i], &t1[i], t2); @@ -8368,8 +7606,7 @@ static int sp_3072_div_26(const sp_digit* a, const sp_digit* d, r[i+1] += r[i] >> 60; r[i] &= 0xfffffffffffffffL; } - sp_3072_cond_add_26(r, r, sd, 0 - ((r[25] < 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_3072_cond_add_26(r, r, sd, r[25] >> 63); sp_3072_norm_26(r); sp_3072_rshift_26(r, r, 24); @@ -8424,10 +7661,7 @@ static int sp_3072_mod_exp_26(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -8492,8 +7726,7 @@ static int sp_3072_mod_exp_26(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_3072_mont_reduce_26(t[0], m, mp); n = sp_3072_cmp_26(t[0], m); - sp_3072_cond_sub_26(t[0], t[0], m, ((n < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_26(t[0], t[0], m, ~(n >> 63)); XMEMCPY(r, t[0], sizeof(*r) * 26 * 2); } @@ -8519,10 +7752,7 @@ static int sp_3072_mod_exp_26(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -8587,8 +7817,7 @@ static int sp_3072_mod_exp_26(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_3072_mont_reduce_26(t[0], m, mp); n = sp_3072_cmp_26(t[0], m); - sp_3072_cond_sub_26(t[0], t[0], m, ((n < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_26(t[0], t[0], m, ~(n >> 63)); XMEMCPY(r, t[0], sizeof(*r) * 26 * 2); } @@ -8614,10 +7843,7 @@ static int sp_3072_mod_exp_26(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -8737,8 +7963,7 @@ static int sp_3072_mod_exp_26(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_3072_mont_reduce_26(rt, m, mp); n = sp_3072_cmp_26(rt, m); - sp_3072_cond_sub_26(rt, rt, m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_26(rt, rt, m, ~(n >> 63)); XMEMCPY(r, rt, sizeof(sp_digit) * 52); } @@ -8807,7 +8032,7 @@ static sp_digit sp_3072_cmp_52(const sp_digit* a, const sp_digit* b) int i; for (i=51; i>=0; i--) { - r |= (a[i] - b[i]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[i] - b[i]) & ~(((sp_digit)0 - r) >> 59); } return r; @@ -8903,6 +8128,7 @@ static void sp_3072_mont_reduce_52(sp_digit* a, const sp_digit* m, sp_digit mp) { int i; sp_digit mu; + sp_digit over; sp_3072_norm_52(a + 52); @@ -8941,8 +8167,8 @@ static void sp_3072_mont_reduce_52(sp_digit* a, const sp_digit* m, sp_digit mp) a[i] &= 0xfffffffffffffffL; #endif sp_3072_mont_shift_52(a, a); - sp_3072_cond_sub_52(a, a, m, 0 - (((a[51] - m[51]) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = a[51] - m[51]; + sp_3072_cond_sub_52(a, a, m, ~((over - 1) >> 63)); sp_3072_norm_52(a); } @@ -9046,175 +8272,47 @@ SP_NOINLINE static void sp_3072_rshift_52(sp_digit* r, const sp_digit* a, r[51] = a[51] >> n; } -#ifdef WOLFSSL_SP_DIV_64 static WC_INLINE sp_digit sp_3072_div_word_52(sp_digit d1, sp_digit d0, - sp_digit dv) + sp_digit div) { - sp_digit d; - sp_digit r; +#ifdef SP_USE_DIVTI3 + sp_int128 d = ((sp_int128)d1 << 60) + d0; + + return d / div; +#elif defined(__x86_64__) || defined(__i386__) + sp_int128 d = ((sp_int128)d1 << 60) + d0; + sp_uint64 lo = (sp_uint64)d; + sp_digit hi = (sp_digit)(d >> 64); + + __asm__ __volatile__ ( + "idiv %2" + : "+a" (lo) + : "d" (hi), "r" (div) + : "cc" + ); + + return (sp_digit)lo; +#else + sp_int128 d = ((sp_int128)d1 << 60) + d0; + sp_digit r = 0; sp_digit t; + sp_digit dv = (div >> 29) + 1; - /* All 60 bits from d1 and top 3 bits from d0. */ - d = (d1 << 3) + (d0 >> 57); - r = d / dv; - d -= r * dv; - /* Up to 4 bits in r */ - /* Next 3 bits from d0. 
*/ - r <<= 3; - d <<= 3; - d += (d0 >> 54) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; + t = (sp_digit)(d >> 58); + t = (t / dv) << 29; r += t; - /* Up to 7 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 51) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int128)t * div; + t = (sp_digit)(d >> 27); + t = t / (dv << 2); r += t; - /* Up to 10 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 48) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int128)t * div; + t = (sp_digit)d; + t = t / div; r += t; - /* Up to 13 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 45) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 16 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 42) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 19 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 39) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 22 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 36) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 25 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 33) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 28 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 30) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 31 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 27) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 34 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 24) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 37 bits in r */ - /* Next 3 bits from d0. 
*/ - r <<= 3; - d <<= 3; - d += (d0 >> 21) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 40 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 18) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 43 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 15) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 46 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 12) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 49 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 9) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 52 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 6) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 55 bits in r */ - /* Next 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += (d0 >> 3) & ((1 << 3) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 58 bits in r */ - /* Remaining 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += d0 & ((1 << 3) - 1); - t = d / dv; - r += t; - - /* All 60 bits from d1 and top 3 bits from d0. */ + d -= (sp_int128)t * div; return r; +#endif } -#endif /* WOLFSSL_SP_DIV_64 */ - /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. 
* @@ -9231,7 +8329,6 @@ static int sp_3072_div_52(const sp_digit* a, const sp_digit* d, { int i; #ifndef WOLFSSL_SP_DIV_64 - sp_int128 d1; #endif sp_digit dv; sp_digit r1; @@ -9265,14 +8362,7 @@ static int sp_3072_div_52(const sp_digit* a, const sp_digit* d, t1[52 + 52] += t1[52 + 52 - 1] >> 60; t1[52 + 52 - 1] &= 0xfffffffffffffffL; for (i=52; i>=0; i--) { -#ifndef WOLFSSL_SP_DIV_64 - d1 = t1[52 + i]; - d1 <<= 60; - d1 += t1[52 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_3072_div_word_52(t1[52 + i], t1[52 + i - 1], dv); -#endif sp_3072_mul_d_52(t2, sd, r1); (void)sp_3072_sub_52(&t1[i], &t1[i], t2); @@ -9280,14 +8370,7 @@ static int sp_3072_div_52(const sp_digit* a, const sp_digit* d, t1[52 + i] -= t2[52]; t1[52 + i] += t1[52 + i - 1] >> 60; t1[52 + i - 1] &= 0xfffffffffffffffL; -#ifndef WOLFSSL_SP_DIV_64 - d1 = -t1[52 + i]; - d1 <<= 60; - d1 -= t1[52 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_3072_div_word_52(-t1[52 + i], -t1[52 + i - 1], dv); -#endif r1 -= t1[52 + i]; sp_3072_mul_d_52(t2, sd, r1); (void)sp_3072_add_52(&t1[i], &t1[i], t2); @@ -9305,8 +8388,7 @@ static int sp_3072_div_52(const sp_digit* a, const sp_digit* d, r[i+1] += r[i] >> 60; r[i] &= 0xfffffffffffffffL; } - sp_3072_cond_add_52(r, r, sd, 0 - ((r[51] < 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_3072_cond_add_52(r, r, sd, r[51] >> 63); sp_3072_norm_52(r); sp_3072_rshift_52(r, r, 48); @@ -9362,10 +8444,7 @@ static int sp_3072_mod_exp_52(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -9430,8 +8509,7 @@ static int sp_3072_mod_exp_52(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_3072_mont_reduce_52(t[0], m, mp); n = sp_3072_cmp_52(t[0], m); - sp_3072_cond_sub_52(t[0], t[0], m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_52(t[0], t[0], m, ~(n >> 63)); XMEMCPY(r, t[0], sizeof(*r) * 52 * 2); } @@ -9457,10 +8535,7 @@ static int sp_3072_mod_exp_52(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -9525,8 +8600,7 @@ static int sp_3072_mod_exp_52(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_3072_mont_reduce_52(t[0], m, mp); n = sp_3072_cmp_52(t[0], m); - sp_3072_cond_sub_52(t[0], t[0], m, ((n < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_52(t[0], t[0], m, ~(n >> 63)); XMEMCPY(r, t[0], sizeof(*r) * 52 * 2); } @@ -9552,10 +8626,7 @@ static int sp_3072_mod_exp_52(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -9658,8 +8729,7 @@ static int sp_3072_mod_exp_52(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_3072_mont_reduce_52(rt, m, mp); n = sp_3072_cmp_52(rt, m); - sp_3072_cond_sub_52(rt, rt, m, ((n < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_52(rt, rt, m, ~(n >> 63)); XMEMCPY(r, rt, sizeof(sp_digit) * 104); } @@ -9777,8 +8847,7 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, } sp_3072_mont_reduce_52(r, m, mp); mp = sp_3072_cmp_52(r, m); - sp_3072_cond_sub_52(r, r, m, ((mp < 0) ? - (sp_digit)1 : (sp_digit)0)- 1); + sp_3072_cond_sub_52(r, r, m, ~(mp >> 63)); sp_3072_to_bin_52(r, out); *outLen = 384; @@ -9886,8 +8955,7 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, } sp_3072_mont_reduce_52(r, m, mp); mp = sp_3072_cmp_52(r, m); - sp_3072_cond_sub_52(r, r, m, ((mp < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_52(r, r, m, ~(mp >> 63)); } } } @@ -10110,6 +9178,12 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err = MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if (mp_iseven(qm)) { + err = MP_VAL; + } } #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) @@ -10203,6 +9277,12 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err = MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if (mp_iseven(qm)) { + err = MP_VAL; + } } #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) @@ -10525,7 +9605,7 @@ static int sp_3072_mod_exp_2_52(sp_digit* r, const sp_digit* e, int bits, const byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { + if (bits == 0) { err = MP_VAL; } @@ -10601,14 +9681,12 @@ static int sp_3072_mod_exp_2_52(sp_digit* r, const sp_digit* e, int bits, const (void)sp_3072_add_52(r, r, tmp); sp_3072_norm_52(r); o = sp_3072_cmp_52(r, m); - sp_3072_cond_sub_52(r, r, m, ((o < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_52(r, r, m, ~(o >> 63)); } sp_3072_mont_reduce_52(r, m, mp); n = sp_3072_cmp_52(r, m); - sp_3072_cond_sub_52(r, r, m, ((n < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_52(r, r, m, ~(n >> 63)); } #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) @@ -11703,17 +10781,17 @@ static sp_digit sp_3072_cmp_27(const sp_digit* a, const sp_digit* b) int i; r |= (a[26] - b[26]) & (0 - (sp_digit)1); - r |= (a[25] - b[25]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[24] - b[24]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[25] - b[25]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[24] - b[24]) & ~(((sp_digit)0 - r) >> 56); for (i = 16; i >= 0; i -= 8) { - r |= (a[i + 7] - b[i + 7]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 6] - b[i + 6]) & (0 - (sp_digit)((r == 0) ? 
1 : 0)); - r |= (a[i + 5] - b[i + 5]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 4] - b[i + 4]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 3] - b[i + 3]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 2] - b[i + 2]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 1] - b[i + 1]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 0] - b[i + 0]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[i + 7] - b[i + 7]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 6] - b[i + 6]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 5] - b[i + 5]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 4] - b[i + 4]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 3] - b[i + 3]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 2] - b[i + 2]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 1] - b[i + 1]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 0] - b[i + 0]) & ~(((sp_digit)0 - r) >> 56); } return r; @@ -11834,6 +10912,7 @@ static void sp_3072_mont_reduce_27(sp_digit* a, const sp_digit* m, sp_digit mp) { int i; sp_digit mu; + sp_digit over; sp_3072_norm_27(a + 27); @@ -11847,8 +10926,8 @@ static void sp_3072_mont_reduce_27(sp_digit* a, const sp_digit* m, sp_digit mp) a[i+1] += a[i] >> 57; a[i] &= 0x1ffffffffffffffL; sp_3072_mont_shift_27(a, a); - sp_3072_cond_sub_27(a, a, m, 0 - (((a[26] - m[26]) > 0) ? 
- (sp_digit)1 : (sp_digit)0)); + over = a[26] - m[26]; + sp_3072_cond_sub_27(a, a, m, ~((over - 1) >> 63)); sp_3072_norm_27(a); } @@ -11981,95 +11060,47 @@ SP_NOINLINE static void sp_3072_rshift_27(sp_digit* r, const sp_digit* a, r[26] = a[26] >> n; } -#ifdef WOLFSSL_SP_DIV_64 static WC_INLINE sp_digit sp_3072_div_word_27(sp_digit d1, sp_digit d0, - sp_digit dv) + sp_digit div) { - sp_digit d; - sp_digit r; +#ifdef SP_USE_DIVTI3 + sp_int128 d = ((sp_int128)d1 << 57) + d0; + + return d / div; +#elif defined(__x86_64__) || defined(__i386__) + sp_int128 d = ((sp_int128)d1 << 57) + d0; + sp_uint64 lo = (sp_uint64)d; + sp_digit hi = (sp_digit)(d >> 64); + + __asm__ __volatile__ ( + "idiv %2" + : "+a" (lo) + : "d" (hi), "r" (div) + : "cc" + ); + + return (sp_digit)lo; +#else + sp_int128 d = ((sp_int128)d1 << 57) + d0; + sp_digit r = 0; sp_digit t; + sp_digit dv = (div >> 26) + 1; - /* All 57 bits from d1 and top 6 bits from d0. */ - d = (d1 << 6) + (d0 >> 51); - r = d / dv; - d -= r * dv; - /* Up to 7 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 45) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; + t = (sp_digit)(d >> 52); + t = (t / dv) << 26; r += t; - /* Up to 13 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 39) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int128)t * div; + t = (sp_digit)(d >> 21); + t = t / (dv << 5); r += t; - /* Up to 19 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 33) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int128)t * div; + t = (sp_digit)d; + t = t / div; r += t; - /* Up to 25 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 27) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 31 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 21) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 37 bits in r */ - /* Next 6 bits from d0. 
*/ - r <<= 6; - d <<= 6; - d += (d0 >> 15) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 43 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 9) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 49 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 3) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 55 bits in r */ - /* Remaining 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += d0 & ((1 << 3) - 1); - t = d / dv; - r += t; - - /* All 57 bits from d1 and top 6 bits from d0. */ + d -= (sp_int128)t * div; return r; +#endif } -#endif /* WOLFSSL_SP_DIV_64 */ - /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. * @@ -12086,7 +11117,6 @@ static int sp_3072_div_27(const sp_digit* a, const sp_digit* d, { int i; #ifndef WOLFSSL_SP_DIV_64 - sp_int128 d1; #endif sp_digit dv; sp_digit r1; @@ -12120,14 +11150,7 @@ static int sp_3072_div_27(const sp_digit* a, const sp_digit* d, t1[27 + 27] += t1[27 + 27 - 1] >> 57; t1[27 + 27 - 1] &= 0x1ffffffffffffffL; for (i=27; i>=0; i--) { -#ifndef WOLFSSL_SP_DIV_64 - d1 = t1[27 + i]; - d1 <<= 57; - d1 += t1[27 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_3072_div_word_27(t1[27 + i], t1[27 + i - 1], dv); -#endif sp_3072_mul_d_27(t2, sd, r1); (void)sp_3072_sub_27(&t1[i], &t1[i], t2); @@ -12135,14 +11158,7 @@ static int sp_3072_div_27(const sp_digit* a, const sp_digit* d, t1[27 + i] -= t2[27]; t1[27 + i] += t1[27 + i - 1] >> 57; t1[27 + i - 1] &= 0x1ffffffffffffffL; -#ifndef WOLFSSL_SP_DIV_64 - d1 = -t1[27 + i]; - d1 <<= 57; - d1 -= t1[27 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_3072_div_word_27(-t1[27 + i], -t1[27 + i - 1], dv); -#endif r1 -= t1[27 + i]; sp_3072_mul_d_27(t2, sd, r1); (void)sp_3072_add_27(&t1[i], &t1[i], t2); @@ -12160,8 +11176,7 @@ static int sp_3072_div_27(const sp_digit* a, const sp_digit* d, r[i+1] += r[i] >> 57; r[i] &= 
0x1ffffffffffffffL; } - sp_3072_cond_add_27(r, r, sd, 0 - ((r[26] < 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_3072_cond_add_27(r, r, sd, r[26] >> 63); sp_3072_norm_27(r); sp_3072_rshift_27(r, r, 3); @@ -12216,10 +11231,7 @@ static int sp_3072_mod_exp_27(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -12284,8 +11296,7 @@ static int sp_3072_mod_exp_27(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_3072_mont_reduce_27(t[0], m, mp); n = sp_3072_cmp_27(t[0], m); - sp_3072_cond_sub_27(t[0], t[0], m, ((n < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_27(t[0], t[0], m, ~(n >> 63)); XMEMCPY(r, t[0], sizeof(*r) * 27 * 2); } @@ -12311,10 +11322,7 @@ static int sp_3072_mod_exp_27(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -12379,8 +11387,7 @@ static int sp_3072_mod_exp_27(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_3072_mont_reduce_27(t[0], m, mp); n = sp_3072_cmp_27(t[0], m); - sp_3072_cond_sub_27(t[0], t[0], m, ((n < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_27(t[0], t[0], m, ~(n >> 63)); XMEMCPY(r, t[0], sizeof(*r) * 27 * 2); } @@ -12406,10 +11413,7 @@ static int sp_3072_mod_exp_27(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -12529,8 +11533,7 @@ static int sp_3072_mod_exp_27(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_3072_mont_reduce_27(rt, m, mp); n = sp_3072_cmp_27(rt, m); - sp_3072_cond_sub_27(rt, rt, m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_27(rt, rt, m, ~(n >> 63)); XMEMCPY(r, rt, sizeof(sp_digit) * 54); } @@ -12593,20 +11596,20 @@ static sp_digit sp_3072_cmp_54(const sp_digit* a, const sp_digit* b) int i; r |= (a[53] - b[53]) & (0 - (sp_digit)1); - r |= (a[52] - b[52]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[51] - b[51]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[50] - b[50]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[49] - b[49]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[48] - b[48]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[52] - b[52]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[51] - b[51]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[50] - b[50]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[49] - b[49]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[48] - b[48]) & ~(((sp_digit)0 - r) >> 56); for (i = 40; i >= 0; i -= 8) { - r |= (a[i + 7] - b[i + 7]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 6] - b[i + 6]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 5] - b[i + 5]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 4] - b[i + 4]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 3] - b[i + 3]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 2] - b[i + 2]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 1] - b[i + 1]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 0] - b[i + 0]) & (0 - (sp_digit)((r == 0) ? 
1 : 0)); + r |= (a[i + 7] - b[i + 7]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 6] - b[i + 6]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 5] - b[i + 5]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 4] - b[i + 4]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 3] - b[i + 3]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 2] - b[i + 2]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 1] - b[i + 1]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 0] - b[i + 0]) & ~(((sp_digit)0 - r) >> 56); } return r; @@ -12735,6 +11738,7 @@ static void sp_3072_mont_reduce_54(sp_digit* a, const sp_digit* m, sp_digit mp) { int i; sp_digit mu; + sp_digit over; sp_3072_norm_54(a + 54); @@ -12773,8 +11777,8 @@ static void sp_3072_mont_reduce_54(sp_digit* a, const sp_digit* m, sp_digit mp) a[i] &= 0x1ffffffffffffffL; #endif sp_3072_mont_shift_54(a, a); - sp_3072_cond_sub_54(a, a, m, 0 - (((a[53] - m[53]) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = a[53] - m[53]; + sp_3072_cond_sub_54(a, a, m, ~((over - 1) >> 63)); sp_3072_norm_54(a); } @@ -12904,95 +11908,47 @@ SP_NOINLINE static void sp_3072_rshift_54(sp_digit* r, const sp_digit* a, r[53] = a[53] >> n; } -#ifdef WOLFSSL_SP_DIV_64 static WC_INLINE sp_digit sp_3072_div_word_54(sp_digit d1, sp_digit d0, - sp_digit dv) + sp_digit div) { - sp_digit d; - sp_digit r; +#ifdef SP_USE_DIVTI3 + sp_int128 d = ((sp_int128)d1 << 57) + d0; + + return d / div; +#elif defined(__x86_64__) || defined(__i386__) + sp_int128 d = ((sp_int128)d1 << 57) + d0; + sp_uint64 lo = (sp_uint64)d; + sp_digit hi = (sp_digit)(d >> 64); + + __asm__ __volatile__ ( + "idiv %2" + : "+a" (lo) + : "d" (hi), "r" (div) + : "cc" + ); + + return (sp_digit)lo; +#else + sp_int128 d = ((sp_int128)d1 << 57) + d0; + sp_digit r = 0; sp_digit t; + sp_digit dv = (div >> 26) + 1; - /* All 57 bits from d1 and top 6 bits from d0. */ - d = (d1 << 6) + (d0 >> 51); - r = d / dv; - d -= r * dv; - /* Up to 7 bits in r */ - /* Next 6 bits from d0. 
*/ - r <<= 6; - d <<= 6; - d += (d0 >> 45) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; + t = (sp_digit)(d >> 52); + t = (t / dv) << 26; r += t; - /* Up to 13 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 39) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int128)t * div; + t = (sp_digit)(d >> 21); + t = t / (dv << 5); r += t; - /* Up to 19 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 33) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int128)t * div; + t = (sp_digit)d; + t = t / div; r += t; - /* Up to 25 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 27) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 31 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 21) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 37 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 15) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 43 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 9) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 49 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 3) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 55 bits in r */ - /* Remaining 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += d0 & ((1 << 3) - 1); - t = d / dv; - r += t; - - /* All 57 bits from d1 and top 6 bits from d0. */ + d -= (sp_int128)t * div; return r; +#endif } -#endif /* WOLFSSL_SP_DIV_64 */ - /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. 
* @@ -13009,7 +11965,6 @@ static int sp_3072_div_54(const sp_digit* a, const sp_digit* d, { int i; #ifndef WOLFSSL_SP_DIV_64 - sp_int128 d1; #endif sp_digit dv; sp_digit r1; @@ -13043,14 +11998,7 @@ static int sp_3072_div_54(const sp_digit* a, const sp_digit* d, t1[54 + 54] += t1[54 + 54 - 1] >> 57; t1[54 + 54 - 1] &= 0x1ffffffffffffffL; for (i=54; i>=0; i--) { -#ifndef WOLFSSL_SP_DIV_64 - d1 = t1[54 + i]; - d1 <<= 57; - d1 += t1[54 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_3072_div_word_54(t1[54 + i], t1[54 + i - 1], dv); -#endif sp_3072_mul_d_54(t2, sd, r1); (void)sp_3072_sub_54(&t1[i], &t1[i], t2); @@ -13058,14 +12006,7 @@ static int sp_3072_div_54(const sp_digit* a, const sp_digit* d, t1[54 + i] -= t2[54]; t1[54 + i] += t1[54 + i - 1] >> 57; t1[54 + i - 1] &= 0x1ffffffffffffffL; -#ifndef WOLFSSL_SP_DIV_64 - d1 = -t1[54 + i]; - d1 <<= 57; - d1 -= t1[54 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_3072_div_word_54(-t1[54 + i], -t1[54 + i - 1], dv); -#endif r1 -= t1[54 + i]; sp_3072_mul_d_54(t2, sd, r1); (void)sp_3072_add_54(&t1[i], &t1[i], t2); @@ -13083,8 +12024,7 @@ static int sp_3072_div_54(const sp_digit* a, const sp_digit* d, r[i+1] += r[i] >> 57; r[i] &= 0x1ffffffffffffffL; } - sp_3072_cond_add_54(r, r, sd, 0 - ((r[53] < 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_3072_cond_add_54(r, r, sd, r[53] >> 63); sp_3072_norm_54(r); sp_3072_rshift_54(r, r, 6); @@ -13142,10 +12082,7 @@ static int sp_3072_mod_exp_54(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -13210,8 +12147,7 @@ static int sp_3072_mod_exp_54(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_3072_mont_reduce_54(t[0], m, mp); n = sp_3072_cmp_54(t[0], m); - sp_3072_cond_sub_54(t[0], t[0], m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_54(t[0], t[0], m, ~(n >> 63)); XMEMCPY(r, t[0], sizeof(*r) * 54 * 2); } @@ -13237,10 +12173,7 @@ static int sp_3072_mod_exp_54(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -13305,8 +12238,7 @@ static int sp_3072_mod_exp_54(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_3072_mont_reduce_54(t[0], m, mp); n = sp_3072_cmp_54(t[0], m); - sp_3072_cond_sub_54(t[0], t[0], m, ((n < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_54(t[0], t[0], m, ~(n >> 63)); XMEMCPY(r, t[0], sizeof(*r) * 54 * 2); } @@ -13332,10 +12264,7 @@ static int sp_3072_mod_exp_54(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -13438,8 +12367,7 @@ static int sp_3072_mod_exp_54(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_3072_mont_reduce_54(rt, m, mp); n = sp_3072_cmp_54(rt, m); - sp_3072_cond_sub_54(rt, rt, m, ((n < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_54(rt, rt, m, ~(n >> 63)); XMEMCPY(r, rt, sizeof(sp_digit) * 108); } @@ -13559,8 +12487,7 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, } sp_3072_mont_reduce_54(r, m, mp); mp = sp_3072_cmp_54(r, m); - sp_3072_cond_sub_54(r, r, m, ((mp < 0) ? - (sp_digit)1 : (sp_digit)0)- 1); + sp_3072_cond_sub_54(r, r, m, ~(mp >> 63)); sp_3072_to_bin_54(r, out); *outLen = 384; @@ -13668,8 +12595,7 @@ int sp_RsaPublic_3072(const byte* in, word32 inLen, const mp_int* em, } sp_3072_mont_reduce_54(r, m, mp); mp = sp_3072_cmp_54(r, m); - sp_3072_cond_sub_54(r, r, m, ((mp < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_54(r, r, m, ~(mp >> 63)); } } } @@ -13892,6 +12818,12 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err = MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if (mp_iseven(qm)) { + err = MP_VAL; + } } #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) @@ -13985,6 +12917,12 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err = MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if (mp_iseven(qm)) { + err = MP_VAL; + } } #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) @@ -14412,7 +13350,7 @@ static int sp_3072_mod_exp_2_54(sp_digit* r, const sp_digit* e, int bits, const byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { + if (bits == 0) { err = MP_VAL; } @@ -14488,14 +13426,12 @@ static int sp_3072_mod_exp_2_54(sp_digit* r, const sp_digit* e, int bits, const (void)sp_3072_add_54(r, r, tmp); sp_3072_norm_54(r); o = sp_3072_cmp_54(r, m); - sp_3072_cond_sub_54(r, r, m, ((o < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_54(r, r, m, ~(o >> 63)); } sp_3072_mont_reduce_54(r, m, mp); n = sp_3072_cmp_54(r, m); - sp_3072_cond_sub_54(r, r, m, ((n < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_3072_cond_sub_54(r, r, m, ~(n >> 63)); } #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) @@ -15126,7 +14062,7 @@ static sp_digit sp_4096_cmp_35(const sp_digit* a, const sp_digit* b) int i; for (i=34; i>=0; i--) { - r |= (a[i] - b[i]) & (0 - (sp_digit)((r == 0) ? 
1 : 0)); + r |= (a[i] - b[i]) & ~(((sp_digit)0 - r) >> 58); } return r; @@ -15219,6 +14155,7 @@ static void sp_4096_mont_reduce_35(sp_digit* a, const sp_digit* m, sp_digit mp) { int i; sp_digit mu; + sp_digit over; sp_4096_norm_35(a + 35); @@ -15232,8 +14169,8 @@ static void sp_4096_mont_reduce_35(sp_digit* a, const sp_digit* m, sp_digit mp) a[i+1] += a[i] >> 59; a[i] &= 0x7ffffffffffffffL; sp_4096_mont_shift_35(a, a); - sp_4096_cond_sub_35(a, a, m, 0 - (((a[34] - m[34]) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = a[34] - m[34]; + sp_4096_cond_sub_35(a, a, m, ~((over - 1) >> 63)); sp_4096_norm_35(a); } @@ -15417,135 +14354,47 @@ SP_NOINLINE static void sp_4096_rshift_35(sp_digit* r, const sp_digit* a, r[34] = a[34] >> n; } -#ifdef WOLFSSL_SP_DIV_64 static WC_INLINE sp_digit sp_4096_div_word_35(sp_digit d1, sp_digit d0, - sp_digit dv) + sp_digit div) { - sp_digit d; - sp_digit r; +#ifdef SP_USE_DIVTI3 + sp_int128 d = ((sp_int128)d1 << 59) + d0; + + return d / div; +#elif defined(__x86_64__) || defined(__i386__) + sp_int128 d = ((sp_int128)d1 << 59) + d0; + sp_uint64 lo = (sp_uint64)d; + sp_digit hi = (sp_digit)(d >> 64); + + __asm__ __volatile__ ( + "idiv %2" + : "+a" (lo) + : "d" (hi), "r" (div) + : "cc" + ); + + return (sp_digit)lo; +#else + sp_int128 d = ((sp_int128)d1 << 59) + d0; + sp_digit r = 0; sp_digit t; + sp_digit dv = (div >> 28) + 1; - /* All 59 bits from d1 and top 4 bits from d0. */ - d = (d1 << 4) + (d0 >> 55); - r = d / dv; - d -= r * dv; - /* Up to 5 bits in r */ - /* Next 4 bits from d0. */ - r <<= 4; - d <<= 4; - d += (d0 >> 51) & ((1 << 4) - 1); - t = d / dv; - d -= t * dv; + t = (sp_digit)(d >> 56); + t = (t / dv) << 28; r += t; - /* Up to 9 bits in r */ - /* Next 4 bits from d0. */ - r <<= 4; - d <<= 4; - d += (d0 >> 47) & ((1 << 4) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int128)t * div; + t = (sp_digit)(d >> 25); + t = t / (dv << 3); r += t; - /* Up to 13 bits in r */ - /* Next 4 bits from d0. 
*/ - r <<= 4; - d <<= 4; - d += (d0 >> 43) & ((1 << 4) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int128)t * div; + t = (sp_digit)d; + t = t / div; r += t; - /* Up to 17 bits in r */ - /* Next 4 bits from d0. */ - r <<= 4; - d <<= 4; - d += (d0 >> 39) & ((1 << 4) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 21 bits in r */ - /* Next 4 bits from d0. */ - r <<= 4; - d <<= 4; - d += (d0 >> 35) & ((1 << 4) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 25 bits in r */ - /* Next 4 bits from d0. */ - r <<= 4; - d <<= 4; - d += (d0 >> 31) & ((1 << 4) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 29 bits in r */ - /* Next 4 bits from d0. */ - r <<= 4; - d <<= 4; - d += (d0 >> 27) & ((1 << 4) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 33 bits in r */ - /* Next 4 bits from d0. */ - r <<= 4; - d <<= 4; - d += (d0 >> 23) & ((1 << 4) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 37 bits in r */ - /* Next 4 bits from d0. */ - r <<= 4; - d <<= 4; - d += (d0 >> 19) & ((1 << 4) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 41 bits in r */ - /* Next 4 bits from d0. */ - r <<= 4; - d <<= 4; - d += (d0 >> 15) & ((1 << 4) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 45 bits in r */ - /* Next 4 bits from d0. */ - r <<= 4; - d <<= 4; - d += (d0 >> 11) & ((1 << 4) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 49 bits in r */ - /* Next 4 bits from d0. */ - r <<= 4; - d <<= 4; - d += (d0 >> 7) & ((1 << 4) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 53 bits in r */ - /* Next 4 bits from d0. */ - r <<= 4; - d <<= 4; - d += (d0 >> 3) & ((1 << 4) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 57 bits in r */ - /* Remaining 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += d0 & ((1 << 3) - 1); - t = d / dv; - r += t; - - /* All 59 bits from d1 and top 4 bits from d0. 
*/ + d -= (sp_int128)t * div; return r; +#endif } -#endif /* WOLFSSL_SP_DIV_64 */ - /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. * @@ -15562,7 +14411,6 @@ static int sp_4096_div_35(const sp_digit* a, const sp_digit* d, { int i; #ifndef WOLFSSL_SP_DIV_64 - sp_int128 d1; #endif sp_digit dv; sp_digit r1; @@ -15596,14 +14444,7 @@ static int sp_4096_div_35(const sp_digit* a, const sp_digit* d, t1[35 + 35] += t1[35 + 35 - 1] >> 59; t1[35 + 35 - 1] &= 0x7ffffffffffffffL; for (i=35; i>=0; i--) { -#ifndef WOLFSSL_SP_DIV_64 - d1 = t1[35 + i]; - d1 <<= 59; - d1 += t1[35 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_4096_div_word_35(t1[35 + i], t1[35 + i - 1], dv); -#endif sp_4096_mul_d_35(t2, sd, r1); (void)sp_4096_sub_35(&t1[i], &t1[i], t2); @@ -15611,14 +14452,7 @@ static int sp_4096_div_35(const sp_digit* a, const sp_digit* d, t1[35 + i] -= t2[35]; t1[35 + i] += t1[35 + i - 1] >> 59; t1[35 + i - 1] &= 0x7ffffffffffffffL; -#ifndef WOLFSSL_SP_DIV_64 - d1 = -t1[35 + i]; - d1 <<= 59; - d1 -= t1[35 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_4096_div_word_35(-t1[35 + i], -t1[35 + i - 1], dv); -#endif r1 -= t1[35 + i]; sp_4096_mul_d_35(t2, sd, r1); (void)sp_4096_add_35(&t1[i], &t1[i], t2); @@ -15636,8 +14470,7 @@ static int sp_4096_div_35(const sp_digit* a, const sp_digit* d, r[i+1] += r[i] >> 59; r[i] &= 0x7ffffffffffffffL; } - sp_4096_cond_add_35(r, r, sd, 0 - ((r[34] < 0) ? 
- (sp_digit)1 : (sp_digit)0)); + sp_4096_cond_add_35(r, r, sd, r[34] >> 63); sp_4096_norm_35(r); sp_4096_rshift_35(r, r, 17); @@ -15692,10 +14525,7 @@ static int sp_4096_mod_exp_35(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -15760,8 +14590,7 @@ static int sp_4096_mod_exp_35(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_4096_mont_reduce_35(t[0], m, mp); n = sp_4096_cmp_35(t[0], m); - sp_4096_cond_sub_35(t[0], t[0], m, ((n < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_35(t[0], t[0], m, ~(n >> 63)); XMEMCPY(r, t[0], sizeof(*r) * 35 * 2); } @@ -15787,10 +14616,7 @@ static int sp_4096_mod_exp_35(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -15855,8 +14681,7 @@ static int sp_4096_mod_exp_35(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_4096_mont_reduce_35(t[0], m, mp); n = sp_4096_cmp_35(t[0], m); - sp_4096_cond_sub_35(t[0], t[0], m, ((n < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_35(t[0], t[0], m, ~(n >> 63)); XMEMCPY(r, t[0], sizeof(*r) * 35 * 2); } @@ -15882,10 +14707,7 @@ static int sp_4096_mod_exp_35(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -16005,8 +14827,7 @@ static int sp_4096_mod_exp_35(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_4096_mont_reduce_35(rt, m, mp); n = sp_4096_cmp_35(rt, m); - sp_4096_cond_sub_35(rt, rt, m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_35(rt, rt, m, ~(n >> 63)); XMEMCPY(r, rt, sizeof(sp_digit) * 70); } @@ -16076,7 +14897,7 @@ static sp_digit sp_4096_cmp_70(const sp_digit* a, const sp_digit* b) int i; for (i=69; i>=0; i--) { - r |= (a[i] - b[i]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[i] - b[i]) & ~(((sp_digit)0 - r) >> 58); } return r; @@ -16166,6 +14987,7 @@ static void sp_4096_mont_reduce_70(sp_digit* a, const sp_digit* m, sp_digit mp) { int i; sp_digit mu; + sp_digit over; sp_4096_norm_70(a + 70); @@ -16204,8 +15026,8 @@ static void sp_4096_mont_reduce_70(sp_digit* a, const sp_digit* m, sp_digit mp) a[i] &= 0x7ffffffffffffffL; #endif sp_4096_mont_shift_70(a, a); - sp_4096_cond_sub_70(a, a, m, 0 - (((a[69] - m[69]) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = a[69] - m[69]; + sp_4096_cond_sub_70(a, a, m, ~((over - 1) >> 63)); sp_4096_norm_70(a); } @@ -16309,135 +15131,47 @@ SP_NOINLINE static void sp_4096_rshift_70(sp_digit* r, const sp_digit* a, r[69] = a[69] >> n; } -#ifdef WOLFSSL_SP_DIV_64 static WC_INLINE sp_digit sp_4096_div_word_70(sp_digit d1, sp_digit d0, - sp_digit dv) + sp_digit div) { - sp_digit d; - sp_digit r; +#ifdef SP_USE_DIVTI3 + sp_int128 d = ((sp_int128)d1 << 59) + d0; + + return d / div; +#elif defined(__x86_64__) || defined(__i386__) + sp_int128 d = ((sp_int128)d1 << 59) + d0; + sp_uint64 lo = (sp_uint64)d; + sp_digit hi = (sp_digit)(d >> 64); + + __asm__ __volatile__ ( + "idiv %2" + : "+a" (lo) + : "d" (hi), "r" (div) + : "cc" + ); + + return (sp_digit)lo; +#else + sp_int128 d = ((sp_int128)d1 << 59) + d0; + sp_digit r = 0; sp_digit t; + sp_digit dv = (div >> 28) + 1; - /* All 59 bits from d1 and top 4 bits from d0. */ - d = (d1 << 4) + (d0 >> 55); - r = d / dv; - d -= r * dv; - /* Up to 5 bits in r */ - /* Next 4 bits from d0. 
*/ - r <<= 4; - d <<= 4; - d += (d0 >> 51) & ((1 << 4) - 1); - t = d / dv; - d -= t * dv; + t = (sp_digit)(d >> 56); + t = (t / dv) << 28; r += t; - /* Up to 9 bits in r */ - /* Next 4 bits from d0. */ - r <<= 4; - d <<= 4; - d += (d0 >> 47) & ((1 << 4) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int128)t * div; + t = (sp_digit)(d >> 25); + t = t / (dv << 3); r += t; - /* Up to 13 bits in r */ - /* Next 4 bits from d0. */ - r <<= 4; - d <<= 4; - d += (d0 >> 43) & ((1 << 4) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int128)t * div; + t = (sp_digit)d; + t = t / div; r += t; - /* Up to 17 bits in r */ - /* Next 4 bits from d0. */ - r <<= 4; - d <<= 4; - d += (d0 >> 39) & ((1 << 4) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 21 bits in r */ - /* Next 4 bits from d0. */ - r <<= 4; - d <<= 4; - d += (d0 >> 35) & ((1 << 4) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 25 bits in r */ - /* Next 4 bits from d0. */ - r <<= 4; - d <<= 4; - d += (d0 >> 31) & ((1 << 4) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 29 bits in r */ - /* Next 4 bits from d0. */ - r <<= 4; - d <<= 4; - d += (d0 >> 27) & ((1 << 4) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 33 bits in r */ - /* Next 4 bits from d0. */ - r <<= 4; - d <<= 4; - d += (d0 >> 23) & ((1 << 4) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 37 bits in r */ - /* Next 4 bits from d0. */ - r <<= 4; - d <<= 4; - d += (d0 >> 19) & ((1 << 4) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 41 bits in r */ - /* Next 4 bits from d0. */ - r <<= 4; - d <<= 4; - d += (d0 >> 15) & ((1 << 4) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 45 bits in r */ - /* Next 4 bits from d0. */ - r <<= 4; - d <<= 4; - d += (d0 >> 11) & ((1 << 4) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 49 bits in r */ - /* Next 4 bits from d0. 
*/ - r <<= 4; - d <<= 4; - d += (d0 >> 7) & ((1 << 4) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 53 bits in r */ - /* Next 4 bits from d0. */ - r <<= 4; - d <<= 4; - d += (d0 >> 3) & ((1 << 4) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 57 bits in r */ - /* Remaining 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += d0 & ((1 << 3) - 1); - t = d / dv; - r += t; - - /* All 59 bits from d1 and top 4 bits from d0. */ + d -= (sp_int128)t * div; return r; +#endif } -#endif /* WOLFSSL_SP_DIV_64 */ - /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. * @@ -16454,7 +15188,6 @@ static int sp_4096_div_70(const sp_digit* a, const sp_digit* d, { int i; #ifndef WOLFSSL_SP_DIV_64 - sp_int128 d1; #endif sp_digit dv; sp_digit r1; @@ -16488,14 +15221,7 @@ static int sp_4096_div_70(const sp_digit* a, const sp_digit* d, t1[70 + 70] += t1[70 + 70 - 1] >> 59; t1[70 + 70 - 1] &= 0x7ffffffffffffffL; for (i=70; i>=0; i--) { -#ifndef WOLFSSL_SP_DIV_64 - d1 = t1[70 + i]; - d1 <<= 59; - d1 += t1[70 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_4096_div_word_70(t1[70 + i], t1[70 + i - 1], dv); -#endif sp_4096_mul_d_70(t2, sd, r1); (void)sp_4096_sub_70(&t1[i], &t1[i], t2); @@ -16503,14 +15229,7 @@ static int sp_4096_div_70(const sp_digit* a, const sp_digit* d, t1[70 + i] -= t2[70]; t1[70 + i] += t1[70 + i - 1] >> 59; t1[70 + i - 1] &= 0x7ffffffffffffffL; -#ifndef WOLFSSL_SP_DIV_64 - d1 = -t1[70 + i]; - d1 <<= 59; - d1 -= t1[70 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_4096_div_word_70(-t1[70 + i], -t1[70 + i - 1], dv); -#endif r1 -= t1[70 + i]; sp_4096_mul_d_70(t2, sd, r1); (void)sp_4096_add_70(&t1[i], &t1[i], t2); @@ -16528,8 +15247,7 @@ static int sp_4096_div_70(const sp_digit* a, const sp_digit* d, r[i+1] += r[i] >> 59; r[i] &= 0x7ffffffffffffffL; } - sp_4096_cond_add_70(r, r, sd, 0 - ((r[69] < 0) ? 
- (sp_digit)1 : (sp_digit)0)); + sp_4096_cond_add_70(r, r, sd, r[69] >> 63); sp_4096_norm_70(r); sp_4096_rshift_70(r, r, 34); @@ -16585,10 +15303,7 @@ static int sp_4096_mod_exp_70(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -16653,8 +15368,7 @@ static int sp_4096_mod_exp_70(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_4096_mont_reduce_70(t[0], m, mp); n = sp_4096_cmp_70(t[0], m); - sp_4096_cond_sub_70(t[0], t[0], m, ((n < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_70(t[0], t[0], m, ~(n >> 63)); XMEMCPY(r, t[0], sizeof(*r) * 70 * 2); } @@ -16680,10 +15394,7 @@ static int sp_4096_mod_exp_70(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -16748,8 +15459,7 @@ static int sp_4096_mod_exp_70(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_4096_mont_reduce_70(t[0], m, mp); n = sp_4096_cmp_70(t[0], m); - sp_4096_cond_sub_70(t[0], t[0], m, ((n < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_70(t[0], t[0], m, ~(n >> 63)); XMEMCPY(r, t[0], sizeof(*r) * 70 * 2); } @@ -16775,10 +15485,7 @@ static int sp_4096_mod_exp_70(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -16881,8 +15588,7 @@ static int sp_4096_mod_exp_70(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_4096_mont_reduce_70(rt, m, mp); n = sp_4096_cmp_70(rt, m); - sp_4096_cond_sub_70(rt, rt, m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_70(rt, rt, m, ~(n >> 63)); XMEMCPY(r, rt, sizeof(sp_digit) * 140); } @@ -17000,8 +15706,7 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, } sp_4096_mont_reduce_70(r, m, mp); mp = sp_4096_cmp_70(r, m); - sp_4096_cond_sub_70(r, r, m, ((mp < 0) ? - (sp_digit)1 : (sp_digit)0)- 1); + sp_4096_cond_sub_70(r, r, m, ~(mp >> 63)); sp_4096_to_bin_70(r, out); *outLen = 512; @@ -17109,8 +15814,7 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, } sp_4096_mont_reduce_70(r, m, mp); mp = sp_4096_cmp_70(r, m); - sp_4096_cond_sub_70(r, r, m, ((mp < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_70(r, r, m, ~(mp >> 63)); } } } @@ -17333,6 +16037,12 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err = MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if (mp_iseven(qm)) { + err = MP_VAL; + } } #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) @@ -17426,6 +16136,12 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err = MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if (mp_iseven(qm)) { + err = MP_VAL; + } } #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) @@ -17748,7 +16464,7 @@ static int sp_4096_mod_exp_2_70(sp_digit* r, const sp_digit* e, int bits, const byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { + if (bits == 0) { err = MP_VAL; } @@ -17824,14 +16540,12 @@ static int sp_4096_mod_exp_2_70(sp_digit* r, const sp_digit* e, int bits, const (void)sp_4096_add_70(r, r, tmp); sp_4096_norm_70(r); o = sp_4096_cmp_70(r, m); - sp_4096_cond_sub_70(r, r, m, ((o < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_70(r, r, m, ~(o >> 63)); } sp_4096_mont_reduce_70(r, m, mp); n = sp_4096_cmp_70(r, m); - sp_4096_cond_sub_70(r, r, m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_70(r, r, m, ~(n >> 63)); } #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) @@ -18957,21 +17671,21 @@ static sp_digit sp_4096_cmp_39(const sp_digit* a, const sp_digit* b) int i; r |= (a[38] - b[38]) & (0 - (sp_digit)1); - r |= (a[37] - b[37]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[36] - b[36]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[35] - b[35]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[34] - b[34]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[33] - b[33]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[32] - b[32]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[37] - b[37]) & ~(((sp_digit)0 - r) >> 52); + r |= (a[36] - b[36]) & ~(((sp_digit)0 - r) >> 52); + r |= (a[35] - b[35]) & ~(((sp_digit)0 - r) >> 52); + r |= (a[34] - b[34]) & ~(((sp_digit)0 - r) >> 52); + r |= (a[33] - b[33]) & ~(((sp_digit)0 - r) >> 52); + r |= (a[32] - b[32]) & ~(((sp_digit)0 - r) >> 52); for (i = 24; i >= 0; i -= 8) { - r |= (a[i + 7] - b[i + 7]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 6] - b[i + 6]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 5] - b[i + 5]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 4] - b[i + 4]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 3] - b[i + 3]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 2] - b[i + 2]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 1] - b[i + 1]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 0] - b[i + 0]) & (0 - (sp_digit)((r == 0) ? 
1 : 0)); + r |= (a[i + 7] - b[i + 7]) & ~(((sp_digit)0 - r) >> 52); + r |= (a[i + 6] - b[i + 6]) & ~(((sp_digit)0 - r) >> 52); + r |= (a[i + 5] - b[i + 5]) & ~(((sp_digit)0 - r) >> 52); + r |= (a[i + 4] - b[i + 4]) & ~(((sp_digit)0 - r) >> 52); + r |= (a[i + 3] - b[i + 3]) & ~(((sp_digit)0 - r) >> 52); + r |= (a[i + 2] - b[i + 2]) & ~(((sp_digit)0 - r) >> 52); + r |= (a[i + 1] - b[i + 1]) & ~(((sp_digit)0 - r) >> 52); + r |= (a[i + 0] - b[i + 0]) & ~(((sp_digit)0 - r) >> 52); } return r; @@ -19104,6 +17818,7 @@ static void sp_4096_mont_reduce_39(sp_digit* a, const sp_digit* m, sp_digit mp) { int i; sp_digit mu; + sp_digit over; sp_4096_norm_39(a + 39); @@ -19117,8 +17832,8 @@ static void sp_4096_mont_reduce_39(sp_digit* a, const sp_digit* m, sp_digit mp) a[i+1] += a[i] >> 53; a[i] &= 0x1fffffffffffffL; sp_4096_mont_shift_39(a, a); - sp_4096_cond_sub_39(a, a, m, 0 - (((a[38] - m[38]) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = a[38] - m[38]; + sp_4096_cond_sub_39(a, a, m, ~((over - 1) >> 63)); sp_4096_norm_39(a); } @@ -19259,63 +17974,47 @@ SP_NOINLINE static void sp_4096_rshift_39(sp_digit* r, const sp_digit* a, r[38] = a[38] >> n; } -#ifdef WOLFSSL_SP_DIV_64 static WC_INLINE sp_digit sp_4096_div_word_39(sp_digit d1, sp_digit d0, - sp_digit dv) + sp_digit div) { - sp_digit d; - sp_digit r; +#ifdef SP_USE_DIVTI3 + sp_int128 d = ((sp_int128)d1 << 53) + d0; + + return d / div; +#elif defined(__x86_64__) || defined(__i386__) + sp_int128 d = ((sp_int128)d1 << 53) + d0; + sp_uint64 lo = (sp_uint64)d; + sp_digit hi = (sp_digit)(d >> 64); + + __asm__ __volatile__ ( + "idiv %2" + : "+a" (lo) + : "d" (hi), "r" (div) + : "cc" + ); + + return (sp_digit)lo; +#else + sp_int128 d = ((sp_int128)d1 << 53) + d0; + sp_digit r = 0; sp_digit t; + sp_digit dv = (div >> 22) + 1; - /* All 53 bits from d1 and top 10 bits from d0. */ - d = (d1 << 10) + (d0 >> 43); - r = d / dv; - d -= r * dv; - /* Up to 11 bits in r */ - /* Next 10 bits from d0. 
*/ - r <<= 10; - d <<= 10; - d += (d0 >> 33) & ((1 << 10) - 1); - t = d / dv; - d -= t * dv; + t = (sp_digit)(d >> 44); + t = (t / dv) << 22; r += t; - /* Up to 21 bits in r */ - /* Next 10 bits from d0. */ - r <<= 10; - d <<= 10; - d += (d0 >> 23) & ((1 << 10) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int128)t * div; + t = (sp_digit)(d >> 13); + t = t / (dv << 9); r += t; - /* Up to 31 bits in r */ - /* Next 10 bits from d0. */ - r <<= 10; - d <<= 10; - d += (d0 >> 13) & ((1 << 10) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int128)t * div; + t = (sp_digit)d; + t = t / div; r += t; - /* Up to 41 bits in r */ - /* Next 10 bits from d0. */ - r <<= 10; - d <<= 10; - d += (d0 >> 3) & ((1 << 10) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 51 bits in r */ - /* Remaining 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += d0 & ((1 << 3) - 1); - t = d / dv; - r += t; - - /* All 53 bits from d1 and top 10 bits from d0. */ + d -= (sp_int128)t * div; return r; +#endif } -#endif /* WOLFSSL_SP_DIV_64 */ - /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. 
* @@ -19332,7 +18031,6 @@ static int sp_4096_div_39(const sp_digit* a, const sp_digit* d, { int i; #ifndef WOLFSSL_SP_DIV_64 - sp_int128 d1; #endif sp_digit dv; sp_digit r1; @@ -19366,14 +18064,7 @@ static int sp_4096_div_39(const sp_digit* a, const sp_digit* d, t1[39 + 39] += t1[39 + 39 - 1] >> 53; t1[39 + 39 - 1] &= 0x1fffffffffffffL; for (i=39; i>=0; i--) { -#ifndef WOLFSSL_SP_DIV_64 - d1 = t1[39 + i]; - d1 <<= 53; - d1 += t1[39 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_4096_div_word_39(t1[39 + i], t1[39 + i - 1], dv); -#endif sp_4096_mul_d_39(t2, sd, r1); (void)sp_4096_sub_39(&t1[i], &t1[i], t2); @@ -19381,14 +18072,7 @@ static int sp_4096_div_39(const sp_digit* a, const sp_digit* d, t1[39 + i] -= t2[39]; t1[39 + i] += t1[39 + i - 1] >> 53; t1[39 + i - 1] &= 0x1fffffffffffffL; -#ifndef WOLFSSL_SP_DIV_64 - d1 = -t1[39 + i]; - d1 <<= 53; - d1 -= t1[39 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_4096_div_word_39(-t1[39 + i], -t1[39 + i - 1], dv); -#endif r1 -= t1[39 + i]; sp_4096_mul_d_39(t2, sd, r1); (void)sp_4096_add_39(&t1[i], &t1[i], t2); @@ -19406,8 +18090,7 @@ static int sp_4096_div_39(const sp_digit* a, const sp_digit* d, r[i+1] += r[i] >> 53; r[i] &= 0x1fffffffffffffL; } - sp_4096_cond_add_39(r, r, sd, 0 - ((r[38] < 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_4096_cond_add_39(r, r, sd, r[38] >> 63); sp_4096_norm_39(r); sp_4096_rshift_39(r, r, 19); @@ -19462,10 +18145,7 @@ static int sp_4096_mod_exp_39(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -19530,8 +18210,7 @@ static int sp_4096_mod_exp_39(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_4096_mont_reduce_39(t[0], m, mp); n = sp_4096_cmp_39(t[0], m); - sp_4096_cond_sub_39(t[0], t[0], m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_39(t[0], t[0], m, ~(n >> 63)); XMEMCPY(r, t[0], sizeof(*r) * 39 * 2); } @@ -19557,10 +18236,7 @@ static int sp_4096_mod_exp_39(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -19625,8 +18301,7 @@ static int sp_4096_mod_exp_39(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_4096_mont_reduce_39(t[0], m, mp); n = sp_4096_cmp_39(t[0], m); - sp_4096_cond_sub_39(t[0], t[0], m, ((n < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_39(t[0], t[0], m, ~(n >> 63)); XMEMCPY(r, t[0], sizeof(*r) * 39 * 2); } @@ -19652,10 +18327,7 @@ static int sp_4096_mod_exp_39(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -19775,8 +18447,7 @@ static int sp_4096_mod_exp_39(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_4096_mont_reduce_39(rt, m, mp); n = sp_4096_cmp_39(rt, m); - sp_4096_cond_sub_39(rt, rt, m, ((n < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_39(rt, rt, m, ~(n >> 63)); XMEMCPY(r, rt, sizeof(sp_digit) * 78); } @@ -19840,20 +18511,20 @@ static sp_digit sp_4096_cmp_78(const sp_digit* a, const sp_digit* b) int i; r |= (a[77] - b[77]) & (0 - (sp_digit)1); - r |= (a[76] - b[76]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[75] - b[75]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[74] - b[74]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[73] - b[73]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[72] - b[72]) & (0 - (sp_digit)((r == 0) ? 
1 : 0)); + r |= (a[76] - b[76]) & ~(((sp_digit)0 - r) >> 52); + r |= (a[75] - b[75]) & ~(((sp_digit)0 - r) >> 52); + r |= (a[74] - b[74]) & ~(((sp_digit)0 - r) >> 52); + r |= (a[73] - b[73]) & ~(((sp_digit)0 - r) >> 52); + r |= (a[72] - b[72]) & ~(((sp_digit)0 - r) >> 52); for (i = 64; i >= 0; i -= 8) { - r |= (a[i + 7] - b[i + 7]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 6] - b[i + 6]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 5] - b[i + 5]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 4] - b[i + 4]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 3] - b[i + 3]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 2] - b[i + 2]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 1] - b[i + 1]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 0] - b[i + 0]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[i + 7] - b[i + 7]) & ~(((sp_digit)0 - r) >> 52); + r |= (a[i + 6] - b[i + 6]) & ~(((sp_digit)0 - r) >> 52); + r |= (a[i + 5] - b[i + 5]) & ~(((sp_digit)0 - r) >> 52); + r |= (a[i + 4] - b[i + 4]) & ~(((sp_digit)0 - r) >> 52); + r |= (a[i + 3] - b[i + 3]) & ~(((sp_digit)0 - r) >> 52); + r |= (a[i + 2] - b[i + 2]) & ~(((sp_digit)0 - r) >> 52); + r |= (a[i + 1] - b[i + 1]) & ~(((sp_digit)0 - r) >> 52); + r |= (a[i + 0] - b[i + 0]) & ~(((sp_digit)0 - r) >> 52); } return r; @@ -19982,6 +18653,7 @@ static void sp_4096_mont_reduce_78(sp_digit* a, const sp_digit* m, sp_digit mp) { int i; sp_digit mu; + sp_digit over; sp_4096_norm_78(a + 78); @@ -20020,8 +18692,8 @@ static void sp_4096_mont_reduce_78(sp_digit* a, const sp_digit* m, sp_digit mp) a[i] &= 0x1fffffffffffffL; #endif sp_4096_mont_shift_78(a, a); - sp_4096_cond_sub_78(a, a, m, 0 - (((a[77] - m[77]) > 0) ? 
- (sp_digit)1 : (sp_digit)0)); + over = a[77] - m[77]; + sp_4096_cond_sub_78(a, a, m, ~((over - 1) >> 63)); sp_4096_norm_78(a); } @@ -20151,63 +18823,47 @@ SP_NOINLINE static void sp_4096_rshift_78(sp_digit* r, const sp_digit* a, r[77] = a[77] >> n; } -#ifdef WOLFSSL_SP_DIV_64 static WC_INLINE sp_digit sp_4096_div_word_78(sp_digit d1, sp_digit d0, - sp_digit dv) + sp_digit div) { - sp_digit d; - sp_digit r; +#ifdef SP_USE_DIVTI3 + sp_int128 d = ((sp_int128)d1 << 53) + d0; + + return d / div; +#elif defined(__x86_64__) || defined(__i386__) + sp_int128 d = ((sp_int128)d1 << 53) + d0; + sp_uint64 lo = (sp_uint64)d; + sp_digit hi = (sp_digit)(d >> 64); + + __asm__ __volatile__ ( + "idiv %2" + : "+a" (lo) + : "d" (hi), "r" (div) + : "cc" + ); + + return (sp_digit)lo; +#else + sp_int128 d = ((sp_int128)d1 << 53) + d0; + sp_digit r = 0; sp_digit t; + sp_digit dv = (div >> 22) + 1; - /* All 53 bits from d1 and top 10 bits from d0. */ - d = (d1 << 10) + (d0 >> 43); - r = d / dv; - d -= r * dv; - /* Up to 11 bits in r */ - /* Next 10 bits from d0. */ - r <<= 10; - d <<= 10; - d += (d0 >> 33) & ((1 << 10) - 1); - t = d / dv; - d -= t * dv; + t = (sp_digit)(d >> 44); + t = (t / dv) << 22; r += t; - /* Up to 21 bits in r */ - /* Next 10 bits from d0. */ - r <<= 10; - d <<= 10; - d += (d0 >> 23) & ((1 << 10) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int128)t * div; + t = (sp_digit)(d >> 13); + t = t / (dv << 9); r += t; - /* Up to 31 bits in r */ - /* Next 10 bits from d0. */ - r <<= 10; - d <<= 10; - d += (d0 >> 13) & ((1 << 10) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int128)t * div; + t = (sp_digit)d; + t = t / div; r += t; - /* Up to 41 bits in r */ - /* Next 10 bits from d0. */ - r <<= 10; - d <<= 10; - d += (d0 >> 3) & ((1 << 10) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 51 bits in r */ - /* Remaining 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += d0 & ((1 << 3) - 1); - t = d / dv; - r += t; - - /* All 53 bits from d1 and top 10 bits from d0. 
*/ + d -= (sp_int128)t * div; return r; +#endif } -#endif /* WOLFSSL_SP_DIV_64 */ - /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. * @@ -20224,7 +18880,6 @@ static int sp_4096_div_78(const sp_digit* a, const sp_digit* d, { int i; #ifndef WOLFSSL_SP_DIV_64 - sp_int128 d1; #endif sp_digit dv; sp_digit r1; @@ -20258,14 +18913,7 @@ static int sp_4096_div_78(const sp_digit* a, const sp_digit* d, t1[78 + 78] += t1[78 + 78 - 1] >> 53; t1[78 + 78 - 1] &= 0x1fffffffffffffL; for (i=78; i>=0; i--) { -#ifndef WOLFSSL_SP_DIV_64 - d1 = t1[78 + i]; - d1 <<= 53; - d1 += t1[78 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_4096_div_word_78(t1[78 + i], t1[78 + i - 1], dv); -#endif sp_4096_mul_d_78(t2, sd, r1); (void)sp_4096_sub_78(&t1[i], &t1[i], t2); @@ -20273,14 +18921,7 @@ static int sp_4096_div_78(const sp_digit* a, const sp_digit* d, t1[78 + i] -= t2[78]; t1[78 + i] += t1[78 + i - 1] >> 53; t1[78 + i - 1] &= 0x1fffffffffffffL; -#ifndef WOLFSSL_SP_DIV_64 - d1 = -t1[78 + i]; - d1 <<= 53; - d1 -= t1[78 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_4096_div_word_78(-t1[78 + i], -t1[78 + i - 1], dv); -#endif r1 -= t1[78 + i]; sp_4096_mul_d_78(t2, sd, r1); (void)sp_4096_add_78(&t1[i], &t1[i], t2); @@ -20298,8 +18939,7 @@ static int sp_4096_div_78(const sp_digit* a, const sp_digit* d, r[i+1] += r[i] >> 53; r[i] &= 0x1fffffffffffffL; } - sp_4096_cond_add_78(r, r, sd, 0 - ((r[77] < 0) ? 
- (sp_digit)1 : (sp_digit)0)); + sp_4096_cond_add_78(r, r, sd, r[77] >> 63); sp_4096_norm_78(r); sp_4096_rshift_78(r, r, 38); @@ -20357,10 +18997,7 @@ static int sp_4096_mod_exp_78(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -20425,8 +19062,7 @@ static int sp_4096_mod_exp_78(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_4096_mont_reduce_78(t[0], m, mp); n = sp_4096_cmp_78(t[0], m); - sp_4096_cond_sub_78(t[0], t[0], m, ((n < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_78(t[0], t[0], m, ~(n >> 63)); XMEMCPY(r, t[0], sizeof(*r) * 78 * 2); } @@ -20452,10 +19088,7 @@ static int sp_4096_mod_exp_78(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -20520,8 +19153,7 @@ static int sp_4096_mod_exp_78(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_4096_mont_reduce_78(t[0], m, mp); n = sp_4096_cmp_78(t[0], m); - sp_4096_cond_sub_78(t[0], t[0], m, ((n < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_78(t[0], t[0], m, ~(n >> 63)); XMEMCPY(r, t[0], sizeof(*r) * 78 * 2); } @@ -20547,10 +19179,7 @@ static int sp_4096_mod_exp_78(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -20653,8 +19282,7 @@ static int sp_4096_mod_exp_78(sp_digit* r, const sp_digit* a, const sp_digit* e, sp_4096_mont_reduce_78(rt, m, mp); n = sp_4096_cmp_78(rt, m); - sp_4096_cond_sub_78(rt, rt, m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_78(rt, rt, m, ~(n >> 63)); XMEMCPY(r, rt, sizeof(sp_digit) * 156); } @@ -20774,8 +19402,7 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, } sp_4096_mont_reduce_78(r, m, mp); mp = sp_4096_cmp_78(r, m); - sp_4096_cond_sub_78(r, r, m, ((mp < 0) ? - (sp_digit)1 : (sp_digit)0)- 1); + sp_4096_cond_sub_78(r, r, m, ~(mp >> 63)); sp_4096_to_bin_78(r, out); *outLen = 512; @@ -20883,8 +19510,7 @@ int sp_RsaPublic_4096(const byte* in, word32 inLen, const mp_int* em, } sp_4096_mont_reduce_78(r, m, mp); mp = sp_4096_cmp_78(r, m); - sp_4096_cond_sub_78(r, r, m, ((mp < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_78(r, r, m, ~(mp >> 63)); } } } @@ -21107,6 +19733,12 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err = MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if (mp_iseven(qm)) { + err = MP_VAL; + } } #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) @@ -21200,6 +19832,12 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err = MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if (mp_iseven(qm)) { + err = MP_VAL; + } } #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) @@ -21675,7 +20313,7 @@ static int sp_4096_mod_exp_2_78(sp_digit* r, const sp_digit* e, int bits, const byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { + if (bits == 0) { err = MP_VAL; } @@ -21751,14 +20389,12 @@ static int sp_4096_mod_exp_2_78(sp_digit* r, const sp_digit* e, int bits, const (void)sp_4096_add_78(r, r, tmp); sp_4096_norm_78(r); o = sp_4096_cmp_78(r, m); - sp_4096_cond_sub_78(r, r, m, ((o < 0) ? - (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_78(r, r, m, ~(o >> 63)); } sp_4096_mont_reduce_78(r, m, mp); n = sp_4096_cmp_78(r, m); - sp_4096_cond_sub_78(r, r, m, ((n < 0) ? 
- (sp_digit)1 : (sp_digit)0) - 1); + sp_4096_cond_sub_78(r, r, m, ~(n >> 63)); } #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) @@ -22405,14 +21041,14 @@ static sp_digit sp_256_cmp_5(const sp_digit* a, const sp_digit* b) int i; for (i=4; i>=0; i--) { - r |= (a[i] - b[i]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[i] - b[i]) & ~(((sp_digit)0 - r) >> 51); } #else r |= (a[ 4] - b[ 4]) & (0 - (sp_digit)1); - r |= (a[ 3] - b[ 3]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[ 2] - b[ 2]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[ 1] - b[ 1]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[ 0] - b[ 0]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[ 3] - b[ 3]) & ~(((sp_digit)0 - r) >> 51); + r |= (a[ 2] - b[ 2]) & ~(((sp_digit)0 - r) >> 51); + r |= (a[ 1] - b[ 1]) & ~(((sp_digit)0 - r) >> 51); + r |= (a[ 0] - b[ 0]) & ~(((sp_digit)0 - r) >> 51); #endif /* WOLFSSL_SP_SMALL */ return r; @@ -22556,6 +21192,7 @@ static void sp_256_mont_reduce_order_5(sp_digit* a, const sp_digit* m, sp_digit { int i; sp_digit mu; + sp_digit over; sp_256_norm_5(a + 5); @@ -22569,8 +21206,8 @@ static void sp_256_mont_reduce_order_5(sp_digit* a, const sp_digit* m, sp_digit a[i+1] += a[i] >> 52; a[i] &= 0xfffffffffffffL; sp_256_mont_shift_5(a, a); - sp_256_cond_sub_5(a, a, m, 0 - (((a[4] >> 48) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = a[4] >> 48; + sp_256_cond_sub_5(a, a, m, ~((over - 1) >> 63)); sp_256_norm_5(a); } @@ -22790,8 +21427,7 @@ static void sp_256_map_5(sp_point_256* r, const sp_point_256* p, sp_256_mont_reduce_5(r->x, p256_mod, p256_mp_mod); /* Reduce x to less than modulus */ n = sp_256_cmp_5(r->x, p256_mod); - sp_256_cond_sub_5(r->x, r->x, p256_mod, 0 - ((n >= 0) ? 
- (sp_digit)1 : (sp_digit)0)); + sp_256_cond_sub_5(r->x, r->x, p256_mod, ~(n >> 51)); sp_256_norm_5(r->x); /* y /= z^3 */ @@ -22800,8 +21436,7 @@ static void sp_256_map_5(sp_point_256* r, const sp_point_256* p, sp_256_mont_reduce_5(r->y, p256_mod, p256_mp_mod); /* Reduce y to less than modulus */ n = sp_256_cmp_5(r->y, p256_mod); - sp_256_cond_sub_5(r->y, r->y, p256_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_256_cond_sub_5(r->y, r->y, p256_mod, ~(n >> 51)); sp_256_norm_5(r->y); XMEMSET(r->z, 0, sizeof(r->z) / 2); @@ -22819,10 +21454,11 @@ static void sp_256_map_5(sp_point_256* r, const sp_point_256* p, static void sp_256_mont_add_5(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) { + sp_digit over; (void)sp_256_add_5(r, a, b); sp_256_norm_5(r); - sp_256_cond_sub_5(r, r, m, 0 - (((r[4] >> 48) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = r[4] >> 48; + sp_256_cond_sub_5(r, r, m, ~((over - 1) >> 63)); sp_256_norm_5(r); } @@ -22834,10 +21470,11 @@ static void sp_256_mont_add_5(sp_digit* r, const sp_digit* a, const sp_digit* b, */ static void sp_256_mont_dbl_5(sp_digit* r, const sp_digit* a, const sp_digit* m) { + sp_digit over; (void)sp_256_add_5(r, a, a); sp_256_norm_5(r); - sp_256_cond_sub_5(r, r, m, 0 - (((r[4] >> 48) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = r[4] >> 48; + sp_256_cond_sub_5(r, r, m, ~((over - 1) >> 63)); sp_256_norm_5(r); } @@ -22849,15 +21486,16 @@ static void sp_256_mont_dbl_5(sp_digit* r, const sp_digit* a, const sp_digit* m) */ static void sp_256_mont_tpl_5(sp_digit* r, const sp_digit* a, const sp_digit* m) { + sp_digit over; (void)sp_256_add_5(r, a, a); sp_256_norm_5(r); - sp_256_cond_sub_5(r, r, m, 0 - (((r[4] >> 48) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = r[4] >> 48; + sp_256_cond_sub_5(r, r, m, ~((over - 1) >> 63)); sp_256_norm_5(r); (void)sp_256_add_5(r, r, a); sp_256_norm_5(r); - sp_256_cond_sub_5(r, r, m, 0 - (((r[4] >> 48) > 0) ? 
- (sp_digit)1 : (sp_digit)0)); + over = r[4] >> 48; + sp_256_cond_sub_5(r, r, m, ~((over - 1) >> 63)); sp_256_norm_5(r); } @@ -23233,7 +21871,8 @@ static int sp_256_proj_point_add_5_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, /* Check double */ (void)sp_256_sub_5(ctx->t1, p256_mod, q->y); sp_256_norm_5(ctx->t1); - if ((sp_256_cmp_equal_5(p->x, q->x) & sp_256_cmp_equal_5(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_256_cmp_equal_5(p->x, q->x) & sp_256_cmp_equal_5(p->z, q->z) & (sp_256_cmp_equal_5(p->y, q->y) | sp_256_cmp_equal_5(p->y, ctx->t1))) != 0) { XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); @@ -23401,7 +22040,8 @@ static void sp_256_proj_point_add_5(sp_point_256* r, /* Check double */ (void)sp_256_sub_5(t1, p256_mod, q->y); sp_256_norm_5(t1); - if ((sp_256_cmp_equal_5(p->x, q->x) & sp_256_cmp_equal_5(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_256_cmp_equal_5(p->x, q->x) & sp_256_cmp_equal_5(p->z, q->z) & (sp_256_cmp_equal_5(p->y, q->y) | sp_256_cmp_equal_5(p->y, t1))) != 0) { sp_256_proj_point_dbl_5(r, p, t); } @@ -23434,7 +22074,8 @@ static void sp_256_proj_point_add_5(sp_point_256* r, sp_256_mont_sub_5(t2, t2, t1, p256_mod); /* R = S2 - S1 */ sp_256_mont_sub_5(t4, t4, t3, p256_mod); - if (sp_256_iszero_5(t2) & sp_256_iszero_5(t4) & maskt) { + if (~p->infinity & ~q->infinity & + sp_256_iszero_5(t2) & sp_256_iszero_5(t4) & maskt) { sp_256_proj_point_dbl_5(r, p, t); } else { @@ -24391,7 +23032,8 @@ static void sp_256_proj_point_add_qz1_5(sp_point_256* r, const sp_point_256* p, /* Check double */ (void)sp_256_sub_5(t1, p256_mod, q->y); sp_256_norm_5(t1); - if ((sp_256_cmp_equal_5(p->x, q->x) & sp_256_cmp_equal_5(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_256_cmp_equal_5(p->x, q->x) & sp_256_cmp_equal_5(p->z, q->z) & (sp_256_cmp_equal_5(p->y, q->y) | sp_256_cmp_equal_5(p->y, t1))) != 0) { sp_256_proj_point_dbl_5(r, p, t); } @@ -26890,8 +25532,7 @@ static int sp_256_div_5(const sp_digit* a, const sp_digit* d, t1[5 + i] -= 
t2[5]; sp_256_norm_5(&t1[i + 1]); - mask = (sp_digit)0 - ((t1[5 + i] > 0) ? - (sp_digit)1 : (sp_digit)0); + mask = ~((t1[5 + i] - 1) >> 63); sp_256_cond_sub_5(t1 + i, t1 + i, sd, mask); sp_256_norm_5(&t1[i + 1]); } @@ -29127,16 +27768,16 @@ static sp_digit sp_384_cmp_7(const sp_digit* a, const sp_digit* b) int i; for (i=6; i>=0; i--) { - r |= (a[i] - b[i]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[i] - b[i]) & ~(((sp_digit)0 - r) >> 54); } #else r |= (a[ 6] - b[ 6]) & (0 - (sp_digit)1); - r |= (a[ 5] - b[ 5]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[ 4] - b[ 4]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[ 3] - b[ 3]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[ 2] - b[ 2]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[ 1] - b[ 1]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[ 0] - b[ 0]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[ 5] - b[ 5]) & ~(((sp_digit)0 - r) >> 54); + r |= (a[ 4] - b[ 4]) & ~(((sp_digit)0 - r) >> 54); + r |= (a[ 3] - b[ 3]) & ~(((sp_digit)0 - r) >> 54); + r |= (a[ 2] - b[ 2]) & ~(((sp_digit)0 - r) >> 54); + r |= (a[ 1] - b[ 1]) & ~(((sp_digit)0 - r) >> 54); + r |= (a[ 0] - b[ 0]) & ~(((sp_digit)0 - r) >> 54); #endif /* WOLFSSL_SP_SMALL */ return r; @@ -29296,6 +27937,7 @@ static void sp_384_mont_reduce_order_7(sp_digit* a, const sp_digit* m, sp_digit { int i; sp_digit mu; + sp_digit over; sp_384_norm_7(a + 7); @@ -29309,8 +27951,8 @@ static void sp_384_mont_reduce_order_7(sp_digit* a, const sp_digit* m, sp_digit a[i+1] += a[i] >> 55; a[i] &= 0x7fffffffffffffL; sp_384_mont_shift_7(a, a); - sp_384_cond_sub_7(a, a, m, 0 - (((a[6] >> 54) > 0) ? 
- (sp_digit)1 : (sp_digit)0)); + over = a[6] >> 54; + sp_384_cond_sub_7(a, a, m, ~((over - 1) >> 63)); sp_384_norm_7(a); } @@ -29549,8 +28191,7 @@ static void sp_384_map_7(sp_point_384* r, const sp_point_384* p, sp_384_mont_reduce_7(r->x, p384_mod, p384_mp_mod); /* Reduce x to less than modulus */ n = sp_384_cmp_7(r->x, p384_mod); - sp_384_cond_sub_7(r->x, r->x, p384_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_384_cond_sub_7(r->x, r->x, p384_mod, ~(n >> 54)); sp_384_norm_7(r->x); /* y /= z^3 */ @@ -29559,8 +28200,7 @@ static void sp_384_map_7(sp_point_384* r, const sp_point_384* p, sp_384_mont_reduce_7(r->y, p384_mod, p384_mp_mod); /* Reduce y to less than modulus */ n = sp_384_cmp_7(r->y, p384_mod); - sp_384_cond_sub_7(r->y, r->y, p384_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_384_cond_sub_7(r->y, r->y, p384_mod, ~(n >> 54)); sp_384_norm_7(r->y); XMEMSET(r->z, 0, sizeof(r->z) / 2); @@ -29578,10 +28218,11 @@ static void sp_384_map_7(sp_point_384* r, const sp_point_384* p, static void sp_384_mont_add_7(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) { + sp_digit over; (void)sp_384_add_7(r, a, b); sp_384_norm_7(r); - sp_384_cond_sub_7(r, r, m, 0 - (((r[6] >> 54) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = r[6] >> 54; + sp_384_cond_sub_7(r, r, m, ~((over - 1) >> 63)); sp_384_norm_7(r); } @@ -29593,10 +28234,11 @@ static void sp_384_mont_add_7(sp_digit* r, const sp_digit* a, const sp_digit* b, */ static void sp_384_mont_dbl_7(sp_digit* r, const sp_digit* a, const sp_digit* m) { + sp_digit over; (void)sp_384_add_7(r, a, a); sp_384_norm_7(r); - sp_384_cond_sub_7(r, r, m, 0 - (((r[6] >> 54) > 0) ? 
- (sp_digit)1 : (sp_digit)0)); + over = r[6] >> 54; + sp_384_cond_sub_7(r, r, m, ~((over - 1) >> 63)); sp_384_norm_7(r); } @@ -29608,15 +28250,16 @@ static void sp_384_mont_dbl_7(sp_digit* r, const sp_digit* a, const sp_digit* m) */ static void sp_384_mont_tpl_7(sp_digit* r, const sp_digit* a, const sp_digit* m) { + sp_digit over; (void)sp_384_add_7(r, a, a); sp_384_norm_7(r); - sp_384_cond_sub_7(r, r, m, 0 - (((r[6] >> 54) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = r[6] >> 54; + sp_384_cond_sub_7(r, r, m, ~((over - 1) >> 63)); sp_384_norm_7(r); (void)sp_384_add_7(r, r, a); sp_384_norm_7(r); - sp_384_cond_sub_7(r, r, m, 0 - (((r[6] >> 54) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = r[6] >> 54; + sp_384_cond_sub_7(r, r, m, ~((over - 1) >> 63)); sp_384_norm_7(r); } @@ -29997,7 +28640,8 @@ static int sp_384_proj_point_add_7_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, /* Check double */ (void)sp_384_sub_7(ctx->t1, p384_mod, q->y); sp_384_norm_7(ctx->t1); - if ((sp_384_cmp_equal_7(p->x, q->x) & sp_384_cmp_equal_7(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_384_cmp_equal_7(p->x, q->x) & sp_384_cmp_equal_7(p->z, q->z) & (sp_384_cmp_equal_7(p->y, q->y) | sp_384_cmp_equal_7(p->y, ctx->t1))) != 0) { XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); @@ -30165,7 +28809,8 @@ static void sp_384_proj_point_add_7(sp_point_384* r, /* Check double */ (void)sp_384_sub_7(t1, p384_mod, q->y); sp_384_norm_7(t1); - if ((sp_384_cmp_equal_7(p->x, q->x) & sp_384_cmp_equal_7(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_384_cmp_equal_7(p->x, q->x) & sp_384_cmp_equal_7(p->z, q->z) & (sp_384_cmp_equal_7(p->y, q->y) | sp_384_cmp_equal_7(p->y, t1))) != 0) { sp_384_proj_point_dbl_7(r, p, t); } @@ -30198,7 +28843,8 @@ static void sp_384_proj_point_add_7(sp_point_384* r, sp_384_mont_sub_7(t2, t2, t1, p384_mod); /* R = S2 - S1 */ sp_384_mont_sub_7(t4, t4, t3, p384_mod); - if (sp_384_iszero_7(t2) & sp_384_iszero_7(t4) & maskt) { + if (~p->infinity & ~q->infinity & + 
sp_384_iszero_7(t2) & sp_384_iszero_7(t4) & maskt) { sp_384_proj_point_dbl_7(r, p, t); } else { @@ -31203,7 +29849,8 @@ static void sp_384_proj_point_add_qz1_7(sp_point_384* r, const sp_point_384* p, /* Check double */ (void)sp_384_sub_7(t1, p384_mod, q->y); sp_384_norm_7(t1); - if ((sp_384_cmp_equal_7(p->x, q->x) & sp_384_cmp_equal_7(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_384_cmp_equal_7(p->x, q->x) & sp_384_cmp_equal_7(p->z, q->z) & (sp_384_cmp_equal_7(p->y, q->y) | sp_384_cmp_equal_7(p->y, t1))) != 0) { sp_384_proj_point_dbl_7(r, p, t); } @@ -34228,8 +32875,7 @@ static int sp_384_div_7(const sp_digit* a, const sp_digit* d, t1[7 + i] -= t2[7]; sp_384_norm_7(&t1[i + 1]); - mask = (sp_digit)0 - ((t1[7 + i] > 0) ? - (sp_digit)1 : (sp_digit)0); + mask = ~((t1[7 + i] - 1) >> 63); sp_384_cond_sub_7(t1 + i, t1 + i, sd, mask); sp_384_norm_7(&t1[i + 1]); } @@ -36587,18 +35233,18 @@ static sp_digit sp_521_cmp_9(const sp_digit* a, const sp_digit* b) int i; for (i=8; i>=0; i--) { - r |= (a[i] - b[i]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[i] - b[i]) & ~(((sp_digit)0 - r) >> 57); } #else r |= (a[ 8] - b[ 8]) & (0 - (sp_digit)1); - r |= (a[ 7] - b[ 7]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[ 6] - b[ 6]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[ 5] - b[ 5]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[ 4] - b[ 4]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[ 3] - b[ 3]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[ 2] - b[ 2]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[ 1] - b[ 1]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[ 0] - b[ 0]) & (0 - (sp_digit)((r == 0) ? 
1 : 0)); + r |= (a[ 7] - b[ 7]) & ~(((sp_digit)0 - r) >> 57); + r |= (a[ 6] - b[ 6]) & ~(((sp_digit)0 - r) >> 57); + r |= (a[ 5] - b[ 5]) & ~(((sp_digit)0 - r) >> 57); + r |= (a[ 4] - b[ 4]) & ~(((sp_digit)0 - r) >> 57); + r |= (a[ 3] - b[ 3]) & ~(((sp_digit)0 - r) >> 57); + r |= (a[ 2] - b[ 2]) & ~(((sp_digit)0 - r) >> 57); + r |= (a[ 1] - b[ 1]) & ~(((sp_digit)0 - r) >> 57); + r |= (a[ 0] - b[ 0]) & ~(((sp_digit)0 - r) >> 57); #endif /* WOLFSSL_SP_SMALL */ return r; @@ -36738,6 +35384,7 @@ static void sp_521_mont_reduce_order_9(sp_digit* a, const sp_digit* m, sp_digit { int i; sp_digit mu; + sp_digit over; sp_521_norm_9(a + 9); @@ -36751,8 +35398,8 @@ static void sp_521_mont_reduce_order_9(sp_digit* a, const sp_digit* m, sp_digit a[i+1] += a[i] >> 58; a[i] &= 0x3ffffffffffffffL; sp_521_mont_shift_9(a, a); - sp_521_cond_sub_9(a, a, m, 0 - (((a[8] >> 57) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = a[8] >> 57; + sp_521_cond_sub_9(a, a, m, ~((over - 1) >> 63)); sp_521_norm_9(a); } @@ -36919,8 +35566,7 @@ static void sp_521_map_9(sp_point_521* r, const sp_point_521* p, sp_521_mont_reduce_9(r->x, p521_mod, p521_mp_mod); /* Reduce x to less than modulus */ n = sp_521_cmp_9(r->x, p521_mod); - sp_521_cond_sub_9(r->x, r->x, p521_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_521_cond_sub_9(r->x, r->x, p521_mod, ~(n >> 57)); sp_521_norm_9(r->x); /* y /= z^3 */ @@ -36929,8 +35575,7 @@ static void sp_521_map_9(sp_point_521* r, const sp_point_521* p, sp_521_mont_reduce_9(r->y, p521_mod, p521_mp_mod); /* Reduce y to less than modulus */ n = sp_521_cmp_9(r->y, p521_mod); - sp_521_cond_sub_9(r->y, r->y, p521_mod, 0 - ((n >= 0) ? 
- (sp_digit)1 : (sp_digit)0)); + sp_521_cond_sub_9(r->y, r->y, p521_mod, ~(n >> 57)); sp_521_norm_9(r->y); XMEMSET(r->z, 0, sizeof(r->z) / 2); @@ -36948,10 +35593,11 @@ static void sp_521_map_9(sp_point_521* r, const sp_point_521* p, static void sp_521_mont_add_9(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) { + sp_digit over; (void)sp_521_add_9(r, a, b); sp_521_norm_9(r); - sp_521_cond_sub_9(r, r, m, 0 - (((r[8] >> 57) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = r[8] >> 57; + sp_521_cond_sub_9(r, r, m, ~((over - 1) >> 63)); sp_521_norm_9(r); } @@ -36963,10 +35609,11 @@ static void sp_521_mont_add_9(sp_digit* r, const sp_digit* a, const sp_digit* b, */ static void sp_521_mont_dbl_9(sp_digit* r, const sp_digit* a, const sp_digit* m) { + sp_digit over; (void)sp_521_add_9(r, a, a); sp_521_norm_9(r); - sp_521_cond_sub_9(r, r, m, 0 - (((r[8] >> 57) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = r[8] >> 57; + sp_521_cond_sub_9(r, r, m, ~((over - 1) >> 63)); sp_521_norm_9(r); } @@ -36978,15 +35625,16 @@ static void sp_521_mont_dbl_9(sp_digit* r, const sp_digit* a, const sp_digit* m) */ static void sp_521_mont_tpl_9(sp_digit* r, const sp_digit* a, const sp_digit* m) { + sp_digit over; (void)sp_521_add_9(r, a, a); sp_521_norm_9(r); - sp_521_cond_sub_9(r, r, m, 0 - (((r[8] >> 57) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = r[8] >> 57; + sp_521_cond_sub_9(r, r, m, ~((over - 1) >> 63)); sp_521_norm_9(r); (void)sp_521_add_9(r, r, a); sp_521_norm_9(r); - sp_521_cond_sub_9(r, r, m, 0 - (((r[8] >> 57) > 0) ? 
- (sp_digit)1 : (sp_digit)0)); + over = r[8] >> 57; + sp_521_cond_sub_9(r, r, m, ~((over - 1) >> 63)); sp_521_norm_9(r); } @@ -37372,7 +36020,8 @@ static int sp_521_proj_point_add_9_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, /* Check double */ (void)sp_521_sub_9(ctx->t1, p521_mod, q->y); sp_521_norm_9(ctx->t1); - if ((sp_521_cmp_equal_9(p->x, q->x) & sp_521_cmp_equal_9(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_521_cmp_equal_9(p->x, q->x) & sp_521_cmp_equal_9(p->z, q->z) & (sp_521_cmp_equal_9(p->y, q->y) | sp_521_cmp_equal_9(p->y, ctx->t1))) != 0) { XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); @@ -37540,7 +36189,8 @@ static void sp_521_proj_point_add_9(sp_point_521* r, /* Check double */ (void)sp_521_sub_9(t1, p521_mod, q->y); sp_521_norm_9(t1); - if ((sp_521_cmp_equal_9(p->x, q->x) & sp_521_cmp_equal_9(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_521_cmp_equal_9(p->x, q->x) & sp_521_cmp_equal_9(p->z, q->z) & (sp_521_cmp_equal_9(p->y, q->y) | sp_521_cmp_equal_9(p->y, t1))) != 0) { sp_521_proj_point_dbl_9(r, p, t); } @@ -37573,7 +36223,8 @@ static void sp_521_proj_point_add_9(sp_point_521* r, sp_521_mont_sub_9(t2, t2, t1, p521_mod); /* R = S2 - S1 */ sp_521_mont_sub_9(t4, t4, t3, p521_mod); - if (sp_521_iszero_9(t2) & sp_521_iszero_9(t4) & maskt) { + if (~p->infinity & ~q->infinity & + sp_521_iszero_9(t2) & sp_521_iszero_9(t4) & maskt) { sp_521_proj_point_dbl_9(r, p, t); } else { @@ -38471,7 +37122,8 @@ static void sp_521_proj_point_add_qz1_9(sp_point_521* r, const sp_point_521* p, /* Check double */ (void)sp_521_sub_9(t1, p521_mod, q->y); sp_521_norm_9(t1); - if ((sp_521_cmp_equal_9(p->x, q->x) & sp_521_cmp_equal_9(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_521_cmp_equal_9(p->x, q->x) & sp_521_cmp_equal_9(p->z, q->z) & (sp_521_cmp_equal_9(p->y, q->y) | sp_521_cmp_equal_9(p->y, t1))) != 0) { sp_521_proj_point_dbl_9(r, p, t); } @@ -41405,111 +40057,47 @@ SP_NOINLINE static void sp_521_mul_d_9(sp_digit* r, const sp_digit* a, 
#endif /* WOLFSSL_SP_SMALL */ } -#ifdef WOLFSSL_SP_DIV_64 static WC_INLINE sp_digit sp_521_div_word_9(sp_digit d1, sp_digit d0, - sp_digit dv) + sp_digit div) { - sp_digit d; - sp_digit r; +#ifdef SP_USE_DIVTI3 + sp_int128 d = ((sp_int128)d1 << 58) + d0; + + return d / div; +#elif defined(__x86_64__) || defined(__i386__) + sp_int128 d = ((sp_int128)d1 << 58) + d0; + sp_uint64 lo = (sp_uint64)d; + sp_digit hi = (sp_digit)(d >> 64); + + __asm__ __volatile__ ( + "idiv %2" + : "+a" (lo), "+d" (hi) /* idiv overwrites rdx with the remainder, so hi must be in/out */ + : "r" (div) + : "cc" + ); + + return (sp_digit)lo; +#else + sp_int128 d = ((sp_int128)d1 << 58) + d0; + sp_digit r = 0; sp_digit t; + sp_digit dv = (div >> 27) + 1; - /* All 58 bits from d1 and top 5 bits from d0. */ - d = (d1 << 5) + (d0 >> 53); - r = d / dv; - d -= r * dv; - /* Up to 6 bits in r */ - /* Next 5 bits from d0. */ - r <<= 5; - d <<= 5; - d += (d0 >> 48) & ((1 << 5) - 1); - t = d / dv; - d -= t * dv; + t = (sp_digit)(d >> 54); + t = (t / dv) << 27; r += t; - /* Up to 11 bits in r */ - /* Next 5 bits from d0. */ - r <<= 5; - d <<= 5; - d += (d0 >> 43) & ((1 << 5) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int128)t * div; + t = (sp_digit)(d >> 23); + t = t / (dv << 4); r += t; - /* Up to 16 bits in r */ - /* Next 5 bits from d0. */ - r <<= 5; - d <<= 5; - d += (d0 >> 38) & ((1 << 5) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int128)t * div; + t = (sp_digit)d; + t = t / div; r += t; - /* Up to 21 bits in r */ - /* Next 5 bits from d0. */ - r <<= 5; - d <<= 5; - d += (d0 >> 33) & ((1 << 5) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 26 bits in r */ - /* Next 5 bits from d0. */ - r <<= 5; - d <<= 5; - d += (d0 >> 28) & ((1 << 5) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 31 bits in r */ - /* Next 5 bits from d0. */ - r <<= 5; - d <<= 5; - d += (d0 >> 23) & ((1 << 5) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 36 bits in r */ - /* Next 5 bits from d0.
*/ - r <<= 5; - d <<= 5; - d += (d0 >> 18) & ((1 << 5) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 41 bits in r */ - /* Next 5 bits from d0. */ - r <<= 5; - d <<= 5; - d += (d0 >> 13) & ((1 << 5) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 46 bits in r */ - /* Next 5 bits from d0. */ - r <<= 5; - d <<= 5; - d += (d0 >> 8) & ((1 << 5) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 51 bits in r */ - /* Next 5 bits from d0. */ - r <<= 5; - d <<= 5; - d += (d0 >> 3) & ((1 << 5) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 56 bits in r */ - /* Remaining 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += d0 & ((1 << 3) - 1); - t = d / dv; - r += t; - - /* All 58 bits from d1 and top 5 bits from d0. */ + d -= (sp_int128)t * div; return r; +#endif } -#endif /* WOLFSSL_SP_DIV_64 */ - /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. * @@ -41521,13 +40109,10 @@ static WC_INLINE sp_digit sp_521_div_word_9(sp_digit d1, sp_digit d0, * r Remainder from the division. * returns MEMORY_E when unable to allocate memory and MP_OKAY otherwise. 
*/ -static int sp_521_div_9(const sp_digit* a, const sp_digit* d, +static int sp_521_div_9(const sp_digit* a, const sp_digit* d, const sp_digit* m, sp_digit* r) { int i; -#ifndef WOLFSSL_SP_DIV_64 - sp_int128 d1; -#endif sp_digit dv; sp_digit r1; #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) @@ -41555,14 +40140,7 @@ static int sp_521_div_9(const sp_digit* a, const sp_digit* d, for (i=8; i>=0; i--) { t1[9 + i] += t1[9 + i - 1] >> 58; t1[9 + i - 1] &= 0x3ffffffffffffffL; -#ifndef WOLFSSL_SP_DIV_64 - d1 = t1[9 + i]; - d1 <<= 58; - d1 += t1[9 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_521_div_word_9(t1[9 + i], t1[9 + i - 1], dv); -#endif sp_521_mul_d_9(t2, d, r1); (void)sp_521_sub_9(&t1[i], &t1[i], t2); @@ -41570,14 +40148,7 @@ static int sp_521_div_9(const sp_digit* a, const sp_digit* d, t1[9 + i] -= t2[9]; t1[9 + i] += t1[9 + i - 1] >> 58; t1[9 + i - 1] &= 0x3ffffffffffffffL; -#ifndef WOLFSSL_SP_DIV_64 - d1 = -t1[9 + i]; - d1 <<= 58; - d1 -= t1[9 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_521_div_word_9(-t1[9 + i], -t1[9 + i - 1], dv); -#endif r1++; sp_521_mul_d_9(t2, d, r1); (void)sp_521_add_9(&t1[i], &t1[i], t2); @@ -41595,8 +40166,7 @@ static int sp_521_div_9(const sp_digit* a, const sp_digit* d, r[i+1] += r[i] >> 58; r[i] &= 0x3ffffffffffffffL; } - sp_521_cond_add_9(r, r, d, 0 - ((r[8] < 0) ? 
- (sp_digit)1 : (sp_digit)0)); + sp_521_cond_add_9(r, r, d, r[8] >> 63); } #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) @@ -43964,95 +42534,47 @@ SP_NOINLINE static void sp_1024_rshift_18(sp_digit* r, const sp_digit* a, r[17] = a[17] >> n; } -#ifdef WOLFSSL_SP_DIV_64 static WC_INLINE sp_digit sp_1024_div_word_18(sp_digit d1, sp_digit d0, - sp_digit dv) + sp_digit div) { - sp_digit d; - sp_digit r; +#ifdef SP_USE_DIVTI3 + sp_int128 d = ((sp_int128)d1 << 57) + d0; + + return d / div; +#elif defined(__x86_64__) || defined(__i386__) + sp_int128 d = ((sp_int128)d1 << 57) + d0; + sp_uint64 lo = (sp_uint64)d; + sp_digit hi = (sp_digit)(d >> 64); + + __asm__ __volatile__ ( + "idiv %2" + : "+a" (lo), "+d" (hi) /* idiv overwrites rdx with the remainder, so hi must be in/out */ + : "r" (div) + : "cc" + ); + + return (sp_digit)lo; +#else + sp_int128 d = ((sp_int128)d1 << 57) + d0; + sp_digit r = 0; sp_digit t; + sp_digit dv = (div >> 26) + 1; - /* All 57 bits from d1 and top 6 bits from d0. */ - d = (d1 << 6) + (d0 >> 51); - r = d / dv; - d -= r * dv; - /* Up to 7 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 45) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; + t = (sp_digit)(d >> 52); + t = (t / dv) << 26; r += t; - /* Up to 13 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 39) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int128)t * div; + t = (sp_digit)(d >> 21); + t = t / (dv << 5); r += t; - /* Up to 19 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 33) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; + d -= (sp_int128)t * div; + t = (sp_digit)d; + t = t / div; r += t; - /* Up to 25 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 27) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 31 bits in r */ - /* Next 6 bits from d0.
*/ - r <<= 6; - d <<= 6; - d += (d0 >> 21) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 37 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 15) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 43 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 9) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 49 bits in r */ - /* Next 6 bits from d0. */ - r <<= 6; - d <<= 6; - d += (d0 >> 3) & ((1 << 6) - 1); - t = d / dv; - d -= t * dv; - r += t; - /* Up to 55 bits in r */ - /* Remaining 3 bits from d0. */ - r <<= 3; - d <<= 3; - d += d0 & ((1 << 3) - 1); - t = d / dv; - r += t; - - /* All 57 bits from d1 and top 6 bits from d0. */ + d -= (sp_int128)t * div; return r; +#endif } -#endif /* WOLFSSL_SP_DIV_64 */ - /* Divide d in a and put remainder into r (m*d + r = a) * m is not calculated as it is not needed at this time. * @@ -44069,7 +42591,6 @@ static int sp_1024_div_18(const sp_digit* a, const sp_digit* d, { int i; #ifndef WOLFSSL_SP_DIV_64 - sp_int128 d1; #endif sp_digit dv; sp_digit r1; @@ -44103,14 +42624,7 @@ static int sp_1024_div_18(const sp_digit* a, const sp_digit* d, t1[18 + 18] += t1[18 + 18 - 1] >> 57; t1[18 + 18 - 1] &= 0x1ffffffffffffffL; for (i=18; i>=0; i--) { -#ifndef WOLFSSL_SP_DIV_64 - d1 = t1[18 + i]; - d1 <<= 57; - d1 += t1[18 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_1024_div_word_18(t1[18 + i], t1[18 + i - 1], dv); -#endif sp_1024_mul_d_18(t2, sd, r1); (void)sp_1024_sub_18(&t1[i], &t1[i], t2); @@ -44118,14 +42632,7 @@ static int sp_1024_div_18(const sp_digit* a, const sp_digit* d, t1[18 + i] -= t2[18]; t1[18 + i] += t1[18 + i - 1] >> 57; t1[18 + i - 1] &= 0x1ffffffffffffffL; -#ifndef WOLFSSL_SP_DIV_64 - d1 = -t1[18 + i]; - d1 <<= 57; - d1 -= t1[18 + i - 1]; - r1 = (sp_digit)(d1 / dv); -#else r1 = sp_1024_div_word_18(-t1[18 + i], -t1[18 + i - 1], dv); -#endif r1 -= t1[18 + i]; sp_1024_mul_d_18(t2, sd, r1); 
(void)sp_1024_add_18(&t1[i], &t1[i], t2); @@ -44143,8 +42650,7 @@ static int sp_1024_div_18(const sp_digit* a, const sp_digit* d, r[i+1] += r[i] >> 57; r[i] &= 0x1ffffffffffffffL; } - sp_1024_cond_add_18(r, r, sd, 0 - ((r[17] < 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_1024_cond_add_18(r, r, sd, r[17] >> 63); sp_1024_norm_18(r); sp_1024_rshift_18(r, r, 2); @@ -44451,22 +42957,22 @@ static sp_digit sp_1024_cmp_18(const sp_digit* a, const sp_digit* b) int i; for (i=17; i>=0; i--) { - r |= (a[i] - b[i]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[i] - b[i]) & ~(((sp_digit)0 - r) >> 56); } #else int i; r |= (a[17] - b[17]) & (0 - (sp_digit)1); - r |= (a[16] - b[16]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); + r |= (a[16] - b[16]) & ~(((sp_digit)0 - r) >> 56); for (i = 8; i >= 0; i -= 8) { - r |= (a[i + 7] - b[i + 7]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 6] - b[i + 6]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 5] - b[i + 5]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 4] - b[i + 4]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 3] - b[i + 3]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 2] - b[i + 2]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 1] - b[i + 1]) & (0 - (sp_digit)((r == 0) ? 1 : 0)); - r |= (a[i + 0] - b[i + 0]) & (0 - (sp_digit)((r == 0) ? 
1 : 0)); + r |= (a[i + 7] - b[i + 7]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 6] - b[i + 6]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 5] - b[i + 5]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 4] - b[i + 4]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 3] - b[i + 3]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 2] - b[i + 2]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 1] - b[i + 1]) & ~(((sp_digit)0 - r) >> 56); + r |= (a[i + 0] - b[i + 0]) & ~(((sp_digit)0 - r) >> 56); } #endif /* WOLFSSL_SP_SMALL */ @@ -44624,6 +43130,7 @@ static void sp_1024_mont_reduce_18(sp_digit* a, const sp_digit* m, sp_digit mp) { int i; sp_digit mu; + sp_digit over; sp_1024_norm_18(a + 18); @@ -44650,8 +43157,8 @@ static void sp_1024_mont_reduce_18(sp_digit* a, const sp_digit* m, sp_digit mp) a[i] &= 0x1ffffffffffffffL; } sp_1024_mont_shift_18(a, a); - sp_1024_cond_sub_18(a, a, m, 0 - (((a[17] - m[17]) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = a[17] - m[17]; + sp_1024_cond_sub_18(a, a, m, ~((over - 1) >> 63)); sp_1024_norm_18(a); } @@ -44767,8 +43274,7 @@ static void sp_1024_map_18(sp_point_1024* r, const sp_point_1024* p, sp_1024_mont_reduce_18(r->x, p1024_mod, p1024_mp_mod); /* Reduce x to less than modulus */ n = sp_1024_cmp_18(r->x, p1024_mod); - sp_1024_cond_sub_18(r->x, r->x, p1024_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_1024_cond_sub_18(r->x, r->x, p1024_mod, ~(n >> 56)); sp_1024_norm_18(r->x); /* y /= z^3 */ @@ -44777,8 +43283,7 @@ static void sp_1024_map_18(sp_point_1024* r, const sp_point_1024* p, sp_1024_mont_reduce_18(r->y, p1024_mod, p1024_mp_mod); /* Reduce y to less than modulus */ n = sp_1024_cmp_18(r->y, p1024_mod); - sp_1024_cond_sub_18(r->y, r->y, p1024_mod, 0 - ((n >= 0) ? 
- (sp_digit)1 : (sp_digit)0)); + sp_1024_cond_sub_18(r->y, r->y, p1024_mod, ~(n >> 56)); sp_1024_norm_18(r->y); XMEMSET(r->z, 0, sizeof(r->z) / 2); @@ -44796,10 +43301,11 @@ static void sp_1024_map_18(sp_point_1024* r, const sp_point_1024* p, static void sp_1024_mont_add_18(sp_digit* r, const sp_digit* a, const sp_digit* b, const sp_digit* m) { + sp_digit over; (void)sp_1024_add_18(r, a, b); sp_1024_norm_18(r); - sp_1024_cond_sub_18(r, r, m, 0 - (((r[17] - m[17]) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = r[17] - m[17]; + sp_1024_cond_sub_18(r, r, m, ~((over - 1) >> 63)); sp_1024_norm_18(r); } @@ -44811,10 +43317,11 @@ static void sp_1024_mont_add_18(sp_digit* r, const sp_digit* a, const sp_digit* */ static void sp_1024_mont_dbl_18(sp_digit* r, const sp_digit* a, const sp_digit* m) { + sp_digit over; (void)sp_1024_add_18(r, a, a); sp_1024_norm_18(r); - sp_1024_cond_sub_18(r, r, m, 0 - (((r[17] - m[17]) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = r[17] - m[17]; + sp_1024_cond_sub_18(r, r, m, ~((over - 1) >> 63)); sp_1024_norm_18(r); } @@ -44826,15 +43333,16 @@ static void sp_1024_mont_dbl_18(sp_digit* r, const sp_digit* a, const sp_digit* */ static void sp_1024_mont_tpl_18(sp_digit* r, const sp_digit* a, const sp_digit* m) { + sp_digit over; (void)sp_1024_add_18(r, a, a); sp_1024_norm_18(r); - sp_1024_cond_sub_18(r, r, m, 0 - (((r[17] - m[17]) > 0) ? - (sp_digit)1 : (sp_digit)0)); + over = r[17] - m[17]; + sp_1024_cond_sub_18(r, r, m, ~((over - 1) >> 63)); sp_1024_norm_18(r); (void)sp_1024_add_18(r, r, a); sp_1024_norm_18(r); - sp_1024_cond_sub_18(r, r, m, 0 - (((r[17] - m[17]) > 0) ? 
- (sp_digit)1 : (sp_digit)0)); + over = r[17] - m[17]; + sp_1024_cond_sub_18(r, r, m, ~((over - 1) >> 63)); sp_1024_norm_18(r); } @@ -45189,7 +43697,8 @@ static int sp_1024_proj_point_add_18_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, /* Check double */ (void)sp_1024_sub_18(ctx->t1, p1024_mod, q->y); sp_1024_norm_18(ctx->t1); - if ((sp_1024_cmp_equal_18(p->x, q->x) & sp_1024_cmp_equal_18(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_1024_cmp_equal_18(p->x, q->x) & sp_1024_cmp_equal_18(p->z, q->z) & (sp_1024_cmp_equal_18(p->y, q->y) | sp_1024_cmp_equal_18(p->y, ctx->t1))) != 0) { XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); @@ -45357,7 +43866,8 @@ static void sp_1024_proj_point_add_18(sp_point_1024* r, /* Check double */ (void)sp_1024_mont_sub_18(t1, p1024_mod, q->y, p1024_mod); sp_1024_norm_18(t1); - if ((sp_1024_cmp_equal_18(p->x, q->x) & sp_1024_cmp_equal_18(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_1024_cmp_equal_18(p->x, q->x) & sp_1024_cmp_equal_18(p->z, q->z) & (sp_1024_cmp_equal_18(p->y, q->y) | sp_1024_cmp_equal_18(p->y, t1))) != 0) { sp_1024_proj_point_dbl_18(r, p, t); } @@ -45390,7 +43900,8 @@ static void sp_1024_proj_point_add_18(sp_point_1024* r, sp_1024_mont_sub_18(t2, t2, t1, p1024_mod); /* R = S2 - S1 */ sp_1024_mont_sub_18(t4, t4, t3, p1024_mod); - if (sp_1024_iszero_18(t2) & sp_1024_iszero_18(t4) & maskt) { + if (~p->infinity & ~q->infinity & + sp_1024_iszero_18(t2) & sp_1024_iszero_18(t4) & maskt) { sp_1024_proj_point_dbl_18(r, p, t); } else { @@ -46228,7 +44739,8 @@ static void sp_1024_proj_point_add_qz1_18(sp_point_1024* r, const sp_point_1024* /* Check double */ (void)sp_1024_mont_sub_18(t1, p1024_mod, q->y, p1024_mod); sp_1024_norm_18(t1); - if ((sp_1024_cmp_equal_18(p->x, q->x) & sp_1024_cmp_equal_18(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_1024_cmp_equal_18(p->x, q->x) & sp_1024_cmp_equal_18(p->z, q->z) & (sp_1024_cmp_equal_18(p->y, q->y) | sp_1024_cmp_equal_18(p->y, t1))) != 0) { 
sp_1024_proj_point_dbl_18(r, p, t); } @@ -53596,8 +52108,7 @@ static int sp_1024_ecc_is_point_18(const sp_point_1024* point, sp_1024_mont_add_18(t1, t1, point->x, p1024_mod); n = sp_1024_cmp_18(t1, p1024_mod); - sp_1024_cond_sub_18(t1, t1, p1024_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_1024_cond_sub_18(t1, t1, p1024_mod, ~(n >> 56)); sp_1024_norm_18(t1); if (!sp_1024_iszero_18(t1)) { err = MP_VAL; diff --git a/wolfcrypt/src/sp_cortexm.c b/wolfcrypt/src/sp_cortexm.c index 17b3f7fca..1ffb9170a 100644 --- a/wolfcrypt/src/sp_cortexm.c +++ b/wolfcrypt/src/sp_cortexm.c @@ -3433,10 +3433,7 @@ static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -3588,10 +3585,7 @@ static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -4426,10 +4420,7 @@ static int sp_2048_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -4572,10 +4563,7 @@ static int sp_2048_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -5027,6 +5015,12 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err = MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if (mp_iseven(qm)) { + err = MP_VAL; + } #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) if (err == MP_OKAY) { @@ -5644,7 +5638,7 @@ static int sp_2048_mod_exp_2_64(sp_digit* r, const sp_digit* e, int bits, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { + if 
(bits == 0) { err = MP_VAL; } @@ -8951,10 +8945,7 @@ static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -9106,10 +9097,7 @@ static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -10028,10 +10016,7 @@ static int sp_3072_mod_exp_96(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -10174,10 +10159,7 @@ static int sp_3072_mod_exp_96(sp_digit* r, const sp_digit* a, const sp_digit* e, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -10629,6 +10611,12 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err = MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if (mp_iseven(qm)) { + err = MP_VAL; + } #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) if (err == MP_OKAY) { @@ -11442,7 +11430,7 @@ static int sp_3072_mod_exp_2_96(sp_digit* r, const sp_digit* e, int bits, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { + if (bits == 0) { err = MP_VAL; } @@ -13805,10 +13793,7 @@ static int sp_4096_mod_exp_128(sp_digit* r, const sp_digit* a, const sp_digit* e byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -13951,10 +13936,7 @@ static int sp_4096_mod_exp_128(sp_digit* r, const sp_digit* a, const sp_digit* e byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -14407,6 +14389,12 @@ int 
sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err = MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if (mp_iseven(qm)) { + err = MP_VAL; + } #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) if (err == MP_OKAY) { @@ -15416,7 +15404,7 @@ static int sp_4096_mod_exp_2_128(sp_digit* r, const sp_digit* e, int bits, byte y; int err = MP_OKAY; - if ((m[0] & 1) == 0) { + if (bits == 0) { err = MP_VAL; } @@ -18740,8 +18728,7 @@ static void sp_256_map_8(sp_point_256* r, const sp_point_256* p, sp_256_mont_reduce_8(r->x, p256_mod, p256_mp_mod); /* Reduce x to less than modulus */ n = sp_256_cmp_8(r->x, p256_mod); - sp_256_cond_sub_8(r->x, r->x, p256_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_256_cond_sub_8(r->x, r->x, p256_mod, ~(n >> 31)); sp_256_norm_8(r->x); /* y /= z^3 */ @@ -18750,8 +18737,7 @@ static void sp_256_map_8(sp_point_256* r, const sp_point_256* p, sp_256_mont_reduce_8(r->y, p256_mod, p256_mp_mod); /* Reduce y to less than modulus */ n = sp_256_cmp_8(r->y, p256_mod); - sp_256_cond_sub_8(r->y, r->y, p256_mod, 0 - ((n >= 0) ? 
- (sp_digit)1 : (sp_digit)0)); + sp_256_cond_sub_8(r->y, r->y, p256_mod, ~(n >> 31)); sp_256_norm_8(r->y); XMEMSET(r->z, 0, sizeof(r->z) / 2); @@ -19406,7 +19392,8 @@ static int sp_256_proj_point_add_8_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, /* Check double */ (void)sp_256_sub_8(ctx->t1, p256_mod, q->y); sp_256_norm_8(ctx->t1); - if ((sp_256_cmp_equal_8(p->x, q->x) & sp_256_cmp_equal_8(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_256_cmp_equal_8(p->x, q->x) & sp_256_cmp_equal_8(p->z, q->z) & (sp_256_cmp_equal_8(p->y, q->y) | sp_256_cmp_equal_8(p->y, ctx->t1))) != 0) { XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); @@ -19574,7 +19561,8 @@ static void sp_256_proj_point_add_8(sp_point_256* r, /* Check double */ (void)sp_256_sub_8(t1, p256_mod, q->y); sp_256_norm_8(t1); - if ((sp_256_cmp_equal_8(p->x, q->x) & sp_256_cmp_equal_8(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_256_cmp_equal_8(p->x, q->x) & sp_256_cmp_equal_8(p->z, q->z) & (sp_256_cmp_equal_8(p->y, q->y) | sp_256_cmp_equal_8(p->y, t1))) != 0) { sp_256_proj_point_dbl_8(r, p, t); } @@ -19607,7 +19595,8 @@ static void sp_256_proj_point_add_8(sp_point_256* r, sp_256_mont_sub_8(t2, t2, t1, p256_mod); /* R = S2 - S1 */ sp_256_mont_sub_8(t4, t4, t3, p256_mod); - if (sp_256_iszero_8(t2) & sp_256_iszero_8(t4) & maskt) { + if (~p->infinity & ~q->infinity & + sp_256_iszero_8(t2) & sp_256_iszero_8(t4) & maskt) { sp_256_proj_point_dbl_8(r, p, t); } else { @@ -20049,7 +20038,8 @@ static void sp_256_proj_point_add_qz1_8(sp_point_256* r, const sp_point_256* p, /* Check double */ (void)sp_256_sub_8(t1, p256_mod, q->y); sp_256_norm_8(t1); - if ((sp_256_cmp_equal_8(p->x, q->x) & sp_256_cmp_equal_8(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_256_cmp_equal_8(p->x, q->x) & sp_256_cmp_equal_8(p->z, q->z) & (sp_256_cmp_equal_8(p->y, q->y) | sp_256_cmp_equal_8(p->y, t1))) != 0) { sp_256_proj_point_dbl_8(r, p, t); } @@ -26035,8 +26025,7 @@ static void sp_384_map_12(sp_point_384* r, const 
sp_point_384* p, sp_384_mont_reduce_12(r->x, p384_mod, p384_mp_mod); /* Reduce x to less than modulus */ n = sp_384_cmp_12(r->x, p384_mod); - sp_384_cond_sub_12(r->x, r->x, p384_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_384_cond_sub_12(r->x, r->x, p384_mod, ~(n >> 31)); sp_384_norm_12(r->x); /* y /= z^3 */ @@ -26045,8 +26034,7 @@ static void sp_384_map_12(sp_point_384* r, const sp_point_384* p, sp_384_mont_reduce_12(r->y, p384_mod, p384_mp_mod); /* Reduce y to less than modulus */ n = sp_384_cmp_12(r->y, p384_mod); - sp_384_cond_sub_12(r->y, r->y, p384_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_384_cond_sub_12(r->y, r->y, p384_mod, ~(n >> 31)); sp_384_norm_12(r->y); XMEMSET(r->z, 0, sizeof(r->z) / 2); @@ -26511,7 +26499,8 @@ static int sp_384_proj_point_add_12_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, /* Check double */ (void)sp_384_sub_12(ctx->t1, p384_mod, q->y); sp_384_norm_12(ctx->t1); - if ((sp_384_cmp_equal_12(p->x, q->x) & sp_384_cmp_equal_12(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_384_cmp_equal_12(p->x, q->x) & sp_384_cmp_equal_12(p->z, q->z) & (sp_384_cmp_equal_12(p->y, q->y) | sp_384_cmp_equal_12(p->y, ctx->t1))) != 0) { XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); @@ -26679,7 +26668,8 @@ static void sp_384_proj_point_add_12(sp_point_384* r, /* Check double */ (void)sp_384_sub_12(t1, p384_mod, q->y); sp_384_norm_12(t1); - if ((sp_384_cmp_equal_12(p->x, q->x) & sp_384_cmp_equal_12(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_384_cmp_equal_12(p->x, q->x) & sp_384_cmp_equal_12(p->z, q->z) & (sp_384_cmp_equal_12(p->y, q->y) | sp_384_cmp_equal_12(p->y, t1))) != 0) { sp_384_proj_point_dbl_12(r, p, t); } @@ -26712,7 +26702,8 @@ static void sp_384_proj_point_add_12(sp_point_384* r, sp_384_mont_sub_12(t2, t2, t1, p384_mod); /* R = S2 - S1 */ sp_384_mont_sub_12(t4, t4, t3, p384_mod); - if (sp_384_iszero_12(t2) & sp_384_iszero_12(t4) & maskt) { + if (~p->infinity & ~q->infinity & + sp_384_iszero_12(t2) & 
sp_384_iszero_12(t4) & maskt) { sp_384_proj_point_dbl_12(r, p, t); } else { @@ -27178,7 +27169,8 @@ static void sp_384_proj_point_add_qz1_12(sp_point_384* r, const sp_point_384* p, /* Check double */ (void)sp_384_sub_12(t1, p384_mod, q->y); sp_384_norm_12(t1); - if ((sp_384_cmp_equal_12(p->x, q->x) & sp_384_cmp_equal_12(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_384_cmp_equal_12(p->x, q->x) & sp_384_cmp_equal_12(p->z, q->z) & (sp_384_cmp_equal_12(p->y, q->y) | sp_384_cmp_equal_12(p->y, t1))) != 0) { sp_384_proj_point_dbl_12(r, p, t); } @@ -33403,8 +33395,7 @@ static void sp_521_map_17(sp_point_521* r, const sp_point_521* p, sp_521_mont_reduce_17(r->x, p521_mod, p521_mp_mod); /* Reduce x to less than modulus */ n = sp_521_cmp_17(r->x, p521_mod); - sp_521_cond_sub_17(r->x, r->x, p521_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_521_cond_sub_17(r->x, r->x, p521_mod, ~(n >> 31)); sp_521_norm_17(r->x); /* y /= z^3 */ @@ -33413,8 +33404,7 @@ static void sp_521_map_17(sp_point_521* r, const sp_point_521* p, sp_521_mont_reduce_17(r->y, p521_mod, p521_mp_mod); /* Reduce y to less than modulus */ n = sp_521_cmp_17(r->y, p521_mod); - sp_521_cond_sub_17(r->y, r->y, p521_mod, 0 - ((n >= 0) ? 
- (sp_digit)1 : (sp_digit)0)); + sp_521_cond_sub_17(r->y, r->y, p521_mod, ~(n >> 31)); sp_521_norm_17(r->y); XMEMSET(r->z, 0, sizeof(r->z) / 2); @@ -34204,7 +34194,8 @@ static int sp_521_proj_point_add_17_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, /* Check double */ (void)sp_521_sub_17(ctx->t1, p521_mod, q->y); sp_521_norm_17(ctx->t1); - if ((sp_521_cmp_equal_17(p->x, q->x) & sp_521_cmp_equal_17(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_521_cmp_equal_17(p->x, q->x) & sp_521_cmp_equal_17(p->z, q->z) & (sp_521_cmp_equal_17(p->y, q->y) | sp_521_cmp_equal_17(p->y, ctx->t1))) != 0) { XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); @@ -34372,7 +34363,8 @@ static void sp_521_proj_point_add_17(sp_point_521* r, /* Check double */ (void)sp_521_sub_17(t1, p521_mod, q->y); sp_521_norm_17(t1); - if ((sp_521_cmp_equal_17(p->x, q->x) & sp_521_cmp_equal_17(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_521_cmp_equal_17(p->x, q->x) & sp_521_cmp_equal_17(p->z, q->z) & (sp_521_cmp_equal_17(p->y, q->y) | sp_521_cmp_equal_17(p->y, t1))) != 0) { sp_521_proj_point_dbl_17(r, p, t); } @@ -34405,7 +34397,8 @@ static void sp_521_proj_point_add_17(sp_point_521* r, sp_521_mont_sub_17(t2, t2, t1, p521_mod); /* R = S2 - S1 */ sp_521_mont_sub_17(t4, t4, t3, p521_mod); - if (sp_521_iszero_17(t2) & sp_521_iszero_17(t4) & maskt) { + if (~p->infinity & ~q->infinity & + sp_521_iszero_17(t2) & sp_521_iszero_17(t4) & maskt) { sp_521_proj_point_dbl_17(r, p, t); } else { @@ -34905,7 +34898,8 @@ static void sp_521_proj_point_add_qz1_17(sp_point_521* r, const sp_point_521* p, /* Check double */ (void)sp_521_sub_17(t1, p521_mod, q->y); sp_521_norm_17(t1); - if ((sp_521_cmp_equal_17(p->x, q->x) & sp_521_cmp_equal_17(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_521_cmp_equal_17(p->x, q->x) & sp_521_cmp_equal_17(p->z, q->z) & (sp_521_cmp_equal_17(p->y, q->y) | sp_521_cmp_equal_17(p->y, t1))) != 0) { sp_521_proj_point_dbl_17(r, p, t); } @@ -42772,8 +42766,7 @@ static void 
sp_1024_map_32(sp_point_1024* r, const sp_point_1024* p, sp_1024_mont_reduce_32(r->x, p1024_mod, p1024_mp_mod); /* Reduce x to less than modulus */ n = sp_1024_cmp_32(r->x, p1024_mod); - sp_1024_cond_sub_32(r->x, r->x, p1024_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_1024_cond_sub_32(r->x, r->x, p1024_mod, ~(n >> 31)); sp_1024_norm_32(r->x); /* y /= z^3 */ @@ -42782,8 +42775,7 @@ static void sp_1024_map_32(sp_point_1024* r, const sp_point_1024* p, sp_1024_mont_reduce_32(r->y, p1024_mod, p1024_mp_mod); /* Reduce y to less than modulus */ n = sp_1024_cmp_32(r->y, p1024_mod); - sp_1024_cond_sub_32(r->y, r->y, p1024_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_1024_cond_sub_32(r->y, r->y, p1024_mod, ~(n >> 31)); sp_1024_norm_32(r->y); XMEMSET(r->z, 0, sizeof(r->z) / 2); @@ -44212,7 +44204,8 @@ static int sp_1024_proj_point_add_32_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, /* Check double */ (void)sp_1024_sub_32(ctx->t1, p1024_mod, q->y); sp_1024_norm_32(ctx->t1); - if ((sp_1024_cmp_equal_32(p->x, q->x) & sp_1024_cmp_equal_32(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_1024_cmp_equal_32(p->x, q->x) & sp_1024_cmp_equal_32(p->z, q->z) & (sp_1024_cmp_equal_32(p->y, q->y) | sp_1024_cmp_equal_32(p->y, ctx->t1))) != 0) { XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); @@ -44380,7 +44373,8 @@ static void sp_1024_proj_point_add_32(sp_point_1024* r, /* Check double */ (void)sp_1024_mont_sub_32(t1, p1024_mod, q->y, p1024_mod); sp_1024_norm_32(t1); - if ((sp_1024_cmp_equal_32(p->x, q->x) & sp_1024_cmp_equal_32(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_1024_cmp_equal_32(p->x, q->x) & sp_1024_cmp_equal_32(p->z, q->z) & (sp_1024_cmp_equal_32(p->y, q->y) | sp_1024_cmp_equal_32(p->y, t1))) != 0) { sp_1024_proj_point_dbl_32(r, p, t); } @@ -44413,7 +44407,8 @@ static void sp_1024_proj_point_add_32(sp_point_1024* r, sp_1024_mont_sub_32(t2, t2, t1, p1024_mod); /* R = S2 - S1 */ sp_1024_mont_sub_32(t4, t4, t3, p1024_mod); - if 
(sp_1024_iszero_32(t2) & sp_1024_iszero_32(t4) & maskt) { + if (~p->infinity & ~q->infinity & + sp_1024_iszero_32(t2) & sp_1024_iszero_32(t4) & maskt) { sp_1024_proj_point_dbl_32(r, p, t); } else { @@ -44743,7 +44738,8 @@ static void sp_1024_proj_point_add_qz1_32(sp_point_1024* r, const sp_point_1024* /* Check double */ (void)sp_1024_mont_sub_32(t1, p1024_mod, q->y, p1024_mod); sp_1024_norm_32(t1); - if ((sp_1024_cmp_equal_32(p->x, q->x) & sp_1024_cmp_equal_32(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_1024_cmp_equal_32(p->x, q->x) & sp_1024_cmp_equal_32(p->z, q->z) & (sp_1024_cmp_equal_32(p->y, q->y) | sp_1024_cmp_equal_32(p->y, t1))) != 0) { sp_1024_proj_point_dbl_32(r, p, t); } @@ -52905,8 +52901,7 @@ static int sp_1024_ecc_is_point_32(const sp_point_1024* point, sp_1024_mont_add_32(t1, t1, point->x, p1024_mod); n = sp_1024_cmp_32(t1, p1024_mod); - sp_1024_cond_sub_32(t1, t1, p1024_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_1024_cond_sub_32(t1, t1, p1024_mod, ~(n >> 31)); sp_1024_norm_32(t1); if (!sp_1024_iszero_32(t1)) { err = MP_VAL; diff --git a/wolfcrypt/src/sp_x86_64.c b/wolfcrypt/src/sp_x86_64.c index 74c17871f..9067c02c9 100644 --- a/wolfcrypt/src/sp_x86_64.c +++ b/wolfcrypt/src/sp_x86_64.c @@ -491,10 +491,7 @@ static int sp_2048_mod_exp_16(sp_digit* r, const sp_digit* a, const sp_digit* e, ASSERT_SAVED_VECTOR_REGISTERS(); - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -723,10 +720,7 @@ static int sp_2048_mod_exp_avx2_16(sp_digit* r, const sp_digit* a, const sp_digi ASSERT_SAVED_VECTOR_REGISTERS(); - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -1216,10 +1210,7 @@ static int sp_2048_mod_exp_32(sp_digit* r, const sp_digit* a, const sp_digit* e, ASSERT_SAVED_VECTOR_REGISTERS(); - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -1483,10 +1474,7 @@ static int 
sp_2048_mod_exp_avx2_32(sp_digit* r, const sp_digit* a, const sp_digi ASSERT_SAVED_VECTOR_REGISTERS(); - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -2041,6 +2029,12 @@ int sp_RsaPrivate_2048(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err = MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if (mp_iseven(qm)) { + err = MP_VAL; + } #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) if (err == MP_OKAY) { @@ -2330,7 +2324,7 @@ static int sp_2048_mod_exp_2_avx2_32(sp_digit* r, const sp_digit* e, int bits, ASSERT_SAVED_VECTOR_REGISTERS(); - if ((m[0] & 1) == 0) { + if (bits == 0) { err = MP_VAL; } @@ -2469,7 +2463,7 @@ static int sp_2048_mod_exp_2_32(sp_digit* r, const sp_digit* e, int bits, ASSERT_SAVED_VECTOR_REGISTERS(); - if ((m[0] & 1) == 0) { + if (bits == 0) { err = MP_VAL; } @@ -3211,10 +3205,7 @@ static int sp_3072_mod_exp_24(sp_digit* r, const sp_digit* a, const sp_digit* e, ASSERT_SAVED_VECTOR_REGISTERS(); - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -3443,10 +3434,7 @@ static int sp_3072_mod_exp_avx2_24(sp_digit* r, const sp_digit* a, const sp_digi ASSERT_SAVED_VECTOR_REGISTERS(); - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -3936,10 +3924,7 @@ static int sp_3072_mod_exp_48(sp_digit* r, const sp_digit* a, const sp_digit* e, ASSERT_SAVED_VECTOR_REGISTERS(); - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -4151,10 +4136,7 @@ static int sp_3072_mod_exp_avx2_48(sp_digit* r, const sp_digit* a, const sp_digi ASSERT_SAVED_VECTOR_REGISTERS(); - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -4657,6 +4639,12 @@ int sp_RsaPrivate_3072(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err 
= MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if (mp_iseven(qm)) { + err = MP_VAL; + } #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) if (err == MP_OKAY) { @@ -4946,7 +4934,7 @@ static int sp_3072_mod_exp_2_avx2_48(sp_digit* r, const sp_digit* e, int bits, ASSERT_SAVED_VECTOR_REGISTERS(); - if ((m[0] & 1) == 0) { + if (bits == 0) { err = MP_VAL; } @@ -5085,7 +5073,7 @@ static int sp_3072_mod_exp_2_48(sp_digit* r, const sp_digit* e, int bits, ASSERT_SAVED_VECTOR_REGISTERS(); - if ((m[0] & 1) == 0) { + if (bits == 0) { err = MP_VAL; } @@ -5898,10 +5886,7 @@ static int sp_4096_mod_exp_64(sp_digit* r, const sp_digit* a, const sp_digit* e, ASSERT_SAVED_VECTOR_REGISTERS(); - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -6113,10 +6098,7 @@ static int sp_4096_mod_exp_avx2_64(sp_digit* r, const sp_digit* a, const sp_digi ASSERT_SAVED_VECTOR_REGISTERS(); - if ((m[0] & 1) == 0) { - err = MP_VAL; - } - else if (bits == 0) { + if (bits == 0) { err = MP_VAL; } @@ -6619,6 +6601,12 @@ int sp_RsaPrivate_4096(const byte* in, word32 inLen, const mp_int* dm, else if (mp_iseven(mm)) { err = MP_VAL; } + else if (mp_iseven(pm)) { + err = MP_VAL; + } + else if (mp_iseven(qm)) { + err = MP_VAL; + } #if defined(WOLFSSL_SMALL_STACK) && !defined(WOLFSSL_SP_NO_MALLOC) if (err == MP_OKAY) { @@ -6908,7 +6896,7 @@ static int sp_4096_mod_exp_2_avx2_64(sp_digit* r, const sp_digit* e, int bits, ASSERT_SAVED_VECTOR_REGISTERS(); - if ((m[0] & 1) == 0) { + if (bits == 0) { err = MP_VAL; } @@ -7047,7 +7035,7 @@ static int sp_4096_mod_exp_2_64(sp_digit* r, const sp_digit* e, int bits, ASSERT_SAVED_VECTOR_REGISTERS(); - if ((m[0] & 1) == 0) { + if (bits == 0) { err = MP_VAL; } @@ -7755,8 +7743,7 @@ static void sp_256_map_4(sp_point_256* r, const sp_point_256* p, sp_256_mont_reduce_4(r->x, p256_mod, p256_mp_mod); /* Reduce x to less than modulus */ n = sp_256_cmp_4(r->x, p256_mod); - sp_256_cond_sub_4(r->x, 
r->x, p256_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_256_cond_sub_4(r->x, r->x, p256_mod, ~(n >> 63)); sp_256_norm_4(r->x); /* y /= z^3 */ @@ -7765,8 +7752,7 @@ static void sp_256_map_4(sp_point_256* r, const sp_point_256* p, sp_256_mont_reduce_4(r->y, p256_mod, p256_mp_mod); /* Reduce y to less than modulus */ n = sp_256_cmp_4(r->y, p256_mod); - sp_256_cond_sub_4(r->y, r->y, p256_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_256_cond_sub_4(r->y, r->y, p256_mod, ~(n >> 63)); sp_256_norm_4(r->y); XMEMSET(r->z, 0, sizeof(r->z) / 2); @@ -8149,7 +8135,8 @@ static int sp_256_proj_point_add_4_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r, /* Check double */ (void)sp_256_sub_4(ctx->t1, p256_mod, q->y); sp_256_norm_4(ctx->t1); - if ((sp_256_cmp_equal_4(p->x, q->x) & sp_256_cmp_equal_4(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_256_cmp_equal_4(p->x, q->x) & sp_256_cmp_equal_4(p->z, q->z) & (sp_256_cmp_equal_4(p->y, q->y) | sp_256_cmp_equal_4(p->y, ctx->t1))) != 0) { XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); @@ -8317,7 +8304,8 @@ static void sp_256_proj_point_add_4(sp_point_256* r, /* Check double */ (void)sp_256_sub_4(t1, p256_mod, q->y); sp_256_norm_4(t1); - if ((sp_256_cmp_equal_4(p->x, q->x) & sp_256_cmp_equal_4(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_256_cmp_equal_4(p->x, q->x) & sp_256_cmp_equal_4(p->z, q->z) & (sp_256_cmp_equal_4(p->y, q->y) | sp_256_cmp_equal_4(p->y, t1))) != 0) { sp_256_proj_point_dbl_4(r, p, t); } @@ -8350,7 +8338,8 @@ static void sp_256_proj_point_add_4(sp_point_256* r, sp_256_mont_sub_4(t2, t2, t1, p256_mod); /* R = S2 - S1 */ sp_256_mont_sub_4(t4, t4, t3, p256_mod); - if (sp_256_iszero_4(t2) & sp_256_iszero_4(t4) & maskt) { + if (~p->infinity & ~q->infinity & + sp_256_iszero_4(t2) & sp_256_iszero_4(t4) & maskt) { sp_256_proj_point_dbl_4(r, p, t); } else { @@ -8876,8 +8865,7 @@ static void sp_256_map_avx2_4(sp_point_256* r, const sp_point_256* p, sp_256_mont_reduce_avx2_4(r->x, p256_mod, 
p256_mp_mod); /* Reduce x to less than modulus */ n = sp_256_cmp_4(r->x, p256_mod); - sp_256_cond_sub_4(r->x, r->x, p256_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_256_cond_sub_4(r->x, r->x, p256_mod, ~(n >> 63)); sp_256_norm_4(r->x); /* y /= z^3 */ @@ -8886,8 +8874,7 @@ static void sp_256_map_avx2_4(sp_point_256* r, const sp_point_256* p, sp_256_mont_reduce_avx2_4(r->y, p256_mod, p256_mp_mod); /* Reduce y to less than modulus */ n = sp_256_cmp_4(r->y, p256_mod); - sp_256_cond_sub_avx2_4(r->y, r->y, p256_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_256_cond_sub_avx2_4(r->y, r->y, p256_mod, ~(n >> 63)); sp_256_norm_4(r->y); XMEMSET(r->z, 0, sizeof(r->z) / 2); @@ -9246,7 +9233,8 @@ static int sp_256_proj_point_add_avx2_4_nb(sp_ecc_ctx_t* sp_ctx, sp_point_256* r /* Check double */ (void)sp_256_sub_avx2_4(ctx->t1, p256_mod, q->y); sp_256_norm_avx2_4(ctx->t1); - if ((sp_256_cmp_equal_avx2_4(p->x, q->x) & sp_256_cmp_equal_avx2_4(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_256_cmp_equal_avx2_4(p->x, q->x) & sp_256_cmp_equal_avx2_4(p->z, q->z) & (sp_256_cmp_equal_avx2_4(p->y, q->y) | sp_256_cmp_equal_avx2_4(p->y, ctx->t1))) != 0) { XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); @@ -9414,7 +9402,8 @@ static void sp_256_proj_point_add_avx2_4(sp_point_256* r, /* Check double */ (void)sp_256_sub_4(t1, p256_mod, q->y); sp_256_norm_4(t1); - if ((sp_256_cmp_equal_4(p->x, q->x) & sp_256_cmp_equal_4(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_256_cmp_equal_4(p->x, q->x) & sp_256_cmp_equal_4(p->z, q->z) & (sp_256_cmp_equal_4(p->y, q->y) | sp_256_cmp_equal_4(p->y, t1))) != 0) { sp_256_proj_point_dbl_avx2_4(r, p, t); } @@ -9447,7 +9436,8 @@ static void sp_256_proj_point_add_avx2_4(sp_point_256* r, sp_256_mont_sub_avx2_4(t2, t2, t1, p256_mod); /* R = S2 - S1 */ sp_256_mont_sub_avx2_4(t4, t4, t3, p256_mod); - if (sp_256_iszero_4(t2) & sp_256_iszero_4(t4) & maskt) { + if (~p->infinity & ~q->infinity & + sp_256_iszero_4(t2) & 
sp_256_iszero_4(t4) & maskt) { sp_256_proj_point_dbl_avx2_4(r, p, t); } else { @@ -9811,7 +9801,8 @@ static void sp_256_proj_point_add_qz1_4(sp_point_256* r, const sp_point_256* p, /* Check double */ (void)sp_256_sub_4(t1, p256_mod, q->y); sp_256_norm_4(t1); - if ((sp_256_cmp_equal_4(p->x, q->x) & sp_256_cmp_equal_4(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_256_cmp_equal_4(p->x, q->x) & sp_256_cmp_equal_4(p->z, q->z) & (sp_256_cmp_equal_4(p->y, q->y) | sp_256_cmp_equal_4(p->y, t1))) != 0) { sp_256_proj_point_dbl_4(r, p, t); } @@ -10270,7 +10261,8 @@ static void sp_256_proj_point_add_qz1_avx2_4(sp_point_256* r, const sp_point_256 /* Check double */ (void)sp_256_sub_4(t1, p256_mod, q->y); sp_256_norm_4(t1); - if ((sp_256_cmp_equal_4(p->x, q->x) & sp_256_cmp_equal_4(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_256_cmp_equal_4(p->x, q->x) & sp_256_cmp_equal_4(p->z, q->z) & (sp_256_cmp_equal_4(p->y, q->y) | sp_256_cmp_equal_4(p->y, t1))) != 0) { sp_256_proj_point_dbl_avx2_4(r, p, t); } @@ -26499,8 +26491,7 @@ static void sp_384_map_6(sp_point_384* r, const sp_point_384* p, sp_384_mont_reduce_6(r->x, p384_mod, p384_mp_mod); /* Reduce x to less than modulus */ n = sp_384_cmp_6(r->x, p384_mod); - sp_384_cond_sub_6(r->x, r->x, p384_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_384_cond_sub_6(r->x, r->x, p384_mod, ~(n >> 63)); sp_384_norm_6(r->x); /* y /= z^3 */ @@ -26509,8 +26500,7 @@ static void sp_384_map_6(sp_point_384* r, const sp_point_384* p, sp_384_mont_reduce_6(r->y, p384_mod, p384_mp_mod); /* Reduce y to less than modulus */ n = sp_384_cmp_6(r->y, p384_mod); - sp_384_cond_sub_6(r->y, r->y, p384_mod, 0 - ((n >= 0) ? 
- (sp_digit)1 : (sp_digit)0)); + sp_384_cond_sub_6(r->y, r->y, p384_mod, ~(n >> 63)); sp_384_norm_6(r->y); XMEMSET(r->z, 0, sizeof(r->z) / 2); @@ -26897,7 +26887,8 @@ static int sp_384_proj_point_add_6_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r, /* Check double */ (void)sp_384_sub_6(ctx->t1, p384_mod, q->y); sp_384_norm_6(ctx->t1); - if ((sp_384_cmp_equal_6(p->x, q->x) & sp_384_cmp_equal_6(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_384_cmp_equal_6(p->x, q->x) & sp_384_cmp_equal_6(p->z, q->z) & (sp_384_cmp_equal_6(p->y, q->y) | sp_384_cmp_equal_6(p->y, ctx->t1))) != 0) { XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); @@ -27065,7 +27056,8 @@ static void sp_384_proj_point_add_6(sp_point_384* r, /* Check double */ (void)sp_384_sub_6(t1, p384_mod, q->y); sp_384_norm_6(t1); - if ((sp_384_cmp_equal_6(p->x, q->x) & sp_384_cmp_equal_6(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_384_cmp_equal_6(p->x, q->x) & sp_384_cmp_equal_6(p->z, q->z) & (sp_384_cmp_equal_6(p->y, q->y) | sp_384_cmp_equal_6(p->y, t1))) != 0) { sp_384_proj_point_dbl_6(r, p, t); } @@ -27098,7 +27090,8 @@ static void sp_384_proj_point_add_6(sp_point_384* r, sp_384_mont_sub_6(t2, t2, t1, p384_mod); /* R = S2 - S1 */ sp_384_mont_sub_6(t4, t4, t3, p384_mod); - if (sp_384_iszero_6(t2) & sp_384_iszero_6(t4) & maskt) { + if (~p->infinity & ~q->infinity & + sp_384_iszero_6(t2) & sp_384_iszero_6(t4) & maskt) { sp_384_proj_point_dbl_6(r, p, t); } else { @@ -27676,8 +27669,7 @@ static void sp_384_map_avx2_6(sp_point_384* r, const sp_point_384* p, sp_384_mont_reduce_avx2_6(r->x, p384_mod, p384_mp_mod); /* Reduce x to less than modulus */ n = sp_384_cmp_6(r->x, p384_mod); - sp_384_cond_sub_6(r->x, r->x, p384_mod, 0 - ((n >= 0) ? 
- (sp_digit)1 : (sp_digit)0)); + sp_384_cond_sub_6(r->x, r->x, p384_mod, ~(n >> 63)); sp_384_norm_6(r->x); /* y /= z^3 */ @@ -27686,8 +27678,7 @@ static void sp_384_map_avx2_6(sp_point_384* r, const sp_point_384* p, sp_384_mont_reduce_avx2_6(r->y, p384_mod, p384_mp_mod); /* Reduce y to less than modulus */ n = sp_384_cmp_6(r->y, p384_mod); - sp_384_cond_sub_avx2_6(r->y, r->y, p384_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_384_cond_sub_avx2_6(r->y, r->y, p384_mod, ~(n >> 63)); sp_384_norm_6(r->y); XMEMSET(r->z, 0, sizeof(r->z) / 2); @@ -28050,7 +28041,8 @@ static int sp_384_proj_point_add_avx2_6_nb(sp_ecc_ctx_t* sp_ctx, sp_point_384* r /* Check double */ (void)sp_384_sub_avx2_6(ctx->t1, p384_mod, q->y); sp_384_norm_avx2_6(ctx->t1); - if ((sp_384_cmp_equal_avx2_6(p->x, q->x) & sp_384_cmp_equal_avx2_6(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_384_cmp_equal_avx2_6(p->x, q->x) & sp_384_cmp_equal_avx2_6(p->z, q->z) & (sp_384_cmp_equal_avx2_6(p->y, q->y) | sp_384_cmp_equal_avx2_6(p->y, ctx->t1))) != 0) { XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); @@ -28218,7 +28210,8 @@ static void sp_384_proj_point_add_avx2_6(sp_point_384* r, /* Check double */ (void)sp_384_sub_6(t1, p384_mod, q->y); sp_384_norm_6(t1); - if ((sp_384_cmp_equal_6(p->x, q->x) & sp_384_cmp_equal_6(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_384_cmp_equal_6(p->x, q->x) & sp_384_cmp_equal_6(p->z, q->z) & (sp_384_cmp_equal_6(p->y, q->y) | sp_384_cmp_equal_6(p->y, t1))) != 0) { sp_384_proj_point_dbl_avx2_6(r, p, t); } @@ -28251,7 +28244,8 @@ static void sp_384_proj_point_add_avx2_6(sp_point_384* r, sp_384_mont_sub_avx2_6(t2, t2, t1, p384_mod); /* R = S2 - S1 */ sp_384_mont_sub_avx2_6(t4, t4, t3, p384_mod); - if (sp_384_iszero_6(t2) & sp_384_iszero_6(t4) & maskt) { + if (~p->infinity & ~q->infinity & + sp_384_iszero_6(t2) & sp_384_iszero_6(t4) & maskt) { sp_384_proj_point_dbl_avx2_6(r, p, t); } else { @@ -28619,7 +28613,8 @@ static void 
sp_384_proj_point_add_qz1_6(sp_point_384* r, const sp_point_384* p, /* Check double */ (void)sp_384_sub_6(t1, p384_mod, q->y); sp_384_norm_6(t1); - if ((sp_384_cmp_equal_6(p->x, q->x) & sp_384_cmp_equal_6(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_384_cmp_equal_6(p->x, q->x) & sp_384_cmp_equal_6(p->z, q->z) & (sp_384_cmp_equal_6(p->y, q->y) | sp_384_cmp_equal_6(p->y, t1))) != 0) { sp_384_proj_point_dbl_6(r, p, t); } @@ -29079,7 +29074,8 @@ static void sp_384_proj_point_add_qz1_avx2_6(sp_point_384* r, const sp_point_384 /* Check double */ (void)sp_384_sub_6(t1, p384_mod, q->y); sp_384_norm_6(t1); - if ((sp_384_cmp_equal_6(p->x, q->x) & sp_384_cmp_equal_6(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_384_cmp_equal_6(p->x, q->x) & sp_384_cmp_equal_6(p->z, q->z) & (sp_384_cmp_equal_6(p->y, q->y) | sp_384_cmp_equal_6(p->y, t1))) != 0) { sp_384_proj_point_dbl_avx2_6(r, p, t); } @@ -51011,8 +51007,7 @@ static void sp_521_map_9(sp_point_521* r, const sp_point_521* p, sp_521_mont_reduce_9(r->x, p521_mod, p521_mp_mod); /* Reduce x to less than modulus */ n = sp_521_cmp_9(r->x, p521_mod); - sp_521_cond_sub_9(r->x, r->x, p521_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_521_cond_sub_9(r->x, r->x, p521_mod, ~(n >> 63)); sp_521_norm_9(r->x); /* y /= z^3 */ @@ -51021,8 +51016,7 @@ static void sp_521_map_9(sp_point_521* r, const sp_point_521* p, sp_521_mont_reduce_9(r->y, p521_mod, p521_mp_mod); /* Reduce y to less than modulus */ n = sp_521_cmp_9(r->y, p521_mod); - sp_521_cond_sub_9(r->y, r->y, p521_mod, 0 - ((n >= 0) ? 
- (sp_digit)1 : (sp_digit)0)); + sp_521_cond_sub_9(r->y, r->y, p521_mod, ~(n >> 63)); sp_521_norm_9(r->y); XMEMSET(r->z, 0, sizeof(r->z) / 2); @@ -51411,7 +51405,8 @@ static int sp_521_proj_point_add_9_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r, /* Check double */ (void)sp_521_sub_9(ctx->t1, p521_mod, q->y); sp_521_norm_9(ctx->t1); - if ((sp_521_cmp_equal_9(p->x, q->x) & sp_521_cmp_equal_9(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_521_cmp_equal_9(p->x, q->x) & sp_521_cmp_equal_9(p->z, q->z) & (sp_521_cmp_equal_9(p->y, q->y) | sp_521_cmp_equal_9(p->y, ctx->t1))) != 0) { XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); @@ -51579,7 +51574,8 @@ static void sp_521_proj_point_add_9(sp_point_521* r, /* Check double */ (void)sp_521_sub_9(t1, p521_mod, q->y); sp_521_norm_9(t1); - if ((sp_521_cmp_equal_9(p->x, q->x) & sp_521_cmp_equal_9(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_521_cmp_equal_9(p->x, q->x) & sp_521_cmp_equal_9(p->z, q->z) & (sp_521_cmp_equal_9(p->y, q->y) | sp_521_cmp_equal_9(p->y, t1))) != 0) { sp_521_proj_point_dbl_9(r, p, t); } @@ -51612,7 +51608,8 @@ static void sp_521_proj_point_add_9(sp_point_521* r, sp_521_mont_sub_9(t2, t2, t1, p521_mod); /* R = S2 - S1 */ sp_521_mont_sub_9(t4, t4, t3, p521_mod); - if (sp_521_iszero_9(t2) & sp_521_iszero_9(t4) & maskt) { + if (~p->infinity & ~q->infinity & + sp_521_iszero_9(t2) & sp_521_iszero_9(t4) & maskt) { sp_521_proj_point_dbl_9(r, p, t); } else { @@ -52155,8 +52152,7 @@ static void sp_521_map_avx2_9(sp_point_521* r, const sp_point_521* p, sp_521_mont_reduce_avx2_9(r->x, p521_mod, p521_mp_mod); /* Reduce x to less than modulus */ n = sp_521_cmp_9(r->x, p521_mod); - sp_521_cond_sub_9(r->x, r->x, p521_mod, 0 - ((n >= 0) ? 
- (sp_digit)1 : (sp_digit)0)); + sp_521_cond_sub_9(r->x, r->x, p521_mod, ~(n >> 63)); sp_521_norm_9(r->x); /* y /= z^3 */ @@ -52165,8 +52161,7 @@ static void sp_521_map_avx2_9(sp_point_521* r, const sp_point_521* p, sp_521_mont_reduce_avx2_9(r->y, p521_mod, p521_mp_mod); /* Reduce y to less than modulus */ n = sp_521_cmp_9(r->y, p521_mod); - sp_521_cond_sub_avx2_9(r->y, r->y, p521_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_521_cond_sub_avx2_9(r->y, r->y, p521_mod, ~(n >> 63)); sp_521_norm_9(r->y); XMEMSET(r->z, 0, sizeof(r->z) / 2); @@ -52529,7 +52524,8 @@ static int sp_521_proj_point_add_avx2_9_nb(sp_ecc_ctx_t* sp_ctx, sp_point_521* r /* Check double */ (void)sp_521_sub_avx2_9(ctx->t1, p521_mod, q->y); sp_521_norm_avx2_9(ctx->t1); - if ((sp_521_cmp_equal_avx2_9(p->x, q->x) & sp_521_cmp_equal_avx2_9(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_521_cmp_equal_avx2_9(p->x, q->x) & sp_521_cmp_equal_avx2_9(p->z, q->z) & (sp_521_cmp_equal_avx2_9(p->y, q->y) | sp_521_cmp_equal_avx2_9(p->y, ctx->t1))) != 0) { XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); @@ -52697,7 +52693,8 @@ static void sp_521_proj_point_add_avx2_9(sp_point_521* r, /* Check double */ (void)sp_521_sub_9(t1, p521_mod, q->y); sp_521_norm_9(t1); - if ((sp_521_cmp_equal_9(p->x, q->x) & sp_521_cmp_equal_9(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_521_cmp_equal_9(p->x, q->x) & sp_521_cmp_equal_9(p->z, q->z) & (sp_521_cmp_equal_9(p->y, q->y) | sp_521_cmp_equal_9(p->y, t1))) != 0) { sp_521_proj_point_dbl_avx2_9(r, p, t); } @@ -52730,7 +52727,8 @@ static void sp_521_proj_point_add_avx2_9(sp_point_521* r, sp_521_mont_sub_avx2_9(t2, t2, t1, p521_mod); /* R = S2 - S1 */ sp_521_mont_sub_avx2_9(t4, t4, t3, p521_mod); - if (sp_521_iszero_9(t2) & sp_521_iszero_9(t4) & maskt) { + if (~p->infinity & ~q->infinity & + sp_521_iszero_9(t2) & sp_521_iszero_9(t4) & maskt) { sp_521_proj_point_dbl_avx2_9(r, p, t); } else { @@ -53098,7 +53096,8 @@ static void 
sp_521_proj_point_add_qz1_9(sp_point_521* r, const sp_point_521* p, /* Check double */ (void)sp_521_sub_9(t1, p521_mod, q->y); sp_521_norm_9(t1); - if ((sp_521_cmp_equal_9(p->x, q->x) & sp_521_cmp_equal_9(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_521_cmp_equal_9(p->x, q->x) & sp_521_cmp_equal_9(p->z, q->z) & (sp_521_cmp_equal_9(p->y, q->y) | sp_521_cmp_equal_9(p->y, t1))) != 0) { sp_521_proj_point_dbl_9(r, p, t); } @@ -53558,7 +53557,8 @@ static void sp_521_proj_point_add_qz1_avx2_9(sp_point_521* r, const sp_point_521 /* Check double */ (void)sp_521_sub_9(t1, p521_mod, q->y); sp_521_norm_9(t1); - if ((sp_521_cmp_equal_9(p->x, q->x) & sp_521_cmp_equal_9(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_521_cmp_equal_9(p->x, q->x) & sp_521_cmp_equal_9(p->z, q->z) & (sp_521_cmp_equal_9(p->y, q->y) | sp_521_cmp_equal_9(p->y, t1))) != 0) { sp_521_proj_point_dbl_avx2_9(r, p, t); } @@ -91834,8 +91834,7 @@ static void sp_1024_map_16(sp_point_1024* r, const sp_point_1024* p, sp_1024_mont_reduce_16(r->x, p1024_mod, p1024_mp_mod); /* Reduce x to less than modulus */ n = sp_1024_cmp_16(r->x, p1024_mod); - sp_1024_cond_sub_16(r->x, r->x, p1024_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_1024_cond_sub_16(r->x, r->x, p1024_mod, ~(n >> 63)); sp_1024_norm_16(r->x); /* y /= z^3 */ @@ -91844,8 +91843,7 @@ static void sp_1024_map_16(sp_point_1024* r, const sp_point_1024* p, sp_1024_mont_reduce_16(r->y, p1024_mod, p1024_mp_mod); /* Reduce y to less than modulus */ n = sp_1024_cmp_16(r->y, p1024_mod); - sp_1024_cond_sub_16(r->y, r->y, p1024_mod, 0 - ((n >= 0) ? 
- (sp_digit)1 : (sp_digit)0)); + sp_1024_cond_sub_16(r->y, r->y, p1024_mod, ~(n >> 63)); sp_1024_norm_16(r->y); XMEMSET(r->z, 0, sizeof(r->z) / 2); @@ -92238,7 +92236,8 @@ static int sp_1024_proj_point_add_16_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024* r, /* Check double */ (void)sp_1024_sub_16(ctx->t1, p1024_mod, q->y); sp_1024_norm_16(ctx->t1); - if ((sp_1024_cmp_equal_16(p->x, q->x) & sp_1024_cmp_equal_16(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_1024_cmp_equal_16(p->x, q->x) & sp_1024_cmp_equal_16(p->z, q->z) & (sp_1024_cmp_equal_16(p->y, q->y) | sp_1024_cmp_equal_16(p->y, ctx->t1))) != 0) { XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); @@ -92406,7 +92405,8 @@ static void sp_1024_proj_point_add_16(sp_point_1024* r, /* Check double */ (void)sp_1024_mont_sub_16(t1, p1024_mod, q->y, p1024_mod); sp_1024_norm_16(t1); - if ((sp_1024_cmp_equal_16(p->x, q->x) & sp_1024_cmp_equal_16(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_1024_cmp_equal_16(p->x, q->x) & sp_1024_cmp_equal_16(p->z, q->z) & (sp_1024_cmp_equal_16(p->y, q->y) | sp_1024_cmp_equal_16(p->y, t1))) != 0) { sp_1024_proj_point_dbl_16(r, p, t); } @@ -92439,7 +92439,8 @@ static void sp_1024_proj_point_add_16(sp_point_1024* r, sp_1024_mont_sub_16(t2, t2, t1, p1024_mod); /* R = S2 - S1 */ sp_1024_mont_sub_16(t4, t4, t3, p1024_mod); - if (sp_1024_iszero_16(t2) & sp_1024_iszero_16(t4) & maskt) { + if (~p->infinity & ~q->infinity & + sp_1024_iszero_16(t2) & sp_1024_iszero_16(t4) & maskt) { sp_1024_proj_point_dbl_16(r, p, t); } else { @@ -92955,8 +92956,7 @@ static void sp_1024_map_avx2_16(sp_point_1024* r, const sp_point_1024* p, sp_1024_mont_reduce_avx2_16(r->x, p1024_mod, p1024_mp_mod); /* Reduce x to less than modulus */ n = sp_1024_cmp_16(r->x, p1024_mod); - sp_1024_cond_sub_16(r->x, r->x, p1024_mod, 0 - ((n >= 0) ? 
- (sp_digit)1 : (sp_digit)0)); + sp_1024_cond_sub_16(r->x, r->x, p1024_mod, ~(n >> 63)); sp_1024_norm_16(r->x); /* y /= z^3 */ @@ -92965,8 +92965,7 @@ static void sp_1024_map_avx2_16(sp_point_1024* r, const sp_point_1024* p, sp_1024_mont_reduce_avx2_16(r->y, p1024_mod, p1024_mp_mod); /* Reduce y to less than modulus */ n = sp_1024_cmp_16(r->y, p1024_mod); - sp_1024_cond_sub_avx2_16(r->y, r->y, p1024_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_1024_cond_sub_avx2_16(r->y, r->y, p1024_mod, ~(n >> 63)); sp_1024_norm_16(r->y); XMEMSET(r->z, 0, sizeof(r->z) / 2); @@ -93329,7 +93328,8 @@ static int sp_1024_proj_point_add_avx2_16_nb(sp_ecc_ctx_t* sp_ctx, sp_point_1024 /* Check double */ (void)sp_1024_sub_avx2_16(ctx->t1, p1024_mod, q->y); sp_1024_norm_avx2_16(ctx->t1); - if ((sp_1024_cmp_equal_avx2_16(p->x, q->x) & sp_1024_cmp_equal_avx2_16(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_1024_cmp_equal_avx2_16(p->x, q->x) & sp_1024_cmp_equal_avx2_16(p->z, q->z) & (sp_1024_cmp_equal_avx2_16(p->y, q->y) | sp_1024_cmp_equal_avx2_16(p->y, ctx->t1))) != 0) { XMEMSET(&ctx->dbl_ctx, 0, sizeof(ctx->dbl_ctx)); @@ -93497,7 +93497,8 @@ static void sp_1024_proj_point_add_avx2_16(sp_point_1024* r, /* Check double */ (void)sp_1024_mont_sub_avx2_16(t1, p1024_mod, q->y, p1024_mod); sp_1024_norm_16(t1); - if ((sp_1024_cmp_equal_16(p->x, q->x) & sp_1024_cmp_equal_16(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_1024_cmp_equal_16(p->x, q->x) & sp_1024_cmp_equal_16(p->z, q->z) & (sp_1024_cmp_equal_16(p->y, q->y) | sp_1024_cmp_equal_16(p->y, t1))) != 0) { sp_1024_proj_point_dbl_avx2_16(r, p, t); } @@ -93530,7 +93531,8 @@ static void sp_1024_proj_point_add_avx2_16(sp_point_1024* r, sp_1024_mont_sub_avx2_16(t2, t2, t1, p1024_mod); /* R = S2 - S1 */ sp_1024_mont_sub_avx2_16(t4, t4, t3, p1024_mod); - if (sp_1024_iszero_16(t2) & sp_1024_iszero_16(t4) & maskt) { + if (~p->infinity & ~q->infinity & + sp_1024_iszero_16(t2) & sp_1024_iszero_16(t4) & maskt) { 
sp_1024_proj_point_dbl_avx2_16(r, p, t); } else { @@ -93902,7 +93904,8 @@ static void sp_1024_proj_point_add_qz1_16(sp_point_1024* r, const sp_point_1024* /* Check double */ (void)sp_1024_mont_sub_16(t1, p1024_mod, q->y, p1024_mod); sp_1024_norm_16(t1); - if ((sp_1024_cmp_equal_16(p->x, q->x) & sp_1024_cmp_equal_16(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_1024_cmp_equal_16(p->x, q->x) & sp_1024_cmp_equal_16(p->z, q->z) & (sp_1024_cmp_equal_16(p->y, q->y) | sp_1024_cmp_equal_16(p->y, t1))) != 0) { sp_1024_proj_point_dbl_16(r, p, t); } @@ -94343,7 +94346,8 @@ static void sp_1024_proj_point_add_qz1_avx2_16(sp_point_1024* r, const sp_point_ /* Check double */ (void)sp_1024_mont_sub_avx2_16(t1, p1024_mod, q->y, p1024_mod); sp_1024_norm_16(t1); - if ((sp_1024_cmp_equal_16(p->x, q->x) & sp_1024_cmp_equal_16(p->z, q->z) & + if ((~p->infinity & ~q->infinity & + sp_1024_cmp_equal_16(p->x, q->x) & sp_1024_cmp_equal_16(p->z, q->z) & (sp_1024_cmp_equal_16(p->y, q->y) | sp_1024_cmp_equal_16(p->y, t1))) != 0) { sp_1024_proj_point_dbl_avx2_16(r, p, t); } @@ -103359,8 +103363,7 @@ static int sp_1024_ecc_is_point_16(const sp_point_1024* point, sp_1024_mont_add_16(t1, t1, point->x, p1024_mod); n = sp_1024_cmp_16(t1, p1024_mod); - sp_1024_cond_sub_16(t1, t1, p1024_mod, 0 - ((n >= 0) ? - (sp_digit)1 : (sp_digit)0)); + sp_1024_cond_sub_16(t1, t1, p1024_mod, ~(n >> 63)); sp_1024_norm_16(t1); if (!sp_1024_iszero_16(t1)) { err = MP_VAL;